Total coverage: 471039 (23%) of 2092511
40 29 16 18 20 298 332 333 310 328 32 32 310 33 39 39 33 6 13 13 10 16 4 6 286 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (c) 2023 Isovalent */ #ifndef __NET_TCX_H #define __NET_TCX_H #include <linux/bpf.h> #include <linux/bpf_mprog.h> #include <net/sch_generic.h> struct mini_Qdisc; struct tcx_entry { struct mini_Qdisc __rcu *miniq; struct bpf_mprog_bundle bundle; u32 miniq_active; struct rcu_head rcu; }; struct tcx_link { struct bpf_link link; struct net_device *dev; }; static inline void tcx_set_ingress(struct sk_buff *skb, bool ingress) { #ifdef CONFIG_NET_XGRESS skb->tc_at_ingress = ingress; #endif } #ifdef CONFIG_NET_XGRESS static inline struct tcx_entry *tcx_entry(struct bpf_mprog_entry *entry) { struct bpf_mprog_bundle *bundle = entry->parent; return container_of(bundle, struct tcx_entry, bundle); } static inline struct tcx_link *tcx_link(const struct bpf_link *link) { return container_of(link, struct tcx_link, link); } void tcx_inc(void); void tcx_dec(void); static inline void tcx_entry_sync(void) { /* bpf_mprog_entry got a/b swapped, therefore ensure that * there are no inflight users on the old one anymore. 
*/ synchronize_rcu(); } static inline void tcx_entry_update(struct net_device *dev, struct bpf_mprog_entry *entry, bool ingress) { ASSERT_RTNL(); if (ingress) rcu_assign_pointer(dev->tcx_ingress, entry); else rcu_assign_pointer(dev->tcx_egress, entry); } static inline struct bpf_mprog_entry * tcx_entry_fetch(struct net_device *dev, bool ingress) { ASSERT_RTNL(); if (ingress) return rcu_dereference_rtnl(dev->tcx_ingress); else return rcu_dereference_rtnl(dev->tcx_egress); } static inline struct bpf_mprog_entry *tcx_entry_create_noprof(void) { struct tcx_entry *tcx = kzalloc_noprof(sizeof(*tcx), GFP_KERNEL); if (tcx) { bpf_mprog_bundle_init(&tcx->bundle); return &tcx->bundle.a; } return NULL; } #define tcx_entry_create(...) alloc_hooks(tcx_entry_create_noprof(__VA_ARGS__)) static inline void tcx_entry_free(struct bpf_mprog_entry *entry) { kfree_rcu(tcx_entry(entry), rcu); } static inline struct bpf_mprog_entry * tcx_entry_fetch_or_create(struct net_device *dev, bool ingress, bool *created) { struct bpf_mprog_entry *entry = tcx_entry_fetch(dev, ingress); *created = false; if (!entry) { entry = tcx_entry_create(); if (!entry) return NULL; *created = true; } return entry; } static inline void tcx_skeys_inc(bool ingress) { tcx_inc(); if (ingress) net_inc_ingress_queue(); else net_inc_egress_queue(); } static inline void tcx_skeys_dec(bool ingress) { if (ingress) net_dec_ingress_queue(); else net_dec_egress_queue(); tcx_dec(); } static inline void tcx_miniq_inc(struct bpf_mprog_entry *entry) { ASSERT_RTNL(); tcx_entry(entry)->miniq_active++; } static inline void tcx_miniq_dec(struct bpf_mprog_entry *entry) { ASSERT_RTNL(); tcx_entry(entry)->miniq_active--; } static inline bool tcx_entry_is_active(struct bpf_mprog_entry *entry) { ASSERT_RTNL(); return bpf_mprog_total(entry) || tcx_entry(entry)->miniq_active; } static inline enum tcx_action_base tcx_action_code(struct sk_buff *skb, int code) { switch (code) { case TCX_PASS: skb->tc_index = qdisc_skb_cb(skb)->tc_classid; 
fallthrough; case TCX_DROP: case TCX_REDIRECT: return code; case TCX_NEXT: default: return TCX_NEXT; } } #endif /* CONFIG_NET_XGRESS */ #if defined(CONFIG_NET_XGRESS) && defined(CONFIG_BPF_SYSCALL) int tcx_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog); int tcx_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); int tcx_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog); void tcx_uninstall(struct net_device *dev, bool ingress); int tcx_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); static inline void dev_tcx_uninstall(struct net_device *dev) { ASSERT_RTNL(); tcx_uninstall(dev, true); tcx_uninstall(dev, false); } #else static inline int tcx_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) { return -EINVAL; } static inline int tcx_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { return -EINVAL; } static inline int tcx_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog) { return -EINVAL; } static inline int tcx_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) { return -EINVAL; } static inline void dev_tcx_uninstall(struct net_device *dev) { } #endif /* CONFIG_NET_XGRESS && CONFIG_BPF_SYSCALL */ #endif /* __NET_TCX_H */
13 3 3 3 13 10 10 10 378 471 472 8 8 2 2 2 1 1 1 1 1 1 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 5 5 5 5 5 5 5 5 5 5 830 474 378 831 828 8 8 4 3 3 2 1 5 5 869 833 828 834 872 940 940 925 509 506 869 385 381 162 872 84 82 78 84 64 84 84 84 84 84 122 117 84 163 162 162 102 102 102 12 90 731 730 392 731 391 66 328 796 804 800 283 284 35 247 11326 11320 1470 710 11330 366 19 350 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 
432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 // SPDX-License-Identifier: GPL-2.0 /* * security/tomoyo/network.c * * Copyright (C) 2005-2011 NTT DATA CORPORATION */ #include "common.h" #include <linux/slab.h> /* Structure for holding inet domain socket's address. */ struct tomoyo_inet_addr_info { __be16 port; /* In network byte order. */ const __be32 *address; /* In network byte order. */ bool is_ipv6; }; /* Structure for holding unix domain socket's address. */ struct tomoyo_unix_addr_info { u8 *addr; /* This may not be '\0' terminated string. */ unsigned int addr_len; }; /* Structure for holding socket address. 
*/ struct tomoyo_addr_info { u8 protocol; u8 operation; struct tomoyo_inet_addr_info inet; struct tomoyo_unix_addr_info unix0; }; /* String table for socket's protocols. */ const char * const tomoyo_proto_keyword[TOMOYO_SOCK_MAX] = { [SOCK_STREAM] = "stream", [SOCK_DGRAM] = "dgram", [SOCK_RAW] = "raw", [SOCK_SEQPACKET] = "seqpacket", [0] = " ", /* Dummy for avoiding NULL pointer dereference. */ [4] = " ", /* Dummy for avoiding NULL pointer dereference. */ }; /** * tomoyo_parse_ipaddr_union - Parse an IP address. * * @param: Pointer to "struct tomoyo_acl_param". * @ptr: Pointer to "struct tomoyo_ipaddr_union". * * Returns true on success, false otherwise. */ bool tomoyo_parse_ipaddr_union(struct tomoyo_acl_param *param, struct tomoyo_ipaddr_union *ptr) { u8 * const min = ptr->ip[0].in6_u.u6_addr8; u8 * const max = ptr->ip[1].in6_u.u6_addr8; char *address = tomoyo_read_token(param); const char *end; if (!strchr(address, ':') && in4_pton(address, -1, min, '-', &end) > 0) { ptr->is_ipv6 = false; if (!*end) ptr->ip[1].s6_addr32[0] = ptr->ip[0].s6_addr32[0]; else if (*end++ != '-' || in4_pton(end, -1, max, '\0', &end) <= 0 || *end) return false; return true; } if (in6_pton(address, -1, min, '-', &end) > 0) { ptr->is_ipv6 = true; if (!*end) memmove(max, min, sizeof(u16) * 8); else if (*end++ != '-' || in6_pton(end, -1, max, '\0', &end) <= 0 || *end) return false; return true; } return false; } /** * tomoyo_print_ipv4 - Print an IPv4 address. * * @buffer: Buffer to write to. * @buffer_len: Size of @buffer. * @min_ip: Pointer to __be32. * @max_ip: Pointer to __be32. * * Returns nothing. */ static void tomoyo_print_ipv4(char *buffer, const unsigned int buffer_len, const __be32 *min_ip, const __be32 *max_ip) { snprintf(buffer, buffer_len, "%pI4%c%pI4", min_ip, *min_ip == *max_ip ? '\0' : '-', max_ip); } /** * tomoyo_print_ipv6 - Print an IPv6 address. * * @buffer: Buffer to write to. * @buffer_len: Size of @buffer. * @min_ip: Pointer to "struct in6_addr". 
* @max_ip: Pointer to "struct in6_addr". * * Returns nothing. */ static void tomoyo_print_ipv6(char *buffer, const unsigned int buffer_len, const struct in6_addr *min_ip, const struct in6_addr *max_ip) { snprintf(buffer, buffer_len, "%pI6c%c%pI6c", min_ip, !memcmp(min_ip, max_ip, 16) ? '\0' : '-', max_ip); } /** * tomoyo_print_ip - Print an IP address. * * @buf: Buffer to write to. * @size: Size of @buf. * @ptr: Pointer to "struct ipaddr_union". * * Returns nothing. */ void tomoyo_print_ip(char *buf, const unsigned int size, const struct tomoyo_ipaddr_union *ptr) { if (ptr->is_ipv6) tomoyo_print_ipv6(buf, size, &ptr->ip[0], &ptr->ip[1]); else tomoyo_print_ipv4(buf, size, &ptr->ip[0].s6_addr32[0], &ptr->ip[1].s6_addr32[0]); } /* * Mapping table from "enum tomoyo_network_acl_index" to * "enum tomoyo_mac_index" for inet domain socket. */ static const u8 tomoyo_inet2mac [TOMOYO_SOCK_MAX][TOMOYO_MAX_NETWORK_OPERATION] = { [SOCK_STREAM] = { [TOMOYO_NETWORK_BIND] = TOMOYO_MAC_NETWORK_INET_STREAM_BIND, [TOMOYO_NETWORK_LISTEN] = TOMOYO_MAC_NETWORK_INET_STREAM_LISTEN, [TOMOYO_NETWORK_CONNECT] = TOMOYO_MAC_NETWORK_INET_STREAM_CONNECT, }, [SOCK_DGRAM] = { [TOMOYO_NETWORK_BIND] = TOMOYO_MAC_NETWORK_INET_DGRAM_BIND, [TOMOYO_NETWORK_SEND] = TOMOYO_MAC_NETWORK_INET_DGRAM_SEND, }, [SOCK_RAW] = { [TOMOYO_NETWORK_BIND] = TOMOYO_MAC_NETWORK_INET_RAW_BIND, [TOMOYO_NETWORK_SEND] = TOMOYO_MAC_NETWORK_INET_RAW_SEND, }, }; /* * Mapping table from "enum tomoyo_network_acl_index" to * "enum tomoyo_mac_index" for unix domain socket. 
*/ static const u8 tomoyo_unix2mac [TOMOYO_SOCK_MAX][TOMOYO_MAX_NETWORK_OPERATION] = { [SOCK_STREAM] = { [TOMOYO_NETWORK_BIND] = TOMOYO_MAC_NETWORK_UNIX_STREAM_BIND, [TOMOYO_NETWORK_LISTEN] = TOMOYO_MAC_NETWORK_UNIX_STREAM_LISTEN, [TOMOYO_NETWORK_CONNECT] = TOMOYO_MAC_NETWORK_UNIX_STREAM_CONNECT, }, [SOCK_DGRAM] = { [TOMOYO_NETWORK_BIND] = TOMOYO_MAC_NETWORK_UNIX_DGRAM_BIND, [TOMOYO_NETWORK_SEND] = TOMOYO_MAC_NETWORK_UNIX_DGRAM_SEND, }, [SOCK_SEQPACKET] = { [TOMOYO_NETWORK_BIND] = TOMOYO_MAC_NETWORK_UNIX_SEQPACKET_BIND, [TOMOYO_NETWORK_LISTEN] = TOMOYO_MAC_NETWORK_UNIX_SEQPACKET_LISTEN, [TOMOYO_NETWORK_CONNECT] = TOMOYO_MAC_NETWORK_UNIX_SEQPACKET_CONNECT, }, }; /** * tomoyo_same_inet_acl - Check for duplicated "struct tomoyo_inet_acl" entry. * * @a: Pointer to "struct tomoyo_acl_info". * @b: Pointer to "struct tomoyo_acl_info". * * Returns true if @a == @b except permission bits, false otherwise. */ static bool tomoyo_same_inet_acl(const struct tomoyo_acl_info *a, const struct tomoyo_acl_info *b) { const struct tomoyo_inet_acl *p1 = container_of(a, typeof(*p1), head); const struct tomoyo_inet_acl *p2 = container_of(b, typeof(*p2), head); return p1->protocol == p2->protocol && tomoyo_same_ipaddr_union(&p1->address, &p2->address) && tomoyo_same_number_union(&p1->port, &p2->port); } /** * tomoyo_same_unix_acl - Check for duplicated "struct tomoyo_unix_acl" entry. * * @a: Pointer to "struct tomoyo_acl_info". * @b: Pointer to "struct tomoyo_acl_info". * * Returns true if @a == @b except permission bits, false otherwise. */ static bool tomoyo_same_unix_acl(const struct tomoyo_acl_info *a, const struct tomoyo_acl_info *b) { const struct tomoyo_unix_acl *p1 = container_of(a, typeof(*p1), head); const struct tomoyo_unix_acl *p2 = container_of(b, typeof(*p2), head); return p1->protocol == p2->protocol && tomoyo_same_name_union(&p1->name, &p2->name); } /** * tomoyo_merge_inet_acl - Merge duplicated "struct tomoyo_inet_acl" entry. * * @a: Pointer to "struct tomoyo_acl_info". 
* @b: Pointer to "struct tomoyo_acl_info". * @is_delete: True for @a &= ~@b, false for @a |= @b. * * Returns true if @a is empty, false otherwise. */ static bool tomoyo_merge_inet_acl(struct tomoyo_acl_info *a, struct tomoyo_acl_info *b, const bool is_delete) { u8 * const a_perm = &container_of(a, struct tomoyo_inet_acl, head)->perm; u8 perm = READ_ONCE(*a_perm); const u8 b_perm = container_of(b, struct tomoyo_inet_acl, head)->perm; if (is_delete) perm &= ~b_perm; else perm |= b_perm; WRITE_ONCE(*a_perm, perm); return !perm; } /** * tomoyo_merge_unix_acl - Merge duplicated "struct tomoyo_unix_acl" entry. * * @a: Pointer to "struct tomoyo_acl_info". * @b: Pointer to "struct tomoyo_acl_info". * @is_delete: True for @a &= ~@b, false for @a |= @b. * * Returns true if @a is empty, false otherwise. */ static bool tomoyo_merge_unix_acl(struct tomoyo_acl_info *a, struct tomoyo_acl_info *b, const bool is_delete) { u8 * const a_perm = &container_of(a, struct tomoyo_unix_acl, head)->perm; u8 perm = READ_ONCE(*a_perm); const u8 b_perm = container_of(b, struct tomoyo_unix_acl, head)->perm; if (is_delete) perm &= ~b_perm; else perm |= b_perm; WRITE_ONCE(*a_perm, perm); return !perm; } /** * tomoyo_write_inet_network - Write "struct tomoyo_inet_acl" list. * * @param: Pointer to "struct tomoyo_acl_param". * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). 
*/
int tomoyo_write_inet_network(struct tomoyo_acl_param *param)
{
	struct tomoyo_inet_acl e = { .head.type = TOMOYO_TYPE_INET_ACL };
	int error = -EINVAL;
	u8 op;
	const char *proto_name = tomoyo_read_token(param);
	const char *op_list = tomoyo_read_token(param);

	/* Look the protocol keyword up; TOMOYO_SOCK_MAX means "not found". */
	e.protocol = 0;
	while (e.protocol < TOMOYO_SOCK_MAX &&
	       strcmp(proto_name, tomoyo_proto_keyword[e.protocol]))
		e.protocol++;

	/* Collect one permission bit per operation keyword that matches. */
	for (op = 0; op < TOMOYO_MAX_NETWORK_OPERATION; op++) {
		if (tomoyo_permstr(op_list, tomoyo_socket_keyword[op]))
			e.perm |= 1 << op;
	}

	if (e.protocol == TOMOYO_SOCK_MAX || !e.perm)
		return -EINVAL;

	if (param->data[0] == '@') {
		/* "@name" refers to an address group instead of a literal. */
		param->data++;
		e.address.group =
			tomoyo_get_group(param, TOMOYO_ADDRESS_GROUP);
		if (!e.address.group)
			return -ENOMEM;
	} else if (!tomoyo_parse_ipaddr_union(param, &e.address)) {
		goto out;
	}

	if (!tomoyo_parse_number_union(param, &e.port))
		goto out;
	if (e.port.values[1] > 65535)
		goto out;

	error = tomoyo_update_domain(&e.head, sizeof(e), param,
				     tomoyo_same_inet_acl,
				     tomoyo_merge_inet_acl);
out:
	tomoyo_put_group(e.address.group);
	tomoyo_put_number_union(&e.port);
	return error;
}

/**
 * tomoyo_write_unix_network - Write "struct tomoyo_unix_acl" list.
 *
 * @param: Pointer to "struct tomoyo_acl_param".
 *
 * Returns 0 on success, negative value otherwise.
*/ int tomoyo_write_unix_network(struct tomoyo_acl_param *param) { struct tomoyo_unix_acl e = { .head.type = TOMOYO_TYPE_UNIX_ACL }; int error; u8 type; const char *protocol = tomoyo_read_token(param); const char *operation = tomoyo_read_token(param); for (e.protocol = 0; e.protocol < TOMOYO_SOCK_MAX; e.protocol++) if (!strcmp(protocol, tomoyo_proto_keyword[e.protocol])) break; for (type = 0; type < TOMOYO_MAX_NETWORK_OPERATION; type++) if (tomoyo_permstr(operation, tomoyo_socket_keyword[type])) e.perm |= 1 << type; if (e.protocol == TOMOYO_SOCK_MAX || !e.perm) return -EINVAL; if (!tomoyo_parse_name_union(param, &e.name)) return -EINVAL; error = tomoyo_update_domain(&e.head, sizeof(e), param, tomoyo_same_unix_acl, tomoyo_merge_unix_acl); tomoyo_put_name_union(&e.name); return error; } /** * tomoyo_audit_net_log - Audit network log. * * @r: Pointer to "struct tomoyo_request_info". * @family: Name of socket family ("inet" or "unix"). * @protocol: Name of protocol in @family. * @operation: Name of socket operation. * @address: Name of address. * * Returns 0 on success, negative value otherwise. */ static int tomoyo_audit_net_log(struct tomoyo_request_info *r, const char *family, const u8 protocol, const u8 operation, const char *address) __must_hold_shared(&tomoyo_ss) { return tomoyo_supervisor(r, "network %s %s %s %s\n", family, tomoyo_proto_keyword[protocol], tomoyo_socket_keyword[operation], address); } /** * tomoyo_audit_inet_log - Audit INET network log. * * @r: Pointer to "struct tomoyo_request_info". * * Returns 0 on success, negative value otherwise. 
*/ static int tomoyo_audit_inet_log(struct tomoyo_request_info *r) __must_hold_shared(&tomoyo_ss) { char buf[128]; int len; const __be32 *address = r->param.inet_network.address; if (r->param.inet_network.is_ipv6) tomoyo_print_ipv6(buf, sizeof(buf), (const struct in6_addr *) address, (const struct in6_addr *) address); else tomoyo_print_ipv4(buf, sizeof(buf), address, address); len = strlen(buf); snprintf(buf + len, sizeof(buf) - len, " %u", r->param.inet_network.port); return tomoyo_audit_net_log(r, "inet", r->param.inet_network.protocol, r->param.inet_network.operation, buf); } /** * tomoyo_audit_unix_log - Audit UNIX network log. * * @r: Pointer to "struct tomoyo_request_info". * * Returns 0 on success, negative value otherwise. */ static int tomoyo_audit_unix_log(struct tomoyo_request_info *r) __must_hold_shared(&tomoyo_ss) { return tomoyo_audit_net_log(r, "unix", r->param.unix_network.protocol, r->param.unix_network.operation, r->param.unix_network.address->name); } /** * tomoyo_check_inet_acl - Check permission for inet domain socket operation. * * @r: Pointer to "struct tomoyo_request_info". * @ptr: Pointer to "struct tomoyo_acl_info". * * Returns true if granted, false otherwise. */ static bool tomoyo_check_inet_acl(struct tomoyo_request_info *r, const struct tomoyo_acl_info *ptr) { const struct tomoyo_inet_acl *acl = container_of(ptr, typeof(*acl), head); const u8 size = r->param.inet_network.is_ipv6 ? 
16 : 4; if (!(acl->perm & (1 << r->param.inet_network.operation)) || !tomoyo_compare_number_union(r->param.inet_network.port, &acl->port)) return false; if (acl->address.group) return tomoyo_address_matches_group (r->param.inet_network.is_ipv6, r->param.inet_network.address, acl->address.group); return acl->address.is_ipv6 == r->param.inet_network.is_ipv6 && memcmp(&acl->address.ip[0], r->param.inet_network.address, size) <= 0 && memcmp(r->param.inet_network.address, &acl->address.ip[1], size) <= 0; } /** * tomoyo_check_unix_acl - Check permission for unix domain socket operation. * * @r: Pointer to "struct tomoyo_request_info". * @ptr: Pointer to "struct tomoyo_acl_info". * * Returns true if granted, false otherwise. */ static bool tomoyo_check_unix_acl(struct tomoyo_request_info *r, const struct tomoyo_acl_info *ptr) { const struct tomoyo_unix_acl *acl = container_of(ptr, typeof(*acl), head); return (acl->perm & (1 << r->param.unix_network.operation)) && tomoyo_compare_name_union(r->param.unix_network.address, &acl->name); } /** * tomoyo_inet_entry - Check permission for INET network operation. * * @address: Pointer to "struct tomoyo_addr_info". * * Returns 0 on success, negative value otherwise. 
*/ static int tomoyo_inet_entry(const struct tomoyo_addr_info *address) { const int idx = tomoyo_read_lock(); struct tomoyo_request_info r; int error = 0; const u8 type = tomoyo_inet2mac[address->protocol][address->operation]; if (type && tomoyo_init_request_info(&r, NULL, type) != TOMOYO_CONFIG_DISABLED) { r.param_type = TOMOYO_TYPE_INET_ACL; r.param.inet_network.protocol = address->protocol; r.param.inet_network.operation = address->operation; r.param.inet_network.is_ipv6 = address->inet.is_ipv6; r.param.inet_network.address = address->inet.address; r.param.inet_network.port = ntohs(address->inet.port); do { tomoyo_check_acl(&r, tomoyo_check_inet_acl); error = tomoyo_audit_inet_log(&r); } while (error == TOMOYO_RETRY_REQUEST); } tomoyo_read_unlock(idx); return error; } /** * tomoyo_check_inet_address - Check permission for inet domain socket's operation. * * @addr: Pointer to "struct sockaddr". * @addr_len: Size of @addr. * @port: Port number. * @address: Pointer to "struct tomoyo_addr_info". * * Returns 0 on success, negative value otherwise. */ static int tomoyo_check_inet_address(const struct sockaddr *addr, const unsigned int addr_len, const u16 port, struct tomoyo_addr_info *address) { struct tomoyo_inet_addr_info *i = &address->inet; if (addr_len < offsetofend(struct sockaddr, sa_family)) return 0; switch (addr->sa_family) { case AF_INET6: if (addr_len < SIN6_LEN_RFC2133) goto skip; i->is_ipv6 = true; i->address = (__be32 *) ((struct sockaddr_in6 *) addr)->sin6_addr.s6_addr; i->port = ((struct sockaddr_in6 *) addr)->sin6_port; break; case AF_INET: if (addr_len < sizeof(struct sockaddr_in)) goto skip; i->is_ipv6 = false; i->address = (__be32 *) &((struct sockaddr_in *) addr)->sin_addr; i->port = ((struct sockaddr_in *) addr)->sin_port; break; default: goto skip; } if (address->protocol == SOCK_RAW) i->port = htons(port); return tomoyo_inet_entry(address); skip: return 0; } /** * tomoyo_unix_entry - Check permission for UNIX network operation. 
* * @address: Pointer to "struct tomoyo_addr_info". * * Returns 0 on success, negative value otherwise. */ static int tomoyo_unix_entry(const struct tomoyo_addr_info *address) { const int idx = tomoyo_read_lock(); struct tomoyo_request_info r; int error = 0; const u8 type = tomoyo_unix2mac[address->protocol][address->operation]; if (type && tomoyo_init_request_info(&r, NULL, type) != TOMOYO_CONFIG_DISABLED) { char *buf = address->unix0.addr; int len = address->unix0.addr_len - sizeof(sa_family_t); if (len <= 0) { buf = "anonymous"; len = 9; } else if (buf[0]) { len = strnlen(buf, len); } buf = tomoyo_encode2(buf, len); if (buf) { struct tomoyo_path_info addr; addr.name = buf; tomoyo_fill_path_info(&addr); r.param_type = TOMOYO_TYPE_UNIX_ACL; r.param.unix_network.protocol = address->protocol; r.param.unix_network.operation = address->operation; r.param.unix_network.address = &addr; do { tomoyo_check_acl(&r, tomoyo_check_unix_acl); error = tomoyo_audit_unix_log(&r); } while (error == TOMOYO_RETRY_REQUEST); kfree(buf); } else error = -ENOMEM; } tomoyo_read_unlock(idx); return error; } /** * tomoyo_check_unix_address - Check permission for unix domain socket's operation. * * @addr: Pointer to "struct sockaddr". * @addr_len: Size of @addr. * @address: Pointer to "struct tomoyo_addr_info". * * Returns 0 on success, negative value otherwise. */ static int tomoyo_check_unix_address(struct sockaddr *addr, const unsigned int addr_len, struct tomoyo_addr_info *address) { struct tomoyo_unix_addr_info *u = &address->unix0; if (addr_len < offsetofend(struct sockaddr, sa_family)) return 0; if (addr->sa_family != AF_UNIX) return 0; u->addr = ((struct sockaddr_un *) addr)->sun_path; u->addr_len = addr_len; return tomoyo_unix_entry(address); } /** * tomoyo_kernel_service - Check whether I'm kernel service or not. * * Returns true if I'm kernel service, false otherwise. */ static bool tomoyo_kernel_service(void) { /* Nothing to do if I am a kernel service. 
*/ return current->flags & PF_KTHREAD; } /** * tomoyo_sock_family - Get socket's family. * * @sk: Pointer to "struct sock". * * Returns one of PF_INET, PF_INET6, PF_UNIX or 0. */ static u8 tomoyo_sock_family(struct sock *sk) { u8 family; if (tomoyo_kernel_service()) return 0; family = sk->sk_family; switch (family) { case PF_INET: case PF_INET6: case PF_UNIX: return family; default: return 0; } } /** * tomoyo_socket_listen_permission - Check permission for listening a socket. * * @sock: Pointer to "struct socket". * * Returns 0 on success, negative value otherwise. */ int tomoyo_socket_listen_permission(struct socket *sock) { struct tomoyo_addr_info address; const u8 family = tomoyo_sock_family(sock->sk); const unsigned int type = sock->type; struct sockaddr_storage addr; int addr_len; if (!family || (type != SOCK_STREAM && type != SOCK_SEQPACKET)) return 0; { const int error = sock->ops->getname(sock, (struct sockaddr *) &addr, 0); if (error < 0) return error; addr_len = error; } address.protocol = type; address.operation = TOMOYO_NETWORK_LISTEN; if (family == PF_UNIX) return tomoyo_check_unix_address((struct sockaddr *) &addr, addr_len, &address); return tomoyo_check_inet_address((struct sockaddr *) &addr, addr_len, 0, &address); } /** * tomoyo_socket_connect_permission - Check permission for setting the remote address of a socket. * * @sock: Pointer to "struct socket". * @addr: Pointer to "struct sockaddr". * @addr_len: Size of @addr. * * Returns 0 on success, negative value otherwise. 
*/ int tomoyo_socket_connect_permission(struct socket *sock, struct sockaddr *addr, int addr_len) { struct tomoyo_addr_info address; const u8 family = tomoyo_sock_family(sock->sk); const unsigned int type = sock->type; if (!family) return 0; address.protocol = type; switch (type) { case SOCK_DGRAM: case SOCK_RAW: address.operation = TOMOYO_NETWORK_SEND; break; case SOCK_STREAM: case SOCK_SEQPACKET: address.operation = TOMOYO_NETWORK_CONNECT; break; default: return 0; } if (family == PF_UNIX) return tomoyo_check_unix_address(addr, addr_len, &address); return tomoyo_check_inet_address(addr, addr_len, sock->sk->sk_protocol, &address); } /** * tomoyo_socket_bind_permission - Check permission for setting the local address of a socket. * * @sock: Pointer to "struct socket". * @addr: Pointer to "struct sockaddr". * @addr_len: Size of @addr. * * Returns 0 on success, negative value otherwise. */ int tomoyo_socket_bind_permission(struct socket *sock, struct sockaddr *addr, int addr_len) { struct tomoyo_addr_info address; const u8 family = tomoyo_sock_family(sock->sk); const unsigned int type = sock->type; if (!family) return 0; switch (type) { case SOCK_STREAM: case SOCK_DGRAM: case SOCK_RAW: case SOCK_SEQPACKET: address.protocol = type; address.operation = TOMOYO_NETWORK_BIND; break; default: return 0; } if (family == PF_UNIX) return tomoyo_check_unix_address(addr, addr_len, &address); return tomoyo_check_inet_address(addr, addr_len, sock->sk->sk_protocol, &address); } /** * tomoyo_socket_sendmsg_permission - Check permission for sending a datagram. * * @sock: Pointer to "struct socket". * @msg: Pointer to "struct msghdr". * @size: Unused. * * Returns 0 on success, negative value otherwise. 
*/
int tomoyo_socket_sendmsg_permission(struct socket *sock, struct msghdr *msg,
				     int size)
{
	struct tomoyo_addr_info address;
	const u8 family = tomoyo_sock_family(sock->sk);
	const unsigned int type = sock->type;
	struct sockaddr *dest;

	/* Nothing to check without a destination address. */
	if (!msg->msg_name)
		return 0;
	if (!family)
		return 0;
	if (type != SOCK_DGRAM && type != SOCK_RAW)
		return 0;

	dest = (struct sockaddr *) msg->msg_name;
	address.protocol = type;
	address.operation = TOMOYO_NETWORK_SEND;

	if (family == PF_UNIX)
		return tomoyo_check_unix_address(dest, msg->msg_namelen,
						 &address);
	return tomoyo_check_inet_address(dest, msg->msg_namelen,
					 sock->sk->sk_protocol, &address);
}
222 85 222 166 222 17 182 25 149 242 100 1101 40 2 5 29 100 99 99 99 84 84 83 4 18 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SWAPOPS_H #define _LINUX_SWAPOPS_H #include <linux/radix-tree.h> #include <linux/bug.h> #include <linux/mm_types.h> #ifdef CONFIG_MMU #ifdef CONFIG_SWAP #include <linux/swapfile.h> #endif /* CONFIG_SWAP */ /* * swapcache pages are stored in the swapper_space radix tree. We want to * get good packing density in that tree, so the index should be dense in * the low-order bits.
* * We arrange the `type' and `offset' fields so that `type' is at the six * high-order bits of the swp_entry_t and `offset' is right-aligned in the * remaining bits. Although `type' itself needs only five bits, we allow for * shmem/tmpfs to shift it all up a further one bit: see swp_to_radix_entry(). * * swp_entry_t's are *never* stored anywhere in their arch-dependent format. */ #define SWP_TYPE_SHIFT (BITS_PER_XA_VALUE - MAX_SWAPFILES_SHIFT) #define SWP_OFFSET_MASK ((1UL << SWP_TYPE_SHIFT) - 1) /* * Definitions only for PFN swap entries (see leafeant_has_pfn()). To * store PFN, we only need SWP_PFN_BITS bits. Each of the pfn swap entries * can use the extra bits to store other information besides PFN. */ #ifdef MAX_PHYSMEM_BITS #define SWP_PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT) #else /* MAX_PHYSMEM_BITS */ #define SWP_PFN_BITS min_t(int, \ sizeof(phys_addr_t) * 8 - PAGE_SHIFT, \ SWP_TYPE_SHIFT) #endif /* MAX_PHYSMEM_BITS */ #define SWP_PFN_MASK (BIT(SWP_PFN_BITS) - 1) /** * Migration swap entry specific bitfield definitions. Layout: * * |----------+--------------------| * | swp_type | swp_offset | * |----------+--------+-+-+-------| * | | resv |D|A| PFN | * |----------+--------+-+-+-------| * * @SWP_MIG_YOUNG_BIT: Whether the page used to have young bit set (bit A) * @SWP_MIG_DIRTY_BIT: Whether the page used to have dirty bit set (bit D) * * Note: A/D bits will be stored in migration entries iff there're enough * free bits in arch specific swp offset. By default we'll ignore A/D bits * when migrating a page. Please refer to migration_entry_supports_ad() * for more information. If there're more bits besides PFN and A/D bits, * they should be reserved and always be zeros.
*/ #define SWP_MIG_YOUNG_BIT (SWP_PFN_BITS) #define SWP_MIG_DIRTY_BIT (SWP_PFN_BITS + 1) #define SWP_MIG_TOTAL_BITS (SWP_PFN_BITS + 2) #define SWP_MIG_YOUNG BIT(SWP_MIG_YOUNG_BIT) #define SWP_MIG_DIRTY BIT(SWP_MIG_DIRTY_BIT) /* Clear all flags but only keep swp_entry_t related information */ static inline pte_t pte_swp_clear_flags(pte_t pte) { if (pte_swp_exclusive(pte)) pte = pte_swp_clear_exclusive(pte); if (pte_swp_soft_dirty(pte)) pte = pte_swp_clear_soft_dirty(pte); if (pte_swp_uffd_wp(pte)) pte = pte_swp_clear_uffd_wp(pte); return pte; } /* * Store a type+offset into a swp_entry_t in an arch-independent format */ static inline swp_entry_t swp_entry(unsigned long type, pgoff_t offset) { swp_entry_t ret; ret.val = (type << SWP_TYPE_SHIFT) | (offset & SWP_OFFSET_MASK); return ret; } /* * Extract the `type' field from a swp_entry_t. The swp_entry_t is in * arch-independent format */ static inline unsigned swp_type(swp_entry_t entry) { return (entry.val >> SWP_TYPE_SHIFT); } /* * Extract the `offset' field from a swp_entry_t. The swp_entry_t is in * arch-independent format */ static inline pgoff_t swp_offset(swp_entry_t entry) { return entry.val & SWP_OFFSET_MASK; } /* * Convert the arch-independent representation of a swp_entry_t into the * arch-dependent pte representation.
*/ static inline pte_t swp_entry_to_pte(swp_entry_t entry) { swp_entry_t arch_entry; arch_entry = __swp_entry(swp_type(entry), swp_offset(entry)); return __swp_entry_to_pte(arch_entry); } /* Unpack a swp_entry_t from its XArray value-entry form (xa_to_value()). */ static inline swp_entry_t radix_to_swp_entry(void *arg) { swp_entry_t entry; entry.val = xa_to_value(arg); return entry; } /* Pack a swp_entry_t into an XArray value entry (xa_mk_value()). */ static inline void *swp_to_radix_entry(swp_entry_t entry) { return xa_mk_value(entry.val); } #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset) { return swp_entry(SWP_DEVICE_READ, offset); } static inline swp_entry_t make_writable_device_private_entry(pgoff_t offset) { return swp_entry(SWP_DEVICE_WRITE, offset); } static inline swp_entry_t make_device_exclusive_entry(pgoff_t offset) { return swp_entry(SWP_DEVICE_EXCLUSIVE, offset); } #else /* CONFIG_DEVICE_PRIVATE */ /* Stubs: each maker ignores @offset and returns swp_entry(0, 0). */ static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset) { return swp_entry(0, 0); } static inline swp_entry_t make_writable_device_private_entry(pgoff_t offset) { return swp_entry(0, 0); } static inline swp_entry_t make_device_exclusive_entry(pgoff_t offset) { return swp_entry(0, 0); } #endif /* CONFIG_DEVICE_PRIVATE */ #ifdef CONFIG_MIGRATION static inline swp_entry_t make_readable_migration_entry(pgoff_t offset) { return swp_entry(SWP_MIGRATION_READ, offset); } static inline swp_entry_t make_readable_exclusive_migration_entry(pgoff_t offset) { return swp_entry(SWP_MIGRATION_READ_EXCLUSIVE, offset); } static inline swp_entry_t make_writable_migration_entry(pgoff_t offset) { return swp_entry(SWP_MIGRATION_WRITE, offset); } /* * Returns whether the host has large enough swap offset field to support * carrying over pgtable A/D bits for page migrations. The result is * pretty much arch specific.
*/ static inline bool migration_entry_supports_ad(void) { #ifdef CONFIG_SWAP return swap_migration_ad_supported; #else /* CONFIG_SWAP */ return false; #endif /* CONFIG_SWAP */ } /* * Set the young (A) bit in the entry's offset when A/D bits are supported; * otherwise return the entry unchanged. */ static inline swp_entry_t make_migration_entry_young(swp_entry_t entry) { if (migration_entry_supports_ad()) return swp_entry(swp_type(entry), swp_offset(entry) | SWP_MIG_YOUNG); return entry; } /* * Set the dirty (D) bit in the entry's offset when A/D bits are supported; * otherwise return the entry unchanged. */ static inline swp_entry_t make_migration_entry_dirty(swp_entry_t entry) { if (migration_entry_supports_ad()) return swp_entry(swp_type(entry), swp_offset(entry) | SWP_MIG_DIRTY); return entry; } extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address); extern void migration_entry_wait_huge(struct vm_area_struct *vma, unsigned long addr, pte_t *pte); #else /* CONFIG_MIGRATION */ /* * Stubs: makers return swp_entry(0, 0), the wait helpers are no-ops and the * young/dirty helpers return the entry unchanged. */ static inline swp_entry_t make_readable_migration_entry(pgoff_t offset) { return swp_entry(0, 0); } static inline swp_entry_t make_readable_exclusive_migration_entry(pgoff_t offset) { return swp_entry(0, 0); } static inline swp_entry_t make_writable_migration_entry(pgoff_t offset) { return swp_entry(0, 0); } static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address) { } static inline void migration_entry_wait_huge(struct vm_area_struct *vma, unsigned long addr, pte_t *pte) { } static inline swp_entry_t make_migration_entry_young(swp_entry_t entry) { return entry; } static inline swp_entry_t make_migration_entry_dirty(swp_entry_t entry) { return entry; } #endif /* CONFIG_MIGRATION */ #ifdef CONFIG_MEMORY_FAILURE /* * Support for hardware poisoned pages */ static inline swp_entry_t make_hwpoison_entry(struct page *page) { BUG_ON(!PageLocked(page)); return swp_entry(SWP_HWPOISON, page_to_pfn(page)); } static inline int is_hwpoison_entry(swp_entry_t entry) { return swp_type(entry) == SWP_HWPOISON; } #else /* CONFIG_MEMORY_FAILURE */ static inline swp_entry_t make_hwpoison_entry(struct page *page) { return swp_entry(0, 0); } static inline int is_hwpoison_entry(swp_entry_t swp) { return 0;
} #endif /* CONFIG_MEMORY_FAILURE */ /* Bitmask of PTE_MARKER_* flags; stored as the offset of a SWP_PTE_MARKER entry. */ typedef unsigned long pte_marker; #define PTE_MARKER_UFFD_WP BIT(0) /* * "Poisoned" here is meant in the very general sense of "future accesses are * invalid", instead of referring very specifically to hardware memory errors. * This marker is meant to represent any of various different causes of this. * * Note that, when encountered by the faulting logic, PTEs with this marker will * result in VM_FAULT_HWPOISON and thus regardless trigger hardware memory error * logic. */ #define PTE_MARKER_POISONED BIT(1) /* * Indicates that, on fault, this PTE will cause a SIGSEGV signal to be * sent. This means guard markers behave in effect as if the region were mapped * PROT_NONE, rather than if they were a memory hole or equivalent. */ #define PTE_MARKER_GUARD BIT(2) /* Covers every PTE_MARKER_* bit defined above (bits 0-2). */ #define PTE_MARKER_MASK (BIT(3) - 1) static inline swp_entry_t make_pte_marker_entry(pte_marker marker) { return swp_entry(SWP_PTE_MARKER, marker); } static inline pte_t make_pte_marker(pte_marker marker) { return swp_entry_to_pte(make_pte_marker_entry(marker)); } static inline swp_entry_t make_poisoned_swp_entry(void) { return make_pte_marker_entry(PTE_MARKER_POISONED); } static inline swp_entry_t make_guard_swp_entry(void) { return make_pte_marker_entry(PTE_MARKER_GUARD); } struct page_vma_mapped_walk; #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION extern int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, struct page *page); extern void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new); extern void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd); static inline pmd_t swp_entry_to_pmd(swp_entry_t entry) { swp_entry_t arch_entry; arch_entry = __swp_entry(swp_type(entry), swp_offset(entry)); return __swp_entry_to_pmd(arch_entry); } #else /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ static inline int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, struct page *page) { BUILD_BUG(); } static inline void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct
page *new) { BUILD_BUG(); } static inline void pmd_migration_entry_wait(struct mm_struct *m, pmd_t *p) { } static inline pmd_t swp_entry_to_pmd(swp_entry_t entry) { return __pmd(0); } #endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ #endif /* CONFIG_MMU */ #endif /* _LINUX_SWAPOPS_H */
11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 /* SPDX-License-Identifier: GPL-2.0+ */ #ifndef _VKMS_CONFIG_H_ #define _VKMS_CONFIG_H_ #include <linux/list.h> #include <linux/types.h> #include
<linux/xarray.h> #include <drm/drm_connector.h> #include "vkms_drv.h" /** * struct vkms_config - General configuration for VKMS driver * * @dev_name: Name of the device * @planes: List of planes configured for the device * @crtcs: List of CRTCs configured for the device * @encoders: List of encoders configured for the device * @connectors: List of connectors configured for the device * @dev: Used to store the current VKMS device. Only set when the device is instantiated. */ struct vkms_config { const char *dev_name; struct list_head planes; struct list_head crtcs; struct list_head encoders; struct list_head connectors; struct vkms_device *dev; }; /** * struct vkms_config_plane * * @link: Link to the other planes in vkms_config * @config: The vkms_config this plane belongs to * @type: Type of the plane. The creator of configuration needs to ensure that * at least one primary plane is present. * @possible_crtcs: Array of CRTCs that can be used with this plane * @default_pipeline: If true, the plane will be created with the default * pipeline * @plane: Internal usage. This pointer should never be considered as valid. * It can be used to store a temporary reference to a VKMS plane during * device creation. This pointer is not managed by the configuration and * must be managed by other means. */ struct vkms_config_plane { struct list_head link; struct vkms_config *config; enum drm_plane_type type; struct xarray possible_crtcs; bool default_pipeline; /* Internal usage */ struct vkms_plane *plane; }; /** * struct vkms_config_crtc * * @link: Link to the other CRTCs in vkms_config * @config: The vkms_config this CRTC belongs to * @writeback: If true, a writeback buffer can be attached to the CRTC * @crtc: Internal usage. This pointer should never be considered as valid. * It can be used to store a temporary reference to a VKMS CRTC during * device creation. This pointer is not managed by the configuration and * must be managed by other means.
*/ struct vkms_config_crtc { struct list_head link; struct vkms_config *config; bool writeback; /* Internal usage */ struct vkms_output *crtc; }; /** * struct vkms_config_encoder * * @link: Link to the other encoders in vkms_config * @config: The vkms_config this encoder belongs to * @possible_crtcs: Array of CRTCs that can be used with this encoder * @encoder: Internal usage. This pointer should never be considered as valid. * It can be used to store a temporary reference to a VKMS encoder * during device creation. This pointer is not managed by the * configuration and must be managed by other means. */ struct vkms_config_encoder { struct list_head link; struct vkms_config *config; struct xarray possible_crtcs; /* Internal usage */ struct drm_encoder *encoder; }; /** * struct vkms_config_connector * * @link: Link to the other connectors in vkms_config * @config: The vkms_config this connector belongs to * @status: Status (connected, disconnected...) of the connector * @possible_encoders: Array of encoders that can be used with this connector * @connector: Internal usage. This pointer should never be considered as valid. * It can be used to store a temporary reference to a VKMS connector * during device creation. This pointer is not managed by the * configuration and must be managed by other means.
*/ struct vkms_config_connector { struct list_head link; struct vkms_config *config; enum drm_connector_status status; struct xarray possible_encoders; /* Internal usage */ struct vkms_connector *connector; }; /** * vkms_config_for_each_plane - Iterate over the vkms_config planes * @config: &struct vkms_config pointer * @plane_cfg: &struct vkms_config_plane pointer used as cursor */ #define vkms_config_for_each_plane(config, plane_cfg) \ list_for_each_entry((plane_cfg), &(config)->planes, link) /** * vkms_config_for_each_crtc - Iterate over the vkms_config CRTCs * @config: &struct vkms_config pointer * @crtc_cfg: &struct vkms_config_crtc pointer used as cursor */ #define vkms_config_for_each_crtc(config, crtc_cfg) \ list_for_each_entry((crtc_cfg), &(config)->crtcs, link) /** * vkms_config_for_each_encoder - Iterate over the vkms_config encoders * @config: &struct vkms_config pointer * @encoder_cfg: &struct vkms_config_encoder pointer used as cursor */ #define vkms_config_for_each_encoder(config, encoder_cfg) \ list_for_each_entry((encoder_cfg), &(config)->encoders, link) /** * vkms_config_for_each_connector - Iterate over the vkms_config connectors * @config: &struct vkms_config pointer * @connector_cfg: &struct vkms_config_connector pointer used as cursor */ #define vkms_config_for_each_connector(config, connector_cfg) \ list_for_each_entry((connector_cfg), &(config)->connectors, link) /** * vkms_config_plane_for_each_possible_crtc - Iterate over the vkms_config_plane * possible CRTCs * @plane_cfg: &struct vkms_config_plane pointer * @idx: Index of the cursor * @possible_crtc: &struct vkms_config_crtc pointer used as cursor */ #define vkms_config_plane_for_each_possible_crtc(plane_cfg, idx, possible_crtc) \ xa_for_each(&(plane_cfg)->possible_crtcs, idx, (possible_crtc)) /** * vkms_config_encoder_for_each_possible_crtc - Iterate over the * vkms_config_encoder possible CRTCs * @encoder_cfg: &struct vkms_config_encoder pointer * @idx: Index of the cursor *
@possible_crtc: &struct vkms_config_crtc pointer used as cursor */ #define vkms_config_encoder_for_each_possible_crtc(encoder_cfg, idx, possible_crtc) \ xa_for_each(&(encoder_cfg)->possible_crtcs, idx, (possible_crtc)) /** * vkms_config_connector_for_each_possible_encoder - Iterate over the * vkms_config_connector possible encoders * @connector_cfg: &struct vkms_config_connector pointer * @idx: Index of the cursor * @possible_encoder: &struct vkms_config_encoder pointer used as cursor */ #define vkms_config_connector_for_each_possible_encoder(connector_cfg, idx, possible_encoder) \ xa_for_each(&(connector_cfg)->possible_encoders, idx, (possible_encoder)) /** * vkms_config_create() - Create a new VKMS configuration * @dev_name: Name of the device * * Returns: * The new vkms_config or an error. Call vkms_config_destroy() to free the * returned configuration. */ struct vkms_config *vkms_config_create(const char *dev_name); /** * vkms_config_default_create() - Create the configuration for the default device * @enable_cursor: Create or not a cursor plane * @enable_writeback: Create or not a writeback connector * @enable_overlay: Create or not overlay planes * @enable_plane_pipeline: Presumably whether planes are created with the * default pipeline (see vkms_config_plane_set_default_pipeline()); * TODO confirm against the implementation * * Returns: * The default vkms_config or an error. Call vkms_config_destroy() to free the * returned configuration. */ struct vkms_config *vkms_config_default_create(bool enable_cursor, bool enable_writeback, bool enable_overlay, bool enable_plane_pipeline); /** * vkms_config_destroy() - Free a VKMS configuration * @config: vkms_config to free */ void vkms_config_destroy(struct vkms_config *config); /** * vkms_config_get_device_name() - Return the name of the device * @config: Configuration to get the device name from * * Returns: * The device name. Only valid while @config is valid.
*/ static inline const char * vkms_config_get_device_name(struct vkms_config *config) { return config->dev_name; } /** * vkms_config_get_num_crtcs() - Return the number of CRTCs in the configuration * @config: Configuration to get the number of CRTCs from */ static inline size_t vkms_config_get_num_crtcs(struct vkms_config *config) { return list_count_nodes(&config->crtcs); } /** * vkms_config_is_valid() - Validate a configuration * @config: Configuration to validate * * Returns: * Whether the configuration is valid or not. * For example, a configuration without primary planes is not valid. */ bool vkms_config_is_valid(const struct vkms_config *config); /** * vkms_config_register_debugfs() - Register a debugfs file to show the device's * configuration * @vkms_device: Device to register */ void vkms_config_register_debugfs(struct vkms_device *vkms_device); /** * vkms_config_create_plane() - Add a new plane configuration * @config: Configuration to add the plane to * * Returns: * The new plane configuration or an error. Call vkms_config_destroy_plane() to * free the returned plane configuration.
*/ struct vkms_config_plane *vkms_config_create_plane(struct vkms_config *config); /** * vkms_config_destroy_plane() - Remove and free a plane configuration * @plane_cfg: Plane configuration to destroy */ void vkms_config_destroy_plane(struct vkms_config_plane *plane_cfg); /** * vkms_config_plane_get_type() - Return the plane type * @plane_cfg: Plane to get the type from */ static inline enum drm_plane_type vkms_config_plane_get_type(struct vkms_config_plane *plane_cfg) { return plane_cfg->type; } /** * vkms_config_plane_set_type() - Set the plane type * @plane_cfg: Plane to set the type to * @type: New plane type */ static inline void vkms_config_plane_set_type(struct vkms_config_plane *plane_cfg, enum drm_plane_type type) { plane_cfg->type = type; } /** * vkms_config_plane_get_default_pipeline() - Return if the plane will * be created with the default pipeline * @plane_cfg: Plane to get the information from */ static inline bool vkms_config_plane_get_default_pipeline(struct vkms_config_plane *plane_cfg) { return plane_cfg->default_pipeline; } /** * vkms_config_plane_set_default_pipeline() - Set if the plane will * be created with the default pipeline * @plane_cfg: Plane to configure the pipeline * @default_pipeline: New default pipeline value */ static inline void vkms_config_plane_set_default_pipeline(struct vkms_config_plane *plane_cfg, bool default_pipeline) { plane_cfg->default_pipeline = default_pipeline; } /** * vkms_config_plane_attach_crtc - Attach a plane to a CRTC * @plane_cfg: Plane to attach * @crtc_cfg: CRTC to attach @plane_cfg to */ int __must_check vkms_config_plane_attach_crtc(struct vkms_config_plane *plane_cfg, struct vkms_config_crtc *crtc_cfg); /** * vkms_config_plane_detach_crtc - Detach a plane from a CRTC * @plane_cfg: Plane to detach * @crtc_cfg: CRTC to detach @plane_cfg from */ void vkms_config_plane_detach_crtc(struct vkms_config_plane *plane_cfg, struct vkms_config_crtc *crtc_cfg); /** * vkms_config_create_crtc() - Add a new CRTC
configuration * @config: Configuration to add the CRTC to * * Returns: * The new CRTC configuration or an error. Call vkms_config_destroy_crtc() to * free the returned CRTC configuration. */ struct vkms_config_crtc *vkms_config_create_crtc(struct vkms_config *config); /** * vkms_config_destroy_crtc() - Remove and free a CRTC configuration * @config: Configuration to remove the CRTC from * @crtc_cfg: CRTC configuration to destroy */ void vkms_config_destroy_crtc(struct vkms_config *config, struct vkms_config_crtc *crtc_cfg); /** * vkms_config_crtc_get_writeback() - If a writeback connector will be created * @crtc_cfg: CRTC with or without a writeback connector */ static inline bool vkms_config_crtc_get_writeback(struct vkms_config_crtc *crtc_cfg) { return crtc_cfg->writeback; } /** * vkms_config_crtc_set_writeback() - If a writeback connector will be created * @crtc_cfg: Target CRTC * @writeback: Enable or disable the writeback connector */ static inline void vkms_config_crtc_set_writeback(struct vkms_config_crtc *crtc_cfg, bool writeback) { crtc_cfg->writeback = writeback; } /** * vkms_config_crtc_primary_plane() - Return the primary plane for a CRTC * @config: Configuration containing the CRTC * @crtc_cfg: Target CRTC * * Note that, if multiple primary planes are found, the first one is returned. * In this case, the configuration will be invalid. See vkms_config_is_valid(). * * Returns: * The primary plane or NULL if none is assigned yet. */ struct vkms_config_plane *vkms_config_crtc_primary_plane(const struct vkms_config *config, struct vkms_config_crtc *crtc_cfg); /** * vkms_config_crtc_cursor_plane() - Return the cursor plane for a CRTC * @config: Configuration containing the CRTC * @crtc_cfg: Target CRTC * * Note that, if multiple cursor planes are found, the first one is returned. * In this case, the configuration will be invalid. See vkms_config_is_valid(). * * Returns: * The cursor plane or NULL if none is assigned yet.
*/ struct vkms_config_plane *vkms_config_crtc_cursor_plane(const struct vkms_config *config, struct vkms_config_crtc *crtc_cfg); /** * vkms_config_create_encoder() - Add a new encoder configuration * @config: Configuration to add the encoder to * * Returns: * The new encoder configuration or an error. Call vkms_config_destroy_encoder() * to free the returned encoder configuration. */ struct vkms_config_encoder *vkms_config_create_encoder(struct vkms_config *config); /** * vkms_config_destroy_encoder() - Remove and free an encoder configuration * @config: Configuration to remove the encoder from * @encoder_cfg: Encoder configuration to destroy */ void vkms_config_destroy_encoder(struct vkms_config *config, struct vkms_config_encoder *encoder_cfg); /** * vkms_config_encoder_attach_crtc - Attach an encoder to a CRTC * @encoder_cfg: Encoder to attach * @crtc_cfg: CRTC to attach @encoder_cfg to */ int __must_check vkms_config_encoder_attach_crtc(struct vkms_config_encoder *encoder_cfg, struct vkms_config_crtc *crtc_cfg); /** * vkms_config_encoder_detach_crtc - Detach an encoder from a CRTC * @encoder_cfg: Encoder to detach * @crtc_cfg: CRTC to detach @encoder_cfg from */ void vkms_config_encoder_detach_crtc(struct vkms_config_encoder *encoder_cfg, struct vkms_config_crtc *crtc_cfg); /** * vkms_config_create_connector() - Add a new connector configuration * @config: Configuration to add the connector to * * Returns: * The new connector configuration or an error. Call * vkms_config_destroy_connector() to free the returned connector configuration.
*/ struct vkms_config_connector *vkms_config_create_connector(struct vkms_config *config); /** * vkms_config_destroy_connector() - Remove and free a connector configuration * @connector_cfg: Connector configuration to destroy */ void vkms_config_destroy_connector(struct vkms_config_connector *connector_cfg); /** * vkms_config_connector_attach_encoder - Attach a connector to an encoder * @connector_cfg: Connector to attach * @encoder_cfg: Encoder to attach @connector_cfg to */ int __must_check vkms_config_connector_attach_encoder(struct vkms_config_connector *connector_cfg, struct vkms_config_encoder *encoder_cfg); /** * vkms_config_connector_detach_encoder - Detach a connector from an encoder * @connector_cfg: Connector to detach * @encoder_cfg: Encoder to detach @connector_cfg from */ void vkms_config_connector_detach_encoder(struct vkms_config_connector *connector_cfg, struct vkms_config_encoder *encoder_cfg); /** * vkms_config_connector_get_status() - Return the status of the connector * @connector_cfg: Connector to get the status from */ static inline enum drm_connector_status vkms_config_connector_get_status(struct vkms_config_connector *connector_cfg) { return connector_cfg->status; } /** * vkms_config_connector_set_status() - Set the status of the connector * @connector_cfg: Connector to set the status to * @status: New connector status */ static inline void vkms_config_connector_set_status(struct vkms_config_connector *connector_cfg, enum drm_connector_status status) { connector_cfg->status = status; } #endif /* _VKMS_CONFIG_H_ */
3 2 3 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2009 Patrick McHardy <kaber@trash.net> * * Development of this code funded by Astaro AG (http://www.astaro.com/) */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/list.h> #include <linux/rbtree.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> struct nft_lookup { struct nft_set *set; u8 sreg; u8 dreg; bool dreg_set; bool invert; struct nft_set_binding binding; }; /* With CONFIG_MITIGATION_RETPOLINE, dispatch to the known set backends through * direct calls; an unknown backend warns once and, like the non-retpoline * build, falls through to the indirect set->ops->lookup() call. */ static const struct nft_set_ext * __nft_set_do_lookup(const struct net *net, const struct nft_set *set, const u32 *key) { #ifdef CONFIG_MITIGATION_RETPOLINE if (set->ops == &nft_set_hash_fast_type.ops) return nft_hash_lookup_fast(net, set, key); if (set->ops == &nft_set_hash_type.ops) return nft_hash_lookup(net, set, key); if
(set->ops == &nft_set_rhash_type.ops) return nft_rhash_lookup(net, set, key); if (set->ops == &nft_set_bitmap_type.ops) return nft_bitmap_lookup(net, set, key); if (set->ops == &nft_set_pipapo_type.ops) return nft_pipapo_lookup(net, set, key); #if defined(CONFIG_X86_64) && !defined(CONFIG_UML) if (set->ops == &nft_set_pipapo_avx2_type.ops) return nft_pipapo_avx2_lookup(net, set, key); #endif if (set->ops == &nft_set_rbtree_type.ops) return nft_rbtree_lookup(net, set, key); WARN_ON_ONCE(1); #endif return set->ops->lookup(net, set, key); } static unsigned int nft_base_seq(const struct net *net) { /* pairs with smp_store_release() in nf_tables_commit() */ return smp_load_acquire(&net->nft.base_seq); } static bool nft_lookup_should_retry(const struct net *net, unsigned int seq) { return unlikely(seq != nft_base_seq(net)); } const struct nft_set_ext * nft_set_do_lookup(const struct net *net, const struct nft_set *set, const u32 *key) { const struct nft_set_ext *ext; unsigned int base_seq; do { base_seq = nft_base_seq(net); ext = __nft_set_do_lookup(net, set, key); if (ext) break; /* No match? There is a small chance that lookup was * performed in the old generation, but nf_tables_commit() * already unlinked a (matching) element. * * We need to repeat the lookup to make sure that we didn't * miss a matching element in the new generation.
*/ } while (nft_lookup_should_retry(net, base_seq)); return ext; } EXPORT_SYMBOL_GPL(nft_set_do_lookup); /* Look up the key held in the source register. If the result, after applying * the invert flag, counts as "not found", fall back to the set's catchall * element and set the NFT_BREAK verdict when there is none. Whenever an * element is at hand, copy its data to the destination register (if one was * configured) and call nft_set_elem_update_expr() on it. */ void nft_lookup_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_lookup *priv = nft_expr_priv(expr); const struct nft_set *set = priv->set; const struct net *net = nft_net(pkt); const struct nft_set_ext *ext; bool found; ext = nft_set_do_lookup(net, set, &regs->data[priv->sreg]); found = !!ext ^ priv->invert; if (!found) { ext = nft_set_catchall_lookup(net, set); if (!ext) { regs->verdict.code = NFT_BREAK; return; } } if (ext) { if (priv->dreg_set) nft_data_copy(&regs->data[priv->dreg], nft_set_ext_data(ext), set->dlen); nft_set_elem_update_expr(ext, regs, pkt); } } static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = { [NFTA_LOOKUP_SET] = { .type = NLA_STRING, .len = NFT_SET_MAXNAMELEN - 1 }, [NFTA_LOOKUP_SET_ID] = { .type = NLA_U32 }, [NFTA_LOOKUP_SREG] = { .type = NLA_U32 }, [NFTA_LOOKUP_DREG] = { .type = NLA_U32 }, [NFTA_LOOKUP_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_LOOKUP_F_INV), }; static int nft_lookup_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_lookup *priv = nft_expr_priv(expr); u8 genmask = nft_genmask_next(ctx->net); struct nft_set *set; u32 flags; int err; if (tb[NFTA_LOOKUP_SET] == NULL || tb[NFTA_LOOKUP_SREG] == NULL) return -EINVAL; set = nft_set_lookup_global(ctx->net, ctx->table, tb[NFTA_LOOKUP_SET], tb[NFTA_LOOKUP_SET_ID], genmask); if (IS_ERR(set)) return PTR_ERR(set); err = nft_parse_register_load(ctx, tb[NFTA_LOOKUP_SREG], &priv->sreg, set->klen); if (err < 0) return err; if (tb[NFTA_LOOKUP_FLAGS]) { flags = ntohl(nla_get_be32(tb[NFTA_LOOKUP_FLAGS])); if (flags & NFT_LOOKUP_F_INV) priv->invert = true; } if (tb[NFTA_LOOKUP_DREG] != NULL) { if (priv->invert) return -EINVAL; if (!(set->flags & NFT_SET_MAP)) return -EINVAL; err = nft_parse_register_store(ctx, tb[NFTA_LOOKUP_DREG], &priv->dreg, NULL,
nft_set_datatype(set), set->dlen); if (err < 0) return err; priv->dreg_set = true; } else if (set->flags & NFT_SET_MAP) { /* Map given, but user asks for lookup only (i.e. to * ignore value associated with key). * * This makes no sense for anonymous maps since they are * scoped to the rule, but for named sets this can be useful. */ if (set->flags & NFT_SET_ANONYMOUS) return -EINVAL; } priv->binding.flags = set->flags & NFT_SET_MAP; err = nf_tables_bind_set(ctx, set, &priv->binding); if (err < 0) return err; priv->set = set; return 0; } static void nft_lookup_deactivate(const struct nft_ctx *ctx, const struct nft_expr *expr, enum nft_trans_phase phase) { struct nft_lookup *priv = nft_expr_priv(expr); nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase); } static void nft_lookup_activate(const struct nft_ctx *ctx, const struct nft_expr *expr) { struct nft_lookup *priv = nft_expr_priv(expr); nf_tables_activate_set(ctx, priv->set); } static void nft_lookup_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { struct nft_lookup *priv = nft_expr_priv(expr); nf_tables_destroy_set(ctx, priv->set); } static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_lookup *priv = nft_expr_priv(expr); u32 flags = priv->invert ?
NFT_LOOKUP_F_INV : 0; if (nla_put_string(skb, NFTA_LOOKUP_SET, priv->set->name)) goto nla_put_failure; if (nft_dump_register(skb, NFTA_LOOKUP_SREG, priv->sreg)) goto nla_put_failure; if (priv->dreg_set) if (nft_dump_register(skb, NFTA_LOOKUP_DREG, priv->dreg)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_LOOKUP_FLAGS, htonl(flags))) goto nla_put_failure; return 0; nla_put_failure: return -1; } /* Only maps whose data type is NFT_DATA_VERDICT are validated: walk the set * with nft_setelem_validate() and, if that reported no error, validate the * catchall element too. Any other set returns 0 immediately. */ static int nft_lookup_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { const struct nft_lookup *priv = nft_expr_priv(expr); struct nft_set_iter iter = { .genmask = nft_genmask_next(ctx->net), .type = NFT_ITER_UPDATE, .fn = nft_setelem_validate, }; if (!(priv->set->flags & NFT_SET_MAP) || priv->set->dtype != NFT_DATA_VERDICT) return 0; priv->set->ops->walk(ctx, priv->set, &iter); if (!iter.err) iter.err = nft_set_catchall_validate(ctx, priv->set); if (iter.err < 0) return iter.err; return 0; } /* For maps, cancel register tracking over the destination register range; * always returns false. */ static bool nft_lookup_reduce(struct nft_regs_track *track, const struct nft_expr *expr) { const struct nft_lookup *priv = nft_expr_priv(expr); if (priv->set->flags & NFT_SET_MAP) nft_reg_track_cancel(track, priv->dreg, priv->set->dlen); return false; } static const struct nft_expr_ops nft_lookup_ops = { .type = &nft_lookup_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)), .eval = nft_lookup_eval, .init = nft_lookup_init, .activate = nft_lookup_activate, .deactivate = nft_lookup_deactivate, .destroy = nft_lookup_destroy, .dump = nft_lookup_dump, .validate = nft_lookup_validate, .reduce = nft_lookup_reduce, }; struct nft_expr_type nft_lookup_type __read_mostly = { .name = "lookup", .ops = &nft_lookup_ops, .policy = nft_lookup_policy, .maxattr = NFTA_LOOKUP_MAX, .owner = THIS_MODULE, };
/*
 * NOTE(review): the run of integers on the next line is residue of the
 * coverage report this text was extracted from (apparently per-line hit
 * counts followed by source line numbers). It is kept verbatim.
 */
33 4 4 4 124 55 69 124 13 9 6 7 13 76 32 32 14 26 862 476 477 413 384 269 248 76 861 1285 1285 862 19 859 1289 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/netfilter/nf_tables.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_arp.h>
#include <net/netfilter/nf_tables_ipv4.h>
#include <net/netfilter/nf_tables_ipv6.h>

#ifdef CONFIG_NF_TABLES_IPV4
/*
 * Hook entry point for IPv4 filter chains: fill in the generic and the
 * IPv4-specific packet info, then run the chain passed as @priv.
 */
static unsigned int nft_do_chain_ipv4(void *priv,
				      struct sk_buff *skb,
				      const struct nf_hook_state *state)
{
	struct nft_pktinfo pkt;

	nft_set_pktinfo(&pkt, skb, state);
	nft_set_pktinfo_ipv4(&pkt);

	return nft_do_chain(&pkt, priv);
}

/* "filter" chain type for NFPROTO_IPV4; every listed hook uses the same handler. */
static const struct nft_chain_type nft_chain_filter_ipv4 = {
	.name		= "filter",
	.type		= NFT_CHAIN_T_DEFAULT,
	.family		= NFPROTO_IPV4,
	.hook_mask	= (1 << NF_INET_LOCAL_IN) |
			  (1 << NF_INET_LOCAL_OUT) |
			  (1 << NF_INET_FORWARD) |
			  (1 << NF_INET_PRE_ROUTING) |
			  (1 << NF_INET_POST_ROUTING),
	.hooks		= {
		[NF_INET_LOCAL_IN]	= nft_do_chain_ipv4,
		[NF_INET_LOCAL_OUT]	= nft_do_chain_ipv4,
		[NF_INET_FORWARD]	= nft_do_chain_ipv4,
		[NF_INET_PRE_ROUTING]	= nft_do_chain_ipv4,
		[NF_INET_POST_ROUTING]	= nft_do_chain_ipv4,
	},
};

static void nft_chain_filter_ipv4_init(void)
{
	nft_register_chain_type(&nft_chain_filter_ipv4);
}
static void nft_chain_filter_ipv4_fini(void)
{
	nft_unregister_chain_type(&nft_chain_filter_ipv4);
}

#else
/* IPv4 support compiled out: registration helpers become no-ops. */
static inline void nft_chain_filter_ipv4_init(void) {}
static inline void nft_chain_filter_ipv4_fini(void) {}
#endif /* CONFIG_NF_TABLES_IPV4 */

#ifdef CONFIG_NF_TABLES_ARP
/*
 * Hook entry point for ARP filter chains: the packet info is marked as
 * protocol-unspecified before the chain passed as @priv is run.
 */
static unsigned int nft_do_chain_arp(void *priv, struct sk_buff *skb,
				     const struct nf_hook_state *state)
{
	struct nft_pktinfo pkt;

	nft_set_pktinfo(&pkt, skb, state);
	nft_set_pktinfo_unspec(&pkt);

	return nft_do_chain(&pkt, priv);
}

/* "filter" chain type for NFPROTO_ARP (input and output hooks). */
static const struct nft_chain_type nft_chain_filter_arp = {
	.name		= "filter",
	.type		= NFT_CHAIN_T_DEFAULT,
	.family		= NFPROTO_ARP,
	.owner		= THIS_MODULE,
	.hook_mask	= (1 << NF_ARP_IN) |
			  (1 << NF_ARP_OUT),
	.hooks		= {
		[NF_ARP_IN]		= nft_do_chain_arp,
		[NF_ARP_OUT]		= nft_do_chain_arp,
	},
};

static void nft_chain_filter_arp_init(void)
{
	nft_register_chain_type(&nft_chain_filter_arp);
}

static void nft_chain_filter_arp_fini(void)
{
	nft_unregister_chain_type(&nft_chain_filter_arp);
}
#else
/* ARP support compiled out: registration helpers become no-ops. */
static inline void nft_chain_filter_arp_init(void) {}
static inline void nft_chain_filter_arp_fini(void) {}
#endif /* CONFIG_NF_TABLES_ARP */

#ifdef CONFIG_NF_TABLES_IPV6
/*
 * Hook entry point for IPv6 filter chains: fill in the generic and the
 * IPv6-specific packet info, then run the chain passed as @priv.
 */
static unsigned int nft_do_chain_ipv6(void *priv,
				      struct sk_buff *skb,
				      const struct nf_hook_state *state)
{
	struct nft_pktinfo pkt;

	nft_set_pktinfo(&pkt, skb, state);
	nft_set_pktinfo_ipv6(&pkt);

	return nft_do_chain(&pkt, priv);
}

/* "filter" chain type for NFPROTO_IPV6; every listed hook uses the same handler. */
static const struct nft_chain_type nft_chain_filter_ipv6 = {
	.name		= "filter",
	.type		= NFT_CHAIN_T_DEFAULT,
	.family		= NFPROTO_IPV6,
	.hook_mask	= (1 << NF_INET_LOCAL_IN) |
			  (1 << NF_INET_LOCAL_OUT) |
			  (1 << NF_INET_FORWARD) |
			  (1 << NF_INET_PRE_ROUTING) |
			  (1 << NF_INET_POST_ROUTING),
	.hooks		= {
		[NF_INET_LOCAL_IN]	= nft_do_chain_ipv6,
		[NF_INET_LOCAL_OUT]	= nft_do_chain_ipv6,
		[NF_INET_FORWARD]	= nft_do_chain_ipv6,
		[NF_INET_PRE_ROUTING]	= nft_do_chain_ipv6,
		[NF_INET_POST_ROUTING]	= nft_do_chain_ipv6,
	},
};

static void nft_chain_filter_ipv6_init(void)
{
	nft_register_chain_type(&nft_chain_filter_ipv6);
}

static void nft_chain_filter_ipv6_fini(void)
{
	nft_unregister_chain_type(&nft_chain_filter_ipv6);
}
#else
/* IPv6 support compiled out: registration helpers become no-ops. */
static inline void nft_chain_filter_ipv6_init(void) {}
static inline void nft_chain_filter_ipv6_fini(void) {}
#endif /* CONFIG_NF_TABLES_IPV6 */

#ifdef CONFIG_NF_TABLES_INET
/*
 * Hook entry point for inet filter chains: choose the IPv4 or IPv6 packet
 * info setup from state->pf (nothing extra for any other family), then run
 * the chain passed as @priv.
 */
static unsigned int nft_do_chain_inet(void *priv, struct sk_buff *skb,
				      const struct nf_hook_state *state)
{
	struct nft_pktinfo pkt;

	nft_set_pktinfo(&pkt, skb, state);

	switch (state->pf) {
	case NFPROTO_IPV4:
		nft_set_pktinfo_ipv4(&pkt);
		break;
	case NFPROTO_IPV6:
		nft_set_pktinfo_ipv6(&pkt);
		break;
	default:
		break;
	}

	return nft_do_chain(&pkt, priv);
}

/*
 * Ingress entry point for inet chains. Works on a local copy of the hook
 * state whose pf/hook are rewritten according to skb->protocol. Packets
 * whose IPv4/IPv6 ingress setup returns a negative value are dropped;
 * packets of any other protocol are accepted without running the chain.
 */
static unsigned int nft_do_chain_inet_ingress(void *priv, struct sk_buff *skb,
					      const struct nf_hook_state *state)
{
	struct nf_hook_state ingress_state = *state;
	struct nft_pktinfo pkt;

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		/* Original hook is NFPROTO_NETDEV and NF_NETDEV_INGRESS. */
		ingress_state.pf = NFPROTO_IPV4;
		ingress_state.hook = NF_INET_INGRESS;
		nft_set_pktinfo(&pkt, skb, &ingress_state);

		if (nft_set_pktinfo_ipv4_ingress(&pkt) < 0)
			return NF_DROP;
		break;
	case htons(ETH_P_IPV6):
		ingress_state.pf = NFPROTO_IPV6;
		ingress_state.hook = NF_INET_INGRESS;
		nft_set_pktinfo(&pkt, skb, &ingress_state);

		if (nft_set_pktinfo_ipv6_ingress(&pkt) < 0)
			return NF_DROP;
		break;
	default:
		return NF_ACCEPT;
	}

	return nft_do_chain(&pkt, priv);
}

/* "filter" chain type for NFPROTO_INET; only the ingress hook has its own handler. */
static const struct nft_chain_type nft_chain_filter_inet = {
	.name		= "filter",
	.type		= NFT_CHAIN_T_DEFAULT,
	.family		= NFPROTO_INET,
	.hook_mask	= (1 << NF_INET_INGRESS) |
			  (1 << NF_INET_LOCAL_IN) |
			  (1 << NF_INET_LOCAL_OUT) |
			  (1 << NF_INET_FORWARD) |
			  (1 << NF_INET_PRE_ROUTING) |
			  (1 << NF_INET_POST_ROUTING),
	.hooks		= {
		[NF_INET_INGRESS]	= nft_do_chain_inet_ingress,
		[NF_INET_LOCAL_IN]	= nft_do_chain_inet,
		[NF_INET_LOCAL_OUT]	= nft_do_chain_inet,
		[NF_INET_FORWARD]	= nft_do_chain_inet,
		[NF_INET_PRE_ROUTING]	= nft_do_chain_inet,
		[NF_INET_POST_ROUTING]	= nft_do_chain_inet,
	},
};

static void nft_chain_filter_inet_init(void)
{
	nft_register_chain_type(&nft_chain_filter_inet);
}

static void nft_chain_filter_inet_fini(void)
{
	nft_unregister_chain_type(&nft_chain_filter_inet);
}
#else
/* inet support compiled out: registration helpers become no-ops. */
static inline void nft_chain_filter_inet_init(void) {}
static inline void nft_chain_filter_inet_fini(void) {}
#endif /* CONFIG_NF_TABLES_INET */

#if IS_ENABLED(CONFIG_NF_TABLES_BRIDGE)
/*
 * Hook entry point for bridge filter chains: the Ethernet header's protocol
 * field selects the IPv4 or IPv6 validating setup; anything else is marked
 * protocol-unspecified.
 */
static unsigned int
nft_do_chain_bridge(void *priv,
		    struct sk_buff *skb,
		    const struct nf_hook_state *state)
{
	struct nft_pktinfo pkt;

	nft_set_pktinfo(&pkt, skb, state);

	switch (eth_hdr(skb)->h_proto) {
	case htons(ETH_P_IP):
		nft_set_pktinfo_ipv4_validate(&pkt);
		break;
	case htons(ETH_P_IPV6):
		nft_set_pktinfo_ipv6_validate(&pkt);
		break;
	default:
		nft_set_pktinfo_unspec(&pkt);
		break;
	}

	return nft_do_chain(&pkt, priv);
}

/* "filter" chain type for NFPROTO_BRIDGE. */
static const struct nft_chain_type nft_chain_filter_bridge = {
	.name		= "filter",
	.type		= NFT_CHAIN_T_DEFAULT,
	.family		= NFPROTO_BRIDGE,
	.hook_mask	= (1 << NF_BR_PRE_ROUTING) |
			  (1 << NF_BR_LOCAL_IN) |
			  (1 << NF_BR_FORWARD) |
			  (1 << NF_BR_LOCAL_OUT) |
			  (1 << NF_BR_POST_ROUTING),
	.hooks		= {
		[NF_BR_PRE_ROUTING]	= nft_do_chain_bridge,
		[NF_BR_LOCAL_IN]	= nft_do_chain_bridge,
		[NF_BR_FORWARD]		= nft_do_chain_bridge,
		[NF_BR_LOCAL_OUT]	= nft_do_chain_bridge,
		[NF_BR_POST_ROUTING]	= nft_do_chain_bridge,
	},
};

static void nft_chain_filter_bridge_init(void)
{
	nft_register_chain_type(&nft_chain_filter_bridge);
}

static void nft_chain_filter_bridge_fini(void)
{
	nft_unregister_chain_type(&nft_chain_filter_bridge);
}
#else
/* Bridge support compiled out: registration helpers become no-ops. */
static inline void nft_chain_filter_bridge_init(void) {}
static inline void nft_chain_filter_bridge_fini(void) {}
#endif /* CONFIG_NF_TABLES_BRIDGE */

#ifdef CONFIG_NF_TABLES_NETDEV
/*
 * Hook entry point for netdev filter chains: skb->protocol selects the
 * IPv4 or IPv6 validating setup; anything else is marked
 * protocol-unspecified.
 */
static unsigned int nft_do_chain_netdev(void *priv, struct sk_buff *skb,
					const struct nf_hook_state *state)
{
	struct nft_pktinfo pkt;

	nft_set_pktinfo(&pkt, skb, state);

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		nft_set_pktinfo_ipv4_validate(&pkt);
		break;
	case htons(ETH_P_IPV6):
		nft_set_pktinfo_ipv6_validate(&pkt);
		break;
	default:
		nft_set_pktinfo_unspec(&pkt);
		break;
	}

	return nft_do_chain(&pkt, priv);
}

/* "filter" chain type for NFPROTO_NETDEV (ingress and egress hooks). */
static const struct nft_chain_type nft_chain_filter_netdev = {
	.name		= "filter",
	.type		= NFT_CHAIN_T_DEFAULT,
	.family		= NFPROTO_NETDEV,
	.hook_mask	= (1 << NF_NETDEV_INGRESS) |
			  (1 << NF_NETDEV_EGRESS),
	.hooks		= {
		[NF_NETDEV_INGRESS]	= nft_do_chain_netdev,
		[NF_NETDEV_EGRESS]	= nft_do_chain_netdev,
	},
};

/*
 * Apply a device event to the hooks of one base chain.
 *
 * For every hook of @basechain the per-device ops are looked up and the
 * hook's ifname is compared with dev->name over hook->ifnamelen bytes.
 *
 * NETDEV_UNREGISTER: if ops exist (and, on a rename, the new name no longer
 * matches) they are unregistered unless the table is dormant, unlinked and
 * freed via RCU.
 * NETDEV_REGISTER: if the name matches and no ops exist yet, basechain->ops
 * is duplicated, bound to @dev, registered unless the table is dormant, and
 * appended to the hook's ops list.
 *
 * The loop is left after the first hook that reaches the end of the switch
 * (hooks skipped with "continue" do not count).
 *
 * Returns 0 on success, 1 if allocating or registering the new ops failed.
 */
static int nft_netdev_event(unsigned long event, struct net_device *dev,
			    struct nft_base_chain *basechain, bool changename)
{
	struct nft_table *table = basechain->chain.table;
	struct nf_hook_ops *ops;
	struct nft_hook *hook;
	bool match;

	list_for_each_entry(hook, &basechain->hook_list, list) {
		ops = nft_hook_find_ops(hook, dev);
		match = !strncmp(hook->ifname, dev->name, hook->ifnamelen);

		switch (event) {
		case NETDEV_UNREGISTER:
			/* NOP if not found or new name still matching */
			if (!ops || (changename && match))
				continue;

			if (!(table->flags & NFT_TABLE_F_DORMANT))
				nf_unregister_net_hook(dev_net(dev), ops);

			list_del_rcu(&ops->list);
			kfree_rcu(ops, rcu);
			break;
		case NETDEV_REGISTER:
			/* NOP if not matching or already registered */
			if (!match || ops)
				continue;

			ops = kmemdup(&basechain->ops,
				      sizeof(struct nf_hook_ops),
				      GFP_KERNEL_ACCOUNT);
			if (!ops)
				return 1;

			ops->dev = dev;

			if (!(table->flags & NFT_TABLE_F_DORMANT) &&
			    nf_register_net_hook(dev_net(dev), ops)) {
				kfree(ops);
				return 1;
			}
			list_add_tail_rcu(&ops->list, &hook->ops_list);
			break;
		}
		/* one hook was handled by the switch: stop scanning this chain */
		break;
	}
	return 0;
}

/*
 * Run nft_netdev_event() for every base chain of every netdev-family table
 * in the device's namespace, and for inet-family base chains whose hook
 * number is NF_INET_INGRESS. Returns 1 as soon as one chain reports
 * failure, 0 otherwise.
 */
static int __nf_tables_netdev_event(unsigned long event,
				    struct net_device *dev,
				    bool changename)
{
	struct nft_base_chain *basechain;
	struct nftables_pernet *nft_net;
	struct nft_chain *chain;
	struct nft_table *table;

	nft_net = nft_pernet(dev_net(dev));
	list_for_each_entry(table, &nft_net->tables, list) {
		if (table->family != NFPROTO_NETDEV &&
		    table->family != NFPROTO_INET)
			continue;

		list_for_each_entry(chain, &table->chains, list) {
			if (!nft_is_base_chain(chain))
				continue;

			basechain = nft_base_chain(chain);
			if (table->family == NFPROTO_INET &&
			    basechain->ops.hooknum != NF_INET_INGRESS)
				continue;

			if (nft_netdev_event(event, dev, basechain, changename))
				return 1;
		}
	}
	return 0;
}

/*
 * Netdevice notifier callback. Only REGISTER, UNREGISTER and CHANGENAME are
 * handled, under the per-namespace commit_mutex. A rename is processed as a
 * REGISTER pass followed by an UNREGISTER pass, both with changename set;
 * if the REGISTER pass fails the UNREGISTER pass is skipped.
 *
 * Returns NOTIFY_DONE, or NOTIFY_BAD when a register pass failed.
 */
static int nf_tables_netdev_event(struct notifier_block *this,
				  unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct nftables_pernet *nft_net;
	int ret = NOTIFY_DONE;

	if (event != NETDEV_REGISTER &&
	    event != NETDEV_UNREGISTER &&
	    event != NETDEV_CHANGENAME)
		return NOTIFY_DONE;

	nft_net = nft_pernet(dev_net(dev));
	mutex_lock(&nft_net->commit_mutex);

	if (event == NETDEV_CHANGENAME) {
		if (__nf_tables_netdev_event(NETDEV_REGISTER, dev, true)) {
			ret = NOTIFY_BAD;
			goto out_unlock;
		}
		__nf_tables_netdev_event(NETDEV_UNREGISTER, dev, true);
	} else if (__nf_tables_netdev_event(event, dev, false)) {
		ret = NOTIFY_BAD;
	}
out_unlock:
	mutex_unlock(&nft_net->commit_mutex);
	return ret;
}

static struct notifier_block nf_tables_netdev_notifier = {
	.notifier_call	= nf_tables_netdev_event,
};

/*
 * Register the netdev chain type and then the netdevice notifier; if the
 * notifier cannot be registered the chain type is unregistered again and
 * the error is returned.
 */
static int nft_chain_filter_netdev_init(void)
{
	int err;

	nft_register_chain_type(&nft_chain_filter_netdev);

	err = register_netdevice_notifier(&nf_tables_netdev_notifier);
	if (err)
		goto err_register_netdevice_notifier;

	return 0;

err_register_netdevice_notifier:
	nft_unregister_chain_type(&nft_chain_filter_netdev);

	return err;
}

/* Unregister the netdev chain type, then the netdevice notifier. */
static void nft_chain_filter_netdev_fini(void)
{
	nft_unregister_chain_type(&nft_chain_filter_netdev);
	unregister_netdevice_notifier(&nf_tables_netdev_notifier);
}
#else
/* netdev support compiled out: init reports success, fini is a no-op. */
static inline int nft_chain_filter_netdev_init(void) { return 0; }
static inline void nft_chain_filter_netdev_fini(void) {}
#endif /* CONFIG_NF_TABLES_NETDEV */

/*
 * Register all filter chain types. Only the netdev step can fail; its
 * error is returned before any other family is registered.
 */
int __init nft_chain_filter_init(void)
{
	int err;

	err = nft_chain_filter_netdev_init();
	if (err < 0)
		return err;

	nft_chain_filter_ipv4_init();
	nft_chain_filter_ipv6_init();
	nft_chain_filter_arp_init();
	nft_chain_filter_inet_init();
	nft_chain_filter_bridge_init();

	return 0;
}

/* Unregister all filter chain types, netdev last. */
void nft_chain_filter_fini(void)
{
	nft_chain_filter_bridge_fini();
	nft_chain_filter_inet_fini();
	nft_chain_filter_arp_fini();
	nft_chain_filter_ipv6_fini();
	nft_chain_filter_ipv4_fini();
	nft_chain_filter_netdev_fini();
}
/*
 * NOTE(review): the run of integers on the next line is residue of the
 * coverage report this text was extracted from (apparently per-line hit
 * counts followed by source line numbers). It is kept verbatim.
 */
17 12 17 1 17 1 17 1 17 1 17 13 13 13 2 2 13 16 2 1 2 1 16 16 16 15 2 13 18 18 17 17 17 18 13 18 16 18 15 18 6 18 16 18 20 18 18 18 18 17 18 20 18 23 22 21 20 23 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/module.h>
#include <linux/sock_diag.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/packet_diag.h>
#include <linux/percpu.h>
#include <net/net_namespace.h>
#include <net/sock.h>

#include "internal.h"

/*
 * Add a PACKET_DIAG_INFO attribute describing @po to @nlskb.
 * Every field of the local struct is assigned before it is copied out.
 * Returns the result of nla_put().
 */
static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb)
{
	struct packet_diag_info pinfo;

	pinfo.pdi_index = po->ifindex;
	pinfo.pdi_version = po->tp_version;
	pinfo.pdi_reserve = po->tp_reserve;
	pinfo.pdi_copy_thresh = READ_ONCE(po->copy_thresh);
	pinfo.pdi_tstamp = READ_ONCE(po->tp_tstamp);

	/* translate the socket's internal flags into PDI_* bits */
	pinfo.pdi_flags = 0;
	if (packet_sock_flag(po, PACKET_SOCK_RUNNING))
		pinfo.pdi_flags |= PDI_RUNNING;
	if (packet_sock_flag(po, PACKET_SOCK_AUXDATA))
		pinfo.pdi_flags |= PDI_AUXDATA;
	if (packet_sock_flag(po, PACKET_SOCK_ORIGDEV))
		pinfo.pdi_flags |= PDI_ORIGDEV;
	if (READ_ONCE(po->vnet_hdr_sz))
		pinfo.pdi_flags |= PDI_VNETHDR;
	if (packet_sock_flag(po, PACKET_SOCK_TP_LOSS))
		pinfo.pdi_flags |= PDI_LOSS;

	return nla_put(nlskb, PACKET_DIAG_INFO, sizeof(pinfo), &pinfo);
}

/*
 * Add a nested PACKET_DIAG_MCLIST attribute holding one
 * struct packet_diag_mclist per entry of po->mclist. The list is walked
 * with the RTNL lock held. If the message runs out of room the nest is
 * cancelled. Returns 0 or -EMSGSIZE.
 */
static int pdiag_put_mclist(const struct packet_sock *po, struct sk_buff *nlskb)
{
	struct nlattr *mca;
	struct packet_mclist *ml;

	mca = nla_nest_start_noflag(nlskb, PACKET_DIAG_MCLIST);
	if (!mca)
		return -EMSGSIZE;

	rtnl_lock();
	for (ml = po->mclist; ml; ml = ml->next) {
		struct packet_diag_mclist *dml;

		dml = nla_reserve_nohdr(nlskb, sizeof(*dml));
		if (!dml) {
			rtnl_unlock();
			nla_nest_cancel(nlskb, mca);
			return -EMSGSIZE;
		}

		dml->pdmc_index = ml->ifindex;
		dml->pdmc_type = ml->type;
		dml->pdmc_alen = ml->alen;
		dml->pdmc_count = ml->count;
		/* the fixed-size copy below relies on both arrays having equal size */
		BUILD_BUG_ON(sizeof(dml->pdmc_addr) != sizeof(ml->addr));
		memcpy(dml->pdmc_addr, ml->addr, sizeof(ml->addr));
	}

	rtnl_unlock();
	nla_nest_end(nlskb, mca);

	return 0;
}

/*
 * Add an attribute of type @nl_type describing @ring. A ring without a
 * page vector is skipped and reported as success (0). The three
 * block-descriptor fields are taken from ring->prb_bdqc only when @ver is
 * above TPACKET_V2 and are zero otherwise.
 */
static int pdiag_put_ring(struct packet_ring_buffer *ring, int ver, int nl_type,
		struct sk_buff *nlskb)
{
	struct packet_diag_ring pdr;

	if (!ring->pg_vec)
		return 0;

	pdr.pdr_block_size = ring->pg_vec_pages << PAGE_SHIFT;
	pdr.pdr_block_nr = ring->pg_vec_len;
	pdr.pdr_frame_size = ring->frame_size;
	pdr.pdr_frame_nr = ring->frame_max + 1;

	if (ver > TPACKET_V2) {
		pdr.pdr_retire_tmo = ktime_to_ms(ring->prb_bdqc.interval_ktime);
		pdr.pdr_sizeof_priv = ring->prb_bdqc.blk_sizeof_priv;
		pdr.pdr_features = ring->prb_bdqc.feature_req_word;
	} else {
		pdr.pdr_retire_tmo = 0;
		pdr.pdr_sizeof_priv = 0;
		pdr.pdr_features = 0;
	}

	return nla_put(nlskb, nl_type, sizeof(pdr), &pdr);
}

/*
 * Report the RX ring and, if that succeeded, the TX ring of @po, holding
 * po->pg_vec_lock across both. Returns the first non-zero result.
 */
static int pdiag_put_rings_cfg(struct packet_sock *po, struct sk_buff *skb)
{
	int ret;

	mutex_lock(&po->pg_vec_lock);
	ret = pdiag_put_ring(&po->rx_ring, po->tp_version,
			PACKET_DIAG_RX_RING, skb);
	if (!ret)
		ret = pdiag_put_ring(&po->tx_ring, po->tp_version,
				PACKET_DIAG_TX_RING, skb);
	mutex_unlock(&po->pg_vec_lock);

	return ret;
}

/*
 * If @po has a fanout, add PACKET_DIAG_FANOUT with the fanout id in the low
 * 16 bits and the fanout type shifted into the upper half. Done under
 * fanout_mutex. Returns 0 when there is no fanout.
 */
static int pdiag_put_fanout(struct packet_sock *po, struct sk_buff *nlskb)
{
	int ret = 0;

	mutex_lock(&fanout_mutex);
	if (po->fanout) {
		u32 val;

		val = (u32)po->fanout->id | ((u32)po->fanout->type << 16);
		ret = nla_put_u32(nlskb, PACKET_DIAG_FANOUT, val);
	}
	mutex_unlock(&fanout_mutex);

	return ret;
}

/*
 * Build one SOCK_DIAG_BY_FAMILY message for @sk in @skb.
 *
 * The fixed header is always filled in; each optional attribute is added
 * only when the matching PACKET_SHOW_* bit is set in req->pdiag_show
 * (PACKET_SHOW_INFO controls both the info block and the UID attribute).
 *
 * Returns 0 on success. On any failure the partially built message is
 * cancelled and -EMSGSIZE is returned.
 */
static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
			struct packet_diag_req *req,
			bool may_report_filterinfo,
			struct user_namespace *user_ns,
			u32 portid, u32 seq, u32 flags, int sk_ino)
{
	struct nlmsghdr *nlh;
	struct packet_diag_msg *rp;
	struct packet_sock *po = pkt_sk(sk);

	nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rp), flags);
	if (!nlh)
		return -EMSGSIZE;

	rp = nlmsg_data(nlh);
	rp->pdiag_family = AF_PACKET;
	rp->pdiag_type = sk->sk_type;
	rp->pdiag_num = ntohs(READ_ONCE(po->num));
	rp->pdiag_ino = sk_ino;
	sock_diag_save_cookie(sk, rp->pdiag_cookie);

	if ((req->pdiag_show & PACKET_SHOW_INFO) &&
			pdiag_put_info(po, skb))
		goto out_nlmsg_trim;

	if ((req->pdiag_show & PACKET_SHOW_INFO) &&
	    nla_put_u32(skb, PACKET_DIAG_UID,
			from_kuid_munged(user_ns, sk_uid(sk))))
		goto out_nlmsg_trim;

	if ((req->pdiag_show & PACKET_SHOW_MCLIST) &&
			pdiag_put_mclist(po, skb))
		goto out_nlmsg_trim;

	if ((req->pdiag_show & PACKET_SHOW_RING_CFG) &&
			pdiag_put_rings_cfg(po, skb))
		goto out_nlmsg_trim;

	if ((req->pdiag_show & PACKET_SHOW_FANOUT) &&
			pdiag_put_fanout(po, skb))
		goto out_nlmsg_trim;

	if ((req->pdiag_show & PACKET_SHOW_MEMINFO) &&
	    sock_diag_put_meminfo(sk, skb, PACKET_DIAG_MEMINFO))
		goto out_nlmsg_trim;

	if ((req->pdiag_show & PACKET_SHOW_FILTER) &&
	    sock_diag_put_filterinfo(may_report_filterinfo, sk, skb,
				     PACKET_DIAG_FILTER))
		goto out_nlmsg_trim;

	nlmsg_end(skb, nlh);
	return 0;

out_nlmsg_trim:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

/*
 * Netlink dump callback: emit one message per packet socket of the
 * requesting socket's namespace, under net->packet.sklist_lock.
 *
 * cb->args[0] holds the number of sockets already visited, so a dump that
 * stopped because the skb filled up resumes at the same position on the
 * next call. Returns skb->len.
 */
static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	int num = 0, s_num = cb->args[0];
	struct packet_diag_req *req;
	struct net *net;
	struct sock *sk;
	bool may_report_filterinfo;

	net = sock_net(skb->sk);
	req = nlmsg_data(cb->nlh);
	may_report_filterinfo = netlink_net_capable(cb->skb, CAP_NET_ADMIN);

	mutex_lock(&net->packet.sklist_lock);
	sk_for_each(sk, &net->packet.sklist) {
		if (!net_eq(sock_net(sk), net))
			continue;
		/* skip entries that were already dumped by an earlier call */
		if (num < s_num)
			goto next;

		if (sk_diag_fill(sk, skb, req,
				 may_report_filterinfo,
				 sk_user_ns(NETLINK_CB(cb->skb).sk),
				 NETLINK_CB(cb->skb).portid,
				 cb->nlh->nlmsg_seq, NLM_F_MULTI,
				 sock_i_ino(sk)) < 0)
			goto done;
next:
		num++;
	}
done:
	mutex_unlock(&net->packet.sklist_lock);
	cb->args[0] = num;

	return skb->len;
}

/*
 * sock_diag entry point for AF_PACKET.
 *
 * Rejects requests shorter than struct packet_diag_req or with a non-zero
 * sdiag_protocol (-EINVAL). Only dump requests (NLM_F_DUMP) are supported;
 * anything else yields -EOPNOTSUPP.
 */
static int packet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
{
	int hdrlen = sizeof(struct packet_diag_req);
	struct net *net = sock_net(skb->sk);
	struct packet_diag_req *req;

	if (nlmsg_len(h) < hdrlen)
		return -EINVAL;

	req = nlmsg_data(h);
	/* Make it possible to support protocol filtering later */
	if (req->sdiag_protocol)
		return -EINVAL;

	if (h->nlmsg_flags & NLM_F_DUMP) {
		struct netlink_dump_control c = {
			.dump = packet_diag_dump,
		};
		return netlink_dump_start(net->diag_nlsk, skb, h, &c);
	} else
		return -EOPNOTSUPP;
}

/* Handler descriptor registered with sock_diag for AF_PACKET. */
static const struct sock_diag_handler packet_diag_handler = {
	.owner = THIS_MODULE,
	.family = AF_PACKET,
	.dump = packet_diag_handler_dump,
};

static int __init packet_diag_init(void)
{
	return sock_diag_register(&packet_diag_handler);
}

static void __exit packet_diag_exit(void)
{
	sock_diag_unregister(&packet_diag_handler);
}

module_init(packet_diag_init);
module_exit(packet_diag_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("PACKET socket monitoring via SOCK_DIAG");
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 17 /* AF_PACKET */);
/*
 * NOTE(review): the run of integers on the next line is residue of the
 * coverage report this text was extracted from (apparently per-line hit
 * counts followed by source line numbers). It is kept verbatim.
 */
1 1 1 1 1 1 31 29 30 8 8 8 8 1 8 8 8 8 8 8 8 13 13 13 13 13 26 12 12 26 28 28 28 4 4 4 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 * Copyright (C) 2024 Alibaba Cloud
 */
#include "internal.h"

/*
 * One global buffer: @pages holds @nrpages page pointers and @ptr is their
 * vmap()ed address (set in z_erofs_gbuf_growsize()); @lock guards them.
 */
struct z_erofs_gbuf {
	spinlock_t lock;
	void *ptr;
	struct page **pages;
	unsigned int nrpages;
};

static struct z_erofs_gbuf *z_erofs_gbufpool, *z_erofs_rsvbuf;
static unsigned int z_erofs_gbuf_count, z_erofs_gbuf_nrpages,
		z_erofs_rsv_nrpages;

module_param_named(global_buffers, z_erofs_gbuf_count, uint, 0444);
module_param_named(reserved_pages, z_erofs_rsv_nrpages, uint, 0444);

atomic_long_t erofs_global_shrink_cnt;	/* for all mounted instances */

/* `erofs_sb_list_lock` protects the mounted `erofs_sb_list` */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);
static unsigned int shrinker_run_no;
static struct shrinker *erofs_shrinker_info;

/* Index of the global buffer used by the current CPU. */
static unsigned int z_erofs_gbuf_id(void)
{
	return raw_smp_processor_id() % z_erofs_gbuf_count;
}

/*
 * Acquire the current CPU's global buffer.
 *
 * On success the buffer's mapped address is returned with gbuf->lock held
 * and migration disabled; the caller releases both with z_erofs_put_gbuf().
 * If the buffer has fewer than @requiredpages pages, everything is released
 * again and NULL is returned.
 */
void *z_erofs_get_gbuf(unsigned int requiredpages)
	__acquires(gbuf->lock)
{
	struct z_erofs_gbuf *gbuf;

	migrate_disable();
	gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()];
	spin_lock(&gbuf->lock);
	/* check if the buffer is too small */
	if (requiredpages > gbuf->nrpages) {
		spin_unlock(&gbuf->lock);
		migrate_enable();
		/* (for sparse checker) pretend gbuf->lock is still taken */
		__acquire(gbuf->lock);
		return NULL;
	}
	return gbuf->ptr;
}

/*
 * Release the buffer obtained from z_erofs_get_gbuf(): drop the lock of
 * the current CPU's buffer and re-enable migration. @ptr is only used for
 * the debug check that it is this buffer's mapping.
 */
void z_erofs_put_gbuf(void *ptr) __releases(gbuf->lock)
{
	struct z_erofs_gbuf *gbuf;

	gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()];
	DBG_BUGON(gbuf->ptr != ptr);
	spin_unlock(&gbuf->lock);
	migrate_enable();
}

/*
 * Grow every global buffer to @nrpages pages (never shrinks).
 *
 * For each buffer a new page array is allocated, the existing pages are
 * carried over, the remainder is bulk-allocated, and the whole array is
 * vmap()ed. The new array and mapping are swapped in under gbuf->lock and
 * the old mapping is unmapped afterwards.
 *
 * Serialized by a function-local mutex. Returns 0 on success (including
 * when no growth is needed) or -ENOMEM; on failure only the pages newly
 * allocated for the buffer being processed are freed.
 */
int z_erofs_gbuf_growsize(unsigned int nrpages)
{
	static DEFINE_MUTEX(gbuf_resize_mutex);
	struct page **tmp_pages = NULL;
	struct z_erofs_gbuf *gbuf;
	void *ptr, *old_ptr;
	int last, i, j;

	mutex_lock(&gbuf_resize_mutex);
	/* avoid shrinking gbufs, since no idea how many fses rely on */
	if (nrpages <= z_erofs_gbuf_nrpages) {
		mutex_unlock(&gbuf_resize_mutex);
		return 0;
	}

	for (i = 0; i < z_erofs_gbuf_count; ++i) {
		gbuf = &z_erofs_gbufpool[i];
		tmp_pages = kzalloc_objs(*tmp_pages, nrpages);
		if (!tmp_pages)
			goto out;

		/* reuse the pages the buffer already owns */
		for (j = 0; j < gbuf->nrpages; ++j)
			tmp_pages[j] = gbuf->pages[j];
		/* keep allocating until the array is full; give up if a round makes no progress */
		do {
			last = j;
			j = alloc_pages_bulk(GFP_KERNEL, nrpages,
					     tmp_pages);
			if (last == j)
				goto out;
		} while (j != nrpages);

		ptr = vmap(tmp_pages, nrpages, VM_MAP, PAGE_KERNEL);
		if (!ptr)
			goto out;

		spin_lock(&gbuf->lock);
		kfree(gbuf->pages);
		gbuf->pages = tmp_pages;
		old_ptr = gbuf->ptr;
		gbuf->ptr = ptr;
		gbuf->nrpages = nrpages;
		spin_unlock(&gbuf->lock);
		if (old_ptr)
			vunmap(old_ptr);
	}
	z_erofs_gbuf_nrpages = nrpages;
out:
	/* i < count means the loop was left early: undo the partial work */
	if (i < z_erofs_gbuf_count && tmp_pages) {
		for (j = 0; j < nrpages; ++j)
			if (tmp_pages[j] && (j >= gbuf->nrpages ||
					     tmp_pages[j] != gbuf->pages[j]))
				__free_page(tmp_pages[j]);
		kfree(tmp_pages);
	}
	mutex_unlock(&gbuf_resize_mutex);
	return i < z_erofs_gbuf_count ? -ENOMEM : 0;
}

/*
 * Allocate the global buffer pool.
 *
 * The number of buffers is the number of possible CPUs, capped by the
 * global_buffers parameter when that is non-zero. If reserved_pages is
 * non-zero, one extra trailing entry serves as the reserved buffer; if its
 * page array cannot be allocated, the reserved buffer is disabled rather
 * than failing initialization.
 *
 * Returns 0, or -ENOMEM if the pool itself cannot be allocated.
 */
int __init z_erofs_gbuf_init(void)
{
	unsigned int i, total = num_possible_cpus();

	if (z_erofs_gbuf_count)
		total = min(z_erofs_gbuf_count, total);
	z_erofs_gbuf_count = total;

	/* The last (special) global buffer is the reserved buffer */
	total += !!z_erofs_rsv_nrpages;

	z_erofs_gbufpool = kzalloc_objs(*z_erofs_gbufpool, total);
	if (!z_erofs_gbufpool)
		return -ENOMEM;

	if (z_erofs_rsv_nrpages) {
		z_erofs_rsvbuf = &z_erofs_gbufpool[total - 1];
		z_erofs_rsvbuf->pages = kzalloc_objs(*z_erofs_rsvbuf->pages,
						     z_erofs_rsv_nrpages);
		if (!z_erofs_rsvbuf->pages) {
			z_erofs_rsvbuf = NULL;
			z_erofs_rsv_nrpages = 0;
		}
	}
	for (i = 0; i < total; ++i)
		spin_lock_init(&z_erofs_gbufpool[i].lock);
	return 0;
}

/*
 * Tear down the pool: unmap every buffer, drop its pages, free its page
 * array (the reserved buffer is included when present), then free the
 * pool itself.
 */
void z_erofs_gbuf_exit(void)
{
	int i, j;

	for (i = 0; i < z_erofs_gbuf_count + (!!z_erofs_rsvbuf); ++i) {
		struct z_erofs_gbuf *gbuf = &z_erofs_gbufpool[i];

		if (gbuf->ptr) {
			vunmap(gbuf->ptr);
			gbuf->ptr = NULL;
		}

		if (!gbuf->pages)
			continue;

		for (j = 0; j < gbuf->nrpages; ++j)
			if (gbuf->pages[j])
				put_page(gbuf->pages[j]);
		kfree(gbuf->pages);
		gbuf->pages = NULL;
	}
	kfree(z_erofs_gbufpool);
}

/*
 * Get a page, trying in order: the head of the caller's @pagepool (a list
 * chained through page_private()), then, if @tryrsv is set, the reserved
 * buffer, and finally alloc_page(@gfp). May return NULL if that last
 * allocation fails.
 */
struct page *__erofs_allocpage(struct page **pagepool, gfp_t gfp, bool tryrsv)
{
	struct page *page = *pagepool;

	if (page) {
		*pagepool = (struct page *)page_private(page);
	} else if (tryrsv && z_erofs_rsvbuf && z_erofs_rsvbuf->nrpages) {
		spin_lock(&z_erofs_rsvbuf->lock);
		/* re-check under the lock: the unlocked test above was only a hint */
		if (z_erofs_rsvbuf->nrpages)
			page = z_erofs_rsvbuf->pages[--z_erofs_rsvbuf->nrpages];
		spin_unlock(&z_erofs_rsvbuf->lock);
	}
	if (!page)
		page = alloc_page(gfp);
	DBG_BUGON(page && page_ref_count(page) != 1);
	return page;
}

/*
 * Empty @pagepool: each page goes to the reserved buffer while that has
 * room, otherwise its reference is dropped with put_page().
 */
void erofs_release_pages(struct page **pagepool)
{
	while (*pagepool) {
		struct page *page = *pagepool;

		*pagepool = (struct page *)page_private(page);
		/* try to fill reserved global pool first */
		if (z_erofs_rsvbuf && z_erofs_rsvbuf->nrpages <
				z_erofs_rsv_nrpages) {
			spin_lock(&z_erofs_rsvbuf->lock);
			if (z_erofs_rsvbuf->nrpages < z_erofs_rsv_nrpages) {
				z_erofs_rsvbuf->pages[z_erofs_rsvbuf->nrpages++]
						= page;
				spin_unlock(&z_erofs_rsvbuf->lock);
				continue;
			}
			spin_unlock(&z_erofs_rsvbuf->lock);
		}
		put_page(page);
	}
}

/* Initialize the superblock's umount_mutex and add it to erofs_sb_list. */
void erofs_shrinker_register(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_init(&sbi->umount_mutex);

	spin_lock(&erofs_sb_list_lock);
	list_add(&sbi->list, &erofs_sb_list);
	spin_unlock(&erofs_sb_list_lock);
}

/*
 * With umount_mutex held, shrink the superblock until managed_pslots is
 * empty, then remove it from erofs_sb_list.
 */
void erofs_shrinker_unregister(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);

	mutex_lock(&sbi->umount_mutex);
	while (!xa_empty(&sbi->managed_pslots)) {
		z_erofs_shrink_scan(sbi, ~0UL);
		cond_resched();
	}
	spin_lock(&erofs_sb_list_lock);
	list_del(&sbi->list);
	spin_unlock(&erofs_sb_list_lock);
	mutex_unlock(&sbi->umount_mutex);
}

/* Shrinker count callback: the global counter, or SHRINK_EMPTY when it is zero. */
static unsigned long erofs_shrink_count(struct shrinker *shrink,
					struct shrink_control *sc)
{
	return atomic_long_read(&erofs_global_shrink_cnt) ?: SHRINK_EMPTY;
}

/*
 * Shrinker scan callback: walk the mounted superblocks and shrink each one
 * until sc->nr_to_scan objects were freed or the list has been covered.
 *
 * Each call gets a fresh non-zero run number; a superblock already stamped
 * with it marks the end of the walk. Superblocks whose umount_mutex cannot
 * be taken without blocking are skipped. The list lock is dropped while an
 * individual superblock is being shrunk.
 *
 * Returns the number of objects freed.
 */
static unsigned long erofs_shrink_scan(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	struct erofs_sb_info *sbi;
	struct list_head *p;

	unsigned long nr = sc->nr_to_scan;
	unsigned int run_no;
	unsigned long freed = 0;

	spin_lock(&erofs_sb_list_lock);
	/* skip 0 when the counter wraps */
	do {
		run_no = ++shrinker_run_no;
	} while (run_no == 0);

	/* Iterate over all mounted superblocks and try to shrink them */
	p = erofs_sb_list.next;
	while (p != &erofs_sb_list) {
		sbi = list_entry(p, struct erofs_sb_info, list);

		/*
		 * We move the ones we do to the end of the list, so we stop
		 * when we see one we have already done.
		 */
		if (sbi->shrinker_run_no == run_no)
			break;

		if (!mutex_trylock(&sbi->umount_mutex)) {
			p = p->next;
			continue;
		}

		spin_unlock(&erofs_sb_list_lock);
		sbi->shrinker_run_no = run_no;
		freed += z_erofs_shrink_scan(sbi, nr - freed);
		spin_lock(&erofs_sb_list_lock);
		/* Get the next list element before we move this one */
		p = p->next;

		/*
		 * Move this one to the end of the list to provide some
		 * fairness.
		 */
		list_move_tail(&sbi->list, &erofs_sb_list);
		mutex_unlock(&sbi->umount_mutex);

		if (freed >= nr)
			break;
	}
	spin_unlock(&erofs_sb_list_lock);
	return freed;
}

/* Allocate, set up and register the erofs shrinker. Returns 0 or -ENOMEM. */
int __init erofs_init_shrinker(void)
{
	erofs_shrinker_info = shrinker_alloc(0, "erofs-shrinker");
	if (!erofs_shrinker_info)
		return -ENOMEM;

	erofs_shrinker_info->count_objects = erofs_shrink_count;
	erofs_shrinker_info->scan_objects = erofs_shrink_scan;
	shrinker_register(erofs_shrinker_info);
	return 0;
}

/* Free the erofs shrinker. */
void erofs_exit_shrinker(void)
{
	shrinker_free(erofs_shrinker_info);
}
/*
 * NOTE(review): the run of integers on the next line is residue of the
 * coverage report this text was extracted from (apparently per-line hit
 * counts followed by source line numbers). It is kept verbatim.
 */
8 69 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __IEEE802154_CORE_H
#define __IEEE802154_CORE_H

#include <net/cfg802154.h>

/*
 * Per-device registration record. It embeds the public struct wpan_phy as
 * its final member, so the record can be recovered from a wpan_phy pointer
 * with wpan_phy_to_rdev().
 */
struct cfg802154_registered_device {
	const struct cfg802154_ops *ops;
	struct list_head list;

	/* wpan_phy index, internal only */
	int wpan_phy_idx;

	/* also protected by devlist_mtx */
	int opencount;
	wait_queue_head_t dev_wait;

	/* protected by RTNL only */
	int num_running_ifaces;

	/* associated wpan interfaces, protected by rtnl or RCU */
	struct list_head wpan_dev_list;
	int devlist_generation, wpan_dev_id;

	/* must be last because of the way we do wpan_phy_priv(),
	 * and it should at least be aligned to NETDEV_ALIGN
	 */
	struct wpan_phy wpan_phy __aligned(NETDEV_ALIGN);
};

/*
 * Map an embedded wpan_phy back to its enclosing registration record.
 * A NULL @wpan_phy triggers BUG_ON().
 */
static inline struct cfg802154_registered_device *
wpan_phy_to_rdev(struct wpan_phy *wpan_phy)
{
	BUG_ON(!wpan_phy);
	return container_of(wpan_phy, struct cfg802154_registered_device,
			    wpan_phy);
}

extern struct list_head cfg802154_rdev_list;
extern int cfg802154_rdev_list_generation;

int cfg802154_switch_netns(struct cfg802154_registered_device *rdev,
			   struct net *net);
/* free object */
void cfg802154_dev_free(struct cfg802154_registered_device *rdev);
struct cfg802154_registered_device *
cfg802154_rdev_by_wpan_phy_idx(int wpan_phy_idx);
struct wpan_phy *wpan_phy_idx_to_wpan_phy(int wpan_phy_idx);

#endif /* __IEEE802154_CORE_H */
71 8 7 81 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Tracepoint definitions for the snd_pcm trace system.
 *
 * The guard below deliberately lets the header be read again when
 * TRACE_HEADER_MULTI_READ is defined.
 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM snd_pcm
#define TRACE_INCLUDE_FILE pcm_trace

#if !defined(_PCM_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
#define _PCM_TRACE_H

#include <linux/tracepoint.h>

/*
 * hwptr: records a hardware-pointer position report.
 *
 * Captures the caller-supplied @pos and @irq flag together with the
 * substream identity (card, device, substream number, stream direction)
 * and the runtime's period size, buffer size, status->hw_ptr (as "old")
 * and hw_ptr_base. Printed with "IRQ" when @irq is true, "POS" otherwise.
 */
TRACE_EVENT(hwptr,
	TP_PROTO(struct snd_pcm_substream *substream, snd_pcm_uframes_t pos, bool irq),
	TP_ARGS(substream, pos, irq),
	TP_STRUCT__entry(
		__field( bool, in_interrupt )
		__field( unsigned int, card )
		__field( unsigned int, device )
		__field( unsigned int, number )
		__field( unsigned int, stream )
		__field( snd_pcm_uframes_t, pos )
		__field( snd_pcm_uframes_t, period_size )
		__field( snd_pcm_uframes_t, buffer_size )
		__field( snd_pcm_uframes_t, old_hw_ptr )
		__field( snd_pcm_uframes_t, hw_ptr_base )
	),
	TP_fast_assign(
		__entry->in_interrupt = (irq);
		__entry->card = (substream)->pcm->card->number;
		__entry->device = (substream)->pcm->device;
		__entry->number = (substream)->number;
		__entry->stream = (substream)->stream;
		__entry->pos = (pos);
		__entry->period_size = (substream)->runtime->period_size;
		__entry->buffer_size = (substream)->runtime->buffer_size;
		__entry->old_hw_ptr = (substream)->runtime->status->hw_ptr;
		__entry->hw_ptr_base = (substream)->runtime->hw_ptr_base;
	),
	TP_printk("pcmC%dD%d%s/sub%d: %s: pos=%lu, old=%lu, base=%lu, period=%lu, buf=%lu",
		  __entry->card, __entry->device,
		  __entry->stream == SNDRV_PCM_STREAM_PLAYBACK ? "p" : "c",
		  __entry->number,
		  __entry->in_interrupt ? "IRQ" : "POS",
		  (unsigned long)__entry->pos,
		  (unsigned long)__entry->old_hw_ptr,
		  (unsigned long)__entry->hw_ptr_base,
		  (unsigned long)__entry->period_size,
		  (unsigned long)__entry->buffer_size)
);

/*
 * xrun: records the same substream identity and runtime fields as hwptr,
 * minus the position and interrupt flag, under an "XRUN" label.
 */
TRACE_EVENT(xrun,
	TP_PROTO(struct snd_pcm_substream *substream),
	TP_ARGS(substream),
	TP_STRUCT__entry(
		__field( unsigned int, card )
		__field( unsigned int, device )
		__field( unsigned int, number )
		__field( unsigned int, stream )
		__field( snd_pcm_uframes_t, period_size )
		__field( snd_pcm_uframes_t, buffer_size )
		__field( snd_pcm_uframes_t, old_hw_ptr )
		__field( snd_pcm_uframes_t, hw_ptr_base )
	),
	TP_fast_assign(
		__entry->card = (substream)->pcm->card->number;
		__entry->device = (substream)->pcm->device;
		__entry->number = (substream)->number;
		__entry->stream = (substream)->stream;
		__entry->period_size = (substream)->runtime->period_size;
		__entry->buffer_size = (substream)->runtime->buffer_size;
		__entry->old_hw_ptr = (substream)->runtime->status->hw_ptr;
		__entry->hw_ptr_base = (substream)->runtime->hw_ptr_base;
	),
	TP_printk("pcmC%dD%d%s/sub%d: XRUN: old=%lu, base=%lu, period=%lu, buf=%lu",
		  __entry->card, __entry->device,
		  __entry->stream == SNDRV_PCM_STREAM_PLAYBACK ? "p" : "c",
		  __entry->number,
		  (unsigned long)__entry->old_hw_ptr,
		  (unsigned long)__entry->hw_ptr_base,
		  (unsigned long)__entry->period_size,
		  (unsigned long)__entry->buffer_size)
);

/*
 * hw_ptr_error: records the substream identity plus a caller-supplied
 * reason string (@why), stored via __string()/__assign_str().
 */
TRACE_EVENT(hw_ptr_error,
	TP_PROTO(struct snd_pcm_substream *substream, const char *why),
	TP_ARGS(substream, why),
	TP_STRUCT__entry(
		__field( unsigned int, card )
		__field( unsigned int, device )
		__field( unsigned int, number )
		__field( unsigned int, stream )
		__string( reason, why )
	),
	TP_fast_assign(
		__entry->card = (substream)->pcm->card->number;
		__entry->device = (substream)->pcm->device;
		__entry->number = (substream)->number;
		__entry->stream = (substream)->stream;
		__assign_str(reason);
	),
	TP_printk("pcmC%dD%d%s/sub%d: ERROR: %s",
		  __entry->card, __entry->device,
		  __entry->stream == SNDRV_PCM_STREAM_PLAYBACK ? "p" : "c",
		  __entry->number, __get_str(reason))
);

/*
 * applptr: records an application-pointer change from @prev to @curr.
 *
 * "avail" is sampled at trace time: snd_pcm_capture_avail() when the
 * stream value is non-zero, snd_pcm_playback_avail() when it is zero.
 * The direction letter is chosen the same way (non-zero -> "c"), whereas
 * the other events compare against SNDRV_PCM_STREAM_PLAYBACK.
 *
 * NOTE(review): unlike the other events, the snd_pcm_uframes_t fields are
 * passed to %lu without an (unsigned long) cast -- this relies on
 * snd_pcm_uframes_t being unsigned long; confirm.
 */
TRACE_EVENT(applptr,
	TP_PROTO(struct snd_pcm_substream *substream, snd_pcm_uframes_t prev, snd_pcm_uframes_t curr),
	TP_ARGS(substream, prev, curr),
	TP_STRUCT__entry(
		__field( unsigned int, card )
		__field( unsigned int, device )
		__field( unsigned int, number )
		__field( unsigned int, stream )
		__field( snd_pcm_uframes_t, prev )
		__field( snd_pcm_uframes_t, curr )
		__field( snd_pcm_uframes_t, avail )
		__field( snd_pcm_uframes_t, period_size )
		__field( snd_pcm_uframes_t, buffer_size )
	),
	TP_fast_assign(
		__entry->card = (substream)->pcm->card->number;
		__entry->device = (substream)->pcm->device;
		__entry->number = (substream)->number;
		__entry->stream = (substream)->stream;
		__entry->prev = (prev);
		__entry->curr = (curr);
		__entry->avail = (substream)->stream ? snd_pcm_capture_avail(substream->runtime) : snd_pcm_playback_avail(substream->runtime);
		__entry->period_size = (substream)->runtime->period_size;
		__entry->buffer_size = (substream)->runtime->buffer_size;
	),
	TP_printk("pcmC%dD%d%s/sub%d: prev=%lu, curr=%lu, avail=%lu, period=%lu, buf=%lu",
		__entry->card,
		__entry->device,
		__entry->stream ? "c" : "p",
		__entry->number,
		__entry->prev,
		__entry->curr,
		__entry->avail,
		__entry->period_size,
		__entry->buffer_size
	)
);

#endif /* _PCM_TRACE_H */

/* This part must be outside protection */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#include <trace/define_trace.h>
2 2 2 2 2 1 1 6 7 9 9 7 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 // SPDX-License-Identifier: 
GPL-2.0-or-later /* * Syncookies implementation for the Linux kernel * * Copyright (C) 1997 Andi Kleen * Based on ideas by D.J.Bernstein and Eric Schenk. */ #include <linux/tcp.h> #include <linux/siphash.h> #include <linux/kernel.h> #include <linux/export.h> #include <net/secure_seq.h> #include <net/tcp.h> #include <net/tcp_ecn.h> #include <net/route.h> static siphash_aligned_key_t syncookie_secret[2]; #define COOKIEBITS 24 /* Upper bits store count */ #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) /* TCP Timestamp: 6 lowest bits of timestamp sent in the cookie SYN-ACK * stores TCP options: * * MSB LSB * | 31 ... 6 | 5 | 4 | 3 2 1 0 | * | Timestamp | ECN | SACK | WScale | * * When we receive a valid cookie-ACK, we look at the echoed tsval (if * any) to figure out which TCP options we should use for the rebuilt * connection. * * A WScale setting of '0xf' (which is an invalid scaling value) * means that original syn did not include the TCP window scaling option. */ #define TS_OPT_WSCALE_MASK 0xf #define TS_OPT_SACK BIT(4) #define TS_OPT_ECN BIT(5) /* There is no TS_OPT_TIMESTAMP: * if ACK contains timestamp option, we already know it was * requested/supported by the syn/synack exchange. */ #define TSBITS 6 static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, u32 count, int c) { net_get_random_once(syncookie_secret, sizeof(syncookie_secret)); return siphash_4u32((__force u32)saddr, (__force u32)daddr, (__force u32)sport << 16 | (__force u32)dport, count, &syncookie_secret[c]); } /* * when syncookies are in effect and tcp timestamps are enabled we encode * tcp options in the lower bits of the timestamp value that will be * sent in the syn-ack. * Since subsequent timestamps use the normal tcp_time_stamp value, we * must make sure that the resulting initial timestamp is <= tcp_time_stamp. 
*/ u64 cookie_init_timestamp(struct request_sock *req, u64 now) { const struct inet_request_sock *ireq = inet_rsk(req); u64 ts, ts_now = tcp_ns_to_ts(false, now); u32 options = 0; options = ireq->wscale_ok ? ireq->snd_wscale : TS_OPT_WSCALE_MASK; if (ireq->sack_ok) options |= TS_OPT_SACK; if (ireq->ecn_ok) options |= TS_OPT_ECN; ts = (ts_now >> TSBITS) << TSBITS; ts |= options; if (ts > ts_now) ts -= (1UL << TSBITS); if (tcp_rsk(req)->req_usec_ts) return ts * NSEC_PER_USEC; return ts * NSEC_PER_MSEC; } static __u32 secure_tcp_syn_cookie(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, __u32 sseq, __u32 data) { /* * Compute the secure sequence number. * The output should be: * HASH(sec1,saddr,sport,daddr,dport,sec1) + sseq + (count * 2^24) * + (HASH(sec2,saddr,sport,daddr,dport,count,sec2) % 2^24). * Where sseq is their sequence number and count increases every * minute by 1. * As an extra hack, we add a small "data" value that encodes the * MSS into the second hash value. */ u32 count = tcp_cookie_time(); return (cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq + (count << COOKIEBITS) + ((cookie_hash(saddr, daddr, sport, dport, count, 1) + data) & COOKIEMASK)); } /* * This retrieves the small "data" value from the syncookie. * If the syncookie is bad, the data returned will be out of * range. This must be checked by the caller. * * The count value used to generate the cookie must be less than * MAX_SYNCOOKIE_AGE minutes in the past. * The return value (__u32)-1 if this test fails. 
*/ static __u32 check_tcp_syn_cookie(__u32 cookie, __be32 saddr, __be32 daddr, __be16 sport, __be16 dport, __u32 sseq) { u32 diff, count = tcp_cookie_time(); /* Strip away the layers from the cookie */ cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq; /* Cookie is now reduced to (count * 2^24) ^ (hash % 2^24) */ diff = (count - (cookie >> COOKIEBITS)) & ((__u32) -1 >> COOKIEBITS); if (diff >= MAX_SYNCOOKIE_AGE) return (__u32)-1; return (cookie - cookie_hash(saddr, daddr, sport, dport, count - diff, 1)) & COOKIEMASK; /* Leaving the data behind */ } /* * MSS Values are chosen based on the 2011 paper * 'An Analysis of TCP Maximum Segement Sizes' by S. Alcock and R. Nelson. * Values .. * .. lower than 536 are rare (< 0.2%) * .. between 537 and 1299 account for less than < 1.5% of observed values * .. in the 1300-1349 range account for about 15 to 20% of observed mss values * .. exceeding 1460 are very rare (< 0.04%) * * 1460 is the single most frequently announced mss value (30 to 46% depending * on monitor location). Table must be sorted. */ static __u16 const msstab[] = { 536, 1300, 1440, /* 1440, 1452: PPPoE */ 1460, }; /* * Generate a syncookie. mssp points to the mss, which is returned * rounded down to the value encoded in the cookie. */ u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th, u16 *mssp) { int mssind; const __u16 mss = *mssp; for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--) if (mss >= msstab[mssind]) break; *mssp = msstab[mssind]; return secure_tcp_syn_cookie(iph->saddr, iph->daddr, th->source, th->dest, ntohl(th->seq), mssind); } EXPORT_SYMBOL_GPL(__cookie_v4_init_sequence); __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mssp) { const struct iphdr *iph = ip_hdr(skb); const struct tcphdr *th = tcp_hdr(skb); return __cookie_v4_init_sequence(iph, th, mssp); } /* * Check if a ack sequence number is a valid syncookie. * Return the decoded mss if it is, or 0 if not. 
*/
int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th)
{
	/* The ACK acknowledges cookie + 1 and carries the peer's ISN + 1. */
	__u32 cookie = ntohl(th->ack_seq) - 1;
	__u32 seq = ntohl(th->seq) - 1;
	__u32 mssind;

	mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr,
				      th->source, th->dest, seq);

	/* An out-of-range index (including (__u32)-1) means a bad cookie. */
	return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
}
EXPORT_SYMBOL_GPL(__cookie_v4_check);

/*
 * Create the child socket for a request rebuilt from a cookie.
 *
 * Returns the child on success. On any failure the request is released
 * with __reqsk_free() and NULL is returned; if a child had already been
 * created but could not be queued, it is unlocked and put first.
 */
struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
				 struct request_sock *req,
				 struct dst_entry *dst)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct sock *child;
	bool own_req;	/* written by syn_recv_sock(), not read here */

	child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
						 NULL, &own_req, NULL);
	if (child) {
		refcount_set(&req->rsk_refcnt, 1);
		sock_rps_save_rxhash(child, skb);

		/* Request is not to be queued: drop our reference to it. */
		if (rsk_drop_req(req)) {
			reqsk_put(req);
			return child;
		}

		if (inet_csk_reqsk_queue_add(sk, req, child))
			return child;

		/* Queueing failed: release the child, then fall through. */
		bh_unlock_sock(child);
		sock_put(child);
	}
	__reqsk_free(req);

	return NULL;
}
EXPORT_IPV6_MOD(tcp_get_cookie_sock);

/*
 * when syncookies are in effect and tcp timestamps are enabled we stored
 * additional tcp options in the timestamp.
 * This extracts these options from the timestamp echo.
 *
 * return false if we decode a tcp option that is disabled
 * on the host.
 */
bool cookie_timestamp_decode(const struct net *net,
			     struct tcp_options_received *tcp_opt)
{
	/* echoed timestamp, lowest bits contain options */
	u32 options = tcp_opt->rcv_tsecr;

	/* No timestamp echoed: nothing was encoded, clear all options. */
	if (!tcp_opt->saw_tstamp)  {
		tcp_clear_options(tcp_opt);
		return true;
	}

	if (!READ_ONCE(net->ipv4.sysctl_tcp_timestamps))
		return false;

	tcp_opt->sack_ok = (options & TS_OPT_SACK) ? TCP_SACK_SEEN : 0;

	if (tcp_opt->sack_ok && !READ_ONCE(net->ipv4.sysctl_tcp_sack))
		return false;

	if ((options & TS_OPT_WSCALE_MASK) == TS_OPT_WSCALE_MASK)
		return true; /* no window scaling */

	tcp_opt->wscale_ok = 1;
	tcp_opt->snd_wscale = options & TS_OPT_WSCALE_MASK;

	return READ_ONCE(net->ipv4.sysctl_tcp_window_scaling) != 0;
}
EXPORT_IPV6_MOD(cookie_timestamp_decode);

/*
 * Fill in the request fields that can be derived from the ACK itself
 * (ports, interface, mark, sequence numbers, tos) and reset the rest.
 *
 * Returns 0, or for an MPTCP listener (CONFIG_MPTCP only) whatever
 * mptcp_subflow_init_cookie_req() returns.
 */
static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb,
				 struct request_sock *req)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct tcp_request_sock *treq = tcp_rsk(req);
	const struct tcphdr *th = tcp_hdr(skb);

	req->num_retrans = 0;

	ireq->ir_num = ntohs(th->dest);
	ireq->ir_rmt_port = th->source;
	ireq->ir_iif = inet_request_bound_dev_if(sk, skb);
	ireq->ir_mark = inet_request_mark(sk, skb);

	if (IS_ENABLED(CONFIG_SMC))
		ireq->smc_ok = 0;

	treq->snt_synack = 0;
	treq->snt_tsval_first = 0;
	treq->tfo_listener = false;
	treq->txhash = net_tx_rndhash();
	/* Both ISNs are recovered from the ACK: seq - 1 and ack_seq - 1. */
	treq->rcv_isn = ntohl(th->seq) - 1;
	treq->snt_isn = ntohl(th->ack_seq) - 1;
	treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield;
	treq->req_usec_ts = false;

#if IS_ENABLED(CONFIG_MPTCP)
	treq->is_mptcp = sk_is_mptcp(sk);
	if (treq->is_mptcp)
		return mptcp_subflow_init_cookie_req(req, sk, skb);
#endif

	return 0;
}

#if IS_ENABLED(CONFIG_BPF)
/*
 * Take over the request socket that is attached to the skb as skb->sk.
 *
 * The skb is detached from it (sk and destructor cleared) before the
 * request is initialised. Returns the request, or NULL after freeing it
 * when initialisation fails.
 */
struct request_sock *cookie_bpf_check(struct sock *sk, struct sk_buff *skb)
{
	struct request_sock *req = inet_reqsk(skb->sk);

	skb->sk = NULL;
	skb->destructor = NULL;

	if (cookie_tcp_reqsk_init(sk, skb, req)) {
		reqsk_free(req);
		req = NULL;
	}

	return req;
}
EXPORT_IPV6_MOD_GPL(cookie_bpf_check);
#endif

/*
 * Allocate and populate a request socket for a validated cookie ACK.
 *
 * @tcp_opt supplies the decoded options, @mss the decoded MSS and @tsoff
 * the timestamp offset. Returns NULL if allocation or
 * cookie_tcp_reqsk_init() fails (the request is freed in the latter case).
 */
struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
					    struct sock *sk, struct sk_buff *skb,
					    struct tcp_options_received *tcp_opt,
					    int mss, u32 tsoff)
{
	struct inet_request_sock *ireq;
	struct tcp_request_sock *treq;
	struct request_sock *req;

	if (sk_is_mptcp(sk))
		req = mptcp_subflow_reqsk_alloc(ops, sk, false);
	else
		req = inet_reqsk_alloc(ops, sk, false);

	if (!req)
		return NULL;

	if (cookie_tcp_reqsk_init(sk, skb, req)) {
		reqsk_free(req);
		return NULL;
	}

	ireq = inet_rsk(req);
	treq = tcp_rsk(req);

	req->mss = mss;
	req->ts_recent = tcp_opt->saw_tstamp ? tcp_opt->rcv_tsval : 0;

	ireq->snd_wscale = tcp_opt->snd_wscale;
	ireq->tstamp_ok = tcp_opt->saw_tstamp;
	ireq->sack_ok = tcp_opt->sack_ok;
	ireq->wscale_ok = tcp_opt->wscale_ok;
	/* ECN is read straight from the echoed timestamp's option bits. */
	ireq->ecn_ok = !!(tcp_opt->rcv_tsecr & TS_OPT_ECN);

	treq->ts_off = tsoff;

	return req;
}
EXPORT_IPV6_MOD_GPL(cookie_tcp_reqsk_alloc);

/*
 * Validate the cookie carried by an ACK and build a request from it.
 *
 * Returns ERR_PTR(-EINVAL) when the packet is not accepted as a cookie
 * ACK (tcp_synq_no_recent_overflow() is true, the cookie does not decode,
 * or a disabled option was encoded). Otherwise returns the result of
 * cookie_tcp_reqsk_alloc(), which may be NULL.
 */
static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk,
					     struct sk_buff *skb)
{
	struct tcp_options_received tcp_opt;
	u32 tsoff = 0;
	int mss;

	if (tcp_synq_no_recent_overflow(sk))
		goto out;

	mss = __cookie_v4_check(ip_hdr(skb), tcp_hdr(skb));
	if (!mss) {
		__NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED);
		goto out;
	}

	__NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESRECV);

	/* check for timestamp cookie support */
	memset(&tcp_opt, 0, sizeof(tcp_opt));
	tcp_parse_options(net, skb, &tcp_opt, 0, NULL);

	/* Remove the per-connection timestamp offset before decoding. */
	if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
		union tcp_seq_and_ts_off st;

		st = secure_tcp_seq_and_ts_off(net,
					       ip_hdr(skb)->daddr,
					       ip_hdr(skb)->saddr,
					       tcp_hdr(skb)->dest,
					       tcp_hdr(skb)->source);
		tsoff = st.ts_off;
		tcp_opt.rcv_tsecr -= tsoff;
	}

	if (!cookie_timestamp_decode(net, &tcp_opt))
		goto out;

	return cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, sk, skb,
				      &tcp_opt, mss, tsoff);
out:
	return ERR_PTR(-EINVAL);
}

/* On input, sk is a listener.
 * Output is listener if incoming packet would not create a child
 *           NULL if memory could not be allocated.
*/
struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
{
	struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
	const struct tcphdr *th = tcp_hdr(skb);
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_request_sock *ireq;
	struct net *net = sock_net(sk);
	struct tcp_request_sock *treq;
	struct request_sock *req;
	struct sock *ret = sk;	/* default result: hand back the listener */
	struct flowi4 fl4;
	struct rtable *rt;
	__u8 rcv_wscale;
	int full_space;
	SKB_DR(reason);

	/* Only a pure ACK (no RST) can carry a cookie, and only when
	 * syncookies are enabled for this netns.
	 */
	if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) ||
	    !th->ack || th->rst)
		goto out;

	if (cookie_bpf_ok(skb)) {
		req = cookie_bpf_check(sk, skb);
	} else {
		req = cookie_tcp_check(net, sk, skb);
		/* Not a valid cookie: return the listener, skb untouched. */
		if (IS_ERR(req))
			goto out;
	}
	/* Valid cookie but no request could be set up: drop the skb. */
	if (!req) {
		SKB_DR_SET(reason, NO_SOCKET);
		goto out_drop;
	}

	ireq = inet_rsk(req);
	treq = tcp_rsk(req);

	sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
	sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);

	/* We threw the options of the initial SYN away, so we hope
	 * the ACK carries the same options again (see RFC1122 4.2.3.8)
	 */
	RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb));

	if (security_inet_conn_request(sk, skb, req)) {
		SKB_DR_SET(reason, SECURITY_HOOK);
		goto out_free;
	}

	tcp_ao_syncookie(sk, skb, req, AF_INET);

	/*
	 * We need to lookup the route here to get at the correct
	 * window size. We should better make sure that the window size
	 * hasn't changed since we received the original syn, but I see
	 * no easy way to do this.
	 */
	flowi4_init_output(&fl4, ireq->ir_iif, ireq->ir_mark,
			   ip_sock_rt_tos(sk), ip_sock_rt_scope(sk),
			   IPPROTO_TCP, inet_sk_flowi_flags(sk),
			   opt->srr ? opt->faddr : ireq->ir_rmt_addr,
			   ireq->ir_loc_addr, th->source, th->dest,
			   sk_uid(sk));
	security_req_classify_flow(req, flowi4_to_flowi_common(&fl4));
	rt = ip_route_output_key(net, &fl4);
	if (IS_ERR(rt)) {
		SKB_DR_SET(reason, IP_OUTNOROUTES);
		goto out_free;
	}

	/* Try to redo what tcp_v4_send_synack did. */
	/* GNU "?:" form: use the socket's clamp when non-zero, else the
	 * route's RTAX_WINDOW metric.
	 */
	req->rsk_window_clamp = READ_ONCE(tp->window_clamp) ? :
				dst_metric(&rt->dst, RTAX_WINDOW);
	/* limit the window selection if the user enforce a smaller rx buffer */
	full_space = tcp_full_space(sk);
	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
	    (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
		req->rsk_window_clamp = full_space;

	tcp_select_initial_window(sk, full_space, req->mss,
				  &req->rsk_rcv_wnd, &req->rsk_window_clamp,
				  ireq->wscale_ok, &rcv_wscale,
				  dst_metric(&rt->dst, RTAX_INITRWND));

	/* req->syncookie is set true only if ACK is validated
	 * by BPF kfunc, then, rcv_wscale is already configured.
	 */
	if (!req->syncookie)
		ireq->rcv_wscale = rcv_wscale;
	ireq->ecn_ok &= cookie_ecn_ok(net, &rt->dst);
	treq->accecn_ok = ireq->ecn_ok && cookie_accecn_ok(th);

	/* tcp_get_cookie_sock() releases req itself when it fails, so the
	 * failure path below goes to out_drop rather than out_free.
	 */
	ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst);
	/* ip_queue_xmit() depends on our flow being setup
	 * Normal sockets get it right from inet_csk_route_child_sock()
	 */
	if (!ret) {
		SKB_DR_SET(reason, NO_SOCKET);
		goto out_drop;
	}
	inet_sk(ret)->cork.fl.u.ip4 = fl4;
out:
	return ret;
out_free:
	reqsk_free(req);
out_drop:
	sk_skb_reason_drop(sk, skb, reason);
	return NULL;
}
4 5 5 5 5 5 5 5 5 5 5 5 4 5 4 1 1 1 1 4 1 1 1 1 1 1 1 4 4 1 3 3 2 2 2 4 4 1 1 3 1 1 1 1 1 1 1 1 5 5 5 5 5 1 5 5 1 5 5 1 5 5 5 5 5 5 1 5 1 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 
493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 
993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 // SPDX-License-Identifier: GPL-2.0-or-later /* * Apple USB BCM5974 (Macbook Air and Penryn Macbook Pro) multitouch driver * * Copyright (C) 2008 Henrik Rydberg (rydberg@euromail.se) * Copyright (C) 2015 John Horan (knasher@gmail.com) * * The USB initialization and package decoding was made by * Scott Shawcroft as part of the touchd user-space driver project: * Copyright (C) 2008 Scott Shawcroft (scott.shawcroft@gmail.com) * * The BCM5974 driver is based on the appletouch driver: * Copyright (C) 2001-2004 Greg Kroah-Hartman (greg@kroah.com) * Copyright (C) 2005 Johannes Berg (johannes@sipsolutions.net) * Copyright (C) 2005 Stelian Pop (stelian@popies.net) * Copyright (C) 2005 Frank Arnold (frank@scirocco-5v-turbo.de) * Copyright (C) 2005 Peter Osterlund (petero2@telia.com) * Copyright (C) 2005 Michael Hanselmann (linux-kernel@hansmi.ch) * Copyright (C) 2006 Nicolas Boichat (nicolas@boichat.ch) */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/usb/input.h> #include <linux/hid.h> #include <linux/mutex.h> #include <linux/input/mt.h> #define USB_VENDOR_ID_APPLE 0x05ac /* MacbookAir, aka wellspring */ #define USB_DEVICE_ID_APPLE_WELLSPRING_ANSI 0x0223 #define USB_DEVICE_ID_APPLE_WELLSPRING_ISO 0x0224 #define USB_DEVICE_ID_APPLE_WELLSPRING_JIS 0x0225 /* MacbookProPenryn, aka wellspring2 */ #define USB_DEVICE_ID_APPLE_WELLSPRING2_ANSI 0x0230 #define USB_DEVICE_ID_APPLE_WELLSPRING2_ISO 0x0231 #define USB_DEVICE_ID_APPLE_WELLSPRING2_JIS 0x0232 /* Macbook5,1 (unibody), aka wellspring3 */ #define USB_DEVICE_ID_APPLE_WELLSPRING3_ANSI 0x0236 #define 
USB_DEVICE_ID_APPLE_WELLSPRING3_ISO 0x0237 #define USB_DEVICE_ID_APPLE_WELLSPRING3_JIS 0x0238 /* MacbookAir3,2 (unibody), aka wellspring5 */ #define USB_DEVICE_ID_APPLE_WELLSPRING4_ANSI 0x023f #define USB_DEVICE_ID_APPLE_WELLSPRING4_ISO 0x0240 #define USB_DEVICE_ID_APPLE_WELLSPRING4_JIS 0x0241 /* MacbookAir3,1 (unibody), aka wellspring4 */ #define USB_DEVICE_ID_APPLE_WELLSPRING4A_ANSI 0x0242 #define USB_DEVICE_ID_APPLE_WELLSPRING4A_ISO 0x0243 #define USB_DEVICE_ID_APPLE_WELLSPRING4A_JIS 0x0244 /* Macbook8 (unibody, March 2011) */ #define USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI 0x0245 #define USB_DEVICE_ID_APPLE_WELLSPRING5_ISO 0x0246 #define USB_DEVICE_ID_APPLE_WELLSPRING5_JIS 0x0247 /* MacbookAir4,1 (unibody, July 2011) */ #define USB_DEVICE_ID_APPLE_WELLSPRING6A_ANSI 0x0249 #define USB_DEVICE_ID_APPLE_WELLSPRING6A_ISO 0x024a #define USB_DEVICE_ID_APPLE_WELLSPRING6A_JIS 0x024b /* MacbookAir4,2 (unibody, July 2011) */ #define USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI 0x024c #define USB_DEVICE_ID_APPLE_WELLSPRING6_ISO 0x024d #define USB_DEVICE_ID_APPLE_WELLSPRING6_JIS 0x024e /* Macbook8,2 (unibody) */ #define USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI 0x0252 #define USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO 0x0253 #define USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS 0x0254 /* MacbookPro10,1 (unibody, June 2012) */ #define USB_DEVICE_ID_APPLE_WELLSPRING7_ANSI 0x0262 #define USB_DEVICE_ID_APPLE_WELLSPRING7_ISO 0x0263 #define USB_DEVICE_ID_APPLE_WELLSPRING7_JIS 0x0264 /* MacbookPro10,2 (unibody, October 2012) */ #define USB_DEVICE_ID_APPLE_WELLSPRING7A_ANSI 0x0259 #define USB_DEVICE_ID_APPLE_WELLSPRING7A_ISO 0x025a #define USB_DEVICE_ID_APPLE_WELLSPRING7A_JIS 0x025b /* MacbookAir6,2 (unibody, June 2013) */ #define USB_DEVICE_ID_APPLE_WELLSPRING8_ANSI 0x0290 #define USB_DEVICE_ID_APPLE_WELLSPRING8_ISO 0x0291 #define USB_DEVICE_ID_APPLE_WELLSPRING8_JIS 0x0292 /* MacbookPro12,1 (2015) */ #define USB_DEVICE_ID_APPLE_WELLSPRING9_ANSI 0x0272 #define USB_DEVICE_ID_APPLE_WELLSPRING9_ISO 0x0273 #define 
USB_DEVICE_ID_APPLE_WELLSPRING9_JIS 0x0274 #define BCM5974_DEVICE(prod) { \ .match_flags = (USB_DEVICE_ID_MATCH_DEVICE | \ USB_DEVICE_ID_MATCH_INT_CLASS | \ USB_DEVICE_ID_MATCH_INT_PROTOCOL), \ .idVendor = USB_VENDOR_ID_APPLE, \ .idProduct = (prod), \ .bInterfaceClass = USB_INTERFACE_CLASS_HID, \ .bInterfaceProtocol = USB_INTERFACE_PROTOCOL_MOUSE \ } /* table of devices that work with this driver */ static const struct usb_device_id bcm5974_table[] = { /* MacbookAir1.1 */ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING_JIS), /* MacbookProPenryn */ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING2_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING2_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING2_JIS), /* Macbook5,1 */ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING3_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING3_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING3_JIS), /* MacbookAir3,2 */ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING4_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING4_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING4_JIS), /* MacbookAir3,1 */ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING4A_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING4A_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING4A_JIS), /* MacbookPro8 */ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_JIS), /* MacbookAir4,1 */ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6A_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6A_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6A_JIS), /* MacbookAir4,2 */ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_JIS), /* MacbookPro8,2 */ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI), 
BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS), /* MacbookPro10,1 */ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING7_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING7_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING7_JIS), /* MacbookPro10,2 */ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING7A_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING7A_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING7A_JIS), /* MacbookAir6,2 */ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING8_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING8_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING8_JIS), /* MacbookPro12,1 */ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING9_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING9_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING9_JIS), /* Terminating entry */ {} }; MODULE_DEVICE_TABLE(usb, bcm5974_table); MODULE_AUTHOR("Henrik Rydberg"); MODULE_DESCRIPTION("Apple USB BCM5974 multitouch driver"); MODULE_LICENSE("GPL"); #define dprintk(level, format, a...)\ { if (debug >= level) printk(KERN_DEBUG format, ##a); } static int debug = 1; module_param(debug, int, 0644); MODULE_PARM_DESC(debug, "Activate debugging output"); /* button data structure */ struct bt_data { u8 unknown1; /* constant */ u8 button; /* left button */ u8 rel_x; /* relative x coordinate */ u8 rel_y; /* relative y coordinate */ }; /* trackpad header types */ enum tp_type { TYPE1, /* plain trackpad */ TYPE2, /* button integrated in trackpad */ TYPE3, /* additional header fields since June 2013 */ TYPE4 /* additional header field for pressure data */ }; /* trackpad finger data offsets, le16-aligned */ #define HEADER_TYPE1 (13 * sizeof(__le16)) #define HEADER_TYPE2 (15 * sizeof(__le16)) #define HEADER_TYPE3 (19 * sizeof(__le16)) #define HEADER_TYPE4 (23 * sizeof(__le16)) /* trackpad button data offsets */ #define BUTTON_TYPE1 0 #define BUTTON_TYPE2 15 #define BUTTON_TYPE3 23 #define BUTTON_TYPE4 31 /* list of device 
capability bits */
#define HAS_INTEGRATED_BUTTON	1

/* trackpad finger data block size */
#define FSIZE_TYPE1		(14 * sizeof(__le16))
#define FSIZE_TYPE2		(14 * sizeof(__le16))
#define FSIZE_TYPE3		(14 * sizeof(__le16))
#define FSIZE_TYPE4		(15 * sizeof(__le16))

/* offset from header to finger struct */
#define DELTA_TYPE1		(0 * sizeof(__le16))
#define DELTA_TYPE2		(0 * sizeof(__le16))
#define DELTA_TYPE3		(0 * sizeof(__le16))
#define DELTA_TYPE4		(1 * sizeof(__le16))

/* usb control message mode switch data */
/* Each list fills, in order, the um_size, um_req_val, um_req_idx,
 * um_switch_idx, um_switch_on and um_switch_off members of
 * struct bcm5974_config (via DATAFORMAT() below).
 */
#define USBMSG_TYPE1		8, 0x300, 0, 0, 0x1, 0x8
#define USBMSG_TYPE2		8, 0x300, 0, 0, 0x1, 0x8
#define USBMSG_TYPE3		8, 0x300, 0, 0, 0x1, 0x8
#define USBMSG_TYPE4		2, 0x302, 2, 1, 0x1, 0x0

/* Wellspring initialization constants */
#define BCM5974_WELLSPRING_MODE_READ_REQUEST_ID		1
#define BCM5974_WELLSPRING_MODE_WRITE_REQUEST_ID	9

/* trackpad finger structure, le16-aligned */
/* The struct holds 14 le16 words, the same size as FSIZE_TYPE1..3.
 * FSIZE_TYPE4 blocks are one word larger and DELTA_TYPE4 skips one word
 * before the struct.
 */
struct tp_finger {
	__le16 origin;		/* zero when switching track finger */
	__le16 abs_x;		/* absolute x coordinate */
	__le16 abs_y;		/* absolute y coordinate */
	__le16 rel_x;		/* relative x coordinate */
	__le16 rel_y;		/* relative y coordinate */
	__le16 tool_major;	/* tool area, major axis */
	__le16 tool_minor;	/* tool area, minor axis */
	__le16 orientation;	/* 16384 when point, else 15 bit angle */
	__le16 touch_major;	/* touch area, major axis */
	__le16 touch_minor;	/* touch area, minor axis */
	__le16 unused[2];	/* zeros */
	__le16 pressure;	/* pressure on forcetouch touchpad */
	__le16 multi;		/* one finger: varies, more fingers: constant */
} __attribute__((packed,aligned(2)));

/* trackpad finger data size, empirically at least ten fingers */
#define MAX_FINGERS		16
#define MAX_FINGER_ORIENTATION	16384

/* device-specific parameters */
struct bcm5974_param {
	int snratio;		/* signal-to-noise ratio */
	int min;		/* device minimum reading */
	int max;		/* device maximum reading */
};

/* device-specific configuration */
/* The config table below initialises this struct positionally, so member
 * order must stay in step with the table rows and with DATAFORMAT().
 */
struct bcm5974_config {
	int ansi, iso, jis;	/* the product id of this device */
	int caps;		/* device capability bitmask */
	int bt_ep;		/* the endpoint of the button interface */
	int bt_datalen;		/* data length of the button interface */
	int tp_ep;		/* the endpoint of the trackpad interface */
	enum tp_type tp_type;	/* type of trackpad interface */
	int tp_header;		/* bytes in header block */
	int tp_datalen;		/* data length of the trackpad interface */
	int tp_button;		/* offset to button data */
	int tp_fsize;		/* bytes in single finger block */
	int tp_delta;		/* offset from header to finger struct */
	int um_size;		/* usb control message length */
	int um_req_val;		/* usb control message value */
	int um_req_idx;		/* usb control message index */
	int um_switch_idx;	/* usb control message mode switch index */
	int um_switch_on;	/* usb control message mode switch on */
	int um_switch_off;	/* usb control message mode switch off */
	struct bcm5974_param p;	/* finger pressure limits */
	struct bcm5974_param w;	/* finger width limits */
	struct bcm5974_param x;	/* horizontal limits */
	struct bcm5974_param y;	/* vertical limits */
	struct bcm5974_param o;	/* orientation limits */
};

/* logical device structure */
struct bcm5974 {
	char phys[64];
	struct usb_device *udev;	/* usb device */
	struct usb_interface *intf;	/* our interface */
	struct input_dev *input;	/* input dev */
	struct bcm5974_config cfg;	/* device configuration */
	struct mutex pm_mutex;		/* serialize access to open/suspend */
	int opened;			/* 1: opened, 0: closed */
	struct urb *bt_urb;		/* button usb request block */
	struct bt_data *bt_data;	/* button transferred data */
	struct urb *tp_urb;		/* trackpad usb request block */
	u8 *tp_data;			/* trackpad transferred data */
	const struct tp_finger *index[MAX_FINGERS];	/* finger index data */
	struct input_mt_pos pos[MAX_FINGERS];		/* position array */
	int slots[MAX_FINGERS];				/* slot assignments */
	struct work_struct mode_reset_work;
	unsigned long last_mode_reset;
};

/* trackpad finger block data, le16-aligned */
/* Returns a pointer to finger block @i inside dev->tp_data: the blocks
 * start tp_header + tp_delta bytes into the buffer and are tp_fsize bytes
 * apart. @i is not range-checked here; callers must keep it within the
 * data actually received.
 */
static const struct tp_finger *get_tp_finger(const struct bcm5974 *dev, int i)
{
	const struct bcm5974_config *c = &dev->cfg;
	u8 *f_base = dev->tp_data + c->tp_header + c->tp_delta;

	return (const struct tp_finger *)(f_base + i * c->tp_fsize);
}

/* Expands to the positional initialisers for tp_type, tp_header,
 * tp_datalen (header plus MAX_FINGERS finger blocks), tp_button, tp_fsize,
 * tp_delta and the six um_* members of struct bcm5974_config.
 */
#define DATAFORMAT(type)				\
	type,						\
	HEADER_##type,					\
	HEADER_##type + (MAX_FINGERS) * (FSIZE_##type),	\
	BUTTON_##type,					\
	FSIZE_##type,					\
	DELTA_##type,					\
	USBMSG_##type

/* logical signal quality */
#define SN_PRESSURE	45		/* pressure signal-to-noise ratio */
#define SN_WIDTH	25		/* width signal-to-noise ratio */
#define SN_COORD	250		/* coordinate signal-to-noise ratio */
#define SN_ORIENT	10		/* orientation signal-to-noise ratio */

/* device constants */
static const struct bcm5974_config bcm5974_config_table[] = {
	{
		USB_DEVICE_ID_APPLE_WELLSPRING_ANSI,
		USB_DEVICE_ID_APPLE_WELLSPRING_ISO,
		USB_DEVICE_ID_APPLE_WELLSPRING_JIS,
		0,
		0x84, sizeof(struct bt_data),
		0x81, DATAFORMAT(TYPE1),
		{ SN_PRESSURE, 0, 256 },
		{ SN_WIDTH, 0, 2048 },
		{ SN_COORD, -4824, 5342 },
		{ SN_COORD, -172, 5820 },
		{ SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
	},
	{
		USB_DEVICE_ID_APPLE_WELLSPRING2_ANSI,
		USB_DEVICE_ID_APPLE_WELLSPRING2_ISO,
		USB_DEVICE_ID_APPLE_WELLSPRING2_JIS,
		0,
		0x84, sizeof(struct bt_data),
		0x81, DATAFORMAT(TYPE1),
		{ SN_PRESSURE, 0, 256 },
		{ SN_WIDTH, 0, 2048 },
		{ SN_COORD, -4824, 4824 },
		{ SN_COORD, -172, 4290 },
		{ SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
	},
	{
		USB_DEVICE_ID_APPLE_WELLSPRING3_ANSI,
		USB_DEVICE_ID_APPLE_WELLSPRING3_ISO,
		USB_DEVICE_ID_APPLE_WELLSPRING3_JIS,
		HAS_INTEGRATED_BUTTON,
		0x84, sizeof(struct bt_data),
		0x81, DATAFORMAT(TYPE2),
		{ SN_PRESSURE, 0, 300 },
		{ SN_WIDTH, 0, 2048 },
		{ SN_COORD, -4460, 5166 },
		{ SN_COORD, -75, 6700 },
		{ SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION }
	},
	{
		USB_DEVICE_ID_APPLE_WELLSPRING4_ANSI,
		USB_DEVICE_ID_APPLE_WELLSPRING4_ISO,
		USB_DEVICE_ID_APPLE_WELLSPRING4_JIS,
		HAS_INTEGRATED_BUTTON,
		0x84, sizeof(struct bt_data),
		0x81, DATAFORMAT(TYPE2),
		{ SN_PRESSURE, 0, 300 },
		{ SN_WIDTH, 0, 2048 },
		{ SN_COORD, -4620, 5140 },
		{ SN_COORD, -150, 6600
}, { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING4A_ANSI, USB_DEVICE_ID_APPLE_WELLSPRING4A_ISO, USB_DEVICE_ID_APPLE_WELLSPRING4A_JIS, HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), 0x81, DATAFORMAT(TYPE2), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4616, 5112 }, { SN_COORD, -142, 5234 }, { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI, USB_DEVICE_ID_APPLE_WELLSPRING5_ISO, USB_DEVICE_ID_APPLE_WELLSPRING5_JIS, HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), 0x81, DATAFORMAT(TYPE2), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4415, 5050 }, { SN_COORD, -55, 6680 }, { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI, USB_DEVICE_ID_APPLE_WELLSPRING6_ISO, USB_DEVICE_ID_APPLE_WELLSPRING6_JIS, HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), 0x81, DATAFORMAT(TYPE2), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4620, 5140 }, { SN_COORD, -150, 6600 }, { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI, USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO, USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS, HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), 0x81, DATAFORMAT(TYPE2), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4750, 5280 }, { SN_COORD, -150, 6730 }, { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING6A_ANSI, USB_DEVICE_ID_APPLE_WELLSPRING6A_ISO, USB_DEVICE_ID_APPLE_WELLSPRING6A_JIS, HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), 0x81, DATAFORMAT(TYPE2), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4620, 5140 }, { SN_COORD, -150, 6600 }, { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING7_ANSI, USB_DEVICE_ID_APPLE_WELLSPRING7_ISO, USB_DEVICE_ID_APPLE_WELLSPRING7_JIS, 
HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), 0x81, DATAFORMAT(TYPE2), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4750, 5280 }, { SN_COORD, -150, 6730 }, { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING7A_ANSI, USB_DEVICE_ID_APPLE_WELLSPRING7A_ISO, USB_DEVICE_ID_APPLE_WELLSPRING7A_JIS, HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), 0x81, DATAFORMAT(TYPE2), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4750, 5280 }, { SN_COORD, -150, 6730 }, { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING8_ANSI, USB_DEVICE_ID_APPLE_WELLSPRING8_ISO, USB_DEVICE_ID_APPLE_WELLSPRING8_JIS, HAS_INTEGRATED_BUTTON, 0, sizeof(struct bt_data), 0x83, DATAFORMAT(TYPE3), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4620, 5140 }, { SN_COORD, -150, 6600 }, { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING9_ANSI, USB_DEVICE_ID_APPLE_WELLSPRING9_ISO, USB_DEVICE_ID_APPLE_WELLSPRING9_JIS, HAS_INTEGRATED_BUTTON, 0, sizeof(struct bt_data), 0x83, DATAFORMAT(TYPE4), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4828, 5345 }, { SN_COORD, -203, 6803 }, { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, {} }; /* return the device-specific configuration by device */ static const struct bcm5974_config *bcm5974_get_config(struct usb_device *udev) { u16 id = le16_to_cpu(udev->descriptor.idProduct); const struct bcm5974_config *cfg; for (cfg = bcm5974_config_table; cfg->ansi; ++cfg) if (cfg->ansi == id || cfg->iso == id || cfg->jis == id) return cfg; return bcm5974_config_table; } /* convert 16-bit little endian to signed integer */ static inline int raw2int(__le16 x) { return (signed short)le16_to_cpu(x); } static void set_abs(struct input_dev *input, unsigned int code, const struct bcm5974_param *p) { int fuzz = p->snratio ? 
(p->max - p->min) / p->snratio : 0; input_set_abs_params(input, code, p->min, p->max, fuzz, 0); } /* setup which logical events to report */ static void setup_events_to_report(struct input_dev *input_dev, const struct bcm5974_config *cfg) { __set_bit(EV_ABS, input_dev->evbit); /* for synaptics only */ input_set_abs_params(input_dev, ABS_PRESSURE, 0, 256, 5, 0); input_set_abs_params(input_dev, ABS_TOOL_WIDTH, 0, 16, 0, 0); /* finger touch area */ set_abs(input_dev, ABS_MT_TOUCH_MAJOR, &cfg->w); set_abs(input_dev, ABS_MT_TOUCH_MINOR, &cfg->w); /* finger approach area */ set_abs(input_dev, ABS_MT_WIDTH_MAJOR, &cfg->w); set_abs(input_dev, ABS_MT_WIDTH_MINOR, &cfg->w); /* finger orientation */ set_abs(input_dev, ABS_MT_ORIENTATION, &cfg->o); /* finger position */ set_abs(input_dev, ABS_MT_POSITION_X, &cfg->x); set_abs(input_dev, ABS_MT_POSITION_Y, &cfg->y); __set_bit(EV_KEY, input_dev->evbit); __set_bit(BTN_LEFT, input_dev->keybit); if (cfg->caps & HAS_INTEGRATED_BUTTON) __set_bit(INPUT_PROP_BUTTONPAD, input_dev->propbit); input_mt_init_slots(input_dev, MAX_FINGERS, INPUT_MT_POINTER | INPUT_MT_DROP_UNUSED | INPUT_MT_TRACK); } /* report button data as logical button state */ static int report_bt_state(struct bcm5974 *dev, int size) { if (size != sizeof(struct bt_data)) return -EIO; dprintk(7, "bcm5974: button data: %x %x %x %x\n", dev->bt_data->unknown1, dev->bt_data->button, dev->bt_data->rel_x, dev->bt_data->rel_y); input_report_key(dev->input, BTN_LEFT, dev->bt_data->button); input_sync(dev->input); return 0; } static void report_finger_data(struct input_dev *input, int slot, const struct input_mt_pos *pos, const struct tp_finger *f) { input_mt_slot(input, slot); input_mt_report_slot_state(input, MT_TOOL_FINGER, true); input_report_abs(input, ABS_MT_TOUCH_MAJOR, raw2int(f->touch_major) << 1); input_report_abs(input, ABS_MT_TOUCH_MINOR, raw2int(f->touch_minor) << 1); input_report_abs(input, ABS_MT_WIDTH_MAJOR, raw2int(f->tool_major) << 1); input_report_abs(input, 
ABS_MT_WIDTH_MINOR, raw2int(f->tool_minor) << 1); input_report_abs(input, ABS_MT_ORIENTATION, MAX_FINGER_ORIENTATION - raw2int(f->orientation)); input_report_abs(input, ABS_MT_POSITION_X, pos->x); input_report_abs(input, ABS_MT_POSITION_Y, pos->y); } static void report_synaptics_data(struct input_dev *input, const struct bcm5974_config *cfg, const struct tp_finger *f, int raw_n) { int abs_p = 0, abs_w = 0; if (raw_n) { int p = raw2int(f->touch_major); int w = raw2int(f->tool_major); if (p > 0 && raw2int(f->origin)) { abs_p = clamp_val(256 * p / cfg->p.max, 0, 255); abs_w = clamp_val(16 * w / cfg->w.max, 0, 15); } } input_report_abs(input, ABS_PRESSURE, abs_p); input_report_abs(input, ABS_TOOL_WIDTH, abs_w); } /* report trackpad data as logical trackpad state */ static int report_tp_state(struct bcm5974 *dev, int size) { const struct bcm5974_config *c = &dev->cfg; const struct tp_finger *f; struct input_dev *input = dev->input; int raw_n, i, n = 0; if (size < c->tp_header || (size - c->tp_header) % c->tp_fsize != 0) return -EIO; raw_n = (size - c->tp_header) / c->tp_fsize; for (i = 0; i < raw_n; i++) { f = get_tp_finger(dev, i); if (raw2int(f->touch_major) == 0) continue; dev->pos[n].x = raw2int(f->abs_x); dev->pos[n].y = c->y.min + c->y.max - raw2int(f->abs_y); dev->index[n++] = f; } input_mt_assign_slots(input, dev->slots, dev->pos, n, 0); for (i = 0; i < n; i++) report_finger_data(input, dev->slots[i], &dev->pos[i], dev->index[i]); input_mt_sync_frame(input); report_synaptics_data(input, c, get_tp_finger(dev, 0), raw_n); /* later types report button events via integrated button only */ if (c->caps & HAS_INTEGRATED_BUTTON) { int ibt = raw2int(dev->tp_data[c->tp_button]); input_report_key(input, BTN_LEFT, ibt); } input_sync(input); return 0; } static int bcm5974_wellspring_mode(struct bcm5974 *dev, bool on) { const struct bcm5974_config *c = &dev->cfg; int retval = 0, size; char *data; /* Type 3 does not require a mode switch */ if (c->tp_type == TYPE3) return 0; 
data = kmalloc(c->um_size, GFP_KERNEL); if (!data) { dev_err(&dev->intf->dev, "out of memory\n"); retval = -ENOMEM; goto out; } /* read configuration */ size = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0), BCM5974_WELLSPRING_MODE_READ_REQUEST_ID, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, c->um_req_val, c->um_req_idx, data, c->um_size, 5000); if (size != c->um_size) { dev_err(&dev->intf->dev, "could not read from device\n"); retval = -EIO; goto out; } /* apply the mode switch */ data[c->um_switch_idx] = on ? c->um_switch_on : c->um_switch_off; /* write configuration */ size = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), BCM5974_WELLSPRING_MODE_WRITE_REQUEST_ID, USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE, c->um_req_val, c->um_req_idx, data, c->um_size, 5000); if (size != c->um_size) { dev_err(&dev->intf->dev, "could not write to device\n"); retval = -EIO; goto out; } dprintk(2, "bcm5974: switched to %s mode.\n", on ? "wellspring" : "normal"); out: kfree(data); return retval; } /* * Mode switches sent before the control response are ignored. * Fixing this state requires switching to normal mode and waiting * about 1ms before switching back to wellspring mode. 
*/
/*
 * Work handler: switch the device to normal mode, wait, and switch it back
 * to wellspring mode. Runs with pm_mutex held for its whole duration, the
 * same mutex taken by open/close/suspend/resume below.
 */
static void bcm5974_mode_reset_work(struct work_struct *work)
{
	struct bcm5974 *dev = container_of(work, struct bcm5974, mode_reset_work);
	int error;

	guard(mutex)(&dev->pm_mutex);

	/* timestamp read by bcm5974_irq_trackpad() to rate-limit resets */
	dev->last_mode_reset = jiffies;

	error = bcm5974_wellspring_mode(dev, false);
	if (error) {
		dev_err(&dev->intf->dev, "reset to normal mode failed\n");
		return;
	}

	fsleep(1000);

	error = bcm5974_wellspring_mode(dev, true);
	if (error)
		dev_err(&dev->intf->dev, "mode switch after reset failed\n");
}

/*
 * Completion handler for the button urb.
 *
 * A zero status reports the button state; the listed "shutting down"
 * statuses return without resubmitting; any other status skips the report
 * but still resubmits the urb.
 */
static void bcm5974_irq_button(struct urb *urb)
{
	struct bcm5974 *dev = urb->context;
	struct usb_interface *intf = dev->intf;
	int error;

	switch (urb->status) {
	case 0:
		break;
	case -EOVERFLOW:
	case -ECONNRESET:
	case -ENOENT:
	case -ESHUTDOWN:
		dev_dbg(&intf->dev, "button urb shutting down: %d\n",
			urb->status);
		return;
	default:
		dev_dbg(&intf->dev, "button urb status: %d\n", urb->status);
		goto exit;
	}

	if (report_bt_state(dev, dev->bt_urb->actual_length))
		dprintk(1, "bcm5974: bad button package, length: %d\n",
			dev->bt_urb->actual_length);

exit:
	error = usb_submit_urb(dev->bt_urb, GFP_ATOMIC);
	if (error)
		dev_err(&intf->dev, "button urb failed: %d\n", error);
}

/*
 * Completion handler for the trackpad urb.
 *
 * Status handling mirrors bcm5974_irq_button(). A 2-byte transfer is
 * skipped as a control response. When report_tp_state() rejects the packet
 * and the packet is 8 bytes long, a mode reset is scheduled, at most once
 * per second (measured from dev->last_mode_reset).
 */
static void bcm5974_irq_trackpad(struct urb *urb)
{
	struct bcm5974 *dev = urb->context;
	struct usb_interface *intf = dev->intf;
	int error;

	switch (urb->status) {
	case 0:
		break;
	case -EOVERFLOW:
	case -ECONNRESET:
	case -ENOENT:
	case -ESHUTDOWN:
		dev_dbg(&intf->dev, "trackpad urb shutting down: %d\n",
			urb->status);
		return;
	default:
		dev_dbg(&intf->dev, "trackpad urb status: %d\n", urb->status);
		goto exit;
	}

	/* control response ignored */
	if (dev->tp_urb->actual_length == 2)
		goto exit;

	if (report_tp_state(dev, dev->tp_urb->actual_length)) {
		dprintk(1, "bcm5974: bad trackpad package, length: %d\n",
			dev->tp_urb->actual_length);

		/*
		 * Receiving a HID packet means we aren't in wellspring mode.
		 * If we haven't tried a reset in the last second, try now.
		 */
		if (dev->tp_urb->actual_length == 8 &&
		    time_after(jiffies, dev->last_mode_reset + msecs_to_jiffies(1000))) {
			schedule_work(&dev->mode_reset_work);
		}
	}

exit:
	error = usb_submit_urb(dev->tp_urb, GFP_ATOMIC);
	if (error)
		dev_err(&intf->dev, "trackpad urb failed: %d\n", error);
}

/*
 * The Wellspring trackpad, like many recent Apple trackpads, shares
 * the usb device with the keyboard. Since keyboards are usually
 * handled by the HID system, the device ends up being handled by two
 * modules. Setting up the device therefore becomes slightly
 * complicated. To enable multitouch features, a mode switch is
 * required, which is usually applied via the control interface of the
 * device.  It can be argued where this switch should take place. In
 * some drivers, like appletouch, the switch is made during
 * probe. However, the hid module may also alter the state of the
 * device, resulting in trackpad malfunction under certain
 * circumstances. To get around this problem, there is at least one
 * example that utilizes the USB_QUIRK_RESET_RESUME quirk in order to
 * receive a reset_resume request rather than the normal resume.
 * Since the implementation of reset_resume is equal to mode switch
 * plus start_traffic, it seems easier to always do the switch when
 * starting traffic on the device.
 */
/*
 * Switch to wellspring mode and submit the button urb (when one exists)
 * and the trackpad urb. On failure the steps already taken are undone in
 * reverse order. Returns 0 on success or the first error encountered.
 */
static int bcm5974_start_traffic(struct bcm5974 *dev)
{
	int error;

	error = bcm5974_wellspring_mode(dev, true);
	if (error) {
		dprintk(1, "bcm5974: mode switch failed\n");
		goto err_out;
	}

	if (dev->bt_urb) {
		error = usb_submit_urb(dev->bt_urb, GFP_KERNEL);
		if (error)
			goto err_reset_mode;
	}

	error = usb_submit_urb(dev->tp_urb, GFP_KERNEL);
	if (error)
		goto err_kill_bt;

	return 0;

err_kill_bt:
	usb_kill_urb(dev->bt_urb);
err_reset_mode:
	bcm5974_wellspring_mode(dev, false);
err_out:
	return error;
}

/* Kill both urbs, then switch the device back to normal mode. */
static void bcm5974_pause_traffic(struct bcm5974 *dev)
{
	usb_kill_urb(dev->tp_urb);
	usb_kill_urb(dev->bt_urb);
	bcm5974_wellspring_mode(dev, false);
}

/*
 * The code below implements open/close and manual suspend/resume.
 * All functions may be called in random order.
 *
 * Opening a suspended device fails with EACCES - permission denied.
 *
 * Failing a resume leaves the device resumed but closed.
 */
/*
 * Input-device open callback: take an autopm reference, start traffic
 * under pm_mutex and mark the device opened. The autopm reference is
 * dropped again when starting traffic fails.
 */
static int bcm5974_open(struct input_dev *input)
{
	struct bcm5974 *dev = input_get_drvdata(input);
	int error;

	error = usb_autopm_get_interface(dev->intf);
	if (error)
		return error;

	scoped_guard(mutex, &dev->pm_mutex) {
		error = bcm5974_start_traffic(dev);
		if (!error)
			dev->opened = 1;
	}

	if (error)
		usb_autopm_put_interface(dev->intf);

	return error;
}

/*
 * Input-device close callback: pause traffic and clear the opened flag
 * under pm_mutex, then drop the autopm reference.
 */
static void bcm5974_close(struct input_dev *input)
{
	struct bcm5974 *dev = input_get_drvdata(input);

	scoped_guard(mutex, &dev->pm_mutex) {
		bcm5974_pause_traffic(dev);
		dev->opened = 0;
	}

	usb_autopm_put_interface(dev->intf);
}

/* USB suspend callback: pause traffic if the device is open. Always returns 0. */
static int bcm5974_suspend(struct usb_interface *iface, pm_message_t message)
{
	struct bcm5974 *dev = usb_get_intfdata(iface);

	guard(mutex)(&dev->pm_mutex);

	if (dev->opened)
		bcm5974_pause_traffic(dev);

	return 0;
}

/*
 * USB resume callback: restart traffic if the device is open and return
 * the result of bcm5974_start_traffic(); otherwise return 0.
 */
static int bcm5974_resume(struct usb_interface *iface)
{
	struct bcm5974 *dev = usb_get_intfdata(iface);

	guard(mutex)(&dev->pm_mutex);

	if (dev->opened)
		return bcm5974_start_traffic(dev);

	return 0;
}

/*
 * USB probe callback: allocate the driver state, the urbs and their
 * coherent buffers, fill in the input device and register it.
 *
 * A button urb is allocated only for TYPE1 devices. Returns 0 on success;
 * on failure everything allocated so far is released through the goto
 * chain at the end (labels are ordered so that each one frees what was
 * acquired before the failing step) and a negative error is returned
 * (-ENOMEM for allocation failures, otherwise the error from
 * input_register_device()).
 */
static int bcm5974_probe(struct usb_interface *iface,
			 const struct usb_device_id *id)
{
	struct usb_device *udev = interface_to_usbdev(iface);
	const struct bcm5974_config *cfg;
	struct bcm5974 *dev;
	struct input_dev *input_dev;
	int error = -ENOMEM;

	/* find the product index */
	cfg = bcm5974_get_config(udev);

	/* allocate memory for our device state and initialize it */
	dev = kzalloc_obj(*dev);
	input_dev = input_allocate_device();
	if (!dev || !input_dev) {
		dev_err(&iface->dev, "out of memory\n");
		goto err_free_devs;
	}

	dev->udev = udev;
	dev->intf = iface;
	dev->input = input_dev;
	dev->cfg = *cfg;
	INIT_WORK(&dev->mode_reset_work, bcm5974_mode_reset_work);
	mutex_init(&dev->pm_mutex);

	/* setup urbs */
	if (cfg->tp_type == TYPE1) {
		dev->bt_urb = usb_alloc_urb(0, GFP_KERNEL);
		if (!dev->bt_urb)
			goto err_free_devs;
	}

	dev->tp_urb = usb_alloc_urb(0, GFP_KERNEL);
	if (!dev->tp_urb)
		goto err_free_bt_urb;

	if (dev->bt_urb) {
		dev->bt_data = usb_alloc_coherent(dev->udev,
					  dev->cfg.bt_datalen, GFP_KERNEL,
					  &dev->bt_urb->transfer_dma);
		if (!dev->bt_data)
			goto err_free_urb;
	}

	dev->tp_data = usb_alloc_coherent(dev->udev,
					  dev->cfg.tp_datalen, GFP_KERNEL,
					  &dev->tp_urb->transfer_dma);
	if (!dev->tp_data)
		goto err_free_bt_buffer;

	if (dev->bt_urb) {
		usb_fill_int_urb(dev->bt_urb, udev,
				 usb_rcvintpipe(udev, cfg->bt_ep),
				 dev->bt_data, dev->cfg.bt_datalen,
				 bcm5974_irq_button, dev, 1);

		dev->bt_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
	}

	usb_fill_int_urb(dev->tp_urb, udev,
			 usb_rcvintpipe(udev, cfg->tp_ep),
			 dev->tp_data, dev->cfg.tp_datalen,
			 bcm5974_irq_trackpad, dev, 1);

	dev->tp_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;

	/* create bcm5974 device */
	usb_make_path(udev, dev->phys, sizeof(dev->phys));
	strlcat(dev->phys, "/input0", sizeof(dev->phys));

	input_dev->name = "bcm5974";
	input_dev->phys = dev->phys;
	usb_to_input_id(dev->udev, &input_dev->id);
	/* report driver capabilities via the version field */
	input_dev->id.version = cfg->caps;
	input_dev->dev.parent = &iface->dev;

	input_set_drvdata(input_dev, dev);

	input_dev->open = bcm5974_open;
	input_dev->close = bcm5974_close;

	setup_events_to_report(input_dev, cfg);

	error = input_register_device(dev->input);
	if (error)
		goto err_free_buffer;

	/* save our data pointer in this interface device */
	usb_set_intfdata(iface, dev);

	return 0;

err_free_buffer:
	usb_free_coherent(dev->udev, dev->cfg.tp_datalen,
		dev->tp_data, dev->tp_urb->transfer_dma);
err_free_bt_buffer:
	if (dev->bt_urb)
		usb_free_coherent(dev->udev, dev->cfg.bt_datalen,
				  dev->bt_data, dev->bt_urb->transfer_dma);
err_free_urb:
	usb_free_urb(dev->tp_urb);
err_free_bt_urb:
	usb_free_urb(dev->bt_urb);
err_free_devs:
	usb_set_intfdata(iface, NULL);
	input_free_device(input_dev);
	kfree(dev);
	return error;
}

/*
 * USB disconnect callback: disable the mode-reset work first, then
 * unregister the input device and release the buffers, urbs and driver
 * state allocated in bcm5974_probe().
 */
static void bcm5974_disconnect(struct usb_interface *iface)
{
	struct bcm5974 *dev = usb_get_intfdata(iface);

	disable_work_sync(&dev->mode_reset_work);
	usb_set_intfdata(iface, NULL);

	input_unregister_device(dev->input);
	usb_free_coherent(dev->udev, dev->cfg.tp_datalen,
			  dev->tp_data, dev->tp_urb->transfer_dma);
	if (dev->bt_urb)
		usb_free_coherent(dev->udev, dev->cfg.bt_datalen,
				  dev->bt_data, dev->bt_urb->transfer_dma);
	usb_free_urb(dev->tp_urb);
	usb_free_urb(dev->bt_urb);
	kfree(dev);
}

/* USB driver glue: binds the callbacks above to the bcm5974 id table. */
static struct usb_driver bcm5974_driver = {
	.name			= "bcm5974",
	.probe			= bcm5974_probe,
	.disconnect		= bcm5974_disconnect,
	.suspend		= bcm5974_suspend,
	.resume			= bcm5974_resume,
	.id_table		= bcm5974_table,
	.supports_autosuspend	= 1,
};

module_usb_driver(bcm5974_driver);
150 34 15 9 1 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 /* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ /* Copyright (c) 2002-2007 Volkswagen Group Electronic Research * Copyright (c) 2017 Pengutronix, Marc Kleine-Budde <kernel@pengutronix.de> * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of Volkswagen nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * Alternatively, provided that this notice is retained in full, this * software may be distributed under the terms of the GNU General * Public License ("GPL") version 2, in which case the provisions of the * GPL apply INSTEAD OF those given above. * * The provided data structures and external interfaces from this code * are not restricted to be used by modules with a GPL compatible license. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * */ #ifndef CAN_ML_H #define CAN_ML_H #include <linux/can.h> #include <linux/list.h> #include <linux/netdevice.h> /* exposed CAN device capabilities for network layer */ #define CAN_CAP_CC BIT(0) /* CAN CC aka Classical CAN */ #define CAN_CAP_FD BIT(1) /* CAN FD */ #define CAN_CAP_XL BIT(2) /* CAN XL */ #define CAN_CAP_RO BIT(3) /* read-only mode (LISTEN/RESTRICTED) */ #define CAN_SFF_RCV_ARRAY_SZ (1 << CAN_SFF_ID_BITS) #define CAN_EFF_RCV_HASH_BITS 10 #define CAN_EFF_RCV_ARRAY_SZ (1 << CAN_EFF_RCV_HASH_BITS) enum { RX_ERR, RX_ALL, RX_FIL, RX_INV, RX_MAX }; struct can_dev_rcv_lists { struct hlist_head rx[RX_MAX]; struct hlist_head rx_sff[CAN_SFF_RCV_ARRAY_SZ]; struct hlist_head rx_eff[CAN_EFF_RCV_ARRAY_SZ]; int entries; }; struct can_ml_priv { struct can_dev_rcv_lists dev_rcv_lists; #ifdef CAN_J1939 struct j1939_priv *j1939_priv; #endif u32 can_cap; }; static inline struct can_ml_priv *can_get_ml_priv(struct net_device *dev) { return netdev_get_ml_priv(dev, ML_PRIV_CAN); } static inline void can_set_ml_priv(struct net_device *dev, struct can_ml_priv *ml_priv) { netdev_set_ml_priv(dev, ml_priv, ML_PRIV_CAN); } static inline bool can_cap_enabled(struct net_device *dev, u32 cap) { struct can_ml_priv *can_ml = can_get_ml_priv(dev); if (!can_ml) return false; return (can_ml->can_cap & cap); } static inline void can_set_cap(struct net_device *dev, u32 cap) { struct can_ml_priv *can_ml = can_get_ml_priv(dev); can_ml->can_cap = cap; } #endif /* 
CAN_ML_H */
2 1 1 5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext4/xattr_user.c * Handler for extended user attributes. * * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org> */ #include <linux/string.h> #include <linux/fs.h> #include "ext4_jbd2.h" #include "ext4.h" #include "xattr.h" static bool ext4_xattr_user_list(struct dentry *dentry) { return test_opt(dentry->d_sb, XATTR_USER); } static int ext4_xattr_user_get(const struct xattr_handler *handler, struct dentry *unused, struct inode *inode, const char *name, void *buffer, size_t size) { if (!test_opt(inode->i_sb, XATTR_USER)) return -EOPNOTSUPP; return ext4_xattr_get(inode, EXT4_XATTR_INDEX_USER, name, buffer, size); } static int ext4_xattr_user_set(const struct xattr_handler *handler, struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) { if (!test_opt(inode->i_sb, XATTR_USER)) return -EOPNOTSUPP; return ext4_xattr_set(inode, EXT4_XATTR_INDEX_USER, name, value, size, flags); } const struct xattr_handler ext4_xattr_user_handler = { .prefix = XATTR_USER_PREFIX, .list = ext4_xattr_user_list, .get = ext4_xattr_user_get, .set = ext4_xattr_user_set, };
6 4 10 6 1 1 6 10 4 4 3 3 11 29 11 10 11 2 2 2 10 10 9 9 5 3 5 10 2 9 21 10 6 10 10 8 7 2 10 7 3 21 5 2 5 5 5 12 11 9 7 11 21 5 10 7 16 1 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 1992 Darren Senn
 */

/* These are all the functions necessary to implement itimers */

#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/syscalls.h>
#include <linux/time.h>
#include <linux/sched/signal.h>
#include <linux/sched/cputime.h>
#include <linux/posix-timers.h>
#include <linux/hrtimer.h>
#include <trace/events/timer.h>
#include <linux/compat.h>

#include <linux/uaccess.h>

/**
 * itimer_get_remtime - get remaining time for the timer
 *
 * @timer: the timer to read
 *
 * Returns the time remaining until expiry. An active timer whose
 * remaining time is zero or negative (pending, already expired) is
 * reported as 1usec; an inactive timer is reported as 0.
 */
static struct timespec64 itimer_get_remtime(struct hrtimer *timer)
{
	ktime_t rem = __hrtimer_get_remaining(timer, true);

	/*
	 * Racy but safe: if the itimer expires after the above
	 * __hrtimer_get_remaining() call but before this condition
	 * then we return 0 - which is correct.
	 */
	if (hrtimer_active(timer)) {
		if (rem <= 0)
			rem = NSEC_PER_USEC;
	} else
		rem = 0;

	return ktime_to_timespec64(rem);
}

/*
 * Read the CPU-time based itimer @clock_id of @tsk into @value.
 *
 * The expiry and increment are sampled under siglock. A non-zero expiry
 * is converted to the time remaining relative to the thread group's
 * sampled CPU time; if the expiry already lies in the past, TICK_NSEC is
 * reported instead.
 */
static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
			   struct itimerspec64 *const value)
{
	u64 val, interval;
	struct cpu_itimer *it = &tsk->signal->it[clock_id];

	spin_lock_irq(&tsk->sighand->siglock);

	val = it->expires;
	interval = it->incr;
	if (val) {
		u64 t, samples[CPUCLOCK_MAX];

		thread_group_sample_cputime(tsk, samples);
		t = samples[clock_id];

		if (val < t)
			/* about to fire */
			val = TICK_NSEC;
		else
			val -= t;
	}

	spin_unlock_irq(&tsk->sighand->siglock);

	value->it_value = ns_to_timespec64(val);
	value->it_interval = ns_to_timespec64(interval);
}

/*
 * Fill @value with the state of the current task's itimer @which.
 * Returns 0 on success or -EINVAL when @which is not one of
 * ITIMER_REAL, ITIMER_VIRTUAL or ITIMER_PROF.
 */
static int do_getitimer(int which, struct itimerspec64 *value)
{
	struct task_struct *tsk = current;

	switch (which) {
	case ITIMER_REAL:
		spin_lock_irq(&tsk->sighand->siglock);
		value->it_value = itimer_get_remtime(&tsk->signal->real_timer);
		value->it_interval =
			ktime_to_timespec64(tsk->signal->it_real_incr);
		spin_unlock_irq(&tsk->sighand->siglock);
		break;
	case ITIMER_VIRTUAL:
		get_cpu_itimer(tsk, CPUCLOCK_VIRT, value);
		break;
	case ITIMER_PROF:
		get_cpu_itimer(tsk, CPUCLOCK_PROF, value);
		break;
	default:
		return(-EINVAL);
	}
	return 0;
}

/*
 * Convert @i to the old timeval based layout (nanoseconds are divided
 * down to microseconds) and copy it to user space at @o.
 * Returns 0 on success or -EFAULT when the copy fails.
 */
static int put_itimerval(struct __kernel_old_itimerval __user *o,
			 const struct itimerspec64 *i)
{
	struct __kernel_old_itimerval v;

	v.it_interval.tv_sec = i->it_interval.tv_sec;
	v.it_interval.tv_usec = i->it_interval.tv_nsec / NSEC_PER_USEC;
	v.it_value.tv_sec = i->it_value.tv_sec;
	v.it_value.tv_usec = i->it_value.tv_nsec / NSEC_PER_USEC;
	return copy_to_user(o, &v, sizeof(struct __kernel_old_itimerval)) ? -EFAULT : 0;
}


/* getitimer(2): read itimer @which and copy the result to @value. */
SYSCALL_DEFINE2(getitimer, int, which, struct __kernel_old_itimerval __user *, value)
{
	struct itimerspec64 get_buffer;
	int error = do_getitimer(which, &get_buffer);

	if (!error && put_itimerval(value, &get_buffer))
		error = -EFAULT;
	return error;
}

#if defined(CONFIG_COMPAT) || defined(CONFIG_ALPHA)
/* itimerval layout built from old_timeval32, used by the compat syscalls */
struct old_itimerval32 {
	struct old_timeval32	it_interval;
	struct old_timeval32	it_value;
};

/*
 * Same conversion as put_itimerval(), targeting struct old_itimerval32.
 * Returns 0 on success or -EFAULT when the copy fails.
 */
static int put_old_itimerval32(struct old_itimerval32 __user *o,
			       const struct itimerspec64 *i)
{
	struct old_itimerval32 v32;

	v32.it_interval.tv_sec = i->it_interval.tv_sec;
	v32.it_interval.tv_usec = i->it_interval.tv_nsec / NSEC_PER_USEC;
	v32.it_value.tv_sec = i->it_value.tv_sec;
	v32.it_value.tv_usec = i->it_value.tv_nsec / NSEC_PER_USEC;
	return copy_to_user(o, &v32, sizeof(struct old_itimerval32)) ? -EFAULT : 0;
}

/* Compat variant of getitimer(2) using struct old_itimerval32. */
COMPAT_SYSCALL_DEFINE2(getitimer, int, which,
		       struct old_itimerval32 __user *, value)
{
	struct itimerspec64 get_buffer;
	int error = do_getitimer(which, &get_buffer);

	if (!error && put_old_itimerval32(value, &get_buffer))
		error = -EFAULT;
	return error;
}
#endif

/*
 * Invoked from dequeue_signal() when SIG_ALRM is delivered.
 *
 * Restart the ITIMER_REAL timer if it is armed as periodic timer.  Doing
 * this in the signal delivery path instead of self rearming prevents a DoS
 * with small increments in the high resolution timer case and reduces timer
 * noise in general.
 */
void posixtimer_rearm_itimer(struct task_struct *tsk)
{
	struct hrtimer *tmr = &tsk->signal->real_timer;

	/* only rearm a timer that is not queued and has a non-zero interval */
	if (!hrtimer_is_queued(tmr) && tsk->signal->it_real_incr != 0) {
		hrtimer_forward_now(tmr, tsk->signal->it_real_incr);
		hrtimer_restart(tmr);
	}
}

/*
 * Interval timers are restarted in the signal delivery path.  See
 * posixtimer_rearm_itimer().
 */
enum hrtimer_restart it_real_fn(struct hrtimer *timer)
{
	struct signal_struct *sig =
		container_of(timer, struct signal_struct, real_timer);
	struct pid *leader_pid = sig->pids[PIDTYPE_TGID];

	trace_itimer_expire(ITIMER_REAL, leader_pid, 0);
	kill_pid_info(SIGALRM, SEND_SIG_PRIV, leader_pid);

	return HRTIMER_NORESTART;
}

/*
 * Program the CPU-time based itimer @clock_id of @tsk from @value and,
 * when @ovalue is non-NULL, report the previous setting there.
 *
 * The update is done under siglock. A non-zero new value has TICK_NSEC
 * added before it is handed to set_process_cpu_timer(), which is only
 * called when either the old or the new value is non-zero.
 */
static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
			   const struct itimerspec64 *const value,
			   struct itimerspec64 *const ovalue)
{
	u64 oval, nval, ointerval, ninterval;
	struct cpu_itimer *it = &tsk->signal->it[clock_id];

	nval = timespec64_to_ns(&value->it_value);
	ninterval = timespec64_to_ns(&value->it_interval);

	spin_lock_irq(&tsk->sighand->siglock);

	oval = it->expires;
	ointerval = it->incr;
	if (oval || nval) {
		if (nval > 0)
			nval += TICK_NSEC;
		set_process_cpu_timer(tsk, clock_id, &nval, &oval);
	}
	it->expires = nval;
	it->incr = ninterval;
	trace_itimer_state(clock_id == CPUCLOCK_VIRT ?
			   ITIMER_VIRTUAL : ITIMER_PROF, value, nval);

	spin_unlock_irq(&tsk->sighand->siglock);

	if (ovalue) {
		ovalue->it_value = ns_to_timespec64(oval);
		ovalue->it_interval = ns_to_timespec64(ointerval);
	}
}

/*
 * Returns true if the timeval is in canonical form
 */
#define timeval_valid(t) \
	(((t)->tv_sec >= 0) && (((unsigned long) (t)->tv_usec) < USEC_PER_SEC))

/*
 * Set the current task's itimer @which from @value and, when @ovalue is
 * non-NULL, store the previous setting there.
 * Returns 0 on success or -EINVAL for an unknown @which.
 */
static int do_setitimer(int which, struct itimerspec64 *value,
			struct itimerspec64 *ovalue)
{
	struct task_struct *tsk = current;
	struct hrtimer *timer;
	ktime_t expires;

	switch (which) {
	case ITIMER_REAL:
again:
		spin_lock_irq(&tsk->sighand->siglock);
		timer = &tsk->signal->real_timer;
		if (ovalue) {
			ovalue->it_value = itimer_get_remtime(timer);
			ovalue->it_interval
				= ktime_to_timespec64(tsk->signal->it_real_incr);
		}
		/* We are sharing ->siglock with it_real_fn() */
		if (hrtimer_try_to_cancel(timer) < 0) {
			/*
			 * Cancel failed: drop the lock, wait for the running
			 * callback and retry from the top.
			 */
			spin_unlock_irq(&tsk->sighand->siglock);
			hrtimer_cancel_wait_running(timer);
			goto again;
		}
		expires = timespec64_to_ktime(value->it_value);
		if (expires != 0) {
			tsk->signal->it_real_incr =
				timespec64_to_ktime(value->it_interval);
			hrtimer_start(timer, expires, HRTIMER_MODE_REL);
		} else
			tsk->signal->it_real_incr = 0;

		trace_itimer_state(ITIMER_REAL, value, 0);
		spin_unlock_irq(&tsk->sighand->siglock);
		break;
	case ITIMER_VIRTUAL:
		set_cpu_itimer(tsk, CPUCLOCK_VIRT, value, ovalue);
		break;
	case ITIMER_PROF:
		set_cpu_itimer(tsk, CPUCLOCK_PROF, value, ovalue);
		break;
	default:
		return -EINVAL;
	}
	return 0;
}

#ifdef CONFIG_SECURITY_SELINUX
/* Set itimers 0, 1 and 2 of the current task to an all-zero value. */
void clear_itimer(void)
{
	struct itimerspec64 v = {};
	int i;

	for (i = 0; i < 3; i++)
		do_setitimer(i, &v, NULL);
}
#endif

#ifdef __ARCH_WANT_SYS_ALARM

/**
 * alarm_setitimer - set alarm in seconds
 *
 * @seconds:	number of seconds until alarm
 *		0 disables the alarm
 *
 * Returns the remaining time in seconds of a pending timer or 0 when
 * the timer is not active.
 *
 * When BITS_PER_LONG < 64 the seconds value is clamped to INT_MAX to avoid
 * negative timeval settings which would cause immediate expiry.
 */
static unsigned int alarm_setitimer(unsigned int seconds)
{
	struct itimerspec64 it_new, it_old;

#if BITS_PER_LONG < 64
	if (seconds > INT_MAX)
		seconds = INT_MAX;
#endif
	it_new.it_value.tv_sec = seconds;
	it_new.it_value.tv_nsec = 0;
	it_new.it_interval.tv_sec = it_new.it_interval.tv_nsec = 0;

	do_setitimer(ITIMER_REAL, &it_new, &it_old);

	/*
	 * We can't return 0 if we have an alarm pending ...  And we'd
	 * better return too much than too little anyway
	 */
	if ((!it_old.it_value.tv_sec && it_old.it_value.tv_nsec) ||
	      it_old.it_value.tv_nsec >= (NSEC_PER_SEC / 2))
		it_old.it_value.tv_sec++;

	return it_old.it_value.tv_sec;
}

/*
 * For backwards compatibility?  This can be done in libc so Alpha
 * and all newer ports shouldn't need it.
 */
SYSCALL_DEFINE1(alarm, unsigned int, seconds)
{
	return alarm_setitimer(seconds);
}

#endif

/*
 * Copy an old-style itimerval from user space at @i and convert it to
 * @o (microseconds are multiplied up to nanoseconds).
 * Returns 0 on success, -EFAULT when the copy fails, or -EINVAL when
 * either timeval is not in canonical form.
 */
static int get_itimerval(struct itimerspec64 *o, const struct __kernel_old_itimerval __user *i)
{
	struct __kernel_old_itimerval v;

	if (copy_from_user(&v, i, sizeof(struct __kernel_old_itimerval)))
		return -EFAULT;

	/* Validate the timevals in value. */
	if (!timeval_valid(&v.it_value) ||
	    !timeval_valid(&v.it_interval))
		return -EINVAL;

	o->it_interval.tv_sec = v.it_interval.tv_sec;
	o->it_interval.tv_nsec = v.it_interval.tv_usec * NSEC_PER_USEC;
	o->it_value.tv_sec = v.it_value.tv_sec;
	o->it_value.tv_nsec = v.it_value.tv_usec * NSEC_PER_USEC;
	return 0;
}

/*
 * setitimer(2): program itimer @which from @value and optionally return
 * the previous setting through @ovalue. A NULL @value is treated as an
 * all-zero setting and triggers a one-time warning.
 */
SYSCALL_DEFINE3(setitimer, int, which, struct __kernel_old_itimerval __user *, value,
		struct __kernel_old_itimerval __user *, ovalue)
{
	struct itimerspec64 set_buffer, get_buffer;
	int error;

	if (value) {
		error = get_itimerval(&set_buffer, value);
		if (error)
			return error;
	} else {
		memset(&set_buffer, 0, sizeof(set_buffer));
		printk_once(KERN_WARNING "%s calls setitimer() with new_value NULL pointer."
			    " Misfeature support will be removed\n",
			    current->comm);
	}

	error = do_setitimer(which, &set_buffer, ovalue ? &get_buffer : NULL);
	if (error || !ovalue)
		return error;

	if (put_itimerval(ovalue, &get_buffer))
		return -EFAULT;
	return 0;
}

#if defined(CONFIG_COMPAT) || defined(CONFIG_ALPHA)
/*
 * Same conversion and validation as get_itimerval(), reading a
 * struct old_itimerval32 from user space.
 */
static int get_old_itimerval32(struct itimerspec64 *o, const struct old_itimerval32 __user *i)
{
	struct old_itimerval32 v32;

	if (copy_from_user(&v32, i, sizeof(struct old_itimerval32)))
		return -EFAULT;

	/* Validate the timevals in value. */
	if (!timeval_valid(&v32.it_value) ||
	    !timeval_valid(&v32.it_interval))
		return -EINVAL;

	o->it_interval.tv_sec = v32.it_interval.tv_sec;
	o->it_interval.tv_nsec = v32.it_interval.tv_usec * NSEC_PER_USEC;
	o->it_value.tv_sec = v32.it_value.tv_sec;
	o->it_value.tv_nsec = v32.it_value.tv_usec * NSEC_PER_USEC;
	return 0;
}

/* Compat variant of setitimer(2) using struct old_itimerval32. */
COMPAT_SYSCALL_DEFINE3(setitimer, int, which,
		       struct old_itimerval32 __user *, value,
		       struct old_itimerval32 __user *, ovalue)
{
	struct itimerspec64 set_buffer, get_buffer;
	int error;

	if (value) {
		error = get_old_itimerval32(&set_buffer, value);
		if (error)
			return error;
	} else {
		memset(&set_buffer, 0, sizeof(set_buffer));
		printk_once(KERN_WARNING "%s calls setitimer() with new_value NULL pointer."
			    " Misfeature support will be removed\n",
			    current->comm);
	}

	error = do_setitimer(which, &set_buffer, ovalue ? &get_buffer : NULL);
	if (error || !ovalue)
		return error;
	if (put_old_itimerval32(ovalue, &get_buffer))
		return -EFAULT;
	return 0;
}
#endif
3 3 3 3 3 3 3 3 3 3 3 3 3 1 2 2 2 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 
518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 
1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 
1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright 2004-2011 Red Hat, Inc. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/fs.h> #include <linux/dlm.h> #include <linux/hex.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/delay.h> #include <linux/gfs2_ondisk.h> #include <linux/sched/signal.h> #include "incore.h" #include "util.h" #include "sys.h" #include "trace_gfs2.h" /** * gfs2_update_stats - Update time based stats * @s: The stats to update (local or global) * @index: The index inside @s * @sample: New data to include */ static inline void gfs2_update_stats(struct gfs2_lkstats *s, unsigned index, s64 sample) { /* * @delta is the difference between the current rtt sample and the * running average srtt. We add 1/8 of that to the srtt in order to * update the current srtt estimate. The variance estimate is a bit * more complicated. We subtract the current variance estimate from * the abs value of the @delta and add 1/4 of that to the running * total. That's equivalent to 3/4 of the current variance * estimate plus 1/4 of the abs of @delta. * * Note that the index points at the array entry containing the * smoothed mean value, and the variance is always in the following * entry * * Reference: TCP/IP Illustrated, vol 2, p. 831,832 * All times are in units of integer nanoseconds. Unlike the TCP/IP * case, they are not scaled fixed point. 
*/ s64 delta = sample - s->stats[index]; s->stats[index] += (delta >> 3); index++; s->stats[index] += (s64)(abs(delta) - s->stats[index]) >> 2; } /** * gfs2_update_reply_times - Update locking statistics * @gl: The glock to update * @blocking: The operation may have been blocking * * This assumes that gl->gl_dstamp has been set earlier. * * The rtt (lock round trip time) is an estimate of the time * taken to perform a dlm lock request. We update it on each * reply from the dlm. * * The blocking flag is set on the glock for all dlm requests * which may potentially block due to lock requests from other nodes. * DLM requests where the current lock state is exclusive, the * requested state is null (or unlocked) or where the TRY or * TRY_1CB flags are set are classified as non-blocking. All * other DLM requests are counted as (potentially) blocking. */ static inline void gfs2_update_reply_times(struct gfs2_glock *gl, bool blocking) { struct gfs2_pcpu_lkstats *lks; const unsigned gltype = glock_type(gl); unsigned index = blocking ? GFS2_LKS_SRTTB : GFS2_LKS_SRTT; s64 rtt; preempt_disable(); rtt = ktime_to_ns(ktime_sub(ktime_get_real(), gl->gl_dstamp)); lks = this_cpu_ptr(glock_sbd(gl)->sd_lkstats); gfs2_update_stats(&gl->gl_stats, index, rtt); /* Local */ gfs2_update_stats(&lks->lkstats[gltype], index, rtt); /* Global */ preempt_enable(); trace_gfs2_glock_lock_time(gl, rtt); } /** * gfs2_update_request_times - Update locking statistics * @gl: The glock to update * * The irt (lock inter-request times) measures the average time * between requests to the dlm. It is updated immediately before * each dlm call. 
*/ static inline void gfs2_update_request_times(struct gfs2_glock *gl) { struct gfs2_pcpu_lkstats *lks; const unsigned gltype = glock_type(gl); ktime_t dstamp; s64 irt; preempt_disable(); dstamp = gl->gl_dstamp; gl->gl_dstamp = ktime_get_real(); irt = ktime_to_ns(ktime_sub(gl->gl_dstamp, dstamp)); lks = this_cpu_ptr(glock_sbd(gl)->sd_lkstats); gfs2_update_stats(&gl->gl_stats, GFS2_LKS_SIRT, irt); /* Local */ gfs2_update_stats(&lks->lkstats[gltype], GFS2_LKS_SIRT, irt); /* Global */ preempt_enable(); } static void gdlm_ast(void *arg) { struct gfs2_glock *gl = arg; bool blocking; unsigned ret; blocking = test_bit(GLF_BLOCKING, &gl->gl_flags); gfs2_update_reply_times(gl, blocking); clear_bit(GLF_BLOCKING, &gl->gl_flags); /* If the glock is dead, we only react to a dlm_unlock() reply. */ if (__lockref_is_dead(&gl->gl_lockref) && gl->gl_lksb.sb_status != -DLM_EUNLOCK) return; BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED); if ((gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID) && gl->gl_lksb.sb_lvbptr) memset(gl->gl_lksb.sb_lvbptr, 0, GDLM_LVB_SIZE); switch (gl->gl_lksb.sb_status) { case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */ gfs2_glock_free(gl); return; case -DLM_ECANCEL: /* Cancel while getting lock */ ret = LM_OUT_CANCELED; goto out; case -EAGAIN: /* Try lock fails */ ret = LM_OUT_TRY_AGAIN; goto out; case -EDEADLK: /* Deadlock detected */ ret = LM_OUT_DEADLOCK; goto out; case -ETIMEDOUT: /* Canceled due to timeout */ ret = LM_OUT_ERROR; goto out; case 0: /* Success */ break; default: /* Something unexpected */ BUG(); } ret = gl->gl_req; /* * The GLF_INITIAL flag is initially set for new glocks. Upon the * first successful new (non-conversion) request, we clear this flag to * indicate that a DLM lock exists and that gl->gl_lksb.sb_lkid is the * identifier to use for identifying it. * * Any failed initial requests do not create a DLM lock, so we ignore * the gl->gl_lksb.sb_lkid values that come with such requests. 
*/ clear_bit(GLF_INITIAL, &gl->gl_flags); gfs2_glock_complete(gl, ret); return; out: if (test_bit(GLF_INITIAL, &gl->gl_flags)) gl->gl_lksb.sb_lkid = 0; gfs2_glock_complete(gl, ret); } static void gdlm_bast(void *arg, int mode) { struct gfs2_glock *gl = arg; if (__lockref_is_dead(&gl->gl_lockref)) return; switch (mode) { case DLM_LOCK_EX: gfs2_glock_cb(gl, LM_ST_UNLOCKED); break; case DLM_LOCK_CW: gfs2_glock_cb(gl, LM_ST_DEFERRED); break; case DLM_LOCK_PR: gfs2_glock_cb(gl, LM_ST_SHARED); break; default: fs_err(glock_sbd(gl), "unknown bast mode %d\n", mode); BUG(); } } /* convert gfs lock-state to dlm lock-mode */ static int make_mode(struct gfs2_sbd *sdp, const unsigned int lmstate) { switch (lmstate) { case LM_ST_UNLOCKED: return DLM_LOCK_NL; case LM_ST_EXCLUSIVE: return DLM_LOCK_EX; case LM_ST_DEFERRED: return DLM_LOCK_CW; case LM_ST_SHARED: return DLM_LOCK_PR; } fs_err(sdp, "unknown LM state %d\n", lmstate); BUG(); return -1; } /* Taken from fs/dlm/lock.c. */ static bool middle_conversion(int cur, int req) { return (cur == DLM_LOCK_PR && req == DLM_LOCK_CW) || (cur == DLM_LOCK_CW && req == DLM_LOCK_PR); } static bool down_conversion(int cur, int req) { return !middle_conversion(cur, req) && req < cur; } static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags, const int req, bool blocking) { u32 lkf = 0; if (gl->gl_lksb.sb_lvbptr) lkf |= DLM_LKF_VALBLK; if (gfs_flags & LM_FLAG_TRY) lkf |= DLM_LKF_NOQUEUE; if (gfs_flags & LM_FLAG_TRY_1CB) { lkf |= DLM_LKF_NOQUEUE; lkf |= DLM_LKF_NOQUEUEBAST; } if (!test_bit(GLF_INITIAL, &gl->gl_flags)) { lkf |= DLM_LKF_CONVERT; /* * The DLM_LKF_QUECVT flag needs to be set for "first come, * first served" semantics, but it must only be set for * "upward" lock conversions or else DLM will reject the * request as invalid. 
*/ if (blocking) lkf |= DLM_LKF_QUECVT; } return lkf; } static void gfs2_reverse_hex(char *c, u64 value) { *c = '0'; while (value) { *c-- = hex_asc[value & 0x0f]; value >>= 4; } } static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, unsigned int flags) { struct lm_lockstruct *ls = &glock_sbd(gl)->sd_lockstruct; bool blocking; int cur, req; u32 lkf; char strname[GDLM_STRNAME_BYTES] = ""; int error; gl->gl_req = req_state; cur = make_mode(glock_sbd(gl), gl->gl_state); req = make_mode(glock_sbd(gl), req_state); blocking = !down_conversion(cur, req) && !(flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB)); lkf = make_flags(gl, flags, req, blocking); if (blocking) set_bit(GLF_BLOCKING, &gl->gl_flags); gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT); gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT); if (test_bit(GLF_INITIAL, &gl->gl_flags)) { memset(strname, ' ', GDLM_STRNAME_BYTES - 1); strname[GDLM_STRNAME_BYTES - 1] = '\0'; gfs2_reverse_hex(strname + 7, glock_type(gl)); gfs2_reverse_hex(strname + 23, glock_number(gl)); gl->gl_dstamp = ktime_get_real(); } else { gfs2_update_request_times(gl); } /* * Submit the actual lock request. */ again: down_read(&ls->ls_sem); error = -ENODEV; if (likely(ls->ls_dlm != NULL)) { error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname, GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); } up_read(&ls->ls_sem); if (error == -EBUSY) { msleep(20); goto again; } return error; } static void gdlm_put_lock(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = glock_sbd(gl); struct lm_lockstruct *ls = &sdp->sd_lockstruct; uint32_t flags = 0; int error; BUG_ON(!__lockref_is_dead(&gl->gl_lockref)); if (test_bit(GLF_INITIAL, &gl->gl_flags)) { gfs2_glock_free(gl); return; } gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT); gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT); gfs2_update_request_times(gl); /* * When the lockspace is released, all remaining glocks will be * unlocked automatically. 
This is more efficient than unlocking them * individually, but when the lock is held in DLM_LOCK_EX or * DLM_LOCK_PW mode, the lock value block (LVB) would be lost. */ if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) && (!gl->gl_lksb.sb_lvbptr || gl->gl_state != LM_ST_EXCLUSIVE)) { gfs2_glock_free_later(gl); return; } if (gl->gl_lksb.sb_lvbptr) flags |= DLM_LKF_VALBLK; again: down_read(&ls->ls_sem); error = -ENODEV; if (likely(ls->ls_dlm != NULL)) { error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, flags, NULL, gl); } up_read(&ls->ls_sem); if (error == -EBUSY) { msleep(20); goto again; } if (error == -ENODEV) { gfs2_glock_free(gl); return; } if (error) { fs_err(sdp, "gdlm_unlock %x,%llx err=%d\n", glock_type(gl), (unsigned long long) glock_number(gl), error); } } static void gdlm_cancel(struct gfs2_glock *gl) { struct lm_lockstruct *ls = &glock_sbd(gl)->sd_lockstruct; down_read(&ls->ls_sem); if (likely(ls->ls_dlm != NULL)) { dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl); } up_read(&ls->ls_sem); } /* * dlm/gfs2 recovery coordination using dlm_recover callbacks * * 1. dlm_controld sees lockspace members change * 2. dlm_controld blocks dlm-kernel locking activity * 3. dlm_controld within dlm-kernel notifies gfs2 (recover_prep) * 4. dlm_controld starts and finishes its own user level recovery * 5. dlm_controld starts dlm-kernel dlm_recoverd to do kernel recovery * 6. dlm_recoverd notifies gfs2 of failed nodes (recover_slot) * 7. dlm_recoverd does its own lock recovery * 8. dlm_recoverd unblocks dlm-kernel locking activity * 9. dlm_recoverd notifies gfs2 when done (recover_done with new generation) * 10. gfs2_control updates control_lock lvb with new generation and jid bits * 11. gfs2_control enqueues journals for gfs2_recover to recover (maybe none) * 12. gfs2_recover dequeues and recovers journals of failed nodes * 13. gfs2_recover provides recovery results to gfs2_control (recovery_result) * 14. 
gfs2_control updates control_lock lvb jid bits for recovered journals * 15. gfs2_control unblocks normal locking when all journals are recovered * * - failures during recovery * * recover_prep() may set BLOCK_LOCKS (step 3) again before gfs2_control * clears BLOCK_LOCKS (step 15), e.g. another node fails while still * recovering for a prior failure. gfs2_control needs a way to detect * this so it can leave BLOCK_LOCKS set in step 15. This is managed using * the recover_block and recover_start values. * * recover_done() provides a new lockspace generation number each time it * is called (step 9). This generation number is saved as recover_start. * When recover_prep() is called, it sets BLOCK_LOCKS and sets * recover_block = recover_start. So, while recover_block is equal to * recover_start, BLOCK_LOCKS should remain set. (recover_spin must * be held around the BLOCK_LOCKS/recover_block/recover_start logic.) * * - more specific gfs2 steps in sequence above * * 3. recover_prep sets BLOCK_LOCKS and sets recover_block = recover_start * 6. recover_slot records any failed jids (maybe none) * 9. recover_done sets recover_start = new generation number * 10. gfs2_control sets control_lock lvb = new gen + bits for failed jids * 12. gfs2_recover does journal recoveries for failed jids identified above * 14. gfs2_control clears control_lock lvb bits for recovered jids * 15. gfs2_control checks if recover_block == recover_start (step 3 occurred * again) then do nothing, otherwise if recover_start > recover_block * then clear BLOCK_LOCKS. * * - parallel recovery steps across all nodes * * All nodes attempt to update the control_lock lvb with the new generation * number and jid bits, but only the first to get the control_lock EX will * do so; others will see that it's already done (lvb already contains new * generation number.) * * . All nodes get the same recover_prep/recover_slot/recover_done callbacks * . All nodes attempt to set control_lock lvb gen + bits for the new gen * . 
One node gets control_lock first and writes the lvb, others see it's done * . All nodes attempt to recover jids for which they see control_lock bits set * . One node succeeds for a jid, and that one clears the jid bit in the lvb * . All nodes will eventually see all lvb bits clear and unblock locks * * - is there a problem with clearing an lvb bit that should be set * and missing a journal recovery? * * 1. jid fails * 2. lvb bit set for step 1 * 3. jid recovered for step 1 * 4. jid taken again (new mount) * 5. jid fails (for step 4) * 6. lvb bit set for step 5 (will already be set) * 7. lvb bit cleared for step 3 * * This is not a problem because the failure in step 5 does not * require recovery, because the mount in step 4 could not have * progressed far enough to unblock locks and access the fs. The * control_mount() function waits for all recoveries to be complete * for the latest lockspace generation before ever unblocking locks * and returning. The mount in step 4 waits until the recovery in * step 1 is done. * * - special case of first mounter: first node to mount the fs * * The first node to mount a gfs2 fs needs to check all the journals * and recover any that need recovery before other nodes are allowed * to mount the fs. (Others may begin mounting, but they must wait * for the first mounter to be done before taking locks on the fs * or accessing the fs.) This has two parts: * * 1. The mounted_lock tells a node it's the first to mount the fs. * Each node holds the mounted_lock in PR while it's mounted. * Each node tries to acquire the mounted_lock in EX when it mounts. * If a node is granted the mounted_lock EX it means there are no * other mounted nodes (no PR locks exist), and it is the first mounter. * The mounted_lock is demoted to PR when first recovery is done, so * others will fail to get an EX lock, but will get a PR lock. * * 2. 
The control_lock blocks others in control_mount() while the first * mounter is doing first mount recovery of all journals. * A mounting node needs to acquire control_lock in EX mode before * it can proceed. The first mounter holds control_lock in EX while doing * the first mount recovery, blocking mounts from other nodes, then demotes * control_lock to NL when it's done (others_may_mount/first_done), * allowing other nodes to continue mounting. * * first mounter: * control_lock EX/NOQUEUE success * mounted_lock EX/NOQUEUE success (no other PR, so no other mounters) * set first=1 * do first mounter recovery * mounted_lock EX->PR * control_lock EX->NL, write lvb generation * * other mounter: * control_lock EX/NOQUEUE success (if fail -EAGAIN, retry) * mounted_lock EX/NOQUEUE fail -EAGAIN (expected due to other mounters PR) * mounted_lock PR/NOQUEUE success * read lvb generation * control_lock EX->NL * set first=0 * * - mount during recovery * * If a node mounts while others are doing recovery (not first mounter), * the mounting node will get its initial recover_done() callback without * having seen any previous failures/callbacks. * * It must wait for all recoveries preceding its mount to be finished * before it unblocks locks. It does this by repeating the "other mounter" * steps above until the lvb generation number is >= its mount generation * number (from initial recover_done) and all lvb bits are clear. * * - control_lock lvb format * * 4 bytes generation number: the latest dlm lockspace generation number * from recover_done callback. Indicates the jid bitmap has been updated * to reflect all slot failures through that generation. * 4 bytes unused. * GDLM_LVB_SIZE-8 bytes of jid bit map. If bit N is set, it indicates * that jid N needs recovery. 
*/ #define JID_BITMAP_OFFSET 8 /* 4 byte generation number + 4 byte unused */ static void control_lvb_read(struct lm_lockstruct *ls, uint32_t *lvb_gen, char *lvb_bits) { __le32 gen; memcpy(lvb_bits, ls->ls_control_lvb, GDLM_LVB_SIZE); memcpy(&gen, lvb_bits, sizeof(__le32)); *lvb_gen = le32_to_cpu(gen); } static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen, char *lvb_bits) { __le32 gen; memcpy(ls->ls_control_lvb, lvb_bits, GDLM_LVB_SIZE); gen = cpu_to_le32(lvb_gen); memcpy(ls->ls_control_lvb, &gen, sizeof(__le32)); } static int all_jid_bits_clear(char *lvb) { return !memchr_inv(lvb + JID_BITMAP_OFFSET, 0, GDLM_LVB_SIZE - JID_BITMAP_OFFSET); } static void sync_wait_cb(void *arg) { struct lm_lockstruct *ls = arg; complete(&ls->ls_sync_wait); } static int sync_unlock(struct gfs2_sbd *sdp, struct dlm_lksb *lksb, char *name) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; int error; down_read(&ls->ls_sem); error = -ENODEV; if (likely(ls->ls_dlm != NULL)) error = dlm_unlock(ls->ls_dlm, lksb->sb_lkid, 0, lksb, ls); up_read(&ls->ls_sem); if (error) { fs_err(sdp, "%s lkid %x error %d\n", name, lksb->sb_lkid, error); return error; } wait_for_completion(&ls->ls_sync_wait); if (lksb->sb_status != -DLM_EUNLOCK) { fs_err(sdp, "%s lkid %x status %d\n", name, lksb->sb_lkid, lksb->sb_status); return -1; } return 0; } static int sync_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags, unsigned int num, struct dlm_lksb *lksb, char *name) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; char strname[GDLM_STRNAME_BYTES]; int error, status; memset(strname, 0, GDLM_STRNAME_BYTES); snprintf(strname, GDLM_STRNAME_BYTES, "%8x%16x", LM_TYPE_NONDISK, num); down_read(&ls->ls_sem); error = -ENODEV; if (likely(ls->ls_dlm != NULL)) { error = dlm_lock(ls->ls_dlm, mode, lksb, flags, strname, GDLM_STRNAME_BYTES - 1, 0, sync_wait_cb, ls, NULL); } up_read(&ls->ls_sem); if (error) { fs_err(sdp, "%s lkid %x flags %x mode %d error %d\n", name, lksb->sb_lkid, flags, mode, error); 
return error; } wait_for_completion(&ls->ls_sync_wait); status = lksb->sb_status; if (status && status != -EAGAIN) { fs_err(sdp, "%s lkid %x flags %x mode %d status %d\n", name, lksb->sb_lkid, flags, mode, status); } return status; } static int mounted_unlock(struct gfs2_sbd *sdp) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; return sync_unlock(sdp, &ls->ls_mounted_lksb, "mounted_lock"); } static int mounted_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; return sync_lock(sdp, mode, flags, GFS2_MOUNTED_LOCK, &ls->ls_mounted_lksb, "mounted_lock"); } static int control_unlock(struct gfs2_sbd *sdp) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; return sync_unlock(sdp, &ls->ls_control_lksb, "control_lock"); } static int control_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; return sync_lock(sdp, mode, flags, GFS2_CONTROL_LOCK, &ls->ls_control_lksb, "control_lock"); } static void gfs2_control_func(struct work_struct *work) { struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_control_work.work); struct lm_lockstruct *ls = &sdp->sd_lockstruct; uint32_t block_gen, start_gen, lvb_gen, flags; int recover_set = 0; int write_lvb = 0; int recover_size; int i, error; spin_lock(&ls->ls_recover_spin); /* * No MOUNT_DONE means we're still mounting; control_mount() * will set this flag, after which this thread will take over * all further clearing of BLOCK_LOCKS. * * FIRST_MOUNT means this node is doing first mounter recovery, * for which recovery control is handled by * control_mount()/control_first_done(), not this thread. 
*/ if (!test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) || test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) { spin_unlock(&ls->ls_recover_spin); return; } block_gen = ls->ls_recover_block; start_gen = ls->ls_recover_start; spin_unlock(&ls->ls_recover_spin); /* * Equal block_gen and start_gen implies we are between * recover_prep and recover_done callbacks, which means * dlm recovery is in progress and dlm locking is blocked. * There's no point trying to do any work until recover_done. */ if (block_gen == start_gen) return; /* * Propagate recover_submit[] and recover_result[] to lvb: * dlm_recoverd adds to recover_submit[] jids needing recovery * gfs2_recover adds to recover_result[] journal recovery results * * set lvb bit for jids in recover_submit[] if the lvb has not * yet been updated for the generation of the failure * * clear lvb bit for jids in recover_result[] if the result of * the journal recovery is SUCCESS */ error = control_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_VALBLK); if (error) { fs_err(sdp, "control lock EX error %d\n", error); return; } control_lvb_read(ls, &lvb_gen, ls->ls_lvb_bits); spin_lock(&ls->ls_recover_spin); if (block_gen != ls->ls_recover_block || start_gen != ls->ls_recover_start) { fs_info(sdp, "recover generation %u block1 %u %u\n", start_gen, block_gen, ls->ls_recover_block); spin_unlock(&ls->ls_recover_spin); control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT); return; } recover_size = ls->ls_recover_size; if (lvb_gen <= start_gen) { /* * Clear lvb bits for jids we've successfully recovered. * Because all nodes attempt to recover failed journals, * a journal can be recovered multiple times successfully * in succession. Only the first will really do recovery, * the others find it clean, but still report a successful * recovery. So, another node may have already recovered * the jid and cleared the lvb bit for it. 
*/ for (i = 0; i < recover_size; i++) { if (ls->ls_recover_result[i] != LM_RD_SUCCESS) continue; ls->ls_recover_result[i] = 0; if (!test_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET)) continue; __clear_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET); write_lvb = 1; } } if (lvb_gen == start_gen) { /* * Failed slots before start_gen are already set in lvb. */ for (i = 0; i < recover_size; i++) { if (!ls->ls_recover_submit[i]) continue; if (ls->ls_recover_submit[i] < lvb_gen) ls->ls_recover_submit[i] = 0; } } else if (lvb_gen < start_gen) { /* * Failed slots before start_gen are not yet set in lvb. */ for (i = 0; i < recover_size; i++) { if (!ls->ls_recover_submit[i]) continue; if (ls->ls_recover_submit[i] < start_gen) { ls->ls_recover_submit[i] = 0; __set_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET); } } /* even if there are no bits to set, we need to write the latest generation to the lvb */ write_lvb = 1; } else { /* * we should be getting a recover_done() for lvb_gen soon */ } spin_unlock(&ls->ls_recover_spin); if (write_lvb) { control_lvb_write(ls, start_gen, ls->ls_lvb_bits); flags = DLM_LKF_CONVERT | DLM_LKF_VALBLK; } else { flags = DLM_LKF_CONVERT; } error = control_lock(sdp, DLM_LOCK_NL, flags); if (error) { fs_err(sdp, "control lock NL error %d\n", error); return; } /* * Everyone will see jid bits set in the lvb, run gfs2_recover_set(), * and clear a jid bit in the lvb if the recovery is a success. * Eventually all journals will be recovered, all jid bits will * be cleared in the lvb, and everyone will clear BLOCK_LOCKS. 
*/
	for (i = 0; i < recover_size; i++) {
		if (test_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET)) {
			fs_info(sdp, "recover generation %u jid %d\n",
				start_gen, i);
			gfs2_recover_set(sdp, i);
			recover_set++;
		}
	}
	if (recover_set)
		return;

	/*
	 * No more jid bits set in lvb, all recovery is done, unblock locks
	 * (unless a new recover_prep callback has occurred blocking locks
	 * again while working above)
	 */
	spin_lock(&ls->ls_recover_spin);
	if (ls->ls_recover_block == block_gen &&
	    ls->ls_recover_start == start_gen) {
		clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
		spin_unlock(&ls->ls_recover_spin);
		fs_info(sdp, "recover generation %u done\n", start_gen);
		gfs2_glock_thaw(sdp);
	} else {
		fs_info(sdp, "recover generation %u block2 %u %u\n",
			start_gen, block_gen, ls->ls_recover_block);
		spin_unlock(&ls->ls_recover_spin);
	}
}

/*
 * control_mount - acquire the control and mounted locks and decide whether
 * this node is the first mounter.
 * @sdp: the superblock being mounted
 *
 * Both lksbs and the control lvb are zeroed, DFL_BLOCK_LOCKS is set and both
 * locks are first taken in NL.  The function then loops via the "restart"
 * label: control_lock is converted to EX and, unless mounting as a spectator,
 * mounted_lock is tried in EX and then in PR, always with DLM_LKF_NOQUEUE.
 *
 * If mounted_lock was obtained in EX, DFL_MOUNT_DONE and DFL_FIRST_MOUNT are
 * set, DFL_BLOCK_LOCKS is cleared and 0 is returned with both locks still in
 * EX.  Otherwise control_lock is demoted to NL and the loop repeats until the
 * lvb has no jid bits set, lvb_gen has reached mount_gen, lvb_gen equals
 * start_gen and block_gen differs from start_gen; DFL_MOUNT_DONE is then set,
 * DFL_BLOCK_LOCKS is cleared and the recover arrays are zeroed.
 *
 * Returns 0 on success, otherwise the locking error, -EINTR when a signal is
 * pending on a retry, or -EINVAL when the lvb generation reads 0xFFFFFFFF.
 * Every exit through the "fail" label unlocks both locks.
 */
static int control_mount(struct gfs2_sbd *sdp)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	uint32_t start_gen, block_gen, mount_gen, lvb_gen;
	int mounted_mode;
	int retries = 0;
	int error;

	memset(&ls->ls_mounted_lksb, 0, sizeof(struct dlm_lksb));
	memset(&ls->ls_control_lksb, 0, sizeof(struct dlm_lksb));
	memset(&ls->ls_control_lvb, 0, GDLM_LVB_SIZE);
	ls->ls_control_lksb.sb_lvbptr = ls->ls_control_lvb;
	init_completion(&ls->ls_sync_wait);

	set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);

	error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_VALBLK);
	if (error) {
		/* nothing is held yet, so there is nothing to undo */
		fs_err(sdp, "control_mount control_lock NL error %d\n", error);
		return error;
	}

	error = mounted_lock(sdp, DLM_LOCK_NL, 0);
	if (error) {
		fs_err(sdp, "control_mount mounted_lock NL error %d\n", error);
		control_unlock(sdp);
		return error;
	}
	mounted_mode = DLM_LOCK_NL;

restart:
	/* only honour a pending signal from the second pass onwards */
	if (retries++ && signal_pending(current)) {
		error = -EINTR;
		goto fail;
	}

	/*
	 * We always start with both locks in NL. control_lock is
	 * demoted to NL below so we don't need to do it here.
	 */
	if (mounted_mode != DLM_LOCK_NL) {
		error = mounted_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT);
		if (error)
			goto fail;
		mounted_mode = DLM_LOCK_NL;
	}

	/*
	 * Other nodes need to do some work in dlm recovery and gfs2_control
	 * before the recover_done and control_lock will be ready for us below.
	 * A delay here is not required but often avoids having to retry.
	 */
	msleep_interruptible(500);

	/*
	 * Acquire control_lock in EX and mounted_lock in either EX or PR.
	 * control_lock lvb keeps track of any pending journal recoveries.
	 * mounted_lock indicates if any other nodes have the fs mounted.
	 */
	error = control_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE|DLM_LKF_VALBLK);
	if (error == -EAGAIN) {
		goto restart;
	} else if (error) {
		fs_err(sdp, "control_mount control_lock EX error %d\n", error);
		goto fail;
	}

	/*
	 * If we're a spectator, we don't want to take the lock in EX because
	 * we cannot do the first-mount responsibility it implies: recovery.
	 */
	if (sdp->sd_args.ar_spectator)
		goto locks_done;

	error = mounted_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE);
	if (!error) {
		mounted_mode = DLM_LOCK_EX;
		goto locks_done;
	} else if (error != -EAGAIN) {
		fs_err(sdp, "control_mount mounted_lock EX error %d\n", error);
		goto fail;
	}

	error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE);
	if (!error) {
		mounted_mode = DLM_LOCK_PR;
		goto locks_done;
	} else {
		/* not even -EAGAIN should happen here */
		fs_err(sdp, "control_mount mounted_lock PR error %d\n", error);
		goto fail;
	}

locks_done:
	/*
	 * If we got both locks above in EX, then we're the first mounter.
	 * If not, then we need to wait for the control_lock lvb to be
	 * updated by other mounted nodes to reflect our mount generation.
	 *
	 * In simple first mounter cases, first mounter will see zero lvb_gen,
	 * but in cases where all existing nodes leave/fail before mounting
	 * nodes finish control_mount, then all nodes will be mounting and
	 * lvb_gen will be non-zero.
	 */
	control_lvb_read(ls, &lvb_gen, ls->ls_lvb_bits);

	if (lvb_gen == 0xFFFFFFFF) {
		/* special value to force mount attempts to fail */
		fs_err(sdp, "control_mount control_lock disabled\n");
		error = -EINVAL;
		goto fail;
	}

	if (mounted_mode == DLM_LOCK_EX) {
		/* first mounter, keep both EX while doing first recovery */
		spin_lock(&ls->ls_recover_spin);
		clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
		set_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags);
		set_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags);
		spin_unlock(&ls->ls_recover_spin);
		fs_info(sdp, "first mounter control generation %u\n", lvb_gen);
		return 0;
	}

	error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT);
	if (error)
		goto fail;

	/*
	 * We are not first mounter, now we need to wait for the control_lock
	 * lvb generation to be >= the generation from our first recover_done
	 * and all lvb bits to be clear (no pending journal recoveries.)
	 */
	if (!all_jid_bits_clear(ls->ls_lvb_bits)) {
		/* journals need recovery, wait until all are clear */
		fs_info(sdp, "control_mount wait for journal recovery\n");
		goto restart;
	}

	/* snapshot the generations under the lock; every path below unlocks */
	spin_lock(&ls->ls_recover_spin);
	block_gen = ls->ls_recover_block;
	start_gen = ls->ls_recover_start;
	mount_gen = ls->ls_recover_mount;

	if (lvb_gen < mount_gen) {
		/* wait for mounted nodes to update control_lock lvb to our
		   generation, which might include new recovery bits set */
		if (sdp->sd_args.ar_spectator) {
			/*
			 * NOTE(review): in this copy of the file the message
			 * literal below is broken across a physical line.
			 */
			fs_info(sdp, "Recovery is required.
Waiting for a " "non-spectator to mount.\n");
			spin_unlock(&ls->ls_recover_spin);
			msleep_interruptible(1000);
		} else {
			fs_info(sdp, "control_mount wait1 block %u start %u " "mount %u lvb %u flags %lx\n",
				block_gen, start_gen, mount_gen, lvb_gen,
				ls->ls_recover_flags);
			spin_unlock(&ls->ls_recover_spin);
		}
		goto restart;
	}

	if (lvb_gen != start_gen) {
		/* wait for mounted nodes to update control_lock lvb to the
		   latest recovery generation */
		fs_info(sdp, "control_mount wait2 block %u start %u mount %u " "lvb %u flags %lx\n",
			block_gen, start_gen, mount_gen, lvb_gen,
			ls->ls_recover_flags);
		spin_unlock(&ls->ls_recover_spin);
		goto restart;
	}

	if (block_gen == start_gen) {
		/* dlm recovery in progress, wait for it to finish */
		fs_info(sdp, "control_mount wait3 block %u start %u mount %u " "lvb %u flags %lx\n",
			block_gen, start_gen, mount_gen, lvb_gen,
			ls->ls_recover_flags);
		spin_unlock(&ls->ls_recover_spin);
		goto restart;
	}

	clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
	set_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags);
	memset(ls->ls_recover_submit, 0, ls->ls_recover_size*sizeof(uint32_t));
	memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t));
	spin_unlock(&ls->ls_recover_spin);
	return 0;

fail:
	mounted_unlock(sdp);
	control_unlock(sdp);
	return error;
}

/*
 * control_first_done - leave first-mounter mode.
 * @sdp: the superblock
 *
 * Expects DFL_MOUNT_DONE and DFL_FIRST_MOUNT to be set and DFL_BLOCK_LOCKS to
 * be clear; otherwise it logs, drops control_lock and returns -1.  While
 * start_gen equals block_gen it waits for DFL_DLM_RECOVERY to clear and then
 * re-checks from the top.  After that it clears DFL_FIRST_MOUNT, sets
 * DFL_FIRST_MOUNT_DONE, zeroes the recover arrays and the lvb bits, calls
 * control_lvb_write() with start_gen, converts mounted_lock to PR and
 * control_lock to NL with DLM_LKF_VALBLK.
 *
 * Returns the result of the final control_lock conversion; an error from the
 * mounted_lock conversion is only logged and then overwritten.
 */
static int control_first_done(struct gfs2_sbd *sdp)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	uint32_t start_gen, block_gen;
	int error;

restart:
	spin_lock(&ls->ls_recover_spin);
	start_gen = ls->ls_recover_start;
	block_gen = ls->ls_recover_block;

	if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags) ||
	    !test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) ||
	    !test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
		/* sanity check, should not happen */
		fs_err(sdp, "control_first_done start %u block %u flags %lx\n",
		       start_gen, block_gen, ls->ls_recover_flags);
		spin_unlock(&ls->ls_recover_spin);
		control_unlock(sdp);
		return -1;
	}

	if (start_gen == block_gen) {
		/*
		 * Wait for the end of a dlm recovery cycle to switch from
		 * first mounter recovery.  We can ignore any recover_slot
		 * callbacks between the recover_prep and next recover_done
		 * because we are still the first mounter and any failed nodes
		 * have not fully mounted, so they don't need recovery.
		 */
		spin_unlock(&ls->ls_recover_spin);
		fs_info(sdp, "control_first_done wait gen %u\n", start_gen);

		wait_on_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY,
			    TASK_UNINTERRUPTIBLE);
		goto restart;
	}

	clear_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags);
	set_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags);
	memset(ls->ls_recover_submit, 0, ls->ls_recover_size*sizeof(uint32_t));
	memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t));
	spin_unlock(&ls->ls_recover_spin);

	/* start the lvb from a clean bitmap for generation start_gen */
	memset(ls->ls_lvb_bits, 0, GDLM_LVB_SIZE);
	control_lvb_write(ls, start_gen, ls->ls_lvb_bits);

	error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT);
	if (error)
		fs_err(sdp, "control_first_done mounted PR error %d\n", error);

	error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT|DLM_LKF_VALBLK);
	if (error)
		fs_err(sdp, "control_first_done control NL error %d\n", error);

	return error;
}

/*
 * Expand static jid arrays if necessary (by increments of RECOVER_SIZE_INC)
 * to accommodate the largest slot number.
(NB dlm slot numbers start at 1,
 * gfs2 jids start at 0, so jid = slot - 1)
 */

#define RECOVER_SIZE_INC 16

/*
 * Allocates ls_lvb_bits on first use, then grows ls_recover_submit[] and
 * ls_recover_result[] so that they cover the largest jid found in @slots.
 * Existing entries are copied and the pointers are swapped while holding
 * ls_recover_spin.  Returns 0 on success (including when no growth is needed)
 * or -ENOMEM when an allocation fails, in which case the old arrays are kept.
 */
static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots,
			    int num_slots)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	uint32_t *submit = NULL;
	uint32_t *result = NULL;
	uint32_t old_size, new_size;
	int i, max_jid;

	if (!ls->ls_lvb_bits) {
		ls->ls_lvb_bits = kzalloc(GDLM_LVB_SIZE, GFP_NOFS);
		if (!ls->ls_lvb_bits)
			return -ENOMEM;
	}

	max_jid = 0;
	for (i = 0; i < num_slots; i++) {
		if (max_jid < slots[i].slot - 1)
			max_jid = slots[i].slot - 1;
	}

	/* round the required size up in RECOVER_SIZE_INC steps */
	old_size = ls->ls_recover_size;
	new_size = old_size;
	while (new_size < max_jid + 1)
		new_size += RECOVER_SIZE_INC;
	if (new_size == old_size)
		return 0;

	/* allocate outside the spinlock; only the swap below is locked */
	submit = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS);
	result = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS);
	if (!submit || !result) {
		kfree(submit);
		kfree(result);
		return -ENOMEM;
	}

	spin_lock(&ls->ls_recover_spin);
	memcpy(submit, ls->ls_recover_submit, old_size * sizeof(uint32_t));
	memcpy(result, ls->ls_recover_result, old_size * sizeof(uint32_t));
	kfree(ls->ls_recover_submit);
	kfree(ls->ls_recover_result);
	ls->ls_recover_submit = submit;
	ls->ls_recover_result = result;
	ls->ls_recover_size = new_size;
	spin_unlock(&ls->ls_recover_spin);
	return 0;
}

/*
 * Frees the lvb bitmap and both recover arrays and resets the pointers and
 * the recorded size, so that calling it again is harmless.
 */
static void free_recover_size(struct lm_lockstruct *ls)
{
	kfree(ls->ls_lvb_bits);
	kfree(ls->ls_recover_submit);
	kfree(ls->ls_recover_result);
	ls->ls_recover_submit = NULL;
	ls->ls_recover_result = NULL;
	ls->ls_recover_size = 0;
	ls->ls_lvb_bits = NULL;
}

/* dlm calls before it does lock recovery */

static void gdlm_recover_prep(void *arg)
{
	struct gfs2_sbd *sdp = arg;
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;

	if (gfs2_withdrawn(sdp)) {
		fs_err(sdp, "recover_prep ignored due to withdraw.\n");
		return;
	}
	spin_lock(&ls->ls_recover_spin);
	/* block_gen == start_gen marks "dlm recovery in progress" */
	ls->ls_recover_block = ls->ls_recover_start;
	set_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags);

	/* DFL_BLOCK_LOCKS is left alone before mount is done and during first mount */
	if (!test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) ||
	     test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
		spin_unlock(&ls->ls_recover_spin);
		return;
	}
	set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
	spin_unlock(&ls->ls_recover_spin);
}

/* dlm calls after recover_prep has been completed on all lockspace members;
   identifies slot/jid of failed member */

static void gdlm_recover_slot(void *arg, struct dlm_slot *slot)
{
	struct gfs2_sbd *sdp = arg;
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	int jid = slot->slot - 1;

	if (gfs2_withdrawn(sdp)) {
		fs_err(sdp, "recover_slot jid %d ignored due to withdraw.\n",
		       jid);
		return;
	}
	spin_lock(&ls->ls_recover_spin);
	/* the arrays may not have been grown far enough for this jid */
	if (ls->ls_recover_size < jid + 1) {
		fs_err(sdp, "recover_slot jid %d gen %u short size %d\n",
		       jid, ls->ls_recover_block, ls->ls_recover_size);
		spin_unlock(&ls->ls_recover_spin);
		return;
	}

	/* an earlier submission for this jid is logged, then overwritten */
	if (ls->ls_recover_submit[jid]) {
		fs_info(sdp, "recover_slot jid %d gen %u prev %u\n",
			jid, ls->ls_recover_block, ls->ls_recover_submit[jid]);
	}
	ls->ls_recover_submit[jid] = ls->ls_recover_block;
	spin_unlock(&ls->ls_recover_spin);
}

/* dlm calls after recover_slot and after it completes lock recovery */

static void gdlm_recover_done(void *arg, struct dlm_slot *slots, int num_slots,
			      int our_slot, uint32_t generation)
{
	struct gfs2_sbd *sdp = arg;
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;

	if (gfs2_withdrawn(sdp)) {
		fs_err(sdp, "recover_done ignored due to withdraw.\n");
		return;
	}
	/* ensure the ls jid arrays are large enough */
	/*
	 * NOTE(review): the return value is ignored here.  If the resize
	 * fails the arrays keep their old size, and the size checks in
	 * gdlm_recover_slot() and gdlm_recovery_result() log "short size".
	 */
	set_recover_size(sdp, slots, num_slots);

	spin_lock(&ls->ls_recover_spin);
	ls->ls_recover_start = generation;

	/* the first recover_done records our mount generation and jid */
	if (!ls->ls_recover_mount) {
		ls->ls_recover_mount = generation;
		ls->ls_jid = our_slot - 1;
	}

	if (!test_bit(DFL_UNMOUNT, &ls->ls_recover_flags))
		queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, 0);

	/* clear the bit, then wake anyone in wait_on_bit() on it */
	clear_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags);
	smp_mb__after_atomic();
	wake_up_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY);
	spin_unlock(&ls->ls_recover_spin);
}

/* gfs2_recover thread has a journal recovery result */

static void gdlm_recovery_result(struct gfs2_sbd *sdp, unsigned int jid,
				 unsigned int result)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;

	if (gfs2_withdrawn(sdp)) {
		fs_err(sdp, "recovery_result jid %d ignored due to withdraw.\n",
		       jid);
		return;
	}
	if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags))
		return;

	/* don't care about the recovery of own journal during mount */
	if (jid == ls->ls_jid)
		return;

	spin_lock(&ls->ls_recover_spin);
	/* results are not recorded while this node is the first mounter */
	if (test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
		spin_unlock(&ls->ls_recover_spin);
		return;
	}
	if (ls->ls_recover_size < jid + 1) {
		fs_err(sdp, "recovery_result jid %d short size %d\n",
		       jid, ls->ls_recover_size);
		spin_unlock(&ls->ls_recover_spin);
		return;
	}

	fs_info(sdp, "recover jid %d result %s\n", jid,
		result == LM_RD_GAVEUP ? "busy" : "success");

	ls->ls_recover_result[jid] = result;

	/* GAVEUP means another node is recovering the journal; delay our
	   next attempt to recover it, to give the other node a chance to
	   finish before trying again */

	if (!test_bit(DFL_UNMOUNT, &ls->ls_recover_flags))
		queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work,
				   result == LM_RD_GAVEUP ? HZ : 0);
	spin_unlock(&ls->ls_recover_spin);
}

/* recovery callbacks handed to dlm_new_lockspace() in gdlm_mount() */
static const struct dlm_lockspace_ops gdlm_lockspace_ops = {
	.recover_prep = gdlm_recover_prep,
	.recover_slot = gdlm_recover_slot,
	.recover_done = gdlm_recover_done,
};

/*
 * gdlm_mount - create or join the dlm lockspace for this filesystem.
 * @sdp: the superblock
 * @table: lock table name; the text before the first ':' is passed as the
 *         cluster name and the text after it as the lockspace name
 *
 * Resets the recovery state in the lockstruct, allocates the initial recover
 * arrays, calls dlm_new_lockspace() and, when dlm reports ops support, runs
 * control_mount() and clears SDF_NOJOURNALID.  When ops_result is negative
 * the recover arrays are freed, DFL_NO_DLM_OPS is set and 0 is returned.
 *
 * Returns 0 on success, -EINVAL when @table has no ':' or when
 * SDF_NOJOURNALID is already clear, or the error from set_recover_size(),
 * dlm_new_lockspace() or control_mount().
 */
static int gdlm_mount(struct gfs2_sbd *sdp, const char *table)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	char cluster[GFS2_LOCKNAME_LEN];
	const char *fsname;
	uint32_t flags;
	int error, ops_result;

	/*
	 * initialize everything
	 */

	INIT_DELAYED_WORK(&sdp->sd_control_work, gfs2_control_func);
	ls->ls_dlm = NULL;
	spin_lock_init(&ls->ls_recover_spin);
	ls->ls_recover_flags = 0;
	ls->ls_recover_mount = 0;
	ls->ls_recover_start = 0;
	ls->ls_recover_block = 0;
	ls->ls_recover_size = 0;
	ls->ls_recover_submit = NULL;
	ls->ls_recover_result = NULL;
	ls->ls_lvb_bits = NULL;

	error = set_recover_size(sdp, NULL, 0);
	if (error)
		goto fail;

	/*
	 * prepare dlm_new_lockspace args
	 */

	fsname = strchr(table, ':');
	if (!fsname) {
		fs_info(sdp, "no fsname found\n");
		error = -EINVAL;
		goto fail_free;
	}
	/*
	 * Copy the part of @table before ':' into the zeroed buffer.
	 * NOTE(review): the copy length is not checked against sizeof(cluster)
	 * here - presumably the caller bounds @table; confirm.
	 */
	memset(cluster, 0, sizeof(cluster));
	memcpy(cluster, table, strlen(table) - strlen(fsname));
	fsname++;

	flags = DLM_LSFL_NEWEXCL;

	/*
	 * create/join lockspace
	 */

	init_rwsem(&ls->ls_sem);
	error = dlm_new_lockspace(fsname, cluster, flags, GDLM_LVB_SIZE,
				  &gdlm_lockspace_ops, sdp, &ops_result,
				  &ls->ls_dlm);
	if (error) {
		fs_err(sdp, "dlm_new_lockspace error %d\n", error);
		goto fail_free;
	}

	if (ops_result < 0) {
		/*
		 * dlm does not support ops callbacks,
		 * old dlm_controld/gfs_controld are used, try without ops.
		 */
		fs_info(sdp, "dlm lockspace ops not used\n");
		free_recover_size(ls);
		set_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags);
		return 0;
	}

	if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags)) {
		fs_err(sdp, "dlm lockspace ops disallow jid preset\n");
		error = -EINVAL;
		goto fail_release;
	}

	/*
	 * control_mount() uses control_lock to determine first mounter,
	 * and for later mounts, waits for any recoveries to be cleared.
	 */

	error = control_mount(sdp);
	if (error) {
		fs_err(sdp, "mount control error %d\n", error);
		goto fail_release;
	}

	ls->ls_first = !!test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags);
	/* clear the bit, then wake anyone waiting on SDF_NOJOURNALID */
	clear_bit(SDF_NOJOURNALID, &sdp->sd_flags);
	smp_mb__after_atomic();
	wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID);
	return 0;

fail_release:
	dlm_release_lockspace(ls->ls_dlm, DLM_RELEASE_NORMAL);
fail_free:
	free_recover_size(ls);
fail:
	return error;
}

/*
 * Runs control_first_done() unless the lockspace is used without dlm ops;
 * a failure is only logged.
 */
static void gdlm_first_done(struct gfs2_sbd *sdp)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	int error;

	if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags))
		return;

	error = control_first_done(sdp);
	if (error)
		fs_err(sdp, "mount first_done error %d\n", error);
}

/*
 * gdlm_unmount - release our lockspace
 * @sdp: the superblock
 * @clean: Indicates whether or not the remaining nodes in the cluster should
 *	   perform recovery.  Recovery is necessary when a node withdraws and
 *	   its journal remains dirty.  Recovery isn't necessary when a node
 *	   cleanly unmounts a filesystem.
 */
static void gdlm_unmount(struct gfs2_sbd *sdp, bool clean)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;

	if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags))
		goto release;

	/* wait for gfs2_control_wq to be done with this mount */

	/* DFL_UNMOUNT stops the callbacks from queueing new control work */
	spin_lock(&ls->ls_recover_spin);
	set_bit(DFL_UNMOUNT, &ls->ls_recover_flags);
	spin_unlock(&ls->ls_recover_spin);
	flush_delayed_work(&sdp->sd_control_work);

	/* mounted_lock and control_lock will be purged in dlm recovery */
release:
	down_write(&ls->ls_sem);
	if (ls->ls_dlm) {
		dlm_release_lockspace(ls->ls_dlm,
				      clean ? DLM_RELEASE_NORMAL :
					      DLM_RELEASE_RECOVER);
		ls->ls_dlm = NULL;
	}
	up_write(&ls->ls_sem);

	free_recover_size(ls);
}

/* option patterns exposed through gfs2_dlm_ops.lm_tokens */
static const match_table_t dlm_tokens = {
	{ Opt_jid, "jid=%d"},
	{ Opt_id, "id=%d"},
	{ Opt_first, "first=%d"},
	{ Opt_nodir, "nodir=%d"},
	{ Opt_err, NULL },
};

/* operations table for the "lock_dlm" lock protocol */
const struct lm_lockops gfs2_dlm_ops = {
	.lm_proto_name = "lock_dlm",
	.lm_mount = gdlm_mount,
	.lm_first_done = gdlm_first_done,
	.lm_recovery_result = gdlm_recovery_result,
	.lm_unmount = gdlm_unmount,
	.lm_put_lock = gdlm_put_lock,
	.lm_lock = gdlm_lock,
	.lm_cancel = gdlm_cancel,
	.lm_tokens = &dlm_tokens,
};
305 3760 3617 350 648 4 16 139 150 195 11 200 837 7 206 7 844 271 149 194 298 297 2 2 299 274 296 296 296 298 53 151 12 44 56 56 273 269 3 195 84 50 3413 1 3 2 68 44 188 190 195 7 3 1 3 198 195 7 7 7 194 194 195 196 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 
474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2001 Jens Axboe <axboe@suse.de> */ #ifndef __LINUX_BIO_H #define __LINUX_BIO_H #include <linux/mempool.h> /* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */ #include <linux/blk_types.h> #include <linux/uio.h> #define BIO_MAX_VECS 256U #define BIO_MAX_INLINE_VECS UIO_MAXIOV struct queue_limits; static inline unsigned int bio_max_segs(unsigned int nr_segs) { return min(nr_segs, BIO_MAX_VECS); } #define bio_iter_iovec(bio, iter) \ bvec_iter_bvec((bio)->bi_io_vec, (iter)) #define bio_iter_page(bio, iter) \ bvec_iter_page((bio)->bi_io_vec, (iter)) #define bio_iter_len(bio, iter) \ bvec_iter_len((bio)->bi_io_vec, (iter)) #define bio_iter_offset(bio, iter) \ bvec_iter_offset((bio)->bi_io_vec, (iter)) #define bio_page(bio) bio_iter_page((bio), (bio)->bi_iter) #define bio_offset(bio) bio_iter_offset((bio), (bio)->bi_iter) 
#define bio_iovec(bio) bio_iter_iovec((bio), (bio)->bi_iter) #define bvec_iter_sectors(iter) ((iter).bi_size >> 9) #define bvec_iter_end_sector(iter) ((iter).bi_sector + bvec_iter_sectors((iter))) #define bio_sectors(bio) bvec_iter_sectors((bio)->bi_iter) #define bio_end_sector(bio) bvec_iter_end_sector((bio)->bi_iter) /* * Return the data direction, READ or WRITE. */ #define bio_data_dir(bio) \ (op_is_write(bio_op(bio)) ? WRITE : READ) static inline bool bio_flagged(const struct bio *bio, unsigned int bit) { return bio->bi_flags & (1U << bit); } static inline void bio_set_flag(struct bio *bio, unsigned int bit) { bio->bi_flags |= (1U << bit); } static inline void bio_clear_flag(struct bio *bio, unsigned int bit) { bio->bi_flags &= ~(1U << bit); } /* * Check whether this bio carries any data or not. A NULL bio is allowed. */ static inline bool bio_has_data(struct bio *bio) { if (bio && bio->bi_iter.bi_size && bio_op(bio) != REQ_OP_DISCARD && bio_op(bio) != REQ_OP_SECURE_ERASE && bio_op(bio) != REQ_OP_WRITE_ZEROES) return true; return false; } static inline bool bio_no_advance_iter(const struct bio *bio) { return bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_SECURE_ERASE || bio_op(bio) == REQ_OP_WRITE_ZEROES; } static inline void *bio_data(struct bio *bio) { if (bio_has_data(bio)) return page_address(bio_page(bio)) + bio_offset(bio); return NULL; } static inline bool bio_next_segment(const struct bio *bio, struct bvec_iter_all *iter) { if (iter->idx >= bio->bi_vcnt) return false; bvec_advance(&bio->bi_io_vec[iter->idx], iter); return true; } /* * drivers should _never_ use the all version - the bio may have been split * before it got to the driver and the driver won't own all of it */ #define bio_for_each_segment_all(bvl, bio, iter) \ for (bvl = bvec_init_iter_all(&iter); bio_next_segment((bio), &iter); ) static inline void bio_advance_iter(const struct bio *bio, struct bvec_iter *iter, unsigned int bytes) { iter->bi_sector += bytes >> 9; if 
(bio_no_advance_iter(bio)) iter->bi_size -= bytes; else bvec_iter_advance(bio->bi_io_vec, iter, bytes); /* TODO: It is reasonable to complete bio with error here. */ } /* @bytes should be less or equal to bvec[i->bi_idx].bv_len */ static inline void bio_advance_iter_single(const struct bio *bio, struct bvec_iter *iter, unsigned int bytes) { iter->bi_sector += bytes >> 9; if (bio_no_advance_iter(bio)) iter->bi_size -= bytes; else bvec_iter_advance_single(bio->bi_io_vec, iter, bytes); } void __bio_advance(struct bio *, unsigned bytes); /** * bio_advance - increment/complete a bio by some number of bytes * @bio: bio to advance * @nbytes: number of bytes to complete * * This updates bi_sector, bi_size and bi_idx; if the number of bytes to * complete doesn't align with a bvec boundary, then bv_len and bv_offset will * be updated on the last bvec as well. * * @bio will then represent the remaining, uncompleted portion of the io. */ static inline void bio_advance(struct bio *bio, unsigned int nbytes) { if (nbytes == bio->bi_iter.bi_size) { bio->bi_iter.bi_size = 0; return; } __bio_advance(bio, nbytes); } #define __bio_for_each_segment(bvl, bio, iter, start) \ for (iter = (start); \ (iter).bi_size && \ ((bvl = bio_iter_iovec((bio), (iter))), 1); \ bio_advance_iter_single((bio), &(iter), (bvl).bv_len)) #define bio_for_each_segment(bvl, bio, iter) \ __bio_for_each_segment(bvl, bio, iter, (bio)->bi_iter) #define __bio_for_each_bvec(bvl, bio, iter, start) \ for (iter = (start); \ (iter).bi_size && \ ((bvl = mp_bvec_iter_bvec((bio)->bi_io_vec, (iter))), 1); \ bio_advance_iter_single((bio), &(iter), (bvl).bv_len)) /* iterate over multi-page bvec */ #define bio_for_each_bvec(bvl, bio, iter) \ __bio_for_each_bvec(bvl, bio, iter, (bio)->bi_iter) /* * Iterate over all multi-page bvecs. Drivers shouldn't use this version for the * same reasons as bio_for_each_segment_all(). 
*/ #define bio_for_each_bvec_all(bvl, bio, i) \ for (i = 0, bvl = bio_first_bvec_all(bio); \ i < (bio)->bi_vcnt; i++, bvl++) #define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len) static inline unsigned bio_segments(struct bio *bio) { unsigned segs = 0; struct bio_vec bv; struct bvec_iter iter; /* * We special case discard/write same/write zeroes, because they * interpret bi_size differently: */ switch (bio_op(bio)) { case REQ_OP_DISCARD: case REQ_OP_SECURE_ERASE: case REQ_OP_WRITE_ZEROES: return 0; default: break; } bio_for_each_segment(bv, bio, iter) segs++; return segs; } /* * get a reference to a bio, so it won't disappear. the intended use is * something like: * * bio_get(bio); * submit_bio(rw, bio); * if (bio->bi_flags ...) * do_something * bio_put(bio); * * without the bio_get(), it could potentially complete I/O before submit_bio * returns. and then bio would be freed memory when if (bio->bi_flags ...) * runs */ static inline void bio_get(struct bio *bio) { bio->bi_flags |= (1 << BIO_REFFED); smp_mb__before_atomic(); atomic_inc(&bio->__bi_cnt); } static inline void bio_cnt_set(struct bio *bio, unsigned int count) { if (count != 1) { bio->bi_flags |= (1 << BIO_REFFED); smp_mb(); } atomic_set(&bio->__bi_cnt, count); } static inline struct bio_vec *bio_first_bvec_all(struct bio *bio) { WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)); return bio->bi_io_vec; } static inline struct page *bio_first_page_all(struct bio *bio) { return bio_first_bvec_all(bio)->bv_page; } static inline struct folio *bio_first_folio_all(struct bio *bio) { return page_folio(bio_first_page_all(bio)); } /** * struct folio_iter - State for iterating all folios in a bio. * @folio: The current folio we're iterating. NULL after the last folio. * @offset: The byte offset within the current folio. * @length: The number of bytes in this iteration (will not cross folio * boundary). 
*/ struct folio_iter { struct folio *folio; size_t offset; size_t length; /* private: for use by the iterator */ struct folio *_next; size_t _seg_count; int _i; }; static inline void bio_first_folio(struct folio_iter *fi, struct bio *bio, int i) { struct bio_vec *bvec = bio_first_bvec_all(bio) + i; if (unlikely(i >= bio->bi_vcnt)) { fi->folio = NULL; return; } fi->folio = page_folio(bvec->bv_page); fi->offset = bvec->bv_offset + PAGE_SIZE * folio_page_idx(fi->folio, bvec->bv_page); fi->_seg_count = bvec->bv_len; fi->length = min(folio_size(fi->folio) - fi->offset, fi->_seg_count); fi->_next = folio_next(fi->folio); fi->_i = i; } static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio) { fi->_seg_count -= fi->length; if (fi->_seg_count) { fi->folio = fi->_next; fi->offset = 0; fi->length = min(folio_size(fi->folio), fi->_seg_count); fi->_next = folio_next(fi->folio); } else { bio_first_folio(fi, bio, fi->_i + 1); } } /** * bio_for_each_folio_all - Iterate over each folio in a bio. * @fi: struct folio_iter which is updated for each folio. * @bio: struct bio to iterate over. */ #define bio_for_each_folio_all(fi, bio) \ for (bio_first_folio(&fi, bio, 0); fi.folio; bio_next_folio(&fi, bio)) void bio_trim(struct bio *bio, sector_t offset, sector_t size); extern struct bio *bio_split(struct bio *bio, int sectors, gfp_t gfp, struct bio_set *bs); int bio_split_io_at(struct bio *bio, const struct queue_limits *lim, unsigned *segs, unsigned max_bytes, unsigned len_align); u8 bio_seg_gap(struct request_queue *q, struct bio *prev, struct bio *next, u8 gaps_bit); /** * bio_next_split - get next @sectors from a bio, splitting if necessary * @bio: bio to split * @sectors: number of sectors to split from the front of @bio * @gfp: gfp mask * @bs: bio set to allocate from * * Return: a bio representing the next @sectors of @bio - if the bio is smaller * than @sectors, returns the original bio unchanged. 
*/ static inline struct bio *bio_next_split(struct bio *bio, int sectors, gfp_t gfp, struct bio_set *bs) { if (sectors >= bio_sectors(bio)) return bio; return bio_split(bio, sectors, gfp, bs); } enum { BIOSET_NEED_BVECS = BIT(0), BIOSET_NEED_RESCUER = BIT(1), BIOSET_PERCPU_CACHE = BIT(2), }; extern int bioset_init(struct bio_set *, unsigned int, unsigned int, int flags); extern void bioset_exit(struct bio_set *); extern int biovec_init_pool(mempool_t *pool, int pool_entries); struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs, blk_opf_t opf, gfp_t gfp_mask, struct bio_set *bs); struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask); extern void bio_put(struct bio *); struct bio *bio_alloc_clone(struct block_device *bdev, struct bio *bio_src, gfp_t gfp, struct bio_set *bs); int bio_init_clone(struct block_device *bdev, struct bio *bio, struct bio *bio_src, gfp_t gfp); extern struct bio_set fs_bio_set; static inline struct bio *bio_alloc(struct block_device *bdev, unsigned short nr_vecs, blk_opf_t opf, gfp_t gfp_mask) { return bio_alloc_bioset(bdev, nr_vecs, opf, gfp_mask, &fs_bio_set); } void submit_bio(struct bio *bio); extern void bio_endio(struct bio *); static inline void bio_io_error(struct bio *bio) { bio->bi_status = BLK_STS_IOERR; bio_endio(bio); } static inline void bio_wouldblock_error(struct bio *bio) { bio_set_flag(bio, BIO_QUIET); bio->bi_status = BLK_STS_AGAIN; bio_endio(bio); } /* * Calculate number of bvec segments that should be allocated to fit data * pointed by @iter. If @iter is backed by bvec it's going to be reused * instead of allocating a new one. 
*/ static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs) { if (iov_iter_is_bvec(iter)) return 0; return iov_iter_npages(iter, max_segs); } /** * bio_iov_bounce_nr_vecs - calculate number of bvecs for a bounce bio * @iter: iter to bounce from * @op: REQ_OP_* for the bio * * Calculates how many bvecs are needed for the next bio to bounce from/to * @iter. */ static inline unsigned short bio_iov_bounce_nr_vecs(struct iov_iter *iter, blk_opf_t op) { /* * We still need to bounce bvec iters, so don't special case them * here unlike in bio_iov_vecs_to_alloc. * * For reads we need to use a vector for the bounce buffer, account * for that here. */ if (op_is_write(op)) return iov_iter_npages(iter, BIO_MAX_VECS); return iov_iter_npages(iter, BIO_MAX_VECS - 1) + 1; } struct request_queue; void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, unsigned short max_vecs, blk_opf_t opf); static inline void bio_init_inline(struct bio *bio, struct block_device *bdev, unsigned short max_vecs, blk_opf_t opf) { bio_init(bio, bdev, bio_inline_vecs(bio), max_vecs, opf); } extern void bio_uninit(struct bio *); void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf); void bio_reuse(struct bio *bio, blk_opf_t opf); void bio_chain(struct bio *, struct bio *); int __must_check bio_add_page(struct bio *bio, struct page *page, unsigned len, unsigned off); bool __must_check bio_add_folio(struct bio *bio, struct folio *folio, size_t len, size_t off); void __bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int off); void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len, size_t off); void bio_add_virt_nofail(struct bio *bio, void *vaddr, unsigned len); /** * bio_add_max_vecs - number of bio_vecs needed to add data to a bio * @kaddr: kernel virtual address to add * @len: length in bytes to add * * Calculate how many bio_vecs need to be allocated to add the kernel virtual * address range 
in [@kaddr:@len] in the worst case.
 */
static inline unsigned int bio_add_max_vecs(void *kaddr, unsigned int len)
{
	/* vmalloc addresses: one vector per page the range touches. */
	if (is_vmalloc_addr(kaddr))
		return DIV_ROUND_UP(offset_in_page(kaddr) + len, PAGE_SIZE);
	return 1;
}

unsigned int bio_add_vmalloc_chunk(struct bio *bio, void *vaddr, unsigned len);
bool bio_add_vmalloc(struct bio *bio, void *vaddr, unsigned int len);

int submit_bio_wait(struct bio *bio);
int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data,
		size_t len, enum req_op op);

int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter,
		unsigned len_align_mask);

void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter);
void __bio_release_pages(struct bio *bio, bool mark_dirty);
extern void bio_set_pages_dirty(struct bio *bio);
extern void bio_check_pages_dirty(struct bio *bio);

int bio_iov_iter_bounce(struct bio *bio, struct iov_iter *iter);
void bio_iov_iter_unbounce(struct bio *bio, bool is_error, bool mark_dirty);

extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
			       struct bio *src, struct bvec_iter *src_iter);
extern void bio_copy_data(struct bio *dst, struct bio *src);
extern void bio_free_pages(struct bio *bio);
void guard_bio_eod(struct bio *bio);
void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter);

/* zero_fill_bio_iter() applied to the bio's own current iterator. */
static inline void zero_fill_bio(struct bio *bio)
{
	zero_fill_bio_iter(bio, bio->bi_iter);
}

/* Only bios flagged BIO_PAGE_PINNED are passed on to __bio_release_pages(). */
static inline void bio_release_pages(struct bio *bio, bool mark_dirty)
{
	if (bio_flagged(bio, BIO_PAGE_PINNED))
		__bio_release_pages(bio, mark_dirty);
}

#define bio_dev(bio) \
	disk_devt((bio)->bi_bdev->bd_disk)

#ifdef CONFIG_BLK_CGROUP
void bio_associate_blkg(struct bio *bio);
void bio_associate_blkg_from_css(struct bio *bio,
				 struct cgroup_subsys_state *css);
void bio_clone_blkg_association(struct bio *dst, struct bio *src);
void blkcg_punt_bio_submit(struct bio *bio);
#else	/* CONFIG_BLK_CGROUP */
/* Without CONFIG_BLK_CGROUP the association helpers are empty stubs. */
static inline void bio_associate_blkg(struct bio *bio) { }
static inline void bio_associate_blkg_from_css(struct bio *bio,
					       struct cgroup_subsys_state *css)
{ }
static inline void bio_clone_blkg_association(struct bio *dst,
					      struct bio *src) { }
/* Without CONFIG_BLK_CGROUP this is a plain submit_bio(). */
static inline void blkcg_punt_bio_submit(struct bio *bio)
{
	submit_bio(bio);
}
#endif	/* CONFIG_BLK_CGROUP */

/*
 * Point @bio at @bdev: clears BIO_REMAPPED, clears BIO_BPS_THROTTLED when
 * the device actually changes, then calls bio_associate_blkg().
 */
static inline void bio_set_dev(struct bio *bio, struct block_device *bdev)
{
	bio_clear_flag(bio, BIO_REMAPPED);
	if (bio->bi_bdev != bdev)
		bio_clear_flag(bio, BIO_BPS_THROTTLED);
	bio->bi_bdev = bdev;
	bio_associate_blkg(bio);
}

/*
 * BIO list management for use by remapping drivers (e.g. DM or MD) and loop.
 *
 * A bio_list anchors a singly-linked list of bios chained through the bi_next
 * member of the bio.  The bio_list also caches the last list member to allow
 * fast access to the tail.
 */
struct bio_list {
	struct bio *head;
	struct bio *tail;
};

/* A list is empty exactly when its head is NULL. */
static inline int bio_list_empty(const struct bio_list *bl)
{
	return bl->head == NULL;
}

static inline void bio_list_init(struct bio_list *bl)
{
	bl->head = bl->tail = NULL;
}

#define BIO_EMPTY_LIST	{ NULL, NULL }

#define bio_list_for_each(bio, bl) \
	for (bio = (bl)->head; bio; bio = bio->bi_next)

/* Count the entries by walking the whole list. */
static inline unsigned bio_list_size(const struct bio_list *bl)
{
	unsigned sz = 0;
	struct bio *bio;

	bio_list_for_each(bio, bl)
		sz++;

	return sz;
}

/* Append @bio at the tail of @bl. */
static inline void bio_list_add(struct bio_list *bl, struct bio *bio)
{
	bio->bi_next = NULL;

	if (bl->tail)
		bl->tail->bi_next = bio;
	else
		bl->head = bio;

	bl->tail = bio;
}

/* Insert @bio at the head of @bl. */
static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio)
{
	bio->bi_next = bl->head;

	bl->head = bio;

	/* The list was empty, so @bio is also the tail. */
	if (!bl->tail)
		bl->tail = bio;
}

/*
 * Append the entries of @bl2 to the tail of @bl. @bl2 itself is not modified
 * and keeps pointing at the moved bios.
 */
static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2)
{
	if (!bl2->head)
		return;

	if (bl->tail)
		bl->tail->bi_next = bl2->head;
	else
		bl->head = bl2->head;

	bl->tail = bl2->tail;
}

/* Like bio_list_merge(), but reinitializes @bl2 to empty afterwards. */
static inline void bio_list_merge_init(struct bio_list *bl,
		struct bio_list *bl2)
{
	bio_list_merge(bl, bl2);
	bio_list_init(bl2);
}

/*
 * Prepend the entries of @bl2 to the head of @bl. @bl2's head and tail
 * pointers are not reset.
 */
static inline void bio_list_merge_head(struct bio_list *bl,
				       struct bio_list *bl2)
{
	if (!bl2->head)
		return;

	if (bl->head)
		bl2->tail->bi_next = bl->head;
	else
		bl->tail = bl2->tail;

	bl->head = bl2->head;
}

/* Return the head of @bl without removing it (NULL when empty). */
static inline struct bio *bio_list_peek(struct bio_list *bl)
{
	return bl->head;
}

/* Detach and return the head of @bl, or NULL when the list is empty. */
static inline struct bio *bio_list_pop(struct bio_list *bl)
{
	struct bio *bio = bl->head;

	if (bio) {
		bl->head = bl->head->bi_next;
		/* Removed the last entry: the cached tail is stale. */
		if (!bl->head)
			bl->tail = NULL;

		bio->bi_next = NULL;
	}

	return bio;
}

/*
 * Empty @bl and return its former head; the returned bios remain chained
 * through bi_next.
 */
static inline struct bio *bio_list_get(struct bio_list *bl)
{
	struct bio *bio = bl->head;

	bl->head = bl->tail = NULL;

	return bio;
}

/*
 * Increment chain count for the bio. Make sure the CHAIN flag update
 * is visible before the raised count.
 */
static inline void bio_inc_remaining(struct bio *bio)
{
	bio_set_flag(bio, BIO_CHAIN);
	smp_mb__before_atomic();
	atomic_inc(&bio->__bi_remaining);
}

/*
 * bio_set is used to allow other portions of the IO system to
 * allocate their own private memory pools for bio and iovec structures.
 * These memory pools in turn all allocate from the bio_slab
 * and the bvec_slabs[].
 */
#define BIO_POOL_SIZE 2

struct bio_set {
	struct kmem_cache *bio_slab;
	unsigned int front_pad;

	/*
	 * per-cpu bio alloc cache
	 */
	struct bio_alloc_cache __percpu *cache;

	mempool_t bio_pool;
	mempool_t bvec_pool;

	unsigned int back_pad;
	/*
	 * Deadlock avoidance for stacking block drivers: see comments in
	 * bio_alloc_bioset() for details
	 */
	spinlock_t		rescue_lock;
	struct bio_list		rescue_list;
	struct work_struct	rescue_work;
	struct workqueue_struct	*rescue_workqueue;

	/*
	 * Hot un-plug notifier for the per-cpu cache, if used
	 */
	struct hlist_node cpuhp_dead;
};

/* A bio_set counts as initialized once its bio_slab pointer is set. */
static inline bool bioset_initialized(struct bio_set *bs)
{
	return bs->bio_slab != NULL;
}

/*
 * Mark a bio as polled. Note that for async polled IO, the caller must
 * expect -EWOULDBLOCK if we cannot allocate a request (or other resources).
 * We cannot block waiting for requests on polled IO, as those completions
 * must be found by the caller.
This is different than IRQ driven IO, where
 * it's safe to wait for IO to complete.
 */
static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb)
{
	bio->bi_opf |= REQ_POLLED;
	/* A NOWAIT iocb makes the bio REQ_NOWAIT as well. */
	if (kiocb->ki_flags & IOCB_NOWAIT)
		bio->bi_opf |= REQ_NOWAIT;
}

/* Drop REQ_POLLED from @bio; REQ_NOWAIT is left as it is. */
static inline void bio_clear_polled(struct bio *bio)
{
	bio->bi_opf &= ~REQ_POLLED;
}

/**
 * bio_is_zone_append - is this a zone append bio?
 * @bio:	bio to check
 *
 * Check if @bio is a zone append operation.  Core block layer code and end_io
 * handlers must use this instead of an open coded REQ_OP_ZONE_APPEND check
 * because the block layer can rewrite REQ_OP_ZONE_APPEND to REQ_OP_WRITE if
 * it is not natively supported.
 */
static inline bool bio_is_zone_append(struct bio *bio)
{
	/* Always false when CONFIG_BLK_DEV_ZONED is not enabled. */
	if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED))
		return false;
	return bio_op(bio) == REQ_OP_ZONE_APPEND ||
		bio_flagged(bio, BIO_EMULATES_ZONE_APPEND);
}

struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
		unsigned int nr_pages, blk_opf_t opf, gfp_t gfp);
struct bio *bio_chain_and_submit(struct bio *prev, struct bio *new);

struct bio *blk_alloc_discard_bio(struct block_device *bdev,
		sector_t *sector, sector_t *nr_sects, gfp_t gfp_mask);

#endif /* __LINUX_BIO_H */
144 145 145 145 145 144 1 1 1 1 54 54 55 2 1 1 1 1 3 1 2 3 3 3 3 3 3 5 5 124 124 124 191 189 5 5 190 187 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 
501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 
1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) STRATO AG 2012. All rights reserved. 
*/ #include <linux/sched.h> #include <linux/bio.h> #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/kthread.h> #include <linux/math64.h> #include "misc.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "volumes.h" #include "async-thread.h" #include "dev-replace.h" #include "sysfs.h" #include "zoned.h" #include "block-group.h" #include "fs.h" #include "accessors.h" #include "scrub.h" /* * Device replace overview * * [Objective] * To copy all extents (both new and on-disk) from source device to target * device, while still keeping the filesystem read-write. * * [Method] * There are two main methods involved: * * - Write duplication * * All new writes will be written to both target and source devices, so even * if replace gets canceled, the source device still contains up-to-date data. * * Location: handle_ops_on_dev_replace() from btrfs_map_block() * Start: btrfs_dev_replace_start() * End: btrfs_dev_replace_finishing() * Content: Latest data/metadata * * - Copy existing extents * * This happens by reusing the scrub facility, as scrub also iterates through * existing extents from commit root. * * Location: scrub_write_block_to_dev_replace() from * scrub_block_complete() * Content: Data/meta from commit root. * * Due to the content difference, we need to avoid nocow write when dev-replace * is happening. This is done by marking the block group read-only and waiting * for NOCOW writes. * * After replace is done, the finishing part is done by swapping the target and * source devices.
* * Location: btrfs_dev_replace_update_device_in_mapping_tree() from * btrfs_dev_replace_finishing() */ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, int scrub_ret); static int btrfs_dev_replace_kthread(void *data); int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info) { struct btrfs_dev_lookup_args args = { .devid = BTRFS_DEV_REPLACE_DEVID }; struct btrfs_key key; struct btrfs_root *dev_root = fs_info->dev_root; struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; struct extent_buffer *eb; int slot; int ret = 0; BTRFS_PATH_AUTO_FREE(path); int item_size; struct btrfs_dev_replace_item *ptr; u64 src_devid; if (!dev_root) return 0; path = btrfs_alloc_path(); if (!path) return -ENOMEM; key.objectid = 0; key.type = BTRFS_DEV_REPLACE_KEY; key.offset = 0; ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0); if (ret) { no_valid_dev_replace_entry_found: /* * We don't have a replace item or it's corrupted. If there is * a replace target, fail the mount. 
*/ if (unlikely(btrfs_find_device(fs_info->fs_devices, &args))) { btrfs_err(fs_info, "found replace target device without a valid replace item"); return -EUCLEAN; } dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED; dev_replace->cont_reading_from_srcdev_mode = BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_ALWAYS; dev_replace->time_started = 0; dev_replace->time_stopped = 0; atomic64_set(&dev_replace->num_write_errors, 0); atomic64_set(&dev_replace->num_uncorrectable_read_errors, 0); dev_replace->cursor_left = 0; dev_replace->committed_cursor_left = 0; dev_replace->cursor_left_last_write_of_item = 0; dev_replace->cursor_right = 0; dev_replace->srcdev = NULL; dev_replace->tgtdev = NULL; dev_replace->is_valid = 0; dev_replace->item_needs_writeback = 0; return 0; } slot = path->slots[0]; eb = path->nodes[0]; item_size = btrfs_item_size(eb, slot); ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_replace_item); if (item_size != sizeof(struct btrfs_dev_replace_item)) { btrfs_warn(fs_info, "dev_replace entry found has unexpected size, ignore entry"); goto no_valid_dev_replace_entry_found; } src_devid = btrfs_dev_replace_src_devid(eb, ptr); dev_replace->cont_reading_from_srcdev_mode = btrfs_dev_replace_cont_reading_from_srcdev_mode(eb, ptr); dev_replace->replace_state = btrfs_dev_replace_replace_state(eb, ptr); dev_replace->time_started = btrfs_dev_replace_time_started(eb, ptr); dev_replace->time_stopped = btrfs_dev_replace_time_stopped(eb, ptr); atomic64_set(&dev_replace->num_write_errors, btrfs_dev_replace_num_write_errors(eb, ptr)); atomic64_set(&dev_replace->num_uncorrectable_read_errors, btrfs_dev_replace_num_uncorrectable_read_errors(eb, ptr)); dev_replace->cursor_left = btrfs_dev_replace_cursor_left(eb, ptr); dev_replace->committed_cursor_left = dev_replace->cursor_left; dev_replace->cursor_left_last_write_of_item = dev_replace->cursor_left; dev_replace->cursor_right = btrfs_dev_replace_cursor_right(eb, ptr); dev_replace->is_valid = 1; 
dev_replace->item_needs_writeback = 0; switch (dev_replace->replace_state) { case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: /* * We don't have an active replace item but if there is a * replace target, fail the mount. */ if (unlikely(btrfs_find_device(fs_info->fs_devices, &args))) { btrfs_err(fs_info, "replace without active item, run 'device scan --forget' on the target device"); ret = -EUCLEAN; } else { dev_replace->srcdev = NULL; dev_replace->tgtdev = NULL; } break; case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: dev_replace->tgtdev = btrfs_find_device(fs_info->fs_devices, &args); args.devid = src_devid; dev_replace->srcdev = btrfs_find_device(fs_info->fs_devices, &args); /* * allow 'btrfs dev replace_cancel' if src/tgt device is * missing */ if (unlikely(!dev_replace->srcdev && !btrfs_test_opt(fs_info, DEGRADED))) { ret = -EIO; btrfs_warn(fs_info, "cannot mount because device replace operation is ongoing and"); btrfs_warn(fs_info, "srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?", src_devid); } if (unlikely(!dev_replace->tgtdev && !btrfs_test_opt(fs_info, DEGRADED))) { ret = -EIO; btrfs_warn(fs_info, "cannot mount because device replace operation is ongoing and"); btrfs_warn(fs_info, "tgtdev (devid %llu) is missing, need to run 'btrfs dev scan'?", BTRFS_DEV_REPLACE_DEVID); } if (dev_replace->tgtdev) { if (dev_replace->srcdev) { dev_replace->tgtdev->total_bytes = dev_replace->srcdev->total_bytes; dev_replace->tgtdev->disk_total_bytes = dev_replace->srcdev->disk_total_bytes; dev_replace->tgtdev->commit_total_bytes = dev_replace->srcdev->commit_total_bytes; dev_replace->tgtdev->bytes_used = dev_replace->srcdev->bytes_used; dev_replace->tgtdev->commit_bytes_used = dev_replace->srcdev->commit_bytes_used; } set_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev_replace->tgtdev->dev_state); WARN_ON(fs_info->fs_devices->rw_devices == 
0); dev_replace->tgtdev->io_width = fs_info->sectorsize; dev_replace->tgtdev->io_align = fs_info->sectorsize; dev_replace->tgtdev->sector_size = fs_info->sectorsize; dev_replace->tgtdev->fs_info = fs_info; set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev_replace->tgtdev->dev_state); } break; } return ret; } /* * Initialize a new device for device replace target from a given source dev * and path. * * Return 0 and new device in @device_out, otherwise return < 0 */ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, const char *device_path, struct btrfs_device *srcdev, struct btrfs_device **device_out) { struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; struct btrfs_device *device; struct file *bdev_file; struct block_device *bdev; u64 devid = BTRFS_DEV_REPLACE_DEVID; int ret = 0; *device_out = NULL; if (srcdev->fs_devices->seeding) { btrfs_err(fs_info, "the filesystem is a seed filesystem!"); return -EINVAL; } bdev_file = bdev_file_open_by_path(device_path, BLK_OPEN_WRITE, fs_info->sb, &fs_holder_ops); if (IS_ERR(bdev_file)) { btrfs_err(fs_info, "target device %s is invalid!", device_path); return PTR_ERR(bdev_file); } bdev = file_bdev(bdev_file); if (!btrfs_check_device_zone_type(fs_info, bdev)) { btrfs_err(fs_info, "dev-replace: zoned type of target device mismatch with filesystem"); ret = -EINVAL; goto error; } sync_blockdev(bdev); list_for_each_entry(device, &fs_devices->devices, dev_list) { if (device->bdev == bdev) { btrfs_err(fs_info, "target device is in the filesystem!"); ret = -EEXIST; goto error; } } if (bdev_nr_bytes(bdev) < btrfs_device_get_total_bytes(srcdev)) { btrfs_err(fs_info, "target device is smaller than source device!"); ret = -EINVAL; goto error; } device = btrfs_alloc_device(NULL, &devid, NULL, device_path); if (IS_ERR(device)) { ret = PTR_ERR(device); goto error; } ret = lookup_bdev(device_path, &device->devt); if (ret) goto error; set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); device->generation = 0; 
device->io_width = fs_info->sectorsize; device->io_align = fs_info->sectorsize; device->sector_size = fs_info->sectorsize; device->total_bytes = btrfs_device_get_total_bytes(srcdev); device->disk_total_bytes = btrfs_device_get_disk_total_bytes(srcdev); device->bytes_used = btrfs_device_get_bytes_used(srcdev); device->commit_total_bytes = srcdev->commit_total_bytes; device->commit_bytes_used = device->bytes_used; device->fs_info = fs_info; device->bdev = bdev; device->bdev_file = bdev_file; set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); set_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state); device->dev_stats_valid = 1; set_blocksize(bdev_file, BTRFS_BDEV_BLOCKSIZE); device->fs_devices = fs_devices; ret = btrfs_get_dev_zone_info(device, false); if (ret) goto error; mutex_lock(&fs_devices->device_list_mutex); list_add(&device->dev_list, &fs_devices->devices); fs_devices->num_devices++; fs_devices->open_devices++; mutex_unlock(&fs_devices->device_list_mutex); *device_out = device; return 0; error: bdev_fput(bdev_file); return ret; } /* * called from commit_transaction. Writes changed device replace state to * disk. 
*/ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans) { struct btrfs_fs_info *fs_info = trans->fs_info; int ret; struct btrfs_root *dev_root = fs_info->dev_root; BTRFS_PATH_AUTO_FREE(path); struct btrfs_key key; struct extent_buffer *eb; struct btrfs_dev_replace_item *ptr; struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; down_read(&dev_replace->rwsem); if (!dev_replace->is_valid || !dev_replace->item_needs_writeback) { up_read(&dev_replace->rwsem); return 0; } up_read(&dev_replace->rwsem); key.objectid = 0; key.type = BTRFS_DEV_REPLACE_KEY; key.offset = 0; path = btrfs_alloc_path(); if (!path) return -ENOMEM; ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1); if (ret < 0) { btrfs_warn(fs_info, "error %d while searching for dev_replace item!", ret); return ret; } if (ret == 0 && btrfs_item_size(path->nodes[0], path->slots[0]) < sizeof(*ptr)) { /* * need to delete old one and insert a new one. * Since no attempt is made to recover any old state, if the * dev_replace state is 'running', the data on the target * drive is lost. * It would be possible to recover the state: just make sure * that the beginning of the item is never changed and always * contains all the essential information. Then read this * minimal set of information and use it as a base for the * new state. 
*/ ret = btrfs_del_item(trans, dev_root, path); if (ret != 0) { btrfs_warn(fs_info, "delete too small dev_replace item failed %d!", ret); return ret; } ret = 1; } if (ret == 1) { /* need to insert a new item */ btrfs_release_path(path); ret = btrfs_insert_empty_item(trans, dev_root, path, &key, sizeof(*ptr)); if (ret < 0) { btrfs_warn(fs_info, "insert dev_replace item failed %d!", ret); return ret; } } eb = path->nodes[0]; ptr = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dev_replace_item); down_write(&dev_replace->rwsem); if (dev_replace->srcdev) btrfs_set_dev_replace_src_devid(eb, ptr, dev_replace->srcdev->devid); else btrfs_set_dev_replace_src_devid(eb, ptr, (u64)-1); btrfs_set_dev_replace_cont_reading_from_srcdev_mode(eb, ptr, dev_replace->cont_reading_from_srcdev_mode); btrfs_set_dev_replace_replace_state(eb, ptr, dev_replace->replace_state); btrfs_set_dev_replace_time_started(eb, ptr, dev_replace->time_started); btrfs_set_dev_replace_time_stopped(eb, ptr, dev_replace->time_stopped); btrfs_set_dev_replace_num_write_errors(eb, ptr, atomic64_read(&dev_replace->num_write_errors)); btrfs_set_dev_replace_num_uncorrectable_read_errors(eb, ptr, atomic64_read(&dev_replace->num_uncorrectable_read_errors)); dev_replace->cursor_left_last_write_of_item = dev_replace->cursor_left; btrfs_set_dev_replace_cursor_left(eb, ptr, dev_replace->cursor_left_last_write_of_item); btrfs_set_dev_replace_cursor_right(eb, ptr, dev_replace->cursor_right); dev_replace->item_needs_writeback = 0; up_write(&dev_replace->rwsem); return ret; } static int mark_block_group_to_copy(struct btrfs_fs_info *fs_info, struct btrfs_device *src_dev) { struct btrfs_path *path; struct btrfs_key key; struct btrfs_key found_key; struct btrfs_root *root = fs_info->dev_root; struct btrfs_dev_extent *dev_extent = NULL; struct btrfs_block_group *cache; struct btrfs_trans_handle *trans; int iter_ret = 0; int ret = 0; u64 chunk_offset; /* Do not use "to_copy" on non zoned filesystem for now */ if 
(!btrfs_is_zoned(fs_info)) return 0; mutex_lock(&fs_info->chunk_mutex); /* Ensure we don't have pending new block group */ spin_lock(&fs_info->trans_lock); while (fs_info->running_transaction && !list_empty(&fs_info->running_transaction->dev_update_list)) { spin_unlock(&fs_info->trans_lock); mutex_unlock(&fs_info->chunk_mutex); trans = btrfs_attach_transaction(root); if (IS_ERR(trans)) { ret = PTR_ERR(trans); mutex_lock(&fs_info->chunk_mutex); if (ret == -ENOENT) { spin_lock(&fs_info->trans_lock); continue; } else { goto unlock; } } ret = btrfs_commit_transaction(trans); mutex_lock(&fs_info->chunk_mutex); if (ret) goto unlock; spin_lock(&fs_info->trans_lock); } spin_unlock(&fs_info->trans_lock); path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; goto unlock; } path->reada = READA_FORWARD; path->search_commit_root = true; path->skip_locking = true; key.objectid = src_dev->devid; key.type = BTRFS_DEV_EXTENT_KEY; key.offset = 0; btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) { struct extent_buffer *leaf = path->nodes[0]; if (found_key.objectid != src_dev->devid) break; if (found_key.type != BTRFS_DEV_EXTENT_KEY) break; if (found_key.offset < key.offset) break; dev_extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_extent); chunk_offset = btrfs_dev_extent_chunk_offset(leaf, dev_extent); cache = btrfs_lookup_block_group(fs_info, chunk_offset); if (!cache) continue; set_bit(BLOCK_GROUP_FLAG_TO_COPY, &cache->runtime_flags); btrfs_put_block_group(cache); } if (iter_ret < 0) ret = iter_ret; btrfs_free_path(path); unlock: mutex_unlock(&fs_info->chunk_mutex); return ret; } bool btrfs_finish_block_group_to_copy(struct btrfs_device *srcdev, struct btrfs_block_group *cache, u64 physical) { struct btrfs_fs_info *fs_info = cache->fs_info; struct btrfs_chunk_map *map; u64 chunk_offset = cache->start; int num_extents, cur_extent; int i; /* Do not use "to_copy" on non zoned filesystem for now */ if (!btrfs_is_zoned(fs_info)) return true; 
spin_lock(&cache->lock); if (test_bit(BLOCK_GROUP_FLAG_REMOVED, &cache->runtime_flags)) { spin_unlock(&cache->lock); return true; } spin_unlock(&cache->lock); map = btrfs_get_chunk_map(fs_info, chunk_offset, 1); ASSERT(!IS_ERR(map)); num_extents = 0; cur_extent = 0; for (i = 0; i < map->num_stripes; i++) { /* We have more device extent to copy */ if (srcdev != map->stripes[i].dev) continue; num_extents++; if (physical == map->stripes[i].physical) cur_extent = i; } btrfs_free_chunk_map(map); if (num_extents > 1 && cur_extent < num_extents - 1) { /* * Has more stripes on this device. Keep this block group * readonly until we finish all the stripes. */ return false; } /* Last stripe on this device */ clear_bit(BLOCK_GROUP_FLAG_TO_COPY, &cache->runtime_flags); return true; } static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, const char *tgtdev_name, u64 srcdevid, const char *srcdev_name, int read_src) { struct btrfs_root *root = fs_info->dev_root; struct btrfs_trans_handle *trans; struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; int ret; struct btrfs_device *tgt_device = NULL; struct btrfs_device *src_device = NULL; src_device = btrfs_find_device_by_devspec(fs_info, srcdevid, srcdev_name); if (IS_ERR(src_device)) return PTR_ERR(src_device); if (btrfs_pinned_by_swapfile(fs_info, src_device)) { btrfs_warn(fs_info, "cannot replace device %s (devid %llu) due to active swapfile", btrfs_dev_name(src_device), src_device->devid); return -ETXTBSY; } /* * Here we commit the transaction to make sure commit_total_bytes * of all the devices are updated. 
*/ trans = btrfs_attach_transaction(root); if (!IS_ERR(trans)) { ret = btrfs_commit_transaction(trans); if (ret) return ret; } else if (PTR_ERR(trans) != -ENOENT) { return PTR_ERR(trans); } ret = btrfs_init_dev_replace_tgtdev(fs_info, tgtdev_name, src_device, &tgt_device); if (ret) return ret; ret = mark_block_group_to_copy(fs_info, src_device); if (ret) return ret; down_write(&dev_replace->rwsem); dev_replace->replace_task = current; switch (dev_replace->replace_state) { case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: break; case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: DEBUG_WARN("unexpected STARTED or SUSPENDED dev-replace state"); ret = BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED; up_write(&dev_replace->rwsem); goto leave; } dev_replace->cont_reading_from_srcdev_mode = read_src; dev_replace->srcdev = src_device; dev_replace->tgtdev = tgt_device; btrfs_info(fs_info, "dev_replace from %s (devid %llu) to %s started", btrfs_dev_name(src_device), src_device->devid, btrfs_dev_name(tgt_device)); /* * from now on, the writes to the srcdev are all duplicated to * go to the tgtdev as well (refer to btrfs_map_block()). */ dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED; dev_replace->time_started = ktime_get_real_seconds(); dev_replace->cursor_left = 0; dev_replace->committed_cursor_left = 0; dev_replace->cursor_left_last_write_of_item = 0; dev_replace->cursor_right = 0; dev_replace->is_valid = 1; dev_replace->item_needs_writeback = 1; atomic64_set(&dev_replace->num_write_errors, 0); atomic64_set(&dev_replace->num_uncorrectable_read_errors, 0); up_write(&dev_replace->rwsem); ret = btrfs_sysfs_add_device(tgt_device); if (ret) btrfs_err(fs_info, "kobj add dev failed %d", ret); btrfs_wait_ordered_roots(fs_info, U64_MAX, NULL); /* * Commit dev_replace state and reserve 1 item for it. 
* This is crucial to ensure we won't miss copying extents for new block * groups that are allocated after we started the device replace, and * must be done after setting up the device replace state. */ trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { ret = PTR_ERR(trans); down_write(&dev_replace->rwsem); dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED; dev_replace->srcdev = NULL; dev_replace->tgtdev = NULL; up_write(&dev_replace->rwsem); goto leave; } ret = btrfs_commit_transaction(trans); WARN_ON(ret); /* the disk copy procedure reuses the scrub code */ ret = btrfs_scrub_dev(fs_info, src_device->devid, 0, btrfs_device_get_total_bytes(src_device), &dev_replace->scrub_progress, 0, 1); ret = btrfs_dev_replace_finishing(fs_info, ret); if (ret == -EINPROGRESS) ret = BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS; return ret; leave: btrfs_destroy_dev_replace_tgtdev(tgt_device); return ret; } static int btrfs_check_replace_dev_names(struct btrfs_ioctl_dev_replace_args *args) { if (args->start.srcdevid == 0) { if (memchr(args->start.srcdev_name, 0, sizeof(args->start.srcdev_name)) == NULL) return -ENAMETOOLONG; } else { args->start.srcdev_name[0] = 0; } if (memchr(args->start.tgtdev_name, 0, sizeof(args->start.tgtdev_name)) == NULL) return -ENAMETOOLONG; return 0; } int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_dev_replace_args *args) { int ret; switch (args->start.cont_reading_from_srcdev_mode) { case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS: case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID: break; default: return -EINVAL; } ret = btrfs_check_replace_dev_names(args); if (ret < 0) return ret; ret = btrfs_dev_replace_start(fs_info, args->start.tgtdev_name, args->start.srcdevid, args->start.srcdev_name, args->start.cont_reading_from_srcdev_mode); args->result = ret; /* don't warn if EINPROGRESS, someone else might be running scrub */ if (ret == 
BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS || ret == BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR) return 0; return ret; } /* * blocked until all in-flight bios operations are finished. */ static void btrfs_rm_dev_replace_blocked(struct btrfs_fs_info *fs_info) { set_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state); wait_event(fs_info->dev_replace.replace_wait, !percpu_counter_sum( &fs_info->dev_replace.bio_counter)); } /* * we have removed target device, it is safe to allow new bios request. */ static void btrfs_rm_dev_replace_unblocked(struct btrfs_fs_info *fs_info) { clear_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state); wake_up(&fs_info->dev_replace.replace_wait); } /* * When finishing the device replace, before swapping the source device with the * target device we must update the chunk allocation state in the target device, * as it is empty because replace works by directly copying the chunks and not * through the normal chunk allocation path. */ static int btrfs_set_target_alloc_state(struct btrfs_device *srcdev, struct btrfs_device *tgtdev) { struct extent_state *cached_state = NULL; u64 start = 0; u64 found_start; u64 found_end; int ret = 0; lockdep_assert_held(&srcdev->fs_info->chunk_mutex); while (btrfs_find_first_extent_bit(&srcdev->alloc_state, start, &found_start, &found_end, CHUNK_ALLOCATED, &cached_state)) { ret = btrfs_set_extent_bit(&tgtdev->alloc_state, found_start, found_end, CHUNK_ALLOCATED, NULL); if (ret) break; start = found_end + 1; } btrfs_free_extent_state(cached_state); return ret; } static void btrfs_dev_replace_update_device_in_mapping_tree( struct btrfs_fs_info *fs_info, struct btrfs_device *srcdev, struct btrfs_device *tgtdev) { struct rb_node *node; /* * The chunk mutex must be held so that no new chunks can be created * while we are updating existing chunks. This guarantees we don't miss * any new chunk that gets created for a range that falls before the * range of the last chunk we processed. 
*/
	lockdep_assert_held(&fs_info->chunk_mutex);

	write_lock(&fs_info->mapping_tree_lock);
	node = rb_first_cached(&fs_info->mapping_tree);
	while (node) {
		/* Successor is sampled before the lock may be given up below. */
		struct rb_node *next = rb_next(node);
		struct btrfs_chunk_map *map;
		u64 next_start;

		map = rb_entry(node, struct btrfs_chunk_map, rb_node);
		/* First byte past this chunk; used to restart the walk. */
		next_start = map->start + map->chunk_len;

		/* Redirect every stripe that lives on the source device. */
		for (int i = 0; i < map->num_stripes; i++)
			if (srcdev == map->stripes[i].dev)
				map->stripes[i].dev = tgtdev;

		/*
		 * NOTE(review): a non-zero return presumably means the lock
		 * was released and re-acquired, so the saved 'next' is not
		 * used and the walk resumes from a fresh lookup of the first
		 * chunk map at or after next_start - confirm against
		 * cond_resched_rwlock_write().
		 */
		if (cond_resched_rwlock_write(&fs_info->mapping_tree_lock)) {
			map = btrfs_find_chunk_map_nolock(fs_info, next_start, U64_MAX);
			if (!map)
				break;
			node = &map->rb_node;
			/*
			 * Drop the lookup reference since we are holding the
			 * lock in write mode and no one can remove the chunk
			 * map from the tree and drop its tree reference.
			 */
			btrfs_free_chunk_map(map);
		} else {
			node = next;
		}
	}
	write_unlock(&fs_info->mapping_tree_lock);
}

/*
 * Finish a device replace after the scrub based copy has returned.
 *
 * @scrub_ret is the result of that copy.  Zero selects the FINISHED path, in
 * which the target device takes over the devid, uuid and size accounting of
 * the source device.  Non-zero selects the CANCELED path, in which the target
 * device is destroyed and @scrub_ret is handed back to the caller.
 *
 * Returns 0 when the replace is not in the STARTED state or after a
 * successful swap; otherwise the error from flushing delalloc, from starting
 * a transaction, or the non-zero @scrub_ret / alloc state error.
 */
static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
				       int scrub_ret)
{
	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	struct btrfs_device *tgt_device;
	struct btrfs_device *src_device;
	struct btrfs_root *root = fs_info->tree_root;
	u8 uuid_tmp[BTRFS_UUID_SIZE];
	struct btrfs_trans_handle *trans;
	int ret = 0;

	/* don't allow cancel or unmount to disturb the finishing procedure */
	mutex_lock(&dev_replace->lock_finishing_cancel_unmount);

	down_read(&dev_replace->rwsem);
	/* was the operation canceled, or is it finished?
*/
	if (dev_replace->replace_state !=
	    BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED) {
		/* Nothing to finish: release both locks and report success. */
		up_read(&dev_replace->rwsem);
		mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
		return 0;
	}

	/* Snapshot both devices; dev_replace's pointers are cleared below. */
	tgt_device = dev_replace->tgtdev;
	src_device = dev_replace->srcdev;
	up_read(&dev_replace->rwsem);

	/*
	 * flush all outstanding I/O and inode extent mappings before the
	 * copy operation is declared as being finished
	 */
	ret = btrfs_start_delalloc_roots(fs_info, LONG_MAX, false);
	if (ret) {
		mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
		return ret;
	}
	btrfs_wait_ordered_roots(fs_info, U64_MAX, NULL);

	/*
	 * We have to use this loop approach because at this point src_device
	 * has to be available for transaction commit to complete, yet new
	 * chunks shouldn't be allocated on the device.
	 *
	 * The loop commits a transaction, takes device_list_mutex and
	 * chunk_mutex, and retries (after dropping both) for as long as
	 * src_device->post_commit_list is not empty.  It is left via 'break'
	 * with BOTH mutexes still held.
	 */
	while (1) {
		trans = btrfs_start_transaction(root, 0);
		if (IS_ERR(trans)) {
			mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
			return PTR_ERR(trans);
		}
		ret = btrfs_commit_transaction(trans);
		WARN_ON(ret);

		/* Prevent write_all_supers() during the finishing procedure */
		mutex_lock(&fs_devices->device_list_mutex);
		/* Prevent new chunks being allocated on the source device */
		mutex_lock(&fs_info->chunk_mutex);

		if (!list_empty(&src_device->post_commit_list)) {
			mutex_unlock(&fs_devices->device_list_mutex);
			mutex_unlock(&fs_info->chunk_mutex);
		} else {
			break;
		}
	}

	/*
	 * Record the final state: any non-zero scrub result counts as
	 * CANCELED, zero as FINISHED.  The rwsem stays write-locked past the
	 * end of this span.
	 */
	down_write(&dev_replace->rwsem);
	dev_replace->replace_state =
		scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED
			  : BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED;
	dev_replace->tgtdev = NULL;
	dev_replace->srcdev = NULL;
	dev_replace->time_stopped = ktime_get_real_seconds();
	dev_replace->item_needs_writeback = 1;

	/*
	 * Update allocation state in the new device and replace the old device
	 * with the new one in the mapping tree.
*/
	if (!scrub_ret) {
		scrub_ret = btrfs_set_target_alloc_state(src_device, tgt_device);
		if (scrub_ret)
			goto error;
		btrfs_dev_replace_update_device_in_mapping_tree(fs_info,
								src_device,
								tgt_device);
	} else {
		/* A plain cancel is expected and is not logged as an error. */
		if (scrub_ret != -ECANCELED)
			btrfs_err(fs_info,
				 "btrfs_scrub_dev(%s, %llu, %s) failed %d",
				 btrfs_dev_name(src_device),
				 src_device->devid,
				 btrfs_dev_name(tgt_device), scrub_ret);
		/*
		 * Shared failure path.  It is also entered by the goto above
		 * when copying the allocation state fails.  Releases, in this
		 * order, the rwsem, chunk_mutex and device_list_mutex, destroys
		 * the target device while new bios are blocked, and finally
		 * drops lock_finishing_cancel_unmount.
		 */
error:
		up_write(&dev_replace->rwsem);
		mutex_unlock(&fs_info->chunk_mutex);
		mutex_unlock(&fs_devices->device_list_mutex);
		btrfs_rm_dev_replace_blocked(fs_info);
		if (tgt_device)
			btrfs_destroy_dev_replace_tgtdev(tgt_device);
		btrfs_rm_dev_replace_unblocked(fs_info);
		mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);

		return scrub_ret;
	}

	btrfs_info(fs_info,
		  "dev_replace from %s (devid %llu) to %s finished",
		  btrfs_dev_name(src_device),
		  src_device->devid,
		  btrfs_dev_name(tgt_device));
	clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &tgt_device->dev_state);
	/*
	 * Swap identities: the target inherits the source's devid and uuid,
	 * the source is parked on BTRFS_DEV_REPLACE_DEVID with the target's
	 * former uuid.
	 */
	tgt_device->devid = src_device->devid;
	src_device->devid = BTRFS_DEV_REPLACE_DEVID;
	memcpy(uuid_tmp, tgt_device->uuid, sizeof(uuid_tmp));
	memcpy(tgt_device->uuid, src_device->uuid, sizeof(tgt_device->uuid));
	memcpy(src_device->uuid, uuid_tmp, sizeof(src_device->uuid));
	/* The target also takes over the source's size accounting. */
	btrfs_device_set_total_bytes(tgt_device, src_device->total_bytes);
	btrfs_device_set_disk_total_bytes(tgt_device,
					  src_device->disk_total_bytes);
	btrfs_device_set_bytes_used(tgt_device, src_device->bytes_used);
	tgt_device->commit_bytes_used = src_device->bytes_used;

	btrfs_assign_next_active_device(src_device, tgt_device);

	/* Make the target available to the allocator as a writable device. */
	list_add(&tgt_device->dev_alloc_list, &fs_devices->alloc_list);
	fs_devices->rw_devices++;

	dev_replace->replace_task = NULL;
	up_write(&dev_replace->rwsem);
	/* Remove the source device only while no bio is in flight. */
	btrfs_rm_dev_replace_blocked(fs_info);

	btrfs_rm_dev_replace_remove_srcdev(src_device);

	btrfs_rm_dev_replace_unblocked(fs_info);

	/*
	 * Increment dev_stats_ccnt so that btrfs_run_dev_stats() will
	 * update on-disk dev stats value during commit transaction
	 */
	atomic_inc(&tgt_device->dev_stats_ccnt);

	/*
	 * this is again a
consistent state where no dev_replace procedure
	 * is running, the target device is part of the filesystem, the
	 * source device is not part of the filesystem anymore and its 1st
	 * superblock is scratched out so that it is no longer marked to
	 * belong to this filesystem.
	 */
	mutex_unlock(&fs_info->chunk_mutex);
	mutex_unlock(&fs_devices->device_list_mutex);

	/* replace the sysfs entry */
	btrfs_sysfs_remove_device(src_device);
	btrfs_sysfs_update_devid(tgt_device);
	/* Superblocks are only scratched on a source marked writeable. */
	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &src_device->dev_state))
		btrfs_scratch_superblocks(fs_info, src_device);

	/*
	 * write back the superblocks
	 *
	 * Best effort: a failure to start the transaction is ignored and the
	 * commit result is not checked.
	 */
	trans = btrfs_start_transaction(root, 0);
	if (!IS_ERR(trans))
		btrfs_commit_transaction(trans);

	mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);

	/* The source device is released only after the finishing mutex. */
	btrfs_rm_dev_replace_free_srcdev(src_device);

	return 0;
}

/*
 * Read progress of device replace status according to the state and last
 * stored position. The value format is the same as for
 * btrfs_dev_replace::progress_1000
 *
 * Returns 0 for NEVER_STARTED and CANCELED, 1000 for FINISHED, and for
 * STARTED/SUSPENDED cursor_left divided by one thousandth of the source
 * device's total bytes.  Any other state value also yields 0.
 */
static u64 btrfs_dev_replace_progress(struct btrfs_fs_info *fs_info)
{
	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
	u64 ret = 0;

	switch (dev_replace->replace_state) {
	case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
	case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
		ret = 0;
		break;
	case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
		ret = 1000;
		break;
	case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
	case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
		/*
		 * NOTE(review): srcdev is dereferenced without a NULL check
		 * and the divisor is total_bytes / 1000; presumably srcdev is
		 * always set and far larger than 1000 bytes in these two
		 * states - confirm against the callers.
		 */
		ret = div64_u64(dev_replace->cursor_left,
				div_u64(btrfs_device_get_total_bytes(
						dev_replace->srcdev), 1000));
		break;
	}

	return ret;
}

/*
 * Fill @args with the current replace state, timestamps, error counters and
 * progress.  The copy is taken while holding dev_replace->rwsem for reading,
 * and args->result is always set to the NO_ERROR result code.
 */
void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
			      struct btrfs_ioctl_dev_replace_args *args)
{
	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;

	down_read(&dev_replace->rwsem);
	/* even if !dev_replace_is_valid, the values are good enough for
	 * the replace_status ioctl */
	args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
	args->status.replace_state = dev_replace->replace_state;
args->status.time_started = dev_replace->time_started;
	args->status.time_stopped = dev_replace->time_stopped;
	args->status.num_write_errors =
		atomic64_read(&dev_replace->num_write_errors);
	args->status.num_uncorrectable_read_errors =
		atomic64_read(&dev_replace->num_uncorrectable_read_errors);
	args->status.progress_1000 = btrfs_dev_replace_progress(fs_info);
	up_read(&dev_replace->rwsem);
}

/*
 * Cancel a device replace.
 *
 * Returns -EROFS on a read-only superblock, a negative errno if the
 * transaction for the SUSPENDED case cannot be started, -EINVAL for an
 * unknown state, and otherwise one of the BTRFS_IOCTL_DEV_REPLACE_RESULT_*
 * codes: NOT_STARTED when there is nothing to cancel (or the scrub cancel
 * fails), NO_ERROR when a running or suspended replace was canceled.
 *
 * dev_replace->rwsem is released inside every switch arm, so only
 * lock_finishing_cancel_unmount is still held after the switch.
 */
int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
{
	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
	struct btrfs_device *tgt_device = NULL;
	struct btrfs_device *src_device = NULL;
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = fs_info->tree_root;
	int result;
	int ret;

	if (sb_rdonly(fs_info->sb))
		return -EROFS;

	mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
	down_write(&dev_replace->rwsem);
	switch (dev_replace->replace_state) {
	case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
	case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
	case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
		result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED;
		up_write(&dev_replace->rwsem);
		break;
	case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
		/* Devices are only captured here for the log message below. */
		tgt_device = dev_replace->tgtdev;
		src_device = dev_replace->srcdev;
		up_write(&dev_replace->rwsem);
		ret = btrfs_scrub_cancel(fs_info);
		if (ret < 0) {
			result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED;
		} else {
			result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
			/*
			 * btrfs_dev_replace_finishing() will handle the
			 * cleanup part
			 */
			btrfs_info(fs_info,
		"dev_replace from %s (devid %llu) to %s canceled",
				btrfs_dev_name(src_device), src_device->devid,
				btrfs_dev_name(tgt_device));
		}
		break;
	case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
		/*
		 * Scrub doing the replace isn't running so we need to do the
		 * cleanup step of btrfs_dev_replace_finishing() here
		 */
		result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
		tgt_device = dev_replace->tgtdev;
		src_device = dev_replace->srcdev;
		dev_replace->tgtdev = NULL;
		dev_replace->srcdev = NULL;
		dev_replace->replace_state =
				BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED;
		dev_replace->time_stopped = ktime_get_real_seconds();
		dev_replace->item_needs_writeback = 1;

		up_write(&dev_replace->rwsem);

		/* Scrub for replace must not be running in suspended state */
		btrfs_scrub_cancel(fs_info);

		trans = btrfs_start_transaction(root, 0);
		if (IS_ERR(trans)) {
			/* Early exit: the target device is not destroyed here. */
			mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
			return PTR_ERR(trans);
		}
		ret = btrfs_commit_transaction(trans);
		WARN_ON(ret);

		btrfs_info(fs_info,
		"suspended dev_replace from %s (devid %llu) to %s canceled",
			btrfs_dev_name(src_device), src_device->devid,
			btrfs_dev_name(tgt_device));

		if (tgt_device)
			btrfs_destroy_dev_replace_tgtdev(tgt_device);
		break;
	default:
		up_write(&dev_replace->rwsem);
		result = -EINVAL;
	}

	mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
	return result;
}

/*
 * Called on unmount: a replace in the STARTED state is switched to
 * SUSPENDED, its stop time is recorded and the item is flagged for
 * writeback.  All other states are left untouched.
 */
void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
{
	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;

	mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
	down_write(&dev_replace->rwsem);

	switch (dev_replace->replace_state) {
	case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
	case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
	case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
	case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
		break;
	case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
		dev_replace->replace_state =
			BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED;
		dev_replace->time_stopped = ktime_get_real_seconds();
		dev_replace->item_needs_writeback = 1;
		btrfs_info(fs_info, "suspending dev_replace for unmount");
		break;
	}

	up_write(&dev_replace->rwsem);
	mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
}

/*
 * resume dev_replace procedure that was interrupted by unmount
 *
 * Returns 0 when there is nothing to resume, when the target device is
 * missing, or when another exclusive operation is running (in the last two
 * cases the state is put back to SUSPENDED).  Otherwise a kernel thread is
 * started and the result of PTR_ERR_OR_ZERO() on it is returned.
 */
int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
{
	struct task_struct *task;
	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;

	down_write(&dev_replace->rwsem);

	switch (dev_replace->replace_state) {
	case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
	case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
	case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
		up_write(&dev_replace->rwsem);
		return 0;
	case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
		break;
	case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
		dev_replace->replace_state =
			BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED;
		break;
	}
	if (!dev_replace->tgtdev || !dev_replace->tgtdev->bdev) {
		btrfs_info(fs_info,
			   "cannot continue dev_replace, tgtdev is missing");
		btrfs_info(fs_info,
			   "you may cancel the operation after 'mount -o degraded'");
		dev_replace->replace_state =
					BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED;
		up_write(&dev_replace->rwsem);
		return 0;
	}
	up_write(&dev_replace->rwsem);

	/*
	 * This could collide with a paused balance, but the exclusive op logic
	 * should never allow both to start and pause. We don't want to allow
	 * dev-replace to start anyway.
	 */
	if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_REPLACE)) {
		down_write(&dev_replace->rwsem);
		dev_replace->replace_state =
					BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED;
		up_write(&dev_replace->rwsem);
		btrfs_info(fs_info,
		"cannot resume dev-replace, other exclusive operation running");
		return 0;
	}

	/*
	 * NOTE(review): on the success path the exclusive operation started
	 * above is finished by the thread itself (btrfs_exclop_finish() in
	 * btrfs_dev_replace_kthread()); nothing here finishes it when
	 * kthread_run() fails - confirm whether the caller handles that.
	 */
	task = kthread_run(btrfs_dev_replace_kthread, fs_info, "btrfs-devrepl");
	return PTR_ERR_OR_ZERO(task);
}

/*
 * Thread body used to resume a replace: logs the progress in percent,
 * restarts the scrub based copy at committed_cursor_left, runs the finishing
 * procedure on its result and ends the exclusive operation.
 */
static int btrfs_dev_replace_kthread(void *data)
{
	struct btrfs_fs_info *fs_info = data;
	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
	u64 progress;
	int ret;

	/* progress is in 1/1000 units; divide by 10 to print a percentage */
	progress = btrfs_dev_replace_progress(fs_info);
	progress = div_u64(progress, 10);
	btrfs_info(fs_info,
		"continuing dev_replace from %s (devid %llu) to target %s @%u%%",
		btrfs_dev_name(dev_replace->srcdev),
		dev_replace->srcdev->devid,
		btrfs_dev_name(dev_replace->tgtdev),
		(unsigned int)progress);

	ret = btrfs_scrub_dev(fs_info, dev_replace->srcdev->devid,
			      dev_replace->committed_cursor_left,
			      btrfs_device_get_total_bytes(dev_replace->srcdev),
			      &dev_replace->scrub_progress, 0, 1);
	ret = btrfs_dev_replace_finishing(fs_info, ret);
	/* A cancel is an expected outcome; anything else is worth a warning. */
	WARN_ON(ret && ret != -ECANCELED);

	btrfs_exclop_finish(fs_info);
return 0; } bool __pure btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace) { if (!dev_replace->is_valid) return false; switch (dev_replace->replace_state) { case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: return false; case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: /* * return true even if tgtdev is missing (this is * something that can happen if the dev_replace * procedure is suspended by an umount and then * the tgtdev is missing (or "btrfs dev scan") was * not called and the filesystem is remounted * in degraded state. This does not stop the * dev_replace procedure. It needs to be canceled * manually if the cancellation is wanted. */ break; } return true; } void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount) { percpu_counter_sub(&fs_info->dev_replace.bio_counter, amount); cond_wake_up_nomb(&fs_info->dev_replace.replace_wait); } void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info) { while (1) { percpu_counter_inc(&fs_info->dev_replace.bio_counter); if (likely(!test_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state))) break; btrfs_bio_counter_dec(fs_info); wait_event(fs_info->dev_replace.replace_wait, !test_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state)); } }
63 3 27 244 245 245 244 347 357 347 355 356 1 347 359 338 38 344 336 169 234 353 352 8 9 427 429 741 760 450 745 302 299 7 293 737 442 742 163 710 113 550 548 541 69 539 99 527 15 15 15 264 524 240 239 524 5 222 34 521 528 521 740 218 600 743 59 11 60 56 58 3 53 1 60 36 121 4 121 639 637 627 629 157 155 157 156 20 20 156 47 47 46 47 3 110 1 109 108 110 110 110 59 58 57 59 357 233 240 76 76 76 76 3 3 3 3 27 18 81 81 81 81 29 331 332 194 78 78 76 76 56 77 77 76 78 1 76 54 43 54 53 54 53 52 54 54 54 7 7 7 6 1 7 7 6 7 7 7 7 461 459 456 66 65 4 65 65 66 29 32 5 5 5 5 2 30 30 2 2 2 2 1 2 2 1 6 3 6 6 6 6 8 6 8 8 8 8 8 8 8 8 8 5 8 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 
369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 
869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/seq_file.c * * helper functions for making synthetic files from sequences of records. * initial implementation -- AV, Oct 2001. 
*/

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/cache.h>
#include <linux/fs.h>
#include <linux/export.h>
#include <linux/hex.h>
#include <linux/seq_file.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/cred.h>
#include <linux/mm.h>
#include <linux/printk.h>
#include <linux/string_helpers.h>
#include <linux/uio.h>
#include <linux/uaccess.h>

#include <asm/page.h>

/* Slab cache that struct seq_file objects are allocated from and freed to. */
static struct kmem_cache *seq_file_cache __ro_after_init;

/* Mark the buffer as overflowed by declaring it completely full. */
static void seq_set_overflow(struct seq_file *m)
{
	m->count = m->size;
}

/*
 * Allocate a seq_file output buffer of @size bytes.
 * Returns NULL for requests above MAX_RW_COUNT or when kvmalloc() fails.
 */
static void *seq_buf_alloc(unsigned long size)
{
	if (unlikely(size > MAX_RW_COUNT))
		return NULL;

	return kvmalloc(size, GFP_KERNEL_ACCOUNT);
}

/**
 *	seq_open -	initialize sequential file
 *	@file: file we initialize
 *	@op: method table describing the sequence
 *
 *	seq_open() sets @file, associating it with a sequence described
 *	by @op.  @op->start() sets the iterator up and returns the first
 *	element of sequence. @op->stop() shuts it down.  @op->next()
 *	returns the next element of sequence.  @op->show() prints element
 *	into the buffer.  In case of error ->start() and ->next() return
 *	ERR_PTR(error).  In the end of sequence they return %NULL. ->show()
 *	returns 0 in case of success and negative number in case of error.
 *	Returning SEQ_SKIP means "discard this element and move on".
 *	Note: seq_open() will allocate a struct seq_file and store its
 *	pointer in @file->private_data. This pointer should not be modified.
 *
 *	Return: 0 on success, -ENOMEM if the seq_file cannot be allocated.
 */
int seq_open(struct file *file, const struct seq_operations *op)
{
	struct seq_file *p;

	/* private_data is about to be overwritten; it should still be unset */
	WARN_ON(file->private_data);

	p = kmem_cache_zalloc(seq_file_cache, GFP_KERNEL);
	if (!p)
		return -ENOMEM;

	file->private_data = p;

	mutex_init(&p->lock);
	p->op = op;

	// No refcounting: the lifetime of 'p' is constrained
	// to the lifetime of the file.
	p->file = file;

	/*
	 * seq_files support lseek() and pread().  They do not implement
	 * write() at all, but we clear FMODE_PWRITE here for historical
	 * reasons.
*
	 * If a client of seq_files a) implements file.write() and b) wishes to
	 * support pwrite() then that client will need to implement its own
	 * file.open() which calls seq_open() and then sets FMODE_PWRITE.
	 */
	file->f_mode &= ~FMODE_PWRITE;
	return 0;
}
EXPORT_SYMBOL(seq_open);

/*
 * Re-run the sequence from its start until the output position reaches
 * @offset, leaving m->index, m->from and m->count describing that point.
 *
 * Returns 0 on success (including when the sequence ends before @offset),
 * the negative error reported by ->start()/->next()/->show(), -ENOMEM if a
 * buffer cannot be allocated, or -EAGAIN after the buffer has been doubled
 * because a record did not fit - the caller is expected to call again.
 */
static int traverse(struct seq_file *m, loff_t offset)
{
	loff_t pos = 0;		/* output bytes accounted for so far */
	int error = 0;
	void *p;

	m->index = 0;
	m->count = m->from = 0;
	if (!offset)
		return 0;

	if (!m->buf) {
		m->buf = seq_buf_alloc(m->size = PAGE_SIZE);
		if (!m->buf)
			return -ENOMEM;
	}
	p = m->op->start(m, &m->index);
	while (p) {
		error = PTR_ERR(p);
		if (IS_ERR(p))
			break;
		error = m->op->show(m, p);
		if (error < 0)
			break;
		/* positive return: discard whatever this record produced */
		if (unlikely(error)) {
			error = 0;
			m->count = 0;
		}
		if (seq_has_overflowed(m))
			goto Eoverflow;
		p = m->op->next(m, p, &m->index);
		/* @offset falls inside this record: keep the remainder */
		if (pos + m->count > offset) {
			m->from = offset - pos;
			m->count -= m->from;
			break;
		}
		pos += m->count;
		m->count = 0;
		if (pos == offset)
			break;
	}
	m->op->stop(m, p);
	return error;

Eoverflow:
	/* record did not fit: double the buffer and let the caller retry */
	m->op->stop(m, p);
	kvfree(m->buf);
	m->count = 0;
	m->buf = seq_buf_alloc(m->size <<= 1);
	return !m->buf ? -ENOMEM : -EAGAIN;
}

/**
 *	seq_read -	->read() method for sequential files.
*	@file: the file to read from
 *	@buf: the buffer to read to
 *	@size: the maximum number of bytes to read
 *	@ppos: the current position in the file
 *
 *	Ready-made ->f_op->read()
 *
 *	Wraps the user buffer in a single-segment iov_iter and a synchronous
 *	kiocb, then delegates to seq_read_iter(); *@ppos is updated from the
 *	kiocb afterwards.
 */
ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
{
	struct iovec iov = { .iov_base = buf, .iov_len = size};
	struct kiocb kiocb;
	struct iov_iter iter;
	ssize_t ret;

	init_sync_kiocb(&kiocb, file);
	iov_iter_init(&iter, ITER_DEST, &iov, 1, size);

	kiocb.ki_pos = *ppos;
	ret = seq_read_iter(&kiocb, &iter);
	*ppos = kiocb.ki_pos;
	return ret;
}
EXPORT_SYMBOL(seq_read);

/*
 * Ready-made ->f_op->read_iter()
 *
 * Copies out whatever is left in the buffer from a previous call; otherwise
 * produces at least one non-empty record (growing the buffer as needed) and
 * then as many further records as fit.  Returns the number of bytes copied,
 * or, when nothing was copied, -EFAULT if data was pending and else the last
 * error (0 at end of sequence).
 */
ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct seq_file *m = iocb->ki_filp->private_data;
	size_t copied = 0;
	size_t n;
	void *p;
	int err = 0;

	if (!iov_iter_count(iter))
		return 0;

	mutex_lock(&m->lock);

	/*
	 * if request is to read from zero offset, reset iterator to first
	 * record as it might have been already advanced by previous requests
	 */
	if (iocb->ki_pos == 0) {
		m->index = 0;
		m->count = 0;
	}

	/* Don't assume ki_pos is where we left it */
	if (unlikely(iocb->ki_pos != m->read_pos)) {
		/* traverse() asks for a retry each time it grows the buffer */
		while ((err = traverse(m, iocb->ki_pos)) == -EAGAIN)
			;
		if (err) {
			/* With prejudice... */
			m->read_pos = 0;
			m->index = 0;
			m->count = 0;
			goto Done;
		} else {
			m->read_pos = iocb->ki_pos;
		}
	}

	/* grab buffer if we didn't have one */
	if (!m->buf) {
		m->buf = seq_buf_alloc(m->size = PAGE_SIZE);
		if (!m->buf)
			goto Enomem;
	}
	// something left in the buffer - copy it out first
	if (m->count) {
		n = copy_to_iter(m->buf + m->from, m->count, iter);
		m->count -= n;
		m->from += n;
		copied += n;
		if (m->count)	// hadn't managed to copy everything
			goto Done;
	}
	// get a non-empty record in the buffer
	m->from = 0;
	p = m->op->start(m, &m->index);
	while (1) {
		err = PTR_ERR(p);
		if (!p || IS_ERR(p))	// EOF or an error
			break;
		err = m->op->show(m, p);
		if (err < 0)		// hard error
			break;
		if (unlikely(err))	// ->show() says "skip it"
			m->count = 0;
		if (unlikely(!m->count)) { // empty record
			p = m->op->next(m, p, &m->index);
			continue;
		}
		if (!seq_has_overflowed(m)) // got it
			goto Fill;
		// need a bigger buffer
		m->op->stop(m, p);
		kvfree(m->buf);
		m->count = 0;
		m->buf = seq_buf_alloc(m->size <<= 1);
		if (!m->buf)
			goto Enomem;
		// restart at the same index with the doubled buffer
		p = m->op->start(m, &m->index);
	}
	// EOF or an error
	m->op->stop(m, p);
	m->count = 0;
	goto Done;
Fill:
	// one non-empty record is in the buffer; if they want more,
	// try to fit more in, but in any case we need to advance
	// the iterator once for every record shown.
	while (1) {
		size_t offs = m->count;		// rollback point for this record
		loff_t pos = m->index;

		p = m->op->next(m, p, &m->index);
		if (pos == m->index) {
			pr_info_ratelimited("buggy .next function %ps did not update position index\n",
					    m->op->next);
			m->index++;
		}
		if (!p || IS_ERR(p))	// no next record for us
			break;
		if (m->count >= iov_iter_count(iter))
			break;
		err = m->op->show(m, p);
		if (err > 0) {		// ->show() says "skip it"
			m->count = offs;
		} else if (err || seq_has_overflowed(m)) {
			// drop the partial record; it is not part of this read
			m->count = offs;
			break;
		}
	}
	m->op->stop(m, p);
	n = copy_to_iter(m->buf, m->count, iter);
	copied += n;
	m->count -= n;
	m->from = n;
Done:
	if (unlikely(!copied)) {
		// the negative value is stored in the unsigned 'copied' and
		// converted back by the ssize_t return type
		copied = m->count ? -EFAULT : err;
	} else {
		iocb->ki_pos += copied;
		m->read_pos += copied;
	}
	mutex_unlock(&m->lock);
	return copied;
Enomem:
	err = -ENOMEM;
	goto Done;
}
EXPORT_SYMBOL(seq_read_iter);

/**
 *	seq_lseek -	->llseek() method for sequential files.
 *	@file: the file in question
 *	@offset: new position
 *	@whence: 0 for absolute, 1 for relative position
 *
 *	Ready-made ->f_op->llseek()
 *
 *	Return: the new position, -EINVAL for a negative target or any other
 *	@whence, or the error from traverse() (in which case the file
 *	position and the iterator state are reset to zero).
 */
loff_t seq_lseek(struct file *file, loff_t offset, int whence)
{
	struct seq_file *m = file->private_data;
	loff_t retval = -EINVAL;

	mutex_lock(&m->lock);
	switch (whence) {
	case SEEK_CUR:
		/* turn the relative request into an absolute one */
		offset += file->f_pos;
		fallthrough;
	case SEEK_SET:
		if (offset < 0)
			break;
		retval = offset;
		if (offset != m->read_pos) {
			while ((retval = traverse(m, offset)) == -EAGAIN)
				;
			if (retval) {
				/* with extreme prejudice... */
				file->f_pos = 0;
				m->read_pos = 0;
				m->index = 0;
				m->count = 0;
			} else {
				m->read_pos = offset;
				retval = file->f_pos = offset;
			}
		} else {
			/* already positioned there; only f_pos needs updating */
			file->f_pos = offset;
		}
	}
	mutex_unlock(&m->lock);
	return retval;
}
EXPORT_SYMBOL(seq_lseek);

/**
 *	seq_release -	free the structures associated with sequential file.
 *	@inode: its inode
 *	@file: file in question
 *
 *	Frees the structures associated with sequential file; can be used
 *	as ->f_op->release() if you don't have private data to destroy.
 *
 *	Return: always 0.
 */
int seq_release(struct inode *inode, struct file *file)
{
	struct seq_file *m = file->private_data;

	kvfree(m->buf);
	kmem_cache_free(seq_file_cache, m);
	return 0;
}
EXPORT_SYMBOL(seq_release);

/**
 * seq_escape_mem - print data into buffer, escaping some characters
 * @m: target buffer
 * @src: source buffer
 * @len: size of source buffer
 * @flags: flags to pass to string_escape_mem()
 * @esc: set of characters that need escaping
 *
 * Puts data into buffer, replacing each occurrence of character from
 * given class (defined by @flags and @esc) with printable escaped sequence.
 *
 * Use seq_has_overflowed() to check for errors.
*/ void seq_escape_mem(struct seq_file *m, const char *src, size_t len, unsigned int flags, const char *esc) { char *buf; size_t size = seq_get_buf(m, &buf); int ret; ret = string_escape_mem(src, len, buf, size, flags, esc); seq_commit(m, ret < size ? ret : -1); } EXPORT_SYMBOL(seq_escape_mem); void seq_vprintf(struct seq_file *m, const char *f, va_list args) { int len; if (m->count < m->size) { len = vsnprintf(m->buf + m->count, m->size - m->count, f, args); if (m->count + len < m->size) { m->count += len; return; } } seq_set_overflow(m); } EXPORT_SYMBOL(seq_vprintf); void seq_printf(struct seq_file *m, const char *f, ...) { va_list args; va_start(args, f); seq_vprintf(m, f, args); va_end(args); } EXPORT_SYMBOL(seq_printf); #ifdef CONFIG_BINARY_PRINTF void seq_bprintf(struct seq_file *m, const char *f, const u32 *binary) { int len; if (m->count < m->size) { len = bstr_printf(m->buf + m->count, m->size - m->count, f, binary); if (m->count + len < m->size) { m->count += len; return; } } seq_set_overflow(m); } EXPORT_SYMBOL(seq_bprintf); #endif /* CONFIG_BINARY_PRINTF */ /** * mangle_path - mangle and copy path to buffer beginning * @s: buffer start * @p: beginning of path in above buffer * @esc: set of characters that need escaping * * Copy the path from @p to @s, replacing each occurrence of character from * @esc with usual octal escape. * Returns pointer past last written character in @s, or NULL in case of * failure. 
*/ char *mangle_path(char *s, const char *p, const char *esc) { while (s <= p) { char c = *p++; if (!c) { return s; } else if (!strchr(esc, c)) { *s++ = c; } else if (s + 4 > p) { break; } else { *s++ = '\\'; *s++ = '0' + ((c & 0300) >> 6); *s++ = '0' + ((c & 070) >> 3); *s++ = '0' + (c & 07); } } return NULL; } EXPORT_SYMBOL(mangle_path); /** * seq_path - seq_file interface to print a pathname * @m: the seq_file handle * @path: the struct path to print * @esc: set of characters to escape in the output * * return the absolute path of 'path', as represented by the * dentry / mnt pair in the path parameter. */ int seq_path(struct seq_file *m, const struct path *path, const char *esc) { char *buf; size_t size = seq_get_buf(m, &buf); int res = -1; if (size) { char *p = d_path(path, buf, size); if (!IS_ERR(p)) { char *end = mangle_path(buf, p, esc); if (end) res = end - buf; } } seq_commit(m, res); return res; } EXPORT_SYMBOL(seq_path); /** * seq_file_path - seq_file interface to print a pathname of a file * @m: the seq_file handle * @file: the struct file to print * @esc: set of characters to escape in the output * * return the absolute path to the file. */ int seq_file_path(struct seq_file *m, struct file *file, const char *esc) { return seq_path(m, &file->f_path, esc); } EXPORT_SYMBOL(seq_file_path); /* * Same as seq_path, but relative to supplied root. */ int seq_path_root(struct seq_file *m, const struct path *path, const struct path *root, const char *esc) { char *buf; size_t size = seq_get_buf(m, &buf); int res = -ENAMETOOLONG; if (size) { char *p; p = __d_path(path, root, buf, size); if (!p) return SEQ_SKIP; res = PTR_ERR(p); if (!IS_ERR(p)) { char *end = mangle_path(buf, p, esc); if (end) res = end - buf; else res = -ENAMETOOLONG; } } seq_commit(m, res); return res < 0 && res != -ENAMETOOLONG ? res : 0; } /* * returns the path of the 'dentry' from the root of its filesystem. 
*/ int seq_dentry(struct seq_file *m, struct dentry *dentry, const char *esc) { char *buf; size_t size = seq_get_buf(m, &buf); int res = -1; if (size) { char *p = dentry_path(dentry, buf, size); if (!IS_ERR(p)) { char *end = mangle_path(buf, p, esc); if (end) res = end - buf; } } seq_commit(m, res); return res; } EXPORT_SYMBOL(seq_dentry); void *single_start(struct seq_file *p, loff_t *pos) { return *pos ? NULL : SEQ_START_TOKEN; } static void *single_next(struct seq_file *p, void *v, loff_t *pos) { ++*pos; return NULL; } static void single_stop(struct seq_file *p, void *v) { } int single_open(struct file *file, int (*show)(struct seq_file *, void *), void *data) { struct seq_operations *op = kmalloc_obj(*op, GFP_KERNEL_ACCOUNT); int res = -ENOMEM; if (op) { op->start = single_start; op->next = single_next; op->stop = single_stop; op->show = show; res = seq_open(file, op); if (!res) ((struct seq_file *)file->private_data)->private = data; else kfree(op); } return res; } EXPORT_SYMBOL(single_open); int single_open_size(struct file *file, int (*show)(struct seq_file *, void *), void *data, size_t size) { char *buf = seq_buf_alloc(size); int ret; if (!buf) return -ENOMEM; ret = single_open(file, show, data); if (ret) { kvfree(buf); return ret; } ((struct seq_file *)file->private_data)->buf = buf; ((struct seq_file *)file->private_data)->size = size; return 0; } EXPORT_SYMBOL(single_open_size); int single_release(struct inode *inode, struct file *file) { const struct seq_operations *op = ((struct seq_file *)file->private_data)->op; int res = seq_release(inode, file); kfree(op); return res; } EXPORT_SYMBOL(single_release); int seq_release_private(struct inode *inode, struct file *file) { struct seq_file *seq = file->private_data; kfree(seq->private); seq->private = NULL; return seq_release(inode, file); } EXPORT_SYMBOL(seq_release_private); void *__seq_open_private(struct file *f, const struct seq_operations *ops, int psize) { int rc; void *private; struct seq_file 
*seq; private = kzalloc(psize, GFP_KERNEL_ACCOUNT); if (private == NULL) goto out; rc = seq_open(f, ops); if (rc < 0) goto out_free; seq = f->private_data; seq->private = private; return private; out_free: kfree(private); out: return NULL; } EXPORT_SYMBOL(__seq_open_private); int seq_open_private(struct file *filp, const struct seq_operations *ops, int psize) { return __seq_open_private(filp, ops, psize) ? 0 : -ENOMEM; } EXPORT_SYMBOL(seq_open_private); void seq_putc(struct seq_file *m, char c) { if (m->count >= m->size) return; m->buf[m->count++] = c; } EXPORT_SYMBOL(seq_putc); void __seq_puts(struct seq_file *m, const char *s) { seq_write(m, s, strlen(s)); } EXPORT_SYMBOL(__seq_puts); /** * seq_put_decimal_ull_width - A helper routine for putting decimal numbers * without rich format of printf(). * only 'unsigned long long' is supported. * @m: seq_file identifying the buffer to which data should be written * @delimiter: a string which is printed before the number * @num: the number * @width: a minimum field width * * This routine will put strlen(delimiter) + number into seq_filed. * This routine is very quick when you show lots of numbers. * In usual cases, it will be better to use seq_printf(). It's easier to read. 
*/ void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter, unsigned long long num, unsigned int width) { int len; if (m->count + 2 >= m->size) /* we'll write 2 bytes at least */ goto overflow; if (delimiter && delimiter[0]) { if (delimiter[1] == 0) seq_putc(m, delimiter[0]); else seq_puts(m, delimiter); } if (!width) width = 1; if (m->count + width >= m->size) goto overflow; len = num_to_str(m->buf + m->count, m->size - m->count, num, width); if (!len) goto overflow; m->count += len; return; overflow: seq_set_overflow(m); } void seq_put_decimal_ull(struct seq_file *m, const char *delimiter, unsigned long long num) { return seq_put_decimal_ull_width(m, delimiter, num, 0); } EXPORT_SYMBOL(seq_put_decimal_ull); /** * seq_put_hex_ll - put a number in hexadecimal notation * @m: seq_file identifying the buffer to which data should be written * @delimiter: a string which is printed before the number * @v: the number * @width: a minimum field width * * seq_put_hex_ll(m, "", v, 8) is equal to seq_printf(m, "%08llx", v) * * This routine is very quick when you show lots of numbers. * In usual cases, it will be better to use seq_printf(). It's easier to read. 
*/ void seq_put_hex_ll(struct seq_file *m, const char *delimiter, unsigned long long v, unsigned int width) { unsigned int len; int i; if (delimiter && delimiter[0]) { if (delimiter[1] == 0) seq_putc(m, delimiter[0]); else seq_puts(m, delimiter); } /* If x is 0, the result of __builtin_clzll is undefined */ if (v == 0) len = 1; else len = (sizeof(v) * 8 - __builtin_clzll(v) + 3) / 4; if (len < width) len = width; if (m->count + len > m->size) { seq_set_overflow(m); return; } for (i = len - 1; i >= 0; i--) { m->buf[m->count + i] = hex_asc[0xf & v]; v = v >> 4; } m->count += len; } void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num) { int len; if (m->count + 3 >= m->size) /* we'll write 2 bytes at least */ goto overflow; if (delimiter && delimiter[0]) { if (delimiter[1] == 0) seq_putc(m, delimiter[0]); else seq_puts(m, delimiter); } if (m->count + 2 >= m->size) goto overflow; if (num < 0) { m->buf[m->count++] = '-'; num = -num; } if (num < 10) { m->buf[m->count++] = num + '0'; return; } len = num_to_str(m->buf + m->count, m->size - m->count, num, 0); if (!len) goto overflow; m->count += len; return; overflow: seq_set_overflow(m); } EXPORT_SYMBOL(seq_put_decimal_ll); /** * seq_write - write arbitrary data to buffer * @seq: seq_file identifying the buffer to which data should be written * @data: data address * @len: number of bytes * * Return 0 on success, non-zero otherwise. 
*/ int seq_write(struct seq_file *seq, const void *data, size_t len) { if (seq->count + len < seq->size) { memcpy(seq->buf + seq->count, data, len); seq->count += len; return 0; } seq_set_overflow(seq); return -1; } EXPORT_SYMBOL(seq_write); /** * seq_pad - write padding spaces to buffer * @m: seq_file identifying the buffer to which data should be written * @c: the byte to append after padding if non-zero */ void seq_pad(struct seq_file *m, char c) { int size = m->pad_until - m->count; if (size > 0) { if (size + m->count > m->size) { seq_set_overflow(m); return; } memset(m->buf + m->count, ' ', size); m->count += size; } if (c) seq_putc(m, c); } EXPORT_SYMBOL(seq_pad); /* A complete analogue of print_hex_dump() */ void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii) { const u8 *ptr = buf; int i, linelen, remaining = len; char *buffer; size_t size; int ret; if (rowsize != 16 && rowsize != 32) rowsize = 16; for (i = 0; i < len && !seq_has_overflowed(m); i += rowsize) { linelen = min(remaining, rowsize); remaining -= rowsize; switch (prefix_type) { case DUMP_PREFIX_ADDRESS: seq_printf(m, "%s%p: ", prefix_str, ptr + i); break; case DUMP_PREFIX_OFFSET: seq_printf(m, "%s%.8x: ", prefix_str, i); break; default: seq_printf(m, "%s", prefix_str); break; } size = seq_get_buf(m, &buffer); ret = hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize, buffer, size, ascii); seq_commit(m, ret < size ? 
ret : -1); seq_putc(m, '\n'); } } EXPORT_SYMBOL(seq_hex_dump); struct list_head *seq_list_start(struct list_head *head, loff_t pos) { struct list_head *lh; list_for_each(lh, head) if (pos-- == 0) return lh; return NULL; } EXPORT_SYMBOL(seq_list_start); struct list_head *seq_list_start_head(struct list_head *head, loff_t pos) { if (!pos) return head; return seq_list_start(head, pos - 1); } EXPORT_SYMBOL(seq_list_start_head); struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos) { struct list_head *lh; lh = ((struct list_head *)v)->next; ++*ppos; return lh == head ? NULL : lh; } EXPORT_SYMBOL(seq_list_next); struct list_head *seq_list_start_rcu(struct list_head *head, loff_t pos) { struct list_head *lh; list_for_each_rcu(lh, head) if (pos-- == 0) return lh; return NULL; } EXPORT_SYMBOL(seq_list_start_rcu); struct list_head *seq_list_start_head_rcu(struct list_head *head, loff_t pos) { if (!pos) return head; return seq_list_start_rcu(head, pos - 1); } EXPORT_SYMBOL(seq_list_start_head_rcu); struct list_head *seq_list_next_rcu(void *v, struct list_head *head, loff_t *ppos) { struct list_head *lh; lh = list_next_rcu((struct list_head *)v); ++*ppos; return lh == head ? NULL : lh; } EXPORT_SYMBOL(seq_list_next_rcu); /** * seq_hlist_start - start an iteration of a hlist * @head: the head of the hlist * @pos: the start position of the sequence * * Called at seq_file->op->start(). */ struct hlist_node *seq_hlist_start(struct hlist_head *head, loff_t pos) { struct hlist_node *node; hlist_for_each(node, head) if (pos-- == 0) return node; return NULL; } EXPORT_SYMBOL(seq_hlist_start); /** * seq_hlist_start_head - start an iteration of a hlist * @head: the head of the hlist * @pos: the start position of the sequence * * Called at seq_file->op->start(). Call this function if you want to * print a header at the top of the output. 
*/ struct hlist_node *seq_hlist_start_head(struct hlist_head *head, loff_t pos) { if (!pos) return SEQ_START_TOKEN; return seq_hlist_start(head, pos - 1); } EXPORT_SYMBOL(seq_hlist_start_head); /** * seq_hlist_next - move to the next position of the hlist * @v: the current iterator * @head: the head of the hlist * @ppos: the current position * * Called at seq_file->op->next(). */ struct hlist_node *seq_hlist_next(void *v, struct hlist_head *head, loff_t *ppos) { struct hlist_node *node = v; ++*ppos; if (v == SEQ_START_TOKEN) return head->first; else return node->next; } EXPORT_SYMBOL(seq_hlist_next); /** * seq_hlist_start_rcu - start an iteration of a hlist protected by RCU * @head: the head of the hlist * @pos: the start position of the sequence * * Called at seq_file->op->start(). * * This list-traversal primitive may safely run concurrently with * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by rcu_read_lock(). */ struct hlist_node *seq_hlist_start_rcu(struct hlist_head *head, loff_t pos) { struct hlist_node *node; __hlist_for_each_rcu(node, head) if (pos-- == 0) return node; return NULL; } EXPORT_SYMBOL(seq_hlist_start_rcu); /** * seq_hlist_start_head_rcu - start an iteration of a hlist protected by RCU * @head: the head of the hlist * @pos: the start position of the sequence * * Called at seq_file->op->start(). Call this function if you want to * print a header at the top of the output. * * This list-traversal primitive may safely run concurrently with * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by rcu_read_lock(). 
*/ struct hlist_node *seq_hlist_start_head_rcu(struct hlist_head *head, loff_t pos) { if (!pos) return SEQ_START_TOKEN; return seq_hlist_start_rcu(head, pos - 1); } EXPORT_SYMBOL(seq_hlist_start_head_rcu); /** * seq_hlist_next_rcu - move to the next position of the hlist protected by RCU * @v: the current iterator * @head: the head of the hlist * @ppos: the current position * * Called at seq_file->op->next(). * * This list-traversal primitive may safely run concurrently with * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by rcu_read_lock(). */ struct hlist_node *seq_hlist_next_rcu(void *v, struct hlist_head *head, loff_t *ppos) { struct hlist_node *node = v; ++*ppos; if (v == SEQ_START_TOKEN) return rcu_dereference(head->first); else return rcu_dereference(node->next); } EXPORT_SYMBOL(seq_hlist_next_rcu); /** * seq_hlist_start_percpu - start an iteration of a percpu hlist array * @head: pointer to percpu array of struct hlist_heads * @cpu: pointer to cpu "cursor" * @pos: start position of sequence * * Called at seq_file->op->start(). */ struct hlist_node * seq_hlist_start_percpu(struct hlist_head __percpu *head, int *cpu, loff_t pos) { struct hlist_node *node; for_each_possible_cpu(*cpu) { hlist_for_each(node, per_cpu_ptr(head, *cpu)) { if (pos-- == 0) return node; } } return NULL; } EXPORT_SYMBOL(seq_hlist_start_percpu); /** * seq_hlist_next_percpu - move to the next position of the percpu hlist array * @v: pointer to current hlist_node * @head: pointer to percpu array of struct hlist_heads * @cpu: pointer to cpu "cursor" * @pos: start position of sequence * * Called at seq_file->op->next(). 
*/
struct hlist_node *
seq_hlist_next_percpu(void *v, struct hlist_head __percpu *head,
			int *cpu, loff_t *pos)
{
	struct hlist_node *cur = v;

	(*pos)++;

	/* Still inside the current cpu's list. */
	if (cur->next)
		return cur->next;

	/* Otherwise move the cursor to the next cpu with a non-empty list. */
	*cpu = cpumask_next(*cpu, cpu_possible_mask);
	while (*cpu < nr_cpu_ids) {
		struct hlist_head *bucket = per_cpu_ptr(head, *cpu);

		if (!hlist_empty(bucket))
			return bucket->first;

		*cpu = cpumask_next(*cpu, cpu_possible_mask);
	}
	return NULL;
}
EXPORT_SYMBOL(seq_hlist_next_percpu);

/* Create the slab cache that backs struct seq_file allocations. */
void __init seq_file_init(void)
{
	seq_file_cache = KMEM_CACHE(seq_file, SLAB_ACCOUNT|SLAB_PANIC);
}
8 2 2 2 2 1 1 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 8 631 630 633 632 621 629 620 633 633 633 85 160 196 160 85 1287 1290 537 195 195 81 79 196 3 633 489 341 343 344 344 342 344 339 491 151 425 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 
461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 /* * Copyright (c) 2015, Mellanox 
Technologies inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #include "core_priv.h" #include <linux/in.h> #include <linux/in6.h> /* For in6_dev_get/in6_dev_put */ #include <net/addrconf.h> #include <net/bonding.h> #include <rdma/ib_cache.h> #include <rdma/ib_addr.h> static struct workqueue_struct *gid_cache_wq; enum gid_op_type { GID_DEL = 0, GID_ADD }; struct update_gid_event_work { struct work_struct work; union ib_gid gid; struct ib_gid_attr gid_attr; enum gid_op_type gid_op; }; #define ROCE_NETDEV_CALLBACK_SZ 3 struct netdev_event_work_cmd { roce_netdev_callback cb; roce_netdev_filter filter; struct net_device *ndev; struct net_device *filter_ndev; }; struct netdev_event_work { struct work_struct work; struct netdev_event_work_cmd cmds[ROCE_NETDEV_CALLBACK_SZ]; }; static const struct { bool (*is_supported)(const struct ib_device *device, u32 port_num); enum ib_gid_type gid_type; } PORT_CAP_TO_GID_TYPE[] = { {rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE}, {rdma_protocol_roce_udp_encap, IB_GID_TYPE_ROCE_UDP_ENCAP}, }; #define CAP_TO_GID_TABLE_SIZE ARRAY_SIZE(PORT_CAP_TO_GID_TYPE) unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u32 port) { int i; unsigned int ret_flags = 0; if (!rdma_protocol_roce(ib_dev, port)) return 1UL << IB_GID_TYPE_IB; for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++) if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port)) ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type; return ret_flags; } EXPORT_SYMBOL(roce_gid_type_mask_support); static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev, u32 port, union ib_gid *gid, struct ib_gid_attr *gid_attr) { int i; unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port); for (i = 0; i < IB_GID_TYPE_SIZE; i++) { if ((1UL << i) & gid_type_mask) { gid_attr->gid_type = i; switch (gid_op) { case GID_ADD: ib_cache_gid_add(ib_dev, port, gid, gid_attr); break; case GID_DEL: ib_cache_gid_del(ib_dev, port, gid, gid_attr); break; } } } } enum bonding_slave_state { BONDING_SLAVE_STATE_ACTIVE = 1UL << 0, 
BONDING_SLAVE_STATE_INACTIVE = 1UL << 1, /* No primary slave or the device isn't a slave in bonding */ BONDING_SLAVE_STATE_NA = 1UL << 2, }; static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_device *dev, struct net_device *upper) { if (upper && netif_is_bond_master(upper)) { struct net_device *pdev = bond_option_active_slave_get_rcu(netdev_priv(upper)); if (pdev) return dev == pdev ? BONDING_SLAVE_STATE_ACTIVE : BONDING_SLAVE_STATE_INACTIVE; } return BONDING_SLAVE_STATE_NA; } #define REQUIRED_BOND_STATES (BONDING_SLAVE_STATE_ACTIVE | \ BONDING_SLAVE_STATE_NA) static bool is_eth_port_of_netdev_filter(struct ib_device *ib_dev, u32 port, struct net_device *rdma_ndev, void *cookie) { struct net_device *real_dev; bool res; if (!rdma_ndev) return false; rcu_read_lock(); real_dev = rdma_vlan_dev_real_dev(cookie); if (!real_dev) real_dev = cookie; res = ((rdma_is_upper_dev_rcu(rdma_ndev, cookie) && (is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) & REQUIRED_BOND_STATES)) || real_dev == rdma_ndev); rcu_read_unlock(); return res; } static bool is_eth_port_inactive_slave_filter(struct ib_device *ib_dev, u32 port, struct net_device *rdma_ndev, void *cookie) { struct net_device *master_dev; bool res; if (!rdma_ndev) return false; rcu_read_lock(); master_dev = netdev_master_upper_dev_get_rcu(rdma_ndev); res = is_eth_active_slave_of_bonding_rcu(rdma_ndev, master_dev) == BONDING_SLAVE_STATE_INACTIVE; rcu_read_unlock(); return res; } /** * is_ndev_for_default_gid_filter - Check if a given netdevice * can be considered for default GIDs or not. * @ib_dev: IB device to check * @port: Port to consider for adding default GID * @rdma_ndev: rdma netdevice pointer * @cookie: Netdevice to consider to form a default GID * * is_ndev_for_default_gid_filter() returns true if a given netdevice can be * considered for deriving default RoCE GID, returns false otherwise. 
*/
static bool
is_ndev_for_default_gid_filter(struct ib_device *ib_dev, u32 port,
			       struct net_device *rdma_ndev, void *cookie)
{
	struct net_device *event_ndev = cookie;
	bool eligible;

	if (!rdma_ndev)
		return false;

	rcu_read_lock();

	/*
	 * When rdma netdevice is used in bonding, bonding master netdevice
	 * should be considered for default GIDs. Therefore, ignore slave rdma
	 * netdevices when bonding is considered.
	 * Additionally when event(cookie) netdevice is bond master device,
	 * make sure that it is the upper netdevice of rdma netdevice.
	 */
	if (event_ndev == rdma_ndev && !netif_is_bond_slave(rdma_ndev))
		eligible = true;
	else
		eligible = netif_is_bond_master(event_ndev) &&
			   rdma_is_upper_dev_rcu(rdma_ndev, event_ndev);

	rcu_read_unlock();
	return eligible;
}

/* Filter that accepts every port unconditionally. */
static bool pass_all_filter(struct ib_device *ib_dev, u32 port,
			    struct net_device *rdma_ndev, void *cookie)
{
	return true;
}

/*
 * Accept when @cookie is the rdma netdevice itself or one of its upper
 * devices; reject when the port has no rdma netdevice.
 */
static bool upper_device_filter(struct ib_device *ib_dev, u32 port,
				struct net_device *rdma_ndev, void *cookie)
{
	bool is_upper;

	if (!rdma_ndev)
		return false;

	/* Identity needs no RCU-protected lookup. */
	if (rdma_ndev == cookie)
		return true;

	rcu_read_lock();
	is_upper = rdma_is_upper_dev_rcu(rdma_ndev, cookie);
	rcu_read_unlock();

	return is_upper;
}

/**
 * is_upper_ndev_bond_master_filter - Check if a given netdevice
 * is bond master device of netdevice of the RDMA device of port.
 * @ib_dev:		IB device to check
 * @port:		Port to consider for adding default GID
 * @rdma_ndev:		Pointer to rdma netdevice
 * @cookie:	        Netdevice to consider to form a default GID
 *
 * is_upper_ndev_bond_master_filter() returns true if a cookie_netdev
 * is bond master device and rdma_ndev is its lower netdevice. It might
 * not have been established as slave device yet.
*/
static bool
is_upper_ndev_bond_master_filter(struct ib_device *ib_dev, u32 port,
				 struct net_device *rdma_ndev,
				 void *cookie)
{
	struct net_device *cookie_ndev = cookie;
	bool match = false;

	if (!rdma_ndev)
		return false;

	/* The upper-device lookup is done under the RCU read lock. */
	rcu_read_lock();
	if (netif_is_bond_master(cookie_ndev) &&
	    rdma_is_upper_dev_rcu(rdma_ndev, cookie_ndev))
		match = true;
	rcu_read_unlock();
	return match;
}

/*
 * Convert the IP address in @addr to a GID and apply @gid_op to it for
 * @ndev on every GID type the port supports (via update_gid()).
 */
static void update_gid_ip(enum gid_op_type gid_op,
			  struct ib_device *ib_dev,
			  u32 port, struct net_device *ndev,
			  struct sockaddr *addr)
{
	union ib_gid gid;
	struct ib_gid_attr gid_attr;

	rdma_ip2gid(addr, &gid);
	/* Start from zeroed attributes; only the netdevice is filled in. */
	memset(&gid_attr, 0, sizeof(gid_attr));
	gid_attr.ndev = ndev;

	update_gid(gid_op, ib_dev, port, &gid, &gid_attr);
}

/*
 * Delete the default GIDs of @rdma_ndev, unless @event_ndev is neither the
 * rdma netdevice nor one of its upper devices, or the rdma netdevice is an
 * inactive bonding slave of the (VLAN-resolved) event device.
 */
static void bond_delete_netdev_default_gids(struct ib_device *ib_dev,
					    u32 port,
					    struct net_device *rdma_ndev,
					    struct net_device *event_ndev)
{
	struct net_device *real_dev = rdma_vlan_dev_real_dev(event_ndev);
	unsigned long gid_type_mask;

	if (!rdma_ndev)
		return;

	/* Not a VLAN device: the event device is its own real device. */
	if (!real_dev)
		real_dev = event_ndev;

	rcu_read_lock();

	if (((rdma_ndev != event_ndev &&
	      !rdma_is_upper_dev_rcu(rdma_ndev, event_ndev)) ||
	     is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev)
						 ==
	     BONDING_SLAVE_STATE_INACTIVE)) {
		rcu_read_unlock();
		return;
	}

	rcu_read_unlock();

	gid_type_mask = roce_gid_type_mask_support(ib_dev, port);

	ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev,
				     gid_type_mask,
				     IB_CACHE_GID_DEFAULT_MODE_DELETE);
}

/*
 * Add a GID for every IPv4 address configured on @ndev.  The addresses are
 * first copied to a private list under the RCU read lock, and the GID
 * updates are issued only after that lock has been dropped.
 */
static void enum_netdev_ipv4_ips(struct ib_device *ib_dev,
				 u32 port, struct net_device *ndev)
{
	const struct in_ifaddr *ifa;
	struct in_device *in_dev;
	struct sin_list {
		struct list_head	list;
		struct sockaddr_in	ip;
	};
	struct sin_list *sin_iter;
	struct sin_list *sin_temp;

	LIST_HEAD(sin_list);
	/* Nothing to do for a device that is being unregistered. */
	if (ndev->reg_state >= NETREG_UNREGISTERING)
		return;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(ndev);
	if (!in_dev) {
		rcu_read_unlock();
		return;
	}

	in_dev_for_each_ifa_rcu(ifa, in_dev) {
		struct sin_list *entry = kzalloc_obj(*entry, GFP_ATOMIC);

		/* Best effort: an address whose entry cannot be allocated is skipped. */
		if (!entry)
			continue;

		entry->ip.sin_family = AF_INET;
		entry->ip.sin_addr.s_addr = ifa->ifa_address;
		list_add_tail(&entry->list, &sin_list);
	}

	rcu_read_unlock();

	/* Drain the private list: add each GID, then free its entry. */
	list_for_each_entry_safe(sin_iter, sin_temp, &sin_list, list) {
		update_gid_ip(GID_ADD, ib_dev, port, ndev,
			      (struct sockaddr *)&sin_iter->ip);
		list_del(&sin_iter->list);
		kfree(sin_iter);
	}
}

/*
 * Add a GID for every IPv6 address configured on @ndev.  The addresses are
 * first copied to a private list while holding the inet6 device's lock, and
 * the GID updates are issued only after the lock and the device reference
 * have been released.
 */
static void enum_netdev_ipv6_ips(struct ib_device *ib_dev,
				 u32 port, struct net_device *ndev)
{
	struct inet6_ifaddr *ifp;
	struct inet6_dev *in6_dev;
	struct sin6_list {
		struct list_head	list;
		struct sockaddr_in6	sin6;
	};
	struct sin6_list *sin6_iter;
	struct sin6_list *sin6_temp;
	struct ib_gid_attr gid_attr = {.ndev = ndev};
	LIST_HEAD(sin6_list);

	/* Nothing to do for a device that is being unregistered. */
	if (ndev->reg_state >= NETREG_UNREGISTERING)
		return;

	in6_dev = in6_dev_get(ndev);
	if (!in6_dev)
		return;

	read_lock_bh(&in6_dev->lock);
	list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
		struct sin6_list *entry = kzalloc_obj(*entry, GFP_ATOMIC);

		/* Best effort: an address whose entry cannot be allocated is skipped. */
		if (!entry)
			continue;

		entry->sin6.sin6_family = AF_INET6;
		entry->sin6.sin6_addr = ifp->addr;
		list_add_tail(&entry->list, &sin6_list);
	}
	read_unlock_bh(&in6_dev->lock);

	in6_dev_put(in6_dev);

	/* Drain the private list: add each GID, then free its entry. */
	list_for_each_entry_safe(sin6_iter, sin6_temp, &sin6_list, list) {
		union ib_gid	gid;

		rdma_ip2gid((struct sockaddr *)&sin6_iter->sin6, &gid);
		update_gid(GID_ADD, ib_dev, port, &gid, &gid_attr);
		list_del(&sin6_iter->list);
		kfree(sin6_iter);
	}
}

/* Add GIDs for the IPv4 and, when CONFIG_IPV6 is enabled, IPv6 addresses of @ndev. */
static void _add_netdev_ips(struct ib_device *ib_dev, u32 port,
			    struct net_device *ndev)
{
	enum_netdev_ipv4_ips(ib_dev, port, ndev);
	if (IS_ENABLED(CONFIG_IPV6))
		enum_netdev_ipv6_ips(ib_dev, port, ndev);
}

/* Callback form of _add_netdev_ips(): @cookie is the netdevice, @rdma_ndev is unused. */
static void add_netdev_ips(struct ib_device *ib_dev, u32 port,
			   struct net_device *rdma_ndev, void *cookie)
{
	_add_netdev_ips(ib_dev, port, cookie);
}

/* Callback that deletes all GIDs of the @cookie netdevice; @rdma_ndev is unused. */
static void del_netdev_ips(struct ib_device *ib_dev, u32 port,
			   struct net_device *rdma_ndev, void *cookie)
{
	ib_cache_gid_del_all_netdev_gids(ib_dev, port, cookie);
}

/**
 * del_default_gids - Delete default GIDs of the event/cookie netdevice
 * @ib_dev:	RDMA device pointer
 * @port:	Port of the RDMA device whose GID
table to consider * @rdma_ndev: Unused rdma netdevice * @cookie: Pointer to event netdevice * * del_default_gids() deletes the default GIDs of the event/cookie netdevice. */ static void del_default_gids(struct ib_device *ib_dev, u32 port, struct net_device *rdma_ndev, void *cookie) { struct net_device *cookie_ndev = cookie; unsigned long gid_type_mask; gid_type_mask = roce_gid_type_mask_support(ib_dev, port); ib_cache_gid_set_default_gid(ib_dev, port, cookie_ndev, gid_type_mask, IB_CACHE_GID_DEFAULT_MODE_DELETE); } static void add_default_gids(struct ib_device *ib_dev, u32 port, struct net_device *rdma_ndev, void *cookie) { struct net_device *event_ndev = cookie; unsigned long gid_type_mask; gid_type_mask = roce_gid_type_mask_support(ib_dev, port); ib_cache_gid_set_default_gid(ib_dev, port, event_ndev, gid_type_mask, IB_CACHE_GID_DEFAULT_MODE_SET); } static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev, u32 port, struct net_device *rdma_ndev, void *cookie) { struct net *net; struct net_device *ndev; /* Lock the rtnl to make sure the netdevs does not move under * our feet */ rtnl_lock(); down_read(&net_rwsem); for_each_net(net) for_each_netdev(net, ndev) { /* * Filter and add default GIDs of the primary netdevice * when not in bonding mode, or add default GIDs * of bond master device, when in bonding mode. */ if (is_ndev_for_default_gid_filter(ib_dev, port, rdma_ndev, ndev)) add_default_gids(ib_dev, port, rdma_ndev, ndev); if (is_eth_port_of_netdev_filter(ib_dev, port, rdma_ndev, ndev)) _add_netdev_ips(ib_dev, port, ndev); } up_read(&net_rwsem); rtnl_unlock(); } /** * rdma_roce_rescan_device - Rescan all of the network devices in the system * and add their gids, as needed, to the relevant RoCE devices. 
* * @ib_dev: the rdma device */ void rdma_roce_rescan_device(struct ib_device *ib_dev) { ib_enum_roce_netdev(ib_dev, pass_all_filter, NULL, enum_all_gids_of_dev_cb, NULL); } EXPORT_SYMBOL(rdma_roce_rescan_device); /** * rdma_roce_rescan_port - Rescan all of the network devices in the system * and add their gids if relevant to the port of the RoCE device. * * @ib_dev: IB device * @port: Port number */ void rdma_roce_rescan_port(struct ib_device *ib_dev, u32 port) { struct net_device *ndev = NULL; if (rdma_protocol_roce(ib_dev, port)) { ndev = ib_device_get_netdev(ib_dev, port); if (!ndev) return; enum_all_gids_of_dev_cb(ib_dev, port, ndev, ndev); dev_put(ndev); } } EXPORT_SYMBOL(rdma_roce_rescan_port); static void callback_for_addr_gid_device_scan(struct ib_device *device, u32 port, struct net_device *rdma_ndev, void *cookie) { struct update_gid_event_work *parsed = cookie; return update_gid(parsed->gid_op, device, port, &parsed->gid, &parsed->gid_attr); } struct upper_list { struct list_head list; struct net_device *upper; }; static int netdev_upper_walk(struct net_device *upper, struct netdev_nested_priv *priv) { struct upper_list *entry = kmalloc_obj(*entry, GFP_ATOMIC); struct list_head *upper_list = (struct list_head *)priv->data; if (!entry) return 0; list_add_tail(&entry->list, upper_list); dev_hold(upper); entry->upper = upper; return 0; } static void handle_netdev_upper(struct ib_device *ib_dev, u32 port, void *cookie, void (*handle_netdev)(struct ib_device *ib_dev, u32 port, struct net_device *ndev)) { struct net_device *ndev = cookie; struct netdev_nested_priv priv; struct upper_list *upper_iter; struct upper_list *upper_temp; LIST_HEAD(upper_list); priv.data = &upper_list; rcu_read_lock(); netdev_walk_all_upper_dev_rcu(ndev, netdev_upper_walk, &priv); rcu_read_unlock(); handle_netdev(ib_dev, port, ndev); list_for_each_entry_safe(upper_iter, upper_temp, &upper_list, list) { handle_netdev(ib_dev, port, upper_iter->upper); dev_put(upper_iter->upper); 
list_del(&upper_iter->list); kfree(upper_iter); } } void roce_del_all_netdev_gids(struct ib_device *ib_dev, u32 port, struct net_device *ndev) { ib_cache_gid_del_all_netdev_gids(ib_dev, port, ndev); } EXPORT_SYMBOL(roce_del_all_netdev_gids); static void del_netdev_upper_ips(struct ib_device *ib_dev, u32 port, struct net_device *rdma_ndev, void *cookie) { handle_netdev_upper(ib_dev, port, cookie, roce_del_all_netdev_gids); } static void add_netdev_upper_ips(struct ib_device *ib_dev, u32 port, struct net_device *rdma_ndev, void *cookie) { handle_netdev_upper(ib_dev, port, cookie, _add_netdev_ips); } static void del_netdev_default_ips_join(struct ib_device *ib_dev, u32 port, struct net_device *rdma_ndev, void *cookie) { struct net_device *master_ndev; rcu_read_lock(); master_ndev = netdev_master_upper_dev_get_rcu(rdma_ndev); dev_hold(master_ndev); rcu_read_unlock(); if (master_ndev) { bond_delete_netdev_default_gids(ib_dev, port, rdma_ndev, master_ndev); dev_put(master_ndev); } } /* The following functions operate on all IB devices. netdevice_event and * addr_event execute ib_enum_all_roce_netdevs through a work. * ib_enum_all_roce_netdevs iterates through all IB devices. 
*/

/*
 * Work handler for netdevice events: run every queued command, in order,
 * through ib_enum_all_roce_netdevs(), stopping at the first slot whose
 * callback is NULL. The references taken on each command's ndev and
 * filter_ndev by netdevice_queue_work() are dropped here, and the work
 * item is freed.
 */
static void netdevice_event_work_handler(struct work_struct *_work)
{
	struct netdev_event_work *work =
		container_of(_work, struct netdev_event_work, work);
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(work->cmds) && work->cmds[i].cb; i++) {
		ib_enum_all_roce_netdevs(work->cmds[i].filter,
					 work->cmds[i].filter_ndev,
					 work->cmds[i].cb,
					 work->cmds[i].ndev);
		dev_put(work->cmds[i].ndev);
		dev_put(work->cmds[i].filter_ndev);
	}

	kfree(work);
}

/*
 * Copy @cmds into a newly allocated work item and queue it on gid_cache_wq.
 * Commands that leave ndev or filter_ndev NULL get @ndev instead; a
 * reference is held on both devices of every used command until the
 * handler has run. Always returns NOTIFY_DONE; if the allocation fails the
 * event is not queued.
 */
static int netdevice_queue_work(struct netdev_event_work_cmd *cmds,
				struct net_device *ndev)
{
	unsigned int i;
	struct netdev_event_work *ndev_work = kmalloc_obj(*ndev_work);

	if (!ndev_work)
		return NOTIFY_DONE;

	memcpy(ndev_work->cmds, cmds, sizeof(ndev_work->cmds));
	for (i = 0; i < ARRAY_SIZE(ndev_work->cmds) && ndev_work->cmds[i].cb; i++) {
		if (!ndev_work->cmds[i].ndev)
			ndev_work->cmds[i].ndev = ndev;
		if (!ndev_work->cmds[i].filter_ndev)
			ndev_work->cmds[i].filter_ndev = ndev;
		dev_hold(ndev_work->cmds[i].ndev);
		dev_hold(ndev_work->cmds[i].filter_ndev);
	}
	INIT_WORK(&ndev_work->work, netdevice_event_work_handler);

	queue_work(gid_cache_wq, &ndev_work->work);

	return NOTIFY_DONE;
}

/* Command templates shared by the event handlers below. */
static const struct netdev_event_work_cmd add_cmd = {
	.cb	= add_netdev_ips,
	.filter	= is_eth_port_of_netdev_filter
};

static const struct netdev_event_work_cmd add_cmd_upper_ips = {
	.cb	= add_netdev_upper_ips,
	.filter = is_eth_port_of_netdev_filter
};

/*
 * Fill @cmds for an unlink (CHANGEUPPER with linking == false): first
 * delete the GIDs of the former upper device and its uppers, then run
 * add_cmd.
 */
static void
ndev_event_unlink(struct netdev_notifier_changeupper_info *changeupper_info,
		  struct netdev_event_work_cmd *cmds)
{
	static const struct netdev_event_work_cmd
			upper_ips_del_cmd = {
				.cb	= del_netdev_upper_ips,
				.filter	= upper_device_filter
	};

	cmds[0] = upper_ips_del_cmd;
	cmds[0].ndev = changeupper_info->upper_dev;
	cmds[1] = add_cmd;
}

static const struct netdev_event_work_cmd bonding_default_add_cmd = {
	.cb	= add_default_gids,
	.filter	= is_upper_ndev_bond_master_filter
};

/*
 * Fill @cmds for a link (CHANGEUPPER with linking == true); the three
 * commands are executed in the order they are stored.
 */
static void
ndev_event_link(struct net_device *event_ndev,
		struct netdev_notifier_changeupper_info *changeupper_info,
		struct netdev_event_work_cmd *cmds)
{
	static const struct netdev_event_work_cmd
			bonding_default_del_cmd = {
				.cb	= del_default_gids,
				.filter	= is_upper_ndev_bond_master_filter
			};
	/*
	 * When a lower netdev is linked to its upper bonding
	 * netdev, delete lower slave netdev's default GIDs.
	 */
	cmds[0] = bonding_default_del_cmd;
	cmds[0].ndev = event_ndev;
	cmds[0].filter_ndev = changeupper_info->upper_dev;

	/* Now add bonding upper device default GIDs */
	cmds[1] = bonding_default_add_cmd;
	cmds[1].ndev = changeupper_info->upper_dev;
	cmds[1].filter_ndev = changeupper_info->upper_dev;

	/* Now add bonding upper device IP based GIDs */
	cmds[2] = add_cmd_upper_ips;
	cmds[2].ndev = changeupper_info->upper_dev;
	cmds[2].filter_ndev = changeupper_info->upper_dev;
}

/* Dispatch a CHANGEUPPER notification to the link or unlink builder. */
static void netdevice_event_changeupper(struct net_device *event_ndev,
		struct netdev_notifier_changeupper_info *changeupper_info,
		struct netdev_event_work_cmd *cmds)
{
	if (changeupper_info->linking)
		ndev_event_link(event_ndev, changeupper_info, cmds);
	else
		ndev_event_unlink(changeupper_info, cmds);
}

static const struct netdev_event_work_cmd add_default_gid_cmd = {
	.cb	= add_default_gids,
	.filter	= is_ndev_for_default_gid_filter,
};

/*
 * netdevice notifier callback. Events on devices that are not
 * ARPHRD_ETHER, and events not listed in the switch, are ignored. For the
 * handled events a command list is built on the stack and handed to
 * netdevice_queue_work(), so the actual GID changes happen from the
 * workqueue. Always returns NOTIFY_DONE.
 */
static int netdevice_event(struct notifier_block *this, unsigned long event,
			   void *ptr)
{
	static const struct netdev_event_work_cmd del_cmd = {
		.cb = del_netdev_ips, .filter = pass_all_filter};
	static const struct netdev_event_work_cmd
			bonding_default_del_cmd_join = {
				.cb	= del_netdev_default_ips_join,
				.filter	= is_eth_port_inactive_slave_filter
			};
	static const struct netdev_event_work_cmd
			netdev_del_cmd = {
				.cb	= del_netdev_ips,
				.filter	= is_eth_port_of_netdev_filter
			};
	static const struct netdev_event_work_cmd bonding_event_ips_del_cmd = {
		.cb = del_netdev_upper_ips, .filter = upper_device_filter};
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	/* Unused slots keep a NULL cb, which terminates the command list. */
	struct netdev_event_work_cmd cmds[ROCE_NETDEV_CALLBACK_SZ] = { {NULL} };

	if (ndev->type != ARPHRD_ETHER)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_REGISTER:
	case NETDEV_UP:
		cmds[0] = bonding_default_del_cmd_join;
		cmds[1] = add_default_gid_cmd;
		cmds[2] = add_cmd;
		break;

	case NETDEV_UNREGISTER:
		if (ndev->reg_state < NETREG_UNREGISTERED)
			cmds[0] = del_cmd;
		else
			return NOTIFY_DONE;
		break;

	case NETDEV_CHANGEADDR:
		cmds[0] = netdev_del_cmd;
		/* Only re-add GIDs while the device is still registered. */
		if (ndev->reg_state == NETREG_REGISTERED) {
			cmds[1] = add_default_gid_cmd;
			cmds[2] = add_cmd;
		}
		break;

	case NETDEV_CHANGEUPPER:
		netdevice_event_changeupper(ndev,
			container_of(ptr, struct netdev_notifier_changeupper_info, info),
			cmds);
		break;

	case NETDEV_BONDING_FAILOVER:
		cmds[0] = bonding_event_ips_del_cmd;
		/* Add default GIDs of the bond device */
		cmds[1] = bonding_default_add_cmd;
		/* Add IP based GIDs of the bond device */
		cmds[2] = add_cmd_upper_ips;
		break;

	default:
		return NOTIFY_DONE;
	}

	return netdevice_queue_work(cmds, ndev);
}

/*
 * Work handler for address events: apply the queued GID operation on the
 * matching RoCE ports, then drop the netdev reference taken in addr_event()
 * and free the work item.
 */
static void update_gid_event_work_handler(struct work_struct *_work)
{
	struct update_gid_event_work *work =
		container_of(_work, struct update_gid_event_work, work);

	ib_enum_all_roce_netdevs(is_eth_port_of_netdev_filter,
				 work->gid_attr.ndev,
				 callback_for_addr_gid_device_scan, work);

	dev_put(work->gid_attr.ndev);
	kfree(work);
}

/*
 * Common part of the IPv4/IPv6 address notifiers. NETDEV_UP maps to
 * GID_ADD and NETDEV_DOWN to GID_DEL; every other event, and every device
 * that is not ARPHRD_ETHER, is ignored. The address @sa is converted to a
 * GID and the operation is queued on gid_cache_wq with a reference held on
 * @ndev. Always returns NOTIFY_DONE, also when the GFP_ATOMIC allocation
 * fails.
 */
static int addr_event(struct notifier_block *this, unsigned long event,
		      struct sockaddr *sa, struct net_device *ndev)
{
	struct update_gid_event_work *work;
	enum gid_op_type gid_op;

	if (ndev->type != ARPHRD_ETHER)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_UP:
		gid_op = GID_ADD;
		break;

	case NETDEV_DOWN:
		gid_op = GID_DEL;
		break;

	default:
		return NOTIFY_DONE;
	}

	work = kmalloc_obj(*work, GFP_ATOMIC);
	if (!work)
		return NOTIFY_DONE;

	INIT_WORK(&work->work, update_gid_event_work_handler);

	rdma_ip2gid(sa, &work->gid);
	work->gid_op = gid_op;

	memset(&work->gid_attr, 0, sizeof(work->gid_attr));
	dev_hold(ndev);
	work->gid_attr.ndev   = ndev;

	queue_work(gid_cache_wq, &work->work);

	return NOTIFY_DONE;
}

/*
 * IPv4 address notifier: wrap the interface address in a sockaddr_in (only
 * the family and address fields are filled in) and forward to addr_event().
 */
static int inetaddr_event(struct notifier_block *this, unsigned long event,
			  void *ptr)
{
	struct sockaddr_in	in;
	struct net_device	*ndev;
	struct in_ifaddr	*ifa = ptr;

	in.sin_family = AF_INET;
	in.sin_addr.s_addr = ifa->ifa_address;
	ndev = ifa->ifa_dev->dev;

	return addr_event(this, event, (struct sockaddr *)&in, ndev);
}

/*
 * IPv6 address notifier: wrap the interface address in a sockaddr_in6 (only
 * the family and address fields are filled in) and forward to addr_event().
 */
static int inet6addr_event(struct notifier_block *this, unsigned long event,
			   void *ptr)
{
	struct sockaddr_in6	in6;
	struct net_device	*ndev;
	struct inet6_ifaddr	*ifa6 = ptr;

	in6.sin6_family = AF_INET6;
	in6.sin6_addr = ifa6->addr;
	ndev = ifa6->idev->dev;

	return addr_event(this, event, (struct sockaddr *)&in6, ndev);
}

static struct notifier_block nb_netdevice = {
	.notifier_call = netdevice_event
};

static struct notifier_block nb_inetaddr = {
	.notifier_call = inetaddr_event
};

static struct notifier_block nb_inet6addr = {
	.notifier_call = inet6addr_event
};

/*
 * Create the ordered GID cache workqueue and register the address and
 * netdevice notifiers. Returns 0 on success or -ENOMEM when the workqueue
 * cannot be allocated.
 */
int __init roce_gid_mgmt_init(void)
{
	gid_cache_wq = alloc_ordered_workqueue("gid-cache-wq", 0);
	if (!gid_cache_wq)
		return -ENOMEM;

	register_inetaddr_notifier(&nb_inetaddr);
	if (IS_ENABLED(CONFIG_IPV6))
		register_inet6addr_notifier(&nb_inet6addr);
	/* We rely on the netdevice notifier to enumerate all
	 * existing devices in the system. Register to this notifier
	 * last to make sure we will not miss any IP add/del
	 * callbacks.
	 */
	register_netdevice_notifier(&nb_netdevice);

	return 0;
}

/* Unregister the notifiers and destroy the GID cache workqueue. */
void __exit roce_gid_mgmt_cleanup(void)
{
	if (IS_ENABLED(CONFIG_IPV6))
		unregister_inet6addr_notifier(&nb_inet6addr);
	unregister_inetaddr_notifier(&nb_inetaddr);
	unregister_netdevice_notifier(&nb_netdevice);
	/* Ensure all gid deletion tasks complete before we go down,
	 * to avoid any reference to free'd memory. By the time
	 * ib-core is removed, all physical devices have been removed,
	 * so no issue with remaining hardware contexts.
	 */
	destroy_workqueue(gid_cache_wq);
}
2 1 1 3 3 3 1 1 2 2 1 1 1 1 1 3 3 3 3 6 6 6 6 6 6 2 4 4 4 1 3 3 3 6 3 3 3 3 3 3 3 6 2 1 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 
505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 
1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 // SPDX-License-Identifier: GPL-2.0+ 
/****************************************************************************** * cxacru.c - driver for USB ADSL modems based on * Conexant AccessRunner chipset * * Copyright (C) 2004 David Woodhouse, Duncan Sands, Roman Kagan * Copyright (C) 2005 Duncan Sands, Roman Kagan (rkagan % mail ! ru) * Copyright (C) 2007 Simon Arlott * Copyright (C) 2009 Simon Arlott ******************************************************************************/ /* * Credit is due for Josep Comas, who created the original patch to speedtch.c * to support the different padding used by the AccessRunner (now generalized * into usbatm), and the userspace firmware loading utility. */ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/kernel.h> #include <linux/timer.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/device.h> #include <linux/firmware.h> #include <linux/mutex.h> #include <linux/unaligned.h> #include "usbatm.h" #define DRIVER_AUTHOR "Roman Kagan, David Woodhouse, Duncan Sands, Simon Arlott" #define DRIVER_DESC "Conexant AccessRunner ADSL USB modem driver" static const char cxacru_driver_name[] = "cxacru"; #define CXACRU_EP_CMD 0x01 /* Bulk/interrupt in/out */ #define CXACRU_EP_DATA 0x02 /* Bulk in/out */ #define CMD_PACKET_SIZE 64 /* Should be maxpacket(ep)? 
*/ #define CMD_MAX_CONFIG ((CMD_PACKET_SIZE / 4 - 1) / 2) /* Addresses */ #define PLLFCLK_ADDR 0x00350068 #define PLLBCLK_ADDR 0x0035006c #define SDRAMEN_ADDR 0x00350010 #define FW_ADDR 0x00801000 #define BR_ADDR 0x00180600 #define SIG_ADDR 0x00180500 #define BR_STACK_ADDR 0x00187f10 /* Values */ #define SDRAM_ENA 0x1 #define CMD_TIMEOUT 2000 /* msecs */ #define POLL_INTERVAL 1 /* secs */ /* commands for interaction with the modem through the control channel before * firmware is loaded */ enum cxacru_fw_request { FW_CMD_ERR, FW_GET_VER, FW_READ_MEM, FW_WRITE_MEM, FW_RMW_MEM, FW_CHECKSUM_MEM, FW_GOTO_MEM, }; /* commands for interaction with the modem through the control channel once * firmware is loaded */ enum cxacru_cm_request { CM_REQUEST_UNDEFINED = 0x80, CM_REQUEST_TEST, CM_REQUEST_CHIP_GET_MAC_ADDRESS, CM_REQUEST_CHIP_GET_DP_VERSIONS, CM_REQUEST_CHIP_ADSL_LINE_START, CM_REQUEST_CHIP_ADSL_LINE_STOP, CM_REQUEST_CHIP_ADSL_LINE_GET_STATUS, CM_REQUEST_CHIP_ADSL_LINE_GET_SPEED, CM_REQUEST_CARD_INFO_GET, CM_REQUEST_CARD_DATA_GET, CM_REQUEST_CARD_DATA_SET, CM_REQUEST_COMMAND_HW_IO, CM_REQUEST_INTERFACE_HW_IO, CM_REQUEST_CARD_SERIAL_DATA_PATH_GET, CM_REQUEST_CARD_SERIAL_DATA_PATH_SET, CM_REQUEST_CARD_CONTROLLER_VERSION_GET, CM_REQUEST_CARD_GET_STATUS, CM_REQUEST_CARD_GET_MAC_ADDRESS, CM_REQUEST_CARD_GET_DATA_LINK_STATUS, CM_REQUEST_MAX, }; /* commands for interaction with the flash memory * * read: response is the contents of the first 60 bytes of flash memory * write: request contains the 60 bytes of data to write to flash memory * response is the contents of the first 60 bytes of flash memory * * layout: PP PP VV VV MM MM MM MM MM MM ?? ?? 
SS SS SS SS SS SS SS SS * SS SS SS SS SS SS SS SS 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * * P: le16 USB Product ID * V: le16 USB Vendor ID * M: be48 MAC Address * S: le16 ASCII Serial Number */ enum cxacru_cm_flash { CM_FLASH_READ = 0xa1, CM_FLASH_WRITE = 0xa2 }; /* reply codes to the commands above */ enum cxacru_cm_status { CM_STATUS_UNDEFINED, CM_STATUS_SUCCESS, CM_STATUS_ERROR, CM_STATUS_UNSUPPORTED, CM_STATUS_UNIMPLEMENTED, CM_STATUS_PARAMETER_ERROR, CM_STATUS_DBG_LOOPBACK, CM_STATUS_MAX, }; /* indices into CARD_INFO_GET return array */ enum cxacru_info_idx { CXINF_DOWNSTREAM_RATE, CXINF_UPSTREAM_RATE, CXINF_LINK_STATUS, CXINF_LINE_STATUS, CXINF_MAC_ADDRESS_HIGH, CXINF_MAC_ADDRESS_LOW, CXINF_UPSTREAM_SNR_MARGIN, CXINF_DOWNSTREAM_SNR_MARGIN, CXINF_UPSTREAM_ATTENUATION, CXINF_DOWNSTREAM_ATTENUATION, CXINF_TRANSMITTER_POWER, CXINF_UPSTREAM_BITS_PER_FRAME, CXINF_DOWNSTREAM_BITS_PER_FRAME, CXINF_STARTUP_ATTEMPTS, CXINF_UPSTREAM_CRC_ERRORS, CXINF_DOWNSTREAM_CRC_ERRORS, CXINF_UPSTREAM_FEC_ERRORS, CXINF_DOWNSTREAM_FEC_ERRORS, CXINF_UPSTREAM_HEC_ERRORS, CXINF_DOWNSTREAM_HEC_ERRORS, CXINF_LINE_STARTABLE, CXINF_MODULATION, CXINF_ADSL_HEADEND, CXINF_ADSL_HEADEND_ENVIRONMENT, CXINF_CONTROLLER_VERSION, /* dunno what the missing two mean */ CXINF_MAX = 0x1c, }; enum cxacru_poll_state { CXPOLL_STOPPING, CXPOLL_STOPPED, CXPOLL_POLLING, CXPOLL_SHUTDOWN }; struct cxacru_modem_type { u32 pll_f_clk; u32 pll_b_clk; int boot_rom_patch; }; struct cxacru_data { struct usbatm_data *usbatm; const struct cxacru_modem_type *modem_type; int line_status; struct mutex adsl_state_serialize; int adsl_status; struct delayed_work poll_work; u32 card_info[CXINF_MAX]; struct mutex poll_state_serialize; enum cxacru_poll_state poll_state; /* control handles */ struct mutex cm_serialize; u8 *rcv_buf; u8 *snd_buf; struct urb *rcv_urb; struct urb *snd_urb; struct completion rcv_done; struct completion snd_done; }; static int cxacru_cm(struct 
cxacru_data *instance, enum cxacru_cm_request cm, u8 *wdata, int wsize, u8 *rdata, int rsize); static void cxacru_poll_status(struct work_struct *work); /* Card info exported through sysfs */ #define CXACRU__ATTR_INIT(_name) \ static DEVICE_ATTR_RO(_name) #define CXACRU_CMD_INIT(_name) \ static DEVICE_ATTR_RW(_name) #define CXACRU_SET_INIT(_name) \ static DEVICE_ATTR_WO(_name) #define CXACRU_ATTR_INIT(_value, _type, _name) \ static ssize_t _name##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct cxacru_data *instance = to_usbatm_driver_data(\ to_usb_interface(dev)); \ \ if (instance == NULL) \ return -ENODEV; \ \ return cxacru_sysfs_showattr_##_type(instance->card_info[_value], buf); \ } \ CXACRU__ATTR_INIT(_name) #define CXACRU_ATTR_CREATE(_v, _t, _name) CXACRU_DEVICE_CREATE_FILE(_name) #define CXACRU_CMD_CREATE(_name) CXACRU_DEVICE_CREATE_FILE(_name) #define CXACRU_SET_CREATE(_name) CXACRU_DEVICE_CREATE_FILE(_name) #define CXACRU__ATTR_CREATE(_name) CXACRU_DEVICE_CREATE_FILE(_name) #define CXACRU_ATTR_REMOVE(_v, _t, _name) CXACRU_DEVICE_REMOVE_FILE(_name) #define CXACRU_CMD_REMOVE(_name) CXACRU_DEVICE_REMOVE_FILE(_name) #define CXACRU_SET_REMOVE(_name) CXACRU_DEVICE_REMOVE_FILE(_name) #define CXACRU__ATTR_REMOVE(_name) CXACRU_DEVICE_REMOVE_FILE(_name) static ssize_t cxacru_sysfs_showattr_u32(u32 value, char *buf) { return sprintf(buf, "%u\n", value); } static ssize_t cxacru_sysfs_showattr_s8(s8 value, char *buf) { return sprintf(buf, "%d\n", value); } static ssize_t cxacru_sysfs_showattr_dB(s16 value, char *buf) { if (likely(value >= 0)) { return snprintf(buf, PAGE_SIZE, "%u.%02u\n", value / 100, value % 100); } else { value = -value; return snprintf(buf, PAGE_SIZE, "-%u.%02u\n", value / 100, value % 100); } } static ssize_t cxacru_sysfs_showattr_bool(u32 value, char *buf) { static char *str[] = { "no", "yes" }; if (unlikely(value >= ARRAY_SIZE(str))) return sprintf(buf, "%u\n", value); return sprintf(buf, "%s\n", str[value]); } 
static ssize_t cxacru_sysfs_showattr_LINK(u32 value, char *buf) { static char *str[] = { NULL, "not connected", "connected", "lost" }; if (unlikely(value >= ARRAY_SIZE(str) || str[value] == NULL)) return sprintf(buf, "%u\n", value); return sprintf(buf, "%s\n", str[value]); } static ssize_t cxacru_sysfs_showattr_LINE(u32 value, char *buf) { static char *str[] = { "down", "attempting to activate", "training", "channel analysis", "exchange", "up", "waiting", "initialising" }; if (unlikely(value >= ARRAY_SIZE(str))) return sprintf(buf, "%u\n", value); return sprintf(buf, "%s\n", str[value]); } static ssize_t cxacru_sysfs_showattr_MODU(u32 value, char *buf) { static char *str[] = { "", "ANSI T1.413", "ITU-T G.992.1 (G.DMT)", "ITU-T G.992.2 (G.LITE)" }; if (unlikely(value >= ARRAY_SIZE(str))) return sprintf(buf, "%u\n", value); return sprintf(buf, "%s\n", str[value]); } /* * This could use MAC_ADDRESS_HIGH and MAC_ADDRESS_LOW, but since * this data is already in atm_dev there's no point. * * MAC_ADDRESS_HIGH = 0x????5544 * MAC_ADDRESS_LOW = 0x33221100 * Where 00-55 are bytes 0-5 of the MAC. 
*/
/* sysfs: print the MAC address stored in atm_dev->esi, or -ENODEV. */
static ssize_t mac_address_show(struct device *dev,
	struct device_attribute *attr, char *buf)
{
	struct cxacru_data *instance = to_usbatm_driver_data(
			to_usb_interface(dev));

	if (instance == NULL || instance->usbatm->atm_dev == NULL)
		return -ENODEV;

	return sprintf(buf, "%pM\n", instance->usbatm->atm_dev->esi);
}

/*
 * sysfs: print "running" or "stopped" according to
 * card_info[CXINF_LINE_STARTABLE] (0 or 1); any other value is printed
 * numerically.
 */
static ssize_t adsl_state_show(struct device *dev,
	struct device_attribute *attr, char *buf)
{
	static char *str[] = { "running", "stopped" };
	struct cxacru_data *instance = to_usbatm_driver_data(
			to_usb_interface(dev));
	u32 value;

	if (instance == NULL)
		return -ENODEV;

	value = instance->card_info[CXINF_LINE_STARTABLE];
	if (unlikely(value >= ARRAY_SIZE(str)))
		return sprintf(buf, "%u\n", value);
	return sprintf(buf, "%s\n", str[value]);
}

/*
 * sysfs: accept one of the commands "stop", "start", "restart" or "poll"
 * (first whitespace-delimited word of @buf, at most 7 characters).
 *
 * Requires CAP_NET_ADMIN (-EACCES otherwise). Returns strlen(buf) on
 * success, -EINVAL when no word could be read or the word is not a known
 * command, -ENODEV without driver data, -ERESTARTSYS when interrupted while
 * waiting for adsl_state_serialize, and -EIO when the modem request fails.
 * For "restart" the result of the START request replaces the result of the
 * preceding STOP request.
 */
static ssize_t adsl_state_store(struct device *dev,
	struct device_attribute *attr, const char *buf, size_t count)
{
	struct cxacru_data *instance = to_usbatm_driver_data(
			to_usb_interface(dev));
	int ret;
	int poll = -1;	/* requested poll state change, -1 for none */
	char str_cmd[8];
	int len = strlen(buf);

	if (!capable(CAP_NET_ADMIN))
		return -EACCES;

	ret = sscanf(buf, "%7s", str_cmd);
	if (ret != 1)
		return -EINVAL;
	/* ret == 0 from here on means "no command matched yet" */
	ret = 0;

	if (instance == NULL)
		return -ENODEV;

	if (mutex_lock_interruptible(&instance->adsl_state_serialize))
		return -ERESTARTSYS;

	if (!strcmp(str_cmd, "stop") || !strcmp(str_cmd, "restart")) {
		ret = cxacru_cm(instance, CM_REQUEST_CHIP_ADSL_LINE_STOP, NULL, 0, NULL, 0);
		if (ret < 0) {
			atm_err(instance->usbatm, "change adsl state:"
				" CHIP_ADSL_LINE_STOP returned %d\n", ret);

			ret = -EIO;
		} else {
			ret = len;
			poll = CXPOLL_STOPPED;
		}
	}

	/* Line status is only updated every second
	 * and the device appears to only react to
	 * START/STOP every second too. Wait 1.5s to
	 * be sure that restart will have an effect.
	 */
	if (!strcmp(str_cmd, "restart"))
		msleep(1500);

	if (!strcmp(str_cmd, "start") || !strcmp(str_cmd, "restart")) {
		ret = cxacru_cm(instance, CM_REQUEST_CHIP_ADSL_LINE_START, NULL, 0, NULL, 0);
		if (ret < 0) {
			atm_err(instance->usbatm, "change adsl state:"
				" CHIP_ADSL_LINE_START returned %d\n", ret);

			ret = -EIO;
		} else {
			ret = len;
			poll = CXPOLL_POLLING;
		}
	}

	if (!strcmp(str_cmd, "poll")) {
		ret = len;
		poll = CXPOLL_POLLING;
	}

	/* none of the commands above matched */
	if (ret == 0) {
		ret = -EINVAL;
		poll = -1;
	}

	if (poll == CXPOLL_POLLING) {
		mutex_lock(&instance->poll_state_serialize);
		switch (instance->poll_state) {
		case CXPOLL_STOPPED:
			/* start polling */
			instance->poll_state = CXPOLL_POLLING;
			break;

		case CXPOLL_STOPPING:
			/* abort stop request */
			instance->poll_state = CXPOLL_POLLING;
			fallthrough;
		case CXPOLL_POLLING:
		case CXPOLL_SHUTDOWN:
			/* don't start polling */
			poll = -1;
		}
		mutex_unlock(&instance->poll_state_serialize);
	} else if (poll == CXPOLL_STOPPED) {
		mutex_lock(&instance->poll_state_serialize);
		/* request stop */
		if (instance->poll_state == CXPOLL_POLLING)
			instance->poll_state = CXPOLL_STOPPING;
		mutex_unlock(&instance->poll_state_serialize);
	}

	mutex_unlock(&instance->adsl_state_serialize);

	/* run the first poll directly, after both mutexes are released */
	if (poll == CXPOLL_POLLING)
		cxacru_poll_status(&instance->poll_work.work);

	return ret;
}

/* CM_REQUEST_CARD_DATA_GET times out, so no show attribute */

/*
 * sysfs: parse a list of hexadecimal "index=value" pairs from @buf and send
 * them to the modem with CM_REQUEST_CARD_DATA_SET, at most CMD_MAX_CONFIG
 * pairs per request. data[0] carries the pair count of each request and is
 * followed by the (index, value) pairs as little-endian words.
 *
 * Requires CAP_NET_ADMIN (-EACCES otherwise). Returns strlen(buf) on
 * success, -ENODEV without driver data, -EINVAL on a malformed pair or an
 * index above 0x7f, and -EIO when a request fails; pairs sent by earlier
 * requests are not undone in that case.
 */
static ssize_t adsl_config_store(struct device *dev,
	struct device_attribute *attr, const char *buf, size_t count)
{
	struct cxacru_data *instance = to_usbatm_driver_data(
			to_usb_interface(dev));
	int len = strlen(buf);
	int ret, pos, num;
	__le32 data[CMD_PACKET_SIZE / 4];

	if (!capable(CAP_NET_ADMIN))
		return -EACCES;

	if (instance == NULL)
		return -ENODEV;

	pos = 0;
	num = 0;
	while (pos < len) {
		int tmp;
		u32 index;
		u32 value;

		/* %n stores the number of characters consumed in tmp */
		ret = sscanf(buf + pos, "%x=%x%n", &index, &value, &tmp);
		if (ret < 2)
			return -EINVAL;
		if (index > 0x7f)
			return -EINVAL;
		if (tmp < 0 || tmp > len - pos)
			return -EINVAL;
		pos += tmp;

		/* skip trailing newline */
		if (buf[pos] == '\n' && pos == len-1)
			pos++;

		data[num * 2 + 1] = cpu_to_le32(index);
		data[num * 2 + 2] = cpu_to_le32(value);
		num++;

		/* send config values when data buffer is full
		 * or no more data
		 */
		if (pos >= len || num >= CMD_MAX_CONFIG) {
			char log[CMD_MAX_CONFIG * 12 + 1]; /* %02x=%08x */

			data[0] = cpu_to_le32(num);
			ret = cxacru_cm(instance, CM_REQUEST_CARD_DATA_SET,
				(u8 *) data, 4 + num * 8, NULL, 0);
			if (ret < 0) {
				atm_err(instance->usbatm,
					"set card data returned %d\n", ret);
				return -EIO;
			}

			/* each pair takes 12 characters in the log line */
			for (tmp = 0; tmp < num; tmp++)
				snprintf(log + tmp*12, 13, " %02x=%08x",
					le32_to_cpu(data[tmp * 2 + 1]),
					le32_to_cpu(data[tmp * 2 + 2]));
			atm_info(instance->usbatm, "config%s\n", log);
			num = 0;
		}
	}

	return len;
}

/*
 * All device attributes are included in CXACRU_ALL_FILES
 * so that the same list can be used multiple times:
 *     INIT   (define the device attributes)
 *     CREATE (create all the device files)
 *     REMOVE (remove all the device files)
 *
 * With the last two being defined as needed in the functions
 * they are used in before calling CXACRU_ALL_FILES()
 */
#define CXACRU_ALL_FILES(_action) \
CXACRU_ATTR_##_action(CXINF_DOWNSTREAM_RATE,           u32,  downstream_rate); \
CXACRU_ATTR_##_action(CXINF_UPSTREAM_RATE,             u32,  upstream_rate); \
CXACRU_ATTR_##_action(CXINF_LINK_STATUS,               LINK, link_status); \
CXACRU_ATTR_##_action(CXINF_LINE_STATUS,               LINE, line_status); \
CXACRU__ATTR_##_action(                                      mac_address); \
CXACRU_ATTR_##_action(CXINF_UPSTREAM_SNR_MARGIN,       dB,   upstream_snr_margin); \
CXACRU_ATTR_##_action(CXINF_DOWNSTREAM_SNR_MARGIN,     dB,   downstream_snr_margin); \
CXACRU_ATTR_##_action(CXINF_UPSTREAM_ATTENUATION,      dB,   upstream_attenuation); \
CXACRU_ATTR_##_action(CXINF_DOWNSTREAM_ATTENUATION,    dB,   downstream_attenuation); \
CXACRU_ATTR_##_action(CXINF_TRANSMITTER_POWER,         s8,   transmitter_power); \
CXACRU_ATTR_##_action(CXINF_UPSTREAM_BITS_PER_FRAME,   u32,  upstream_bits_per_frame); \
CXACRU_ATTR_##_action(CXINF_DOWNSTREAM_BITS_PER_FRAME, u32,  downstream_bits_per_frame); \
CXACRU_ATTR_##_action(CXINF_STARTUP_ATTEMPTS,          u32,  startup_attempts); \
CXACRU_ATTR_##_action(CXINF_UPSTREAM_CRC_ERRORS,       u32,  upstream_crc_errors); \
CXACRU_ATTR_##_action(CXINF_DOWNSTREAM_CRC_ERRORS,     u32,  downstream_crc_errors); \
CXACRU_ATTR_##_action(CXINF_UPSTREAM_FEC_ERRORS,       u32,  upstream_fec_errors); \
CXACRU_ATTR_##_action(CXINF_DOWNSTREAM_FEC_ERRORS,     u32,  downstream_fec_errors); \
CXACRU_ATTR_##_action(CXINF_UPSTREAM_HEC_ERRORS,       u32,  upstream_hec_errors); \
CXACRU_ATTR_##_action(CXINF_DOWNSTREAM_HEC_ERRORS,     u32,  downstream_hec_errors); \
CXACRU_ATTR_##_action(CXINF_LINE_STARTABLE,            bool, line_startable); \
CXACRU_ATTR_##_action(CXINF_MODULATION,                MODU, modulation); \
CXACRU_ATTR_##_action(CXINF_ADSL_HEADEND,              u32,  adsl_headend); \
CXACRU_ATTR_##_action(CXINF_ADSL_HEADEND_ENVIRONMENT,  u32,  adsl_headend_environment); \
CXACRU_ATTR_##_action(CXINF_CONTROLLER_VERSION,        u32,  adsl_controller_version); \
CXACRU_CMD_##_action(                                        adsl_state); \
CXACRU_SET_##_action(                                        adsl_config);

CXACRU_ALL_FILES(INIT);

/* Attribute list handed to ATTRIBUTE_GROUPS(); NULL terminated. */
static struct attribute *cxacru_attrs[] = {
	&dev_attr_adsl_config.attr,
	&dev_attr_adsl_state.attr,
	&dev_attr_adsl_controller_version.attr,
	&dev_attr_adsl_headend_environment.attr,
	&dev_attr_adsl_headend.attr,
	&dev_attr_modulation.attr,
	&dev_attr_line_startable.attr,
	&dev_attr_downstream_hec_errors.attr,
	&dev_attr_upstream_hec_errors.attr,
	&dev_attr_downstream_fec_errors.attr,
	&dev_attr_upstream_fec_errors.attr,
	&dev_attr_downstream_crc_errors.attr,
	&dev_attr_upstream_crc_errors.attr,
	&dev_attr_startup_attempts.attr,
	&dev_attr_downstream_bits_per_frame.attr,
	&dev_attr_upstream_bits_per_frame.attr,
	&dev_attr_transmitter_power.attr,
	&dev_attr_downstream_attenuation.attr,
	&dev_attr_upstream_attenuation.attr,
	&dev_attr_downstream_snr_margin.attr,
	&dev_attr_upstream_snr_margin.attr,
	&dev_attr_mac_address.attr,
	&dev_attr_line_status.attr,
	&dev_attr_link_status.attr,
	&dev_attr_upstream_rate.attr,
	&dev_attr_downstream_rate.attr,
	NULL,
};
ATTRIBUTE_GROUPS(cxacru);

/* the following three functions are stolen from
drivers/usb/core/message.c */
/* URB completion handler: signal the completion stored in urb->context. */
static void cxacru_blocking_completion(struct urb *urb)
{
	complete(urb->context);
}

/* On-stack timer paired with the URB it unlinks when it fires. */
struct cxacru_timer {
	struct timer_list timer;
	struct urb *urb;
};

/* Timer callback: unlink the URB that is being waited for. */
static void cxacru_timeout_kill(struct timer_list *t)
{
	struct cxacru_timer *timer = timer_container_of(timer, t, timer);

	usb_unlink_urb(timer->urb);
}

/*
 * Wait for an already submitted @urb to signal @done. An on-stack timer
 * unlinks the URB after CMD_TIMEOUT milliseconds. Stores the transferred
 * length in @actual_length when that pointer is non-NULL and returns the
 * URB status.
 */
static int cxacru_start_wait_urb(struct urb *urb, struct completion *done,
				 int *actual_length)
{
	struct cxacru_timer timer = {
		.urb = urb,
	};

	timer_setup_on_stack(&timer.timer, cxacru_timeout_kill, 0);
	mod_timer(&timer.timer, jiffies + msecs_to_jiffies(CMD_TIMEOUT));
	wait_for_completion(done);
	timer_delete_sync(&timer.timer);
	timer_destroy_on_stack(&timer.timer);

	if (actual_length)
		*actual_length = urb->actual_length;
	return urb->status; /* must read status after completion */
}

/*
 * Send command @cm with @wsize bytes of payload from @wdata and copy up to
 * @rsize bytes of the reply into @rdata.
 *
 * Both directions are framed in CMD_PACKET_SIZE byte packets: byte 0 holds
 * the command code and the payload starts at byte 4, so every packet
 * carries CMD_PACKET_SIZE - 4 payload bytes. In a reply, byte 1 must be
 * CM_STATUS_SUCCESS. Requests are serialized with cm_serialize.
 *
 * Returns the reply payload offset reached, which advances by a whole
 * packet payload per reply packet and may therefore exceed @rsize, or a
 * negative error: -ENOMEM when either buffer would exceed PAGE_SIZE, -EIO
 * on a malformed or failed reply, or the URB submit/status error.
 */
static int cxacru_cm(struct cxacru_data *instance, enum cxacru_cm_request cm,
		     u8 *wdata, int wsize, u8 *rdata, int rsize)
{
	int ret, actlen;
	int offb, offd;	/* offsets into the packet buffer / payload data */
	const int stride = CMD_PACKET_SIZE - 4;
	u8 *wbuf = instance->snd_buf;
	u8 *rbuf = instance->rcv_buf;
	/* at least one packet each way, also for a size of 0 */
	int wbuflen = ((wsize - 1) / stride + 1) * CMD_PACKET_SIZE;
	int rbuflen = ((rsize - 1) / stride + 1) * CMD_PACKET_SIZE;

	if (wbuflen > PAGE_SIZE || rbuflen > PAGE_SIZE) {
		if (printk_ratelimit())
			usb_err(instance->usbatm, "requested transfer size too large (%d, %d)\n",
				wbuflen, rbuflen);
		ret = -ENOMEM;
		goto err;
	}

	mutex_lock(&instance->cm_serialize);

	/* submit reading urb before the writing one */
	init_completion(&instance->rcv_done);
	ret = usb_submit_urb(instance->rcv_urb, GFP_KERNEL);
	if (ret < 0) {
		if (printk_ratelimit())
			usb_err(instance->usbatm, "submit of read urb for cm %#x failed (%d)\n",
				cm, ret);
		goto fail;
	}

	/* NOTE(review): the failure paths below do not cancel the read urb
	 * submitted above - confirm that this is intended.
	 */
	memset(wbuf, 0, wbuflen);
	/* handle wsize == 0 */
	wbuf[0] = cm;
	for (offb = offd = 0; offd < wsize; offd += stride, offb += CMD_PACKET_SIZE) {
		wbuf[offb] = cm;
		memcpy(wbuf + offb + 4, wdata + offd, min_t(int, stride, wsize - offd));
	}

	instance->snd_urb->transfer_buffer_length = wbuflen;
	init_completion(&instance->snd_done);
	ret = usb_submit_urb(instance->snd_urb, GFP_KERNEL);
	if (ret < 0) {
		if (printk_ratelimit())
			usb_err(instance->usbatm, "submit of write urb for cm %#x failed (%d)\n",
				cm, ret);
		goto fail;
	}

	ret = cxacru_start_wait_urb(instance->snd_urb, &instance->snd_done, NULL);
	if (ret < 0) {
		if (printk_ratelimit())
			usb_err(instance->usbatm, "send of cm %#x failed (%d)\n", cm, ret);
		goto fail;
	}

	ret = cxacru_start_wait_urb(instance->rcv_urb, &instance->rcv_done, &actlen);
	if (ret < 0) {
		if (printk_ratelimit())
			usb_err(instance->usbatm, "receive of cm %#x failed (%d)\n", cm, ret);
		goto fail;
	}
	/* the reply must be a non-zero number of whole packets */
	if (actlen % CMD_PACKET_SIZE || !actlen) {
		if (printk_ratelimit())
			usb_err(instance->usbatm, "invalid response length to cm %#x: %d\n",
				cm, actlen);
		ret = -EIO;
		goto fail;
	}

	/* check the return status and copy the data to the output buffer, if needed */
	for (offb = offd = 0; offd < rsize && offb < actlen; offb += CMD_PACKET_SIZE) {
		if (rbuf[offb] != cm) {
			if (printk_ratelimit())
				usb_err(instance->usbatm, "wrong cm %#x in response to cm %#x\n",
					rbuf[offb], cm);
			ret = -EIO;
			goto fail;
		}
		if (rbuf[offb + 1] != CM_STATUS_SUCCESS) {
			if (printk_ratelimit())
				usb_err(instance->usbatm, "response to cm %#x failed: %#x\n",
					cm, rbuf[offb + 1]);
			ret = -EIO;
			goto fail;
		}
		if (offd >= rsize)
			break;
		memcpy(rdata + offd, rbuf + offb + 4, min_t(int, stride, rsize - offd));
		offd += stride;
	}

	ret = offd;
	usb_dbg(instance->usbatm, "cm %#x\n", cm);
fail:
	mutex_unlock(&instance->cm_serialize);
err:
	return ret;
}

/*
 * Issue @cm and decode its reply into the @size element array @data.
 *
 * The reply is read as little-endian 32-bit words forming groups: a count
 * followed by that many (index, value) pairs; each value is stored at
 * data[index]. Entries whose index does not appear in the reply are left
 * untouched. Returns 0 on success, -ENOMEM if the temporary buffer cannot
 * be allocated, -EIO on an invalid count or out-of-range index, or the
 * error from cxacru_cm().
 */
static int cxacru_cm_get_array(struct cxacru_data *instance, enum cxacru_cm_request cm,
			       u32 *data, int size)
{
	int ret, len;
	__le32 *buf;
	int offb;
	unsigned int offd;
	/* maximum number of pairs in one group */
	const int stride = CMD_PACKET_SIZE / (4 * 2) - 1;
	int buflen =  ((size - 1) / stride + 1 + size * 2) * 4;

	buf = kmalloc(buflen, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	ret = cxacru_cm(instance, cm, NULL, 0, (u8 *) buf, buflen);
	if (ret < 0)
		goto cleanup;

	/* len > 0 && len % 4 == 0 guaranteed by cxacru_cm() */
	len = ret / 4;
	for (offb = 0; offb < len; ) {
		int l = le32_to_cpu(buf[offb++]);

		/* the group must fit in the words that are left */
		if (l < 0 || l > stride || l > (len - offb) / 2) {
			if (printk_ratelimit())
				usb_err(instance->usbatm, "invalid data length from cm %#x: %d\n",
					cm, l);
			ret = -EIO;
			goto cleanup;
		}
		while (l--) {
			offd = le32_to_cpu(buf[offb++]);
			if (offd >= size) {
				if (printk_ratelimit())
					usb_err(instance->usbatm, "wrong index %#x in response to cm %#x\n",
						offd, cm);
				ret = -EIO;
				goto cleanup;
			}
			data[offd] = le32_to_cpu(buf[offb++]);
		}
	}

	ret = 0;

cleanup:
	kfree(buf);
	return ret;
}

/*
 * Issue CM_REQUEST_CARD_GET_STATUS. Returns 0 when the request succeeds and
 * the negative error from cxacru_cm() otherwise.
 */
static int cxacru_card_status(struct cxacru_data *instance)
{
	int ret = cxacru_cm(instance, CM_REQUEST_CARD_GET_STATUS, NULL, 0, NULL, 0);

	if (ret < 0) {		/* firmware not loaded */
		usb_dbg(instance->usbatm, "cxacru_adsl_start: CARD_GET_STATUS returned %d\n", ret);
		return ret;
	}
	return 0;
}

/*
 * Read the MAC address into atm_dev->esi, request the ADSL line to start
 * and begin status polling unless it is already running or shut down.
 *
 * Returns the error from cxacru_cm() if the MAC address cannot be read and
 * 0 otherwise; a failing line start request is only logged.
 */
static int cxacru_atm_start(struct usbatm_data *usbatm_instance,
		struct atm_dev *atm_dev)
{
	struct cxacru_data *instance = usbatm_instance->driver_data;
	struct usb_interface *intf = usbatm_instance->usb_intf;
	int ret;
	int start_polling = 1;

	dev_dbg(&intf->dev, "%s\n", __func__);

	/* Read MAC address */
	ret = cxacru_cm(instance, CM_REQUEST_CARD_GET_MAC_ADDRESS, NULL, 0,
			atm_dev->esi, sizeof(atm_dev->esi));
	if (ret < 0) {
		atm_err(usbatm_instance, "cxacru_atm_start: CARD_GET_MAC_ADDRESS returned %d\n", ret);
		return ret;
	}

	/* start ADSL */
	mutex_lock(&instance->adsl_state_serialize);
	ret = cxacru_cm(instance, CM_REQUEST_CHIP_ADSL_LINE_START, NULL, 0, NULL, 0);
	if (ret < 0)
		atm_err(usbatm_instance, "cxacru_atm_start: CHIP_ADSL_LINE_START returned %d\n", ret);

	/* Start status polling */
	mutex_lock(&instance->poll_state_serialize);
	switch (instance->poll_state) {
	case CXPOLL_STOPPED:
		/* start polling */
		instance->poll_state = CXPOLL_POLLING;
		break;

	case CXPOLL_STOPPING:
		/* abort stop request */
		instance->poll_state = CXPOLL_POLLING;
		fallthrough;
	case CXPOLL_POLLING:
	case CXPOLL_SHUTDOWN:
		/* don't start polling */
		start_polling = 0;
	}
	mutex_unlock(&instance->poll_state_serialize);
	mutex_unlock(&instance->adsl_state_serialize);

	if (start_polling)
		cxacru_poll_status(&instance->poll_work.work);
	return 0;
}

static void cxacru_poll_status(struct work_struct *work)
{
	struct cxacru_data *instance =
		container_of(work, struct cxacru_data, poll_work.work);
	u32 buf[CXINF_MAX] = {};
	struct usbatm_data *usbatm = instance->usbatm;
	struct atm_dev *atm_dev = usbatm->atm_dev;
	int keep_polling = 1;
	int ret;

	ret = cxacru_cm_get_array(instance, CM_REQUEST_CARD_INFO_GET, buf, CXINF_MAX);
	if (ret < 0) {
		if (ret != -ESHUTDOWN)
			atm_warn(usbatm, "poll status: error %d\n", ret);

		mutex_lock(&instance->poll_state_serialize);
		if (instance->poll_state != CXPOLL_SHUTDOWN) {
			instance->poll_state = CXPOLL_STOPPED;

			if (ret != -ESHUTDOWN)
				atm_warn(usbatm, "polling disabled, set adsl_state"
						" to 'start' or 'poll' to resume\n");
		}
		mutex_unlock(&instance->poll_state_serialize);
		goto reschedule;
	}

	memcpy(instance->card_info, buf, sizeof(instance->card_info));

	if (instance->adsl_status != buf[CXINF_LINE_STARTABLE]) {
		instance->adsl_status = buf[CXINF_LINE_STARTABLE];

		switch (instance->adsl_status) {
		case 0:
			atm_info(usbatm, "ADSL state: running\n");
			break;

		case 1:
			atm_info(usbatm, "ADSL state: stopped\n");
			break;

		default:
			atm_info(usbatm, "Unknown adsl status %02x\n",
						instance->adsl_status);
			break;
		}
	}

	if (instance->line_status == buf[CXINF_LINE_STATUS])
		goto reschedule;

	instance->line_status = buf[CXINF_LINE_STATUS];
	switch (instance->line_status) {
	case 0:
		atm_dev_signal_change(atm_dev, ATM_PHY_SIG_LOST);
		atm_info(usbatm, "ADSL line: down\n");
		break;

	case 1:
		atm_dev_signal_change(atm_dev, ATM_PHY_SIG_LOST);
		atm_info(usbatm, "ADSL line: attempting to activate\n");
		break;

	case 2:
		atm_dev_signal_change(atm_dev, ATM_PHY_SIG_LOST);
		atm_info(usbatm, "ADSL line: training\n");
		break;

	case 3:
		atm_dev_signal_change(atm_dev, ATM_PHY_SIG_LOST);
		atm_info(usbatm, "ADSL line: channel analysis\n");
		break;

	case 4:
		atm_dev_signal_change(atm_dev,
ATM_PHY_SIG_LOST); atm_info(usbatm, "ADSL line: exchange\n"); break; case 5: atm_dev->link_rate = buf[CXINF_DOWNSTREAM_RATE] * 1000 / 424; atm_dev_signal_change(atm_dev, ATM_PHY_SIG_FOUND); atm_info(usbatm, "ADSL line: up (%d kb/s down | %d kb/s up)\n", buf[CXINF_DOWNSTREAM_RATE], buf[CXINF_UPSTREAM_RATE]); break; case 6: atm_dev_signal_change(atm_dev, ATM_PHY_SIG_LOST); atm_info(usbatm, "ADSL line: waiting\n"); break; case 7: atm_dev_signal_change(atm_dev, ATM_PHY_SIG_LOST); atm_info(usbatm, "ADSL line: initializing\n"); break; default: atm_dev_signal_change(atm_dev, ATM_PHY_SIG_UNKNOWN); atm_info(usbatm, "Unknown line state %02x\n", instance->line_status); break; } reschedule: mutex_lock(&instance->poll_state_serialize); if (instance->poll_state == CXPOLL_STOPPING && instance->adsl_status == 1 && /* stopped */ instance->line_status == 0) /* down */ instance->poll_state = CXPOLL_STOPPED; if (instance->poll_state == CXPOLL_STOPPED) keep_polling = 0; mutex_unlock(&instance->poll_state_serialize); if (keep_polling) schedule_delayed_work(&instance->poll_work, round_jiffies_relative(POLL_INTERVAL*HZ)); } static int cxacru_fw(struct usb_device *usb_dev, enum cxacru_fw_request fw, u8 code1, u8 code2, u32 addr, const u8 *data, int size) { int ret; u8 *buf; int offd, offb; const int stride = CMD_PACKET_SIZE - 8; buf = (u8 *) __get_free_page(GFP_KERNEL); if (!buf) return -ENOMEM; offb = offd = 0; do { int l = min_t(int, stride, size - offd); buf[offb++] = fw; buf[offb++] = l; buf[offb++] = code1; buf[offb++] = code2; put_unaligned(cpu_to_le32(addr), (__le32 *)(buf + offb)); offb += 4; addr += l; if (l) memcpy(buf + offb, data + offd, l); if (l < stride) memset(buf + offb + l, 0, stride - l); offb += stride; offd += stride; if ((offb >= PAGE_SIZE) || (offd >= size)) { ret = usb_bulk_msg(usb_dev, usb_sndbulkpipe(usb_dev, CXACRU_EP_CMD), buf, offb, NULL, CMD_TIMEOUT); if (ret < 0) { dev_dbg(&usb_dev->dev, "sending fw %#x failed\n", fw); goto cleanup; } offb = 0; } } while 
(offd < size); dev_dbg(&usb_dev->dev, "sent fw %#x\n", fw); ret = 0; cleanup: free_page((unsigned long) buf); return ret; } static int cxacru_find_firmware(struct cxacru_data *instance, char *phase, const struct firmware **fw_p) { struct usbatm_data *usbatm = instance->usbatm; struct device *dev = &usbatm->usb_intf->dev; char buf[16]; sprintf(buf, "cxacru-%s.bin", phase); usb_dbg(usbatm, "cxacru_find_firmware: looking for %s\n", buf); if (request_firmware(fw_p, buf, dev)) { usb_dbg(usbatm, "no stage %s firmware found\n", phase); return -ENOENT; } usb_info(usbatm, "found firmware %s\n", buf); return 0; } static int cxacru_heavy_init(struct usbatm_data *usbatm_instance, struct usb_interface *usb_intf) { const struct firmware *fw, *bp; struct cxacru_data *instance = usbatm_instance->driver_data; struct usbatm_data *usbatm = instance->usbatm; struct usb_device *usb_dev = usbatm->usb_dev; __le16 signature[] = { usb_dev->descriptor.idVendor, usb_dev->descriptor.idProduct }; __le32 val; int ret; ret = cxacru_find_firmware(instance, "fw", &fw); if (ret) { usb_warn(usbatm_instance, "firmware (cxacru-fw.bin) unavailable (system misconfigured?)\n"); return ret; } if (instance->modem_type->boot_rom_patch) { ret = cxacru_find_firmware(instance, "bp", &bp); if (ret) { usb_warn(usbatm_instance, "boot ROM patch (cxacru-bp.bin) unavailable (system misconfigured?)\n"); release_firmware(fw); return ret; } } /* FirmwarePllFClkValue */ val = cpu_to_le32(instance->modem_type->pll_f_clk); ret = cxacru_fw(usb_dev, FW_WRITE_MEM, 0x2, 0x0, PLLFCLK_ADDR, (u8 *) &val, 4); if (ret) { usb_err(usbatm, "FirmwarePllFClkValue failed: %d\n", ret); goto done; } /* FirmwarePllBClkValue */ val = cpu_to_le32(instance->modem_type->pll_b_clk); ret = cxacru_fw(usb_dev, FW_WRITE_MEM, 0x2, 0x0, PLLBCLK_ADDR, (u8 *) &val, 4); if (ret) { usb_err(usbatm, "FirmwarePllBClkValue failed: %d\n", ret); goto done; } /* Enable SDRAM */ val = cpu_to_le32(SDRAM_ENA); ret = cxacru_fw(usb_dev, FW_WRITE_MEM, 0x2, 0x0, 
SDRAMEN_ADDR, (u8 *) &val, 4); if (ret) { usb_err(usbatm, "Enable SDRAM failed: %d\n", ret); goto done; } /* Firmware */ usb_info(usbatm, "loading firmware\n"); ret = cxacru_fw(usb_dev, FW_WRITE_MEM, 0x2, 0x0, FW_ADDR, fw->data, fw->size); if (ret) { usb_err(usbatm, "Firmware upload failed: %d\n", ret); goto done; } /* Boot ROM patch */ if (instance->modem_type->boot_rom_patch) { usb_info(usbatm, "loading boot ROM patch\n"); ret = cxacru_fw(usb_dev, FW_WRITE_MEM, 0x2, 0x0, BR_ADDR, bp->data, bp->size); if (ret) { usb_err(usbatm, "Boot ROM patching failed: %d\n", ret); goto done; } } /* Signature */ ret = cxacru_fw(usb_dev, FW_WRITE_MEM, 0x2, 0x0, SIG_ADDR, (u8 *) signature, 4); if (ret) { usb_err(usbatm, "Signature storing failed: %d\n", ret); goto done; } usb_info(usbatm, "starting device\n"); if (instance->modem_type->boot_rom_patch) { val = cpu_to_le32(BR_ADDR); ret = cxacru_fw(usb_dev, FW_WRITE_MEM, 0x2, 0x0, BR_STACK_ADDR, (u8 *) &val, 4); } else { ret = cxacru_fw(usb_dev, FW_GOTO_MEM, 0x0, 0x0, FW_ADDR, NULL, 0); } if (ret) { usb_err(usbatm, "Passing control to firmware failed: %d\n", ret); goto done; } /* Delay to allow firmware to start up. 
*/ msleep_interruptible(1000); usb_clear_halt(usb_dev, usb_sndbulkpipe(usb_dev, CXACRU_EP_CMD)); usb_clear_halt(usb_dev, usb_rcvbulkpipe(usb_dev, CXACRU_EP_CMD)); usb_clear_halt(usb_dev, usb_sndbulkpipe(usb_dev, CXACRU_EP_DATA)); usb_clear_halt(usb_dev, usb_rcvbulkpipe(usb_dev, CXACRU_EP_DATA)); ret = cxacru_cm(instance, CM_REQUEST_CARD_GET_STATUS, NULL, 0, NULL, 0); if (ret < 0) { usb_err(usbatm, "modem failed to initialize: %d\n", ret); goto done; } done: if (instance->modem_type->boot_rom_patch) release_firmware(bp); release_firmware(fw); ret = cxacru_card_status(instance); if (ret) usb_dbg(usbatm_instance, "modem initialisation failed\n"); else usb_dbg(usbatm_instance, "done setting up the modem\n"); return ret; } static int cxacru_bind(struct usbatm_data *usbatm_instance, struct usb_interface *intf, const struct usb_device_id *id) { struct cxacru_data *instance; struct usb_device *usb_dev = interface_to_usbdev(intf); struct usb_host_endpoint *cmd_ep = usb_dev->ep_in[CXACRU_EP_CMD]; static const u8 ep_addrs[] = { CXACRU_EP_CMD + USB_DIR_IN, CXACRU_EP_CMD + USB_DIR_OUT, 0}; int ret; /* instance init */ instance = kzalloc_obj(*instance); if (!instance) return -ENOMEM; instance->usbatm = usbatm_instance; instance->modem_type = (struct cxacru_modem_type *) id->driver_info; mutex_init(&instance->poll_state_serialize); instance->poll_state = CXPOLL_STOPPED; instance->line_status = -1; instance->adsl_status = -1; mutex_init(&instance->adsl_state_serialize); instance->rcv_buf = (u8 *) __get_free_page(GFP_KERNEL); if (!instance->rcv_buf) { usb_dbg(usbatm_instance, "cxacru_bind: no memory for rcv_buf\n"); ret = -ENOMEM; goto fail; } instance->snd_buf = (u8 *) __get_free_page(GFP_KERNEL); if (!instance->snd_buf) { usb_dbg(usbatm_instance, "cxacru_bind: no memory for snd_buf\n"); ret = -ENOMEM; goto fail; } instance->rcv_urb = usb_alloc_urb(0, GFP_KERNEL); if (!instance->rcv_urb) { ret = -ENOMEM; goto fail; } instance->snd_urb = usb_alloc_urb(0, GFP_KERNEL); if 
(!instance->snd_urb) { ret = -ENOMEM; goto fail; } if (!cmd_ep) { usb_dbg(usbatm_instance, "cxacru_bind: no command endpoint\n"); ret = -ENODEV; goto fail; } if (usb_endpoint_xfer_int(&cmd_ep->desc)) ret = usb_check_int_endpoints(intf, ep_addrs); else ret = usb_check_bulk_endpoints(intf, ep_addrs); if (!ret) { usb_err(usbatm_instance, "cxacru_bind: interface has incorrect endpoints\n"); ret = -ENODEV; goto fail; } if ((cmd_ep->desc.bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == USB_ENDPOINT_XFER_INT) { usb_fill_int_urb(instance->rcv_urb, usb_dev, usb_rcvintpipe(usb_dev, CXACRU_EP_CMD), instance->rcv_buf, PAGE_SIZE, cxacru_blocking_completion, &instance->rcv_done, 1); usb_fill_int_urb(instance->snd_urb, usb_dev, usb_sndintpipe(usb_dev, CXACRU_EP_CMD), instance->snd_buf, PAGE_SIZE, cxacru_blocking_completion, &instance->snd_done, 4); } else { usb_fill_bulk_urb(instance->rcv_urb, usb_dev, usb_rcvbulkpipe(usb_dev, CXACRU_EP_CMD), instance->rcv_buf, PAGE_SIZE, cxacru_blocking_completion, &instance->rcv_done); usb_fill_bulk_urb(instance->snd_urb, usb_dev, usb_sndbulkpipe(usb_dev, CXACRU_EP_CMD), instance->snd_buf, PAGE_SIZE, cxacru_blocking_completion, &instance->snd_done); } mutex_init(&instance->cm_serialize); INIT_DELAYED_WORK(&instance->poll_work, cxacru_poll_status); usbatm_instance->driver_data = instance; usbatm_instance->flags = (cxacru_card_status(instance) ? 
0 : UDSL_SKIP_HEAVY_INIT); return 0; fail: free_page((unsigned long) instance->snd_buf); free_page((unsigned long) instance->rcv_buf); usb_free_urb(instance->snd_urb); usb_free_urb(instance->rcv_urb); kfree(instance); return ret; } static void cxacru_unbind(struct usbatm_data *usbatm_instance, struct usb_interface *intf) { struct cxacru_data *instance = usbatm_instance->driver_data; int is_polling = 1; usb_dbg(usbatm_instance, "cxacru_unbind entered\n"); if (!instance) { usb_dbg(usbatm_instance, "cxacru_unbind: NULL instance!\n"); return; } mutex_lock(&instance->poll_state_serialize); BUG_ON(instance->poll_state == CXPOLL_SHUTDOWN); /* ensure that status polling continues unless * it has already stopped */ if (instance->poll_state == CXPOLL_STOPPED) is_polling = 0; /* stop polling from being stopped or started */ instance->poll_state = CXPOLL_SHUTDOWN; mutex_unlock(&instance->poll_state_serialize); if (is_polling) cancel_delayed_work_sync(&instance->poll_work); usb_kill_urb(instance->snd_urb); usb_kill_urb(instance->rcv_urb); usb_free_urb(instance->snd_urb); usb_free_urb(instance->rcv_urb); free_page((unsigned long) instance->snd_buf); free_page((unsigned long) instance->rcv_buf); kfree(instance); usbatm_instance->driver_data = NULL; } static const struct cxacru_modem_type cxacru_cafe = { .pll_f_clk = 0x02d874df, .pll_b_clk = 0x0196a51a, .boot_rom_patch = 1, }; static const struct cxacru_modem_type cxacru_cb00 = { .pll_f_clk = 0x5, .pll_b_clk = 0x3, .boot_rom_patch = 0, }; static const struct usb_device_id cxacru_usb_ids[] = { { /* V = Conexant P = ADSL modem (Euphrates project) */ USB_DEVICE(0x0572, 0xcafe), .driver_info = (unsigned long) &cxacru_cafe }, { /* V = Conexant P = ADSL modem (Hasbani project) */ USB_DEVICE(0x0572, 0xcb00), .driver_info = (unsigned long) &cxacru_cb00 }, { /* V = Conexant P = ADSL modem */ USB_DEVICE(0x0572, 0xcb01), .driver_info = (unsigned long) &cxacru_cb00 }, { /* V = Conexant P = ADSL modem (Well PTI-800) */ USB_DEVICE(0x0572, 
0xcb02), .driver_info = (unsigned long) &cxacru_cb00 }, { /* V = Conexant P = ADSL modem */ USB_DEVICE(0x0572, 0xcb06), .driver_info = (unsigned long) &cxacru_cb00 }, { /* V = Conexant P = ADSL modem (ZTE ZXDSL 852) */ USB_DEVICE(0x0572, 0xcb07), .driver_info = (unsigned long) &cxacru_cb00 }, { /* V = Olitec P = ADSL modem version 2 */ USB_DEVICE(0x08e3, 0x0100), .driver_info = (unsigned long) &cxacru_cafe }, { /* V = Olitec P = ADSL modem version 3 */ USB_DEVICE(0x08e3, 0x0102), .driver_info = (unsigned long) &cxacru_cb00 }, { /* V = Trust/Amigo Technology Co. P = AMX-CA86U */ USB_DEVICE(0x0eb0, 0x3457), .driver_info = (unsigned long) &cxacru_cafe }, { /* V = Zoom P = 5510 */ USB_DEVICE(0x1803, 0x5510), .driver_info = (unsigned long) &cxacru_cb00 }, { /* V = Draytek P = Vigor 318 */ USB_DEVICE(0x0675, 0x0200), .driver_info = (unsigned long) &cxacru_cb00 }, { /* V = Zyxel P = 630-C1 aka OMNI ADSL USB (Annex A) */ USB_DEVICE(0x0586, 0x330a), .driver_info = (unsigned long) &cxacru_cb00 }, { /* V = Zyxel P = 630-C3 aka OMNI ADSL USB (Annex B) */ USB_DEVICE(0x0586, 0x330b), .driver_info = (unsigned long) &cxacru_cb00 }, { /* V = Aethra P = Starmodem UM1020 */ USB_DEVICE(0x0659, 0x0020), .driver_info = (unsigned long) &cxacru_cb00 }, { /* V = Aztech Systems P = ? 
AKA Pirelli AUA-010 */ USB_DEVICE(0x0509, 0x0812), .driver_info = (unsigned long) &cxacru_cb00 }, { /* V = Netopia P = Cayman 3341(Annex A)/3351(Annex B) */ USB_DEVICE(0x100d, 0xcb01), .driver_info = (unsigned long) &cxacru_cb00 }, { /* V = Netopia P = Cayman 3342(Annex A)/3352(Annex B) */ USB_DEVICE(0x100d, 0x3342), .driver_info = (unsigned long) &cxacru_cb00 }, {} }; MODULE_DEVICE_TABLE(usb, cxacru_usb_ids); static struct usbatm_driver cxacru_driver = { .driver_name = cxacru_driver_name, .bind = cxacru_bind, .heavy_init = cxacru_heavy_init, .unbind = cxacru_unbind, .atm_start = cxacru_atm_start, .bulk_in = CXACRU_EP_DATA, .bulk_out = CXACRU_EP_DATA, .rx_padding = 3, .tx_padding = 11, }; static int cxacru_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *usb_dev = interface_to_usbdev(intf); char buf[15]; /* Avoid ADSL routers (cx82310_eth). * Abort if bDeviceClass is 0xff and iProduct is "USB NET CARD". */ if (usb_dev->descriptor.bDeviceClass == USB_CLASS_VENDOR_SPEC && usb_string(usb_dev, usb_dev->descriptor.iProduct, buf, sizeof(buf)) > 0) { if (!strcmp(buf, "USB NET CARD")) { dev_info(&intf->dev, "ignoring cx82310_eth device\n"); return -ENODEV; } } return usbatm_usb_probe(intf, id, &cxacru_driver); } static struct usb_driver cxacru_usb_driver = { .name = cxacru_driver_name, .probe = cxacru_usb_probe, .disconnect = usbatm_usb_disconnect, .id_table = cxacru_usb_ids, .dev_groups = cxacru_groups, }; module_usb_driver(cxacru_usb_driver); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL");
11 28 19 19 20 20 20 20 20 12 8 9 9 20 20 20 20 20 20 20 20 27 27 19 5 3 1 5 5 5 2 2 2 2 2 2 12 11 2 2 1 2 12 19 19 10 10 10 10 10 19 19 19 19 19 12 1 19 19 19 19 19 19 28 27 28 27 26 17 12 5 7 5 26 7 19 13 6 19 19 19 19 14 19 4 19 11 11 10 10 10 2 10 10 10 11 16 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 
462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 
962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 
1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 
1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 
2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2010-2011 EIA Electronics, // Kurt Van Dijck <kurt.van.dijck@eia.be> // Copyright (c) 2018 Protonic, // Robin van der Gracht <robin@protonic.nl> // Copyright (c) 2017-2019 Pengutronix, // Marc Kleine-Budde <kernel@pengutronix.de> // Copyright (c) 2017-2019 Pengutronix, // Oleksij Rempel <kernel@pengutronix.de> #include <linux/can/skb.h> #include <net/can.h> #include "j1939-priv.h" #define J1939_XTP_TX_RETRY_LIMIT 100 #define J1939_ETP_PGN_CTL 0xc800 #define J1939_ETP_PGN_DAT 0xc700 #define J1939_TP_PGN_CTL 0xec00 #define J1939_TP_PGN_DAT 0xeb00 #define J1939_TP_CMD_RTS 0x10 #define J1939_TP_CMD_CTS 0x11 #define J1939_TP_CMD_EOMA 0x13 #define J1939_TP_CMD_BAM 0x20 #define J1939_TP_CMD_ABORT 0xff #define J1939_ETP_CMD_RTS 0x14 #define J1939_ETP_CMD_CTS 0x15 #define J1939_ETP_CMD_DPO 0x16 #define J1939_ETP_CMD_EOMA 0x17 #define J1939_ETP_CMD_ABORT 0xff enum j1939_xtp_abort { J1939_XTP_NO_ABORT = 0, J1939_XTP_ABORT_BUSY = 1, /* Already in one or more connection managed sessions and * cannot support another. * * EALREADY: * Operation already in progress */ J1939_XTP_ABORT_RESOURCE = 2, /* System resources were needed for another task so this * connection managed session was terminated. * * EMSGSIZE: * The socket type requires that message be sent atomically, * and the size of the message to be sent made this * impossible. */ J1939_XTP_ABORT_TIMEOUT = 3, /* A timeout occurred and this is the connection abort to * close the session. 
* * EHOSTUNREACH: * The destination host cannot be reached (probably because * the host is down or a remote router cannot reach it). */ J1939_XTP_ABORT_GENERIC = 4, /* CTS messages received when data transfer is in progress * * EBADMSG: * Not a data message */ J1939_XTP_ABORT_FAULT = 5, /* Maximal retransmit request limit reached * * ENOTRECOVERABLE: * State not recoverable */ J1939_XTP_ABORT_UNEXPECTED_DATA = 6, /* Unexpected data transfer packet * * ENOTCONN: * Transport endpoint is not connected */ J1939_XTP_ABORT_BAD_SEQ = 7, /* Bad sequence number (and software is not able to recover) * * EILSEQ: * Illegal byte sequence */ J1939_XTP_ABORT_DUP_SEQ = 8, /* Duplicate sequence number (and software is not able to * recover) */ J1939_XTP_ABORT_EDPO_UNEXPECTED = 9, /* Unexpected EDPO packet (ETP) or Message size > 1785 bytes * (TP) */ J1939_XTP_ABORT_BAD_EDPO_PGN = 10, /* Unexpected EDPO PGN (PGN in EDPO is bad) */ J1939_XTP_ABORT_EDPO_OUTOF_CTS = 11, /* EDPO number of packets is greater than CTS */ J1939_XTP_ABORT_BAD_EDPO_OFFSET = 12, /* Bad EDPO offset */ J1939_XTP_ABORT_OTHER_DEPRECATED = 13, /* Deprecated. 
Use 250 instead (Any other reason) */ J1939_XTP_ABORT_ECTS_UNXPECTED_PGN = 14, /* Unexpected ECTS PGN (PGN in ECTS is bad) */ J1939_XTP_ABORT_ECTS_TOO_BIG = 15, /* ECTS requested packets exceeds message size */ J1939_XTP_ABORT_OTHER = 250, /* Any other reason (if a Connection Abort reason is * identified that is not listed in the table use code 250) */ }; static unsigned int j1939_tp_block = 255; static unsigned int j1939_tp_packet_delay; static unsigned int j1939_tp_padding = 1; /* helpers */ static const char *j1939_xtp_abort_to_str(enum j1939_xtp_abort abort) { switch (abort) { case J1939_XTP_ABORT_BUSY: return "Already in one or more connection managed sessions and cannot support another."; case J1939_XTP_ABORT_RESOURCE: return "System resources were needed for another task so this connection managed session was terminated."; case J1939_XTP_ABORT_TIMEOUT: return "A timeout occurred and this is the connection abort to close the session."; case J1939_XTP_ABORT_GENERIC: return "CTS messages received when data transfer is in progress"; case J1939_XTP_ABORT_FAULT: return "Maximal retransmit request limit reached"; case J1939_XTP_ABORT_UNEXPECTED_DATA: return "Unexpected data transfer packet"; case J1939_XTP_ABORT_BAD_SEQ: return "Bad sequence number (and software is not able to recover)"; case J1939_XTP_ABORT_DUP_SEQ: return "Duplicate sequence number (and software is not able to recover)"; case J1939_XTP_ABORT_EDPO_UNEXPECTED: return "Unexpected EDPO packet (ETP) or Message size > 1785 bytes (TP)"; case J1939_XTP_ABORT_BAD_EDPO_PGN: return "Unexpected EDPO PGN (PGN in EDPO is bad)"; case J1939_XTP_ABORT_EDPO_OUTOF_CTS: return "EDPO number of packets is greater than CTS"; case J1939_XTP_ABORT_BAD_EDPO_OFFSET: return "Bad EDPO offset"; case J1939_XTP_ABORT_OTHER_DEPRECATED: return "Deprecated. 
Use 250 instead (Any other reason)"; case J1939_XTP_ABORT_ECTS_UNXPECTED_PGN: return "Unexpected ECTS PGN (PGN in ECTS is bad)"; case J1939_XTP_ABORT_ECTS_TOO_BIG: return "ECTS requested packets exceeds message size"; case J1939_XTP_ABORT_OTHER: return "Any other reason (if a Connection Abort reason is identified that is not listed in the table use code 250)"; default: return "<unknown>"; } } static int j1939_xtp_abort_to_errno(struct j1939_priv *priv, enum j1939_xtp_abort abort) { int err; switch (abort) { case J1939_XTP_NO_ABORT: WARN_ON_ONCE(abort == J1939_XTP_NO_ABORT); err = 0; break; case J1939_XTP_ABORT_BUSY: err = EALREADY; break; case J1939_XTP_ABORT_RESOURCE: err = EMSGSIZE; break; case J1939_XTP_ABORT_TIMEOUT: err = EHOSTUNREACH; break; case J1939_XTP_ABORT_GENERIC: err = EBADMSG; break; case J1939_XTP_ABORT_FAULT: err = ENOTRECOVERABLE; break; case J1939_XTP_ABORT_UNEXPECTED_DATA: err = ENOTCONN; break; case J1939_XTP_ABORT_BAD_SEQ: err = EILSEQ; break; case J1939_XTP_ABORT_DUP_SEQ: err = EPROTO; break; case J1939_XTP_ABORT_EDPO_UNEXPECTED: err = EPROTO; break; case J1939_XTP_ABORT_BAD_EDPO_PGN: err = EPROTO; break; case J1939_XTP_ABORT_EDPO_OUTOF_CTS: err = EPROTO; break; case J1939_XTP_ABORT_BAD_EDPO_OFFSET: err = EPROTO; break; case J1939_XTP_ABORT_OTHER_DEPRECATED: err = EPROTO; break; case J1939_XTP_ABORT_ECTS_UNXPECTED_PGN: err = EPROTO; break; case J1939_XTP_ABORT_ECTS_TOO_BIG: err = EPROTO; break; case J1939_XTP_ABORT_OTHER: err = EPROTO; break; default: netdev_warn(priv->ndev, "Unknown abort code %i", abort); err = EPROTO; } return err; } static inline void j1939_session_list_lock(struct j1939_priv *priv) { spin_lock_bh(&priv->active_session_list_lock); } static inline void j1939_session_list_unlock(struct j1939_priv *priv) { spin_unlock_bh(&priv->active_session_list_lock); } void j1939_session_get(struct j1939_session *session) { kref_get(&session->kref); } /* session completion functions */ static void __j1939_session_drop(struct 
j1939_session *session)
{
	if (!session->transmission)
		return;

	j1939_sock_pending_del(session->sk);
	sock_put(session->sk);
}

/* Final teardown of a session once its last reference is gone.
 *
 * Queues the matching error-queue notification (TX abort/ack or RX abort),
 * frees every skb still queued on the session, drops the socket and priv
 * references and finally frees the session itself.
 */
static void j1939_session_destroy(struct j1939_session *session)
{
	struct sk_buff *skb;

	if (session->transmission) {
		if (session->err)
			j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_ABORT);
		else
			j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_ACK);
	} else if (session->err) {
		j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT);
	}

	netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);

	/* the session must be unlinked from both lists by now */
	WARN_ON_ONCE(!list_empty(&session->sk_session_queue_entry));
	WARN_ON_ONCE(!list_empty(&session->active_session_list_entry));

	while ((skb = skb_dequeue(&session->skb_queue)) != NULL) {
		/* drop ref taken in j1939_session_skb_queue() */
		skb_unref(skb);
		kfree_skb(skb);
	}
	__j1939_session_drop(session);
	j1939_priv_put(session->priv);
	kfree(session);
}

/* kref release callback: destroy the session embedding @kref. */
static void __j1939_session_release(struct kref *kref)
{
	struct j1939_session *session = container_of(kref, struct j1939_session,
						     kref);

	j1939_session_destroy(session);
}

/* Drop one reference on @session; the last put destroys it. */
void j1939_session_put(struct j1939_session *session)
{
	kref_put(&session->kref, __j1939_session_release);
}

/* Cancel the tx timer and, if hrtimer_cancel() reports a non-zero result,
 * drop one session reference.
 */
static void j1939_session_txtimer_cancel(struct j1939_session *session)
{
	if (hrtimer_cancel(&session->txtimer))
		j1939_session_put(session);
}

/* Cancel the rx timer and, if hrtimer_cancel() reports a non-zero result,
 * drop one session reference.
 */
static void j1939_session_rxtimer_cancel(struct j1939_session *session)
{
	if (hrtimer_cancel(&session->rxtimer))
		j1939_session_put(session);
}

/* Cancel both the tx and the rx timer of @session. */
void j1939_session_timers_cancel(struct j1939_session *session)
{
	j1939_session_txtimer_cancel(session);
	j1939_session_rxtimer_cancel(session);
}

/* True when @skcb has no destination name and the destination address
 * is 0xff.
 */
static inline bool j1939_cb_is_broadcast(const struct j1939_sk_buff_cb *skcb)
{
	return (!skcb->addr.dst_name && (skcb->addr.da == 0xff));
}

static void j1939_session_skb_drop_old(struct j1939_session *session)
{
	struct sk_buff *do_skb;
	struct j1939_sk_buff_cb *do_skcb;
	unsigned int offset_start;
	unsigned long flags;

	if (skb_queue_len(&session->skb_queue) < 2)
		return;

	offset_start = session->pkt.tx_acked * 7;
spin_lock_irqsave(&session->skb_queue.lock, flags); do_skb = skb_peek(&session->skb_queue); do_skcb = j1939_skb_to_cb(do_skb); if ((do_skcb->offset + do_skb->len) < offset_start) { __skb_unlink(do_skb, &session->skb_queue); /* drop ref taken in j1939_session_skb_queue() */ skb_unref(do_skb); spin_unlock_irqrestore(&session->skb_queue.lock, flags); kfree_skb(do_skb); } else { spin_unlock_irqrestore(&session->skb_queue.lock, flags); } } void j1939_session_skb_queue(struct j1939_session *session, struct sk_buff *skb) { struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); struct j1939_priv *priv = session->priv; j1939_ac_fixup(priv, skb); if (j1939_address_is_unicast(skcb->addr.da) && priv->ents[skcb->addr.da].nusers) skcb->flags |= J1939_ECU_LOCAL_DST; skcb->flags |= J1939_ECU_LOCAL_SRC; skb_get(skb); skb_queue_tail(&session->skb_queue, skb); } static struct sk_buff *j1939_session_skb_get_by_offset(struct j1939_session *session, unsigned int offset_start) { struct j1939_priv *priv = session->priv; struct j1939_sk_buff_cb *do_skcb; struct sk_buff *skb = NULL; struct sk_buff *do_skb; unsigned long flags; spin_lock_irqsave(&session->skb_queue.lock, flags); skb_queue_walk(&session->skb_queue, do_skb) { do_skcb = j1939_skb_to_cb(do_skb); if ((offset_start >= do_skcb->offset && offset_start < (do_skcb->offset + do_skb->len)) || (offset_start == 0 && do_skcb->offset == 0 && do_skb->len == 0)) { skb = do_skb; } } if (skb) skb_get(skb); spin_unlock_irqrestore(&session->skb_queue.lock, flags); if (!skb) netdev_dbg(priv->ndev, "%s: 0x%p: no skb found for start: %i, queue size: %i\n", __func__, session, offset_start, skb_queue_len(&session->skb_queue)); return skb; } static struct sk_buff *j1939_session_skb_get(struct j1939_session *session) { unsigned int offset_start; offset_start = session->pkt.dpo * 7; return j1939_session_skb_get_by_offset(session, offset_start); } /* see if we are receiver * returns 0 for broadcasts, although we will receive them */ static inline int 
j1939_tp_im_receiver(const struct j1939_sk_buff_cb *skcb) { return skcb->flags & J1939_ECU_LOCAL_DST; } /* see if we are sender */ static inline int j1939_tp_im_transmitter(const struct j1939_sk_buff_cb *skcb) { return skcb->flags & J1939_ECU_LOCAL_SRC; } /* see if we are involved as either receiver or transmitter */ static int j1939_tp_im_involved(const struct j1939_sk_buff_cb *skcb, bool swap) { if (swap) return j1939_tp_im_receiver(skcb); else return j1939_tp_im_transmitter(skcb); } static int j1939_tp_im_involved_anydir(struct j1939_sk_buff_cb *skcb) { return skcb->flags & (J1939_ECU_LOCAL_SRC | J1939_ECU_LOCAL_DST); } /* extract pgn from flow-ctl message */ static inline pgn_t j1939_xtp_ctl_to_pgn(const u8 *dat) { pgn_t pgn; pgn = (dat[7] << 16) | (dat[6] << 8) | (dat[5] << 0); if (j1939_pgn_is_pdu1(pgn)) pgn &= 0xffff00; return pgn; } static inline unsigned int j1939_tp_ctl_to_size(const u8 *dat) { return (dat[2] << 8) + (dat[1] << 0); } static inline unsigned int j1939_etp_ctl_to_packet(const u8 *dat) { return (dat[4] << 16) | (dat[3] << 8) | (dat[2] << 0); } static inline unsigned int j1939_etp_ctl_to_size(const u8 *dat) { return (dat[4] << 24) | (dat[3] << 16) | (dat[2] << 8) | (dat[1] << 0); } /* find existing session: * reverse: swap cb's src & dst * there is no problem with matching broadcasts, since * broadcasts (no dst, no da) would never call this * with reverse == true */ static bool j1939_session_match(struct j1939_addr *se_addr, struct j1939_addr *sk_addr, bool reverse) { if (se_addr->type != sk_addr->type) return false; if (reverse) { if (se_addr->src_name) { if (se_addr->src_name != sk_addr->dst_name) return false; } else if (se_addr->sa != sk_addr->da) { return false; } if (se_addr->dst_name) { if (se_addr->dst_name != sk_addr->src_name) return false; } else if (se_addr->da != sk_addr->sa) { return false; } } else { if (se_addr->src_name) { if (se_addr->src_name != sk_addr->src_name) return false; } else if (se_addr->sa != sk_addr->sa) { return 
false;
		}

		if (se_addr->dst_name) {
			if (se_addr->dst_name != sk_addr->dst_name)
				return false;
		} else if (se_addr->da != sk_addr->da) {
			return false;
		}
	}

	return true;
}

/* Look up a session on list @root by address and direction.
 *
 * Must be called with the active session list lock held. A session matches
 * when j1939_session_match() accepts its address against @addr (optionally
 * @reverse-d) and its transmission flag equals @transmitter.
 *
 * Returns the first match with a reference held for the caller, or NULL.
 */
static struct
j1939_session *j1939_session_get_by_addr_locked(struct j1939_priv *priv,
						struct list_head *root,
						struct j1939_addr *addr,
						bool reverse, bool transmitter)
{
	struct j1939_session *session;

	lockdep_assert_held(&priv->active_session_list_lock);

	list_for_each_entry(session, root, active_session_list_entry) {
		/* reference is kept on a match, dropped again otherwise */
		j1939_session_get(session);
		if (j1939_session_match(&session->skcb.addr, addr, reverse) &&
		    session->transmission == transmitter)
			return session;
		j1939_session_put(session);
	}

	return NULL;
}

/* Look up the active J1939_SIMPLE session that belongs to @skb.
 *
 * Must be called with the active session list lock held. The session is
 * identified by its tskey and owning socket.
 *
 * Returns the session with a reference held for the caller, or NULL.
 */
static struct
j1939_session *j1939_session_get_simple(struct j1939_priv *priv,
					struct sk_buff *skb)
{
	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
	struct j1939_session *session;

	lockdep_assert_held(&priv->active_session_list_lock);

	list_for_each_entry(session, &priv->active_session_list,
			    active_session_list_entry) {
		j1939_session_get(session);
		if (session->skcb.addr.type == J1939_SIMPLE &&
		    session->tskey == skcb->tskey && session->sk == skb->sk)
			return session;
		j1939_session_put(session);
	}

	return NULL;
}

/* Locking wrapper around j1939_session_get_by_addr_locked() that searches
 * the interface's active session list.
 */
static struct
j1939_session *j1939_session_get_by_addr(struct j1939_priv *priv,
					 struct j1939_addr *addr,
					 bool reverse, bool transmitter)
{
	struct j1939_session *session;

	j1939_session_list_lock(priv);
	session = j1939_session_get_by_addr_locked(priv,
						   &priv->active_session_list,
						   addr, reverse, transmitter);
	j1939_session_list_unlock(priv);

	return session;
}

/* Exchange source and destination (names, addresses and the LOCAL_SRC /
 * LOCAL_DST flags) in @skcb.
 */
static void j1939_skbcb_swap(struct j1939_sk_buff_cb *skcb)
{
	u8 tmp = 0;

	swap(skcb->addr.dst_name, skcb->addr.src_name);
	swap(skcb->addr.da, skcb->addr.sa);

	/* swap SRC and DST flags, leave other untouched */
	if (skcb->flags & J1939_ECU_LOCAL_SRC)
		tmp |= J1939_ECU_LOCAL_DST;
	if (skcb->flags & J1939_ECU_LOCAL_DST)
		tmp |= J1939_ECU_LOCAL_SRC;
	skcb->flags &= ~(J1939_ECU_LOCAL_SRC | J1939_ECU_LOCAL_DST);
	skcb->flags |= tmp;
}

static struct
sk_buff *j1939_tp_tx_dat_new(struct j1939_priv *priv,
			     const struct j1939_sk_buff_cb *re_skcb,
			     bool ctl,
			     bool swap_src_dst)
{
	struct sk_buff *skb;
	struct can_skb_ext *csx;
	struct j1939_sk_buff_cb *skcb;

	skb = alloc_skb(sizeof(struct can_frame), GFP_ATOMIC);
	if (unlikely(!skb))
		return ERR_PTR(-ENOMEM);

	csx = can_skb_ext_add(skb);
	if (!csx) {
		kfree_skb(skb);
		return ERR_PTR(-ENOMEM);
	}

	skb->dev = priv->ndev;
	csx->can_iif = priv->ndev->ifindex;
	/* reserve CAN header */
	skb_reserve(skb, offsetof(struct can_frame, data));

	/* skb->cb must be large enough to hold a j1939_sk_buff_cb structure */
	BUILD_BUG_ON(sizeof(skb->cb) < sizeof(*re_skcb));

	memcpy(skb->cb, re_skcb, sizeof(*re_skcb));
	skcb = j1939_skb_to_cb(skb);
	if (swap_src_dst)
		j1939_skbcb_swap(skcb);

	/* select the control or data PGN of the (E)TP flavour in use */
	if (ctl) {
		if (skcb->addr.type == J1939_ETP)
			skcb->addr.pgn = J1939_ETP_PGN_CTL;
		else
			skcb->addr.pgn = J1939_TP_PGN_CTL;
	} else {
		if (skcb->addr.type == J1939_ETP)
			skcb->addr.pgn = J1939_ETP_PGN_DAT;
		else
			skcb->addr.pgn = J1939_TP_PGN_DAT;
	}

	return skb;
}

/* TP transmit packet functions */

/* Send one data frame for @session.
 *
 * Copies @len bytes from @dat into a new skb and, when padding is enabled
 * and @len is below 8, fills the remainder with 0xff.
 *
 * Returns the result of j1939_send_one() or a negative errno when the skb
 * could not be allocated.
 */
static int j1939_tp_tx_dat(struct j1939_session *session,
			   const u8 *dat, int len)
{
	struct j1939_priv *priv = session->priv;
	struct sk_buff *skb;

	skb = j1939_tp_tx_dat_new(priv, &session->skcb,
				  false, false);
	if (IS_ERR(skb))
		return PTR_ERR(skb);

	skb_put_data(skb, dat, len);
	if (j1939_tp_padding && len < 8)
		memset(skb_put(skb, 8 - len), 0xff, 8 - len);

	return j1939_send_one(priv, skb);
}

/* Build and send an 8 byte control frame.
 *
 * The first five bytes are taken from @dat, bytes 5..7 carry @pgn in little
 * endian order. Nothing is sent (and 0 is returned) when the local node is
 * not involved according to j1939_tp_im_involved().
 */
static int j1939_xtp_do_tx_ctl(struct j1939_priv *priv,
			       const struct j1939_sk_buff_cb *re_skcb,
			       bool swap_src_dst, pgn_t pgn, const u8 *dat)
{
	struct sk_buff *skb;
	u8 *skdat;

	if (!j1939_tp_im_involved(re_skcb, swap_src_dst))
		return 0;

	skb = j1939_tp_tx_dat_new(priv, re_skcb, true, swap_src_dst);
	if (IS_ERR(skb))
		return PTR_ERR(skb);

	skdat = skb_put(skb, 8);
	memcpy(skdat, dat, 5);
	skdat[5] = (pgn >> 0);
	skdat[6] = (pgn >> 8);
	skdat[7] = (pgn >> 16);

	return j1939_send_one(priv, skb);
}

static inline int j1939_tp_tx_ctl(struct j1939_session *session,
				  bool
swap_src_dst, const u8 *dat)
{
	struct j1939_priv *priv = session->priv;

	return j1939_xtp_do_tx_ctl(priv, &session->skcb,
				   swap_src_dst,
				   session->skcb.addr.pgn, dat);
}

/* Send a connection abort control frame carrying reason @err for @pgn.
 *
 * Returns 0 without sending when the local node is not involved.
 */
static int j1939_xtp_tx_abort(struct j1939_priv *priv,
			      const struct j1939_sk_buff_cb *re_skcb,
			      bool swap_src_dst,
			      enum j1939_xtp_abort err,
			      pgn_t pgn)
{
	u8 dat[5];

	if (!j1939_tp_im_involved(re_skcb, swap_src_dst))
		return 0;

	memset(dat, 0xff, sizeof(dat));
	dat[0] = J1939_TP_CMD_ABORT;
	dat[1] = err;
	return j1939_xtp_do_tx_ctl(priv, re_skcb, swap_src_dst, pgn, dat);
}

/* Arm the tx timer to fire in @msec milliseconds; a session reference is
 * taken before the timer is started.
 */
void j1939_tp_schedule_txtimer(struct j1939_session *session, int msec)
{
	j1939_session_get(session);
	hrtimer_start(&session->txtimer, ms_to_ktime(msec),
		      HRTIMER_MODE_REL_SOFT);
}

/* (Re)arm the rx timeout: cancel a pending rx timer, take a session
 * reference and start the timer with @msec milliseconds.
 */
static inline void j1939_tp_set_rxtimeout(struct j1939_session *session,
					  int msec)
{
	j1939_session_rxtimer_cancel(session);
	j1939_session_get(session);
	hrtimer_start(&session->rxtimer, ms_to_ktime(msec),
		      HRTIMER_MODE_REL_SOFT);
}

/* Send the opening control frame of a transfer: ETP RTS, TP BAM (broadcast)
 * or TP RTS.
 *
 * Returns 0 when the frame was sent or had already been sent (same command
 * as last_txcmd), otherwise the negative error from the send path.
 */
static int j1939_session_tx_rts(struct j1939_session *session)
{
	u8 dat[8];
	int ret;

	memset(dat, 0xff, sizeof(dat));

	dat[1] = (session->total_message_size >> 0);
	dat[2] = (session->total_message_size >> 8);
	dat[3] = session->pkt.total;

	if (session->skcb.addr.type == J1939_ETP) {
		/* ETP carries the size as 32 bit value in bytes 1..4 */
		dat[0] = J1939_ETP_CMD_RTS;
		dat[1] = (session->total_message_size >> 0);
		dat[2] = (session->total_message_size >> 8);
		dat[3] = (session->total_message_size >> 16);
		dat[4] = (session->total_message_size >> 24);
	} else if (j1939_cb_is_broadcast(&session->skcb)) {
		dat[0] = J1939_TP_CMD_BAM;
		/* fake cts for broadcast */
		session->pkt.tx = 0;
	} else {
		dat[0] = J1939_TP_CMD_RTS;
		dat[4] = dat[3];
	}

	if (dat[0] == session->last_txcmd)
		/* done already */
		return 0;

	ret = j1939_tp_tx_ctl(session, false, dat);
	if (ret < 0)
		return ret;

	session->last_txcmd = dat[0];
	if (dat[0] == J1939_TP_CMD_BAM) {
		j1939_tp_schedule_txtimer(session, 50);
		j1939_tp_set_rxtimeout(session, 250);
	} else {
		j1939_tp_set_rxtimeout(session, 1250);
	}

	netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__,
session);

	return 0;
}

/* Send an ETP data packet offset (DPO) control frame.
 *
 * The offset is set to the number of acknowledged packets; byte 1 carries
 * the number of packets up to pkt.last, bytes 2..4 the offset in little
 * endian order. On success the rx timeout is re-armed and pkt.tx is reset
 * to pkt.tx_acked.
 */
static int j1939_session_tx_dpo(struct j1939_session *session)
{
	unsigned int pkt;
	u8 dat[8];
	int ret;

	memset(dat, 0xff, sizeof(dat));

	dat[0] = J1939_ETP_CMD_DPO;
	session->pkt.dpo = session->pkt.tx_acked;
	pkt = session->pkt.dpo;
	dat[1] = session->pkt.last - session->pkt.tx_acked;
	dat[2] = (pkt >> 0);
	dat[3] = (pkt >> 8);
	dat[4] = (pkt >> 16);

	ret = j1939_tp_tx_ctl(session, false, dat);
	if (ret < 0)
		return ret;

	session->last_txcmd = dat[0];
	j1939_tp_set_rxtimeout(session, 1250);
	session->pkt.tx = session->pkt.tx_acked;

	netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);

	return 0;
}

static int j1939_session_tx_dat(struct j1939_session *session)
{
	struct j1939_priv *priv = session->priv;
	struct j1939_sk_buff_cb *se_skcb;
	int offset, pkt_done, pkt_end;
	unsigned int len, pdelay;
	struct sk_buff *se_skb;
	const u8 *tpdat;
	int ret = 0;
	u8 dat[8];

	se_skb = j1939_session_skb_get_by_offset(session, session->pkt.tx * 7);
	if (!se_skb)
		return -ENOBUFS;

	se_skcb = j1939_skb_to_cb(se_skb);
	tpdat = se_skb->data;
	ret = 0;
	pkt_done = 0;
	if (session->skcb.addr.type != J1939_ETP &&
	    j1939_cb_is_broadcast(&session->skcb))
		pkt_end = session->pkt.total;
	else
		pkt_end = session->pkt.last;

	while (session->pkt.tx < pkt_end) {
		dat[0] = session->pkt.tx - session->pkt.dpo + 1;
		offset = (session->pkt.tx * 7) - se_skcb->offset;
		len = se_skb->len - offset;
		if (len > 7)
			len = 7;

		if (offset + len > se_skb->len) {
			netdev_err_once(priv->ndev,
					"%s: 0x%p: requested data outside of queued buffer: offset %i, len %i, pkt.tx: %i\n",
					__func__, session, se_skcb->offset,
					se_skb->len , session->pkt.tx);
			ret = -EOVERFLOW;
			goto out_free;
		}

		if (!len) {
			ret = -ENOBUFS;
			break;
		}

		memcpy(&dat[1], &tpdat[offset], len);
		ret = j1939_tp_tx_dat(session, dat, len + 1);
		if (ret < 0) {
			/* ENOBUFS == CAN interface TX queue is full */
			if (ret != -ENOBUFS)
				netdev_alert(priv->ndev,
					     "%s: 0x%p: queue data error: %i\n",
					     __func__, session, ret);
			break;
		}

		session->last_txcmd = 0xff;
		pkt_done++;
session->pkt.tx++;
		pdelay = j1939_cb_is_broadcast(&session->skcb) ? 50 :
			j1939_tp_packet_delay;

		if (session->pkt.tx < session->pkt.total && pdelay) {
			j1939_tp_schedule_txtimer(session, pdelay);
			break;
		}
	}

	if (pkt_done)
		j1939_tp_set_rxtimeout(session, 250);

 out_free:
	if (ret)
		kfree_skb(se_skb);
	else
		consume_skb(se_skb);

	return ret;
}

/* Perform the next transmit step of a sending (E)TP session.
 *
 * The step is chosen from session->last_cmd: nothing received yet sends the
 * RTS/BAM; an ETP CTS is answered with a DPO (unless one was just sent)
 * followed by data; TP CTS, previous data, DPO and BAM continue with data.
 *
 * Returns 0 on success, -EINVAL when the local node is not the transmitter,
 * or the error of the failing step.
 */
static int j1939_xtp_txnext_transmiter(struct j1939_session *session)
{
	struct j1939_priv *priv = session->priv;
	int ret = 0;

	if (!j1939_tp_im_transmitter(&session->skcb)) {
		netdev_alert(priv->ndev, "%s: 0x%p: called by not transmitter!\n",
			     __func__, session);
		return -EINVAL;
	}

	switch (session->last_cmd) {
	case 0:
		ret = j1939_session_tx_rts(session);
		break;

	case J1939_ETP_CMD_CTS:
		if (session->last_txcmd != J1939_ETP_CMD_DPO) {
			ret = j1939_session_tx_dpo(session);
			if (ret)
				return ret;
		}

		fallthrough;
	case J1939_TP_CMD_CTS:
	case 0xff: /* did some data */
	case J1939_ETP_CMD_DPO:
	case J1939_TP_CMD_BAM:
		ret = j1939_session_tx_dat(session);

		break;
	default:
		netdev_alert(priv->ndev, "%s: 0x%p: unexpected last_cmd: %x\n",
			     __func__, session, session->last_cmd);
	}

	return ret;
}

static int j1939_session_tx_cts(struct j1939_session *session)
{
	struct j1939_priv *priv = session->priv;
	unsigned int pkt, len;
	int ret;
	u8 dat[8];

	if (!j1939_sk_recv_match(priv, &session->skcb))
		return -ENOENT;

	/* packets to request: remaining ones, limited by the session block
	 * size and the j1939_tp_block tunable (255 when that is 0)
	 */
	len = session->pkt.total - session->pkt.rx;
	len = min3(len, session->pkt.block, j1939_tp_block ?: 255);
	memset(dat, 0xff, sizeof(dat));

	if (session->skcb.addr.type == J1939_ETP) {
		pkt = session->pkt.rx + 1;
		dat[0] = J1939_ETP_CMD_CTS;
		dat[1] = len;
		dat[2] = (pkt >> 0);
		dat[3] = (pkt >> 8);
		dat[4] = (pkt >> 16);
	} else {
		dat[0] = J1939_TP_CMD_CTS;
		dat[1] = len;
		dat[2] = session->pkt.rx + 1;
	}

	if (dat[0] == session->last_txcmd)
		/* done already */
		return 0;

	ret = j1939_tp_tx_ctl(session, true, dat);
	if (ret < 0)
		return ret;

	if (len)
		/* only mark cts done when len is set */
		session->last_txcmd = dat[0];
	j1939_tp_set_rxtimeout(session, 1250);
netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);

	return 0;
}

/* Send the end-of-message acknowledgment for a received transfer.
 *
 * ETP reports the message size in bytes 1..4; TP reports the size in bytes
 * 1..2 and the total packet count in byte 3.
 *
 * Returns 0 when sent or already sent, -ENOENT when j1939_sk_recv_match()
 * finds no match for the session, or the error from the send path.
 */
static int j1939_session_tx_eoma(struct j1939_session *session)
{
	struct j1939_priv *priv = session->priv;
	u8 dat[8];
	int ret;

	if (!j1939_sk_recv_match(priv, &session->skcb))
		return -ENOENT;

	memset(dat, 0xff, sizeof(dat));

	if (session->skcb.addr.type == J1939_ETP) {
		dat[0] = J1939_ETP_CMD_EOMA;
		dat[1] = session->total_message_size >> 0;
		dat[2] = session->total_message_size >> 8;
		dat[3] = session->total_message_size >> 16;
		dat[4] = session->total_message_size >> 24;
	} else {
		dat[0] = J1939_TP_CMD_EOMA;
		dat[1] = session->total_message_size;
		dat[2] = session->total_message_size >> 8;
		dat[3] = session->pkt.total;
	}

	if (dat[0] == session->last_txcmd)
		/* done already */
		return 0;

	ret = j1939_tp_tx_ctl(session, true, dat);
	if (ret < 0)
		return ret;

	session->last_txcmd = dat[0];

	/* wait for the EOMA packet to come in */
	j1939_tp_set_rxtimeout(session, 1250);

	netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);

	return 0;
}

/* Perform the next transmit step of a receiving (E)TP session.
 *
 * After an RTS a CTS is sent. After CTS/data/DPO nothing is sent for TP
 * broadcasts; otherwise an EOMA is sent once all packets arrived, or a new
 * CTS once the currently granted block is complete.
 *
 * Returns 0 on success, -EINVAL when the local node is not the receiver,
 * or the error of the failing step.
 */
static int j1939_xtp_txnext_receiver(struct j1939_session *session)
{
	struct j1939_priv *priv = session->priv;
	int ret = 0;

	if (!j1939_tp_im_receiver(&session->skcb)) {
		netdev_alert(priv->ndev, "%s: 0x%p: called by not receiver!\n",
			     __func__, session);
		return -EINVAL;
	}

	switch (session->last_cmd) {
	case J1939_TP_CMD_RTS:
	case J1939_ETP_CMD_RTS:
		ret = j1939_session_tx_cts(session);
		break;

	case J1939_ETP_CMD_CTS:
	case J1939_TP_CMD_CTS:
	case 0xff: /* did some data */
	case J1939_ETP_CMD_DPO:
		if ((session->skcb.addr.type == J1939_TP &&
		     j1939_cb_is_broadcast(&session->skcb)))
			break;

		if (session->pkt.rx >= session->pkt.total) {
			ret = j1939_session_tx_eoma(session);
		} else if (session->pkt.rx >= session->pkt.last) {
			/* clear last_txcmd so the CTS is not skipped as duplicate */
			session->last_txcmd = 0;
			ret = j1939_session_tx_cts(session);
		}
		break;
	default:
		netdev_alert(priv->ndev, "%s: 0x%p: unexpected last_cmd: %x\n",
			     __func__, session, session->last_cmd);
	}

	return ret;
}

static int j1939_simple_txnext(struct j1939_session *session)
{
struct j1939_priv *priv = session->priv;
	struct sk_buff *se_skb = j1939_session_skb_get(session);
	struct sk_buff *skb;
	int ret;

	if (!se_skb)
		return 0;

	skb = skb_clone(se_skb, GFP_ATOMIC);
	if (!skb) {
		ret = -ENOMEM;
		goto out_free;
	}

	/* the cloned skb points to the skb extension of the original se_skb
	 * with an increased refcount. skb_ext_add() creates a copy to
	 * separate the skb extension data which is needed to modify the
	 * can_framelen in can_put_echo_skb().
	 */
	if (!skb_ext_add(skb, SKB_EXT_CAN)) {
		kfree_skb(skb);
		ret = -ENOMEM;
		goto out_free;
	}

	can_skb_set_owner(skb, se_skb->sk);

	j1939_tp_set_rxtimeout(session, J1939_SIMPLE_ECHO_TIMEOUT_MS);

	ret = j1939_send_one(priv, skb);
	if (ret)
		goto out_free;

	j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_SCHED);
	j1939_sk_queue_activate_next(session);

 out_free:
	/* release the reference obtained from j1939_session_skb_get() */
	if (ret)
		kfree_skb(se_skb);
	else
		consume_skb(se_skb);

	return ret;
}

/* Remove @session from the active session list if it is in an active state.
 *
 * Must be called with the active session list lock held. On removal the
 * state becomes J1939_SESSION_DONE and one session reference is dropped.
 *
 * Returns true when the session was active and has been removed.
 */
static bool j1939_session_deactivate_locked(struct j1939_session *session)
{
	bool active = false;

	lockdep_assert_held(&session->priv->active_session_list_lock);

	if (session->state >= J1939_SESSION_ACTIVE &&
	    session->state < J1939_SESSION_ACTIVE_MAX) {
		active = true;

		list_del_init(&session->active_session_list_entry);
		session->state = J1939_SESSION_DONE;
		j1939_session_put(session);
	}

	return active;
}

/* Locking wrapper around j1939_session_deactivate_locked(). */
static bool j1939_session_deactivate(struct j1939_session *session)
{
	struct j1939_priv *priv = session->priv;
	bool active;

	j1939_session_list_lock(priv);
	active = j1939_session_deactivate_locked(session);
	j1939_session_list_unlock(priv);

	return active;
}

/* Deactivate @session and, only if it was still active, let the socket
 * queue activate its next session.
 */
static void
j1939_session_deactivate_activate_next(struct j1939_session *session)
{
	if (j1939_session_deactivate(session))
		j1939_sk_queue_activate_next(session);
}

static void __j1939_session_cancel(struct j1939_session *session,
				   enum j1939_xtp_abort err)
{
	struct j1939_priv *priv = session->priv;

	WARN_ON_ONCE(!err);
	lockdep_assert_held(&session->priv->active_session_list_lock);

	session->err = j1939_xtp_abort_to_errno(priv, err);
	session->state =
J1939_SESSION_WAITING_ABORT;
	/* do not send aborts on incoming broadcasts */
	if (!j1939_cb_is_broadcast(&session->skcb)) {
		j1939_xtp_tx_abort(priv, &session->skcb,
				   !session->transmission,
				   err, session->skcb.addr.pgn);
	}

	if (session->sk)
		j1939_sk_send_loop_abort(session->sk, session->err);
}

/* Abort @session with reason @err.
 *
 * Under the active session list lock, a session that is active and not yet
 * waiting for an abort gets its rx timeout set to
 * J1939_XTP_ABORT_TIMEOUT_MS and is cancelled. Sessions without a socket
 * additionally get an RX abort queued on the error queue.
 */
static void j1939_session_cancel(struct j1939_session *session,
				 enum j1939_xtp_abort err)
{
	j1939_session_list_lock(session->priv);

	if (session->state >= J1939_SESSION_ACTIVE &&
	    session->state < J1939_SESSION_WAITING_ABORT) {
		j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS);
		__j1939_session_cancel(session, err);
	}

	j1939_session_list_unlock(session->priv);

	if (!session->sk)
		j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT);
}

static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer)
{
	struct j1939_session *session =
		container_of(hrtimer, struct j1939_session, txtimer);
	struct j1939_priv *priv = session->priv;
	int ret = 0;

	if (session->skcb.addr.type == J1939_SIMPLE) {
		ret = j1939_simple_txnext(session);
	} else {
		if (session->transmission)
			ret = j1939_xtp_txnext_transmiter(session);
		else
			ret = j1939_xtp_txnext_receiver(session);
	}

	switch (ret) {
	case -ENOBUFS:
		/* Retry limit is currently arbitrarily chosen */
		if (session->tx_retry < J1939_XTP_TX_RETRY_LIMIT) {
			session->tx_retry++;
			j1939_tp_schedule_txtimer(session,
						  10 + get_random_u32_below(16));
		} else {
			netdev_alert(priv->ndev, "%s: 0x%p: tx retry count reached\n",
				     __func__, session);
			session->err = -ENETUNREACH;
			j1939_session_rxtimer_cancel(session);
			j1939_session_deactivate_activate_next(session);
		}
		break;
	case -ENETDOWN:
		/* In this case we should get a netdev_event(), all active
		 * sessions will be cleared by j1939_cancel_active_session().
		 * So handle this as an error, but let
		 * j1939_cancel_active_session() do the cleanup including
		 * propagation of the error to user space.
*/
		break;
	case -EOVERFLOW:
		j1939_session_cancel(session, J1939_XTP_ABORT_ECTS_TOO_BIG);
		break;
	case 0:
		session->tx_retry = 0;
		break;
	default:
		netdev_alert(priv->ndev, "%s: 0x%p: tx aborted with unknown reason: %i\n",
			     __func__, session, ret);
		if (session->skcb.addr.type != J1939_SIMPLE) {
			j1939_session_cancel(session, J1939_XTP_ABORT_OTHER);
		} else {
			session->err = ret;
			j1939_session_rxtimer_cancel(session);
			j1939_session_deactivate_activate_next(session);
		}
	}

	/* drop the reference held for this timer run */
	j1939_session_put(session);

	return HRTIMER_NORESTART;
}

/* Finish a session that completed without error.
 *
 * For received sessions the skb returned by j1939_session_skb_get() is
 * handed to j1939_sk_recv() first. The session is then deactivated and the
 * next queued session is activated.
 */
static void j1939_session_completed(struct j1939_session *session)
{
	struct sk_buff *se_skb;

	if (!session->transmission) {
		se_skb = j1939_session_skb_get(session);
		/* distribute among j1939 receivers */
		j1939_sk_recv(session->priv, se_skb);
		consume_skb(se_skb);
	}

	j1939_session_deactivate_activate_next(session);
}

static enum hrtimer_restart j1939_tp_rxtimer(struct hrtimer *hrtimer)
{
	struct j1939_session *session = container_of(hrtimer,
						     struct j1939_session,
						     rxtimer);
	struct j1939_priv *priv = session->priv;

	if (session->state == J1939_SESSION_WAITING_ABORT) {
		netdev_alert(priv->ndev, "%s: 0x%p: abort rx timeout. Force session deactivation\n",
			     __func__, session);

		j1939_session_deactivate_activate_next(session);

	} else if (session->skcb.addr.type == J1939_SIMPLE) {
		netdev_alert(priv->ndev, "%s: 0x%p: Timeout. Failed to send simple message.\n",
			     __func__, session);

		/* The message is probably stuck in the CAN controller and can
		 * be send as soon as CAN bus is in working state again.
*/
		session->err = -ETIME;
		j1939_session_deactivate(session);
	} else {
		j1939_session_list_lock(session->priv);
		if (session->state >= J1939_SESSION_ACTIVE &&
		    session->state < J1939_SESSION_ACTIVE_MAX) {
			netdev_alert(priv->ndev, "%s: 0x%p: rx timeout, send abort\n",
				     __func__, session);
			/* re-arm the rx timer for the abort timeout; the timer
			 * holds its own session reference
			 */
			j1939_session_get(session);
			hrtimer_start(&session->rxtimer,
				      ms_to_ktime(J1939_XTP_ABORT_TIMEOUT_MS),
				      HRTIMER_MODE_REL_SOFT);
			__j1939_session_cancel(session, J1939_XTP_ABORT_TIMEOUT);
		}
		j1939_session_list_unlock(session->priv);

		if (!session->sk)
			j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT);
	}

	j1939_session_put(session);

	return HRTIMER_NORESTART;
}

/* Check whether control frame @skb carries a PGN different from the one of
 * the running @session.
 *
 * On a mismatch a warning is logged and, depending on the command byte, an
 * abort is sent back (none for BAM and ABORT frames).
 *
 * Returns true when the PGN differs (the frame must then be ignored by the
 * caller), false when it matches.
 */
static bool j1939_xtp_rx_cmd_bad_pgn(struct j1939_session *session,
				     const struct sk_buff *skb)
{
	const struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
	pgn_t pgn = j1939_xtp_ctl_to_pgn(skb->data);
	struct j1939_priv *priv = session->priv;
	enum j1939_xtp_abort abort = J1939_XTP_NO_ABORT;
	u8 cmd = skb->data[0];

	if (session->skcb.addr.pgn == pgn)
		return false;

	switch (cmd) {
	case J1939_TP_CMD_BAM:
		abort = J1939_XTP_NO_ABORT;
		break;

	case J1939_ETP_CMD_RTS:
		fallthrough;
	case J1939_TP_CMD_RTS:
		abort = J1939_XTP_ABORT_BUSY;
		break;

	case J1939_ETP_CMD_CTS:
		fallthrough;
	case J1939_TP_CMD_CTS:
		abort = J1939_XTP_ABORT_ECTS_UNXPECTED_PGN;
		break;

	case J1939_ETP_CMD_DPO:
		abort = J1939_XTP_ABORT_BAD_EDPO_PGN;
		break;

	case J1939_ETP_CMD_EOMA:
		fallthrough;
	case J1939_TP_CMD_EOMA:
		abort = J1939_XTP_ABORT_OTHER;
		break;

	case J1939_ETP_CMD_ABORT: /* && J1939_TP_CMD_ABORT */
		abort = J1939_XTP_NO_ABORT;
		break;

	default:
		WARN_ON_ONCE(1);
		break;
	}

	netdev_warn(priv->ndev, "%s: 0x%p: CMD 0x%02x with PGN 0x%05x for running session with different PGN 0x%05x.\n",
		    __func__, session, cmd, pgn, session->skcb.addr.pgn);
	if (abort != J1939_XTP_NO_ABORT)
		j1939_xtp_tx_abort(priv, skcb, true, abort, pgn);

	return true;
}

static void j1939_xtp_rx_abort_one(struct j1939_priv *priv, struct sk_buff *skb,
				   bool reverse, bool transmitter)
{
	struct j1939_sk_buff_cb *skcb =
j1939_skb_to_cb(skb);
	struct j1939_session *session;
	u8 abort = skb->data[1];

	session = j1939_session_get_by_addr(priv, &skcb->addr, reverse,
					    transmitter);
	if (!session)
		return;

	if (j1939_xtp_rx_cmd_bad_pgn(session, skb))
		goto abort_put;

	netdev_info(priv->ndev, "%s: 0x%p: 0x%05x: (%u) %s\n", __func__,
		    session, j1939_xtp_ctl_to_pgn(skb->data), abort,
		    j1939_xtp_abort_to_str(abort));

	j1939_session_timers_cancel(session);
	session->err = j1939_xtp_abort_to_errno(priv, abort);
	if (session->sk)
		j1939_sk_send_loop_abort(session->sk, session->err);
	else
		j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT);
	j1939_session_deactivate_activate_next(session);

abort_put:
	/* drop the reference from j1939_session_get_by_addr() */
	j1939_session_put(session);
}

/* abort packets may come in 2 directions */
static void
j1939_xtp_rx_abort(struct j1939_priv *priv, struct sk_buff *skb,
		   bool transmitter)
{
	j1939_xtp_rx_abort_one(priv, skb, false, transmitter);
	j1939_xtp_rx_abort_one(priv, skb, true, transmitter);
}

static void
j1939_xtp_rx_eoma_one(struct j1939_session *session, struct sk_buff *skb)
{
	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
	const u8 *dat;
	int len;

	if (j1939_xtp_rx_cmd_bad_pgn(session, skb))
		return;

	dat = skb->data;

	if (skcb->addr.type == J1939_ETP)
		len = j1939_etp_ctl_to_size(dat);
	else
		len = j1939_tp_ctl_to_size(dat);

	if (session->total_message_size != len) {
		netdev_warn_once(session->priv->ndev,
				 "%s: 0x%p: Incorrect size. 
Expected: %i; got: %i.\n",
				 __func__, session,
				 session->total_message_size, len);
	}

	netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);

	session->pkt.tx_acked = session->pkt.total;
	j1939_session_timers_cancel(session);
	/* transmitted without problems */
	j1939_session_completed(session);
}

/* Handle a received end-of-message acknowledgment: look up the matching
 * session (addresses reversed) and complete it.
 */
static void
j1939_xtp_rx_eoma(struct j1939_priv *priv, struct sk_buff *skb,
		  bool transmitter)
{
	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
	struct j1939_session *session;

	session = j1939_session_get_by_addr(priv, &skcb->addr, true,
					    transmitter);
	if (!session)
		return;

	j1939_xtp_rx_eoma_one(session, skb);
	j1939_session_put(session);
}

/* Process a clear-to-send control frame for @session.
 *
 * A CTS repeating the last received command, a next-packet number of 0, or
 * a packet count above the session block size cancels the session.
 * Otherwise the packet counters are updated from the frame; a non-zero
 * count re-arms the rx timeout (1250 ms) and, for transmitting sessions,
 * restarts the tx timer immediately. A count of 0 only sets the rx timeout
 * to 550 ms.
 */
static void
j1939_xtp_rx_cts_one(struct j1939_session *session, struct sk_buff *skb)
{
	enum j1939_xtp_abort err = J1939_XTP_ABORT_FAULT;
	unsigned int pkt;
	const u8 *dat;

	dat = skb->data;

	if (j1939_xtp_rx_cmd_bad_pgn(session, skb))
		return;

	netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);

	if (session->last_cmd == dat[0]) {
		err = J1939_XTP_ABORT_DUP_SEQ;
		goto out_session_cancel;
	}

	if (session->skcb.addr.type == J1939_ETP)
		pkt = j1939_etp_ctl_to_packet(dat);
	else
		pkt = dat[2];

	if (!pkt)
		goto out_session_cancel;
	else if (dat[1] > session->pkt.block /* 0xff for etp */)
		goto out_session_cancel;

	/* set packet counters only when not CTS(0) */
	session->pkt.tx_acked = pkt - 1;
	j1939_session_skb_drop_old(session);
	session->pkt.last = session->pkt.tx_acked + dat[1];
	if (session->pkt.last > session->pkt.total)
		/* safety measure */
		session->pkt.last = session->pkt.total;
	/* TODO: do not set tx here, do it in txtimer */
	session->pkt.tx = session->pkt.tx_acked;

	session->last_cmd = dat[0];
	if (dat[1]) {
		j1939_tp_set_rxtimeout(session, 1250);
		if (session->transmission) {
			if (session->pkt.tx_acked)
				j1939_sk_errqueue(session,
						  J1939_ERRQUEUE_TX_SCHED);
			j1939_session_txtimer_cancel(session);
			j1939_tp_schedule_txtimer(session, 0);
		}
	} else {
		/* CTS(0) */
		j1939_tp_set_rxtimeout(session, 550);
	}
	return;

 out_session_cancel:
j1939_session_timers_cancel(session);
	j1939_session_cancel(session, err);
}

/* Handle a received clear-to-send frame: look up the matching session
 * (addresses reversed) and process the CTS on it.
 */
static void
j1939_xtp_rx_cts(struct j1939_priv *priv, struct sk_buff *skb, bool transmitter)
{
	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
	struct j1939_session *session;

	session = j1939_session_get_by_addr(priv, &skcb->addr, true,
					    transmitter);
	if (!session)
		return;
	j1939_xtp_rx_cts_one(session, skb);
	j1939_session_put(session);
}

/* Allocate and initialise a session in state J1939_SESSION_NEW.
 *
 * @priv: interface private data; a reference is taken for the session
 * @skb:  first skb of the session; queued with an extra reference and its
 *        control block is copied into the session
 * @size: total message size
 *
 * Returns the new session (holding the initial kref reference) or NULL when
 * the allocation fails.
 */
static struct j1939_session *j1939_session_new(struct j1939_priv *priv,
					       struct sk_buff *skb, size_t size)
{
	struct j1939_session *session;
	struct j1939_sk_buff_cb *skcb;

	session = kzalloc_obj(*session, gfp_any());
	if (!session)
		return NULL;

	INIT_LIST_HEAD(&session->active_session_list_entry);
	INIT_LIST_HEAD(&session->sk_session_queue_entry);
	kref_init(&session->kref);

	j1939_priv_get(priv);
	session->priv = priv;
	session->total_message_size = size;
	session->state = J1939_SESSION_NEW;

	skb_queue_head_init(&session->skb_queue);
	skb_queue_tail(&session->skb_queue, skb_get(skb));

	skcb = j1939_skb_to_cb(skb);
	memcpy(&session->skcb, skcb, sizeof(session->skcb));

	hrtimer_setup(&session->txtimer, j1939_tp_txtimer, CLOCK_MONOTONIC,
		      HRTIMER_MODE_REL_SOFT);
	hrtimer_setup(&session->rxtimer, j1939_tp_rxtimer, CLOCK_MONOTONIC,
		      HRTIMER_MODE_REL_SOFT);

	netdev_dbg(priv->ndev, "%s: 0x%p: sa: %02x, da: %02x\n",
		   __func__, session, skcb->addr.sa, skcb->addr.da);

	return session;
}

static struct
j1939_session *j1939_session_fresh_new(struct j1939_priv *priv,
				       int size,
				       const struct j1939_sk_buff_cb *rel_skcb)
{
	struct sk_buff *skb;
	struct can_skb_ext *csx;
	struct j1939_sk_buff_cb *skcb;
	struct j1939_session *session;

	skb = alloc_skb(size, GFP_ATOMIC);
	if (unlikely(!skb))
		return NULL;

	csx = can_skb_ext_add(skb);
	if (!csx) {
		kfree_skb(skb);
		return NULL;
	}

	skb->dev = priv->ndev;
	csx->can_iif = priv->ndev->ifindex;
	skcb = j1939_skb_to_cb(skb);
	memcpy(skcb, rel_skcb, sizeof(*skcb));

	session = j1939_session_new(priv, skb, size);
	if (!session) {
		kfree_skb(skb);
		return NULL;
	}

	/* alloc
data area */ skb_put(skb, size); /* skb is recounted in j1939_session_new() */ return session; } int j1939_session_activate(struct j1939_session *session) { struct j1939_priv *priv = session->priv; struct j1939_session *active = NULL; int ret = 0; j1939_session_list_lock(priv); if (session->skcb.addr.type != J1939_SIMPLE) active = j1939_session_get_by_addr_locked(priv, &priv->active_session_list, &session->skcb.addr, false, session->transmission); if (active) { j1939_session_put(active); ret = -EAGAIN; } else if (priv->ndev->reg_state != NETREG_REGISTERED) { ret = -ENODEV; } else { WARN_ON_ONCE(session->state != J1939_SESSION_NEW); list_add_tail(&session->active_session_list_entry, &priv->active_session_list); j1939_session_get(session); session->state = J1939_SESSION_ACTIVE; netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); } j1939_session_list_unlock(priv); return ret; } static struct j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv, struct sk_buff *skb) { enum j1939_xtp_abort abort = J1939_XTP_NO_ABORT; struct j1939_sk_buff_cb skcb = *j1939_skb_to_cb(skb); struct j1939_session *session; const u8 *dat; int len, ret; pgn_t pgn; netdev_dbg(priv->ndev, "%s\n", __func__); dat = skb->data; pgn = j1939_xtp_ctl_to_pgn(dat); skcb.addr.pgn = pgn; if (!j1939_sk_recv_match(priv, &skcb)) return NULL; if (skcb.addr.type == J1939_ETP) { len = j1939_etp_ctl_to_size(dat); if (len > J1939_MAX_ETP_PACKET_SIZE) abort = J1939_XTP_ABORT_FAULT; else if (len > priv->tp_max_packet_size) abort = J1939_XTP_ABORT_RESOURCE; else if (len <= J1939_MAX_TP_PACKET_SIZE) abort = J1939_XTP_ABORT_FAULT; } else { len = j1939_tp_ctl_to_size(dat); if (len > J1939_MAX_TP_PACKET_SIZE) abort = J1939_XTP_ABORT_FAULT; else if (len > priv->tp_max_packet_size) abort = J1939_XTP_ABORT_RESOURCE; else if (len < J1939_MIN_TP_PACKET_SIZE) abort = J1939_XTP_ABORT_FAULT; } if (abort != J1939_XTP_NO_ABORT) { j1939_xtp_tx_abort(priv, &skcb, true, abort, pgn); return NULL; } session 
= j1939_session_fresh_new(priv, len, &skcb);
	if (!session) {
		j1939_xtp_tx_abort(priv, &skcb, true,
				   J1939_XTP_ABORT_RESOURCE, pgn);
		return NULL;
	}

	/* initialize the control buffer: plain copy */
	/* 7 payload bytes per data frame, rounded up */
	session->pkt.total = (len + 6) / 7;
	session->pkt.block = 0xff;
	if (skcb.addr.type != J1939_ETP) {
		/* for non-ETP the packet count announced in dat[3] wins over
		 * the count derived from the size; a mismatch is only logged
		 */
		if (dat[3] != session->pkt.total)
			netdev_alert(priv->ndev, "%s: 0x%p: strange total, %u != %u\n",
				     __func__, session, session->pkt.total,
				     dat[3]);
		session->pkt.total = dat[3];
		session->pkt.block = min(dat[3], dat[4]);
	}

	session->pkt.rx = 0;
	session->pkt.tx = 0;

	session->tskey = priv->rx_tskey++;
	j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_RTS);

	ret = j1939_session_activate(session);
	if (ret) {
		/* Entering this scope indicates an issue with the J1939 bus.
		 * Possible scenarios include:
		 * - A time lapse occurred, and a new session was initiated
		 *   due to another packet being sent correctly. This could
		 *   have been caused by too long interrupt, debugger, or being
		 *   out-scheduled by another task.
		 * - The bus is receiving numerous erroneous packets, either
		 *   from a malfunctioning device or during a test scenario.
		 */
		netdev_alert(priv->ndev, "%s: 0x%p: concurrent session with same addr (%02x %02x) is already active.\n",
			     __func__, session, skcb.addr.sa, skcb.addr.da);
		j1939_session_put(session);
		return NULL;
	}

	return session;
}

/* Handle an RTS that arrives for a session that already exists.
 *
 * Returns 0 when the RTS may be processed on @session (its source and
 * destination addresses are then refreshed from the frame), or -EBUSY when
 * the frame carries a bad PGN for a receive session or when the session has
 * already seen a command (last_cmd != 0); in the latter case the session is
 * cancelled with J1939_XTP_ABORT_BUSY.
 */
static int j1939_xtp_rx_rts_session_active(struct j1939_session *session,
					   struct sk_buff *skb)
{
	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
	struct j1939_priv *priv = session->priv;

	if (!session->transmission) {
		if (j1939_xtp_rx_cmd_bad_pgn(session, skb))
			return -EBUSY;

		/* RTS on active session */
		j1939_session_timers_cancel(session);
		j1939_session_cancel(session, J1939_XTP_ABORT_BUSY);
	}

	if (session->last_cmd != 0) {
		/* we received a second rts on the same connection */
		netdev_alert(priv->ndev, "%s: 0x%p: connection exists (%02x %02x). last cmd: %x\n",
			     __func__, session, skcb->addr.sa, skcb->addr.da,
			     session->last_cmd);

		j1939_session_timers_cancel(session);
		j1939_session_cancel(session, J1939_XTP_ABORT_BUSY);
		if (session->transmission) {
			j1939_session_deactivate_activate_next(session);
		} else if (session->state == J1939_SESSION_WAITING_ABORT) {
			/* Force deactivation for the receiver.
			 * If we rely on the timer starting in j1939_session_cancel,
			 * a second RTS call here will cancel that timer and fail
			 * to restart it because the state is already WAITING_ABORT.
			 */
			j1939_session_deactivate_activate_next(session);
		}

		return -EBUSY;
	}

	if (session->skcb.addr.sa != skcb->addr.sa ||
	    session->skcb.addr.da != skcb->addr.da)
		netdev_warn(priv->ndev, "%s: 0x%p: session->skcb.addr.sa=0x%02x skcb->addr.sa=0x%02x session->skcb.addr.da=0x%02x skcb->addr.da=0x%02x\n",
			    __func__, session,
			    session->skcb.addr.sa, skcb->addr.sa,
			    session->skcb.addr.da, skcb->addr.da);
	/* make sure 'sa' & 'da' are correct !
	 * They may be 'not filled in yet' for sending
	 * skb's, since they did not pass the Address Claim ever.
	 */
	session->skcb.addr.sa = skcb->addr.sa;
	session->skcb.addr.da = skcb->addr.da;

	netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);

	return 0;
}

/* Process a received RTS or BAM control frame.
 *
 * An existing session is validated with j1939_xtp_rx_rts_session_active();
 * without one, a receive session is created unless @transmitter is set.
 * On success the command byte is stored in session->last_cmd and the
 * rx timeout (plus, for non-BAM receive sessions, the tx timer) is armed.
 */
static void j1939_xtp_rx_rts(struct j1939_priv *priv, struct sk_buff *skb,
			     bool transmitter)
{
	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
	struct j1939_session *session;
	u8 cmd = skb->data[0];

	session = j1939_session_get_by_addr(priv, &skcb->addr, false,
					    transmitter);

	if (!session) {
		if (transmitter) {
			/* If we're the transmitter and this function is called,
			 * we received our own RTS. A session has already been
			 * created.
			 *
			 * For some reasons however it might have been destroyed
			 * already. So don't create a new one here (using
			 * "j1939_xtp_rx_rts_session_new()") as this will be a
			 * receiver session.
			 *
			 * The reasons the session is already destroyed might
			 * be:
			 * - user space closed the socket and the session was
			 *   aborted
			 * - session was aborted due to external abort message
			 */
			return;
		}
		session = j1939_xtp_rx_rts_session_new(priv, skb);
		if (!session) {
			if (cmd == J1939_TP_CMD_BAM &&
			    j1939_sk_recv_match(priv, skcb))
				netdev_info(priv->ndev, "%s: failed to create TP BAM session\n",
					    __func__);
			return;
		}
	} else {
		if (j1939_xtp_rx_rts_session_active(session, skb)) {
			j1939_session_put(session);
			return;
		}
	}
	session->last_cmd = cmd;

	if (cmd == J1939_TP_CMD_BAM) {
		if (!session->transmission)
			j1939_tp_set_rxtimeout(session, 750);
	} else {
		if (!session->transmission) {
			/* restart the tx timer with zero delay */
			j1939_session_txtimer_cancel(session);
			j1939_tp_schedule_txtimer(session, 0);
		}
		j1939_tp_set_rxtimeout(session, 1250);
	}

	j1939_session_put(session);
}

/* Apply a received DPO control frame to @session.
 *
 * Frames with a bad PGN are ignored. Otherwise the packet offset from the
 * frame is stored in session->pkt.dpo, last_cmd is updated and the rx
 * timeout is re-armed; receive sessions additionally get an
 * J1939_ERRQUEUE_RX_DPO notification.
 */
static void j1939_xtp_rx_dpo_one(struct j1939_session *session,
				 struct sk_buff *skb)
{
	const u8 *dat = skb->data;

	if (j1939_xtp_rx_cmd_bad_pgn(session, skb))
		return;

	netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);

	/* transmitted without problems */
	session->pkt.dpo = j1939_etp_ctl_to_packet(skb->data);
	session->last_cmd = dat[0];
	j1939_tp_set_rxtimeout(session, 750);

	if (!session->transmission)
		j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_DPO);
}

/* Dispatch a received DPO control frame to its session; a frame without a
 * matching session is only logged.
 */
static void j1939_xtp_rx_dpo(struct j1939_priv *priv, struct sk_buff *skb,
			     bool transmitter)
{
	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
	struct j1939_session *session;

	session = j1939_session_get_by_addr(priv, &skcb->addr, false,
					    transmitter);
	if (!session) {
		netdev_info(priv->ndev,
			    "%s: no connection found\n", __func__);
		return;
	}

	j1939_xtp_rx_dpo_one(session, skb);
	j1939_session_put(session);
}

/* Process one received data frame for @session.
 *
 * Note that this function calls j1939_session_put() on @session on every
 * exit path, i.e. it consumes the session obtained by the caller's lookup.
 */
static void j1939_xtp_rx_dat_one(struct j1939_session *session,
				 struct sk_buff *skb)
{
	enum j1939_xtp_abort abort = J1939_XTP_ABORT_FAULT;
	struct j1939_priv *priv = session->priv;
	struct j1939_sk_buff_cb *skcb, *se_skcb;
	struct sk_buff *se_skb = NULL;
	const u8 *dat;
	u8 *tpdat;
	int offset;
	int
nbytes;
	bool final = false;
	bool remain = false;
	bool do_cts_eoma = false;
	int packet;

	skcb = j1939_skb_to_cb(skb);
	dat = skb->data;
	if (skb->len != 8) {
		/* makes no sense */
		abort = J1939_XTP_ABORT_UNEXPECTED_DATA;
		goto out_session_cancel;
	}

	/* Only accept data after a command that allows it: 0xff (set below
	 * after each accepted data frame), DPO for ETP, BAM/CTS for non-ETP.
	 */
	switch (session->last_cmd) {
	case 0xff:
		break;
	case J1939_ETP_CMD_DPO:
		if (skcb->addr.type == J1939_ETP)
			break;
		fallthrough;
	case J1939_TP_CMD_BAM:
		fallthrough;
	case J1939_TP_CMD_CTS:
		if (skcb->addr.type != J1939_ETP)
			break;
		fallthrough;
	default:
		netdev_info(priv->ndev, "%s: 0x%p: last %02x\n", __func__,
			    session, session->last_cmd);
		goto out_session_cancel;
	}

	/* dat[0] is the 1-based sequence number, relative to pkt.dpo */
	packet = (dat[0] - 1 + session->pkt.dpo);
	if (packet > session->pkt.total ||
	    (session->pkt.rx + 1) > session->pkt.total) {
		netdev_info(priv->ndev, "%s: 0x%p: should have been completed\n",
			    __func__, session);
		goto out_session_cancel;
	}

	/* each data frame carries 7 payload bytes */
	se_skb = j1939_session_skb_get_by_offset(session, packet * 7);
	if (!se_skb) {
		netdev_warn(priv->ndev, "%s: 0x%p: no skb found\n", __func__,
			    session);
		goto out_session_cancel;
	}

	se_skcb = j1939_skb_to_cb(se_skb);
	offset = packet * 7 - se_skcb->offset;
	nbytes = se_skb->len - offset;
	if (nbytes > 7)
		nbytes = 7;
	if (nbytes <= 0 || (nbytes + 1) > skb->len) {
		netdev_info(priv->ndev, "%s: 0x%p: nbytes %i, len %i\n",
			    __func__, session, nbytes, skb->len);
		goto out_session_cancel;
	}

	tpdat = se_skb->data;
	if (!session->transmission) {
		/* receive side: store the payload in the session skb */
		memcpy(&tpdat[offset], &dat[1], nbytes);
	} else {
		int err;

		/* transmit side: compare the received frame with the data
		 * held in the session skb; a mismatch is only logged
		 */
		err = memcmp(&tpdat[offset], &dat[1], nbytes);
		if (err)
			netdev_err_once(priv->ndev,
					"%s: 0x%p: Data of RX-looped back packet (%*ph) doesn't match TX data (%*ph)!\n",
					__func__, session,
					nbytes, &dat[1],
					nbytes, &tpdat[offset]);
	}

	/* the rx counter only advances for the next expected packet */
	if (packet == session->pkt.rx)
		session->pkt.rx++;

	if (se_skcb->addr.type != J1939_ETP &&
	    j1939_cb_is_broadcast(&session->skcb)) {
		if (session->pkt.rx >= session->pkt.total)
			final = true;
		else
			remain = true;
	} else {
		/* never final, an EOMA must follow */
		if (session->pkt.rx >= session->pkt.last)
			do_cts_eoma = true;
	}

	if (final) {
		j1939_session_timers_cancel(session);
		j1939_session_completed(session);
	} else if (remain) {
		if (!session->transmission)
			j1939_tp_set_rxtimeout(session, 750);
	} else if (do_cts_eoma) {
		j1939_tp_set_rxtimeout(session, 1250);
		if (!session->transmission)
			j1939_tp_schedule_txtimer(session, 0);
	} else {
		j1939_tp_set_rxtimeout(session, 750);
	}
	/* mark "data received" so that further data frames are accepted */
	session->last_cmd = 0xff;
	consume_skb(se_skb);
	j1939_session_put(session);

	return;

 out_session_cancel:
	/* se_skb is still NULL when the jump happens before the skb lookup */
	kfree_skb(se_skb);
	j1939_session_timers_cancel(session);
	j1939_session_cancel(session, abort);
	j1939_session_put(session);
}

/* Dispatch a received data frame.
 *
 * The frame is offered to the tx session (when we are the transmitter), the
 * rx session (when we are the receiver) and, for broadcasts, to the rx
 * session again via a separate lookup. j1939_xtp_rx_dat_one() releases each
 * session it is given, so no j1939_session_put() is needed here.
 */
static void j1939_xtp_rx_dat(struct j1939_priv *priv, struct sk_buff *skb)
{
	struct j1939_sk_buff_cb *skcb;
	struct j1939_session *session;

	skcb = j1939_skb_to_cb(skb);

	if (j1939_tp_im_transmitter(skcb)) {
		session = j1939_session_get_by_addr(priv, &skcb->addr, false,
						    true);
		if (!session)
			netdev_info(priv->ndev, "%s: no tx connection found\n",
				    __func__);
		else
			j1939_xtp_rx_dat_one(session, skb);
	}

	if (j1939_tp_im_receiver(skcb)) {
		session = j1939_session_get_by_addr(priv, &skcb->addr, false,
						    false);
		if (!session)
			netdev_info(priv->ndev, "%s: no rx connection found\n",
				    __func__);
		else
			j1939_xtp_rx_dat_one(session, skb);
	}

	if (j1939_cb_is_broadcast(skcb)) {
		session = j1939_session_get_by_addr(priv, &skcb->addr, false,
						    false);
		if (session)
			j1939_xtp_rx_dat_one(session, skb);
	}
}

/* j1939 main intf */
/* Prepare a transmit session for @skb carrying a message of @size bytes.
 *
 * Returns the new session or an ERR_PTR(): -EDOM when the PGN is one of the
 * TP/ETP control or data PGNs, -EMSGSIZE when @size exceeds
 * priv->tp_max_packet_size, -EDESTADDRREQ for an ETP-sized broadcast, the
 * error from j1939_ac_fixup(), or -ENOMEM.
 */
struct j1939_session *j1939_tp_send(struct j1939_priv *priv,
				    struct sk_buff *skb, size_t size)
{
	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
	struct j1939_session *session;
	int ret;

	if (skcb->addr.pgn == J1939_TP_PGN_DAT ||
	    skcb->addr.pgn == J1939_TP_PGN_CTL ||
	    skcb->addr.pgn == J1939_ETP_PGN_DAT ||
	    skcb->addr.pgn == J1939_ETP_PGN_CTL)
		/* avoid conflict */
		return ERR_PTR(-EDOM);

	if (size > priv->tp_max_packet_size)
		return ERR_PTR(-EMSGSIZE);

	/* pick the transfer type from the message size */
	if (size <= 8)
		skcb->addr.type = J1939_SIMPLE;
	else if (size > J1939_MAX_TP_PACKET_SIZE)
		skcb->addr.type = J1939_ETP;
	else
		skcb->addr.type = J1939_TP;

	if (skcb->addr.type
== J1939_ETP &&
	    j1939_cb_is_broadcast(skcb))
		return ERR_PTR(-EDESTADDRREQ);

	/* fill in addresses from names */
	ret = j1939_ac_fixup(priv, skb);
	if (unlikely(ret))
		return ERR_PTR(ret);

	/* fix DST flags, it may be used there soon */
	if (j1939_address_is_unicast(skcb->addr.da) &&
	    priv->ents[skcb->addr.da].nusers)
		skcb->flags |= J1939_ECU_LOCAL_DST;

	/* src is always local, I'm sending ... */
	skcb->flags |= J1939_ECU_LOCAL_SRC;

	/* prepare new session */
	session = j1939_session_new(priv, skb, size);
	if (!session)
		return ERR_PTR(-ENOMEM);

	/* skb is recounted in j1939_session_new() */
	/* the session keeps its own reference on the sending socket */
	sock_hold(skb->sk);
	session->sk = skb->sk;
	session->transmission = true;
	/* 7 payload bytes per data frame, rounded up */
	session->pkt.total = (size + 6) / 7;
	/* ETP always uses 255; otherwise j1939_tp_block (255 when it is 0),
	 * capped at the total packet count
	 */
	session->pkt.block = skcb->addr.type == J1939_ETP ? 255 :
		min(j1939_tp_block ?: 255, session->pkt.total);

	if (j1939_cb_is_broadcast(&session->skcb))
		/* set the end-packet for broadcast */
		session->pkt.last = session->pkt.total;

	/* atomic_inc_return() yields the new value; the key is the old one */
	skcb->tskey = atomic_inc_return(&session->sk->sk_tskey) - 1;
	session->tskey = skcb->tskey;

	return session;
}

/* Decode the command byte of a TP/ETP control frame and dispatch it.
 *
 * Frames whose command does not fit the address type (TP vs. ETP) are
 * dropped. RTS/BAM and DPO frames are passed to the rx handler with
 * transmitter=true when we are the transmitter, while CTS and EOMA frames
 * are passed with the inverted flag (false when we are the transmitter,
 * true when we are the receiver).
 */
static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb)
{
	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
	int extd = J1939_TP;
	u8 cmd = skb->data[0];

	switch (cmd) {
	case J1939_ETP_CMD_RTS:
		extd = J1939_ETP;
		fallthrough;
	case J1939_TP_CMD_BAM:
		if (cmd == J1939_TP_CMD_BAM && !j1939_cb_is_broadcast(skcb)) {
			netdev_err_once(priv->ndev, "%s: BAM to unicast (%02x), ignoring!\n",
					__func__, skcb->addr.sa);
			return;
		}
		fallthrough;
	case J1939_TP_CMD_RTS:
		if (skcb->addr.type != extd)
			return;

		if (cmd == J1939_TP_CMD_RTS && j1939_cb_is_broadcast(skcb)) {
			netdev_alert(priv->ndev, "%s: rts without destination (%02x)\n",
				     __func__, skcb->addr.sa);
			return;
		}

		if (j1939_tp_im_transmitter(skcb))
			j1939_xtp_rx_rts(priv, skb, true);

		if (j1939_tp_im_receiver(skcb) || j1939_cb_is_broadcast(skcb))
			j1939_xtp_rx_rts(priv, skb, false);

		break;

	case J1939_ETP_CMD_CTS:
		extd = J1939_ETP;
		fallthrough;
	case J1939_TP_CMD_CTS:
		if (skcb->addr.type != extd)
			return;

		if
(j1939_tp_im_transmitter(skcb))
			j1939_xtp_rx_cts(priv, skb, false);

		if (j1939_tp_im_receiver(skcb))
			j1939_xtp_rx_cts(priv, skb, true);

		break;

	case J1939_ETP_CMD_DPO:
		if (skcb->addr.type != J1939_ETP)
			return;

		if (j1939_tp_im_transmitter(skcb))
			j1939_xtp_rx_dpo(priv, skb, true);

		if (j1939_tp_im_receiver(skcb))
			j1939_xtp_rx_dpo(priv, skb, false);

		break;

	case J1939_ETP_CMD_EOMA:
		extd = J1939_ETP;
		fallthrough;
	case J1939_TP_CMD_EOMA:
		if (skcb->addr.type != extd)
			return;

		if (j1939_tp_im_transmitter(skcb))
			j1939_xtp_rx_eoma(priv, skb, false);

		if (j1939_tp_im_receiver(skcb))
			j1939_xtp_rx_eoma(priv, skb, true);

		break;

	case J1939_ETP_CMD_ABORT: /* && J1939_TP_CMD_ABORT */
		if (j1939_cb_is_broadcast(skcb)) {
			netdev_err_once(priv->ndev, "%s: abort to broadcast (%02x), ignoring!\n",
					__func__, skcb->addr.sa);
			return;
		}

		if (j1939_tp_im_transmitter(skcb))
			j1939_xtp_rx_abort(priv, skb, true);

		if (j1939_tp_im_receiver(skcb))
			j1939_xtp_rx_abort(priv, skb, false);

		break;
	default:
		return;
	}
}

/* Entry point for received frames that may belong to the transport protocol.
 *
 * Returns 1 when the frame carried a TP/ETP data or control PGN and was
 * passed on for processing, 0 when it is not for us (neither involved nor
 * broadcast), has another PGN, or is a control frame shorter than 8 bytes.
 * For the ETP PGNs the address type in the control block is set to
 * J1939_ETP before dispatching.
 */
int j1939_tp_recv(struct j1939_priv *priv, struct sk_buff *skb)
{
	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);

	if (!j1939_tp_im_involved_anydir(skcb) && !j1939_cb_is_broadcast(skcb))
		return 0;

	switch (skcb->addr.pgn) {
	case J1939_ETP_PGN_DAT:
		skcb->addr.type = J1939_ETP;
		fallthrough;
	case J1939_TP_PGN_DAT:
		j1939_xtp_rx_dat(priv, skb);
		break;

	case J1939_ETP_PGN_CTL:
		skcb->addr.type = J1939_ETP;
		fallthrough;
	case J1939_TP_PGN_CTL:
		if (skb->len < 8)
			return 0; /* Don't care. Nothing to extract here */

		j1939_tp_cmd_recv(priv, skb);
		break;
	default:
		return 0; /* no problem */
	}
	return 1; /* "I processed the message" */
}

/* Finish the simple (single frame) session that belongs to @skb.
 *
 * Only skbs owned by an AF_CAN/CAN_J1939 socket are considered. The matching
 * session is looked up under the session list lock, its timers are
 * cancelled and it is deactivated; a missing session is logged.
 */
void j1939_simple_recv(struct j1939_priv *priv, struct sk_buff *skb)
{
	struct j1939_session *session;

	if (!skb->sk)
		return;

	if (skb->sk->sk_family != AF_CAN ||
	    skb->sk->sk_protocol != CAN_J1939)
		return;

	j1939_session_list_lock(priv);
	session = j1939_session_get_simple(priv, skb);
	j1939_session_list_unlock(priv);
	if (!session) {
		netdev_warn(priv->ndev,
			    "%s: Received already invalidated message\n",
			    __func__);
		return;
	}

	j1939_session_timers_cancel(session);
	j1939_session_deactivate(session);
	j1939_session_put(session);
}

/* Shut down active sessions, either all of them (@sk == NULL) or only those
 * owned by @sk.
 *
 * For each matching session both timers are cancelled; when
 * hrtimer_try_to_cancel() returns 1 the session is put once for that timer.
 * The session error is set to ESHUTDOWN before deactivation. Always returns
 * NOTIFY_DONE.
 */
int j1939_cancel_active_session(struct j1939_priv *priv, struct sock *sk)
{
	struct j1939_session *session, *saved;

	netdev_dbg(priv->ndev, "%s, sk: %p\n", __func__, sk);
	j1939_session_list_lock(priv);
	/* _safe variant: deactivation may unlink the current entry */
	list_for_each_entry_safe(session, saved,
				 &priv->active_session_list,
				 active_session_list_entry) {
		if (!sk || sk == session->sk) {
			if (hrtimer_try_to_cancel(&session->txtimer) == 1)
				j1939_session_put(session);
			if (hrtimer_try_to_cancel(&session->rxtimer) == 1)
				j1939_session_put(session);

			session->err = ESHUTDOWN;
			j1939_session_deactivate_locked(session);
		}
	}
	j1939_session_list_unlock(priv);
	return NOTIFY_DONE;
}

/* Initialise the transport protocol state of @priv: the active session list
 * with its lock, and the maximum packet size (J1939_MAX_ETP_PACKET_SIZE).
 */
void j1939_tp_init(struct j1939_priv *priv)
{
	spin_lock_init(&priv->active_session_list_lock);
	INIT_LIST_HEAD(&priv->active_session_list);
	priv->tp_max_packet_size = J1939_MAX_ETP_PACKET_SIZE;
}
1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 22 2 2 21 21 7 22 22 22 22 22 14 13 22 9 22 2 22 8 22 22 22 14 22 22 22 22 22 2 2 2 2 2 2 2 2 2 2 22 22 22 22 22 22 22 22 22 22 22 22 22 9 8 22 22 22 2 2 22 22 22 22 22 22 22 21 22 22 22 22 22 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 
468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 
968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 // SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2005 Marc Kleine-Budde, Pengutronix * Copyright (C) 2006 Andrey Volkov, Varma Electronics * Copyright (C) 2008-2009 Wolfgang Grandegger <wg@grandegger.com> * Copyright (C) 2021-2025 Vincent Mailhol <mailhol@kernel.org> */ #include <linux/can/dev.h> #include <net/rtnetlink.h> static const struct nla_policy can_policy[IFLA_CAN_MAX + 1] = { [IFLA_CAN_STATE] = { .type = NLA_U32 }, [IFLA_CAN_CTRLMODE] = { .len = sizeof(struct can_ctrlmode) }, [IFLA_CAN_RESTART_MS] = { .type = NLA_U32 }, [IFLA_CAN_RESTART] = { .type = NLA_U32 }, [IFLA_CAN_BITTIMING] = { .len = sizeof(struct can_bittiming) }, [IFLA_CAN_BITTIMING_CONST] = { .len = sizeof(struct can_bittiming_const) }, [IFLA_CAN_CLOCK] = { .len = sizeof(struct can_clock) }, [IFLA_CAN_BERR_COUNTER] = { .len = sizeof(struct can_berr_counter) }, [IFLA_CAN_DATA_BITTIMING] = { .len = sizeof(struct can_bittiming) }, [IFLA_CAN_DATA_BITTIMING_CONST] = { .len = sizeof(struct can_bittiming_const) }, [IFLA_CAN_TERMINATION] = { .type = NLA_U16 }, [IFLA_CAN_TDC] = { .type = NLA_NESTED }, [IFLA_CAN_CTRLMODE_EXT] = { .type = NLA_NESTED }, [IFLA_CAN_XL_DATA_BITTIMING] = { .len = sizeof(struct can_bittiming) }, [IFLA_CAN_XL_DATA_BITTIMING_CONST] = { .len = sizeof(struct can_bittiming_const) }, 
[IFLA_CAN_XL_TDC] = { .type = NLA_NESTED },
	[IFLA_CAN_XL_PWM] = { .type = NLA_NESTED },
};

/* Policy for the attributes nested inside IFLA_CAN_TDC and IFLA_CAN_XL_TDC;
 * every TDC attribute is a u32.
 */
static const struct nla_policy can_tdc_policy[IFLA_CAN_TDC_MAX + 1] = {
	[IFLA_CAN_TDC_TDCV_MIN] = { .type = NLA_U32 },
	[IFLA_CAN_TDC_TDCV_MAX] = { .type = NLA_U32 },
	[IFLA_CAN_TDC_TDCO_MIN] = { .type = NLA_U32 },
	[IFLA_CAN_TDC_TDCO_MAX] = { .type = NLA_U32 },
	[IFLA_CAN_TDC_TDCF_MIN] = { .type = NLA_U32 },
	[IFLA_CAN_TDC_TDCF_MAX] = { .type = NLA_U32 },
	[IFLA_CAN_TDC_TDCV] = { .type = NLA_U32 },
	[IFLA_CAN_TDC_TDCO] = { .type = NLA_U32 },
	[IFLA_CAN_TDC_TDCF] = { .type = NLA_U32 },
};

/* Policy for the attributes nested inside IFLA_CAN_XL_PWM; every PWM
 * attribute is a u32.
 */
static const struct nla_policy can_pwm_policy[IFLA_CAN_PWM_MAX + 1] = {
	[IFLA_CAN_PWM_PWMS_MIN] = { .type = NLA_U32 },
	[IFLA_CAN_PWM_PWMS_MAX] = { .type = NLA_U32 },
	[IFLA_CAN_PWM_PWML_MIN] = { .type = NLA_U32 },
	[IFLA_CAN_PWM_PWML_MAX] = { .type = NLA_U32 },
	[IFLA_CAN_PWM_PWMO_MIN] = { .type = NLA_U32 },
	[IFLA_CAN_PWM_PWMO_MAX] = { .type = NLA_U32 },
	[IFLA_CAN_PWM_PWMS] = { .type = NLA_U32 },
	[IFLA_CAN_PWM_PWML] = { .type = NLA_U32 },
	[IFLA_CAN_PWM_PWMO] = { .type = NLA_U32 },
};

/* Validate the bittiming attribute at index @ifla_can_bittiming of @data.
 *
 * Returns 0 when the attribute is absent or its sample point is below
 * 1000 (i.e. below 100%), -EINVAL with an extack message otherwise.
 */
static int can_validate_bittiming(struct nlattr *data[],
				  struct netlink_ext_ack *extack,
				  int ifla_can_bittiming)
{
	struct can_bittiming *bt;

	if (!data[ifla_can_bittiming])
		return 0;

	/* the payload is accessed in place, so the struct must not need
	 * more alignment than netlink attributes provide
	 */
	static_assert(__alignof__(*bt) <= NLA_ALIGNTO);
	bt = nla_data(data[ifla_can_bittiming]);

	/* sample point is in one-tenth of a percent */
	if (bt->sample_point >= 1000) {
		NL_SET_ERR_MSG(extack, "sample point must be between 0 and 100%");
		return -EINVAL;
	}

	return 0;
}

/* Check that the TDC mode flags in @tdc_flags and the nested TDC attribute
 * @data_tdc (may be NULL) are coherent.
 *
 * Returns 0 when they are, -EOPNOTSUPP with an extack message when they are
 * not, or the error from nla_parse_nested().
 */
static int can_validate_tdc(struct nlattr *data_tdc,
			    struct netlink_ext_ack *extack, u32 tdc_flags)
{
	bool tdc_manual = tdc_flags & CAN_CTRLMODE_TDC_MANUAL_MASK;
	bool tdc_auto = tdc_flags & CAN_CTRLMODE_TDC_AUTO_MASK;
	int err;

	if (tdc_auto && tdc_manual) {
		NL_SET_ERR_MSG(extack,
			       "TDC manual and auto modes are mutually exclusive");
		return -EOPNOTSUPP;
	}

	/* If one of the CAN_CTRLMODE_{,XL}_TDC_* flags is set then TDC
	 * must be set and vice-versa
	 */
	if ((tdc_auto || tdc_manual) &&
!data_tdc) { NL_SET_ERR_MSG(extack, "TDC parameters are missing"); return -EOPNOTSUPP; } if (!(tdc_auto || tdc_manual) && data_tdc) { NL_SET_ERR_MSG(extack, "TDC mode (auto or manual) is missing"); return -EOPNOTSUPP; } /* If providing TDC parameters, at least TDCO is needed. TDCV is * needed if and only if CAN_CTRLMODE_{,XL}_TDC_MANUAL is set */ if (data_tdc) { struct nlattr *tb_tdc[IFLA_CAN_TDC_MAX + 1]; err = nla_parse_nested(tb_tdc, IFLA_CAN_TDC_MAX, data_tdc, can_tdc_policy, extack); if (err) return err; if (tb_tdc[IFLA_CAN_TDC_TDCV]) { if (tdc_auto) { NL_SET_ERR_MSG(extack, "TDCV is incompatible with TDC auto mode"); return -EOPNOTSUPP; } } else { if (tdc_manual) { NL_SET_ERR_MSG(extack, "TDC manual mode requires TDCV"); return -EOPNOTSUPP; } } if (!tb_tdc[IFLA_CAN_TDC_TDCO]) { NL_SET_ERR_MSG(extack, "TDCO is missing"); return -EOPNOTSUPP; } } return 0; } static int can_validate_pwm(struct nlattr *data[], struct netlink_ext_ack *extack, u32 flags) { struct nlattr *tb_pwm[IFLA_CAN_PWM_MAX + 1]; int err; if (!data[IFLA_CAN_XL_PWM]) return 0; if (!(flags & CAN_CTRLMODE_XL_TMS)) { NL_SET_ERR_MSG(extack, "PWM requires TMS"); return -EOPNOTSUPP; } err = nla_parse_nested(tb_pwm, IFLA_CAN_PWM_MAX, data[IFLA_CAN_XL_PWM], can_pwm_policy, extack); if (err) return err; if (!tb_pwm[IFLA_CAN_PWM_PWMS] != !tb_pwm[IFLA_CAN_PWM_PWML]) { NL_SET_ERR_MSG(extack, "Provide either both PWMS and PWML, or none for automatic calculation"); return -EOPNOTSUPP; } if (tb_pwm[IFLA_CAN_PWM_PWMO] && (!tb_pwm[IFLA_CAN_PWM_PWMS] || !tb_pwm[IFLA_CAN_PWM_PWML])) { NL_SET_ERR_MSG(extack, "PWMO requires both PWMS and PWML"); return -EOPNOTSUPP; } return 0; } static int can_validate_databittiming(struct nlattr *data[], struct netlink_ext_ack *extack, int ifla_can_data_bittiming, u32 flags) { struct nlattr *data_tdc; const char *type; u32 tdc_flags; bool is_on; int err; /* Make sure that valid CAN FD/XL configurations always consist of * - nominal/arbitration bittiming * - data bittiming * - 
control mode with CAN_CTRLMODE_{FD,XL} set * - TDC parameters are coherent (details in can_validate_tdc()) */ if (ifla_can_data_bittiming == IFLA_CAN_DATA_BITTIMING) { data_tdc = data[IFLA_CAN_TDC]; tdc_flags = flags & CAN_CTRLMODE_FD_TDC_MASK; is_on = flags & CAN_CTRLMODE_FD; type = "FD"; } else { data_tdc = data[IFLA_CAN_XL_TDC]; tdc_flags = flags & CAN_CTRLMODE_XL_TDC_MASK; is_on = flags & CAN_CTRLMODE_XL; type = "XL"; } if (is_on) { if (!data[IFLA_CAN_BITTIMING] || !data[ifla_can_data_bittiming]) { NL_SET_ERR_MSG_FMT(extack, "Provide both nominal and %s data bittiming", type); return -EOPNOTSUPP; } } else { if (data[ifla_can_data_bittiming]) { NL_SET_ERR_MSG_FMT(extack, "%s data bittiming requires CAN %s", type, type); return -EOPNOTSUPP; } if (data_tdc) { NL_SET_ERR_MSG_FMT(extack, "%s TDC requires CAN %s", type, type); return -EOPNOTSUPP; } } err = can_validate_bittiming(data, extack, ifla_can_data_bittiming); if (err) return err; err = can_validate_tdc(data_tdc, extack, tdc_flags); if (err) return err; return 0; } static int can_validate_xl_flags(struct netlink_ext_ack *extack, u32 masked_flags, u32 mask) { if (masked_flags & CAN_CTRLMODE_XL) { if (masked_flags & CAN_CTRLMODE_XL_TMS) { const u32 tms_conflicts_mask = CAN_CTRLMODE_FD | CAN_CTRLMODE_XL_TDC_MASK; u32 tms_conflicts = masked_flags & tms_conflicts_mask; if (tms_conflicts) { NL_SET_ERR_MSG_FMT(extack, "TMS and %s are mutually exclusive", can_get_ctrlmode_str(tms_conflicts)); return -EOPNOTSUPP; } } } else { if (mask & CAN_CTRLMODE_XL_TMS) { NL_SET_ERR_MSG(extack, "TMS requires CAN XL"); return -EOPNOTSUPP; } } return 0; } static int can_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { u32 flags = 0; int err; if (!data) return 0; if (data[IFLA_CAN_CTRLMODE]) { struct can_ctrlmode *cm = nla_data(data[IFLA_CAN_CTRLMODE]); flags = cm->flags & cm->mask; if ((flags & CAN_CTRLMODE_LISTENONLY) && (flags & CAN_CTRLMODE_RESTRICTED)) { NL_SET_ERR_MSG(extack, "LISTEN-ONLY 
and RESTRICTED modes are mutually exclusive"); return -EOPNOTSUPP; } err = can_validate_xl_flags(extack, flags, cm->mask); if (err) return err; } err = can_validate_bittiming(data, extack, IFLA_CAN_BITTIMING); if (err) return err; err = can_validate_databittiming(data, extack, IFLA_CAN_DATA_BITTIMING, flags); if (err) return err; err = can_validate_databittiming(data, extack, IFLA_CAN_XL_DATA_BITTIMING, flags); if (err) return err; err = can_validate_pwm(data, extack, flags); if (err) return err; return 0; } static int can_ctrlmode_changelink(struct net_device *dev, struct nlattr *data[], struct netlink_ext_ack *extack) { struct can_priv *priv = netdev_priv(dev); struct can_ctrlmode *cm; u32 ctrlstatic, maskedflags, deactivated, notsupp, ctrlstatic_missing; if (!data[IFLA_CAN_CTRLMODE]) return 0; /* Do not allow changing controller mode while running */ if (dev->flags & IFF_UP) return -EBUSY; cm = nla_data(data[IFLA_CAN_CTRLMODE]); ctrlstatic = can_get_static_ctrlmode(priv); maskedflags = cm->flags & cm->mask; deactivated = ~cm->flags & cm->mask; notsupp = maskedflags & ~(priv->ctrlmode_supported | ctrlstatic); ctrlstatic_missing = (maskedflags & ctrlstatic) ^ ctrlstatic; if (notsupp) { NL_SET_ERR_MSG_FMT(extack, "requested control mode %s not supported", can_get_ctrlmode_str(notsupp)); return -EOPNOTSUPP; } /* do not check for static fd-non-iso if 'fd' is disabled */ if (!(maskedflags & CAN_CTRLMODE_FD)) ctrlstatic &= ~CAN_CTRLMODE_FD_NON_ISO; if (ctrlstatic_missing) { NL_SET_ERR_MSG_FMT(extack, "missing required %s static control mode", can_get_ctrlmode_str(ctrlstatic_missing)); return -EOPNOTSUPP; } /* If FD was active and is not turned off, check for XL conflicts */ if (priv->ctrlmode & CAN_CTRLMODE_FD & ~deactivated) { if (maskedflags & CAN_CTRLMODE_XL_TMS) { NL_SET_ERR_MSG(extack, "TMS can not be activated while CAN FD is on"); return -EOPNOTSUPP; } } /* If a top dependency flag is provided, reset all its dependencies */ if (cm->mask & CAN_CTRLMODE_FD) 
priv->ctrlmode &= ~CAN_CTRLMODE_FD_TDC_MASK; if (cm->mask & CAN_CTRLMODE_XL) priv->ctrlmode &= ~(CAN_CTRLMODE_XL_TDC_MASK | CAN_CTRLMODE_XL_TMS); /* clear bits to be modified and copy the flag values */ priv->ctrlmode &= ~cm->mask; priv->ctrlmode |= maskedflags; /* Wipe potential leftovers from previous CAN FD/XL config */ if (!(priv->ctrlmode & CAN_CTRLMODE_FD)) { memset(&priv->fd.data_bittiming, 0, sizeof(priv->fd.data_bittiming)); priv->ctrlmode &= ~CAN_CTRLMODE_FD_TDC_MASK; memset(&priv->fd.tdc, 0, sizeof(priv->fd.tdc)); } if (!(priv->ctrlmode & CAN_CTRLMODE_XL)) { memset(&priv->xl.data_bittiming, 0, sizeof(priv->fd.data_bittiming)); priv->ctrlmode &= ~CAN_CTRLMODE_XL_TDC_MASK; memset(&priv->xl.tdc, 0, sizeof(priv->xl.tdc)); memset(&priv->xl.pwm, 0, sizeof(priv->xl.pwm)); } can_set_default_mtu(dev); can_set_cap_info(dev); return 0; } static int can_tdc_changelink(struct data_bittiming_params *dbt_params, const struct nlattr *nla, struct netlink_ext_ack *extack) { struct nlattr *tb_tdc[IFLA_CAN_TDC_MAX + 1]; struct can_tdc tdc = { 0 }; const struct can_tdc_const *tdc_const = dbt_params->tdc_const; int err; if (!tdc_const) { NL_SET_ERR_MSG(extack, "The device does not support TDC"); return -EOPNOTSUPP; } err = nla_parse_nested(tb_tdc, IFLA_CAN_TDC_MAX, nla, can_tdc_policy, extack); if (err) return err; if (tb_tdc[IFLA_CAN_TDC_TDCV]) { u32 tdcv = nla_get_u32(tb_tdc[IFLA_CAN_TDC_TDCV]); if (tdcv < tdc_const->tdcv_min || tdcv > tdc_const->tdcv_max) return -EINVAL; tdc.tdcv = tdcv; } if (tb_tdc[IFLA_CAN_TDC_TDCO]) { u32 tdco = nla_get_u32(tb_tdc[IFLA_CAN_TDC_TDCO]); if (tdco < tdc_const->tdco_min || tdco > tdc_const->tdco_max) return -EINVAL; tdc.tdco = tdco; } if (tb_tdc[IFLA_CAN_TDC_TDCF]) { u32 tdcf = nla_get_u32(tb_tdc[IFLA_CAN_TDC_TDCF]); if (tdcf < tdc_const->tdcf_min || tdcf > tdc_const->tdcf_max) return -EINVAL; tdc.tdcf = tdcf; } dbt_params->tdc = tdc; return 0; } static int can_dbt_changelink(struct net_device *dev, struct nlattr *data[], bool fd, struct 
netlink_ext_ack *extack)
{
	/* Apply a data bittiming request: the CAN FD attributes and
	 * priv->fd when @fd is true, the CAN XL attributes and priv->xl
	 * otherwise. Nothing is done when the data bittiming attribute is
	 * absent.
	 */
	struct nlattr *data_bittiming, *data_tdc;
	struct can_priv *priv = netdev_priv(dev);
	struct data_bittiming_params *dbt_params;
	struct can_bittiming dbt;
	bool need_tdc_calc = false;
	u32 tdc_mask;
	int err;

	if (fd) {
		data_bittiming = data[IFLA_CAN_DATA_BITTIMING];
		data_tdc = data[IFLA_CAN_TDC];
		dbt_params = &priv->fd;
		tdc_mask = CAN_CTRLMODE_FD_TDC_MASK;
	} else {
		data_bittiming = data[IFLA_CAN_XL_DATA_BITTIMING];
		data_tdc = data[IFLA_CAN_XL_TDC];
		dbt_params = &priv->xl;
		tdc_mask = CAN_CTRLMODE_XL_TDC_MASK;
	}

	if (!data_bittiming)
		return 0;

	/* Do not allow changing bittiming while running */
	if (dev->flags & IFF_UP)
		return -EBUSY;

	/* Calculate bittiming parameters based on data_bittiming_const
	 * if set, otherwise pass bitrate directly via do_set_bitrate().
	 * Bail out if neither is given.
	 */
	if (!dbt_params->data_bittiming_const &&
	    !dbt_params->do_set_data_bittiming &&
	    !dbt_params->data_bitrate_const)
		return -EOPNOTSUPP;

	/* Work on a local copy; the stored bittiming is only replaced
	 * once all checks below have passed.
	 */
	memcpy(&dbt, nla_data(data_bittiming), sizeof(dbt));
	err = can_get_bittiming(dev, &dbt, dbt_params->data_bittiming_const,
				dbt_params->data_bitrate_const,
				dbt_params->data_bitrate_const_cnt, extack);
	if (err)
		return err;

	if (priv->bitrate_max && dbt.bitrate > priv->bitrate_max) {
		NL_SET_ERR_MSG_FMT(extack,
				   "CAN data bitrate %u bps surpasses transceiver capabilities of %u bps",
				   dbt.bitrate, priv->bitrate_max);
		return -EINVAL;
	}

	memset(&dbt_params->tdc, 0, sizeof(dbt_params->tdc));
	if (data[IFLA_CAN_CTRLMODE]) {
		struct can_ctrlmode *cm = nla_data(data[IFLA_CAN_CTRLMODE]);

		/* TDC is only calculated when the request leaves the TDC
		 * flags untouched; for XL additionally only when TMS is off.
		 */
		if (fd || !(priv->ctrlmode & CAN_CTRLMODE_XL_TMS))
			need_tdc_calc = !(cm->mask & tdc_mask);
	}
	if (data_tdc) {
		/* TDC parameters are provided: use them */
		err = can_tdc_changelink(dbt_params, data_tdc, extack);
		if (err) {
			priv->ctrlmode &= ~tdc_mask;
			return err;
		}
	} else if (need_tdc_calc) {
		/* Neither of TDC parameters nor TDC flags are provided:
		 * do calculation
		 */
		can_calc_tdco(&dbt_params->tdc, dbt_params->tdc_const, &dbt,
			      tdc_mask, &priv->ctrlmode,
			      priv->ctrlmode_supported);
	} /* else: both CAN_CTRLMODE_{,XL}_TDC_{AUTO,MANUAL} are explicitly
	   * turned off. TDC is disabled: do nothing
	   */

	memcpy(&dbt_params->data_bittiming, &dbt, sizeof(dbt));

	if (dbt_params->do_set_data_bittiming) {
		/* Finally, set the bit-timing registers */
		err = dbt_params->do_set_data_bittiming(dev);
		if (err)
			return err;
	}

	return 0;
}

/* Apply the PWM settings used with CAN XL TMS.
 *
 * Does nothing unless CAN_CTRLMODE_XL_TMS is set in priv->ctrlmode.
 * Without a PWM attribute the values come from can_calc_pwm(); otherwise
 * each provided value is range checked against priv->xl.pwm_const and the
 * result is passed to can_validate_pwm_bittiming() before being stored
 * in priv->xl.pwm. Values that are not provided stay zero.
 */
static int can_pwm_changelink(struct net_device *dev,
			      const struct nlattr *pwm_nla,
			      struct netlink_ext_ack *extack)
{
	struct can_priv *priv = netdev_priv(dev);
	const struct can_pwm_const *pwm_const = priv->xl.pwm_const;
	struct nlattr *tb_pwm[IFLA_CAN_PWM_MAX + 1];
	struct can_pwm pwm = { 0 };
	int err;

	if (!(priv->ctrlmode & CAN_CTRLMODE_XL_TMS))
		return 0;

	if (!pwm_const) {
		NL_SET_ERR_MSG(extack, "The device does not support PWM");
		return -EOPNOTSUPP;
	}

	if (!pwm_nla)
		return can_calc_pwm(dev, extack);

	err = nla_parse_nested(tb_pwm, IFLA_CAN_PWM_MAX, pwm_nla,
			       can_pwm_policy, extack);
	if (err)
		return err;

	if (tb_pwm[IFLA_CAN_PWM_PWMS]) {
		pwm.pwms = nla_get_u32(tb_pwm[IFLA_CAN_PWM_PWMS]);
		if (pwm.pwms < pwm_const->pwms_min ||
		    pwm.pwms > pwm_const->pwms_max) {
			NL_SET_ERR_MSG_FMT(extack,
					   "PWMS: %u tqmin is out of range: %u...%u",
					   pwm.pwms, pwm_const->pwms_min,
					   pwm_const->pwms_max);
			return -EINVAL;
		}
	}

	if (tb_pwm[IFLA_CAN_PWM_PWML]) {
		pwm.pwml = nla_get_u32(tb_pwm[IFLA_CAN_PWM_PWML]);
		if (pwm.pwml < pwm_const->pwml_min ||
		    pwm.pwml > pwm_const->pwml_max) {
			NL_SET_ERR_MSG_FMT(extack,
					   "PWML: %u tqmin is out of range: %u...%u",
					   pwm.pwml, pwm_const->pwml_min,
					   pwm_const->pwml_max);
			return -EINVAL;
		}
	}

	if (tb_pwm[IFLA_CAN_PWM_PWMO]) {
		pwm.pwmo = nla_get_u32(tb_pwm[IFLA_CAN_PWM_PWMO]);
		if (pwm.pwmo < pwm_const->pwmo_min ||
		    pwm.pwmo > pwm_const->pwmo_max) {
			NL_SET_ERR_MSG_FMT(extack,
					   "PWMO: %u tqmin is out of range: %u...%u",
					   pwm.pwmo, pwm_const->pwmo_min,
					   pwm_const->pwmo_max);
			return -EINVAL;
		}
	}

	err = can_validate_pwm_bittiming(dev, &pwm, extack);
	if (err)
		return err;

	priv->xl.pwm = pwm;

	return 0;
}

static int
can_changelink(struct net_device *dev, struct nlattr *tb[],
	       struct nlattr *data[], struct netlink_ext_ack *extack)
{
	/* rtnl_link_ops.changelink handler: applies, in this order, the
	 * control mode, the nominal bittiming, the restart delay, a restart
	 * request, the CAN FD and CAN XL data bittiming, the PWM settings
	 * and the termination. The first failing step aborts the request;
	 * changes made by earlier steps are not rolled back here.
	 */
	struct can_priv *priv = netdev_priv(dev);
	int err;

	/* We need synchronization with dev->stop() */
	ASSERT_RTNL();

	err = can_ctrlmode_changelink(dev, data, extack);
	if (err)
		return err;

	if (data[IFLA_CAN_BITTIMING]) {
		struct can_bittiming bt;

		/* Do not allow changing bittiming while running */
		if (dev->flags & IFF_UP)
			return -EBUSY;

		/* Calculate bittiming parameters based on
		 * bittiming_const if set, otherwise pass bitrate
		 * directly via do_set_bitrate(). Bail out if neither
		 * is given.
		 */
		if (!priv->bittiming_const && !priv->do_set_bittiming &&
		    !priv->bitrate_const)
			return -EOPNOTSUPP;

		memcpy(&bt, nla_data(data[IFLA_CAN_BITTIMING]), sizeof(bt));
		err = can_get_bittiming(dev, &bt, priv->bittiming_const,
					priv->bitrate_const,
					priv->bitrate_const_cnt, extack);
		if (err)
			return err;

		if (priv->bitrate_max && bt.bitrate > priv->bitrate_max) {
			NL_SET_ERR_MSG_FMT(extack,
					   "arbitration bitrate %u bps surpasses transceiver capabilities of %u bps",
					   bt.bitrate, priv->bitrate_max);
			return -EINVAL;
		}

		memcpy(&priv->bittiming, &bt, sizeof(bt));

		if (priv->do_set_bittiming) {
			/* Finally, set the bit-timing registers */
			err = priv->do_set_bittiming(dev);
			if (err)
				return err;
		}
	}

	if (data[IFLA_CAN_RESTART_MS]) {
		unsigned int restart_ms = nla_get_u32(data[IFLA_CAN_RESTART_MS]);

		/* a zero delay is accepted even without do_set_mode */
		if (restart_ms != 0 && !priv->do_set_mode) {
			NL_SET_ERR_MSG(extack,
				       "Device doesn't support restart from Bus Off");
			return -EOPNOTSUPP;
		}

		/* Do not allow changing restart delay while running */
		if (dev->flags & IFF_UP)
			return -EBUSY;
		priv->restart_ms = restart_ms;
	}

	if (data[IFLA_CAN_RESTART]) {
		if (!priv->do_set_mode) {
			NL_SET_ERR_MSG(extack,
				       "Device doesn't support restart from Bus Off");
			return -EOPNOTSUPP;
		}

		/* Do not allow a restart while not running */
		if (!(dev->flags & IFF_UP))
			return -EINVAL;
		err = can_restart_now(dev);
		if (err)
			return err;
	}

	/* CAN FD */
	err = can_dbt_changelink(dev, data, true, extack);
	if (err)
		return err;

	/* CAN XL */
	err = can_dbt_changelink(dev, data, false, extack);
	if (err)
		return err;
	err = can_pwm_changelink(dev, data[IFLA_CAN_XL_PWM], extack);
	if (err)
		return err;

	if (data[IFLA_CAN_TERMINATION]) {
		const u16 termval = nla_get_u16(data[IFLA_CAN_TERMINATION]);
		const unsigned int num_term = priv->termination_const_cnt;
		unsigned int i;

		if (!priv->do_set_termination) {
			NL_SET_ERR_MSG(extack,
				       "Termination is not configurable on this device");
			return -EOPNOTSUPP;
		}

		/* check whether given value is supported by the interface */
		for (i = 0; i < num_term; i++) {
			if (termval == priv->termination_const[i])
				break;
		}
		if (i >= num_term)
			return -EINVAL;

		/* Finally, set the termination value */
		err = priv->do_set_termination(dev, termval);
		if (err)
			return err;

		priv->termination = termval;
	}

	return 0;
}

/* Space needed for one nested TDC attribute.
 *
 * @tdc_flags holds the TDC control mode bits of the bittiming set
 * described by @dbt_params. Returns 0 when the device has no tdc_const.
 * The TDCV range is only counted in manual mode, the TDCF entries only
 * when tdcf_max is non-zero, and the current values only when a TDC
 * flag is set.
 */
static size_t can_tdc_get_size(struct data_bittiming_params *dbt_params,
			       u32 tdc_flags)
{
	bool tdc_manual = tdc_flags & CAN_CTRLMODE_TDC_MANUAL_MASK;
	size_t size;

	if (!dbt_params->tdc_const)
		return 0;

	size = nla_total_size(0);			/* nest IFLA_CAN_TDC */
	if (tdc_manual) {
		size += nla_total_size(sizeof(u32));	/* IFLA_CAN_TDCV_MIN */
		size += nla_total_size(sizeof(u32));	/* IFLA_CAN_TDCV_MAX */
	}
	size += nla_total_size(sizeof(u32));		/* IFLA_CAN_TDCO_MIN */
	size += nla_total_size(sizeof(u32));		/* IFLA_CAN_TDCO_MAX */
	if (dbt_params->tdc_const->tdcf_max) {
		size += nla_total_size(sizeof(u32));	/* IFLA_CAN_TDCF_MIN */
		size += nla_total_size(sizeof(u32));	/* IFLA_CAN_TDCF_MAX */
	}

	if (tdc_flags) {
		if (tdc_manual || dbt_params->do_get_auto_tdcv)
			size += nla_total_size(sizeof(u32));	/* IFLA_CAN_TDCV */
		size += nla_total_size(sizeof(u32));		/* IFLA_CAN_TDCO */
		if (dbt_params->tdc_const->tdcf_max)
			size += nla_total_size(sizeof(u32));	/* IFLA_CAN_TDCF */
	}

	return size;
}

static size_t can_data_bittiming_get_size(struct data_bittiming_params *dbt_params,
					  u32 tdc_flags)
{
	size_t size = 0;

	if (dbt_params->data_bittiming.bitrate)	/* IFLA_CAN_{,XL}_DATA_BITTIMING */
size += nla_total_size(sizeof(dbt_params->data_bittiming)); if (dbt_params->data_bittiming_const) /* IFLA_CAN_{,XL}_DATA_BITTIMING_CONST */ size += nla_total_size(sizeof(*dbt_params->data_bittiming_const)); if (dbt_params->data_bitrate_const) /* IFLA_CAN_{,XL}_DATA_BITRATE_CONST */ size += nla_total_size(sizeof(*dbt_params->data_bitrate_const) * dbt_params->data_bitrate_const_cnt); size += can_tdc_get_size(dbt_params, tdc_flags);/* IFLA_CAN_{,XL}_TDC */ return size; } static size_t can_ctrlmode_ext_get_size(void) { return nla_total_size(0) + /* nest IFLA_CAN_CTRLMODE_EXT */ nla_total_size(sizeof(u32)); /* IFLA_CAN_CTRLMODE_SUPPORTED */ } static size_t can_pwm_get_size(const struct can_pwm_const *pwm_const, bool pwm_on) { size_t size; if (!pwm_const || !pwm_on) return 0; size = nla_total_size(0); /* nest IFLA_CAN_PWM */ size += nla_total_size(sizeof(u32)); /* IFLA_CAN_PWM_PWMS_MIN */ size += nla_total_size(sizeof(u32)); /* IFLA_CAN_PWM_PWMS_MAX */ size += nla_total_size(sizeof(u32)); /* IFLA_CAN_PWM_PWML_MIN */ size += nla_total_size(sizeof(u32)); /* IFLA_CAN_PWM_PWML_MAX */ size += nla_total_size(sizeof(u32)); /* IFLA_CAN_PWM_PWMO_MIN */ size += nla_total_size(sizeof(u32)); /* IFLA_CAN_PWM_PWMO_MAX */ size += nla_total_size(sizeof(u32)); /* IFLA_CAN_PWM_PWMS */ size += nla_total_size(sizeof(u32)); /* IFLA_CAN_PWM_PWML */ size += nla_total_size(sizeof(u32)); /* IFLA_CAN_PWM_PWMO */ return size; } static size_t can_get_size(const struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); size_t size = 0; if (priv->bittiming.bitrate) /* IFLA_CAN_BITTIMING */ size += nla_total_size(sizeof(struct can_bittiming)); if (priv->bittiming_const) /* IFLA_CAN_BITTIMING_CONST */ size += nla_total_size(sizeof(struct can_bittiming_const)); size += nla_total_size(sizeof(struct can_clock)); /* IFLA_CAN_CLOCK */ size += nla_total_size(sizeof(u32)); /* IFLA_CAN_STATE */ size += nla_total_size(sizeof(struct can_ctrlmode)); /* IFLA_CAN_CTRLMODE */ size += 
nla_total_size(sizeof(u32)); /* IFLA_CAN_RESTART_MS */ if (priv->do_get_berr_counter) /* IFLA_CAN_BERR_COUNTER */ size += nla_total_size(sizeof(struct can_berr_counter)); if (priv->termination_const) { size += nla_total_size(sizeof(priv->termination)); /* IFLA_CAN_TERMINATION */ size += nla_total_size(sizeof(*priv->termination_const) * /* IFLA_CAN_TERMINATION_CONST */ priv->termination_const_cnt); } if (priv->bitrate_const) /* IFLA_CAN_BITRATE_CONST */ size += nla_total_size(sizeof(*priv->bitrate_const) * priv->bitrate_const_cnt); size += sizeof(priv->bitrate_max); /* IFLA_CAN_BITRATE_MAX */ size += can_ctrlmode_ext_get_size(); /* IFLA_CAN_CTRLMODE_EXT */ size += can_data_bittiming_get_size(&priv->fd, priv->ctrlmode & CAN_CTRLMODE_FD_TDC_MASK); size += can_data_bittiming_get_size(&priv->xl, priv->ctrlmode & CAN_CTRLMODE_XL_TDC_MASK); size += can_pwm_get_size(priv->xl.pwm_const, /* IFLA_CAN_XL_PWM */ priv->ctrlmode & CAN_CTRLMODE_XL_TMS); return size; } static int can_bittiming_fill_info(struct sk_buff *skb, int ifla_can_bittiming, struct can_bittiming *bittiming) { return bittiming->bitrate != CAN_BITRATE_UNSET && bittiming->bitrate != CAN_BITRATE_UNKNOWN && nla_put(skb, ifla_can_bittiming, sizeof(*bittiming), bittiming); } static int can_bittiming_const_fill_info(struct sk_buff *skb, int ifla_can_bittiming_const, const struct can_bittiming_const *bittiming_const) { return bittiming_const && nla_put(skb, ifla_can_bittiming_const, sizeof(*bittiming_const), bittiming_const); } static int can_bitrate_const_fill_info(struct sk_buff *skb, int ifla_can_bitrate_const, const u32 *bitrate_const, unsigned int cnt) { return bitrate_const && nla_put(skb, ifla_can_bitrate_const, sizeof(*bitrate_const) * cnt, bitrate_const); } static int can_tdc_fill_info(struct sk_buff *skb, const struct net_device *dev, int ifla_can_tdc) { struct can_priv *priv = netdev_priv(dev); struct data_bittiming_params *dbt_params; const struct can_tdc_const *tdc_const; struct can_tdc *tdc; struct 
nlattr *nest;
	bool tdc_is_enabled, tdc_manual;

	/* Select the CAN FD or the CAN XL parameter set; any value other
	 * than IFLA_CAN_TDC is treated as the XL variant.
	 */
	if (ifla_can_tdc == IFLA_CAN_TDC) {
		dbt_params = &priv->fd;
		tdc_is_enabled = can_fd_tdc_is_enabled(priv);
		tdc_manual = priv->ctrlmode & CAN_CTRLMODE_TDC_MANUAL;
	} else {
		dbt_params = &priv->xl;
		tdc_is_enabled = can_xl_tdc_is_enabled(priv);
		tdc_manual = priv->ctrlmode & CAN_CTRLMODE_XL_TDC_MANUAL;
	}
	tdc_const = dbt_params->tdc_const;
	tdc = &dbt_params->tdc;

	/* nothing to report for devices without TDC constants */
	if (!tdc_const)
		return 0;

	nest = nla_nest_start(skb, ifla_can_tdc);
	if (!nest)
		return -EMSGSIZE;

	/* TDCV range only in manual mode, TDCF range only if tdcf_max is set */
	if (tdc_manual &&
	    (nla_put_u32(skb, IFLA_CAN_TDC_TDCV_MIN, tdc_const->tdcv_min) ||
	     nla_put_u32(skb, IFLA_CAN_TDC_TDCV_MAX, tdc_const->tdcv_max)))
		goto err_cancel;
	if (nla_put_u32(skb, IFLA_CAN_TDC_TDCO_MIN, tdc_const->tdco_min) ||
	    nla_put_u32(skb, IFLA_CAN_TDC_TDCO_MAX, tdc_const->tdco_max))
		goto err_cancel;
	if (tdc_const->tdcf_max &&
	    (nla_put_u32(skb, IFLA_CAN_TDC_TDCF_MIN, tdc_const->tdcf_min) ||
	     nla_put_u32(skb, IFLA_CAN_TDC_TDCF_MAX, tdc_const->tdcf_max)))
		goto err_cancel;

	if (tdc_is_enabled) {
		u32 tdcv;
		int err = -EINVAL;

		/* TDCV comes from the stored value in manual mode and from
		 * do_get_auto_tdcv() otherwise; it is left out when neither
		 * source yields a value (err stays non-zero).
		 */
		if (tdc_manual) {
			tdcv = tdc->tdcv;
			err = 0;
		} else if (dbt_params->do_get_auto_tdcv) {
			err = dbt_params->do_get_auto_tdcv(dev, &tdcv);
		}
		if (!err && nla_put_u32(skb, IFLA_CAN_TDC_TDCV, tdcv))
			goto err_cancel;
		if (nla_put_u32(skb, IFLA_CAN_TDC_TDCO, tdc->tdco))
			goto err_cancel;
		if (tdc_const->tdcf_max &&
		    nla_put_u32(skb, IFLA_CAN_TDC_TDCF, tdc->tdcf))
			goto err_cancel;
	}

	nla_nest_end(skb, nest);
	return 0;

err_cancel:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}

/* Emit the nested IFLA_CAN_XL_PWM attribute.
 *
 * Nothing is written for devices without pwm_const. The six range
 * values are always part of the nest; the current PWMS/PWML/PWMO values
 * are added only while CAN_CTRLMODE_XL_TMS is set. Returns -EMSGSIZE and
 * cancels the nest when the skb runs out of room.
 */
static int can_pwm_fill_info(struct sk_buff *skb, const struct can_priv *priv)
{
	const struct can_pwm_const *pwm_const = priv->xl.pwm_const;
	const struct can_pwm *pwm = &priv->xl.pwm;
	struct nlattr *nest;

	if (!pwm_const)
		return 0;

	nest = nla_nest_start(skb, IFLA_CAN_XL_PWM);
	if (!nest)
		return -EMSGSIZE;

	if (nla_put_u32(skb, IFLA_CAN_PWM_PWMS_MIN, pwm_const->pwms_min) ||
	    nla_put_u32(skb, IFLA_CAN_PWM_PWMS_MAX, pwm_const->pwms_max) ||
	    nla_put_u32(skb, IFLA_CAN_PWM_PWML_MIN, pwm_const->pwml_min) ||
	    nla_put_u32(skb, IFLA_CAN_PWM_PWML_MAX, pwm_const->pwml_max) ||
	    nla_put_u32(skb, IFLA_CAN_PWM_PWMO_MIN, pwm_const->pwmo_min) ||
	    nla_put_u32(skb, IFLA_CAN_PWM_PWMO_MAX, pwm_const->pwmo_max))
		goto err_cancel;

	if (priv->ctrlmode & CAN_CTRLMODE_XL_TMS) {
		if (nla_put_u32(skb, IFLA_CAN_PWM_PWMS, pwm->pwms) ||
		    nla_put_u32(skb, IFLA_CAN_PWM_PWML, pwm->pwml) ||
		    nla_put_u32(skb, IFLA_CAN_PWM_PWMO, pwm->pwmo))
			goto err_cancel;
	}

	nla_nest_end(skb, nest);
	return 0;

err_cancel:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}

/* Emit the IFLA_CAN_CTRLMODE_EXT nest, which carries
 * priv->ctrlmode_supported. Returns -EMSGSIZE when the skb is full.
 */
static int can_ctrlmode_ext_fill_info(struct sk_buff *skb,
				      const struct can_priv *priv)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, IFLA_CAN_CTRLMODE_EXT);
	if (!nest)
		return -EMSGSIZE;

	if (nla_put_u32(skb, IFLA_CAN_CTRLMODE_SUPPORTED,
			priv->ctrlmode_supported)) {
		nla_nest_cancel(skb, nest);
		return -EMSGSIZE;
	}

	nla_nest_end(skb, nest);
	return 0;
}

/* rtnl_link_ops.fill_info handler: dumps the CAN configuration of @dev
 * into @skb. All puts are chained with ||, so the first one that fails
 * ends the dump with -EMSGSIZE.
 */
static int can_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct can_priv *priv = netdev_priv(dev);
	struct can_ctrlmode cm = {.flags = priv->ctrlmode};
	struct can_berr_counter bec = { };
	enum can_state state = priv->state;

	/* let the driver report the state if it provides do_get_state */
	if (priv->do_get_state)
		priv->do_get_state(dev, &state);

	if (can_bittiming_fill_info(skb, IFLA_CAN_BITTIMING,
				    &priv->bittiming) ||

	    can_bittiming_const_fill_info(skb, IFLA_CAN_BITTIMING_CONST,
					  priv->bittiming_const) ||

	    nla_put(skb, IFLA_CAN_CLOCK, sizeof(priv->clock), &priv->clock) ||
	    nla_put_u32(skb, IFLA_CAN_STATE, state) ||
	    nla_put(skb, IFLA_CAN_CTRLMODE, sizeof(cm), &cm) ||
	    nla_put_u32(skb, IFLA_CAN_RESTART_MS, priv->restart_ms) ||

	    /* the error counters are skipped when the driver call fails */
	    (priv->do_get_berr_counter &&
	     !priv->do_get_berr_counter(dev, &bec) &&
	     nla_put(skb, IFLA_CAN_BERR_COUNTER, sizeof(bec), &bec)) ||

	    can_bittiming_fill_info(skb, IFLA_CAN_DATA_BITTIMING,
				    &priv->fd.data_bittiming) ||

	    can_bittiming_const_fill_info(skb, IFLA_CAN_DATA_BITTIMING_CONST,
					  priv->fd.data_bittiming_const) ||

	    (priv->termination_const &&
	     (nla_put_u16(skb, IFLA_CAN_TERMINATION, priv->termination) ||
	      nla_put(skb,
IFLA_CAN_TERMINATION_CONST,
		      sizeof(*priv->termination_const) *
		      priv->termination_const_cnt,
		      priv->termination_const))) ||

	    can_bitrate_const_fill_info(skb, IFLA_CAN_BITRATE_CONST,
					priv->bitrate_const,
					priv->bitrate_const_cnt) ||

	    can_bitrate_const_fill_info(skb, IFLA_CAN_DATA_BITRATE_CONST,
					priv->fd.data_bitrate_const,
					priv->fd.data_bitrate_const_cnt) ||

	    (nla_put(skb, IFLA_CAN_BITRATE_MAX,
		     sizeof(priv->bitrate_max),
		     &priv->bitrate_max)) ||

	    can_tdc_fill_info(skb, dev, IFLA_CAN_TDC) ||

	    can_ctrlmode_ext_fill_info(skb, priv) ||

	    /* CAN XL counterparts of the data bittiming attributes */
	    can_bittiming_fill_info(skb, IFLA_CAN_XL_DATA_BITTIMING,
				    &priv->xl.data_bittiming) ||

	    can_bittiming_const_fill_info(skb, IFLA_CAN_XL_DATA_BITTIMING_CONST,
					  priv->xl.data_bittiming_const) ||

	    can_bitrate_const_fill_info(skb, IFLA_CAN_XL_DATA_BITRATE_CONST,
					priv->xl.data_bitrate_const,
					priv->xl.data_bitrate_const_cnt) ||

	    can_tdc_fill_info(skb, dev, IFLA_CAN_XL_TDC) ||

	    can_pwm_fill_info(skb, priv)
	    )

		return -EMSGSIZE;

	return 0;
}

/* rtnl_link_ops.get_xstats_size handler: size of the statistics blob
 * written by can_fill_xstats().
 */
static size_t can_get_xstats_size(const struct net_device *dev)
{
	return sizeof(struct can_device_stats);
}

/* rtnl_link_ops.fill_xstats handler: copies priv->can_stats into an
 * IFLA_INFO_XSTATS attribute. Returns -EMSGSIZE when it does not fit.
 */
static int can_fill_xstats(struct sk_buff *skb, const struct net_device *dev)
{
	struct can_priv *priv = netdev_priv(dev);

	if (nla_put(skb, IFLA_INFO_XSTATS,
		    sizeof(priv->can_stats), &priv->can_stats))
		goto nla_put_failure;
	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

/* Creating CAN devices through this link type is rejected
 * unconditionally.
 */
static int can_newlink(struct net_device *dev,
		       struct rtnl_newlink_params *params,
		       struct netlink_ext_ack *extack)
{
	return -EOPNOTSUPP;
}

/* Intentionally empty: a dellink request does nothing for this link
 * type.
 */
static void can_dellink(struct net_device *dev, struct list_head *head)
{
}

/* rtnetlink operations of the "can" link kind */
struct rtnl_link_ops can_link_ops __read_mostly = {
	.kind		= "can",
	.netns_refund	= true,
	.maxtype	= IFLA_CAN_MAX,
	.policy		= can_policy,
	.setup		= can_setup,
	.validate	= can_validate,
	.newlink	= can_newlink,
	.changelink	= can_changelink,
	.dellink	= can_dellink,
	.get_size	= can_get_size,
	.fill_info	= can_fill_info,
	.get_xstats_size = can_get_xstats_size,
	.fill_xstats	= can_fill_xstats,
};

/* Register the "can" link operations; returns the result of
 * rtnl_link_register().
 */
int can_netlink_register(void)
{
	return rtnl_link_register(&can_link_ops);
}

/* Unregister the "can" link operations again. */
void can_netlink_unregister(void)
{
	rtnl_link_unregister(&can_link_ops);
}
6 6 6 6 6 6 6 6 3 3 3 3 3 3 3 3 3 3 3 1 2 2 2 1 1 3 3 3 5 5 2 3 3 1 2 1 1 1 1 1 5 5 5 4 4 4 1 3 1 2 2 4 4 4 2 1 1 2 1 4 3 3 3 1 2 1 1 3 4 1 1 1 1 14 14 13 13 2 2 2 2 2 2 2 2 1 2 6 1 5 2 6 1 5 1 1 4 5 4 3 4 2 5 5 3 3 1 3 4 1 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 4 4 4 6 6 1 1 1 1 1 1 1 1 1 7 7 2 5 7 5 1 4 5 2 5 5 5 1 4 4 4 1 3 3 3 4 3 3 3 7 7 5 4 4 4 5 4 5 4 4 1 4 5 5 10 10 7 7 7 7 7 1 6 7 1 6 6 6 4 2 2 2 1 10 5 5 5 1 4 4 1 3 3 1 3 3 3 1 2 1 1 1 1 1 1 5 5 1 1 1 1 1 1 20 20 10 10 10 7 3 3 3 3 3 17 2 2 1 1 1 1 2 2 3 1 3 2 2 1 1 1 2 3 4 1 1 3 1 1 3 53 52 1 1 2 5 3 3 4 1 1 3 6 7 1 1 1 2 3 1 1 1 1 1 2 1 1 1 52 1 1 4 4 4 4 4 4 3 1 4 4 22 22 22 1 21 20 20 20 20 17 20 20 20 2 20 4 20 20 20 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 
356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 
856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 
1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 
1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 
2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 
2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 // SPDX-License-Identifier: GPL-2.0+ /* * drivers/usb/class/usbtmc.c - USB Test & Measurement class driver * * Copyright (C) 2007 Stefan Kopp, Gechingen, Germany * Copyright (C) 2008 Novell, Inc. * Copyright (C) 2008 Greg Kroah-Hartman <gregkh@suse.de> * Copyright (C) 2018 IVI Foundation, Inc. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/kernel.h> #include <linux/fs.h> #include <linux/uaccess.h> #include <linux/kref.h> #include <linux/slab.h> #include <linux/poll.h> #include <linux/mutex.h> #include <linux/usb.h> #include <linux/compat.h> #include <linux/usb/tmc.h> /* Increment API VERSION when changing tmc.h with new flags or ioctls * or when changing a significant behavior of the driver. */ #define USBTMC_API_VERSION (3) #define USBTMC_HEADER_SIZE 12 #define USBTMC_MINOR_BASE 176 /* Minimum USB timeout (in milliseconds) */ #define USBTMC_MIN_TIMEOUT 100 /* Default USB timeout (in milliseconds) */ #define USBTMC_TIMEOUT 5000 /* Max number of urbs used in write transfers */ #define MAX_URBS_IN_FLIGHT 16 /* I/O buffer size used in generic read/write functions */ #define USBTMC_BUFSIZE (4096) /* * Maximum number of read cycles to empty bulk in endpoint during CLEAR and * ABORT_BULK_IN requests. Ends the loop if (for whatever reason) a short * packet is never read. 
*/
#define USBTMC_MAX_READS_TO_CLEAR_BULK_IN 100

/* Interface class/subclass/protocol combinations handled by this driver */
static const struct usb_device_id usbtmc_devices[] = {
	{ USB_INTERFACE_INFO(USB_CLASS_APP_SPEC, 3, 0), },
	{ USB_INTERFACE_INFO(USB_CLASS_APP_SPEC, 3, 1), },
	{ 0, } /* terminating entry */
};
MODULE_DEVICE_TABLE(usb, usbtmc_devices);

/*
 * This structure is the capabilities for the device
 * See section 4.2.1.8 of the USBTMC specification,
 * and section 4.2.2 of the USBTMC usb488 subclass
 * specification for details.
 */
struct usbtmc_dev_capabilities {
	__u8 interface_capabilities;
	__u8 device_capabilities;
	__u8 usb488_interface_capabilities;
	__u8 usb488_device_capabilities;
};

/* This structure holds private data for each USBTMC device. One copy is
 * allocated for each USBTMC device in the driver's probe function.
 */
struct usbtmc_device_data {
	const struct usb_device_id *id;
	struct usb_device *usb_dev;
	struct usb_interface *intf;
	struct list_head file_list;

	unsigned int bulk_in;
	unsigned int bulk_out;

	u8 bTag;
	u8 bTag_last_write;	/* needed for abort */
	u8 bTag_last_read;	/* needed for abort */

	/* packet size of IN bulk */
	u16 wMaxPacketSize;

	/* data for interrupt in endpoint handling */
	u8 bNotify1;
	u8 bNotify2;
	u16 ifnum;
	u8 iin_bTag;
	u8 *iin_buffer;
	atomic_t iin_data_valid;
	unsigned int iin_ep;
	int iin_ep_present;
	int iin_interval;
	struct urb *iin_urb;
	u16 iin_wMaxPacketSize;

	/* coalesced usb488_caps from usbtmc_dev_capabilities */
	__u8 usb488_caps;

	bool zombie; /* fd of disconnected device */

	struct usbtmc_dev_capabilities capabilities;
	struct kref kref;
	struct mutex io_mutex;	/* only one i/o function running at a time */
	wait_queue_head_t waitq;
	struct fasync_struct *fasync;
	spinlock_t dev_lock; /* lock for file_list */
};
/* Map the embedded kref back to its usbtmc_device_data */
#define to_usbtmc_data(d) container_of(d, struct usbtmc_device_data, kref)

/*
 * This structure holds private data for each USBTMC file handle.
 */
struct usbtmc_file_data {
	struct usbtmc_device_data *data;
	struct list_head file_elem;

	u32 timeout;
	u8 srq_byte;
	atomic_t srq_asserted;
	atomic_t closing;
	u8 bmTransferAttributes; /* member of DEV_DEP_MSG_IN */

	u8 eom_val;
	u8 term_char;
	bool term_char_enabled;
	bool auto_abort;

	spinlock_t err_lock; /* lock for errors */

	struct usb_anchor submitted;

	/* data for generic_write */
	struct semaphore limit_write_sem;
	u32 out_transfer_size;
	int out_status;

	/* data for generic_read */
	u32 in_transfer_size;
	int in_status;
	int in_urbs_used;
	struct usb_anchor in_anchor;
	wait_queue_head_t wait_bulk_in;
};

/* Forward declarations */
static struct usb_driver usbtmc_driver;
static void usbtmc_draw_down(struct usbtmc_file_data *file_data);

/* kref release callback: drops the USB device reference and frees the
 * per-device data.
 */
static void usbtmc_delete(struct kref *kref)
{
	struct usbtmc_device_data *data = to_usbtmc_data(kref);

	usb_put_dev(data->usb_dev);
	kfree(data);
}

/*
 * usbtmc_open - allocate the per-file state for the device with the
 * opened minor, initialise it with the driver defaults and link it into
 * the device's file_list. Returns -ENODEV when no interface matches the
 * minor and -ENOMEM when the allocation fails.
 */
static int usbtmc_open(struct inode *inode, struct file *filp)
{
	struct usb_interface *intf;
	struct usbtmc_device_data *data;
	struct usbtmc_file_data *file_data;

	intf = usb_find_interface(&usbtmc_driver, iminor(inode));
	if (!intf) {
		pr_err("can not find device for minor %d", iminor(inode));
		return -ENODEV;
	}

	file_data = kzalloc_obj(*file_data);
	if (!file_data)
		return -ENOMEM;

	spin_lock_init(&file_data->err_lock);
	sema_init(&file_data->limit_write_sem, MAX_URBS_IN_FLIGHT);
	init_usb_anchor(&file_data->submitted);
	init_usb_anchor(&file_data->in_anchor);
	init_waitqueue_head(&file_data->wait_bulk_in);

	data = usb_get_intfdata(intf);
	/* Protect reference to data from file structure until release */
	kref_get(&data->kref);

	mutex_lock(&data->io_mutex);
	file_data->data = data;

	atomic_set(&file_data->closing, 0);

	/* per-file defaults */
	file_data->timeout = USBTMC_TIMEOUT;
	file_data->term_char = '\n';
	file_data->term_char_enabled = 0;
	file_data->auto_abort = 0;
	file_data->eom_val = 1;

	INIT_LIST_HEAD(&file_data->file_elem);
	spin_lock_irq(&data->dev_lock);
	list_add_tail(&file_data->file_elem, &data->file_list);
	spin_unlock_irq(&data->dev_lock);
	mutex_unlock(&data->io_mutex);

	/* Store pointer in file structure's private data field */
	filp->private_data = file_data;

	return 0;
}

/*
 * usbtmc_flush - called before file handle is closed
 *
 * Marks the handle as closing, draws down its outstanding I/O under
 * io_mutex, resets the recorded transfer state and wakes all waiters on
 * the device wait queue. Returns -ENODEV when the file has no private
 * data.
 */
static int usbtmc_flush(struct file *file, fl_owner_t id)
{
	struct usbtmc_file_data *file_data;
	struct usbtmc_device_data *data;

	file_data = file->private_data;
	if (file_data == NULL)
		return -ENODEV;

	atomic_set(&file_data->closing, 1);
	data = file_data->data;

	/* wait for io to stop */
	mutex_lock(&data->io_mutex);

	usbtmc_draw_down(file_data);

	spin_lock_irq(&file_data->err_lock);
	file_data->in_status = 0;
	file_data->in_transfer_size = 0;
	file_data->in_urbs_used = 0;
	file_data->out_status = 0;
	file_data->out_transfer_size = 0;
	spin_unlock_irq(&file_data->err_lock);

	wake_up_interruptible_all(&data->waitq);
	mutex_unlock(&data->io_mutex);

	return 0;
}

/*
 * usbtmc_release - unlink the handle from the device's file_list, draw
 * down its outstanding I/O, drop the device reference taken in
 * usbtmc_open() and free the per-file state.
 */
static int usbtmc_release(struct inode *inode, struct file *file)
{
	struct usbtmc_file_data *file_data = file->private_data;

	/* prevent IO _AND_ usbtmc_interrupt */
	mutex_lock(&file_data->data->io_mutex);
	spin_lock_irq(&file_data->data->dev_lock);

	list_del(&file_data->file_elem);

	spin_unlock_irq(&file_data->data->dev_lock);

	/* flush anchored URBs */
	usbtmc_draw_down(file_data);
	mutex_unlock(&file_data->data->io_mutex);

	kref_put(&file_data->data->kref, usbtmc_delete);
	file_data->data = NULL;
	kfree(file_data);
	return 0;
}

static int usbtmc_ioctl_abort_bulk_in_tag(struct usbtmc_device_data *data,
					  u8 tag)
{
	u8 *buffer;
	struct device *dev;
	int rv;
	int n;
	int actual;

	dev = &data->intf->dev;
	buffer = kmalloc(USBTMC_BUFSIZE, GFP_KERNEL);
	if (!buffer)
		return -ENOMEM;

	rv = usb_control_msg(data->usb_dev,
			     usb_rcvctrlpipe(data->usb_dev, 0),
			     USBTMC_REQUEST_INITIATE_ABORT_BULK_IN,
			     USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_ENDPOINT,
			     tag, data->bulk_in,
			     buffer, 2, USB_CTRL_GET_TIMEOUT);

	if (rv < 0) {
		dev_err(dev, "usb_control_msg returned %d\n", rv);
		goto exit;
	}

	dev_dbg(dev, "INITIATE_ABORT_BULK_IN returned %x with tag
%02x\n", buffer[0], buffer[1]); if (buffer[0] == USBTMC_STATUS_FAILED) { /* No transfer in progress and the Bulk-OUT FIFO is empty. */ rv = 0; goto exit; } if (buffer[0] == USBTMC_STATUS_TRANSFER_NOT_IN_PROGRESS) { /* The device returns this status if either: * - There is a transfer in progress, but the specified bTag * does not match. * - There is no transfer in progress, but the Bulk-OUT FIFO * is not empty. */ rv = -ENOMSG; goto exit; } if (buffer[0] != USBTMC_STATUS_SUCCESS) { dev_err(dev, "INITIATE_ABORT_BULK_IN returned %x\n", buffer[0]); rv = -EPERM; goto exit; } n = 0; usbtmc_abort_bulk_in_status: dev_dbg(dev, "Reading from bulk in EP\n"); /* Data must be present. So use low timeout 300 ms */ actual = 0; rv = usb_bulk_msg(data->usb_dev, usb_rcvbulkpipe(data->usb_dev, data->bulk_in), buffer, USBTMC_BUFSIZE, &actual, 300); print_hex_dump_debug("usbtmc ", DUMP_PREFIX_NONE, 16, 1, buffer, actual, true); n++; if (rv < 0) { dev_err(dev, "usb_bulk_msg returned %d\n", rv); if (rv != -ETIMEDOUT) goto exit; } if (actual == USBTMC_BUFSIZE) goto usbtmc_abort_bulk_in_status; if (n >= USBTMC_MAX_READS_TO_CLEAR_BULK_IN) { dev_err(dev, "Couldn't clear device buffer within %d cycles\n", USBTMC_MAX_READS_TO_CLEAR_BULK_IN); rv = -EPERM; goto exit; } rv = usb_control_msg(data->usb_dev, usb_rcvctrlpipe(data->usb_dev, 0), USBTMC_REQUEST_CHECK_ABORT_BULK_IN_STATUS, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_ENDPOINT, 0, data->bulk_in, buffer, 0x08, USB_CTRL_GET_TIMEOUT); if (rv < 0) { dev_err(dev, "usb_control_msg returned %d\n", rv); goto exit; } dev_dbg(dev, "CHECK_ABORT_BULK_IN returned %x\n", buffer[0]); if (buffer[0] == USBTMC_STATUS_SUCCESS) { rv = 0; goto exit; } if (buffer[0] != USBTMC_STATUS_PENDING) { dev_err(dev, "CHECK_ABORT_BULK_IN returned %x\n", buffer[0]); rv = -EPERM; goto exit; } if ((buffer[1] & 1) > 0) { /* The device has 1 or more queued packets the Host can read */ goto usbtmc_abort_bulk_in_status; } /* The Host must send CHECK_ABORT_BULK_IN_STATUS at a later 
time. */ rv = -EAGAIN; exit: kfree(buffer); return rv; } static int usbtmc_ioctl_abort_bulk_in(struct usbtmc_device_data *data) { return usbtmc_ioctl_abort_bulk_in_tag(data, data->bTag_last_read); } static int usbtmc_ioctl_abort_bulk_out_tag(struct usbtmc_device_data *data, u8 tag) { struct device *dev; u8 *buffer; int rv; int n; dev = &data->intf->dev; buffer = kmalloc(8, GFP_KERNEL); if (!buffer) return -ENOMEM; rv = usb_control_msg(data->usb_dev, usb_rcvctrlpipe(data->usb_dev, 0), USBTMC_REQUEST_INITIATE_ABORT_BULK_OUT, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_ENDPOINT, tag, data->bulk_out, buffer, 2, USB_CTRL_GET_TIMEOUT); if (rv < 0) { dev_err(dev, "usb_control_msg returned %d\n", rv); goto exit; } dev_dbg(dev, "INITIATE_ABORT_BULK_OUT returned %x\n", buffer[0]); if (buffer[0] != USBTMC_STATUS_SUCCESS) { dev_err(dev, "INITIATE_ABORT_BULK_OUT returned %x\n", buffer[0]); rv = -EPERM; goto exit; } n = 0; usbtmc_abort_bulk_out_check_status: /* do not stress device with subsequent requests */ msleep(50); rv = usb_control_msg(data->usb_dev, usb_rcvctrlpipe(data->usb_dev, 0), USBTMC_REQUEST_CHECK_ABORT_BULK_OUT_STATUS, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_ENDPOINT, 0, data->bulk_out, buffer, 0x08, USB_CTRL_GET_TIMEOUT); n++; if (rv < 0) { dev_err(dev, "usb_control_msg returned %d\n", rv); goto exit; } dev_dbg(dev, "CHECK_ABORT_BULK_OUT returned %x\n", buffer[0]); if (buffer[0] == USBTMC_STATUS_SUCCESS) goto usbtmc_abort_bulk_out_clear_halt; if ((buffer[0] == USBTMC_STATUS_PENDING) && (n < USBTMC_MAX_READS_TO_CLEAR_BULK_IN)) goto usbtmc_abort_bulk_out_check_status; rv = -EPERM; goto exit; usbtmc_abort_bulk_out_clear_halt: rv = usb_clear_halt(data->usb_dev, usb_sndbulkpipe(data->usb_dev, data->bulk_out)); if (rv < 0) { dev_err(dev, "usb_control_msg returned %d\n", rv); goto exit; } rv = 0; exit: kfree(buffer); return rv; } static int usbtmc_ioctl_abort_bulk_out(struct usbtmc_device_data *data) { return usbtmc_ioctl_abort_bulk_out_tag(data, data->bTag_last_write); } 
static int usbtmc_get_stb(struct usbtmc_file_data *file_data, __u8 *stb) { struct usbtmc_device_data *data = file_data->data; struct device *dev = &data->intf->dev; u8 *buffer; u8 tag; int rv; long wait_rv; unsigned long expire; dev_dbg(dev, "Enter ioctl_read_stb iin_ep_present: %d\n", data->iin_ep_present); buffer = kmalloc(8, GFP_KERNEL); if (!buffer) return -ENOMEM; atomic_set(&data->iin_data_valid, 0); rv = usb_control_msg(data->usb_dev, usb_rcvctrlpipe(data->usb_dev, 0), USBTMC488_REQUEST_READ_STATUS_BYTE, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, data->iin_bTag, data->ifnum, buffer, 0x03, USB_CTRL_GET_TIMEOUT); if (rv < 0) { dev_err(dev, "stb usb_control_msg returned %d\n", rv); goto exit; } if (buffer[0] != USBTMC_STATUS_SUCCESS) { dev_err(dev, "control status returned %x\n", buffer[0]); rv = -EIO; goto exit; } if (data->iin_ep_present) { expire = msecs_to_jiffies(file_data->timeout); wait_rv = wait_event_interruptible_timeout( data->waitq, atomic_read(&data->iin_data_valid) != 0, expire); if (wait_rv < 0) { dev_dbg(dev, "wait interrupted %ld\n", wait_rv); rv = wait_rv; goto exit; } if (wait_rv == 0) { dev_dbg(dev, "wait timed out\n"); rv = -ETIMEDOUT; goto exit; } tag = data->bNotify1 & 0x7f; if (tag != data->iin_bTag) { dev_err(dev, "expected bTag %x got %x\n", data->iin_bTag, tag); } *stb = data->bNotify2; } else { *stb = buffer[2]; } dev_dbg(dev, "stb:0x%02x received %d\n", (unsigned int)*stb, rv); rv = 0; exit: /* bump interrupt bTag */ data->iin_bTag += 1; if (data->iin_bTag > 127) /* 1 is for SRQ see USBTMC-USB488 subclass spec section 4.3.1 */ data->iin_bTag = 2; kfree(buffer); return rv; } static int usbtmc488_ioctl_read_stb(struct usbtmc_file_data *file_data, void __user *arg) { int srq_asserted = 0; __u8 stb; int rv; rv = usbtmc_get_stb(file_data, &stb); if (rv < 0) return rv; srq_asserted = atomic_xchg(&file_data->srq_asserted, srq_asserted); if (srq_asserted) stb |= 0x40; /* Set RQS bit */ rv = put_user(stb, (__u8 __user *)arg); return 
rv; } static int usbtmc_ioctl_get_srq_stb(struct usbtmc_file_data *file_data, void __user *arg) { struct usbtmc_device_data *data = file_data->data; struct device *dev = &data->intf->dev; int srq_asserted = 0; __u8 stb = 0; int rv; spin_lock_irq(&data->dev_lock); srq_asserted = atomic_xchg(&file_data->srq_asserted, srq_asserted); if (srq_asserted) { stb = file_data->srq_byte; spin_unlock_irq(&data->dev_lock); rv = put_user(stb, (__u8 __user *)arg); } else { spin_unlock_irq(&data->dev_lock); rv = -ENOMSG; } dev_dbg(dev, "stb:0x%02x with srq received %d\n", (unsigned int)stb, rv); return rv; } static int usbtmc488_ioctl_wait_srq(struct usbtmc_file_data *file_data, __u32 __user *arg) { struct usbtmc_device_data *data = file_data->data; struct device *dev = &data->intf->dev; u32 timeout; unsigned long expire; long wait_rv; if (!data->iin_ep_present) { dev_dbg(dev, "no interrupt endpoint present\n"); return -EFAULT; } if (get_user(timeout, arg)) return -EFAULT; expire = msecs_to_jiffies(timeout); mutex_unlock(&data->io_mutex); wait_rv = wait_event_interruptible_timeout( data->waitq, atomic_read(&file_data->srq_asserted) != 0 || atomic_read(&file_data->closing), expire); mutex_lock(&data->io_mutex); /* Note! 
disconnect or close could be called in the meantime */ if (atomic_read(&file_data->closing) || data->zombie) return -ENODEV; if (wait_rv < 0) { dev_dbg(dev, "%s - wait interrupted %ld\n", __func__, wait_rv); return wait_rv; } if (wait_rv == 0) { dev_dbg(dev, "%s - wait timed out\n", __func__); return -ETIMEDOUT; } dev_dbg(dev, "%s - srq asserted\n", __func__); return 0; } static int usbtmc488_ioctl_simple(struct usbtmc_device_data *data, void __user *arg, unsigned int cmd) { struct device *dev = &data->intf->dev; __u8 val; u8 *buffer; u16 wValue; int rv; if (!(data->usb488_caps & USBTMC488_CAPABILITY_SIMPLE)) return -EINVAL; buffer = kmalloc(8, GFP_KERNEL); if (!buffer) return -ENOMEM; if (cmd == USBTMC488_REQUEST_REN_CONTROL) { rv = copy_from_user(&val, arg, sizeof(val)); if (rv) { rv = -EFAULT; goto exit; } wValue = val ? 1 : 0; } else { wValue = 0; } rv = usb_control_msg(data->usb_dev, usb_rcvctrlpipe(data->usb_dev, 0), cmd, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, wValue, data->ifnum, buffer, 0x01, USB_CTRL_GET_TIMEOUT); if (rv < 0) { dev_err(dev, "simple usb_control_msg failed %d\n", rv); goto exit; } else if (rv != 1) { dev_warn(dev, "simple usb_control_msg returned %d\n", rv); rv = -EIO; goto exit; } if (buffer[0] != USBTMC_STATUS_SUCCESS) { dev_err(dev, "simple control status returned %x\n", buffer[0]); rv = -EIO; goto exit; } rv = 0; exit: kfree(buffer); return rv; } /* * Sends a TRIGGER Bulk-OUT command message * See the USBTMC-USB488 specification, Table 2. * * Also updates bTag_last_write. 
*/ static int usbtmc488_ioctl_trigger(struct usbtmc_file_data *file_data) { struct usbtmc_device_data *data = file_data->data; int retval; u8 *buffer; int actual; buffer = kzalloc(USBTMC_HEADER_SIZE, GFP_KERNEL); if (!buffer) return -ENOMEM; buffer[0] = 128; buffer[1] = data->bTag; buffer[2] = ~data->bTag; retval = usb_bulk_msg_killable(data->usb_dev, usb_sndbulkpipe(data->usb_dev, data->bulk_out), buffer, USBTMC_HEADER_SIZE, &actual, file_data->timeout); /* Store bTag (in case we need to abort) */ data->bTag_last_write = data->bTag; /* Increment bTag -- and increment again if zero */ data->bTag++; if (!data->bTag) data->bTag++; kfree(buffer); if (retval < 0) { dev_err(&data->intf->dev, "%s returned %d\n", __func__, retval); return retval; } return 0; } static struct urb *usbtmc_create_urb(void) { const size_t bufsize = USBTMC_BUFSIZE; u8 *dmabuf = NULL; struct urb *urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) return NULL; dmabuf = kzalloc(bufsize, GFP_KERNEL); if (!dmabuf) { usb_free_urb(urb); return NULL; } urb->transfer_buffer = dmabuf; urb->transfer_buffer_length = bufsize; urb->transfer_flags |= URB_FREE_BUFFER; return urb; } static void usbtmc_read_bulk_cb(struct urb *urb) { struct usbtmc_file_data *file_data = urb->context; int status = urb->status; unsigned long flags; /* sync/async unlink faults aren't errors */ if (status) { if (!(/* status == -ENOENT || */ status == -ECONNRESET || status == -EREMOTEIO || /* Short packet */ status == -ESHUTDOWN)) dev_err(&file_data->data->intf->dev, "%s - nonzero read bulk status received: %d\n", __func__, status); spin_lock_irqsave(&file_data->err_lock, flags); if (!file_data->in_status) file_data->in_status = status; spin_unlock_irqrestore(&file_data->err_lock, flags); } spin_lock_irqsave(&file_data->err_lock, flags); file_data->in_transfer_size += urb->actual_length; dev_dbg(&file_data->data->intf->dev, "%s - total size: %u current: %d status: %d\n", __func__, file_data->in_transfer_size, urb->actual_length, status); 
spin_unlock_irqrestore(&file_data->err_lock, flags); usb_anchor_urb(urb, &file_data->in_anchor); wake_up_interruptible(&file_data->wait_bulk_in); wake_up_interruptible(&file_data->data->waitq); } static inline bool usbtmc_do_transfer(struct usbtmc_file_data *file_data) { bool data_or_error; spin_lock_irq(&file_data->err_lock); data_or_error = !usb_anchor_empty(&file_data->in_anchor) || file_data->in_status; spin_unlock_irq(&file_data->err_lock); dev_dbg(&file_data->data->intf->dev, "%s: returns %d\n", __func__, data_or_error); return data_or_error; } static ssize_t usbtmc_generic_read(struct usbtmc_file_data *file_data, void __user *user_buffer, u32 transfer_size, u32 *transferred, u32 flags) { struct usbtmc_device_data *data = file_data->data; struct device *dev = &data->intf->dev; u32 done = 0; u32 remaining; const u32 bufsize = USBTMC_BUFSIZE; int retval = 0; u32 max_transfer_size; unsigned long expire; int bufcount = 1; int again = 0; long wait_rv; /* mutex already locked */ *transferred = done; max_transfer_size = transfer_size; if (flags & USBTMC_FLAG_IGNORE_TRAILER) { /* The device may send extra alignment bytes (up to * wMaxPacketSize – 1) to avoid sending a zero-length * packet */ remaining = transfer_size; if ((max_transfer_size % data->wMaxPacketSize) == 0) max_transfer_size += (data->wMaxPacketSize - 1); } else { /* round down to bufsize to avoid truncated data left */ if (max_transfer_size > bufsize) { max_transfer_size = roundup(max_transfer_size + 1 - bufsize, bufsize); } remaining = max_transfer_size; } spin_lock_irq(&file_data->err_lock); if (file_data->in_status) { /* return the very first error */ retval = file_data->in_status; spin_unlock_irq(&file_data->err_lock); goto error; } if (flags & USBTMC_FLAG_ASYNC) { if (usb_anchor_empty(&file_data->in_anchor)) again = 1; if (file_data->in_urbs_used == 0) { file_data->in_transfer_size = 0; file_data->in_status = 0; } } else { file_data->in_transfer_size = 0; file_data->in_status = 0; } if 
(max_transfer_size == 0) { bufcount = 0; } else { bufcount = roundup(max_transfer_size, bufsize) / bufsize; if (bufcount > file_data->in_urbs_used) bufcount -= file_data->in_urbs_used; else bufcount = 0; if (bufcount + file_data->in_urbs_used > MAX_URBS_IN_FLIGHT) { bufcount = MAX_URBS_IN_FLIGHT - file_data->in_urbs_used; } } spin_unlock_irq(&file_data->err_lock); dev_dbg(dev, "%s: requested=%u flags=0x%X size=%u bufs=%d used=%d\n", __func__, transfer_size, flags, max_transfer_size, bufcount, file_data->in_urbs_used); while (bufcount > 0) { u8 *dmabuf = NULL; struct urb *urb = usbtmc_create_urb(); if (!urb) { retval = -ENOMEM; goto error; } dmabuf = urb->transfer_buffer; usb_fill_bulk_urb(urb, data->usb_dev, usb_rcvbulkpipe(data->usb_dev, data->bulk_in), dmabuf, bufsize, usbtmc_read_bulk_cb, file_data); usb_anchor_urb(urb, &file_data->submitted); retval = usb_submit_urb(urb, GFP_KERNEL); /* urb is anchored. We can release our reference. */ usb_free_urb(urb); if (unlikely(retval)) { usb_unanchor_urb(urb); goto error; } file_data->in_urbs_used++; bufcount--; } if (again) { dev_dbg(dev, "%s: ret=again\n", __func__); return -EAGAIN; } if (user_buffer == NULL) return -EINVAL; expire = msecs_to_jiffies(file_data->timeout); while (max_transfer_size > 0) { u32 this_part; struct urb *urb = NULL; if (!(flags & USBTMC_FLAG_ASYNC)) { dev_dbg(dev, "%s: before wait time %lu\n", __func__, expire); wait_rv = wait_event_interruptible_timeout( file_data->wait_bulk_in, usbtmc_do_transfer(file_data), expire); dev_dbg(dev, "%s: wait returned %ld\n", __func__, wait_rv); if (wait_rv < 0) { retval = wait_rv; goto error; } if (wait_rv == 0) { retval = -ETIMEDOUT; goto error; } } urb = usb_get_from_anchor(&file_data->in_anchor); if (!urb) { if (!(flags & USBTMC_FLAG_ASYNC)) { /* synchronous case: must not happen */ retval = -EFAULT; goto error; } /* asynchronous case: ready, do not block or wait */ *transferred = done; dev_dbg(dev, "%s: (async) done=%u ret=0\n", __func__, done); return 0; } 
file_data->in_urbs_used--; if (max_transfer_size > urb->actual_length) max_transfer_size -= urb->actual_length; else max_transfer_size = 0; if (remaining > urb->actual_length) this_part = urb->actual_length; else this_part = remaining; print_hex_dump_debug("usbtmc ", DUMP_PREFIX_NONE, 16, 1, urb->transfer_buffer, urb->actual_length, true); if (copy_to_user(user_buffer + done, urb->transfer_buffer, this_part)) { usb_free_urb(urb); retval = -EFAULT; goto error; } remaining -= this_part; done += this_part; spin_lock_irq(&file_data->err_lock); if (urb->status) { /* return the very first error */ retval = file_data->in_status; spin_unlock_irq(&file_data->err_lock); usb_free_urb(urb); goto error; } spin_unlock_irq(&file_data->err_lock); if (urb->actual_length < bufsize) { /* short packet or ZLP received => ready */ usb_free_urb(urb); retval = 1; break; } if (!(flags & USBTMC_FLAG_ASYNC) && max_transfer_size > (bufsize * file_data->in_urbs_used)) { /* resubmit, since other buffers still not enough */ usb_anchor_urb(urb, &file_data->submitted); retval = usb_submit_urb(urb, GFP_KERNEL); if (unlikely(retval)) { usb_unanchor_urb(urb); usb_free_urb(urb); goto error; } file_data->in_urbs_used++; } usb_free_urb(urb); retval = 0; } error: *transferred = done; dev_dbg(dev, "%s: before kill\n", __func__); /* Attention: killing urbs can take long time (2 ms) */ usb_kill_anchored_urbs(&file_data->submitted); dev_dbg(dev, "%s: after kill\n", __func__); usb_scuttle_anchored_urbs(&file_data->in_anchor); file_data->in_urbs_used = 0; file_data->in_status = 0; /* no spinlock needed here */ dev_dbg(dev, "%s: done=%u ret=%d\n", __func__, done, retval); return retval; } static ssize_t usbtmc_ioctl_generic_read(struct usbtmc_file_data *file_data, void __user *arg) { struct usbtmc_message msg; ssize_t retval = 0; /* mutex already locked */ if (copy_from_user(&msg, arg, sizeof(struct usbtmc_message))) return -EFAULT; retval = usbtmc_generic_read(file_data, msg.message, msg.transfer_size, 
&msg.transferred, msg.flags); if (put_user(msg.transferred, &((struct usbtmc_message __user *)arg)->transferred)) return -EFAULT; return retval; } static void usbtmc_write_bulk_cb(struct urb *urb) { struct usbtmc_file_data *file_data = urb->context; int wakeup = 0; unsigned long flags; spin_lock_irqsave(&file_data->err_lock, flags); file_data->out_transfer_size += urb->actual_length; /* sync/async unlink faults aren't errors */ if (urb->status) { if (!(urb->status == -ENOENT || urb->status == -ECONNRESET || urb->status == -ESHUTDOWN)) dev_err(&file_data->data->intf->dev, "%s - nonzero write bulk status received: %d\n", __func__, urb->status); if (!file_data->out_status) { file_data->out_status = urb->status; wakeup = 1; } } spin_unlock_irqrestore(&file_data->err_lock, flags); dev_dbg(&file_data->data->intf->dev, "%s - write bulk total size: %u\n", __func__, file_data->out_transfer_size); up(&file_data->limit_write_sem); if (usb_anchor_empty(&file_data->submitted) || wakeup) wake_up_interruptible(&file_data->data->waitq); } static ssize_t usbtmc_generic_write(struct usbtmc_file_data *file_data, const void __user *user_buffer, u32 transfer_size, u32 *transferred, u32 flags) { struct usbtmc_device_data *data = file_data->data; struct device *dev; u32 done = 0; u32 remaining; unsigned long expire; const u32 bufsize = USBTMC_BUFSIZE; struct urb *urb = NULL; int retval = 0; u32 timeout; *transferred = 0; /* Get pointer to private data structure */ dev = &data->intf->dev; dev_dbg(dev, "%s: size=%u flags=0x%X sema=%u\n", __func__, transfer_size, flags, file_data->limit_write_sem.count); if (flags & USBTMC_FLAG_APPEND) { spin_lock_irq(&file_data->err_lock); retval = file_data->out_status; spin_unlock_irq(&file_data->err_lock); if (retval < 0) return retval; } else { spin_lock_irq(&file_data->err_lock); file_data->out_transfer_size = 0; file_data->out_status = 0; spin_unlock_irq(&file_data->err_lock); } remaining = transfer_size; if (remaining > INT_MAX) remaining = INT_MAX; 
timeout = file_data->timeout; expire = msecs_to_jiffies(timeout); while (remaining > 0) { u32 this_part, aligned; u8 *buffer = NULL; if (flags & USBTMC_FLAG_ASYNC) { if (down_trylock(&file_data->limit_write_sem)) { retval = (done)?(0):(-EAGAIN); goto exit; } } else { retval = down_timeout(&file_data->limit_write_sem, expire); if (retval < 0) { retval = -ETIMEDOUT; goto error; } } spin_lock_irq(&file_data->err_lock); retval = file_data->out_status; spin_unlock_irq(&file_data->err_lock); if (retval < 0) { up(&file_data->limit_write_sem); goto error; } /* prepare next urb to send */ urb = usbtmc_create_urb(); if (!urb) { retval = -ENOMEM; up(&file_data->limit_write_sem); goto error; } buffer = urb->transfer_buffer; if (remaining > bufsize) this_part = bufsize; else this_part = remaining; if (copy_from_user(buffer, user_buffer + done, this_part)) { retval = -EFAULT; up(&file_data->limit_write_sem); goto error; } print_hex_dump_debug("usbtmc ", DUMP_PREFIX_NONE, 16, 1, buffer, this_part, true); /* fill bulk with 32 bit alignment to meet USBTMC specification * (size + 3 & ~3) rounds up and simplifies user code */ aligned = (this_part + 3) & ~3; dev_dbg(dev, "write(size:%u align:%u done:%u)\n", (unsigned int)this_part, (unsigned int)aligned, (unsigned int)done); usb_fill_bulk_urb(urb, data->usb_dev, usb_sndbulkpipe(data->usb_dev, data->bulk_out), urb->transfer_buffer, aligned, usbtmc_write_bulk_cb, file_data); usb_anchor_urb(urb, &file_data->submitted); retval = usb_submit_urb(urb, GFP_KERNEL); if (unlikely(retval)) { usb_unanchor_urb(urb); up(&file_data->limit_write_sem); goto error; } usb_free_urb(urb); urb = NULL; /* urb will be finally released by usb driver */ remaining -= this_part; done += this_part; } /* All urbs are on the fly */ if (!(flags & USBTMC_FLAG_ASYNC)) { if (!usb_wait_anchor_empty_timeout(&file_data->submitted, timeout)) { retval = -ETIMEDOUT; goto error; } } retval = 0; goto exit; error: usb_kill_anchored_urbs(&file_data->submitted); exit: 
usb_free_urb(urb); spin_lock_irq(&file_data->err_lock); if (!(flags & USBTMC_FLAG_ASYNC)) done = file_data->out_transfer_size; if (!retval && file_data->out_status) retval = file_data->out_status; spin_unlock_irq(&file_data->err_lock); *transferred = done; dev_dbg(dev, "%s: done=%u, retval=%d, urbstat=%d\n", __func__, done, retval, file_data->out_status); return retval; } static ssize_t usbtmc_ioctl_generic_write(struct usbtmc_file_data *file_data, void __user *arg) { struct usbtmc_message msg; ssize_t retval = 0; /* mutex already locked */ if (copy_from_user(&msg, arg, sizeof(struct usbtmc_message))) return -EFAULT; retval = usbtmc_generic_write(file_data, msg.message, msg.transfer_size, &msg.transferred, msg.flags); if (put_user(msg.transferred, &((struct usbtmc_message __user *)arg)->transferred)) return -EFAULT; return retval; } /* * Get the generic write result */ static ssize_t usbtmc_ioctl_write_result(struct usbtmc_file_data *file_data, void __user *arg) { u32 transferred; int retval; spin_lock_irq(&file_data->err_lock); transferred = file_data->out_transfer_size; retval = file_data->out_status; spin_unlock_irq(&file_data->err_lock); if (put_user(transferred, (__u32 __user *)arg)) return -EFAULT; return retval; } /* * Sends a REQUEST_DEV_DEP_MSG_IN message on the Bulk-OUT endpoint. * @transfer_size: number of bytes to request from the device. * * See the USBTMC specification, Table 4. * * Also updates bTag_last_write. 
*/ static int send_request_dev_dep_msg_in(struct usbtmc_file_data *file_data, u32 transfer_size) { struct usbtmc_device_data *data = file_data->data; int retval; u8 *buffer; int actual; buffer = kmalloc(USBTMC_HEADER_SIZE, GFP_KERNEL); if (!buffer) return -ENOMEM; /* Setup IO buffer for REQUEST_DEV_DEP_MSG_IN message * Refer to class specs for details */ buffer[0] = 2; buffer[1] = data->bTag; buffer[2] = ~data->bTag; buffer[3] = 0; /* Reserved */ buffer[4] = transfer_size >> 0; buffer[5] = transfer_size >> 8; buffer[6] = transfer_size >> 16; buffer[7] = transfer_size >> 24; buffer[8] = file_data->term_char_enabled * 2; /* Use term character? */ buffer[9] = file_data->term_char; buffer[10] = 0; /* Reserved */ buffer[11] = 0; /* Reserved */ /* Send bulk URB */ retval = usb_bulk_msg_killable(data->usb_dev, usb_sndbulkpipe(data->usb_dev, data->bulk_out), buffer, USBTMC_HEADER_SIZE, &actual, file_data->timeout); /* Store bTag (in case we need to abort) */ data->bTag_last_write = data->bTag; /* Increment bTag -- and increment again if zero */ data->bTag++; if (!data->bTag) data->bTag++; kfree(buffer); if (retval < 0) dev_err(&data->intf->dev, "%s returned %d\n", __func__, retval); return retval; } static ssize_t usbtmc_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) { struct usbtmc_file_data *file_data; struct usbtmc_device_data *data; struct device *dev; const u32 bufsize = USBTMC_BUFSIZE; u32 n_characters; u8 *buffer; int actual; u32 done = 0; u32 remaining; int retval; /* Get pointer to private data structure */ file_data = filp->private_data; data = file_data->data; dev = &data->intf->dev; buffer = kmalloc(bufsize, GFP_KERNEL); if (!buffer) return -ENOMEM; retval = mutex_lock_interruptible(&data->io_mutex); if (retval < 0) goto exit_nolock; if (data->zombie) { retval = -ENODEV; goto exit; } if (count > INT_MAX) count = INT_MAX; dev_dbg(dev, "%s(count:%zu)\n", __func__, count); retval = send_request_dev_dep_msg_in(file_data, count); if (retval < 
0) { if (file_data->auto_abort) usbtmc_ioctl_abort_bulk_out(data); goto exit; } /* Loop until we have fetched everything we requested */ remaining = count; actual = 0; /* Send bulk URB */ retval = usb_bulk_msg_killable(data->usb_dev, usb_rcvbulkpipe(data->usb_dev, data->bulk_in), buffer, bufsize, &actual, file_data->timeout); dev_dbg(dev, "%s: bulk_msg retval(%u), actual(%d)\n", __func__, retval, actual); /* Store bTag (in case we need to abort) */ data->bTag_last_read = data->bTag; if (retval < 0) { if (file_data->auto_abort) usbtmc_ioctl_abort_bulk_in(data); goto exit; } /* Sanity checks for the header */ if (actual < USBTMC_HEADER_SIZE) { dev_err(dev, "Device sent too small first packet: %u < %u\n", actual, USBTMC_HEADER_SIZE); if (file_data->auto_abort) usbtmc_ioctl_abort_bulk_in(data); goto exit; } if (buffer[0] != 2) { dev_err(dev, "Device sent reply with wrong MsgID: %u != 2\n", buffer[0]); if (file_data->auto_abort) usbtmc_ioctl_abort_bulk_in(data); goto exit; } if (buffer[1] != data->bTag_last_write) { dev_err(dev, "Device sent reply with wrong bTag: %u != %u\n", buffer[1], data->bTag_last_write); if (file_data->auto_abort) usbtmc_ioctl_abort_bulk_in(data); goto exit; } /* How many characters did the instrument send? 
*/ n_characters = buffer[4] + (buffer[5] << 8) + (buffer[6] << 16) + (buffer[7] << 24); file_data->bmTransferAttributes = buffer[8]; dev_dbg(dev, "Bulk-IN header: N_characters(%u), bTransAttr(%u)\n", n_characters, buffer[8]); if (n_characters > remaining) { dev_err(dev, "Device wants to return more data than requested: %u > %zu\n", n_characters, count); if (file_data->auto_abort) usbtmc_ioctl_abort_bulk_in(data); goto exit; } print_hex_dump_debug("usbtmc ", DUMP_PREFIX_NONE, 16, 1, buffer, actual, true); remaining = n_characters; /* Remove the USBTMC header */ actual -= USBTMC_HEADER_SIZE; /* Remove padding if it exists */ if (actual > remaining) actual = remaining; remaining -= actual; /* Copy buffer to user space */ if (copy_to_user(buf, &buffer[USBTMC_HEADER_SIZE], actual)) { /* There must have been an addressing problem */ retval = -EFAULT; goto exit; } if ((actual + USBTMC_HEADER_SIZE) == bufsize) { retval = usbtmc_generic_read(file_data, buf + actual, remaining, &done, USBTMC_FLAG_IGNORE_TRAILER); if (retval < 0) goto exit; } done += actual; /* Update file position value */ *f_pos = *f_pos + done; retval = done; exit: mutex_unlock(&data->io_mutex); exit_nolock: kfree(buffer); return retval; } static ssize_t usbtmc_write(struct file *filp, const char __user *buf, size_t count, loff_t *f_pos) { struct usbtmc_file_data *file_data; struct usbtmc_device_data *data; struct urb *urb = NULL; ssize_t retval = 0; u8 *buffer; u32 remaining, done; u32 transfersize, aligned, buflen; file_data = filp->private_data; data = file_data->data; mutex_lock(&data->io_mutex); if (data->zombie) { retval = -ENODEV; goto exit; } done = 0; spin_lock_irq(&file_data->err_lock); file_data->out_transfer_size = 0; file_data->out_status = 0; spin_unlock_irq(&file_data->err_lock); if (!count) goto exit; if (down_trylock(&file_data->limit_write_sem)) { /* previous calls were async */ retval = -EBUSY; goto exit; } urb = usbtmc_create_urb(); if (!urb) { retval = -ENOMEM; 
up(&file_data->limit_write_sem); goto exit; } buffer = urb->transfer_buffer; buflen = urb->transfer_buffer_length; if (count > INT_MAX) { transfersize = INT_MAX; buffer[8] = 0; } else { transfersize = count; buffer[8] = file_data->eom_val; } /* Setup IO buffer for DEV_DEP_MSG_OUT message */ buffer[0] = 1; buffer[1] = data->bTag; buffer[2] = ~data->bTag; buffer[3] = 0; /* Reserved */ buffer[4] = transfersize >> 0; buffer[5] = transfersize >> 8; buffer[6] = transfersize >> 16; buffer[7] = transfersize >> 24; /* buffer[8] is set above... */ buffer[9] = 0; /* Reserved */ buffer[10] = 0; /* Reserved */ buffer[11] = 0; /* Reserved */ remaining = transfersize; if (transfersize + USBTMC_HEADER_SIZE > buflen) { transfersize = buflen - USBTMC_HEADER_SIZE; aligned = buflen; } else { aligned = (transfersize + (USBTMC_HEADER_SIZE + 3)) & ~3; } if (copy_from_user(&buffer[USBTMC_HEADER_SIZE], buf, transfersize)) { retval = -EFAULT; up(&file_data->limit_write_sem); goto exit; } dev_dbg(&data->intf->dev, "%s(size:%u align:%u)\n", __func__, (unsigned int)transfersize, (unsigned int)aligned); print_hex_dump_debug("usbtmc ", DUMP_PREFIX_NONE, 16, 1, buffer, aligned, true); usb_fill_bulk_urb(urb, data->usb_dev, usb_sndbulkpipe(data->usb_dev, data->bulk_out), urb->transfer_buffer, aligned, usbtmc_write_bulk_cb, file_data); usb_anchor_urb(urb, &file_data->submitted); retval = usb_submit_urb(urb, GFP_KERNEL); if (unlikely(retval)) { usb_unanchor_urb(urb); up(&file_data->limit_write_sem); goto exit; } remaining -= transfersize; data->bTag_last_write = data->bTag; data->bTag++; if (!data->bTag) data->bTag++; /* call generic_write even when remaining = 0 */ retval = usbtmc_generic_write(file_data, buf + transfersize, remaining, &done, USBTMC_FLAG_APPEND); /* truncate alignment bytes */ if (done > remaining) done = remaining; /*add size of first urb*/ done += transfersize; if (retval < 0) { usb_kill_anchored_urbs(&file_data->submitted); dev_err(&data->intf->dev, "Unable to send data, error 
%d\n", (int)retval); if (file_data->auto_abort) usbtmc_ioctl_abort_bulk_out(data); goto exit; } retval = done; exit: usb_free_urb(urb); mutex_unlock(&data->io_mutex); return retval; } static int usbtmc_ioctl_clear(struct usbtmc_device_data *data) { struct device *dev; u8 *buffer; int rv; int n; int actual = 0; dev = &data->intf->dev; dev_dbg(dev, "Sending INITIATE_CLEAR request\n"); buffer = kmalloc(USBTMC_BUFSIZE, GFP_KERNEL); if (!buffer) return -ENOMEM; rv = usb_control_msg(data->usb_dev, usb_rcvctrlpipe(data->usb_dev, 0), USBTMC_REQUEST_INITIATE_CLEAR, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, 0, buffer, 1, USB_CTRL_GET_TIMEOUT); if (rv < 0) { dev_err(dev, "usb_control_msg returned %d\n", rv); goto exit; } dev_dbg(dev, "INITIATE_CLEAR returned %x\n", buffer[0]); if (buffer[0] != USBTMC_STATUS_SUCCESS) { dev_err(dev, "INITIATE_CLEAR returned %x\n", buffer[0]); rv = -EPERM; goto exit; } n = 0; usbtmc_clear_check_status: dev_dbg(dev, "Sending CHECK_CLEAR_STATUS request\n"); rv = usb_control_msg(data->usb_dev, usb_rcvctrlpipe(data->usb_dev, 0), USBTMC_REQUEST_CHECK_CLEAR_STATUS, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, 0, buffer, 2, USB_CTRL_GET_TIMEOUT); if (rv < 0) { dev_err(dev, "usb_control_msg returned %d\n", rv); goto exit; } dev_dbg(dev, "CHECK_CLEAR_STATUS returned %x\n", buffer[0]); if (buffer[0] == USBTMC_STATUS_SUCCESS) goto usbtmc_clear_bulk_out_halt; if (buffer[0] != USBTMC_STATUS_PENDING) { dev_err(dev, "CHECK_CLEAR_STATUS returned %x\n", buffer[0]); rv = -EPERM; goto exit; } if ((buffer[1] & 1) != 0) { do { dev_dbg(dev, "Reading from bulk in EP\n"); actual = 0; rv = usb_bulk_msg(data->usb_dev, usb_rcvbulkpipe(data->usb_dev, data->bulk_in), buffer, USBTMC_BUFSIZE, &actual, USB_CTRL_GET_TIMEOUT); print_hex_dump_debug("usbtmc ", DUMP_PREFIX_NONE, 16, 1, buffer, actual, true); n++; if (rv < 0) { dev_err(dev, "usb_control_msg returned %d\n", rv); goto exit; } } while ((actual == USBTMC_BUFSIZE) && (n < 
USBTMC_MAX_READS_TO_CLEAR_BULK_IN)); } else { /* do not stress device with subsequent requests */ msleep(50); n++; } if (n >= USBTMC_MAX_READS_TO_CLEAR_BULK_IN) { dev_err(dev, "Couldn't clear device buffer within %d cycles\n", USBTMC_MAX_READS_TO_CLEAR_BULK_IN); rv = -EPERM; goto exit; } goto usbtmc_clear_check_status; usbtmc_clear_bulk_out_halt: rv = usb_clear_halt(data->usb_dev, usb_sndbulkpipe(data->usb_dev, data->bulk_out)); if (rv < 0) { dev_err(dev, "usb_clear_halt returned %d\n", rv); goto exit; } rv = 0; exit: kfree(buffer); return rv; } static int usbtmc_ioctl_clear_out_halt(struct usbtmc_device_data *data) { int rv; rv = usb_clear_halt(data->usb_dev, usb_sndbulkpipe(data->usb_dev, data->bulk_out)); if (rv < 0) dev_err(&data->usb_dev->dev, "%s returned %d\n", __func__, rv); return rv; } static int usbtmc_ioctl_clear_in_halt(struct usbtmc_device_data *data) { int rv; rv = usb_clear_halt(data->usb_dev, usb_rcvbulkpipe(data->usb_dev, data->bulk_in)); if (rv < 0) dev_err(&data->usb_dev->dev, "%s returned %d\n", __func__, rv); return rv; } static int usbtmc_ioctl_cancel_io(struct usbtmc_file_data *file_data) { spin_lock_irq(&file_data->err_lock); file_data->in_status = -ECANCELED; file_data->out_status = -ECANCELED; spin_unlock_irq(&file_data->err_lock); usb_kill_anchored_urbs(&file_data->submitted); return 0; } static int usbtmc_ioctl_cleanup_io(struct usbtmc_file_data *file_data) { usb_kill_anchored_urbs(&file_data->submitted); usb_scuttle_anchored_urbs(&file_data->in_anchor); spin_lock_irq(&file_data->err_lock); file_data->in_status = 0; file_data->in_transfer_size = 0; file_data->out_status = 0; file_data->out_transfer_size = 0; spin_unlock_irq(&file_data->err_lock); file_data->in_urbs_used = 0; return 0; } static int get_capabilities(struct usbtmc_device_data *data) { struct device *dev = &data->usb_dev->dev; char *buffer; int rv = 0; buffer = kmalloc(0x18, GFP_KERNEL); if (!buffer) return -ENOMEM; rv = usb_control_msg(data->usb_dev, 
usb_rcvctrlpipe(data->usb_dev, 0), USBTMC_REQUEST_GET_CAPABILITIES, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, 0, buffer, 0x18, USB_CTRL_GET_TIMEOUT); if (rv < 0) { dev_err(dev, "usb_control_msg returned %d\n", rv); goto err_out; } dev_dbg(dev, "GET_CAPABILITIES returned %x\n", buffer[0]); if (buffer[0] != USBTMC_STATUS_SUCCESS) { dev_err(dev, "GET_CAPABILITIES returned %x\n", buffer[0]); rv = -EPERM; goto err_out; } dev_dbg(dev, "Interface capabilities are %x\n", buffer[4]); dev_dbg(dev, "Device capabilities are %x\n", buffer[5]); dev_dbg(dev, "USB488 interface capabilities are %x\n", buffer[14]); dev_dbg(dev, "USB488 device capabilities are %x\n", buffer[15]); data->capabilities.interface_capabilities = buffer[4]; data->capabilities.device_capabilities = buffer[5]; data->capabilities.usb488_interface_capabilities = buffer[14]; data->capabilities.usb488_device_capabilities = buffer[15]; data->usb488_caps = (buffer[14] & 0x07) | ((buffer[15] & 0x0f) << 4); rv = 0; err_out: kfree(buffer); return rv; } #define capability_attribute(name) \ static ssize_t name##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct usb_interface *intf = to_usb_interface(dev); \ struct usbtmc_device_data *data = usb_get_intfdata(intf); \ \ return sprintf(buf, "%d\n", data->capabilities.name); \ } \ static DEVICE_ATTR_RO(name) capability_attribute(interface_capabilities); capability_attribute(device_capabilities); capability_attribute(usb488_interface_capabilities); capability_attribute(usb488_device_capabilities); static struct attribute *usbtmc_attrs[] = { &dev_attr_interface_capabilities.attr, &dev_attr_device_capabilities.attr, &dev_attr_usb488_interface_capabilities.attr, &dev_attr_usb488_device_capabilities.attr, NULL, }; ATTRIBUTE_GROUPS(usbtmc); static int usbtmc_ioctl_indicator_pulse(struct usbtmc_device_data *data) { struct device *dev; u8 *buffer; int rv; dev = &data->intf->dev; buffer = kmalloc(2, GFP_KERNEL); if (!buffer) return 
-ENOMEM; rv = usb_control_msg(data->usb_dev, usb_rcvctrlpipe(data->usb_dev, 0), USBTMC_REQUEST_INDICATOR_PULSE, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, 0, buffer, 0x01, USB_CTRL_GET_TIMEOUT); if (rv < 0) { dev_err(dev, "usb_control_msg returned %d\n", rv); goto exit; } dev_dbg(dev, "INDICATOR_PULSE returned %x\n", buffer[0]); if (buffer[0] != USBTMC_STATUS_SUCCESS) { dev_err(dev, "INDICATOR_PULSE returned %x\n", buffer[0]); rv = -EPERM; goto exit; } rv = 0; exit: kfree(buffer); return rv; } static int usbtmc_ioctl_request(struct usbtmc_device_data *data, void __user *arg) { struct device *dev = &data->intf->dev; struct usbtmc_ctrlrequest request; u8 *buffer = NULL; int rv; unsigned int is_in, pipe; if (copy_from_user(&request, arg, sizeof(struct usbtmc_ctrlrequest))) return -EFAULT; if (request.req.wLength > USBTMC_BUFSIZE) return -EMSGSIZE; if (request.req.wLength == 0) /* Length-0 requests are never IN */ request.req.bRequestType &= ~USB_DIR_IN; is_in = request.req.bRequestType & USB_DIR_IN; if (request.req.wLength) { buffer = kmalloc(request.req.wLength, GFP_KERNEL); if (!buffer) return -ENOMEM; if (!is_in) { /* Send control data to device */ if (copy_from_user(buffer, request.data, request.req.wLength)) { rv = -EFAULT; goto exit; } } } if (is_in) pipe = usb_rcvctrlpipe(data->usb_dev, 0); else pipe = usb_sndctrlpipe(data->usb_dev, 0); rv = usb_control_msg(data->usb_dev, pipe, request.req.bRequest, request.req.bRequestType, request.req.wValue, request.req.wIndex, buffer, request.req.wLength, USB_CTRL_GET_TIMEOUT); if (rv < 0) { dev_err(dev, "%s failed %d\n", __func__, rv); goto exit; } if (rv && is_in) { /* Read control data from device */ if (copy_to_user(request.data, buffer, rv)) rv = -EFAULT; } exit: kfree(buffer); return rv; } /* * Get the usb timeout value */ static int usbtmc_ioctl_get_timeout(struct usbtmc_file_data *file_data, void __user *arg) { u32 timeout; timeout = file_data->timeout; return put_user(timeout, (__u32 __user *)arg); } /* 
* Set the usb timeout value */ static int usbtmc_ioctl_set_timeout(struct usbtmc_file_data *file_data, void __user *arg) { u32 timeout; if (get_user(timeout, (__u32 __user *)arg)) return -EFAULT; /* Note that timeout = 0 means * MAX_SCHEDULE_TIMEOUT in usb_control_msg */ if (timeout < USBTMC_MIN_TIMEOUT) return -EINVAL; file_data->timeout = timeout; return 0; } /* * enables/disables sending EOM on write */ static int usbtmc_ioctl_eom_enable(struct usbtmc_file_data *file_data, void __user *arg) { u8 eom_enable; if (copy_from_user(&eom_enable, arg, sizeof(eom_enable))) return -EFAULT; if (eom_enable > 1) return -EINVAL; file_data->eom_val = eom_enable; return 0; } /* * Configure termination character for read() */ static int usbtmc_ioctl_config_termc(struct usbtmc_file_data *file_data, void __user *arg) { struct usbtmc_termchar termc; if (copy_from_user(&termc, arg, sizeof(termc))) return -EFAULT; if ((termc.term_char_enabled > 1) || (termc.term_char_enabled && !(file_data->data->capabilities.device_capabilities & 1))) return -EINVAL; file_data->term_char = termc.term_char; file_data->term_char_enabled = termc.term_char_enabled; return 0; } static long usbtmc_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct usbtmc_file_data *file_data; struct usbtmc_device_data *data; int retval = -EBADRQC; __u8 tmp_byte; file_data = file->private_data; data = file_data->data; mutex_lock(&data->io_mutex); if (data->zombie) { retval = -ENODEV; goto skip_io_on_zombie; } switch (cmd) { case USBTMC_IOCTL_CLEAR_OUT_HALT: retval = usbtmc_ioctl_clear_out_halt(data); break; case USBTMC_IOCTL_CLEAR_IN_HALT: retval = usbtmc_ioctl_clear_in_halt(data); break; case USBTMC_IOCTL_INDICATOR_PULSE: retval = usbtmc_ioctl_indicator_pulse(data); break; case USBTMC_IOCTL_CLEAR: retval = usbtmc_ioctl_clear(data); break; case USBTMC_IOCTL_ABORT_BULK_OUT: retval = usbtmc_ioctl_abort_bulk_out(data); break; case USBTMC_IOCTL_ABORT_BULK_IN: retval = usbtmc_ioctl_abort_bulk_in(data); 
break; case USBTMC_IOCTL_CTRL_REQUEST: retval = usbtmc_ioctl_request(data, (void __user *)arg); break; case USBTMC_IOCTL_GET_TIMEOUT: retval = usbtmc_ioctl_get_timeout(file_data, (void __user *)arg); break; case USBTMC_IOCTL_SET_TIMEOUT: retval = usbtmc_ioctl_set_timeout(file_data, (void __user *)arg); break; case USBTMC_IOCTL_EOM_ENABLE: retval = usbtmc_ioctl_eom_enable(file_data, (void __user *)arg); break; case USBTMC_IOCTL_CONFIG_TERMCHAR: retval = usbtmc_ioctl_config_termc(file_data, (void __user *)arg); break; case USBTMC_IOCTL_WRITE: retval = usbtmc_ioctl_generic_write(file_data, (void __user *)arg); break; case USBTMC_IOCTL_READ: retval = usbtmc_ioctl_generic_read(file_data, (void __user *)arg); break; case USBTMC_IOCTL_WRITE_RESULT: retval = usbtmc_ioctl_write_result(file_data, (void __user *)arg); break; case USBTMC_IOCTL_API_VERSION: retval = put_user(USBTMC_API_VERSION, (__u32 __user *)arg); break; case USBTMC488_IOCTL_GET_CAPS: retval = put_user(data->usb488_caps, (unsigned char __user *)arg); break; case USBTMC488_IOCTL_READ_STB: retval = usbtmc488_ioctl_read_stb(file_data, (void __user *)arg); break; case USBTMC488_IOCTL_REN_CONTROL: retval = usbtmc488_ioctl_simple(data, (void __user *)arg, USBTMC488_REQUEST_REN_CONTROL); break; case USBTMC488_IOCTL_GOTO_LOCAL: retval = usbtmc488_ioctl_simple(data, (void __user *)arg, USBTMC488_REQUEST_GOTO_LOCAL); break; case USBTMC488_IOCTL_LOCAL_LOCKOUT: retval = usbtmc488_ioctl_simple(data, (void __user *)arg, USBTMC488_REQUEST_LOCAL_LOCKOUT); break; case USBTMC488_IOCTL_TRIGGER: retval = usbtmc488_ioctl_trigger(file_data); break; case USBTMC488_IOCTL_WAIT_SRQ: retval = usbtmc488_ioctl_wait_srq(file_data, (__u32 __user *)arg); break; case USBTMC_IOCTL_MSG_IN_ATTR: retval = put_user(file_data->bmTransferAttributes, (__u8 __user *)arg); break; case USBTMC_IOCTL_AUTO_ABORT: retval = get_user(tmp_byte, (unsigned char __user *)arg); if (retval == 0) file_data->auto_abort = !!tmp_byte; break; case USBTMC_IOCTL_GET_STB: 
retval = usbtmc_get_stb(file_data, &tmp_byte); if (!retval) retval = put_user(tmp_byte, (__u8 __user *)arg); break; case USBTMC_IOCTL_GET_SRQ_STB: retval = usbtmc_ioctl_get_srq_stb(file_data, (void __user *)arg); break; case USBTMC_IOCTL_CANCEL_IO: retval = usbtmc_ioctl_cancel_io(file_data); break; case USBTMC_IOCTL_CLEANUP_IO: retval = usbtmc_ioctl_cleanup_io(file_data); break; } skip_io_on_zombie: mutex_unlock(&data->io_mutex); return retval; } static int usbtmc_fasync(int fd, struct file *file, int on) { struct usbtmc_file_data *file_data = file->private_data; return fasync_helper(fd, file, on, &file_data->data->fasync); } static __poll_t usbtmc_poll(struct file *file, poll_table *wait) { struct usbtmc_file_data *file_data = file->private_data; struct usbtmc_device_data *data = file_data->data; __poll_t mask; mutex_lock(&data->io_mutex); if (data->zombie) { mask = EPOLLHUP | EPOLLERR; goto no_poll; } poll_wait(file, &data->waitq, wait); /* Note that EPOLLPRI is now assigned to SRQ, and * EPOLLIN|EPOLLRDNORM to normal read data. */ mask = 0; if (atomic_read(&file_data->srq_asserted)) mask |= EPOLLPRI; /* Note that the anchor submitted includes all urbs for BULK IN * and OUT. So EPOLLOUT is signaled when BULK OUT is empty and * all BULK IN urbs are completed and moved to in_anchor. 
*/ if (usb_anchor_empty(&file_data->submitted)) mask |= (EPOLLOUT | EPOLLWRNORM); if (!usb_anchor_empty(&file_data->in_anchor)) mask |= (EPOLLIN | EPOLLRDNORM); spin_lock_irq(&file_data->err_lock); if (file_data->in_status || file_data->out_status) mask |= EPOLLERR; spin_unlock_irq(&file_data->err_lock); dev_dbg(&data->intf->dev, "poll mask = %x\n", mask); no_poll: mutex_unlock(&data->io_mutex); return mask; } static const struct file_operations fops = { .owner = THIS_MODULE, .read = usbtmc_read, .write = usbtmc_write, .open = usbtmc_open, .release = usbtmc_release, .flush = usbtmc_flush, .unlocked_ioctl = usbtmc_ioctl, .compat_ioctl = compat_ptr_ioctl, .fasync = usbtmc_fasync, .poll = usbtmc_poll, .llseek = default_llseek, }; static struct usb_class_driver usbtmc_class = { .name = "usbtmc%d", .fops = &fops, .minor_base = USBTMC_MINOR_BASE, }; static void usbtmc_interrupt(struct urb *urb) { struct usbtmc_device_data *data = urb->context; struct device *dev = &data->intf->dev; int status = urb->status; int rv; dev_dbg(&data->intf->dev, "int status: %d len %d\n", status, urb->actual_length); switch (status) { case 0: /* SUCCESS */ /* check for valid STB notification */ if (data->iin_buffer[0] > 0x81) { data->bNotify1 = data->iin_buffer[0]; data->bNotify2 = data->iin_buffer[1]; atomic_set(&data->iin_data_valid, 1); wake_up_interruptible(&data->waitq); goto exit; } /* check for SRQ notification */ if (data->iin_buffer[0] == 0x81) { unsigned long flags; struct list_head *elem; if (data->fasync) kill_fasync(&data->fasync, SIGIO, POLL_PRI); spin_lock_irqsave(&data->dev_lock, flags); list_for_each(elem, &data->file_list) { struct usbtmc_file_data *file_data; file_data = list_entry(elem, struct usbtmc_file_data, file_elem); file_data->srq_byte = data->iin_buffer[1]; atomic_set(&file_data->srq_asserted, 1); } spin_unlock_irqrestore(&data->dev_lock, flags); dev_dbg(dev, "srq received bTag %x stb %x\n", (unsigned int)data->iin_buffer[0], (unsigned int)data->iin_buffer[1]); 
wake_up_interruptible_all(&data->waitq); goto exit; } dev_warn(dev, "invalid notification: %x\n", data->iin_buffer[0]); break; case -EOVERFLOW: dev_err(dev, "overflow with length %d, actual length is %d\n", data->iin_wMaxPacketSize, urb->actual_length); fallthrough; default: /* urb terminated, clean up */ dev_dbg(dev, "urb terminated, status: %d\n", status); return; } exit: rv = usb_submit_urb(urb, GFP_ATOMIC); if (rv) dev_err(dev, "usb_submit_urb failed: %d\n", rv); } static void usbtmc_free_int(struct usbtmc_device_data *data) { if (!data->iin_ep_present || !data->iin_urb) return; usb_kill_urb(data->iin_urb); kfree(data->iin_buffer); data->iin_buffer = NULL; usb_free_urb(data->iin_urb); data->iin_urb = NULL; kref_put(&data->kref, usbtmc_delete); } static int usbtmc_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usbtmc_device_data *data; struct usb_host_interface *iface_desc; struct usb_endpoint_descriptor *bulk_in, *bulk_out, *int_in; int retcode; dev_dbg(&intf->dev, "%s called\n", __func__); data = kzalloc_obj(*data); if (!data) return -ENOMEM; data->intf = intf; data->id = id; data->usb_dev = usb_get_dev(interface_to_usbdev(intf)); usb_set_intfdata(intf, data); kref_init(&data->kref); mutex_init(&data->io_mutex); init_waitqueue_head(&data->waitq); atomic_set(&data->iin_data_valid, 0); INIT_LIST_HEAD(&data->file_list); spin_lock_init(&data->dev_lock); data->zombie = 0; /* Initialize USBTMC bTag and other fields */ data->bTag = 1; /* 2 <= bTag <= 127 USBTMC-USB488 subclass specification 4.3.1 */ data->iin_bTag = 2; /* USBTMC devices have only one setting, so use that */ iface_desc = data->intf->cur_altsetting; data->ifnum = iface_desc->desc.bInterfaceNumber; /* Find bulk endpoints */ retcode = usb_find_common_endpoints(iface_desc, &bulk_in, &bulk_out, NULL, NULL); if (retcode) { dev_err(&intf->dev, "bulk endpoints not found\n"); goto err_put; } retcode = -EINVAL; data->bulk_in = bulk_in->bEndpointAddress; data->wMaxPacketSize = 
usb_endpoint_maxp(bulk_in); if (!data->wMaxPacketSize) goto err_put; dev_dbg(&intf->dev, "Found bulk in endpoint at %u\n", data->bulk_in); data->bulk_out = bulk_out->bEndpointAddress; dev_dbg(&intf->dev, "Found Bulk out endpoint at %u\n", data->bulk_out); /* Find int endpoint */ retcode = usb_find_int_in_endpoint(iface_desc, &int_in); if (!retcode) { data->iin_ep_present = 1; data->iin_ep = int_in->bEndpointAddress; data->iin_wMaxPacketSize = usb_endpoint_maxp(int_in); data->iin_interval = int_in->bInterval; dev_dbg(&intf->dev, "Found Int in endpoint at %u\n", data->iin_ep); } retcode = get_capabilities(data); if (retcode) dev_err(&intf->dev, "can't read capabilities\n"); if (data->iin_ep_present) { /* allocate int urb */ data->iin_urb = usb_alloc_urb(0, GFP_KERNEL); if (!data->iin_urb) { retcode = -ENOMEM; goto error_register; } /* Protect interrupt in endpoint data until iin_urb is freed */ kref_get(&data->kref); /* allocate buffer for interrupt in */ data->iin_buffer = kmalloc(data->iin_wMaxPacketSize, GFP_KERNEL); if (!data->iin_buffer) { retcode = -ENOMEM; goto error_register; } /* fill interrupt urb */ usb_fill_int_urb(data->iin_urb, data->usb_dev, usb_rcvintpipe(data->usb_dev, data->iin_ep), data->iin_buffer, data->iin_wMaxPacketSize, usbtmc_interrupt, data, data->iin_interval); retcode = usb_submit_urb(data->iin_urb, GFP_KERNEL); if (retcode) { dev_err(&intf->dev, "Failed to submit iin_urb\n"); goto error_register; } } retcode = usb_register_dev(intf, &usbtmc_class); if (retcode) { dev_err(&intf->dev, "Not able to get a minor (base %u, slice default): %d\n", USBTMC_MINOR_BASE, retcode); goto error_register; } dev_dbg(&intf->dev, "Using minor number %d\n", intf->minor); return 0; error_register: usbtmc_free_int(data); err_put: kref_put(&data->kref, usbtmc_delete); return retcode; } static void usbtmc_disconnect(struct usb_interface *intf) { struct usbtmc_device_data *data = usb_get_intfdata(intf); struct list_head *elem; usb_deregister_dev(intf, 
&usbtmc_class); mutex_lock(&data->io_mutex); data->zombie = 1; wake_up_interruptible_all(&data->waitq); list_for_each(elem, &data->file_list) { struct usbtmc_file_data *file_data; file_data = list_entry(elem, struct usbtmc_file_data, file_elem); usb_kill_anchored_urbs(&file_data->submitted); usb_scuttle_anchored_urbs(&file_data->in_anchor); } mutex_unlock(&data->io_mutex); usbtmc_free_int(data); kref_put(&data->kref, usbtmc_delete); } static void usbtmc_draw_down(struct usbtmc_file_data *file_data) { int time; time = usb_wait_anchor_empty_timeout(&file_data->submitted, 1000); if (!time) usb_kill_anchored_urbs(&file_data->submitted); usb_scuttle_anchored_urbs(&file_data->in_anchor); } static int usbtmc_suspend(struct usb_interface *intf, pm_message_t message) { struct usbtmc_device_data *data = usb_get_intfdata(intf); struct list_head *elem; if (!data) return 0; mutex_lock(&data->io_mutex); list_for_each(elem, &data->file_list) { struct usbtmc_file_data *file_data; file_data = list_entry(elem, struct usbtmc_file_data, file_elem); usbtmc_draw_down(file_data); } if (data->iin_ep_present && data->iin_urb) usb_kill_urb(data->iin_urb); mutex_unlock(&data->io_mutex); return 0; } static int usbtmc_resume(struct usb_interface *intf) { struct usbtmc_device_data *data = usb_get_intfdata(intf); int retcode = 0; if (data->iin_ep_present && data->iin_urb) retcode = usb_submit_urb(data->iin_urb, GFP_KERNEL); if (retcode) dev_err(&intf->dev, "Failed to submit iin_urb\n"); return retcode; } static int usbtmc_pre_reset(struct usb_interface *intf) { struct usbtmc_device_data *data = usb_get_intfdata(intf); struct list_head *elem; if (!data) return 0; mutex_lock(&data->io_mutex); list_for_each(elem, &data->file_list) { struct usbtmc_file_data *file_data; file_data = list_entry(elem, struct usbtmc_file_data, file_elem); usbtmc_ioctl_cancel_io(file_data); } return 0; } static int usbtmc_post_reset(struct usb_interface *intf) { struct usbtmc_device_data *data = usb_get_intfdata(intf); 
mutex_unlock(&data->io_mutex); return 0; } static struct usb_driver usbtmc_driver = { .name = "usbtmc", .id_table = usbtmc_devices, .probe = usbtmc_probe, .disconnect = usbtmc_disconnect, .suspend = usbtmc_suspend, .resume = usbtmc_resume, .pre_reset = usbtmc_pre_reset, .post_reset = usbtmc_post_reset, .dev_groups = usbtmc_groups, }; module_usb_driver(usbtmc_driver); MODULE_DESCRIPTION("USB Test & Measurement class driver"); MODULE_LICENSE("GPL");
115 304 7783 4897 4861 5388 3904 3901 4981 476 43 43 444 342 7055 4599 4602 6285 408 1 1 406 10 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_NS_COMMON_H #define _LINUX_NS_COMMON_H #include <linux/ns/ns_common_types.h> #include <linux/refcount.h> #include <linux/vfsdebug.h> #include <uapi/linux/sched.h> #include <uapi/linux/nsfs.h> bool is_current_namespace(struct ns_common *ns); int __ns_common_init(struct ns_common *ns, u32 ns_type, const struct proc_ns_operations *ops, int inum); void __ns_common_free(struct ns_common *ns); struct ns_common *__must_check ns_owner(struct ns_common *ns); static __always_inline bool is_ns_init_inum(const struct ns_common *ns) { VFS_WARN_ON_ONCE(ns->inum == 0); return unlikely(in_range(ns->inum, MNT_NS_INIT_INO, IPC_NS_INIT_INO - MNT_NS_INIT_INO + 1)); } static __always_inline bool is_ns_init_id(const struct ns_common *ns) { VFS_WARN_ON_ONCE(ns->ns_id == 0); return ns->ns_id <= NS_LAST_INIT_ID; } #define NS_COMMON_INIT(nsname) \ { \ .ns_type = ns_common_type(&nsname), \ .ns_id = ns_init_id(&nsname), \ .inum = ns_init_inum(&nsname), \ .ops = to_ns_operations(&nsname), \ .stashed = NULL, \ .__ns_ref = REFCOUNT_INIT(1), \ .__ns_ref_active = ATOMIC_INIT(1), \ .ns_unified_node.ns_list_entry = LIST_HEAD_INIT(nsname.ns.ns_unified_node.ns_list_entry), \ .ns_tree_node.ns_list_entry = LIST_HEAD_INIT(nsname.ns.ns_tree_node.ns_list_entry), \ .ns_owner_node.ns_list_entry = LIST_HEAD_INIT(nsname.ns.ns_owner_node.ns_list_entry), \ 
.ns_owner_root.ns_list_head = LIST_HEAD_INIT(nsname.ns.ns_owner_root.ns_list_head), \ } #define ns_common_init(__ns) \ __ns_common_init(to_ns_common(__ns), \ ns_common_type(__ns), \ to_ns_operations(__ns), \ (((__ns) == ns_init_ns(__ns)) ? ns_init_inum(__ns) : 0)) #define ns_common_init_inum(__ns, __inum) \ __ns_common_init(to_ns_common(__ns), \ ns_common_type(__ns), \ to_ns_operations(__ns), \ __inum) #define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns))) bool may_see_all_namespaces(void); static __always_inline __must_check int __ns_ref_active_read(const struct ns_common *ns) { return atomic_read(&ns->__ns_ref_active); } static __always_inline __must_check int __ns_ref_read(const struct ns_common *ns) { return refcount_read(&ns->__ns_ref); } static __always_inline __must_check bool __ns_ref_put(struct ns_common *ns) { if (is_ns_init_id(ns)) { VFS_WARN_ON_ONCE(__ns_ref_read(ns) != 1); VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) != 1); return false; } if (refcount_dec_and_test(&ns->__ns_ref)) { VFS_WARN_ON_ONCE(__ns_ref_active_read(ns)); return true; } return false; } static __always_inline __must_check bool __ns_ref_get(struct ns_common *ns) { if (is_ns_init_id(ns)) { VFS_WARN_ON_ONCE(__ns_ref_read(ns) != 1); VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) != 1); return true; } if (refcount_inc_not_zero(&ns->__ns_ref)) return true; VFS_WARN_ON_ONCE(__ns_ref_active_read(ns)); return false; } static __always_inline void __ns_ref_inc(struct ns_common *ns) { if (is_ns_init_id(ns)) { VFS_WARN_ON_ONCE(__ns_ref_read(ns) != 1); VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) != 1); return; } refcount_inc(&ns->__ns_ref); } static __always_inline __must_check bool __ns_ref_dec_and_lock(struct ns_common *ns, spinlock_t *ns_lock) { if (is_ns_init_id(ns)) { VFS_WARN_ON_ONCE(__ns_ref_read(ns) != 1); VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) != 1); return false; } return refcount_dec_and_lock(&ns->__ns_ref, ns_lock); } #define ns_ref_read(__ns) __ns_ref_read(to_ns_common((__ns))) 
#define ns_ref_inc(__ns) \ do { if (__ns) __ns_ref_inc(to_ns_common((__ns))); } while (0) #define ns_ref_get(__ns) \ ((__ns) ? __ns_ref_get(to_ns_common((__ns))) : false) #define ns_ref_put(__ns) \ ((__ns) ? __ns_ref_put(to_ns_common((__ns))) : false) #define ns_ref_put_and_lock(__ns, __ns_lock) \ ((__ns) ? __ns_ref_dec_and_lock(to_ns_common((__ns)), __ns_lock) : false) #define ns_ref_active_read(__ns) \ ((__ns) ? __ns_ref_active_read(to_ns_common(__ns)) : 0) void __ns_ref_active_put(struct ns_common *ns); #define ns_ref_active_put(__ns) \ do { if (__ns) __ns_ref_active_put(to_ns_common(__ns)); } while (0) static __always_inline struct ns_common *__must_check ns_get_unless_inactive(struct ns_common *ns) { if (!__ns_ref_active_read(ns)) { VFS_WARN_ON_ONCE(is_ns_init_id(ns)); return NULL; } if (!__ns_ref_get(ns)) return NULL; return ns; } void __ns_ref_active_get(struct ns_common *ns); #define ns_ref_active_get(__ns) \ do { if (__ns) __ns_ref_active_get(to_ns_common(__ns)); } while (0) #endif
1 1 1 1 1 1 1 1 1 23 23 23 23 23 23 23 23 2 1 3 3 3 3 4 4 4 4 4 4 4 4 4 1 1 1 1 1 1 2 1 2 1 2 2 2 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 1 2 2 2 2 2 1 1 1 1 1 2 2 2 2 2 2 2 2 2 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 
456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 
956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 
1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 
 */

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/compat.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/pagemap.h>
#include <linux/uio.h>
#include <linux/blkdev.h>
#include <linux/mm.h>
#include <linux/mount.h>
#include <linux/fs.h>
#include <linux/filelock.h>
#include <linux/gfs2_ondisk.h>
#include <linux/falloc.h>
#include <linux/swap.h>
#include <linux/crc32.h>
#include <linux/writeback.h>
#include <linux/uaccess.h>
#include <linux/dlm.h>
#include <linux/dlm_plock.h>
#include <linux/delay.h>
#include <linux/backing-dev.h>
#include <linux/fileattr.h>

#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "aops.h"
#include "dir.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "log.h"
#include "meta_io.h"
#include "quota.h"
#include "rgrp.h"
#include "trans.h"
#include "util.h"

/**
 * gfs2_llseek - seek to a location in a file
 * @file: the file
 * @offset: the offset
 * @whence: Where to seek from (SEEK_SET, SEEK_CUR, SEEK_END, SEEK_DATA
 *          or SEEK_HOLE)
 *
 * SEEK_END requires the glock for the file because it references the
 * file's size.  SEEK_DATA and SEEK_HOLE are delegated to gfs2_seek_data()
 * and gfs2_seek_hole().  Any other @whence value yields -EINVAL.
 *
 * Returns: The new offset, or errno
 */

static loff_t gfs2_llseek(struct file *file, loff_t offset, int whence)
{
	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
	struct gfs2_holder i_gh;
	/* Carries either a negative errno or the resulting file offset. */
	loff_t error;

	switch (whence) {
	case SEEK_END:
		/* Hold the glock in shared mode only around the seek itself. */
		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
					   &i_gh);
		if (!error) {
			error = generic_file_llseek(file, offset, whence);
			gfs2_glock_dq_uninit(&i_gh);
		}
		break;

	case SEEK_DATA:
		error = gfs2_seek_data(file, offset);
		break;

	case SEEK_HOLE:
		error = gfs2_seek_hole(file, offset);
		break;

	case SEEK_CUR:
	case SEEK_SET:
		/*
		 * These don't reference inode->i_size and don't depend on the
		 * block mapping, so we don't need the glock.
		 */
		error = generic_file_llseek(file, offset, whence);
		break;
	default:
		error = -EINVAL;
	}

	return error;
}

/**
 * gfs2_readdir - Iterator for a directory
 * @file: The directory to read from
 * @ctx: What to feed directory entries to
 *
 * The directory glock is held in shared mode for the duration of the
 * gfs2_dir_read() call.
 *
 * Returns: errno
 */

static int gfs2_readdir(struct file *file, struct dir_context *ctx)
{
	struct inode *dir = file->f_mapping->host;
	struct gfs2_inode *dip = GFS2_I(dir);
	struct gfs2_holder d_gh;
	int error;

	error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
	if (error)
		return error;

	error = gfs2_dir_read(dir, ctx, &file->f_ra);

	gfs2_glock_dq_uninit(&d_gh);

	return error;
}

/*
 * struct fsflag_gfs2flag
 *
 * Translation table between the generic FS_*_FL flags and the on-disk
 * GFS2_DIF_* flags.
 *
 * The FS_JOURNAL_DATA_FL flag maps to GFS2_DIF_INHERIT_JDATA for directories,
 * and to GFS2_DIF_JDATA for non-directories.
 */
static struct {
	u32 fsflag;
	u32 gfsflag;
} fsflag_gfs2flag[] = {
	{FS_SYNC_FL, GFS2_DIF_SYNC},
	{FS_IMMUTABLE_FL, GFS2_DIF_IMMUTABLE},
	{FS_APPEND_FL, GFS2_DIF_APPENDONLY},
	{FS_NOATIME_FL, GFS2_DIF_NOATIME},
	{FS_INDEX_FL, GFS2_DIF_EXHASH},
	{FS_TOPDIR_FL, GFS2_DIF_TOPDIR},
	{FS_JOURNAL_DATA_FL, GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA},
};

/*
 * Convert a set of GFS2_DIF_* flags into the corresponding FS_*_FL flags
 * using the fsflag_gfs2flag table.
 */
static inline u32 gfs2_gfsflags_to_fsflags(struct inode *inode, u32 gfsflags)
{
	int i;
	u32 fsflags = 0;

	/*
	 * Mask out the jdata variant that does not apply to this inode type
	 * so that the combined table entry only matches the relevant bit.
	 */
	if (S_ISDIR(inode->i_mode))
		gfsflags &= ~GFS2_DIF_JDATA;
	else
		gfsflags &= ~GFS2_DIF_INHERIT_JDATA;

	for (i = 0; i < ARRAY_SIZE(fsflag_gfs2flag); i++)
		if (gfsflags & fsflag_gfs2flag[i].gfsflag)
			fsflags |= fsflag_gfs2flag[i].fsflag;
	return fsflags;
}

/*
 * gfs2_fileattr_get - report an inode's flags as FS_*_FL values
 *
 * Special files are rejected with -ENOTTY.  Otherwise the inode glock is
 * taken in shared mode while ip->i_diskflags is translated and stored in @fa.
 *
 * Returns: 0 on success, or errno
 */
int gfs2_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
{
	struct inode *inode = d_inode(dentry);
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder gh;
	int error;
	u32 fsflags;

	if (d_is_special(dentry))
		return -ENOTTY;

	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
	error = gfs2_glock_nq(&gh);
	if (error)
		goto out_uninit;

	fsflags = gfs2_gfsflags_to_fsflags(inode, ip->i_diskflags);

	fileattr_fill_flags(fa, fsflags);

	gfs2_glock_dq(&gh);
out_uninit:
	gfs2_holder_uninit(&gh);
	return error;
}

/*
 * gfs2_set_inode_flags - recompute inode->i_flags from the GFS2 disk flags
 *
 * Clears the S_* bits managed here and sets them again according to
 * ip->i_diskflags.  S_NOSEC is set when the inode has no extended attribute
 * block (i_eattr == 0) and is_sxid() is false for its mode.
 */
void gfs2_set_inode_flags(struct inode *inode)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	unsigned int flags = inode->i_flags;

	flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_NOSEC);
	if ((ip->i_eattr == 0) && !is_sxid(inode->i_mode))
		flags |= S_NOSEC;
	if (ip->i_diskflags & GFS2_DIF_IMMUTABLE)
		flags |= S_IMMUTABLE;
	if (ip->i_diskflags & GFS2_DIF_APPENDONLY)
		flags |= S_APPEND;
	if (ip->i_diskflags & GFS2_DIF_NOATIME)
		flags |= S_NOATIME;
	if (ip->i_diskflags & GFS2_DIF_SYNC)
		flags |= S_SYNC;
	inode->i_flags = flags;
}

/* Flags that can be set by user space */
#define GFS2_FLAGS_USER_SET (GFS2_DIF_JDATA|			\
			     GFS2_DIF_IMMUTABLE|		\
			     GFS2_DIF_APPENDONLY|		\
			     GFS2_DIF_NOATIME|			\
			     GFS2_DIF_SYNC|			\
			     GFS2_DIF_TOPDIR|			\
			     GFS2_DIF_INHERIT_JDATA)

/**
 * do_gfs2_set_flags - set flags on an inode
 * @inode: The inode
 * @reqflags: The flags to set
 * @mask: Indicates which flags are valid
 *
 * Runs under the exclusive inode glock.  Only the bits selected by @mask
 * are replaced; if the result equals the current disk flags nothing is
 * written.
 *
 * Returns: 0 on success, or errno
 */
static int do_gfs2_set_flags(struct inode *inode, u32 reqflags, u32 mask)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *bh;
	struct gfs2_holder gh;
	int error;
	u32 new_flags, flags;

	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
	if (error)
		return error;

	error = 0;
	flags = ip->i_diskflags;
	new_flags = (flags & ~mask) | (reqflags & mask);
	/* No effective change: report success without touching the disk. */
	if ((new_flags ^ flags) == 0)
		goto out;

	if (!IS_IMMUTABLE(inode)) {
		error = gfs2_permission(&nop_mnt_idmap, inode, MAY_WRITE);
		if (error)
			goto out;
	}
	/*
	 * The jdata flag is being toggled: write back and wait for the page
	 * cache, then drop it, before the new flag takes effect.
	 */
	if ((flags ^ new_flags) & GFS2_DIF_JDATA) {
		if (new_flags & GFS2_DIF_JDATA)
			gfs2_log_flush(sdp, ip->i_gl,
				       GFS2_LOG_HEAD_FLUSH_NORMAL |
				       GFS2_LFC_SET_FLAGS);
		error = filemap_fdatawrite(inode->i_mapping);
		if (error)
			goto out;
		error = filemap_fdatawait(inode->i_mapping);
		if (error)
			goto out;
		truncate_inode_pages(inode->i_mapping, 0);
		if (new_flags & GFS2_DIF_JDATA)
			gfs2_ordered_del_inode(ip);
	}
	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		goto out;
	error = gfs2_meta_inode_buffer(ip, &bh);
	if (error)
		goto out_trans_end;
	inode_set_ctime_current(inode);
	gfs2_trans_add_meta(ip->i_gl, bh);
	ip->i_diskflags = new_flags;
	gfs2_dinode_out(ip, bh->b_data);
	brelse(bh);
	/* Bring the in-core inode flags and address space ops in line. */
	gfs2_set_inode_flags(inode);
	gfs2_set_aops(inode);
out_trans_end:
	gfs2_trans_end(sdp);
out:
	gfs2_glock_dq_uninit(&gh);
	return error;
}

/*
 * gfs2_fileattr_set - apply FS_*_FL flags from @fa to an inode
 *
 * Special files yield -ENOTTY and fsx attributes yield -EOPNOTSUPP.  Flags
 * that have no entry in fsflag_gfs2flag, or that translate to something
 * outside GFS2_FLAGS_USER_SET, yield -EINVAL.
 *
 * Returns: 0 on success, or errno
 */
int gfs2_fileattr_set(struct mnt_idmap *idmap,
		      struct dentry *dentry, struct file_kattr *fa)
{
	struct inode *inode = d_inode(dentry);
	u32 fsflags = fa->flags, gfsflags = 0;
	u32 mask;
	int i;

	if (d_is_special(dentry))
		return -ENOTTY;

	if (fileattr_has_fsx(fa))
		return -EOPNOTSUPP;

	/* Translate each known flag; whatever is left over is unsupported. */
	for (i = 0; i < ARRAY_SIZE(fsflag_gfs2flag); i++) {
		if (fsflags & fsflag_gfs2flag[i].fsflag) {
			fsflags &= ~fsflag_gfs2flag[i].fsflag;
			gfsflags |= fsflag_gfs2flag[i].gfsflag;
		}
	}
	if (fsflags || gfsflags & ~GFS2_FLAGS_USER_SET)
		return -EINVAL;

	mask = GFS2_FLAGS_USER_SET;
	if (S_ISDIR(inode->i_mode)) {
		mask &= ~GFS2_DIF_JDATA;
	} else {
		/* The GFS2_DIF_TOPDIR flag is only valid for directories. */
		if (gfsflags & GFS2_DIF_TOPDIR)
			return -EINVAL;
		mask &= ~(GFS2_DIF_TOPDIR | GFS2_DIF_INHERIT_JDATA);
	}

	return do_gfs2_set_flags(inode, gfsflags, mask);
}

/*
 * Copy GFS2_LOCKNAME_LEN bytes of the superblock's lock table name to the
 * user buffer @label.  Returns 0, or -EFAULT if the copy fails.
 */
static int gfs2_getlabel(struct file *filp, char __user *label)
{
	struct inode *inode = file_inode(filp);
	struct gfs2_sbd *sdp = GFS2_SB(inode);

	if (copy_to_user(label, sdp->sd_sb.sb_locktable, GFS2_LOCKNAME_LEN))
		return -EFAULT;

	return 0;
}

/* ioctl dispatcher: handles FITRIM and FS_IOC_GETFSLABEL, else -ENOTTY. */
static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	switch(cmd) {
	case FITRIM:
		return gfs2_fitrim(filp, (void __user *)arg);
	case FS_IOC_GETFSLABEL:
		return gfs2_getlabel(filp, (char __user *)arg);
	}

	return -ENOTTY;
}

#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point: forwards the commands gfs2_ioctl() understands
 * after converting @arg with compat_ptr(); anything else is -ENOIOCTLCMD.
 */
static long gfs2_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	switch(cmd) {
	/* Keep this list in sync with gfs2_ioctl */
	case FITRIM:
	case FS_IOC_GETFSLABEL:
		break;
	default:
		return -ENOIOCTLCMD;
	}

	return gfs2_ioctl(filp, cmd, (unsigned long)compat_ptr(arg));
}
#else
#define gfs2_compat_ioctl NULL
#endif

/**
 * gfs2_size_hint - Give a hint to the size of a
write request * @filep: The struct file * @offset: The file offset of the write * @size: The length of the write * * When we are about to do a write, this function records the total * write size in order to provide a suitable hint to the lower layers * about how many blocks will be required. * */ static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size) { struct inode *inode = file_inode(filep); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_inode *ip = GFS2_I(inode); size_t blks = (size + sdp->sd_sb.sb_bsize - 1) >> sdp->sd_sb.sb_bsize_shift; int hint = min_t(size_t, INT_MAX, blks); if (hint > atomic_read(&ip->i_sizehint)) atomic_set(&ip->i_sizehint, hint); } /** * gfs2_allocate_folio_backing - Allocate blocks for a write fault * @folio: The (locked) folio to allocate backing for * @length: Size of the allocation * * We try to allocate all the blocks required for the folio in one go. This * might fail for various reasons, so we keep trying until all the blocks to * back this folio are allocated. If some of the blocks are already allocated, * that is ok too. */ static int gfs2_allocate_folio_backing(struct folio *folio, size_t length) { u64 pos = folio_pos(folio); do { struct iomap iomap = { }; if (gfs2_iomap_alloc(folio->mapping->host, pos, length, &iomap)) return -EIO; if (length < iomap.length) iomap.length = length; length -= iomap.length; pos += iomap.length; } while (length > 0); return 0; } /** * gfs2_page_mkwrite - Make a shared, mmap()ed, page writable * @vmf: The virtual memory fault containing the page to become writable * * When the page becomes writable, we need to ensure that we have * blocks allocated on disk to back that page. 
 */

static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
{
	struct folio *folio = page_folio(vmf->page);
	struct inode *inode = file_inode(vmf->vma->vm_file);
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_alloc_parms ap = {};
	u64 pos = folio_pos(folio);
	unsigned int data_blocks, ind_blocks, rblocks;
	/* Stays VM_FAULT_LOCKED only if the folio ends up locked and usable. */
	vm_fault_t ret = VM_FAULT_LOCKED;
	struct gfs2_holder gh;
	size_t length;
	loff_t size;
	int err;

	sb_start_pagefault(inode->i_sb);

	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
	err = gfs2_glock_nq(&gh);
	if (err) {
		ret = vmf_fs_error(err);
		goto out_uninit;
	}

	/* Check folio index against inode size */
	size = i_size_read(inode);
	if (pos >= size) {
		ret = VM_FAULT_SIGBUS;
		goto out_unlock;
	}

	/* Update file times before taking folio lock */
	file_update_time(vmf->vma->vm_file);

	/* folio is wholly or partially inside EOF */
	if (size - pos < folio_size(folio))
		length = size - pos;
	else
		length = folio_size(folio);

	gfs2_size_hint(vmf->vma->vm_file, pos, length);

	set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
	set_bit(GIF_SW_PAGED, &ip->i_flags);

	/*
	 * iomap_writepage / iomap_writepages currently don't support inline
	 * files, so always unstuff here.
	 */

	/*
	 * Fast path: not stuffed and nothing to allocate.  Just lock the
	 * folio and check that it is still up to date and attached to this
	 * mapping.
	 */
	if (!gfs2_is_stuffed(ip) &&
	    !gfs2_write_alloc_required(ip, pos, length)) {
		folio_lock(folio);
		if (!folio_test_uptodate(folio) ||
		    folio->mapping != inode->i_mapping) {
			ret = VM_FAULT_NOPAGE;
			folio_unlock(folio);
		}
		goto out_unlock;
	}

	err = gfs2_rindex_update(sdp);
	if (err) {
		ret = vmf_fs_error(err);
		goto out_unlock;
	}

	/* Quota check and block reservation for the range being faulted. */
	gfs2_write_calc_reserv(ip, length, &data_blocks, &ind_blocks);
	ap.target = data_blocks + ind_blocks;
	err = gfs2_quota_lock_check(ip, &ap);
	if (err) {
		ret = vmf_fs_error(err);
		goto out_unlock;
	}
	err = gfs2_inplace_reserve(ip, &ap);
	if (err) {
		ret = vmf_fs_error(err);
		goto out_quota_unlock;
	}

	/* Work out how many blocks the transaction may touch. */
	rblocks = RES_DINODE + ind_blocks;
	if (gfs2_is_jdata(ip))
		rblocks += data_blocks ? data_blocks : 1;
	if (ind_blocks || data_blocks) {
		rblocks += RES_STATFS + RES_QUOTA;
		rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);
	}
	err = gfs2_trans_begin(sdp, rblocks, 0);
	if (err) {
		ret = vmf_fs_error(err);
		goto out_trans_fail;
	}

	/* Unstuff, if required, and allocate backing blocks for folio */
	if (gfs2_is_stuffed(ip)) {
		err = gfs2_unstuff_dinode(ip);
		if (err) {
			ret = vmf_fs_error(err);
			/* The folio is not locked yet, so skip out_page_locked. */
			goto out_trans_end;
		}
	}

	folio_lock(folio);
	/* If truncated, we must retry the operation, we may have raced
	 * with the glock demotion code.
	 */
	if (!folio_test_uptodate(folio) || folio->mapping != inode->i_mapping) {
		ret = VM_FAULT_NOPAGE;
		goto out_page_locked;
	}

	err = gfs2_allocate_folio_backing(folio, length);
	if (err)
		ret = vmf_fs_error(err);

	/* Unwind in the reverse order of acquisition. */
out_page_locked:
	if (ret != VM_FAULT_LOCKED)
		folio_unlock(folio);
out_trans_end:
	gfs2_trans_end(sdp);
out_trans_fail:
	gfs2_inplace_release(ip);
out_quota_unlock:
	gfs2_quota_unlock(ip);
out_unlock:
	gfs2_glock_dq(&gh);
out_uninit:
	gfs2_holder_uninit(&gh);
	/* On success the folio is returned locked, dirty and stable. */
	if (ret == VM_FAULT_LOCKED) {
		folio_mark_dirty(folio);
		folio_wait_stable(folio);
	}
	sb_end_pagefault(inode->i_sb);
	return ret;
}

/*
 * gfs2_fault - handle a page fault on a GFS2 mapping
 *
 * Wraps filemap_fault() in a shared hold of the inode glock.  A failure to
 * acquire the glock is converted with vmf_fs_error().
 */
static vm_fault_t gfs2_fault(struct vm_fault *vmf)
{
	struct inode *inode = file_inode(vmf->vma->vm_file);
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder gh;
	vm_fault_t ret;
	int err;

	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
	err = gfs2_glock_nq(&gh);
	if (err) {
		ret = vmf_fs_error(err);
		goto out_uninit;
	}
	ret = filemap_fault(vmf);
	gfs2_glock_dq(&gh);
out_uninit:
	gfs2_holder_uninit(&gh);
	return ret;
}

/* VM operations installed on GFS2 mappings by gfs2_mmap(). */
static const struct vm_operations_struct gfs2_vm_ops = {
	.fault = gfs2_fault,
	.map_pages = filemap_map_pages,
	.page_mkwrite = gfs2_page_mkwrite,
};

/**
 * gfs2_mmap
 * @file: The file to map
 * @vma: The VMA which described the mapping
 *
 * There is no need to get a lock here unless we should be updating
 * atime. We ignore any locking errors since the only consequence is
 * a missed atime update (which will just be deferred until later).
* * Returns: 0 */ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) { struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); if (!(file->f_flags & O_NOATIME) && !IS_NOATIME(&ip->i_inode)) { struct gfs2_holder i_gh; int error; error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (error) return error; /* grab lock to update inode */ gfs2_glock_dq_uninit(&i_gh); file_accessed(file); } vma->vm_ops = &gfs2_vm_ops; return 0; } /** * gfs2_open_common - This is common to open and atomic_open * @inode: The inode being opened * @file: The file being opened * * This maybe called under a glock or not depending upon how it has * been called. We must always be called under a glock for regular * files, however. For other file types, it does not matter whether * we hold the glock or not. * * Returns: Error code or 0 for success */ int gfs2_open_common(struct inode *inode, struct file *file) { struct gfs2_file *fp; int ret; if (S_ISREG(inode->i_mode)) { ret = generic_file_open(inode, file); if (ret) return ret; if (!gfs2_is_jdata(GFS2_I(inode))) file->f_mode |= FMODE_CAN_ODIRECT; } fp = kzalloc_obj(struct gfs2_file, GFP_NOFS); if (!fp) return -ENOMEM; mutex_init(&fp->f_fl_mutex); gfs2_assert_warn(GFS2_SB(inode), !file->private_data); file->private_data = fp; if (file->f_mode & FMODE_WRITE) { ret = gfs2_qa_get(GFS2_I(inode)); if (ret) goto fail; } return 0; fail: kfree(file->private_data); file->private_data = NULL; return ret; } /** * gfs2_open - open a file * @inode: the inode to open * @file: the struct file for this opening * * After atomic_open, this function is only used for opening files * which are already cached. We must still get the glock for regular * files to ensure that we have the file size uptodate for the large * file check which is in the common code. That is only an issue for * regular files though. 
* * Returns: errno */ static int gfs2_open(struct inode *inode, struct file *file) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder i_gh; int error; bool need_unlock = false; if (S_ISREG(ip->i_inode.i_mode)) { error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (error) return error; need_unlock = true; } error = gfs2_open_common(inode, file); if (need_unlock) gfs2_glock_dq_uninit(&i_gh); return error; } /** * gfs2_release - called to close a struct file * @inode: the inode the struct file belongs to * @file: the struct file being closed * * Returns: errno */ static int gfs2_release(struct inode *inode, struct file *file) { struct gfs2_inode *ip = GFS2_I(inode); kfree(file->private_data); file->private_data = NULL; if (file->f_mode & FMODE_WRITE) { if (gfs2_rs_active(&ip->i_res)) gfs2_rs_delete(ip); gfs2_qa_put(ip); } return 0; } /** * gfs2_fsync - sync the dirty data for a file (across the cluster) * @file: the file that points to the dentry * @start: the start position in the file to sync * @end: the end position in the file to sync * @datasync: set if we can ignore timestamp changes * * We split the data flushing here so that we don't wait for the data * until after we've also sent the metadata to disk. Note that for * data=ordered, we will write & wait for the data at the log flush * stage anyway, so this is unlikely to make much of a difference * except in the data=writeback case. * * If the fdatawrite fails due to any reason except -EIO, we will * continue the remainder of the fsync, although we'll still report * the error at the end. This is to match filemap_write_and_wait_range() * behaviour. 
 *
 * Returns: errno
 */

static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
{
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	int sync_state = inode_state_read_once(inode) & I_DIRTY;
	struct gfs2_inode *ip = GFS2_I(inode);
	/* ret1 remembers a non-fatal writeback error to report at the end. */
	int ret = 0, ret1 = 0;

	if (mapping->nrpages) {
		ret1 = filemap_fdatawrite_range(mapping, start, end);
		if (ret1 == -EIO)
			return ret1;
	}

	/* Dirty pages only count towards the metadata sync for jdata inodes. */
	if (!gfs2_is_jdata(ip))
		sync_state &= ~I_DIRTY_PAGES;
	if (datasync)
		sync_state &= ~I_DIRTY_SYNC;

	if (sync_state) {
		ret = sync_inode_metadata(inode, 1);
		if (ret)
			return ret;
		if (gfs2_is_jdata(ip))
			ret = file_write_and_wait(file);
		if (ret)
			return ret;
		gfs2_ail_flush(ip->i_gl, 1);
	}

	if (mapping->nrpages)
		ret = file_fdatawait_range(file, start, end);

	return ret ? ret : ret1;
}

/*
 * should_fault_in_pages - decide whether to fault in user pages and retry
 * @i: the iterator describing the user buffer
 * @iocb: the I/O control block (only ki_pos is read)
 * @prev_count: in/out, the iterator count seen on the previous call
 * @window_size: out, number of bytes the caller should fault in
 *
 * Returns false when the iterator is empty or not user-backed; in that case
 * neither @prev_count nor @window_size is written.  Otherwise both are
 * updated and true is returned.
 */
static inline bool should_fault_in_pages(struct iov_iter *i,
					 struct kiocb *iocb,
					 size_t *prev_count,
					 size_t *window_size)
{
	size_t count = iov_iter_count(i);
	size_t size, offs;

	if (!count)
		return false;
	if (!user_backed_iter(i))
		return false;

	/*
	 * Try to fault in multiple pages initially.  When that doesn't result
	 * in any progress, fall back to a single page.
	 */
	size = PAGE_SIZE;
	offs = offset_in_page(iocb->ki_pos);
	/* A changed count means the previous attempt made progress. */
	if (*prev_count != count) {
		size_t nr_dirtied;

		nr_dirtied = max(current->nr_dirtied_pause -
				 current->nr_dirtied, 8);
		size = min_t(size_t, SZ_1M, nr_dirtied << PAGE_SHIFT);
	}

	*prev_count = count;
	*window_size = size - offs;
	return true;
}

/*
 * gfs2_file_direct_read - direct I/O read under a deferred glock
 * @iocb: the I/O control block
 * @to: destination iterator
 * @gh: caller-provided holder; initialised and uninitialised here
 *
 * Returns: the number of bytes read, or a negative errno
 */
static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to,
				     struct gfs2_holder *gh)
{
	struct file *file = iocb->ki_filp;
	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
	size_t prev_count = 0, window_size = 0;
	size_t read = 0;
	ssize_t ret;

	/*
	 * In this function, we disable page faults when we're holding the
	 * inode glock while doing I/O.  If a page fault occurs, we indicate
	 * that the inode glock should be dropped, fault in the pages manually,
	 * and retry.
	 *
	 * Unlike generic_file_read_iter, for reads, iomap_dio_rw can trigger
	 * physical as well as manual page faults, and we need to disable both
	 * kinds.
	 *
	 * For direct I/O, gfs2 takes the inode glock in deferred mode.  This
	 * locking mode is compatible with other deferred holders, so multiple
	 * processes and nodes can do direct I/O to a file at the same time.
	 * There's no guarantee that reads or writes will be atomic.  Any
	 * coordination among readers and writers needs to happen externally.
	 */

	if (!iov_iter_count(to))
		return 0; /* skip atime */

	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, gh);
retry:
	ret = gfs2_glock_nq(gh);
	if (ret)
		goto out_uninit;
	pagefault_disable();
	to->nofault = true;
	ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL,
			   IOMAP_DIO_PARTIAL, NULL, read);
	to->nofault = false;
	pagefault_enable();
	if (ret <= 0 && ret != -EFAULT)
		goto out_unlock;
	/* No increment (+=) because iomap_dio_rw returns a cumulative value. */
	if (ret > 0)
		read = ret;

	if (should_fault_in_pages(to, iocb, &prev_count, &window_size)) {
		/* Drop the glock before touching user memory. */
		gfs2_glock_dq(gh);
		window_size -= fault_in_iov_iter_writeable(to, window_size);
		if (window_size)
			goto retry;
	}
out_unlock:
	/* The glock may already have been dropped on the fault-in path. */
	if (gfs2_holder_queued(gh))
		gfs2_glock_dq(gh);
out_uninit:
	gfs2_holder_uninit(gh);
	/* User space doesn't expect partial success. */
	if (ret < 0)
		return ret;
	return read;
}

/*
 * gfs2_file_direct_write - direct I/O write under a deferred glock
 * @iocb: the I/O control block
 * @from: source iterator
 * @gh: caller-provided holder; initialised and uninitialised here
 *
 * Returns: the number of bytes written by direct I/O (possibly 0, which
 * makes the caller fall back to buffered I/O when data remains in @from),
 * or a negative errno
 */
static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
				      struct gfs2_holder *gh)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	size_t prev_count = 0, window_size = 0;
	size_t written = 0;
	bool enough_retries;
	ssize_t ret;

	/*
	 * In this function, we disable page faults when we're holding the
	 * inode glock while doing I/O.  If a page fault occurs, we indicate
	 * that the inode glock should be dropped, fault in the pages manually,
	 * and retry.
	 *
	 * For writes, iomap_dio_rw only triggers manual page faults, so we
	 * don't need to disable physical ones.
	 */

	/*
	 * Deferred lock, even if its a write, since we do no allocation on
	 * this path. All we need to change is the atime, and this lock mode
	 * ensures that other nodes have flushed their buffered read caches
	 * (i.e. their page cache entries for this inode). We do not,
	 * unfortunately, have the option of only flushing a range like the
	 * VFS does.
	 */
	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, gh);
retry:
	ret = gfs2_glock_nq(gh);
	if (ret)
		goto out_uninit;
	/* Silently fall back to buffered I/O when writing beyond EOF */
	if (iocb->ki_pos + iov_iter_count(from) > i_size_read(&ip->i_inode))
		goto out_unlock;

	from->nofault = true;
	ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL,
			   IOMAP_DIO_PARTIAL, NULL, written);
	from->nofault = false;
	if (ret <= 0) {
		/* -ENOTBLK is turned into "nothing written" (buffered fallback). */
		if (ret == -ENOTBLK)
			ret = 0;
		if (ret != -EFAULT)
			goto out_unlock;
	}
	/* No increment (+=) because iomap_dio_rw returns a cumulative value. */
	if (ret > 0)
		written = ret;

	/*
	 * No progress since the last attempt and the window was already down
	 * to at most one page: stop retrying after this fault-in.
	 */
	enough_retries = prev_count == iov_iter_count(from) &&
			 window_size <= PAGE_SIZE;
	if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) {
		gfs2_glock_dq(gh);
		window_size -= fault_in_iov_iter_readable(from, window_size);
		if (window_size) {
			if (!enough_retries)
				goto retry;
			/* fall back to buffered I/O */
			ret = 0;
		}
	}
out_unlock:
	if (gfs2_holder_queued(gh))
		gfs2_glock_dq(gh);
out_uninit:
	gfs2_holder_uninit(gh);
	/* User space doesn't expect partial success. */
	if (ret < 0)
		return ret;
	return written;
}

/*
 * gfs2_file_read_iter - read from a file
 * @iocb: the I/O control block
 * @to: destination iterator
 *
 * Direct I/O is handed to gfs2_file_direct_read().  Buffered reads are first
 * attempted with IOCB_NOIO set and without taking the glock; only if that
 * does not satisfy the request is the glock taken in shared mode.
 *
 * Returns: the number of bytes read, or a negative errno
 */
static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct gfs2_inode *ip;
	struct gfs2_holder gh;
	size_t prev_count = 0, window_size = 0;
	size_t read = 0;
	ssize_t ret;

	/*
	 * In this function, we disable page faults when we're holding the
	 * inode glock while doing I/O.  If a page fault occurs, we indicate
	 * that the inode glock should be dropped, fault in the pages manually,
	 * and retry.
	 */

	if (iocb->ki_flags & IOCB_DIRECT)
		return gfs2_file_direct_read(iocb, to, &gh);

	/* First attempt: IOCB_NOIO, page faults disabled, no glock. */
	pagefault_disable();
	iocb->ki_flags |= IOCB_NOIO;
	ret = generic_file_read_iter(iocb, to);
	iocb->ki_flags &= ~IOCB_NOIO;
	pagefault_enable();
	if (ret >= 0) {
		if (!iov_iter_count(to))
			return ret;
		read = ret;
	} else if (ret != -EFAULT) {
		if (ret != -EAGAIN)
			return ret;
		if (iocb->ki_flags & IOCB_NOWAIT)
			return ret;
	}
	ip = GFS2_I(iocb->ki_filp->f_mapping->host);
	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
retry:
	ret = gfs2_glock_nq(&gh);
	if (ret)
		goto out_uninit;
	pagefault_disable();
	ret = generic_file_read_iter(iocb, to);
	pagefault_enable();
	if (ret <= 0 && ret != -EFAULT)
		goto out_unlock;
	/* Unlike the direct I/O path, each call reports only its own bytes. */
	if (ret > 0)
		read += ret;

	if (should_fault_in_pages(to, iocb, &prev_count, &window_size)) {
		gfs2_glock_dq(&gh);
		window_size -= fault_in_iov_iter_writeable(to, window_size);
		if (window_size)
			goto retry;
	}
out_unlock:
	if (gfs2_holder_queued(&gh))
		gfs2_glock_dq(&gh);
out_uninit:
	gfs2_holder_uninit(&gh);
	return read ? read : ret;
}

/*
 * gfs2_file_buffered_write - buffered write under an exclusive glock
 * @iocb: the I/O control block
 * @from: source iterator
 * @gh: caller-provided holder; initialised and uninitialised here
 *
 * When the target is the rindex inode, the statfs inode's glock is also
 * held exclusively around the write.
 *
 * Returns: the number of bytes written, or a negative errno if nothing was
 * written
 */
static ssize_t gfs2_file_buffered_write(struct kiocb *iocb,
					struct iov_iter *from,
					struct gfs2_holder *gh)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_holder *statfs_gh = NULL;
	size_t prev_count = 0, window_size = 0;
	size_t orig_count = iov_iter_count(from);
	size_t written = 0;
	ssize_t ret;

	/*
	 * In this function, we disable page faults when we're holding the
	 * inode glock while doing I/O.  If a page fault occurs, we indicate
	 * that the inode glock should be dropped, fault in the pages manually,
	 * and retry.
	 */

	if (inode == sdp->sd_rindex) {
		statfs_gh = kmalloc_obj(*statfs_gh, GFP_NOFS);
		if (!statfs_gh)
			return -ENOMEM;
	}

	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, gh);
	if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) {
retry:
		window_size -= fault_in_iov_iter_readable(from, window_size);
		if (!window_size) {
			ret = -EFAULT;
			goto out_uninit;
		}
		/* Limit this pass to what was actually faulted in. */
		from->count = min(from->count, window_size);
	}
	ret = gfs2_glock_nq(gh);
	if (ret)
		goto out_uninit;

	if (inode == sdp->sd_rindex) {
		struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);

		ret = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE,
					 GL_NOCACHE, statfs_gh);
		if (ret)
			goto out_unlock;
	}

	pagefault_disable();
	ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops,
					&gfs2_iomap_write_ops, NULL);
	pagefault_enable();
	if (ret > 0)
		written += ret;

	if (inode == sdp->sd_rindex)
		gfs2_glock_dq_uninit(statfs_gh);

	if (ret <= 0 && ret != -EFAULT)
		goto out_unlock;

	/* Restore the full remaining count before deciding whether to retry. */
	from->count = orig_count - written;
	if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) {
		gfs2_glock_dq(gh);
		goto retry;
	}
out_unlock:
	if (gfs2_holder_queued(gh))
		gfs2_glock_dq(gh);
out_uninit:
	gfs2_holder_uninit(gh);
	kfree(statfs_gh);
	from->count = orig_count - written;
	return written ? written : ret;
}

/**
 * gfs2_file_write_iter - Perform a write to a file
 * @iocb: The io context
 * @from: The data to write
 *
 * We have to do a lock/unlock here to refresh the inode size for
 * O_APPEND writes, otherwise we can land up writing at the wrong
 * offset. There is still a race, but provided the app is using its
 * own file locking, this will make O_APPEND work as expected.
 *
 * Returns: the number of bytes written, or a negative errno
 */

static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder gh;
	ssize_t ret;

	gfs2_size_hint(file, iocb->ki_pos, iov_iter_count(from));

	/* Take and immediately drop the glock to refresh the inode size. */
	if (iocb->ki_flags & IOCB_APPEND) {
		ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
		if (ret)
			return ret;
		gfs2_glock_dq_uninit(&gh);
	}

	inode_lock(inode);
	ret = generic_write_checks(iocb, from);
	if (ret <= 0)
		goto out_unlock;

	ret = file_remove_privs(file);
	if (ret)
		goto out_unlock;

	if (iocb->ki_flags & IOCB_DIRECT) {
		struct address_space *mapping = file->f_mapping;
		ssize_t buffered, ret2;

		/*
		 * Note that under direct I/O, we don't allow any inode
		 * timestamp updates, so we're not calling file_update_time()
		 * here.
		 */
		ret = gfs2_file_direct_write(iocb, from, &gh);
		if (ret < 0 || !iov_iter_count(from))
			goto out_unlock;

		/* Write whatever direct I/O left over through the page cache. */
		iocb->ki_flags |= IOCB_DSYNC;
		buffered = gfs2_file_buffered_write(iocb, from, &gh);
		if (unlikely(buffered <= 0)) {
			if (!ret)
				ret = buffered;
			goto out_unlock;
		}

		/*
		 * We need to ensure that the page cache pages are written to
		 * disk and invalidated to preserve the expected O_DIRECT
		 * semantics.  If the writeback or invalidate fails, only report
		 * the direct I/O range as we don't know if the buffered pages
		 * made it to disk.
		 */
		ret2 = generic_write_sync(iocb, buffered);
		invalidate_mapping_pages(mapping,
				(iocb->ki_pos - buffered) >> PAGE_SHIFT,
				(iocb->ki_pos - 1) >> PAGE_SHIFT);
		if (!ret || ret2 > 0)
			ret += ret2;
	} else {
		ret = file_update_time(file);
		if (ret)
			goto out_unlock;

		ret = gfs2_file_buffered_write(iocb, from, &gh);
		if (likely(ret > 0))
			ret = generic_write_sync(iocb, ret);
	}

out_unlock:
	inode_unlock(inode);
	return ret;
}

/*
 * fallocate_chunk - allocate and zero one chunk of a fallocate request
 * @inode: the inode
 * @offset: start of the chunk
 * @len: length of the chunk
 * @mode: fallocate mode (not referenced in this function)
 *
 * Unstuffs the inode if necessary, then repeatedly calls gfs2_iomap_alloc()
 * until @offset + @len is reached.  Extents flagged IOMAP_F_NEW are zeroed
 * on disk with sb_issue_zeroout().
 *
 * Returns: 0 on success, or errno
 */
static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
			   int mode)
{
	struct super_block *sb = inode->i_sb;
	struct gfs2_inode *ip = GFS2_I(inode);
	loff_t end = offset + len;
	struct buffer_head *dibh;
	int error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (unlikely(error))
		return error;

	gfs2_trans_add_meta(ip->i_gl, dibh);

	if (gfs2_is_stuffed(ip)) {
		error = gfs2_unstuff_dinode(ip);
		if (unlikely(error))
			goto out;
	}

	while (offset < end) {
		struct iomap iomap = { };

		error = gfs2_iomap_alloc(inode, offset, end - offset, &iomap);
		if (error)
			goto out;
		/* Continue right after the extent that was just mapped. */
		offset = iomap.offset + iomap.length;
		/* Only extents flagged IOMAP_F_NEW are zeroed. */
		if (!(iomap.flags & IOMAP_F_NEW))
			continue;
		error = sb_issue_zeroout(sb, iomap.addr >> inode->i_blkbits,
					 iomap.length >> inode->i_blkbits,
					 GFP_NOFS);
		if (error) {
			fs_err(GFS2_SB(inode), "Failed to zero data buffers\n");
			goto out;
		}
	}
out:
	brelse(dibh);
	return error;
}

/**
 * calc_max_reserv() - Reverse of write_calc_reserv. Given a number of
 *                     blocks, determine how many bytes can be written.
 * @ip:          The inode in question.
 * @len:         Max cap of bytes. What we return in *len must be <= this.
 * @data_blocks: Compute and return the number of data blocks needed
 * @ind_blocks:  Compute and return the number of indirect blocks needed
 * @max_blocks:  The total blocks available to work with.
 *
 * Returns: void, but @len, @data_blocks and @ind_blocks are filled in.
 */
static void calc_max_reserv(struct gfs2_inode *ip, loff_t *len,
			    unsigned int *data_blocks, unsigned int *ind_blocks,
			    unsigned int max_blocks)
{
	loff_t max = *len;
	const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1);

	/* Subtract the indirect blocks needed at each level of the tree. */
	for (tmp = max_data; tmp > sdp->sd_diptrs;) {
		tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
		max_data -= tmp;
	}

	*data_blocks = max_data;
	*ind_blocks = max_blocks - max_data;
	*len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;
	/* Never exceed the caller's cap; recompute the block counts for it. */
	if (*len > max) {
		*len = max;
		gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks);
	}
}

/*
 * __gfs2_fallocate - preallocate blocks for a byte range
 * @file: the file
 * @mode: fallocate mode flags
 * @offset: start of the range
 * @len: length of the range
 *
 * The range is widened to filesystem block boundaries and processed in
 * chunks.  Each chunk that needs allocation gets its own quota check,
 * block reservation and transaction.
 *
 * Returns: 0 on success, or errno
 */
static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
	struct inode *inode = file_inode(file);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_alloc_parms ap = {};
	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
	loff_t bytes, max_bytes, max_blks;
	int error;
	/* The caller's original range, kept for the size update and fsync. */
	const loff_t pos = offset;
	const loff_t count = len;
	loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1);
	loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
	loff_t max_chunk_size = UINT_MAX & bsize_mask;

	/* Round the end up and the start down to block boundaries. */
	next = (next + 1) << sdp->sd_sb.sb_bsize_shift;

	offset &= bsize_mask;

	len = next - offset;
	bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2;
	if (!bytes)
		bytes = UINT_MAX;
	bytes &= bsize_mask;
	if (bytes == 0)
		bytes = sdp->sd_sb.sb_bsize;

	gfs2_size_hint(file, offset, len);

	gfs2_write_calc_reserv(ip, PAGE_SIZE, &data_blocks, &ind_blocks);
	ap.min_target = data_blocks + ind_blocks;

	while (len > 0) {
		if (len < bytes)
			bytes = len;
		/* Skip over stretches that are already fully allocated. */
		if (!gfs2_write_alloc_required(ip, offset, bytes)) {
			len -= bytes;
			offset += bytes;
			continue;
		}

		/* We need to determine how many bytes we can actually
		 * fallocate without exceeding quota or going over the
		 * end of the fs. We start off optimistically by assuming
		 * we can write max_bytes */
		max_bytes = (len > max_chunk_size) ? max_chunk_size : len;

		/* Since max_bytes is most likely a theoretical max, we
		 * calculate a more realistic 'bytes' to serve as a good
		 * starting point for the number of bytes we may be able
		 * to write */
		gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
		ap.target = data_blocks + ind_blocks;

		error = gfs2_quota_lock_check(ip, &ap);
		if (error)
			return error;
		/* ap.allowed tells us how many blocks quota will allow
		 * us to write. Check if this reduces max_blks */
		max_blks = UINT_MAX;
		if (ap.allowed)
			max_blks = ap.allowed;

		error = gfs2_inplace_reserve(ip, &ap);
		if (error)
			goto out_qunlock;

		/* check if the selected rgrp limits our max_blks further */
		if (ip->i_res.rs_reserved < max_blks)
			max_blks = ip->i_res.rs_reserved;

		/* Almost done. Calculate bytes that can be written using
		 * max_blks. We also recompute max_bytes, data_blocks and
		 * ind_blocks */
		calc_max_reserv(ip, &max_bytes, &data_blocks,
				&ind_blocks, max_blks);

		rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
			  RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks);
		if (gfs2_is_jdata(ip))
			rblocks += data_blocks ? data_blocks : 1;

		error = gfs2_trans_begin(sdp, rblocks,
					 PAGE_SIZE >> inode->i_blkbits);
		if (error)
			goto out_trans_fail;

		error = fallocate_chunk(inode, offset, max_bytes, mode);
		/* The transaction is ended whether or not the chunk succeeded. */
		gfs2_trans_end(sdp);

		if (error)
			goto out_trans_fail;

		len -= max_bytes;
		offset += max_bytes;
		gfs2_inplace_release(ip);
		gfs2_quota_unlock(ip);
	}

	if (!(mode & FALLOC_FL_KEEP_SIZE) && (pos + count) > inode->i_size)
		i_size_write(inode, pos + count);
	file_update_time(file);
	mark_inode_dirty(inode);

	if ((file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host))
		return vfs_fsync_range(file, pos, pos + count - 1,
			       (file->f_flags & __O_SYNC) ? 0 : 1);
	return 0;

out_trans_fail:
	gfs2_inplace_release(ip);
out_qunlock:
	gfs2_quota_unlock(ip);
	return error;
}

/*
 * gfs2_fallocate - fallocate entry point
 * @file: the file
 * @mode: FALLOC_FL_PUNCH_HOLE and/or FALLOC_FL_KEEP_SIZE; any other bit
 *        yields -EOPNOTSUPP
 * @offset: start of the range
 * @len: length of the range
 *
 * Runs with the inode lock and the exclusive inode glock held, and
 * dispatches to __gfs2_punch_hole() or __gfs2_fallocate().
 *
 * Returns: 0 on success, or errno
 */
static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
	struct inode *inode = file_inode(file);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder gh;
	int ret;

	if (mode & ~(FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE))
		return -EOPNOTSUPP;
	/* fallocate is needed by gfs2_grow to reserve space in the rindex */
	if (gfs2_is_jdata(ip) && inode != sdp->sd_rindex)
		return -EOPNOTSUPP;

	inode_lock(inode);

	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
	ret = gfs2_glock_nq(&gh);
	if (ret)
		goto out_uninit;

	/* Validate the new size only when the file is going to grow. */
	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
	    (offset + len) > inode->i_size) {
		ret = inode_newsize_ok(inode, offset + len);
		if (ret)
			goto out_unlock;
	}

	ret = get_write_access(inode);
	if (ret)
		goto out_unlock;

	if (mode & FALLOC_FL_PUNCH_HOLE) {
		ret = __gfs2_punch_hole(file, offset, len);
	} else {
		ret = __gfs2_fallocate(file, mode, offset, len);
		/* On failure, drop the inode's block reservation. */
		if (ret)
			gfs2_rs_deltree(&ip->i_res);
	}

	put_write_access(inode);
out_unlock:
	gfs2_glock_dq(&gh);
out_uninit:
	gfs2_holder_uninit(&gh);
	inode_unlock(inode);
	return ret;
}

/*
 * gfs2_file_splice_write - splice from a pipe into a file
 *
 * Records a size hint for the write and then defers to
 * iter_file_splice_write().
 */
static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
				      struct file *out, loff_t *ppos,
				      size_t len, unsigned int flags)
{
	ssize_t ret;

	gfs2_size_hint(out, *ppos, len);

	ret = iter_file_splice_write(pipe, out, ppos, len, flags);
	return ret;
}

#ifdef CONFIG_GFS2_FS_LOCKING_DLM

/**
 * gfs2_lock - acquire/release a posix lock on a file
 * @file: the file pointer
 * @cmd: either modify or retrieve lock state, possibly wait
 * @fl: type and range of lock
 *
 * Returns: errno
 */

static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
{
	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
	struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	int ret;

	if (!(fl->c.flc_flags & FL_POSIX))
		return -ENOLCK;
	if (gfs2_withdrawn(sdp)) {
		if
(lock_is_unlock(fl)) locks_lock_file_wait(file, fl); return -EIO; } down_read(&ls->ls_sem); ret = -ENODEV; if (likely(ls->ls_dlm != NULL)) { if (cmd == F_CANCELLK) ret = dlm_posix_cancel(ls->ls_dlm, ip->i_no_addr, file, fl); else if (IS_GETLK(cmd)) ret = dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl); else if (lock_is_unlock(fl)) ret = dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl); else ret = dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl); } up_read(&ls->ls_sem); return ret; } static void __flock_holder_uninit(struct file *file, struct gfs2_holder *fl_gh) { struct gfs2_glock *gl = gfs2_glock_hold(fl_gh->gh_gl); /* * Make sure gfs2_glock_put() won't sleep under the file->f_lock * spinlock. */ spin_lock(&file->f_lock); gfs2_holder_uninit(fl_gh); spin_unlock(&file->f_lock); gfs2_glock_put(gl); } static int do_flock(struct file *file, int cmd, struct file_lock *fl) { struct gfs2_file *fp = file->private_data; struct gfs2_holder *fl_gh = &fp->f_fl_gh; struct gfs2_inode *ip = GFS2_I(file_inode(file)); struct gfs2_glock *gl; unsigned int state; u16 flags; int error = 0; int sleeptime; state = lock_is_write(fl) ? 
LM_ST_EXCLUSIVE : LM_ST_SHARED; flags = GL_EXACT | GL_NOPID; if (!IS_SETLKW(cmd)) flags |= LM_FLAG_TRY_1CB; mutex_lock(&fp->f_fl_mutex); if (gfs2_holder_initialized(fl_gh)) { struct file_lock request; if (fl_gh->gh_state == state) goto out; locks_init_lock(&request); request.c.flc_type = F_UNLCK; request.c.flc_flags = FL_FLOCK; locks_lock_file_wait(file, &request); gfs2_glock_dq(fl_gh); gfs2_holder_reinit(state, flags, fl_gh); } else { error = gfs2_glock_get(GFS2_SB(&ip->i_inode), ip->i_no_addr, &gfs2_flock_glops, CREATE, &gl); if (error) goto out; spin_lock(&file->f_lock); gfs2_holder_init(gl, state, flags, fl_gh); spin_unlock(&file->f_lock); gfs2_glock_put(gl); } for (sleeptime = 1; sleeptime <= 4; sleeptime <<= 1) { error = gfs2_glock_nq(fl_gh); if (error != GLR_TRYFAILED) break; fl_gh->gh_flags &= ~LM_FLAG_TRY_1CB; fl_gh->gh_flags |= LM_FLAG_TRY; msleep(sleeptime); } if (error) { __flock_holder_uninit(file, fl_gh); if (error == GLR_TRYFAILED) error = -EAGAIN; } else { error = locks_lock_file_wait(file, fl); gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error); } out: mutex_unlock(&fp->f_fl_mutex); return error; } static void do_unflock(struct file *file, struct file_lock *fl) { struct gfs2_file *fp = file->private_data; struct gfs2_holder *fl_gh = &fp->f_fl_gh; mutex_lock(&fp->f_fl_mutex); locks_lock_file_wait(file, fl); if (gfs2_holder_initialized(fl_gh)) { gfs2_glock_dq(fl_gh); __flock_holder_uninit(file, fl_gh); } mutex_unlock(&fp->f_fl_mutex); } /** * gfs2_flock - acquire/release a flock lock on a file * @file: the file pointer * @cmd: either modify or retrieve lock state, possibly wait * @fl: type and range of lock * * Returns: errno */ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) { if (!(fl->c.flc_flags & FL_FLOCK)) return -ENOLCK; if (lock_is_unlock(fl)) { do_unflock(file, fl); return 0; } else { return do_flock(file, cmd, fl); } } const struct file_operations gfs2_file_fops = { .llseek = gfs2_llseek, .read_iter = 
gfs2_file_read_iter, .write_iter = gfs2_file_write_iter, .iopoll = iocb_bio_iopoll, .unlocked_ioctl = gfs2_ioctl, .compat_ioctl = gfs2_compat_ioctl, .mmap = gfs2_mmap, .open = gfs2_open, .release = gfs2_release, .fsync = gfs2_fsync, .lock = gfs2_lock, .flock = gfs2_flock, .splice_read = copy_splice_read, .splice_write = gfs2_file_splice_write, .fallocate = gfs2_fallocate, .fop_flags = FOP_ASYNC_LOCK, }; const struct file_operations gfs2_dir_fops = { .iterate_shared = gfs2_readdir, .unlocked_ioctl = gfs2_ioctl, .compat_ioctl = gfs2_compat_ioctl, .open = gfs2_open, .release = gfs2_release, .fsync = gfs2_fsync, .lock = gfs2_lock, .flock = gfs2_flock, .llseek = default_llseek, .fop_flags = FOP_ASYNC_LOCK, }; #endif /* CONFIG_GFS2_FS_LOCKING_DLM */ const struct file_operations gfs2_file_fops_nolock = { .llseek = gfs2_llseek, .read_iter = gfs2_file_read_iter, .write_iter = gfs2_file_write_iter, .iopoll = iocb_bio_iopoll, .unlocked_ioctl = gfs2_ioctl, .compat_ioctl = gfs2_compat_ioctl, .mmap = gfs2_mmap, .open = gfs2_open, .release = gfs2_release, .fsync = gfs2_fsync, .splice_read = copy_splice_read, .splice_write = gfs2_file_splice_write, .setlease = generic_setlease, .fallocate = gfs2_fallocate, }; const struct file_operations gfs2_dir_fops_nolock = { .iterate_shared = gfs2_readdir, .unlocked_ioctl = gfs2_ioctl, .compat_ioctl = gfs2_compat_ioctl, .open = gfs2_open, .release = gfs2_release, .fsync = gfs2_fsync, .llseek = default_llseek, .setlease = generic_setlease, };
24 24 24 24 24 14 8 5 24 24 14 24 14 24 24 24 14 24 9 24 24 5 5 5 5 5 5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 
509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 
1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 // SPDX-License-Identifier: GPL-2.0 /* * Functions related to sysfs handling */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/bio.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> #include <linux/blktrace_api.h> #include <linux/debugfs.h> #include "blk.h" #include "blk-mq.h" #include "blk-mq-debugfs.h" #include "blk-mq-sched.h" #include "blk-rq-qos.h" #include "blk-wbt.h" #include "blk-cgroup.h" #include "blk-throttle.h" struct queue_sysfs_entry { struct attribute attr; ssize_t (*show)(struct gendisk *disk, char *page); ssize_t (*show_limit)(struct gendisk *disk, char *page); ssize_t (*store)(struct gendisk *disk, const char *page, size_t count); int (*store_limit)(struct gendisk *disk, const char *page, size_t count, struct queue_limits *lim); }; static ssize_t queue_var_show(unsigned long var, char *page) { return sysfs_emit(page, "%lu\n", var); } static ssize_t queue_var_store(unsigned long *var, const char *page, size_t count) { int err; unsigned long v; err = kstrtoul(page, 10, &v); if (err || v > UINT_MAX) return -EINVAL; *var = v; return count; } static ssize_t queue_requests_show(struct gendisk *disk, char *page) { ssize_t ret; mutex_lock(&disk->queue->elevator_lock); ret = queue_var_show(disk->queue->nr_requests, page); mutex_unlock(&disk->queue->elevator_lock); return ret; } static ssize_t queue_requests_store(struct gendisk *disk, const char *page, size_t count) { struct request_queue *q = disk->queue; struct blk_mq_tag_set *set = q->tag_set; struct elevator_tags *et = NULL; unsigned int memflags; unsigned long nr; int ret; ret = queue_var_store(&nr, page, count); if (ret < 0) return ret; /* * Serialize updating nr_requests with concurrent queue_requests_store() * and switching elevator. 
* * Use trylock to avoid circular lock dependency with kernfs active * reference during concurrent disk deletion: * update_nr_hwq_lock -> kn->active (via del_gendisk -> kobject_del) * kn->active -> update_nr_hwq_lock (via this sysfs write path) */ if (!down_write_trylock(&set->update_nr_hwq_lock)) return -EBUSY; if (nr == q->nr_requests) goto unlock; if (nr < BLKDEV_MIN_RQ) nr = BLKDEV_MIN_RQ; /* * Switching elevator is protected by update_nr_hwq_lock: * - read lock is held from elevator sysfs attribute; * - write lock is held from updating nr_hw_queues; * Hence it's safe to access q->elevator here with write lock held. */ if (nr <= set->reserved_tags || (q->elevator && nr > MAX_SCHED_RQ) || (!q->elevator && nr > set->queue_depth)) { ret = -EINVAL; goto unlock; } if (!blk_mq_is_shared_tags(set->flags) && q->elevator && nr > q->elevator->et->nr_requests) { /* * Tags will grow, allocate memory before freezing queue to * prevent deadlock. */ et = blk_mq_alloc_sched_tags(set, q->nr_hw_queues, nr); if (!et) { ret = -ENOMEM; goto unlock; } } memflags = blk_mq_freeze_queue(q); mutex_lock(&q->elevator_lock); et = blk_mq_update_nr_requests(q, et, nr); mutex_unlock(&q->elevator_lock); blk_mq_unfreeze_queue(q, memflags); if (et) blk_mq_free_sched_tags(et, set); unlock: up_write(&set->update_nr_hwq_lock); return ret; } static ssize_t queue_async_depth_show(struct gendisk *disk, char *page) { guard(mutex)(&disk->queue->elevator_lock); return queue_var_show(disk->queue->async_depth, page); } static ssize_t queue_async_depth_store(struct gendisk *disk, const char *page, size_t count) { struct request_queue *q = disk->queue; unsigned int memflags; unsigned long nr; int ret; if (!queue_is_mq(q)) return -EINVAL; ret = queue_var_store(&nr, page, count); if (ret < 0) return ret; if (nr == 0) return -EINVAL; memflags = blk_mq_freeze_queue(q); scoped_guard(mutex, &q->elevator_lock) { if (q->elevator) { q->async_depth = min(q->nr_requests, nr); if (q->elevator->type->ops.depth_updated) 
q->elevator->type->ops.depth_updated(q); } else { ret = -EINVAL; } } blk_mq_unfreeze_queue(q, memflags); return ret; } static ssize_t queue_ra_show(struct gendisk *disk, char *page) { ssize_t ret; mutex_lock(&disk->queue->limits_lock); ret = queue_var_show(disk->bdi->ra_pages << (PAGE_SHIFT - 10), page); mutex_unlock(&disk->queue->limits_lock); return ret; } static ssize_t queue_ra_store(struct gendisk *disk, const char *page, size_t count) { unsigned long ra_kb; ssize_t ret; struct request_queue *q = disk->queue; ret = queue_var_store(&ra_kb, page, count); if (ret < 0) return ret; /* * The ->ra_pages change below is protected by ->limits_lock because it * is usually calculated from the queue limits by * queue_limits_commit_update(). * * bdi->ra_pages reads are not serialized against bdi->ra_pages writes. * Use WRITE_ONCE() to write bdi->ra_pages once. */ mutex_lock(&q->limits_lock); WRITE_ONCE(disk->bdi->ra_pages, ra_kb >> (PAGE_SHIFT - 10)); mutex_unlock(&q->limits_lock); return ret; } #define QUEUE_SYSFS_LIMIT_SHOW(_field) \ static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \ { \ return queue_var_show(disk->queue->limits._field, page); \ } QUEUE_SYSFS_LIMIT_SHOW(max_segments) QUEUE_SYSFS_LIMIT_SHOW(max_discard_segments) QUEUE_SYSFS_LIMIT_SHOW(max_integrity_segments) QUEUE_SYSFS_LIMIT_SHOW(max_segment_size) QUEUE_SYSFS_LIMIT_SHOW(max_write_streams) QUEUE_SYSFS_LIMIT_SHOW(write_stream_granularity) QUEUE_SYSFS_LIMIT_SHOW(logical_block_size) QUEUE_SYSFS_LIMIT_SHOW(physical_block_size) QUEUE_SYSFS_LIMIT_SHOW(chunk_sectors) QUEUE_SYSFS_LIMIT_SHOW(io_min) QUEUE_SYSFS_LIMIT_SHOW(io_opt) QUEUE_SYSFS_LIMIT_SHOW(discard_granularity) QUEUE_SYSFS_LIMIT_SHOW(zone_write_granularity) QUEUE_SYSFS_LIMIT_SHOW(virt_boundary_mask) QUEUE_SYSFS_LIMIT_SHOW(dma_alignment) QUEUE_SYSFS_LIMIT_SHOW(max_open_zones) QUEUE_SYSFS_LIMIT_SHOW(max_active_zones) QUEUE_SYSFS_LIMIT_SHOW(atomic_write_unit_min) QUEUE_SYSFS_LIMIT_SHOW(atomic_write_unit_max) #define 
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(_field) \ static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \ { \ return sysfs_emit(page, "%llu\n", \ (unsigned long long)disk->queue->limits._field << \ SECTOR_SHIFT); \ } QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_discard_sectors) QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_hw_discard_sectors) QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_write_zeroes_sectors) QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_hw_wzeroes_unmap_sectors) QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_wzeroes_unmap_sectors) QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_max_sectors) QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_boundary_sectors) QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_zone_append_sectors) #define QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(_field) \ static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \ { \ return queue_var_show(disk->queue->limits._field >> 1, page); \ } QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(max_sectors) QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(max_hw_sectors) #define QUEUE_SYSFS_SHOW_CONST(_name, _val) \ static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \ { \ return sysfs_emit(page, "%d\n", _val); \ } /* deprecated fields */ QUEUE_SYSFS_SHOW_CONST(discard_zeroes_data, 0) QUEUE_SYSFS_SHOW_CONST(write_same_max, 0) QUEUE_SYSFS_SHOW_CONST(poll_delay, -1) static int queue_max_discard_sectors_store(struct gendisk *disk, const char *page, size_t count, struct queue_limits *lim) { unsigned long max_discard_bytes; ssize_t ret; ret = queue_var_store(&max_discard_bytes, page, count); if (ret < 0) return ret; if (max_discard_bytes & (disk->queue->limits.discard_granularity - 1)) return -EINVAL; if ((max_discard_bytes >> SECTOR_SHIFT) > UINT_MAX) return -EINVAL; lim->max_user_discard_sectors = max_discard_bytes >> SECTOR_SHIFT; return 0; } static int queue_max_wzeroes_unmap_sectors_store(struct gendisk *disk, const char *page, size_t count, struct queue_limits *lim) { 
unsigned long max_zeroes_bytes, max_hw_zeroes_bytes; ssize_t ret; ret = queue_var_store(&max_zeroes_bytes, page, count); if (ret < 0) return ret; max_hw_zeroes_bytes = lim->max_hw_wzeroes_unmap_sectors << SECTOR_SHIFT; if (max_zeroes_bytes != 0 && max_zeroes_bytes != max_hw_zeroes_bytes) return -EINVAL; lim->max_user_wzeroes_unmap_sectors = max_zeroes_bytes >> SECTOR_SHIFT; return 0; } static int queue_max_sectors_store(struct gendisk *disk, const char *page, size_t count, struct queue_limits *lim) { unsigned long max_sectors_kb; ssize_t ret; ret = queue_var_store(&max_sectors_kb, page, count); if (ret < 0) return ret; lim->max_user_sectors = max_sectors_kb << 1; return 0; } static ssize_t queue_feature_store(struct gendisk *disk, const char *page, size_t count, struct queue_limits *lim, blk_features_t feature) { unsigned long val; ssize_t ret; ret = queue_var_store(&val, page, count); if (ret < 0) return ret; if (val) lim->features |= feature; else lim->features &= ~feature; return 0; } #define QUEUE_SYSFS_FEATURE(_name, _feature) \ static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \ { \ return sysfs_emit(page, "%u\n", \ !!(disk->queue->limits.features & _feature)); \ } \ static int queue_##_name##_store(struct gendisk *disk, \ const char *page, size_t count, struct queue_limits *lim) \ { \ return queue_feature_store(disk, page, count, lim, _feature); \ } QUEUE_SYSFS_FEATURE(rotational, BLK_FEAT_ROTATIONAL) QUEUE_SYSFS_FEATURE(add_random, BLK_FEAT_ADD_RANDOM) QUEUE_SYSFS_FEATURE(iostats, BLK_FEAT_IO_STAT) QUEUE_SYSFS_FEATURE(stable_writes, BLK_FEAT_STABLE_WRITES); #define QUEUE_SYSFS_FEATURE_SHOW(_name, _feature) \ static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \ { \ return sysfs_emit(page, "%u\n", \ !!(disk->queue->limits.features & _feature)); \ } QUEUE_SYSFS_FEATURE_SHOW(fua, BLK_FEAT_FUA); QUEUE_SYSFS_FEATURE_SHOW(dax, BLK_FEAT_DAX); static ssize_t queue_poll_show(struct gendisk *disk, char *page) { if 
(queue_is_mq(disk->queue)) return sysfs_emit(page, "%u\n", blk_mq_can_poll(disk->queue)); return sysfs_emit(page, "%u\n", !!(disk->queue->limits.features & BLK_FEAT_POLL)); } static ssize_t queue_zoned_show(struct gendisk *disk, char *page) { if (blk_queue_is_zoned(disk->queue)) return sysfs_emit(page, "host-managed\n"); return sysfs_emit(page, "none\n"); } static ssize_t queue_nr_zones_show(struct gendisk *disk, char *page) { return queue_var_show(disk_nr_zones(disk), page); } static ssize_t queue_iostats_passthrough_show(struct gendisk *disk, char *page) { return queue_var_show(!!blk_queue_passthrough_stat(disk->queue), page); } static int queue_iostats_passthrough_store(struct gendisk *disk, const char *page, size_t count, struct queue_limits *lim) { unsigned long ios; ssize_t ret; ret = queue_var_store(&ios, page, count); if (ret < 0) return ret; if (ios) lim->flags |= BLK_FLAG_IOSTATS_PASSTHROUGH; else lim->flags &= ~BLK_FLAG_IOSTATS_PASSTHROUGH; return 0; } static ssize_t queue_nomerges_show(struct gendisk *disk, char *page) { return queue_var_show((blk_queue_nomerges(disk->queue) << 1) | blk_queue_noxmerges(disk->queue), page); } static ssize_t queue_nomerges_store(struct gendisk *disk, const char *page, size_t count) { unsigned long nm; struct request_queue *q = disk->queue; ssize_t ret = queue_var_store(&nm, page, count); if (ret < 0) return ret; blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, q); blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, q); if (nm == 2) blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q); else if (nm) blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q); return ret; } static ssize_t queue_rq_affinity_show(struct gendisk *disk, char *page) { bool set = test_bit(QUEUE_FLAG_SAME_COMP, &disk->queue->queue_flags); bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &disk->queue->queue_flags); return queue_var_show(set << force, page); } static ssize_t queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count) { ssize_t ret = -EINVAL; #ifdef CONFIG_SMP 
struct request_queue *q = disk->queue; unsigned long val; ret = queue_var_store(&val, page, count); if (ret < 0) return ret; /* * Here we update two queue flags each using atomic bitops, although * updating two flags isn't atomic it should be harmless as those flags * are accessed individually using atomic test_bit operation. So we * don't grab any lock while updating these flags. */ if (val == 2) { blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q); blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, q); } else if (val == 1) { blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q); blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q); } else if (val == 0) { blk_queue_flag_clear(QUEUE_FLAG_SAME_COMP, q); blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q); } #endif return ret; } static ssize_t queue_poll_delay_store(struct gendisk *disk, const char *page, size_t count) { return count; } static ssize_t queue_poll_store(struct gendisk *disk, const char *page, size_t count) { ssize_t ret = count; struct request_queue *q = disk->queue; if (!(q->limits.features & BLK_FEAT_POLL)) { ret = -EINVAL; goto out; } pr_info_ratelimited("writes to the poll attribute are ignored.\n"); pr_info_ratelimited("please use driver specific parameters instead.\n"); out: return ret; } static ssize_t queue_io_timeout_show(struct gendisk *disk, char *page) { return sysfs_emit(page, "%u\n", jiffies_to_msecs(READ_ONCE(disk->queue->rq_timeout))); } static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page, size_t count) { unsigned int val; int err; struct request_queue *q = disk->queue; err = kstrtou32(page, 10, &val); if (err || val == 0) return -EINVAL; blk_queue_rq_timeout(q, msecs_to_jiffies(val)); return count; } static ssize_t queue_wc_show(struct gendisk *disk, char *page) { if (blk_queue_write_cache(disk->queue)) return sysfs_emit(page, "write back\n"); return sysfs_emit(page, "write through\n"); } static int queue_wc_store(struct gendisk *disk, const char *page, size_t count, struct queue_limits *lim) { 
bool disable; if (!strncmp(page, "write back", 10)) { disable = false; } else if (!strncmp(page, "write through", 13) || !strncmp(page, "none", 4)) { disable = true; } else { return -EINVAL; } if (disable) lim->flags |= BLK_FLAG_WRITE_CACHE_DISABLED; else lim->flags &= ~BLK_FLAG_WRITE_CACHE_DISABLED; return 0; } #define QUEUE_RO_ENTRY(_prefix, _name) \ static struct queue_sysfs_entry _prefix##_entry = { \ .attr = { .name = _name, .mode = 0444 }, \ .show = _prefix##_show, \ }; #define QUEUE_RW_ENTRY(_prefix, _name) \ static struct queue_sysfs_entry _prefix##_entry = { \ .attr = { .name = _name, .mode = 0644 }, \ .show = _prefix##_show, \ .store = _prefix##_store, \ }; #define QUEUE_LIM_RO_ENTRY(_prefix, _name) \ static struct queue_sysfs_entry _prefix##_entry = { \ .attr = { .name = _name, .mode = 0444 }, \ .show_limit = _prefix##_show, \ } #define QUEUE_LIM_RW_ENTRY(_prefix, _name) \ static struct queue_sysfs_entry _prefix##_entry = { \ .attr = { .name = _name, .mode = 0644 }, \ .show_limit = _prefix##_show, \ .store_limit = _prefix##_store, \ } QUEUE_RW_ENTRY(queue_requests, "nr_requests"); QUEUE_RW_ENTRY(queue_async_depth, "async_depth"); QUEUE_RW_ENTRY(queue_ra, "read_ahead_kb"); QUEUE_LIM_RW_ENTRY(queue_max_sectors, "max_sectors_kb"); QUEUE_LIM_RO_ENTRY(queue_max_hw_sectors, "max_hw_sectors_kb"); QUEUE_LIM_RO_ENTRY(queue_max_segments, "max_segments"); QUEUE_LIM_RO_ENTRY(queue_max_integrity_segments, "max_integrity_segments"); QUEUE_LIM_RO_ENTRY(queue_max_segment_size, "max_segment_size"); QUEUE_LIM_RO_ENTRY(queue_max_write_streams, "max_write_streams"); QUEUE_LIM_RO_ENTRY(queue_write_stream_granularity, "write_stream_granularity"); QUEUE_RW_ENTRY(elv_iosched, "scheduler"); QUEUE_LIM_RO_ENTRY(queue_logical_block_size, "logical_block_size"); QUEUE_LIM_RO_ENTRY(queue_physical_block_size, "physical_block_size"); QUEUE_LIM_RO_ENTRY(queue_chunk_sectors, "chunk_sectors"); QUEUE_LIM_RO_ENTRY(queue_io_min, "minimum_io_size"); QUEUE_LIM_RO_ENTRY(queue_io_opt, 
"optimal_io_size"); QUEUE_LIM_RO_ENTRY(queue_max_discard_segments, "max_discard_segments"); QUEUE_LIM_RO_ENTRY(queue_discard_granularity, "discard_granularity"); QUEUE_LIM_RO_ENTRY(queue_max_hw_discard_sectors, "discard_max_hw_bytes"); QUEUE_LIM_RW_ENTRY(queue_max_discard_sectors, "discard_max_bytes"); QUEUE_RO_ENTRY(queue_discard_zeroes_data, "discard_zeroes_data"); QUEUE_LIM_RO_ENTRY(queue_atomic_write_max_sectors, "atomic_write_max_bytes"); QUEUE_LIM_RO_ENTRY(queue_atomic_write_boundary_sectors, "atomic_write_boundary_bytes"); QUEUE_LIM_RO_ENTRY(queue_atomic_write_unit_max, "atomic_write_unit_max_bytes"); QUEUE_LIM_RO_ENTRY(queue_atomic_write_unit_min, "atomic_write_unit_min_bytes"); QUEUE_RO_ENTRY(queue_write_same_max, "write_same_max_bytes"); QUEUE_LIM_RO_ENTRY(queue_max_write_zeroes_sectors, "write_zeroes_max_bytes"); QUEUE_LIM_RO_ENTRY(queue_max_hw_wzeroes_unmap_sectors, "write_zeroes_unmap_max_hw_bytes"); QUEUE_LIM_RW_ENTRY(queue_max_wzeroes_unmap_sectors, "write_zeroes_unmap_max_bytes"); QUEUE_LIM_RO_ENTRY(queue_max_zone_append_sectors, "zone_append_max_bytes"); QUEUE_LIM_RO_ENTRY(queue_zone_write_granularity, "zone_write_granularity"); QUEUE_LIM_RO_ENTRY(queue_zoned, "zoned"); QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones"); QUEUE_LIM_RO_ENTRY(queue_max_open_zones, "max_open_zones"); QUEUE_LIM_RO_ENTRY(queue_max_active_zones, "max_active_zones"); QUEUE_RW_ENTRY(queue_nomerges, "nomerges"); QUEUE_LIM_RW_ENTRY(queue_iostats_passthrough, "iostats_passthrough"); QUEUE_RW_ENTRY(queue_rq_affinity, "rq_affinity"); QUEUE_RW_ENTRY(queue_poll, "io_poll"); QUEUE_RW_ENTRY(queue_poll_delay, "io_poll_delay"); QUEUE_LIM_RW_ENTRY(queue_wc, "write_cache"); QUEUE_LIM_RO_ENTRY(queue_fua, "fua"); QUEUE_LIM_RO_ENTRY(queue_dax, "dax"); QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout"); QUEUE_LIM_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask"); QUEUE_LIM_RO_ENTRY(queue_dma_alignment, "dma_alignment"); /* legacy alias for logical_block_size: */ static struct queue_sysfs_entry 
queue_hw_sector_size_entry = { .attr = {.name = "hw_sector_size", .mode = 0444 }, .show_limit = queue_logical_block_size_show, }; QUEUE_LIM_RW_ENTRY(queue_rotational, "rotational"); QUEUE_LIM_RW_ENTRY(queue_iostats, "iostats"); QUEUE_LIM_RW_ENTRY(queue_add_random, "add_random"); QUEUE_LIM_RW_ENTRY(queue_stable_writes, "stable_writes"); #ifdef CONFIG_BLK_WBT static ssize_t queue_var_store64(s64 *var, const char *page) { int err; s64 v; err = kstrtos64(page, 10, &v); if (err < 0) return err; *var = v; return 0; } static ssize_t queue_wb_lat_show(struct gendisk *disk, char *page) { ssize_t ret; struct request_queue *q = disk->queue; mutex_lock(&disk->rqos_state_mutex); if (!wbt_rq_qos(q)) { ret = -EINVAL; goto out; } if (wbt_disabled(q)) { ret = sysfs_emit(page, "0\n"); goto out; } ret = sysfs_emit(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000)); out: mutex_unlock(&disk->rqos_state_mutex); return ret; } static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page, size_t count) { ssize_t ret; s64 val; ret = queue_var_store64(&val, page); if (ret < 0) return ret; if (val < -1) return -EINVAL; ret = wbt_set_lat(disk, val); return ret ? ret : count; } QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec"); #endif /* Common attributes for bio-based and request-based queues. */ static struct attribute *queue_attrs[] = { /* * Attributes which are protected with q->limits_lock. 
*/ &queue_max_hw_sectors_entry.attr, &queue_max_sectors_entry.attr, &queue_max_segments_entry.attr, &queue_max_discard_segments_entry.attr, &queue_max_integrity_segments_entry.attr, &queue_max_segment_size_entry.attr, &queue_max_write_streams_entry.attr, &queue_write_stream_granularity_entry.attr, &queue_hw_sector_size_entry.attr, &queue_logical_block_size_entry.attr, &queue_physical_block_size_entry.attr, &queue_chunk_sectors_entry.attr, &queue_io_min_entry.attr, &queue_io_opt_entry.attr, &queue_discard_granularity_entry.attr, &queue_max_discard_sectors_entry.attr, &queue_max_hw_discard_sectors_entry.attr, &queue_atomic_write_max_sectors_entry.attr, &queue_atomic_write_boundary_sectors_entry.attr, &queue_atomic_write_unit_min_entry.attr, &queue_atomic_write_unit_max_entry.attr, &queue_max_write_zeroes_sectors_entry.attr, &queue_max_hw_wzeroes_unmap_sectors_entry.attr, &queue_max_wzeroes_unmap_sectors_entry.attr, &queue_max_zone_append_sectors_entry.attr, &queue_zone_write_granularity_entry.attr, &queue_rotational_entry.attr, &queue_zoned_entry.attr, &queue_max_open_zones_entry.attr, &queue_max_active_zones_entry.attr, &queue_iostats_passthrough_entry.attr, &queue_iostats_entry.attr, &queue_stable_writes_entry.attr, &queue_add_random_entry.attr, &queue_wc_entry.attr, &queue_fua_entry.attr, &queue_dax_entry.attr, &queue_virt_boundary_mask_entry.attr, &queue_dma_alignment_entry.attr, &queue_ra_entry.attr, /* * Attributes which don't require locking. */ &queue_discard_zeroes_data_entry.attr, &queue_write_same_max_entry.attr, &queue_nr_zones_entry.attr, &queue_nomerges_entry.attr, &queue_poll_entry.attr, &queue_poll_delay_entry.attr, NULL, }; /* Request-based queue attributes that are not relevant for bio-based queues. */ static struct attribute *blk_mq_queue_attrs[] = { /* * Attributes which require some form of locking other than * q->sysfs_lock. 
*/ &elv_iosched_entry.attr, &queue_requests_entry.attr, &queue_async_depth_entry.attr, #ifdef CONFIG_BLK_WBT &queue_wb_lat_entry.attr, #endif /* * Attributes which don't require locking. */ &queue_rq_affinity_entry.attr, &queue_io_timeout_entry.attr, NULL, }; static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr, int n) { struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj); struct request_queue *q = disk->queue; if ((attr == &queue_max_open_zones_entry.attr || attr == &queue_max_active_zones_entry.attr) && !blk_queue_is_zoned(q)) return 0; return attr->mode; } static umode_t blk_mq_queue_attr_visible(struct kobject *kobj, struct attribute *attr, int n) { struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj); struct request_queue *q = disk->queue; if (!queue_is_mq(q)) return 0; if (attr == &queue_io_timeout_entry.attr && !q->mq_ops->timeout) return 0; return attr->mode; } static struct attribute_group queue_attr_group = { .attrs = queue_attrs, .is_visible = queue_attr_visible, }; static struct attribute_group blk_mq_queue_attr_group = { .attrs = blk_mq_queue_attrs, .is_visible = blk_mq_queue_attr_visible, }; #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr) static ssize_t queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) { struct queue_sysfs_entry *entry = to_queue(attr); struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj); if (!entry->show && !entry->show_limit) return -EIO; if (entry->show_limit) { ssize_t res; mutex_lock(&disk->queue->limits_lock); res = entry->show_limit(disk, page); mutex_unlock(&disk->queue->limits_lock); return res; } return entry->show(disk, page); } static ssize_t queue_attr_store(struct kobject *kobj, struct attribute *attr, const char *page, size_t length) { struct queue_sysfs_entry *entry = to_queue(attr); struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj); struct request_queue *q = disk->queue; 
if (!entry->store_limit && !entry->store) return -EIO; if (entry->store_limit) { ssize_t res; struct queue_limits lim = queue_limits_start_update(q); res = entry->store_limit(disk, page, length, &lim); if (res < 0) { queue_limits_cancel_update(q); return res; } res = queue_limits_commit_update_frozen(q, &lim); if (res) return res; return length; } return entry->store(disk, page, length); } static const struct sysfs_ops queue_sysfs_ops = { .show = queue_attr_show, .store = queue_attr_store, }; static const struct attribute_group *blk_queue_attr_groups[] = { &queue_attr_group, &blk_mq_queue_attr_group, NULL }; static void blk_queue_release(struct kobject *kobj) { /* nothing to do here, all data is associated with the parent gendisk */ } const struct kobj_type blk_queue_ktype = { .default_groups = blk_queue_attr_groups, .sysfs_ops = &queue_sysfs_ops, .release = blk_queue_release, }; static void blk_debugfs_remove(struct gendisk *disk) { struct request_queue *q = disk->queue; blk_debugfs_lock_nomemsave(q); blk_trace_shutdown(q); debugfs_remove_recursive(q->debugfs_dir); q->debugfs_dir = NULL; q->sched_debugfs_dir = NULL; q->rqos_debugfs_dir = NULL; blk_debugfs_unlock_nomemrestore(q); } /** * blk_register_queue - register a block layer queue with sysfs * @disk: Disk of which the request queue should be registered with sysfs. 
*/ int blk_register_queue(struct gendisk *disk) { struct request_queue *q = disk->queue; unsigned int memflags; int ret; ret = kobject_add(&disk->queue_kobj, &disk_to_dev(disk)->kobj, "queue"); if (ret < 0) return ret; if (queue_is_mq(q)) { ret = blk_mq_sysfs_register(disk); if (ret) goto out_del_queue_kobj; } mutex_lock(&q->sysfs_lock); memflags = blk_debugfs_lock(q); q->debugfs_dir = debugfs_create_dir(disk->disk_name, blk_debugfs_root); if (queue_is_mq(q)) blk_mq_debugfs_register(q); blk_debugfs_unlock(q, memflags); ret = disk_register_independent_access_ranges(disk); if (ret) goto out_debugfs_remove; ret = blk_crypto_sysfs_register(disk); if (ret) goto out_unregister_ia_ranges; if (queue_is_mq(q)) elevator_set_default(q); blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q); wbt_init_enable_default(disk); /* Now everything is ready and send out KOBJ_ADD uevent */ kobject_uevent(&disk->queue_kobj, KOBJ_ADD); if (q->elevator) kobject_uevent(&q->elevator->kobj, KOBJ_ADD); mutex_unlock(&q->sysfs_lock); /* * SCSI probing may synchronously create and destroy a lot of * request_queues for non-existent devices. Shutting down a fully * functional queue takes measureable wallclock time as RCU grace * periods are involved. To avoid excessive latency in these * cases, a request_queue starts out in a degraded mode which is * faster to shut down and is made fully functional here as * request_queues for non-existent devices never get registered. */ blk_queue_flag_set(QUEUE_FLAG_INIT_DONE, q); percpu_ref_switch_to_percpu(&q->q_usage_counter); return ret; out_unregister_ia_ranges: disk_unregister_independent_access_ranges(disk); out_debugfs_remove: blk_debugfs_remove(disk); mutex_unlock(&q->sysfs_lock); if (queue_is_mq(q)) blk_mq_sysfs_unregister(disk); out_del_queue_kobj: kobject_del(&disk->queue_kobj); return ret; } /** * blk_unregister_queue - counterpart of blk_register_queue() * @disk: Disk of which the request queue should be unregistered from sysfs. 
* * Note: the caller is responsible for guaranteeing that this function is called * after blk_register_queue() has finished. */ void blk_unregister_queue(struct gendisk *disk) { struct request_queue *q = disk->queue; if (WARN_ON(!q)) return; /* Return early if disk->queue was never registered. */ if (!blk_queue_registered(q)) return; /* * Since sysfs_remove_dir() prevents adding new directory entries * before removal of existing entries starts, protect against * concurrent elv_iosched_store() calls. */ mutex_lock(&q->sysfs_lock); blk_queue_flag_clear(QUEUE_FLAG_REGISTERED, q); mutex_unlock(&q->sysfs_lock); /* * Remove the sysfs attributes before unregistering the queue data * structures that can be modified through sysfs. */ if (queue_is_mq(q)) blk_mq_sysfs_unregister(disk); blk_crypto_sysfs_unregister(disk); mutex_lock(&q->sysfs_lock); disk_unregister_independent_access_ranges(disk); mutex_unlock(&q->sysfs_lock); /* Now that we've deleted all child objects, we can delete the queue. */ kobject_uevent(&disk->queue_kobj, KOBJ_REMOVE); kobject_del(&disk->queue_kobj); if (queue_is_mq(q)) elevator_set_none(q); blk_debugfs_remove(disk); }
366 366 365 366 348 367 238 177 165 235 235 215 349 345 133 1 348 238 238 209 208 237 237 367 330 330 215 216 217 145 146 41 30 30 145 366 367 349 348 4 4 4 366 64 17 17 17 1 16 16 367 367 29 365 94 89 78 78 3 366 366 367 367 366 96 367 366 365 366 367 21 21 21 21 21 21 237 22 235 22 21 22 22 22 21 21 21 74 238 94 93 87 238 61 238 4 237 237 233 233 238 235 4 4 238 21 236 21 236 236 193 94 190 189 366 366 366 90 23 23 1 366 366 366 189 239 239 1 237 238 62 238 234 200 236 69 238 69 367 191 191 366 365 95 92 23 95 95 92 62 46 44 15 94 90 94 4 365 342 365 349 20 117 53 77 52 1 52 52 29 20 16 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 
379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 // SPDX-License-Identifier: GPL-2.0-or-later /* 
SCTP kernel implementation * (C) Copyright IBM Corp. 2001, 2004 * Copyright (c) 1999-2000 Cisco, Inc. * Copyright (c) 1999-2001 Motorola, Inc. * * This file is part of the SCTP kernel implementation * * These functions handle output processing. * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * La Monte H.P. Yarroll <piggy@acm.org> * Karl Knutson <karl@athena.chicago.il.us> * Jon Grimm <jgrimm@austin.ibm.com> * Sridhar Samudrala <sri@us.ibm.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/types.h> #include <linux/kernel.h> #include <linux/wait.h> #include <linux/time.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/init.h> #include <linux/slab.h> #include <net/inet_ecn.h> #include <net/ip.h> #include <net/icmp.h> #include <net/net_namespace.h> #include <linux/socket.h> /* for sa_family_t */ #include <net/sock.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> #include <net/sctp/checksum.h> /* Forward declarations for private helpers. */ static enum sctp_xmit __sctp_packet_append_chunk(struct sctp_packet *packet, struct sctp_chunk *chunk); static enum sctp_xmit sctp_packet_can_append_data(struct sctp_packet *packet, struct sctp_chunk *chunk); static void sctp_packet_append_data(struct sctp_packet *packet, struct sctp_chunk *chunk); static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet, struct sctp_chunk *chunk, u16 chunk_len); static void sctp_packet_reset(struct sctp_packet *packet) { /* sctp_packet_transmit() relies on this to reset size to the * current overhead after sending packets. */ packet->size = packet->overhead; packet->has_cookie_echo = 0; packet->has_sack = 0; packet->has_data = 0; packet->has_auth = 0; packet->ipfragok = 0; packet->auth = NULL; } /* Config a packet. * This appears to be a followup set of initializations. 
*/ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag, int ecn_capable) { struct sctp_transport *tp = packet->transport; struct sctp_association *asoc = tp->asoc; struct sctp_sock *sp = NULL; struct sock *sk; pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag); packet->vtag = vtag; /* do the following jobs only once for a flush schedule */ if (!sctp_packet_empty(packet)) return; /* set packet max_size with pathmtu, then calculate overhead */ packet->max_size = tp->pathmtu; if (asoc) { sk = asoc->base.sk; sp = sctp_sk(sk); } packet->overhead = sctp_mtu_payload(sp, 0, 0); packet->size = packet->overhead; if (!asoc) return; /* update dst or transport pathmtu if in need */ if (!sctp_transport_dst_check(tp)) { sctp_transport_route(tp, NULL, sp); if (asoc->param_flags & SPP_PMTUD_ENABLE) sctp_assoc_sync_pmtu(asoc); } else if (!sctp_transport_pl_enabled(tp) && asoc->param_flags & SPP_PMTUD_ENABLE) { if (!sctp_transport_pmtu_check(tp)) sctp_assoc_sync_pmtu(asoc); } if (asoc->pmtu_pending) { if (asoc->param_flags & SPP_PMTUD_ENABLE) sctp_assoc_sync_pmtu(asoc); asoc->pmtu_pending = 0; } /* If there a is a prepend chunk stick it on the list before * any other chunks get appended. */ if (ecn_capable) { struct sctp_chunk *chunk = sctp_get_ecne_prepend(asoc); if (chunk) sctp_packet_append_chunk(packet, chunk); } if (!tp->dst) return; /* set packet max_size with gso_max_size if gso is enabled*/ rcu_read_lock(); if (__sk_dst_get(sk) != tp->dst) { dst_hold(tp->dst); sk_setup_caps(sk, tp->dst); } packet->max_size = sk_can_gso(sk) ? min(READ_ONCE(tp->dst->dev->gso_max_size), GSO_LEGACY_MAX_SIZE) : asoc->pathmtu; rcu_read_unlock(); } /* Initialize the packet structure. 
*/ void sctp_packet_init(struct sctp_packet *packet, struct sctp_transport *transport, __u16 sport, __u16 dport) { pr_debug("%s: packet:%p transport:%p\n", __func__, packet, transport); packet->transport = transport; packet->source_port = sport; packet->destination_port = dport; INIT_LIST_HEAD(&packet->chunk_list); /* The overhead will be calculated by sctp_packet_config() */ packet->overhead = 0; sctp_packet_reset(packet); packet->vtag = 0; } /* Free a packet. */ void sctp_packet_free(struct sctp_packet *packet) { struct sctp_chunk *chunk, *tmp; pr_debug("%s: packet:%p\n", __func__, packet); list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { list_del_init(&chunk->list); sctp_chunk_free(chunk); } } /* This routine tries to append the chunk to the offered packet. If adding * the chunk causes the packet to exceed the path MTU and COOKIE_ECHO chunk * is not present in the packet, it transmits the input packet. * Data can be bundled with a packet containing a COOKIE_ECHO chunk as long * as it can fit in the packet, but any more data that does not fit in this * packet can be sent only after receiving the COOKIE_ACK. */ enum sctp_xmit sctp_packet_transmit_chunk(struct sctp_packet *packet, struct sctp_chunk *chunk, int one_packet, gfp_t gfp) { enum sctp_xmit retval; pr_debug("%s: packet:%p size:%zu chunk:%p size:%d\n", __func__, packet, packet->size, chunk, chunk->skb ? chunk->skb->len : -1); switch ((retval = (sctp_packet_append_chunk(packet, chunk)))) { case SCTP_XMIT_PMTU_FULL: if (!packet->has_cookie_echo) { int error = 0; error = sctp_packet_transmit(packet, gfp); if (error < 0) chunk->skb->sk->sk_err = -error; /* If we have an empty packet, then we can NOT ever * return PMTU_FULL. 
*/ if (!one_packet) retval = sctp_packet_append_chunk(packet, chunk); } break; case SCTP_XMIT_RWND_FULL: case SCTP_XMIT_OK: case SCTP_XMIT_DELAY: break; } return retval; } /* Try to bundle a pad chunk into a packet with a heartbeat chunk for PLPMTUTD probe */ static enum sctp_xmit sctp_packet_bundle_pad(struct sctp_packet *pkt, struct sctp_chunk *chunk) { struct sctp_transport *t = pkt->transport; struct sctp_chunk *pad; int overhead = 0; if (!chunk->pmtu_probe) return SCTP_XMIT_OK; /* calculate the Padding Data size for the pad chunk */ overhead += sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr); overhead += sizeof(struct sctp_sender_hb_info) + sizeof(struct sctp_pad_chunk); pad = sctp_make_pad(t->asoc, t->pl.probe_size - overhead); if (!pad) return SCTP_XMIT_DELAY; list_add_tail(&pad->list, &pkt->chunk_list); pkt->size += SCTP_PAD4(ntohs(pad->chunk_hdr->length)); chunk->transport = t; return SCTP_XMIT_OK; } /* Try to bundle an auth chunk into the packet. */ static enum sctp_xmit sctp_packet_bundle_auth(struct sctp_packet *pkt, struct sctp_chunk *chunk) { struct sctp_association *asoc = pkt->transport->asoc; enum sctp_xmit retval = SCTP_XMIT_OK; struct sctp_chunk *auth; /* if we don't have an association, we can't do authentication */ if (!asoc) return retval; /* See if this is an auth chunk we are bundling or if * auth is already bundled. */ if (chunk->chunk_hdr->type == SCTP_CID_AUTH || pkt->has_auth) return retval; /* if the peer did not request this chunk to be authenticated, * don't do it */ if (!chunk->auth) return retval; auth = sctp_make_auth(asoc, chunk->shkey->key_id); if (!auth) return retval; auth->shkey = chunk->shkey; sctp_auth_shkey_hold(auth->shkey); retval = __sctp_packet_append_chunk(pkt, auth); if (retval != SCTP_XMIT_OK) sctp_chunk_free(auth); return retval; } /* Try to bundle a SACK with the packet. 
*/ static enum sctp_xmit sctp_packet_bundle_sack(struct sctp_packet *pkt, struct sctp_chunk *chunk) { enum sctp_xmit retval = SCTP_XMIT_OK; /* If sending DATA and haven't aleady bundled a SACK, try to * bundle one in to the packet. */ if (sctp_chunk_is_data(chunk) && !pkt->has_sack && !pkt->has_cookie_echo) { struct sctp_association *asoc; struct timer_list *timer; asoc = pkt->transport->asoc; timer = &asoc->timers[SCTP_EVENT_TIMEOUT_SACK]; /* If the SACK timer is running, we have a pending SACK */ if (timer_pending(timer)) { struct sctp_chunk *sack; if (pkt->transport->sack_generation != pkt->transport->asoc->peer.sack_generation) return retval; asoc->a_rwnd = asoc->rwnd; sack = sctp_make_sack(asoc); if (sack) { retval = __sctp_packet_append_chunk(pkt, sack); if (retval != SCTP_XMIT_OK) { sctp_chunk_free(sack); goto out; } SCTP_INC_STATS(asoc->base.net, SCTP_MIB_OUTCTRLCHUNKS); asoc->stats.octrlchunks++; asoc->peer.sack_needed = 0; if (timer_delete(timer)) sctp_association_put(asoc); } } } out: return retval; } /* Append a chunk to the offered packet reporting back any inability to do * so. */ static enum sctp_xmit __sctp_packet_append_chunk(struct sctp_packet *packet, struct sctp_chunk *chunk) { __u16 chunk_len = SCTP_PAD4(ntohs(chunk->chunk_hdr->length)); enum sctp_xmit retval = SCTP_XMIT_OK; /* Check to see if this chunk will fit into the packet */ retval = sctp_packet_will_fit(packet, chunk, chunk_len); if (retval != SCTP_XMIT_OK) goto finish; /* We believe that this chunk is OK to add to the packet */ switch (chunk->chunk_hdr->type) { case SCTP_CID_DATA: case SCTP_CID_I_DATA: /* Account for the data being in the packet */ sctp_packet_append_data(packet, chunk); /* Disallow SACK bundling after DATA. 
*/ packet->has_sack = 1; /* Disallow AUTH bundling after DATA */ packet->has_auth = 1; /* Let it be knows that packet has DATA in it */ packet->has_data = 1; /* timestamp the chunk for rtx purposes */ chunk->sent_at = jiffies; /* Mainly used for prsctp RTX policy */ chunk->sent_count++; break; case SCTP_CID_COOKIE_ECHO: packet->has_cookie_echo = 1; break; case SCTP_CID_SACK: packet->has_sack = 1; if (chunk->asoc) chunk->asoc->stats.osacks++; break; case SCTP_CID_AUTH: packet->has_auth = 1; packet->auth = chunk; break; } /* It is OK to send this chunk. */ list_add_tail(&chunk->list, &packet->chunk_list); packet->size += chunk_len; chunk->transport = packet->transport; finish: return retval; } /* Append a chunk to the offered packet reporting back any inability to do * so. */ enum sctp_xmit sctp_packet_append_chunk(struct sctp_packet *packet, struct sctp_chunk *chunk) { enum sctp_xmit retval = SCTP_XMIT_OK; pr_debug("%s: packet:%p chunk:%p\n", __func__, packet, chunk); /* Data chunks are special. Before seeing what else we can * bundle into this packet, check to see if we are allowed to * send this DATA. 
*/ if (sctp_chunk_is_data(chunk)) { retval = sctp_packet_can_append_data(packet, chunk); if (retval != SCTP_XMIT_OK) goto finish; } /* Try to bundle AUTH chunk */ retval = sctp_packet_bundle_auth(packet, chunk); if (retval != SCTP_XMIT_OK) goto finish; /* Try to bundle SACK chunk */ retval = sctp_packet_bundle_sack(packet, chunk); if (retval != SCTP_XMIT_OK) goto finish; retval = __sctp_packet_append_chunk(packet, chunk); if (retval != SCTP_XMIT_OK) goto finish; retval = sctp_packet_bundle_pad(packet, chunk); finish: return retval; } static void sctp_packet_gso_append(struct sk_buff *head, struct sk_buff *skb) { if (SCTP_OUTPUT_CB(head)->last == head) skb_shinfo(head)->frag_list = skb; else SCTP_OUTPUT_CB(head)->last->next = skb; SCTP_OUTPUT_CB(head)->last = skb; head->truesize += skb->truesize; head->data_len += skb->len; head->len += skb->len; refcount_add(skb->truesize, &head->sk->sk_wmem_alloc); __skb_header_release(skb); } static int sctp_packet_pack(struct sctp_packet *packet, struct sk_buff *head, int gso, gfp_t gfp) { struct sctp_transport *tp = packet->transport; struct sctp_auth_chunk *auth = NULL; struct sctp_chunk *chunk, *tmp; int pkt_count = 0, pkt_size; struct sock *sk = head->sk; struct sk_buff *nskb; int auth_len = 0; if (gso) { skb_shinfo(head)->gso_type = sk->sk_gso_type; SCTP_OUTPUT_CB(head)->last = head; } else { nskb = head; pkt_size = packet->size; goto merge; } do { /* calculate the pkt_size and alloc nskb */ pkt_size = packet->overhead; list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { int padded = SCTP_PAD4(chunk->skb->len); if (chunk == packet->auth) auth_len = padded; else if (auth_len + padded + packet->overhead > tp->pathmtu) return 0; else if (pkt_size + padded > tp->pathmtu) break; pkt_size += padded; } nskb = alloc_skb(pkt_size + MAX_HEADER, gfp); if (!nskb) return 0; skb_reserve(nskb, packet->overhead + MAX_HEADER); merge: /* merge chunks into nskb and append nskb into head list */ pkt_size -= packet->overhead; 
list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { int padding; list_del_init(&chunk->list); if (sctp_chunk_is_data(chunk)) { if (!sctp_chunk_retransmitted(chunk) && !tp->rto_pending) { chunk->rtt_in_progress = 1; tp->rto_pending = 1; } } padding = SCTP_PAD4(chunk->skb->len) - chunk->skb->len; if (padding) skb_put_zero(chunk->skb, padding); if (chunk == packet->auth) auth = (struct sctp_auth_chunk *) skb_tail_pointer(nskb); skb_put_data(nskb, chunk->skb->data, chunk->skb->len); pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, rtt_in_progress:%d\n", chunk, sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)), chunk->has_tsn ? "TSN" : "No TSN", chunk->has_tsn ? ntohl(chunk->subh.data_hdr->tsn) : 0, ntohs(chunk->chunk_hdr->length), chunk->skb->len, chunk->rtt_in_progress); pkt_size -= SCTP_PAD4(chunk->skb->len); if (!sctp_chunk_is_data(chunk) && chunk != packet->auth) sctp_chunk_free(chunk); if (!pkt_size) break; } if (auth) { sctp_auth_calculate_hmac(tp->asoc, nskb, auth, packet->auth->shkey, gfp); /* free auth if no more chunks, or add it back */ if (list_empty(&packet->chunk_list)) sctp_chunk_free(packet->auth); else list_add(&packet->auth->list, &packet->chunk_list); } if (gso) sctp_packet_gso_append(head, nskb); pkt_count++; } while (!list_empty(&packet->chunk_list)); if (gso) { memset(head->cb, 0, max(sizeof(struct inet_skb_parm), sizeof(struct inet6_skb_parm))); skb_shinfo(head)->gso_segs = pkt_count; skb_shinfo(head)->gso_size = GSO_BY_FRAGS; goto chksum; } if (sctp_checksum_disable) return 1; if (!(tp->dst->dev->features & NETIF_F_SCTP_CRC) || dst_xfrm(tp->dst) || packet->ipfragok || tp->encap_port) { struct sctphdr *sh = (struct sctphdr *)skb_transport_header(head); sh->checksum = sctp_compute_cksum(head, 0); } else { chksum: head->ip_summed = CHECKSUM_PARTIAL; head->csum_not_inet = 1; head->csum_start = skb_transport_header(head) - head->head; head->csum_offset = offsetof(struct sctphdr, checksum); } return pkt_count; } /* All 
packets are sent to the network through this function from * sctp_outq_tail(). * * The return value is always 0 for now. */ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) { struct sctp_transport *tp = packet->transport; struct sctp_association *asoc = tp->asoc; struct sctp_chunk *chunk, *tmp; int pkt_count, gso = 0; struct sk_buff *head; struct sctphdr *sh; struct sock *sk; pr_debug("%s: packet:%p\n", __func__, packet); if (list_empty(&packet->chunk_list)) return 0; chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list); sk = chunk->skb->sk; if (packet->size > tp->pathmtu && !packet->ipfragok && !chunk->pmtu_probe) { if (tp->pl.state == SCTP_PL_ERROR) { /* do IP fragmentation if in Error state */ packet->ipfragok = 1; } else { if (!sk_can_gso(sk)) { /* check gso */ pr_err_once("Trying to GSO but underlying device doesn't support it."); goto out; } gso = 1; } } /* alloc head skb */ head = alloc_skb((gso ? packet->overhead : packet->size) + MAX_HEADER, gfp); if (!head) goto out; skb_reserve(head, packet->overhead + MAX_HEADER); skb_set_owner_w(head, sk); /* set sctp header */ sh = skb_push(head, sizeof(struct sctphdr)); skb_reset_transport_header(head); sh->source = htons(packet->source_port); sh->dest = htons(packet->destination_port); sh->vtag = htonl(packet->vtag); sh->checksum = 0; /* drop packet if no dst */ if (!tp->dst) { IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); kfree_skb(head); goto out; } /* pack up chunks */ pkt_count = sctp_packet_pack(packet, head, gso, gfp); if (!pkt_count) { kfree_skb(head); goto out; } pr_debug("***sctp_transmit_packet*** skb->len:%d\n", head->len); /* start autoclose timer */ if (packet->has_data && sctp_state(asoc, ESTABLISHED) && asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) { struct timer_list *timer = &asoc->timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; unsigned long timeout = asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; if (!mod_timer(timer, jiffies + timeout)) sctp_association_hold(asoc); } /* 
sctp xmit */ tp->af_specific->ecn_capable(sk); if (asoc) { asoc->stats.opackets += pkt_count; if (asoc->peer.last_sent_to != tp) asoc->peer.last_sent_to = tp; } head->ignore_df = packet->ipfragok; if (tp->dst_pending_confirm) skb_set_dst_pending_confirm(head, 1); /* neighbour should be confirmed on successful transmission or * positive error */ if (tp->af_specific->sctp_xmit(head, tp) >= 0 && tp->dst_pending_confirm) tp->dst_pending_confirm = 0; out: list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { list_del_init(&chunk->list); if (!sctp_chunk_is_data(chunk)) sctp_chunk_free(chunk); } sctp_packet_reset(packet); return 0; } /******************************************************************** * 2nd Level Abstractions ********************************************************************/ /* This private function check to see if a chunk can be added */ static enum sctp_xmit sctp_packet_can_append_data(struct sctp_packet *packet, struct sctp_chunk *chunk) { size_t datasize, rwnd, inflight, flight_size; struct sctp_transport *transport = packet->transport; struct sctp_association *asoc = transport->asoc; struct sctp_outq *q = &asoc->outqueue; /* RFC 2960 6.1 Transmission of DATA Chunks * * A) At any given time, the data sender MUST NOT transmit new data to * any destination transport address if its peer's rwnd indicates * that the peer has no buffer space (i.e. rwnd is 0, see Section * 6.2.1). However, regardless of the value of rwnd (including if it * is 0), the data sender can always have one DATA chunk in flight to * the receiver if allowed by cwnd (see rule B below). This rule * allows the sender to probe for a change in rwnd that the sender * missed due to the SACK having been lost in transit from the data * receiver to the data sender. 
*/ rwnd = asoc->peer.rwnd; inflight = q->outstanding_bytes; flight_size = transport->flight_size; datasize = sctp_data_size(chunk); if (datasize > rwnd && inflight > 0) /* We have (at least) one data chunk in flight, * so we can't fall back to rule 6.1 B). */ return SCTP_XMIT_RWND_FULL; /* RFC 2960 6.1 Transmission of DATA Chunks * * B) At any given time, the sender MUST NOT transmit new data * to a given transport address if it has cwnd or more bytes * of data outstanding to that transport address. */ /* RFC 7.2.4 & the Implementers Guide 2.8. * * 3) ... * When a Fast Retransmit is being performed the sender SHOULD * ignore the value of cwnd and SHOULD NOT delay retransmission. */ if (chunk->fast_retransmit != SCTP_NEED_FRTX && flight_size >= transport->cwnd) return SCTP_XMIT_RWND_FULL; /* Nagle's algorithm to solve small-packet problem: * Inhibit the sending of new chunks when new outgoing data arrives * if any previously transmitted data on the connection remains * unacknowledged. */ if ((sctp_sk(asoc->base.sk)->nodelay || inflight == 0) && !asoc->force_delay) /* Nothing unacked */ return SCTP_XMIT_OK; if (!sctp_packet_empty(packet)) /* Append to packet */ return SCTP_XMIT_OK; if (!sctp_state(asoc, ESTABLISHED)) return SCTP_XMIT_OK; /* Check whether this chunk and all the rest of pending data will fit * or delay in hopes of bundling a full sized packet. 
*/ if (chunk->skb->len + q->out_qlen > transport->pathmtu - packet->overhead - sctp_datachk_len(&chunk->asoc->stream) - 4) /* Enough data queued to fill a packet */ return SCTP_XMIT_OK; /* Don't delay large message writes that may have been fragmented */ if (!chunk->msg->can_delay) return SCTP_XMIT_OK; /* Defer until all data acked or packet full */ return SCTP_XMIT_DELAY; } /* This private function does management things when adding DATA chunk */ static void sctp_packet_append_data(struct sctp_packet *packet, struct sctp_chunk *chunk) { struct sctp_transport *transport = packet->transport; size_t datasize = sctp_data_size(chunk); struct sctp_association *asoc = transport->asoc; u32 rwnd = asoc->peer.rwnd; /* Keep track of how many bytes are in flight over this transport. */ transport->flight_size += datasize; /* Keep track of how many bytes are in flight to the receiver. */ asoc->outqueue.outstanding_bytes += datasize; /* Update our view of the receiver's rwnd. */ if (datasize < rwnd) rwnd -= datasize; else rwnd = 0; asoc->peer.rwnd = rwnd; sctp_chunk_assign_tsn(chunk); asoc->stream.si->assign_number(chunk); } static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet, struct sctp_chunk *chunk, u16 chunk_len) { enum sctp_xmit retval = SCTP_XMIT_OK; size_t psize, pmtu, maxsize; /* Don't bundle in this packet if this chunk's auth key doesn't * match other chunks already enqueued on this packet. Also, * don't bundle the chunk with auth key if other chunks in this * packet don't have auth key. */ if ((packet->auth && chunk->shkey != packet->auth->shkey) || (!packet->auth && chunk->shkey && chunk->chunk_hdr->type != SCTP_CID_AUTH)) return SCTP_XMIT_PMTU_FULL; psize = packet->size; if (packet->transport->asoc) pmtu = packet->transport->asoc->pathmtu; else pmtu = packet->transport->pathmtu; /* Decide if we need to fragment or resubmit later. */ if (psize + chunk_len > pmtu) { /* It's OK to fragment at IP level if any one of the following * is true: * 1. 
The packet is empty (meaning this chunk is greater * the MTU) * 2. The packet doesn't have any data in it yet and data * requires authentication. */ if (sctp_packet_empty(packet) || (!packet->has_data && chunk->auth)) { /* We no longer do re-fragmentation. * Just fragment at the IP layer, if we * actually hit this condition */ packet->ipfragok = 1; goto out; } /* Similarly, if this chunk was built before a PMTU * reduction, we have to fragment it at IP level now. So * if the packet already contains something, we need to * flush. */ maxsize = pmtu - packet->overhead; if (packet->auth) maxsize -= SCTP_PAD4(packet->auth->skb->len); if (chunk_len > maxsize) retval = SCTP_XMIT_PMTU_FULL; /* It is also okay to fragment if the chunk we are * adding is a control chunk, but only if current packet * is not a GSO one otherwise it causes fragmentation of * a large frame. So in this case we allow the * fragmentation by forcing it to be in a new packet. */ if (!sctp_chunk_is_data(chunk) && packet->has_data) retval = SCTP_XMIT_PMTU_FULL; if (psize + chunk_len > packet->max_size) /* Hit GSO/PMTU limit, gotta flush */ retval = SCTP_XMIT_PMTU_FULL; if (!packet->transport->burst_limited && psize + chunk_len > (packet->transport->cwnd >> 1)) /* Do not allow a single GSO packet to use more * than half of cwnd. */ retval = SCTP_XMIT_PMTU_FULL; if (packet->transport->burst_limited && psize + chunk_len > (packet->transport->burst_limited >> 1)) /* Do not allow a single GSO packet to use more * than half of original cwnd. */ retval = SCTP_XMIT_PMTU_FULL; /* Otherwise it will fit in the GSO packet */ } out: return retval; }
4 7 7 7 7 6 4 7 7 7 7 7 7 7 7 7 7 7 4 4 4 5 5 5 5 5 5 5 5 5 5 5 7 7 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 
510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2018-2020 Christoph Hellwig. * * DMA operations that map physical memory directly without using an IOMMU. */ #include <linux/memblock.h> /* for max_pfn */ #include <linux/export.h> #include <linux/mm.h> #include <linux/dma-map-ops.h> #include <linux/scatterlist.h> #include <linux/pfn.h> #include <linux/vmalloc.h> #include <linux/set_memory.h> #include <linux/slab.h> #include <linux/pci-p2pdma.h> #include "direct.h" /* * Most architectures use ZONE_DMA for the first 16 Megabytes, but some use * it for entirely different regions. In that case the arch code needs to * override the variable below for dma-direct to work properly. 
*/ u64 zone_dma_limit __ro_after_init = DMA_BIT_MASK(24); static inline dma_addr_t phys_to_dma_direct(struct device *dev, phys_addr_t phys) { if (force_dma_unencrypted(dev)) return phys_to_dma_unencrypted(dev, phys); return phys_to_dma(dev, phys); } static inline struct page *dma_direct_to_page(struct device *dev, dma_addr_t dma_addr) { return pfn_to_page(PHYS_PFN(dma_to_phys(dev, dma_addr))); } u64 dma_direct_get_required_mask(struct device *dev) { phys_addr_t phys = (phys_addr_t)(max_pfn - 1) << PAGE_SHIFT; u64 max_dma = phys_to_dma_direct(dev, phys); return (1ULL << (fls64(max_dma) - 1)) * 2 - 1; } static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 *phys_limit) { u64 dma_limit = min_not_zero( dev->coherent_dma_mask, dev->bus_dma_limit); /* * Optimistically try the zone that the physical address mask falls * into first. If that returns memory that isn't actually addressable * we will fallback to the next lower zone and try again. * * Note that GFP_DMA32 and GFP_DMA are no ops without the corresponding * zones. 
*/ *phys_limit = dma_to_phys(dev, dma_limit); if (*phys_limit <= zone_dma_limit) return GFP_DMA; if (*phys_limit <= DMA_BIT_MASK(32)) return GFP_DMA32; return 0; } bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) { dma_addr_t dma_addr = phys_to_dma_direct(dev, phys); if (dma_addr == DMA_MAPPING_ERROR) return false; return dma_addr + size - 1 <= min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); } static int dma_set_decrypted(struct device *dev, void *vaddr, size_t size) { if (!force_dma_unencrypted(dev)) return 0; return set_memory_decrypted((unsigned long)vaddr, PFN_UP(size)); } static int dma_set_encrypted(struct device *dev, void *vaddr, size_t size) { int ret; if (!force_dma_unencrypted(dev)) return 0; ret = set_memory_encrypted((unsigned long)vaddr, PFN_UP(size)); if (ret) pr_warn_ratelimited("leaking DMA memory that can't be re-encrypted\n"); return ret; } static void __dma_direct_free_pages(struct device *dev, struct page *page, size_t size) { if (swiotlb_free(dev, page, size)) return; dma_free_contiguous(dev, page, size); } static struct page *dma_direct_alloc_swiotlb(struct device *dev, size_t size) { struct page *page = swiotlb_alloc(dev, size); if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { swiotlb_free(dev, page, size); return NULL; } return page; } static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, gfp_t gfp, bool allow_highmem) { int node = dev_to_node(dev); struct page *page; u64 phys_limit; WARN_ON_ONCE(!PAGE_ALIGNED(size)); if (is_swiotlb_for_alloc(dev)) return dma_direct_alloc_swiotlb(dev, size); gfp |= dma_direct_optimal_gfp_mask(dev, &phys_limit); page = dma_alloc_contiguous(dev, size, gfp); if (page) { if (dma_coherent_ok(dev, page_to_phys(page), size) && (allow_highmem || !PageHighMem(page))) return page; dma_free_contiguous(dev, page, size); } while ((page = alloc_pages_node(node, gfp, get_order(size))) && !dma_coherent_ok(dev, page_to_phys(page), size)) { 
__free_pages(page, get_order(size)); if (IS_ENABLED(CONFIG_ZONE_DMA32) && phys_limit < DMA_BIT_MASK(64) && !(gfp & (GFP_DMA32 | GFP_DMA))) gfp |= GFP_DMA32; else if (IS_ENABLED(CONFIG_ZONE_DMA) && !(gfp & GFP_DMA)) gfp = (gfp & ~GFP_DMA32) | GFP_DMA; else return NULL; } return page; } /* * Check if a potentially blocking operations needs to dip into the atomic * pools for the given device/gfp. */ static bool dma_direct_use_pool(struct device *dev, gfp_t gfp) { return !gfpflags_allow_blocking(gfp) && !is_swiotlb_for_alloc(dev); } static void *dma_direct_alloc_from_pool(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { struct page *page; u64 phys_limit; void *ret; if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_DMA_COHERENT_POOL))) return NULL; gfp |= dma_direct_optimal_gfp_mask(dev, &phys_limit); page = dma_alloc_from_pool(dev, size, &ret, gfp, dma_coherent_ok); if (!page) return NULL; *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); return ret; } static void *dma_direct_alloc_no_mapping(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { struct page *page; page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO, true); if (!page) return NULL; /* remove any dirty cache lines on the kernel alias */ if (!PageHighMem(page)) arch_dma_prep_coherent(page, size); /* return the page pointer as the opaque cookie */ *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); return page; } void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { bool remap = false, set_uncached = false; struct page *page; void *ret; size = PAGE_ALIGN(size); if (attrs & DMA_ATTR_NO_WARN) gfp |= __GFP_NOWARN; if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && !force_dma_unencrypted(dev) && !is_swiotlb_for_alloc(dev)) return dma_direct_alloc_no_mapping(dev, size, dma_handle, gfp); if (!dev_is_dma_coherent(dev)) { if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_ALLOC) && !is_swiotlb_for_alloc(dev)) return 
arch_dma_alloc(dev, size, dma_handle, gfp, attrs); /* * If there is a global pool, always allocate from it for * non-coherent devices. */ if (IS_ENABLED(CONFIG_DMA_GLOBAL_POOL)) return dma_alloc_from_global_coherent(dev, size, dma_handle); /* * Otherwise we require the architecture to either be able to * mark arbitrary parts of the kernel direct mapping uncached, * or remapped it uncached. */ set_uncached = IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED); remap = IS_ENABLED(CONFIG_DMA_DIRECT_REMAP); if (!set_uncached && !remap) { pr_warn_once("coherent DMA allocations not supported on this platform.\n"); return NULL; } } /* * Remapping or decrypting memory may block, allocate the memory from * the atomic pools instead if we aren't allowed block. */ if ((remap || force_dma_unencrypted(dev)) && dma_direct_use_pool(dev, gfp)) return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp); /* we always manually zero the memory once we are done */ page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO, true); if (!page) return NULL; /* * dma_alloc_contiguous can return highmem pages depending on a * combination the cma= arguments and per-arch setup. These need to be * remapped to return a kernel virtual address. 
*/ if (PageHighMem(page)) { remap = true; set_uncached = false; } if (remap) { pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs); if (force_dma_unencrypted(dev)) prot = pgprot_decrypted(prot); /* remove any dirty cache lines on the kernel alias */ arch_dma_prep_coherent(page, size); /* create a coherent mapping */ ret = dma_common_contiguous_remap(page, size, prot, __builtin_return_address(0)); if (!ret) goto out_free_pages; } else { ret = page_address(page); if (dma_set_decrypted(dev, ret, size)) goto out_leak_pages; } memset(ret, 0, size); if (set_uncached) { arch_dma_prep_coherent(page, size); ret = arch_dma_set_uncached(ret, size); if (IS_ERR(ret)) goto out_encrypt_pages; } *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); return ret; out_encrypt_pages: if (dma_set_encrypted(dev, page_address(page), size)) return NULL; out_free_pages: __dma_direct_free_pages(dev, page, size); return NULL; out_leak_pages: return NULL; } void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { unsigned int page_order = get_order(size); if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && !force_dma_unencrypted(dev) && !is_swiotlb_for_alloc(dev)) { /* cpu_addr is a struct page cookie, not a kernel address */ dma_free_contiguous(dev, cpu_addr, size); return; } if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_ALLOC) && !dev_is_dma_coherent(dev) && !is_swiotlb_for_alloc(dev)) { arch_dma_free(dev, size, cpu_addr, dma_addr, attrs); return; } if (IS_ENABLED(CONFIG_DMA_GLOBAL_POOL) && !dev_is_dma_coherent(dev)) { if (!dma_release_from_global_coherent(page_order, cpu_addr)) WARN_ON_ONCE(1); return; } /* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */ if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size))) return; if (is_vmalloc_addr(cpu_addr)) { vunmap(cpu_addr); } else { if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED)) arch_dma_clear_uncached(cpu_addr, size); if 
(dma_set_encrypted(dev, cpu_addr, size)) return; } __dma_direct_free_pages(dev, dma_direct_to_page(dev, dma_addr), size); } struct page *dma_direct_alloc_pages(struct device *dev, size_t size, dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) { struct page *page; void *ret; if (force_dma_unencrypted(dev) && dma_direct_use_pool(dev, gfp)) return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp); page = __dma_direct_alloc_pages(dev, size, gfp, false); if (!page) return NULL; ret = page_address(page); if (dma_set_decrypted(dev, ret, size)) goto out_leak_pages; memset(ret, 0, size); *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); return page; out_leak_pages: return NULL; } void dma_direct_free_pages(struct device *dev, size_t size, struct page *page, dma_addr_t dma_addr, enum dma_data_direction dir) { void *vaddr = page_address(page); /* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */ if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && dma_free_from_pool(dev, vaddr, size)) return; if (dma_set_encrypted(dev, vaddr, size)) return; __dma_direct_free_pages(dev, page, size); } #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ defined(CONFIG_SWIOTLB) void dma_direct_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir) { struct scatterlist *sg; int i; for_each_sg(sgl, sg, nents, i) { phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg)); swiotlb_sync_single_for_device(dev, paddr, sg->length, dir); if (!dev_is_dma_coherent(dev)) arch_sync_dma_for_device(paddr, sg->length, dir); } } #endif #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \ defined(CONFIG_SWIOTLB) void dma_direct_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir) { struct scatterlist *sg; int i; for_each_sg(sgl, sg, nents, i) { phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg)); if 
(!dev_is_dma_coherent(dev)) arch_sync_dma_for_cpu(paddr, sg->length, dir); swiotlb_sync_single_for_cpu(dev, paddr, sg->length, dir); } if (!dev_is_dma_coherent(dev)) arch_sync_dma_for_cpu_all(); } /* * Unmaps segments, except for ones marked as pci_p2pdma which do not * require any further action as they contain a bus address. */ void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs) { struct scatterlist *sg; int i; for_each_sg(sgl, sg, nents, i) { if (sg_dma_is_bus_address(sg)) sg_dma_unmark_bus_address(sg); else dma_direct_unmap_phys(dev, sg->dma_address, sg_dma_len(sg), dir, attrs); } } #endif int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs) { struct pci_p2pdma_map_state p2pdma_state = {}; struct scatterlist *sg; int i, ret; for_each_sg(sgl, sg, nents, i) { switch (pci_p2pdma_state(&p2pdma_state, dev, sg_page(sg))) { case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: /* * Any P2P mapping that traverses the PCI host bridge * must be mapped with CPU physical address and not PCI * bus addresses. 
*/ break; case PCI_P2PDMA_MAP_NONE: sg->dma_address = dma_direct_map_phys(dev, sg_phys(sg), sg->length, dir, attrs); if (sg->dma_address == DMA_MAPPING_ERROR) { ret = -EIO; goto out_unmap; } break; case PCI_P2PDMA_MAP_BUS_ADDR: sg->dma_address = pci_p2pdma_bus_addr_map( p2pdma_state.mem, sg_phys(sg)); sg_dma_len(sg) = sg->length; sg_dma_mark_bus_address(sg); continue; default: ret = -EREMOTEIO; goto out_unmap; } sg_dma_len(sg) = sg->length; } return nents; out_unmap: dma_direct_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); return ret; } int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) { struct page *page = dma_direct_to_page(dev, dma_addr); int ret; ret = sg_alloc_table(sgt, 1, GFP_KERNEL); if (!ret) sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); return ret; } bool dma_direct_can_mmap(struct device *dev) { return dev_is_dma_coherent(dev) || IS_ENABLED(CONFIG_DMA_NONCOHERENT_MMAP); } int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) { unsigned long user_count = vma_pages(vma); unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; unsigned long pfn = PHYS_PFN(dma_to_phys(dev, dma_addr)); int ret = -ENXIO; vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs); if (force_dma_unencrypted(dev)) vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; if (dma_mmap_from_global_coherent(vma, cpu_addr, size, &ret)) return ret; if (vma->vm_pgoff >= count || user_count > count - vma->vm_pgoff) return -ENXIO; return remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff, user_count << PAGE_SHIFT, vma->vm_page_prot); } int dma_direct_supported(struct device *dev, u64 mask) { u64 min_mask = (max_pfn - 1) << PAGE_SHIFT; /* * Because 32-bit DMA masks are so common we expect every architecture * to be able 
to satisfy them - either by not supporting more physical * memory, or by providing a ZONE_DMA32. If neither is the case, the * architecture needs to use an IOMMU instead of the direct mapping. */ if (mask >= DMA_BIT_MASK(32)) return 1; /* * This check needs to be against the actual bit mask value, so use * phys_to_dma_unencrypted() here so that the SME encryption mask isn't * part of the check. */ if (IS_ENABLED(CONFIG_ZONE_DMA)) min_mask = min_t(u64, min_mask, zone_dma_limit); return mask >= phys_to_dma_unencrypted(dev, min_mask); } static const struct bus_dma_region *dma_find_range(struct device *dev, unsigned long start_pfn) { const struct bus_dma_region *m; for (m = dev->dma_range_map; PFN_DOWN(m->size); m++) { unsigned long cpu_start_pfn = PFN_DOWN(m->cpu_start); if (start_pfn >= cpu_start_pfn && start_pfn - cpu_start_pfn < PFN_DOWN(m->size)) return m; } return NULL; } /* * To check whether all ram resource ranges are covered by dma range map * Returns 0 when further check is needed * Returns 1 if there is some RAM range can't be covered by dma_range_map */ static int check_ram_in_range_map(unsigned long start_pfn, unsigned long nr_pages, void *data) { unsigned long end_pfn = start_pfn + nr_pages; struct device *dev = data; while (start_pfn < end_pfn) { const struct bus_dma_region *bdr; bdr = dma_find_range(dev, start_pfn); if (!bdr) return 1; start_pfn = PFN_DOWN(bdr->cpu_start) + PFN_DOWN(bdr->size); } return 0; } bool dma_direct_all_ram_mapped(struct device *dev) { if (!dev->dma_range_map) return true; return !walk_system_ram_range(0, PFN_DOWN(ULONG_MAX) + 1, dev, check_ram_in_range_map); } size_t dma_direct_max_mapping_size(struct device *dev) { /* If SWIOTLB is active, use its maximum mapping size */ if (is_swiotlb_active(dev) && (dma_addressing_limited(dev) || is_swiotlb_force_bounce(dev))) return swiotlb_max_mapping_size(dev); return SIZE_MAX; } bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr) { return !dev_is_dma_coherent(dev) || 
swiotlb_find_pool(dev, dma_to_phys(dev, dma_addr)); } /** * dma_direct_set_offset - Assign scalar offset for a single DMA range. * @dev: device pointer; needed to "own" the alloced memory. * @cpu_start: beginning of memory region covered by this offset. * @dma_start: beginning of DMA/PCI region covered by this offset. * @size: size of the region. * * This is for the simple case of a uniform offset which cannot * be discovered by "dma-ranges". * * It returns -ENOMEM if out of memory, -EINVAL if a map * already exists, 0 otherwise. * * Note: any call to this from a driver is a bug. The mapping needs * to be described by the device tree or other firmware interfaces. */ int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start, dma_addr_t dma_start, u64 size) { struct bus_dma_region *map; u64 offset = (u64)cpu_start - (u64)dma_start; if (dev->dma_range_map) { dev_err(dev, "attempt to add DMA range to existing map\n"); return -EINVAL; } if (!offset) return 0; map = kzalloc_objs(*map, 2); if (!map) return -ENOMEM; map[0].cpu_start = cpu_start; map[0].dma_start = dma_start; map[0].size = size; dev->dma_range_map = map; return 0; }
678 22 581 581 581 1 16 16 16 16 16 5 5 5 5 5 211 211 211 211 211 211 211 3 3 3 2 2 2 1 2 2 2 2 3 3 3 3 3 3 3 3 3 5 5 5 3 3 1 3 7 7 7 5 5 7 3 3 1 3 7 7 7 7 7 7 1 7 7 7 8 6 5 7 8 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 // SPDX-License-Identifier: 
LGPL-2.1 /* * cgroup_freezer.c - control group freezer subsystem * * Copyright IBM Corporation, 2007 * * Author : Cedric Le Goater <clg@fr.ibm.com> */ #include <linux/export.h> #include <linux/slab.h> #include <linux/cgroup.h> #include <linux/fs.h> #include <linux/uaccess.h> #include <linux/freezer.h> #include <linux/seq_file.h> #include <linux/mutex.h> #include <linux/cpu.h> /* * A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is * set if "FROZEN" is written to freezer.state cgroupfs file, and cleared * for "THAWED". FREEZING_PARENT is set if the parent freezer is FREEZING * for whatever reason. IOW, a cgroup has FREEZING_PARENT set if one of * its ancestors has FREEZING_SELF set. */ enum freezer_state_flags { CGROUP_FREEZER_ONLINE = (1 << 0), /* freezer is fully online */ CGROUP_FREEZING_SELF = (1 << 1), /* this freezer is freezing */ CGROUP_FREEZING_PARENT = (1 << 2), /* the parent freezer is freezing */ CGROUP_FROZEN = (1 << 3), /* this and its descendants frozen */ /* mask for all FREEZING flags */ CGROUP_FREEZING = CGROUP_FREEZING_SELF | CGROUP_FREEZING_PARENT, }; struct freezer { struct cgroup_subsys_state css; unsigned int state; }; static DEFINE_MUTEX(freezer_mutex); static inline struct freezer *css_freezer(struct cgroup_subsys_state *css) { return css ? 
container_of(css, struct freezer, css) : NULL; } static inline struct freezer *task_freezer(struct task_struct *task) { return css_freezer(task_css(task, freezer_cgrp_id)); } static struct freezer *parent_freezer(struct freezer *freezer) { return css_freezer(freezer->css.parent); } bool cgroup1_freezing(struct task_struct *task) { bool ret; rcu_read_lock(); ret = task_freezer(task)->state & CGROUP_FREEZING; rcu_read_unlock(); return ret; } static const char *freezer_state_strs(unsigned int state) { if (state & CGROUP_FROZEN) return "FROZEN"; if (state & CGROUP_FREEZING) return "FREEZING"; return "THAWED"; }; static struct cgroup_subsys_state * freezer_css_alloc(struct cgroup_subsys_state *parent_css) { struct freezer *freezer; freezer = kzalloc_obj(struct freezer); if (!freezer) return ERR_PTR(-ENOMEM); return &freezer->css; } /** * freezer_css_online - commit creation of a freezer css * @css: css being created * * We're committing to creation of @css. Mark it online and inherit * parent's freezing state while holding cpus read lock and freezer_mutex. */ static int freezer_css_online(struct cgroup_subsys_state *css) { struct freezer *freezer = css_freezer(css); struct freezer *parent = parent_freezer(freezer); cpus_read_lock(); mutex_lock(&freezer_mutex); freezer->state |= CGROUP_FREEZER_ONLINE; if (parent && (parent->state & CGROUP_FREEZING)) { freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN; static_branch_inc_cpuslocked(&freezer_active); } mutex_unlock(&freezer_mutex); cpus_read_unlock(); return 0; } /** * freezer_css_offline - initiate destruction of a freezer css * @css: css being destroyed * * @css is going away. Mark it dead and decrement freezer_active if * it was holding one. 
*/ static void freezer_css_offline(struct cgroup_subsys_state *css) { struct freezer *freezer = css_freezer(css); cpus_read_lock(); mutex_lock(&freezer_mutex); if (freezer->state & CGROUP_FREEZING) static_branch_dec_cpuslocked(&freezer_active); freezer->state = 0; mutex_unlock(&freezer_mutex); cpus_read_unlock(); } static void freezer_css_free(struct cgroup_subsys_state *css) { kfree(css_freezer(css)); } /* * Tasks can be migrated into a different freezer anytime regardless of its * current state. freezer_attach() is responsible for making new tasks * conform to the current state. * * Freezer state changes and task migration are synchronized via * @freezer->lock. freezer_attach() makes the new tasks conform to the * current state and all following state changes can see the new tasks. */ static void freezer_attach(struct cgroup_taskset *tset) { struct task_struct *task; struct cgroup_subsys_state *new_css; mutex_lock(&freezer_mutex); /* * Make the new tasks conform to the current state of @new_css. * For simplicity, when migrating any task to a FROZEN cgroup, we * revert it to FREEZING and let update_if_frozen() determine the * correct state later. * * Tasks in @tset are on @new_css but may not conform to its * current state before executing the following - !frozen tasks may * be visible in a FROZEN cgroup and frozen tasks in a THAWED one. */ cgroup_taskset_for_each(task, new_css, tset) { struct freezer *freezer = css_freezer(new_css); if (!(freezer->state & CGROUP_FREEZING)) { __thaw_task(task); } else { /* clear FROZEN and propagate upwards */ while (freezer && (freezer->state & CGROUP_FROZEN)) { freezer->state &= ~CGROUP_FROZEN; freezer = parent_freezer(freezer); } freeze_task(task); } } mutex_unlock(&freezer_mutex); } /** * freezer_fork - cgroup post fork callback * @task: a task which has just been forked * * @task has just been created and should conform to the current state of * the cgroup_freezer it belongs to. 
This function may race against * freezer_attach(). Losing to freezer_attach() means that we don't have * to do anything as freezer_attach() will put @task into the appropriate * state. */ static void freezer_fork(struct task_struct *task) { struct freezer *freezer; /* * The root cgroup is non-freezable, so we can skip locking the * freezer. This is safe regardless of race with task migration. * If we didn't race or won, skipping is obviously the right thing * to do. If we lost and root is the new cgroup, noop is still the * right thing to do. */ if (task_css_is_root(task, freezer_cgrp_id)) return; mutex_lock(&freezer_mutex); rcu_read_lock(); freezer = task_freezer(task); if (freezer->state & CGROUP_FREEZING) freeze_task(task); rcu_read_unlock(); mutex_unlock(&freezer_mutex); } /** * update_if_frozen - update whether a cgroup finished freezing * @css: css of interest * * Once FREEZING is initiated, transition to FROZEN is lazily updated by * calling this function. If the current state is FREEZING but not FROZEN, * this function checks whether all tasks of this cgroup and the descendant * cgroups finished freezing and, if so, sets FROZEN. * * The caller is responsible for grabbing RCU read lock and calling * update_if_frozen() on all descendants prior to invoking this function. * * Task states and freezer state might disagree while tasks are being * migrated into or out of @css, so we can't verify task states against * @freezer state here. See freezer_attach() for details. */ static void update_if_frozen(struct cgroup_subsys_state *css) { struct freezer *freezer = css_freezer(css); struct cgroup_subsys_state *pos; struct css_task_iter it; struct task_struct *task; lockdep_assert_held(&freezer_mutex); if (!(freezer->state & CGROUP_FREEZING) || (freezer->state & CGROUP_FROZEN)) return; /* are all (live) children frozen? 
*/ rcu_read_lock(); css_for_each_child(pos, css) { struct freezer *child = css_freezer(pos); if ((child->state & CGROUP_FREEZER_ONLINE) && !(child->state & CGROUP_FROZEN)) { rcu_read_unlock(); return; } } rcu_read_unlock(); /* are all tasks frozen? */ css_task_iter_start(css, 0, &it); while ((task = css_task_iter_next(&it))) { if (freezing(task) && !frozen(task)) goto out_iter_end; } freezer->state |= CGROUP_FROZEN; out_iter_end: css_task_iter_end(&it); } static int freezer_read(struct seq_file *m, void *v) { struct cgroup_subsys_state *css = seq_css(m), *pos; mutex_lock(&freezer_mutex); rcu_read_lock(); /* update states bottom-up */ css_for_each_descendant_post(pos, css) { if (!css_tryget_online(pos)) continue; rcu_read_unlock(); update_if_frozen(pos); rcu_read_lock(); css_put(pos); } rcu_read_unlock(); mutex_unlock(&freezer_mutex); seq_puts(m, freezer_state_strs(css_freezer(css)->state)); seq_putc(m, '\n'); return 0; } static void freeze_cgroup(struct freezer *freezer) { struct css_task_iter it; struct task_struct *task; css_task_iter_start(&freezer->css, 0, &it); while ((task = css_task_iter_next(&it))) freeze_task(task); css_task_iter_end(&it); } static void unfreeze_cgroup(struct freezer *freezer) { struct css_task_iter it; struct task_struct *task; css_task_iter_start(&freezer->css, 0, &it); while ((task = css_task_iter_next(&it))) __thaw_task(task); css_task_iter_end(&it); } /** * freezer_apply_state - apply state change to a single cgroup_freezer * @freezer: freezer to apply state change to * @freeze: whether to freeze or unfreeze * @state: CGROUP_FREEZING_* flag to set or clear * * Set or clear @state on @cgroup according to @freeze, and perform * freezing or thawing as necessary. 
*/ static void freezer_apply_state(struct freezer *freezer, bool freeze, unsigned int state) { /* also synchronizes against task migration, see freezer_attach() */ lockdep_assert_held(&freezer_mutex); if (!(freezer->state & CGROUP_FREEZER_ONLINE)) return; if (freeze) { if (!(freezer->state & CGROUP_FREEZING)) static_branch_inc_cpuslocked(&freezer_active); freezer->state |= state; freeze_cgroup(freezer); } else { bool was_freezing = freezer->state & CGROUP_FREEZING; freezer->state &= ~state; if (!(freezer->state & CGROUP_FREEZING)) { freezer->state &= ~CGROUP_FROZEN; if (was_freezing) static_branch_dec_cpuslocked(&freezer_active); unfreeze_cgroup(freezer); } } } /** * freezer_change_state - change the freezing state of a cgroup_freezer * @freezer: freezer of interest * @freeze: whether to freeze or thaw * * Freeze or thaw @freezer according to @freeze. The operations are * recursive - all descendants of @freezer will be affected. */ static void freezer_change_state(struct freezer *freezer, bool freeze) { struct cgroup_subsys_state *pos; cpus_read_lock(); /* * Update all its descendants in pre-order traversal. Each * descendant will try to inherit its parent's FREEZING state as * CGROUP_FREEZING_PARENT. 
*/ mutex_lock(&freezer_mutex); rcu_read_lock(); css_for_each_descendant_pre(pos, &freezer->css) { struct freezer *pos_f = css_freezer(pos); struct freezer *parent = parent_freezer(pos_f); if (!css_tryget_online(pos)) continue; rcu_read_unlock(); if (pos_f == freezer) freezer_apply_state(pos_f, freeze, CGROUP_FREEZING_SELF); else freezer_apply_state(pos_f, parent->state & CGROUP_FREEZING, CGROUP_FREEZING_PARENT); rcu_read_lock(); css_put(pos); } rcu_read_unlock(); mutex_unlock(&freezer_mutex); cpus_read_unlock(); } static ssize_t freezer_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { bool freeze; buf = strstrip(buf); if (strcmp(buf, freezer_state_strs(0)) == 0) freeze = false; else if (strcmp(buf, freezer_state_strs(CGROUP_FROZEN)) == 0) { pr_info_once("Freezing with imperfect legacy cgroup freezer. " "See cgroup.freeze of cgroup v2\n"); freeze = true; } else return -EINVAL; freezer_change_state(css_freezer(of_css(of)), freeze); return nbytes; } static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css, struct cftype *cft) { struct freezer *freezer = css_freezer(css); return (bool)(freezer->state & CGROUP_FREEZING_SELF); } static u64 freezer_parent_freezing_read(struct cgroup_subsys_state *css, struct cftype *cft) { struct freezer *freezer = css_freezer(css); return (bool)(freezer->state & CGROUP_FREEZING_PARENT); } static struct cftype files[] = { { .name = "state", .flags = CFTYPE_NOT_ON_ROOT, .seq_show = freezer_read, .write = freezer_write, }, { .name = "self_freezing", .flags = CFTYPE_NOT_ON_ROOT, .read_u64 = freezer_self_freezing_read, }, { .name = "parent_freezing", .flags = CFTYPE_NOT_ON_ROOT, .read_u64 = freezer_parent_freezing_read, }, { } /* terminate */ }; struct cgroup_subsys freezer_cgrp_subsys = { .css_alloc = freezer_css_alloc, .css_online = freezer_css_online, .css_offline = freezer_css_offline, .css_free = freezer_css_free, .attach = freezer_attach, .fork = freezer_fork, .legacy_cftypes = files, };
3 3 3 3 3 3 3 1 3 3 2 3 3 2 2 1 1 1 1 2 2 2 2 4 4 4 4 4 4 4 4 4 4 4 4 4 4 1 3 3 3 2 3 3 4 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 
502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 
1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 
1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 
1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 // SPDX-License-Identifier: GPL-2.0-or-later /* * Native support for the Aiptek HyperPen USB Tablets * (4000U/5000U/6000U/8000U/12000U) * * Copyright (c) 2001 Chris Atenasio <chris@crud.net> * Copyright (c) 2002-2004 Bryan W. Headley <bwheadley@earthlink.net> * * based on wacom.c by * Vojtech Pavlik <vojtech@suse.cz> * Andreas Bach Aaen <abach@stofanet.dk> * Clifford Wolf <clifford@clifford.at> * Sam Mosel <sam.mosel@computer.org> * James E. Blair <corvus@gnu.org> * Daniel Egger <egger@suse.de> * * Many thanks to Oliver Kuechemann for his support. * * ChangeLog: * v0.1 - Initial release * v0.2 - Hack to get around fake event 28's. (Bryan W. Headley) * v0.3 - Make URB dynamic (Bryan W. Headley, Jun-8-2002) * Released to Linux 2.4.19 and 2.5.x * v0.4 - Rewrote substantial portions of the code to deal with * corrected control sequences, timing, dynamic configuration, * support of 6000U - 12000U, procfs, and macro key support * (Jan-1-2003 - Feb-5-2003, Bryan W. Headley) * v1.0 - Added support for diagnostic messages, count of messages * received from URB - Mar-8-2003, Bryan W. Headley * v1.1 - added support for tablet resolution, changed DV and proximity * some corrections - Jun-22-2003, martin schneebacher * - Added support for the sysfs interface, deprecating the * procfs interface for 2.5.x kernel. Also added support for * Wheel command. Bryan W. Headley July-15-2003. * v1.2 - Reworked jitter timer as a kernel thread. * Bryan W. 
Headley November-28-2003/Jan-10-2004.
 *      v1.3 - Repaired issue of kernel thread going nuts on single-processor
 *             machines, introduced programmableDelay as a command line
 *             parameter. Feb 7 2004, Bryan W. Headley.
 *      v1.4 - Re-wire jitter so it does not require a thread. Courtesy of
 *             Rene van Paassen. Added reporting of physical pointer device
 *             (e.g., stylus, mouse) in reports 2, 3, 4, 5. We don't know
 *             what physical device generates reports 1, 6. Also enabled
 *             MOUSE and LENS tool button modes. Renamed "rubber" to "eraser".
 *             Feb 20, 2004, Bryan W. Headley.
 *      v1.5 - Added previousJitterable, so we don't do jitter delay when the
 *             user is holding a button down for periods of time.
 *
 * NOTE:
 *      This kernel driver is augmented by the "Aiptek" XFree86 input
 *      driver for your X server, as well as the Gaiptek GUI Front-end
 *      "Tablet Manager".
 *      These three products are highly interactive with one another,
 *      so it's easier to document them all as one subsystem.
 *      Please visit the project's "home page", located at
 *      http://aiptektablet.sourceforge.net.
*/ #include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/usb/input.h> #include <linux/uaccess.h> #include <linux/unaligned.h> /* * Aiptek status packet: * * (returned as Report 1 - relative coordinates from mouse and stylus) * * bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 * byte0 0 0 0 0 0 0 0 1 * byte1 0 0 0 0 0 BS2 BS Tip * byte2 X7 X6 X5 X4 X3 X2 X1 X0 * byte3 Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 * * (returned as Report 2 - absolute coordinates from the stylus) * * bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 * byte0 0 0 0 0 0 0 1 0 * byte1 X7 X6 X5 X4 X3 X2 X1 X0 * byte2 X15 X14 X13 X12 X11 X10 X9 X8 * byte3 Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 * byte4 Y15 Y14 Y13 Y12 Y11 Y10 Y9 Y8 * byte5 * * * BS2 BS1 Tip IR DV * byte6 P7 P6 P5 P4 P3 P2 P1 P0 * byte7 P15 P14 P13 P12 P11 P10 P9 P8 * * (returned as Report 3 - absolute coordinates from the mouse) * * bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 * byte0 0 0 0 0 0 0 1 1 * byte1 X7 X6 X5 X4 X3 X2 X1 X0 * byte2 X15 X14 X13 X12 X11 X10 X9 X8 * byte3 Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 * byte4 Y15 Y14 Y13 Y12 Y11 Y10 Y9 Y8 * byte5 * * * BS2 BS1 Tip IR DV * byte6 P7 P6 P5 P4 P3 P2 P1 P0 * byte7 P15 P14 P13 P12 P11 P10 P9 P8 * * (returned as Report 4 - macrokeys from the stylus) * * bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 * byte0 0 0 0 0 0 1 0 0 * byte1 0 0 0 BS2 BS Tip IR DV * byte2 0 0 0 0 0 0 1 0 * byte3 0 0 0 K4 K3 K2 K1 K0 * byte4 P7 P6 P5 P4 P3 P2 P1 P0 * byte5 P15 P14 P13 P12 P11 P10 P9 P8 * * (returned as Report 5 - macrokeys from the mouse) * * bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 * byte0 0 0 0 0 0 1 0 1 * byte1 0 0 0 BS2 BS Tip IR DV * byte2 0 0 0 0 0 0 1 0 * byte3 0 0 0 K4 K3 K2 K1 K0 * byte4 P7 P6 P5 P4 P3 P2 P1 P0 * byte5 P15 P14 P13 P12 P11 P10 P9 P8 * * IR: In Range = Proximity on * DV = Data Valid * BS = Barrel Switch (as in, macro keys) * BS2 also referred to as Tablet Pick * * Command Summary: * * Use report_type CONTROL (3) * Use report_id 2 * * Command/Data Description Return Bytes 
Return Value * 0x10/0x00 SwitchToMouse 0 * 0x10/0x01 SwitchToTablet 0 * 0x18/0x04 SetResolution 0 * 0x12/0xFF AutoGainOn 0 * 0x17/0x00 FilterOn 0 * 0x01/0x00 GetXExtension 2 MaxX * 0x01/0x01 GetYExtension 2 MaxY * 0x02/0x00 GetModelCode 2 ModelCode = LOBYTE * 0x03/0x00 GetODMCode 2 ODMCode * 0x08/0x00 GetPressureLevels 2 =512 * 0x04/0x00 GetFirmwareVersion 2 Firmware Version * 0x11/0x02 EnableMacroKeys 0 * * To initialize the tablet: * * (1) Send Resolution500LPI (Command) * (2) Query for Model code (Option Report) * (3) Query for ODM code (Option Report) * (4) Query for firmware (Option Report) * (5) Query for GetXExtension (Option Report) * (6) Query for GetYExtension (Option Report) * (7) Query for GetPressureLevels (Option Report) * (8) SwitchToTablet for Absolute coordinates, or * SwitchToMouse for Relative coordinates (Command) * (9) EnableMacroKeys (Command) * (10) FilterOn (Command) * (11) AutoGainOn (Command) * * (Step 9 can be omitted, but you'll then have no function keys.) */ #define USB_VENDOR_ID_AIPTEK 0x08ca #define USB_VENDOR_ID_KYE 0x0458 #define USB_REQ_GET_REPORT 0x01 #define USB_REQ_SET_REPORT 0x09 /* PointerMode codes */ #define AIPTEK_POINTER_ONLY_MOUSE_MODE 0 #define AIPTEK_POINTER_ONLY_STYLUS_MODE 1 #define AIPTEK_POINTER_EITHER_MODE 2 #define AIPTEK_POINTER_ALLOW_MOUSE_MODE(a) \ (a == AIPTEK_POINTER_ONLY_MOUSE_MODE || \ a == AIPTEK_POINTER_EITHER_MODE) #define AIPTEK_POINTER_ALLOW_STYLUS_MODE(a) \ (a == AIPTEK_POINTER_ONLY_STYLUS_MODE || \ a == AIPTEK_POINTER_EITHER_MODE) /* CoordinateMode code */ #define AIPTEK_COORDINATE_RELATIVE_MODE 0 #define AIPTEK_COORDINATE_ABSOLUTE_MODE 1 /* XTilt and YTilt values */ #define AIPTEK_TILT_MIN (-128) #define AIPTEK_TILT_MAX 127 #define AIPTEK_TILT_DISABLE (-10101) /* Wheel values */ #define AIPTEK_WHEEL_MIN 0 #define AIPTEK_WHEEL_MAX 1024 #define AIPTEK_WHEEL_DISABLE (-10101) /* ToolCode values, which BTW are 0x140 .. 
0x14f * We have things set up such that if the tool button has changed, * the tools get reset. */ /* toolMode codes */ #define AIPTEK_TOOL_BUTTON_PEN_MODE BTN_TOOL_PEN #define AIPTEK_TOOL_BUTTON_PENCIL_MODE BTN_TOOL_PENCIL #define AIPTEK_TOOL_BUTTON_BRUSH_MODE BTN_TOOL_BRUSH #define AIPTEK_TOOL_BUTTON_AIRBRUSH_MODE BTN_TOOL_AIRBRUSH #define AIPTEK_TOOL_BUTTON_ERASER_MODE BTN_TOOL_RUBBER #define AIPTEK_TOOL_BUTTON_MOUSE_MODE BTN_TOOL_MOUSE #define AIPTEK_TOOL_BUTTON_LENS_MODE BTN_TOOL_LENS /* Diagnostic message codes */ #define AIPTEK_DIAGNOSTIC_NA 0 #define AIPTEK_DIAGNOSTIC_SENDING_RELATIVE_IN_ABSOLUTE 1 #define AIPTEK_DIAGNOSTIC_SENDING_ABSOLUTE_IN_RELATIVE 2 #define AIPTEK_DIAGNOSTIC_TOOL_DISALLOWED 3 /* Time to wait (in ms) to help mask hand jittering * when pressing the stylus buttons. */ #define AIPTEK_JITTER_DELAY_DEFAULT 50 /* Time to wait (in ms) in-between sending the tablet * a command and beginning the process of reading the return * sequence from the tablet. */ #define AIPTEK_PROGRAMMABLE_DELAY_25 25 #define AIPTEK_PROGRAMMABLE_DELAY_50 50 #define AIPTEK_PROGRAMMABLE_DELAY_100 100 #define AIPTEK_PROGRAMMABLE_DELAY_200 200 #define AIPTEK_PROGRAMMABLE_DELAY_300 300 #define AIPTEK_PROGRAMMABLE_DELAY_400 400 #define AIPTEK_PROGRAMMABLE_DELAY_DEFAULT AIPTEK_PROGRAMMABLE_DELAY_400 /* Mouse button programming */ #define AIPTEK_MOUSE_LEFT_BUTTON 0x04 #define AIPTEK_MOUSE_RIGHT_BUTTON 0x08 #define AIPTEK_MOUSE_MIDDLE_BUTTON 0x10 /* Stylus button programming */ #define AIPTEK_STYLUS_LOWER_BUTTON 0x08 #define AIPTEK_STYLUS_UPPER_BUTTON 0x10 /* Length of incoming packet from the tablet */ #define AIPTEK_PACKET_LENGTH 8 /* We report in EV_MISC both the proximity and * whether the report came from the stylus, tablet mouse * or "unknown" -- Unknown when the tablet is in relative * mode, because we only get report 1's. 
*/ #define AIPTEK_REPORT_TOOL_UNKNOWN 0x10 #define AIPTEK_REPORT_TOOL_STYLUS 0x20 #define AIPTEK_REPORT_TOOL_MOUSE 0x40 static int programmableDelay = AIPTEK_PROGRAMMABLE_DELAY_DEFAULT; static int jitterDelay = AIPTEK_JITTER_DELAY_DEFAULT; struct aiptek_features { int odmCode; /* Tablet manufacturer code */ int modelCode; /* Tablet model code (not unique) */ int firmwareCode; /* prom/eeprom version */ char usbPath[64 + 1]; /* device's physical usb path */ }; struct aiptek_settings { int pointerMode; /* stylus-, mouse-only or either */ int coordinateMode; /* absolute/relative coords */ int toolMode; /* pen, pencil, brush, etc. tool */ int xTilt; /* synthetic xTilt amount */ int yTilt; /* synthetic yTilt amount */ int wheel; /* synthetic wheel amount */ int stylusButtonUpper; /* stylus upper btn delivers... */ int stylusButtonLower; /* stylus lower btn delivers... */ int mouseButtonLeft; /* mouse left btn delivers... */ int mouseButtonMiddle; /* mouse middle btn delivers... */ int mouseButtonRight; /* mouse right btn delivers... */ int programmableDelay; /* delay for tablet programming */ int jitterDelay; /* delay for hand jittering */ }; struct aiptek { struct input_dev *inputdev; /* input device struct */ struct usb_interface *intf; /* usb interface struct */ struct urb *urb; /* urb for incoming reports */ dma_addr_t data_dma; /* our dma stuffage */ struct aiptek_features features; /* tablet's array of features */ struct aiptek_settings curSetting; /* tablet's current programmable */ struct aiptek_settings newSetting; /* ... and new param settings */ unsigned int ifnum; /* interface number for IO */ int diagnostic; /* tablet diagnostic codes */ unsigned long eventCount; /* event count */ int inDelay; /* jitter: in jitter delay? */ unsigned long endDelay; /* jitter: time when delay ends */ int previousJitterable; /* jitterable prev value */ int lastMacro; /* macro key to reset */ int previousToolMode; /* pen, pencil, brush, etc. 
tool */ unsigned char *data; /* incoming packet data */ }; static const int eventTypes[] = { EV_KEY, EV_ABS, EV_REL, EV_MSC, }; static const int absEvents[] = { ABS_X, ABS_Y, ABS_PRESSURE, ABS_TILT_X, ABS_TILT_Y, ABS_WHEEL, ABS_MISC, }; static const int relEvents[] = { REL_X, REL_Y, REL_WHEEL, }; static const int buttonEvents[] = { BTN_LEFT, BTN_RIGHT, BTN_MIDDLE, BTN_TOOL_PEN, BTN_TOOL_RUBBER, BTN_TOOL_PENCIL, BTN_TOOL_AIRBRUSH, BTN_TOOL_BRUSH, BTN_TOOL_MOUSE, BTN_TOOL_LENS, BTN_TOUCH, BTN_STYLUS, BTN_STYLUS2, }; /* * Permit easy lookup of keyboard events to send, versus * the bitmap which comes from the tablet. This hides the * issue that the F_keys are not sequentially numbered. */ static const int macroKeyEvents[] = { KEY_ESC, KEY_F1, KEY_F2, KEY_F3, KEY_F4, KEY_F5, KEY_F6, KEY_F7, KEY_F8, KEY_F9, KEY_F10, KEY_F11, KEY_F12, KEY_F13, KEY_F14, KEY_F15, KEY_F16, KEY_F17, KEY_F18, KEY_F19, KEY_F20, KEY_F21, KEY_F22, KEY_F23, KEY_F24, KEY_STOP, KEY_AGAIN, KEY_PROPS, KEY_UNDO, KEY_FRONT, KEY_COPY, KEY_OPEN, KEY_PASTE, 0 }; /*********************************************************************** * Map values to strings and back. Every map should have the following * as its last element: { NULL, AIPTEK_INVALID_VALUE }. */ #define AIPTEK_INVALID_VALUE -1 struct aiptek_map { const char *string; int value; }; static int map_str_to_val(const struct aiptek_map *map, const char *str, size_t count) { const struct aiptek_map *p; if (str[count - 1] == '\n') count--; for (p = map; p->string; p++) if (!strncmp(str, p->string, count)) return p->value; return AIPTEK_INVALID_VALUE; } static const char *map_val_to_str(const struct aiptek_map *map, int val) { const struct aiptek_map *p; for (p = map; p->value != AIPTEK_INVALID_VALUE; p++) if (val == p->value) return p->string; return "unknown"; } /*********************************************************************** * aiptek_irq can receive one of six potential reports. * The documentation for each is in the body of the function. 
* * The tablet reports on several attributes per invocation of * aiptek_irq. Because the Linux Input Event system allows the * transmission of ONE attribute per input_report_xxx() call, * collation has to be done on the other end to reconstitute * a complete tablet report. Further, the number of Input Event reports * submitted varies, depending on what USB report type, and circumstance. * To deal with this, EV_MSC is used to indicate an 'end-of-report' * message. This has been an undocumented convention understood by the kernel * tablet driver and clients such as gpm and XFree86's tablet drivers. * * Of the information received from the tablet, the one piece I * cannot transmit is the proximity bit (without resorting to an EV_MSC * convention above.) I therefore have taken over REL_MISC and ABS_MISC * (for relative and absolute reports, respectively) for communicating * Proximity. Why two events? I thought it interesting to know if the * Proximity event occurred while the tablet was in absolute or relative * mode. * Update: REL_MISC proved not to be such a good idea. With REL_MISC you * get an event transmitted each time. ABS_MISC works better, since it * can be set and re-set. Thus, only using ABS_MISC from now on. * * Other tablets use the notion of a certain minimum stylus pressure * to infer proximity. While that could have been done, that is yet * another 'by convention' behavior, the documentation for which * would be spread between two (or more) pieces of software. * * EV_MSC usage was terminated for this purpose in Linux 2.5.x, and * replaced with the input_sync() method (which emits EV_SYN.) 
*/ static void aiptek_irq(struct urb *urb) { struct aiptek *aiptek = urb->context; unsigned char *data = aiptek->data; struct input_dev *inputdev = aiptek->inputdev; struct usb_interface *intf = aiptek->intf; int jitterable = 0; int retval, macro, x, y, z, left, right, middle, p, dv, tip, bs, pck; switch (urb->status) { case 0: /* Success */ break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: /* This urb is terminated, clean up */ dev_dbg(&intf->dev, "%s - urb shutting down with status: %d\n", __func__, urb->status); return; default: dev_dbg(&intf->dev, "%s - nonzero urb status received: %d\n", __func__, urb->status); goto exit; } /* See if we are in a delay loop -- throw out report if true. */ if (aiptek->inDelay == 1 && time_after(aiptek->endDelay, jiffies)) { goto exit; } aiptek->inDelay = 0; aiptek->eventCount++; /* Report 1 delivers relative coordinates with either a stylus * or the mouse. You do not know, however, which input * tool generated the event. */ if (data[0] == 1) { if (aiptek->curSetting.coordinateMode == AIPTEK_COORDINATE_ABSOLUTE_MODE) { aiptek->diagnostic = AIPTEK_DIAGNOSTIC_SENDING_RELATIVE_IN_ABSOLUTE; } else { x = (signed char) data[2]; y = (signed char) data[3]; /* jitterable keeps track of whether any button has been pressed. * We're also using it to remap the physical mouse button mask * to pseudo-settings. (We don't specifically care about it's * value after moving/transposing mouse button bitmasks, except * that a non-zero value indicates that one or more * mouse button was pressed.) */ jitterable = data[1] & 0x07; left = (data[1] & aiptek->curSetting.mouseButtonLeft >> 2) != 0 ? 1 : 0; right = (data[1] & aiptek->curSetting.mouseButtonRight >> 2) != 0 ? 1 : 0; middle = (data[1] & aiptek->curSetting.mouseButtonMiddle >> 2) != 0 ? 
1 : 0; input_report_key(inputdev, BTN_LEFT, left); input_report_key(inputdev, BTN_MIDDLE, middle); input_report_key(inputdev, BTN_RIGHT, right); input_report_abs(inputdev, ABS_MISC, 1 | AIPTEK_REPORT_TOOL_UNKNOWN); input_report_rel(inputdev, REL_X, x); input_report_rel(inputdev, REL_Y, y); /* Wheel support is in the form of a single-event * firing. */ if (aiptek->curSetting.wheel != AIPTEK_WHEEL_DISABLE) { input_report_rel(inputdev, REL_WHEEL, aiptek->curSetting.wheel); aiptek->curSetting.wheel = AIPTEK_WHEEL_DISABLE; } if (aiptek->lastMacro != -1) { input_report_key(inputdev, macroKeyEvents[aiptek->lastMacro], 0); aiptek->lastMacro = -1; } input_sync(inputdev); } } /* Report 2 is delivered only by the stylus, and delivers * absolute coordinates. */ else if (data[0] == 2) { if (aiptek->curSetting.coordinateMode == AIPTEK_COORDINATE_RELATIVE_MODE) { aiptek->diagnostic = AIPTEK_DIAGNOSTIC_SENDING_ABSOLUTE_IN_RELATIVE; } else if (!AIPTEK_POINTER_ALLOW_STYLUS_MODE (aiptek->curSetting.pointerMode)) { aiptek->diagnostic = AIPTEK_DIAGNOSTIC_TOOL_DISALLOWED; } else { x = get_unaligned_le16(data + 1); y = get_unaligned_le16(data + 3); z = get_unaligned_le16(data + 6); dv = (data[5] & 0x01) != 0 ? 1 : 0; p = (data[5] & 0x02) != 0 ? 1 : 0; tip = (data[5] & 0x04) != 0 ? 1 : 0; /* Use jitterable to re-arrange button masks */ jitterable = data[5] & 0x18; bs = (data[5] & aiptek->curSetting.stylusButtonLower) != 0 ? 1 : 0; pck = (data[5] & aiptek->curSetting.stylusButtonUpper) != 0 ? 1 : 0; /* dv indicates 'data valid' (e.g., the tablet is in sync * and has delivered a "correct" report) We will ignore * all 'bad' reports... */ if (dv != 0) { /* If the selected tool changed, reset the old * tool key, and set the new one. 
*/ if (aiptek->previousToolMode != aiptek->curSetting.toolMode) { input_report_key(inputdev, aiptek->previousToolMode, 0); input_report_key(inputdev, aiptek->curSetting.toolMode, 1); aiptek->previousToolMode = aiptek->curSetting.toolMode; } if (p != 0) { input_report_abs(inputdev, ABS_X, x); input_report_abs(inputdev, ABS_Y, y); input_report_abs(inputdev, ABS_PRESSURE, z); input_report_key(inputdev, BTN_TOUCH, tip); input_report_key(inputdev, BTN_STYLUS, bs); input_report_key(inputdev, BTN_STYLUS2, pck); if (aiptek->curSetting.xTilt != AIPTEK_TILT_DISABLE) { input_report_abs(inputdev, ABS_TILT_X, aiptek->curSetting.xTilt); } if (aiptek->curSetting.yTilt != AIPTEK_TILT_DISABLE) { input_report_abs(inputdev, ABS_TILT_Y, aiptek->curSetting.yTilt); } /* Wheel support is in the form of a single-event * firing. */ if (aiptek->curSetting.wheel != AIPTEK_WHEEL_DISABLE) { input_report_abs(inputdev, ABS_WHEEL, aiptek->curSetting.wheel); aiptek->curSetting.wheel = AIPTEK_WHEEL_DISABLE; } } input_report_abs(inputdev, ABS_MISC, p | AIPTEK_REPORT_TOOL_STYLUS); if (aiptek->lastMacro != -1) { input_report_key(inputdev, macroKeyEvents[aiptek->lastMacro], 0); aiptek->lastMacro = -1; } input_sync(inputdev); } } } /* Report 3's come from the mouse in absolute mode. */ else if (data[0] == 3) { if (aiptek->curSetting.coordinateMode == AIPTEK_COORDINATE_RELATIVE_MODE) { aiptek->diagnostic = AIPTEK_DIAGNOSTIC_SENDING_ABSOLUTE_IN_RELATIVE; } else if (!AIPTEK_POINTER_ALLOW_MOUSE_MODE (aiptek->curSetting.pointerMode)) { aiptek->diagnostic = AIPTEK_DIAGNOSTIC_TOOL_DISALLOWED; } else { x = get_unaligned_le16(data + 1); y = get_unaligned_le16(data + 3); jitterable = data[5] & 0x1c; dv = (data[5] & 0x01) != 0 ? 1 : 0; p = (data[5] & 0x02) != 0 ? 1 : 0; left = (data[5] & aiptek->curSetting.mouseButtonLeft) != 0 ? 1 : 0; right = (data[5] & aiptek->curSetting.mouseButtonRight) != 0 ? 1 : 0; middle = (data[5] & aiptek->curSetting.mouseButtonMiddle) != 0 ? 
1 : 0; if (dv != 0) { /* If the selected tool changed, reset the old * tool key, and set the new one. */ if (aiptek->previousToolMode != aiptek->curSetting.toolMode) { input_report_key(inputdev, aiptek->previousToolMode, 0); input_report_key(inputdev, aiptek->curSetting.toolMode, 1); aiptek->previousToolMode = aiptek->curSetting.toolMode; } if (p != 0) { input_report_abs(inputdev, ABS_X, x); input_report_abs(inputdev, ABS_Y, y); input_report_key(inputdev, BTN_LEFT, left); input_report_key(inputdev, BTN_MIDDLE, middle); input_report_key(inputdev, BTN_RIGHT, right); /* Wheel support is in the form of a single-event * firing. */ if (aiptek->curSetting.wheel != AIPTEK_WHEEL_DISABLE) { input_report_abs(inputdev, ABS_WHEEL, aiptek->curSetting.wheel); aiptek->curSetting.wheel = AIPTEK_WHEEL_DISABLE; } } input_report_abs(inputdev, ABS_MISC, p | AIPTEK_REPORT_TOOL_MOUSE); if (aiptek->lastMacro != -1) { input_report_key(inputdev, macroKeyEvents[aiptek->lastMacro], 0); aiptek->lastMacro = -1; } input_sync(inputdev); } } } /* Report 4s come from the macro keys when pressed by stylus */ else if (data[0] == 4) { jitterable = data[1] & 0x18; dv = (data[1] & 0x01) != 0 ? 1 : 0; p = (data[1] & 0x02) != 0 ? 1 : 0; tip = (data[1] & 0x04) != 0 ? 1 : 0; bs = (data[1] & aiptek->curSetting.stylusButtonLower) != 0 ? 1 : 0; pck = (data[1] & aiptek->curSetting.stylusButtonUpper) != 0 ? 1 : 0; macro = dv && p && tip && !(data[3] & 1) ? (data[3] >> 1) : -1; z = get_unaligned_le16(data + 4); if (dv) { /* If the selected tool changed, reset the old * tool key, and set the new one. 
*/ if (aiptek->previousToolMode != aiptek->curSetting.toolMode) { input_report_key(inputdev, aiptek->previousToolMode, 0); input_report_key(inputdev, aiptek->curSetting.toolMode, 1); aiptek->previousToolMode = aiptek->curSetting.toolMode; } } if (aiptek->lastMacro != -1 && aiptek->lastMacro != macro) { input_report_key(inputdev, macroKeyEvents[aiptek->lastMacro], 0); aiptek->lastMacro = -1; } if (macro != -1 && macro != aiptek->lastMacro) { input_report_key(inputdev, macroKeyEvents[macro], 1); aiptek->lastMacro = macro; } input_report_abs(inputdev, ABS_MISC, p | AIPTEK_REPORT_TOOL_STYLUS); input_sync(inputdev); } /* Report 5s come from the macro keys when pressed by mouse */ else if (data[0] == 5) { jitterable = data[1] & 0x1c; dv = (data[1] & 0x01) != 0 ? 1 : 0; p = (data[1] & 0x02) != 0 ? 1 : 0; left = (data[1]& aiptek->curSetting.mouseButtonLeft) != 0 ? 1 : 0; right = (data[1] & aiptek->curSetting.mouseButtonRight) != 0 ? 1 : 0; middle = (data[1] & aiptek->curSetting.mouseButtonMiddle) != 0 ? 1 : 0; macro = dv && p && left && !(data[3] & 1) ? (data[3] >> 1) : 0; if (dv) { /* If the selected tool changed, reset the old * tool key, and set the new one. */ if (aiptek->previousToolMode != aiptek->curSetting.toolMode) { input_report_key(inputdev, aiptek->previousToolMode, 0); input_report_key(inputdev, aiptek->curSetting.toolMode, 1); aiptek->previousToolMode = aiptek->curSetting.toolMode; } } if (aiptek->lastMacro != -1 && aiptek->lastMacro != macro) { input_report_key(inputdev, macroKeyEvents[aiptek->lastMacro], 0); aiptek->lastMacro = -1; } if (macro != -1 && macro != aiptek->lastMacro) { input_report_key(inputdev, macroKeyEvents[macro], 1); aiptek->lastMacro = macro; } input_report_abs(inputdev, ABS_MISC, p | AIPTEK_REPORT_TOOL_MOUSE); input_sync(inputdev); } /* We have no idea which tool can generate a report 6. Theoretically, * neither need to, having been given reports 4 & 5 for such use. 
* However, report 6 is the 'official-looking' report for macroKeys; * reports 4 & 5 supposively are used to support unnamed, unknown * hat switches (which just so happen to be the macroKeys.) */ else if (data[0] == 6) { macro = get_unaligned_le16(data + 1); if (macro > 0) { input_report_key(inputdev, macroKeyEvents[macro - 1], 0); } if (macro < 25) { input_report_key(inputdev, macroKeyEvents[macro + 1], 0); } /* If the selected tool changed, reset the old tool key, and set the new one. */ if (aiptek->previousToolMode != aiptek->curSetting.toolMode) { input_report_key(inputdev, aiptek->previousToolMode, 0); input_report_key(inputdev, aiptek->curSetting.toolMode, 1); aiptek->previousToolMode = aiptek->curSetting.toolMode; } input_report_key(inputdev, macroKeyEvents[macro], 1); input_report_abs(inputdev, ABS_MISC, 1 | AIPTEK_REPORT_TOOL_UNKNOWN); input_sync(inputdev); } else { dev_dbg(&intf->dev, "Unknown report %d\n", data[0]); } /* Jitter may occur when the user presses a button on the stlyus * or the mouse. What we do to prevent that is wait 'x' milliseconds * following a 'jitterable' event, which should give the hand some time * stabilize itself. * * We just introduced aiptek->previousJitterable to carry forth the * notion that jitter occurs when the button state changes from on to off: * a person drawing, holding a button down is not subject to jittering. * With that in mind, changing from upper button depressed to lower button * WILL transition through a jitter delay. 
*/ if (aiptek->previousJitterable != jitterable && aiptek->curSetting.jitterDelay != 0 && aiptek->inDelay != 1) { aiptek->endDelay = jiffies + ((aiptek->curSetting.jitterDelay * HZ) / 1000); aiptek->inDelay = 1; } aiptek->previousJitterable = jitterable; exit: retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval != 0) { dev_err(&intf->dev, "%s - usb_submit_urb failed with result %d\n", __func__, retval); } } /*********************************************************************** * These are the USB id's known so far. We do not identify them to * specific Aiptek model numbers, because there has been overlaps, * use, and reuse of id's in existing models. Certain models have * been known to use more than one ID, indicative perhaps of * manufacturing revisions. In any event, we consider these * IDs to not be model-specific nor unique. */ static const struct usb_device_id aiptek_ids[] = { {USB_DEVICE(USB_VENDOR_ID_AIPTEK, 0x01)}, {USB_DEVICE(USB_VENDOR_ID_AIPTEK, 0x10)}, {USB_DEVICE(USB_VENDOR_ID_AIPTEK, 0x20)}, {USB_DEVICE(USB_VENDOR_ID_AIPTEK, 0x21)}, {USB_DEVICE(USB_VENDOR_ID_AIPTEK, 0x22)}, {USB_DEVICE(USB_VENDOR_ID_AIPTEK, 0x23)}, {USB_DEVICE(USB_VENDOR_ID_AIPTEK, 0x24)}, {USB_DEVICE(USB_VENDOR_ID_KYE, 0x5003)}, {} }; MODULE_DEVICE_TABLE(usb, aiptek_ids); /*********************************************************************** * Open an instance of the tablet driver. */ static int aiptek_open(struct input_dev *inputdev) { struct aiptek *aiptek = input_get_drvdata(inputdev); aiptek->urb->dev = interface_to_usbdev(aiptek->intf); if (usb_submit_urb(aiptek->urb, GFP_KERNEL) != 0) return -EIO; return 0; } /*********************************************************************** * Close an instance of the tablet driver. 
*/ static void aiptek_close(struct input_dev *inputdev) { struct aiptek *aiptek = input_get_drvdata(inputdev); usb_kill_urb(aiptek->urb); } /*********************************************************************** * aiptek_set_report and aiptek_get_report() are borrowed from Linux 2.4.x, * where they were known as usb_set_report and usb_get_report. */ static int aiptek_set_report(struct aiptek *aiptek, unsigned char report_type, unsigned char report_id, void *buffer, int size) { struct usb_device *udev = interface_to_usbdev(aiptek->intf); return usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_REQ_SET_REPORT, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_OUT, (report_type << 8) + report_id, aiptek->ifnum, buffer, size, 5000); } static int aiptek_get_report(struct aiptek *aiptek, unsigned char report_type, unsigned char report_id, void *buffer, int size) { struct usb_device *udev = interface_to_usbdev(aiptek->intf); return usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), USB_REQ_GET_REPORT, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN, (report_type << 8) + report_id, aiptek->ifnum, buffer, size, 5000); } /*********************************************************************** * Send a command to the tablet. */ static int aiptek_command(struct aiptek *aiptek, unsigned char command, unsigned char data) { const int sizeof_buf = 3 * sizeof(u8); int ret; u8 *buf; buf = kmalloc(sizeof_buf, GFP_KERNEL); if (!buf) return -ENOMEM; buf[0] = 2; buf[1] = command; buf[2] = data; if ((ret = aiptek_set_report(aiptek, 3, 2, buf, sizeof_buf)) != sizeof_buf) { dev_dbg(&aiptek->intf->dev, "aiptek_program: failed, tried to send: 0x%02x 0x%02x\n", command, data); } kfree(buf); return ret < 0 ? ret : 0; } /*********************************************************************** * Retrieve information from the tablet. Querying info is defined as first * sending the {command,data} sequence as a command, followed by a wait * (aka, "programmaticDelay") and then a "read" request. 
*/ static int aiptek_query(struct aiptek *aiptek, unsigned char command, unsigned char data) { const int sizeof_buf = 3 * sizeof(u8); int ret; u8 *buf; buf = kmalloc(sizeof_buf, GFP_KERNEL); if (!buf) return -ENOMEM; buf[0] = 2; buf[1] = command; buf[2] = data; if (aiptek_command(aiptek, command, data) != 0) { kfree(buf); return -EIO; } msleep(aiptek->curSetting.programmableDelay); if (aiptek_get_report(aiptek, 3, 2, buf, sizeof_buf) != sizeof_buf) { dev_dbg(&aiptek->intf->dev, "aiptek_query failed: returned 0x%02x 0x%02x 0x%02x\n", buf[0], buf[1], buf[2]); ret = -EIO; } else { ret = get_unaligned_le16(buf + 1); } kfree(buf); return ret; } /*********************************************************************** * Program the tablet into either absolute or relative mode. * We also get information about the tablet's size. */ static int aiptek_program_tablet(struct aiptek *aiptek) { int ret; /* Execute Resolution500LPI */ if ((ret = aiptek_command(aiptek, 0x18, 0x04)) < 0) return ret; /* Query getModelCode */ if ((ret = aiptek_query(aiptek, 0x02, 0x00)) < 0) return ret; aiptek->features.modelCode = ret & 0xff; /* Query getODMCode */ if ((ret = aiptek_query(aiptek, 0x03, 0x00)) < 0) return ret; aiptek->features.odmCode = ret; /* Query getFirmwareCode */ if ((ret = aiptek_query(aiptek, 0x04, 0x00)) < 0) return ret; aiptek->features.firmwareCode = ret; /* Query getXextension */ if ((ret = aiptek_query(aiptek, 0x01, 0x00)) < 0) return ret; input_set_abs_params(aiptek->inputdev, ABS_X, 0, ret - 1, 0, 0); /* Query getYextension */ if ((ret = aiptek_query(aiptek, 0x01, 0x01)) < 0) return ret; input_set_abs_params(aiptek->inputdev, ABS_Y, 0, ret - 1, 0, 0); /* Query getPressureLevels */ if ((ret = aiptek_query(aiptek, 0x08, 0x00)) < 0) return ret; input_set_abs_params(aiptek->inputdev, ABS_PRESSURE, 0, ret - 1, 0, 0); /* Depending on whether we are in absolute or relative mode, we will * do a switchToTablet(absolute) or switchToMouse(relative) command. 
*/ if (aiptek->curSetting.coordinateMode == AIPTEK_COORDINATE_ABSOLUTE_MODE) { /* Execute switchToTablet */ if ((ret = aiptek_command(aiptek, 0x10, 0x01)) < 0) { return ret; } } else { /* Execute switchToMouse */ if ((ret = aiptek_command(aiptek, 0x10, 0x00)) < 0) { return ret; } } /* Enable the macro keys */ if ((ret = aiptek_command(aiptek, 0x11, 0x02)) < 0) return ret; #if 0 /* Execute FilterOn */ if ((ret = aiptek_command(aiptek, 0x17, 0x00)) < 0) return ret; #endif /* Execute AutoGainOn */ if ((ret = aiptek_command(aiptek, 0x12, 0xff)) < 0) return ret; /* Reset the eventCount, so we track events from last (re)programming */ aiptek->diagnostic = AIPTEK_DIAGNOSTIC_NA; aiptek->eventCount = 0; return 0; } /*********************************************************************** * Sysfs functions. Sysfs prefers that individually-tunable parameters * exist in their separate pseudo-files. Summary data that is immutable * may exist in a singular file so long as you don't define a writeable * interface. */ /*********************************************************************** * support the 'size' file -- display support */ static ssize_t show_tabletSize(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "%dx%d\n", input_abs_get_max(aiptek->inputdev, ABS_X) + 1, input_abs_get_max(aiptek->inputdev, ABS_Y) + 1); } /* These structs define the sysfs files, param #1 is the name of the * file, param 2 is the file permissions, param 3 & 4 are to the * output generator and input parser routines. Absence of a routine is * permitted -- it only means can't either 'cat' the file, or send data * to it. */ static DEVICE_ATTR(size, S_IRUGO, show_tabletSize, NULL); /*********************************************************************** * support routines for the 'pointer_mode' file. Note that this file * both displays current setting and allows reprogramming. 
*/ static struct aiptek_map pointer_mode_map[] = { { "stylus", AIPTEK_POINTER_ONLY_STYLUS_MODE }, { "mouse", AIPTEK_POINTER_ONLY_MOUSE_MODE }, { "either", AIPTEK_POINTER_EITHER_MODE }, { NULL, AIPTEK_INVALID_VALUE } }; static ssize_t show_tabletPointerMode(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "%s\n", map_val_to_str(pointer_mode_map, aiptek->curSetting.pointerMode)); } static ssize_t store_tabletPointerMode(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct aiptek *aiptek = dev_get_drvdata(dev); int new_mode = map_str_to_val(pointer_mode_map, buf, count); if (new_mode == AIPTEK_INVALID_VALUE) return -EINVAL; aiptek->newSetting.pointerMode = new_mode; return count; } static DEVICE_ATTR(pointer_mode, S_IRUGO | S_IWUSR, show_tabletPointerMode, store_tabletPointerMode); /*********************************************************************** * support routines for the 'coordinate_mode' file. Note that this file * both displays current setting and allows reprogramming. 
*/ static struct aiptek_map coordinate_mode_map[] = { { "absolute", AIPTEK_COORDINATE_ABSOLUTE_MODE }, { "relative", AIPTEK_COORDINATE_RELATIVE_MODE }, { NULL, AIPTEK_INVALID_VALUE } }; static ssize_t show_tabletCoordinateMode(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "%s\n", map_val_to_str(coordinate_mode_map, aiptek->curSetting.coordinateMode)); } static ssize_t store_tabletCoordinateMode(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct aiptek *aiptek = dev_get_drvdata(dev); int new_mode = map_str_to_val(coordinate_mode_map, buf, count); if (new_mode == AIPTEK_INVALID_VALUE) return -EINVAL; aiptek->newSetting.coordinateMode = new_mode; return count; } static DEVICE_ATTR(coordinate_mode, S_IRUGO | S_IWUSR, show_tabletCoordinateMode, store_tabletCoordinateMode); /*********************************************************************** * support routines for the 'tool_mode' file. Note that this file * both displays current setting and allows reprogramming. 
*/ static struct aiptek_map tool_mode_map[] = { { "mouse", AIPTEK_TOOL_BUTTON_MOUSE_MODE }, { "eraser", AIPTEK_TOOL_BUTTON_ERASER_MODE }, { "pencil", AIPTEK_TOOL_BUTTON_PENCIL_MODE }, { "pen", AIPTEK_TOOL_BUTTON_PEN_MODE }, { "brush", AIPTEK_TOOL_BUTTON_BRUSH_MODE }, { "airbrush", AIPTEK_TOOL_BUTTON_AIRBRUSH_MODE }, { "lens", AIPTEK_TOOL_BUTTON_LENS_MODE }, { NULL, AIPTEK_INVALID_VALUE } }; static ssize_t show_tabletToolMode(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "%s\n", map_val_to_str(tool_mode_map, aiptek->curSetting.toolMode)); } static ssize_t store_tabletToolMode(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct aiptek *aiptek = dev_get_drvdata(dev); int new_mode = map_str_to_val(tool_mode_map, buf, count); if (new_mode == AIPTEK_INVALID_VALUE) return -EINVAL; aiptek->newSetting.toolMode = new_mode; return count; } static DEVICE_ATTR(tool_mode, S_IRUGO | S_IWUSR, show_tabletToolMode, store_tabletToolMode); /*********************************************************************** * support routines for the 'xtilt' file. Note that this file * both displays current setting and allows reprogramming. */ static ssize_t show_tabletXtilt(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); if (aiptek->curSetting.xTilt == AIPTEK_TILT_DISABLE) { return sysfs_emit(buf, "disable\n"); } else { return sysfs_emit(buf, "%d\n", aiptek->curSetting.xTilt); } } static ssize_t store_tabletXtilt(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct aiptek *aiptek = dev_get_drvdata(dev); int x; if (kstrtoint(buf, 10, &x)) { size_t len = buf[count - 1] == '\n' ? 
count - 1 : count; if (strncmp(buf, "disable", len)) return -EINVAL; aiptek->newSetting.xTilt = AIPTEK_TILT_DISABLE; } else { if (x < AIPTEK_TILT_MIN || x > AIPTEK_TILT_MAX) return -EINVAL; aiptek->newSetting.xTilt = x; } return count; } static DEVICE_ATTR(xtilt, S_IRUGO | S_IWUSR, show_tabletXtilt, store_tabletXtilt); /*********************************************************************** * support routines for the 'ytilt' file. Note that this file * both displays current setting and allows reprogramming. */ static ssize_t show_tabletYtilt(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); if (aiptek->curSetting.yTilt == AIPTEK_TILT_DISABLE) { return sysfs_emit(buf, "disable\n"); } else { return sysfs_emit(buf, "%d\n", aiptek->curSetting.yTilt); } } static ssize_t store_tabletYtilt(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct aiptek *aiptek = dev_get_drvdata(dev); int y; if (kstrtoint(buf, 10, &y)) { size_t len = buf[count - 1] == '\n' ? count - 1 : count; if (strncmp(buf, "disable", len)) return -EINVAL; aiptek->newSetting.yTilt = AIPTEK_TILT_DISABLE; } else { if (y < AIPTEK_TILT_MIN || y > AIPTEK_TILT_MAX) return -EINVAL; aiptek->newSetting.yTilt = y; } return count; } static DEVICE_ATTR(ytilt, S_IRUGO | S_IWUSR, show_tabletYtilt, store_tabletYtilt); /*********************************************************************** * support routines for the 'jitter' file. Note that this file * both displays current setting and allows reprogramming. 
*/ static ssize_t show_tabletJitterDelay(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "%d\n", aiptek->curSetting.jitterDelay); } static ssize_t store_tabletJitterDelay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct aiptek *aiptek = dev_get_drvdata(dev); int err, j; err = kstrtoint(buf, 10, &j); if (err) return err; aiptek->newSetting.jitterDelay = j; return count; } static DEVICE_ATTR(jitter, S_IRUGO | S_IWUSR, show_tabletJitterDelay, store_tabletJitterDelay); /*********************************************************************** * support routines for the 'delay' file. Note that this file * both displays current setting and allows reprogramming. */ static ssize_t show_tabletProgrammableDelay(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "%d\n", aiptek->curSetting.programmableDelay); } static ssize_t store_tabletProgrammableDelay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct aiptek *aiptek = dev_get_drvdata(dev); int err, d; err = kstrtoint(buf, 10, &d); if (err) return err; aiptek->newSetting.programmableDelay = d; return count; } static DEVICE_ATTR(delay, S_IRUGO | S_IWUSR, show_tabletProgrammableDelay, store_tabletProgrammableDelay); /*********************************************************************** * support routines for the 'event_count' file. Note that this file * only displays current setting. */ static ssize_t show_tabletEventsReceived(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "%ld\n", aiptek->eventCount); } static DEVICE_ATTR(event_count, S_IRUGO, show_tabletEventsReceived, NULL); /*********************************************************************** * support routines for the 'diagnostic' file. 
Note that this file
 * only displays current setting.
 *
 * Translates aiptek->diagnostic into a one-line message. An
 * unrecognised diagnostic code produces an empty read (returns 0).
 */
static ssize_t show_tabletDiagnosticMessage(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct aiptek *aiptek = dev_get_drvdata(dev);
	const char *retMsg;

	switch (aiptek->diagnostic) {
	case AIPTEK_DIAGNOSTIC_NA:
		retMsg = "no errors\n";
		break;

	case AIPTEK_DIAGNOSTIC_SENDING_RELATIVE_IN_ABSOLUTE:
		retMsg = "Error: receiving relative reports\n";
		break;

	case AIPTEK_DIAGNOSTIC_SENDING_ABSOLUTE_IN_RELATIVE:
		retMsg = "Error: receiving absolute reports\n";
		break;

	case AIPTEK_DIAGNOSTIC_TOOL_DISALLOWED:
		/* Name the tool that is disallowed by the current mode. */
		if (aiptek->curSetting.pointerMode ==
		    AIPTEK_POINTER_ONLY_MOUSE_MODE) {
			retMsg = "Error: receiving stylus reports\n";
		} else {
			retMsg = "Error: receiving mouse reports\n";
		}
		break;

	default:
		return 0;
	}

	/* Never pass a variable as the format string itself. */
	return sysfs_emit(buf, "%s", retMsg);
}

static DEVICE_ATTR(diagnostic, S_IRUGO, show_tabletDiagnosticMessage, NULL);

/***********************************************************************
 * support routines for the 'stylus_upper' file. Note that this file
 * both displays current setting and allows for setting changing.
*/ static struct aiptek_map stylus_button_map[] = { { "upper", AIPTEK_STYLUS_UPPER_BUTTON }, { "lower", AIPTEK_STYLUS_LOWER_BUTTON }, { NULL, AIPTEK_INVALID_VALUE } }; static ssize_t show_tabletStylusUpper(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "%s\n", map_val_to_str(stylus_button_map, aiptek->curSetting.stylusButtonUpper)); } static ssize_t store_tabletStylusUpper(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct aiptek *aiptek = dev_get_drvdata(dev); int new_button = map_str_to_val(stylus_button_map, buf, count); if (new_button == AIPTEK_INVALID_VALUE) return -EINVAL; aiptek->newSetting.stylusButtonUpper = new_button; return count; } static DEVICE_ATTR(stylus_upper, S_IRUGO | S_IWUSR, show_tabletStylusUpper, store_tabletStylusUpper); /*********************************************************************** * support routines for the 'stylus_lower' file. Note that this file * both displays current setting and allows for setting changing. */ static ssize_t show_tabletStylusLower(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "%s\n", map_val_to_str(stylus_button_map, aiptek->curSetting.stylusButtonLower)); } static ssize_t store_tabletStylusLower(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct aiptek *aiptek = dev_get_drvdata(dev); int new_button = map_str_to_val(stylus_button_map, buf, count); if (new_button == AIPTEK_INVALID_VALUE) return -EINVAL; aiptek->newSetting.stylusButtonLower = new_button; return count; } static DEVICE_ATTR(stylus_lower, S_IRUGO | S_IWUSR, show_tabletStylusLower, store_tabletStylusLower); /*********************************************************************** * support routines for the 'mouse_left' file. 
Note that this file * both displays current setting and allows for setting changing. */ static struct aiptek_map mouse_button_map[] = { { "left", AIPTEK_MOUSE_LEFT_BUTTON }, { "middle", AIPTEK_MOUSE_MIDDLE_BUTTON }, { "right", AIPTEK_MOUSE_RIGHT_BUTTON }, { NULL, AIPTEK_INVALID_VALUE } }; static ssize_t show_tabletMouseLeft(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "%s\n", map_val_to_str(mouse_button_map, aiptek->curSetting.mouseButtonLeft)); } static ssize_t store_tabletMouseLeft(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct aiptek *aiptek = dev_get_drvdata(dev); int new_button = map_str_to_val(mouse_button_map, buf, count); if (new_button == AIPTEK_INVALID_VALUE) return -EINVAL; aiptek->newSetting.mouseButtonLeft = new_button; return count; } static DEVICE_ATTR(mouse_left, S_IRUGO | S_IWUSR, show_tabletMouseLeft, store_tabletMouseLeft); /*********************************************************************** * support routines for the 'mouse_middle' file. Note that this file * both displays current setting and allows for setting changing. 
*/ static ssize_t show_tabletMouseMiddle(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "%s\n", map_val_to_str(mouse_button_map, aiptek->curSetting.mouseButtonMiddle)); } static ssize_t store_tabletMouseMiddle(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct aiptek *aiptek = dev_get_drvdata(dev); int new_button = map_str_to_val(mouse_button_map, buf, count); if (new_button == AIPTEK_INVALID_VALUE) return -EINVAL; aiptek->newSetting.mouseButtonMiddle = new_button; return count; } static DEVICE_ATTR(mouse_middle, S_IRUGO | S_IWUSR, show_tabletMouseMiddle, store_tabletMouseMiddle); /*********************************************************************** * support routines for the 'mouse_right' file. Note that this file * both displays current setting and allows for setting changing. */ static ssize_t show_tabletMouseRight(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "%s\n", map_val_to_str(mouse_button_map, aiptek->curSetting.mouseButtonRight)); } static ssize_t store_tabletMouseRight(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct aiptek *aiptek = dev_get_drvdata(dev); int new_button = map_str_to_val(mouse_button_map, buf, count); if (new_button == AIPTEK_INVALID_VALUE) return -EINVAL; aiptek->newSetting.mouseButtonRight = new_button; return count; } static DEVICE_ATTR(mouse_right, S_IRUGO | S_IWUSR, show_tabletMouseRight, store_tabletMouseRight); /*********************************************************************** * support routines for the 'wheel' file. Note that this file * both displays current setting and allows for setting changing. 
*/ static ssize_t show_tabletWheel(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); if (aiptek->curSetting.wheel == AIPTEK_WHEEL_DISABLE) { return sysfs_emit(buf, "disable\n"); } else { return sysfs_emit(buf, "%d\n", aiptek->curSetting.wheel); } } static ssize_t store_tabletWheel(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct aiptek *aiptek = dev_get_drvdata(dev); int err, w; err = kstrtoint(buf, 10, &w); if (err) return err; aiptek->newSetting.wheel = w; return count; } static DEVICE_ATTR(wheel, S_IRUGO | S_IWUSR, show_tabletWheel, store_tabletWheel); /*********************************************************************** * support routines for the 'execute' file. Note that this file * both displays current setting and allows for setting changing. */ static ssize_t show_tabletExecute(struct device *dev, struct device_attribute *attr, char *buf) { /* There is nothing useful to display, so a one-line manual * is in order... */ return sysfs_emit(buf, "Write anything to this file to program your tablet.\n"); } static ssize_t store_tabletExecute(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct aiptek *aiptek = dev_get_drvdata(dev); /* We do not care what you write to this file. Merely the action * of writing to this file triggers a tablet reprogramming. */ memcpy(&aiptek->curSetting, &aiptek->newSetting, sizeof(struct aiptek_settings)); if (aiptek_program_tablet(aiptek) < 0) return -EIO; return count; } static DEVICE_ATTR(execute, S_IRUGO | S_IWUSR, show_tabletExecute, store_tabletExecute); /*********************************************************************** * support routines for the 'odm_code' file. Note that this file * only displays current setting. 
*/ static ssize_t show_tabletODMCode(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "0x%04x\n", aiptek->features.odmCode); } static DEVICE_ATTR(odm_code, S_IRUGO, show_tabletODMCode, NULL); /*********************************************************************** * support routines for the 'model_code' file. Note that this file * only displays current setting. */ static ssize_t show_tabletModelCode(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "0x%04x\n", aiptek->features.modelCode); } static DEVICE_ATTR(model_code, S_IRUGO, show_tabletModelCode, NULL); /*********************************************************************** * support routines for the 'firmware_code' file. Note that this file * only displays current setting. */ static ssize_t show_firmwareCode(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "%04x\n", aiptek->features.firmwareCode); } static DEVICE_ATTR(firmware_code, S_IRUGO, show_firmwareCode, NULL); static struct attribute *aiptek_dev_attrs[] = { &dev_attr_size.attr, &dev_attr_pointer_mode.attr, &dev_attr_coordinate_mode.attr, &dev_attr_tool_mode.attr, &dev_attr_xtilt.attr, &dev_attr_ytilt.attr, &dev_attr_jitter.attr, &dev_attr_delay.attr, &dev_attr_event_count.attr, &dev_attr_diagnostic.attr, &dev_attr_odm_code.attr, &dev_attr_model_code.attr, &dev_attr_firmware_code.attr, &dev_attr_stylus_lower.attr, &dev_attr_stylus_upper.attr, &dev_attr_mouse_left.attr, &dev_attr_mouse_middle.attr, &dev_attr_mouse_right.attr, &dev_attr_wheel.attr, &dev_attr_execute.attr, NULL }; ATTRIBUTE_GROUPS(aiptek_dev); /*********************************************************************** * This routine is called when a tablet has been identified. 
It basically * sets up the tablet and the driver's internal structures. */ static int aiptek_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *usbdev = interface_to_usbdev(intf); struct usb_endpoint_descriptor *endpoint; struct aiptek *aiptek; struct input_dev *inputdev; int i; int speeds[] = { 0, AIPTEK_PROGRAMMABLE_DELAY_50, AIPTEK_PROGRAMMABLE_DELAY_400, AIPTEK_PROGRAMMABLE_DELAY_25, AIPTEK_PROGRAMMABLE_DELAY_100, AIPTEK_PROGRAMMABLE_DELAY_200, AIPTEK_PROGRAMMABLE_DELAY_300 }; int err = -ENOMEM; /* programmableDelay is where the command-line specified * delay is kept. We make it the first element of speeds[], * so therefore, your override speed is tried first, then the * remainder. Note that the default value of 400ms will be tried * if you do not specify any command line parameter. */ speeds[0] = programmableDelay; aiptek = kzalloc_obj(*aiptek); inputdev = input_allocate_device(); if (!aiptek || !inputdev) { dev_warn(&intf->dev, "cannot allocate memory or input device\n"); goto fail1; } aiptek->data = usb_alloc_coherent(usbdev, AIPTEK_PACKET_LENGTH, GFP_KERNEL, &aiptek->data_dma); if (!aiptek->data) { dev_warn(&intf->dev, "cannot allocate usb buffer\n"); goto fail1; } aiptek->urb = usb_alloc_urb(0, GFP_KERNEL); if (!aiptek->urb) { dev_warn(&intf->dev, "cannot allocate urb\n"); goto fail2; } aiptek->inputdev = inputdev; aiptek->intf = intf; aiptek->ifnum = intf->cur_altsetting->desc.bInterfaceNumber; aiptek->inDelay = 0; aiptek->endDelay = 0; aiptek->previousJitterable = 0; aiptek->lastMacro = -1; /* Set up the curSettings struct. Said struct contains the current * programmable parameters. The newSetting struct contains changes * the user makes to the settings via the sysfs interface. Those * changes are not "committed" to curSettings until the user * writes to the sysfs/.../execute file. 
*/ aiptek->curSetting.pointerMode = AIPTEK_POINTER_EITHER_MODE; aiptek->curSetting.coordinateMode = AIPTEK_COORDINATE_ABSOLUTE_MODE; aiptek->curSetting.toolMode = AIPTEK_TOOL_BUTTON_PEN_MODE; aiptek->curSetting.xTilt = AIPTEK_TILT_DISABLE; aiptek->curSetting.yTilt = AIPTEK_TILT_DISABLE; aiptek->curSetting.mouseButtonLeft = AIPTEK_MOUSE_LEFT_BUTTON; aiptek->curSetting.mouseButtonMiddle = AIPTEK_MOUSE_MIDDLE_BUTTON; aiptek->curSetting.mouseButtonRight = AIPTEK_MOUSE_RIGHT_BUTTON; aiptek->curSetting.stylusButtonUpper = AIPTEK_STYLUS_UPPER_BUTTON; aiptek->curSetting.stylusButtonLower = AIPTEK_STYLUS_LOWER_BUTTON; aiptek->curSetting.jitterDelay = jitterDelay; aiptek->curSetting.programmableDelay = programmableDelay; /* Both structs should have equivalent settings */ aiptek->newSetting = aiptek->curSetting; /* Determine the usb devices' physical path. * Asketh not why we always pretend we're using "../input0", * but I suspect this will have to be refactored one * day if a single USB device can be a keyboard & a mouse * & a tablet, and the inputX number actually will tell * us something... */ usb_make_path(usbdev, aiptek->features.usbPath, sizeof(aiptek->features.usbPath)); strlcat(aiptek->features.usbPath, "/input0", sizeof(aiptek->features.usbPath)); /* Set up client data, pointers to open and close routines * for the input device. */ inputdev->name = "Aiptek"; inputdev->phys = aiptek->features.usbPath; usb_to_input_id(usbdev, &inputdev->id); inputdev->dev.parent = &intf->dev; input_set_drvdata(inputdev, aiptek); inputdev->open = aiptek_open; inputdev->close = aiptek_close; /* Now program the capacities of the tablet, in terms of being * an input device. 
*/ for (i = 0; i < ARRAY_SIZE(eventTypes); ++i) __set_bit(eventTypes[i], inputdev->evbit); for (i = 0; i < ARRAY_SIZE(absEvents); ++i) __set_bit(absEvents[i], inputdev->absbit); for (i = 0; i < ARRAY_SIZE(relEvents); ++i) __set_bit(relEvents[i], inputdev->relbit); __set_bit(MSC_SERIAL, inputdev->mscbit); /* Set up key and button codes */ for (i = 0; i < ARRAY_SIZE(buttonEvents); ++i) __set_bit(buttonEvents[i], inputdev->keybit); for (i = 0; i < ARRAY_SIZE(macroKeyEvents); ++i) __set_bit(macroKeyEvents[i], inputdev->keybit); /* * Program the input device coordinate capacities. We do not yet * know what maximum X, Y, and Z values are, so we're putting fake * values in. Later, we'll ask the tablet to put in the correct * values. */ input_set_abs_params(inputdev, ABS_X, 0, 2999, 0, 0); input_set_abs_params(inputdev, ABS_Y, 0, 2249, 0, 0); input_set_abs_params(inputdev, ABS_PRESSURE, 0, 511, 0, 0); input_set_abs_params(inputdev, ABS_TILT_X, AIPTEK_TILT_MIN, AIPTEK_TILT_MAX, 0, 0); input_set_abs_params(inputdev, ABS_TILT_Y, AIPTEK_TILT_MIN, AIPTEK_TILT_MAX, 0, 0); input_set_abs_params(inputdev, ABS_WHEEL, AIPTEK_WHEEL_MIN, AIPTEK_WHEEL_MAX - 1, 0, 0); err = usb_find_common_endpoints(intf->cur_altsetting, NULL, NULL, &endpoint, NULL); if (err) { dev_err(&intf->dev, "interface has no int in endpoints, but must have minimum 1\n"); goto fail3; } /* Go set up our URB, which is called when the tablet receives * input. */ usb_fill_int_urb(aiptek->urb, usbdev, usb_rcvintpipe(usbdev, endpoint->bEndpointAddress), aiptek->data, 8, aiptek_irq, aiptek, endpoint->bInterval); aiptek->urb->transfer_dma = aiptek->data_dma; aiptek->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; /* Program the tablet. This sets the tablet up in the mode * specified in newSetting, and also queries the tablet's * physical capacities. * * Sanity check: if a tablet doesn't like the slow programmatic * delay, we often get sizes of 0x0. Let's use that as an indicator * to try faster delays, up to 25 ms. 
If that logic fails, well, you'll * have to explain to us how your tablet thinks it's 0x0, and yet that's * not an error :-) */ for (i = 0; i < ARRAY_SIZE(speeds); ++i) { aiptek->curSetting.programmableDelay = speeds[i]; (void)aiptek_program_tablet(aiptek); if (input_abs_get_max(aiptek->inputdev, ABS_X) > 0) { dev_info(&intf->dev, "Aiptek using %d ms programming speed\n", aiptek->curSetting.programmableDelay); break; } } /* Murphy says that some day someone will have a tablet that fails the above test. That's you, Frederic Rodrigo */ if (i == ARRAY_SIZE(speeds)) { dev_info(&intf->dev, "Aiptek tried all speeds, no sane response\n"); err = -EINVAL; goto fail3; } /* Associate this driver's struct with the usb interface. */ usb_set_intfdata(intf, aiptek); /* Register the tablet as an Input Device */ err = input_register_device(aiptek->inputdev); if (err) { dev_warn(&intf->dev, "input_register_device returned err: %d\n", err); goto fail3; } return 0; fail3: usb_free_urb(aiptek->urb); fail2: usb_free_coherent(usbdev, AIPTEK_PACKET_LENGTH, aiptek->data, aiptek->data_dma); fail1: usb_set_intfdata(intf, NULL); input_free_device(inputdev); kfree(aiptek); return err; } /*********************************************************************** * Deal with tablet disconnecting from the system. */ static void aiptek_disconnect(struct usb_interface *intf) { struct aiptek *aiptek = usb_get_intfdata(intf); /* Disassociate driver's struct with usb interface */ usb_set_intfdata(intf, NULL); if (aiptek != NULL) { /* Free & unhook everything from the system. 
*/ usb_kill_urb(aiptek->urb); input_unregister_device(aiptek->inputdev); usb_free_urb(aiptek->urb); usb_free_coherent(interface_to_usbdev(intf), AIPTEK_PACKET_LENGTH, aiptek->data, aiptek->data_dma); kfree(aiptek); } } static struct usb_driver aiptek_driver = { .name = "aiptek", .probe = aiptek_probe, .disconnect = aiptek_disconnect, .id_table = aiptek_ids, .dev_groups = aiptek_dev_groups, }; module_usb_driver(aiptek_driver); MODULE_AUTHOR("Bryan W. Headley/Chris Atenasio/Cedric Brun/Rene van Paassen"); MODULE_DESCRIPTION("Aiptek HyperPen USB Tablet Driver"); MODULE_LICENSE("GPL"); module_param(programmableDelay, int, 0); MODULE_PARM_DESC(programmableDelay, "delay used during tablet programming"); module_param(jitterDelay, int, 0); MODULE_PARM_DESC(jitterDelay, "stylus/mouse settlement delay");
14 14 14 5 5 5 5 4 4 1 4 1 4 1 4 1 4 1 4 4 4 6 5 5 4 5 6 6 6 6 6 6 6 6 3 6 6 6 6 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* * Codel - The Controlled-Delay Active Queue Management algorithm * * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com> * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net> * * Implemented on linux by : * Copyright (C) 2012 Michael D. 
Taht <dave.taht@bufferbloat.net> * Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com> */ #include <linux/module.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/skbuff.h> #include <linux/prefetch.h> #include <net/pkt_sched.h> #include <net/codel.h> #include <net/codel_impl.h> #include <net/codel_qdisc.h> #define DEFAULT_CODEL_LIMIT 1000 struct codel_sched_data { struct codel_params params; struct codel_vars vars; struct codel_stats stats; u32 drop_overlimit; }; /* This is the specific function called from codel_dequeue() * to dequeue a packet from queue. Note: backlog is handled in * codel, we dont need to reduce it here. */ static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx) { struct Qdisc *sch = ctx; struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); if (skb) { sch->qstats.backlog -= qdisc_pkt_len(skb); prefetch(&skb->end); /* we'll need skb_shinfo() */ } return skb; } static void drop_func(struct sk_buff *skb, void *ctx) { struct Qdisc *sch = ctx; qdisc_dequeue_drop(sch, skb, SKB_DROP_REASON_QDISC_CONGESTED); qdisc_qstats_drop(sch); } static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch) { struct codel_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; skb = codel_dequeue(sch, &sch->qstats.backlog, &q->params, &q->vars, &q->stats, qdisc_pkt_len, codel_get_enqueue_time, drop_func, dequeue_func); if (q->stats.drop_count) { qdisc_tree_reduce_backlog(sch, q->stats.drop_count, q->stats.drop_len); q->stats.drop_count = 0; q->stats.drop_len = 0; } if (skb) qdisc_bstats_update(sch, skb); return skb; } static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct codel_sched_data *q; if (likely(qdisc_qlen(sch) < sch->limit)) { codel_set_enqueue_time(skb); return qdisc_enqueue_tail(skb, sch); } q = qdisc_priv(sch); q->drop_overlimit++; return qdisc_drop_reason(skb, sch, to_free, SKB_DROP_REASON_QDISC_OVERLIMIT); } 
static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = { [TCA_CODEL_TARGET] = { .type = NLA_U32 }, [TCA_CODEL_LIMIT] = { .type = NLA_U32 }, [TCA_CODEL_INTERVAL] = { .type = NLA_U32 }, [TCA_CODEL_ECN] = { .type = NLA_U32 }, [TCA_CODEL_CE_THRESHOLD]= { .type = NLA_U32 }, }; static int codel_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { unsigned int dropped_pkts = 0, dropped_bytes = 0; struct codel_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_CODEL_MAX + 1]; int err; err = nla_parse_nested_deprecated(tb, TCA_CODEL_MAX, opt, codel_policy, NULL); if (err < 0) return err; sch_tree_lock(sch); if (tb[TCA_CODEL_TARGET]) { u32 target = nla_get_u32(tb[TCA_CODEL_TARGET]); WRITE_ONCE(q->params.target, ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT); } if (tb[TCA_CODEL_CE_THRESHOLD]) { u64 val = nla_get_u32(tb[TCA_CODEL_CE_THRESHOLD]); WRITE_ONCE(q->params.ce_threshold, (val * NSEC_PER_USEC) >> CODEL_SHIFT); } if (tb[TCA_CODEL_INTERVAL]) { u32 interval = nla_get_u32(tb[TCA_CODEL_INTERVAL]); WRITE_ONCE(q->params.interval, ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT); } if (tb[TCA_CODEL_LIMIT]) WRITE_ONCE(sch->limit, nla_get_u32(tb[TCA_CODEL_LIMIT])); if (tb[TCA_CODEL_ECN]) WRITE_ONCE(q->params.ecn, !!nla_get_u32(tb[TCA_CODEL_ECN])); while (sch->q.qlen > sch->limit) { struct sk_buff *skb = qdisc_dequeue_internal(sch, true); if (!skb) break; dropped_pkts++; dropped_bytes += qdisc_pkt_len(skb); rtnl_qdisc_drop(skb, sch); } qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes); sch_tree_unlock(sch); return 0; } static int codel_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct codel_sched_data *q = qdisc_priv(sch); sch->limit = DEFAULT_CODEL_LIMIT; codel_params_init(&q->params); codel_vars_init(&q->vars); codel_stats_init(&q->stats); q->params.mtu = psched_mtu(qdisc_dev(sch)); if (opt) { int err = codel_change(sch, opt, extack); if (err) return err; } if (sch->limit >= 1) sch->flags |= 
TCQ_F_CAN_BYPASS; else sch->flags &= ~TCQ_F_CAN_BYPASS; sch->flags |= TCQ_F_DEQUEUE_DROPS; return 0; } static int codel_dump(struct Qdisc *sch, struct sk_buff *skb) { struct codel_sched_data *q = qdisc_priv(sch); codel_time_t ce_threshold; struct nlattr *opts; opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; if (nla_put_u32(skb, TCA_CODEL_TARGET, codel_time_to_us(READ_ONCE(q->params.target))) || nla_put_u32(skb, TCA_CODEL_LIMIT, READ_ONCE(sch->limit)) || nla_put_u32(skb, TCA_CODEL_INTERVAL, codel_time_to_us(READ_ONCE(q->params.interval))) || nla_put_u32(skb, TCA_CODEL_ECN, READ_ONCE(q->params.ecn))) goto nla_put_failure; ce_threshold = READ_ONCE(q->params.ce_threshold); if (ce_threshold != CODEL_DISABLED_THRESHOLD && nla_put_u32(skb, TCA_CODEL_CE_THRESHOLD, codel_time_to_us(ce_threshold))) goto nla_put_failure; return nla_nest_end(skb, opts); nla_put_failure: nla_nest_cancel(skb, opts); return -1; } static int codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { const struct codel_sched_data *q = qdisc_priv(sch); struct tc_codel_xstats st = { .maxpacket = q->stats.maxpacket, .count = q->vars.count, .lastcount = q->vars.lastcount, .drop_overlimit = q->drop_overlimit, .ldelay = codel_time_to_us(q->vars.ldelay), .dropping = q->vars.dropping, .ecn_mark = q->stats.ecn_mark, .ce_mark = q->stats.ce_mark, }; if (q->vars.dropping) { codel_tdiff_t delta = q->vars.drop_next - codel_get_time(); if (delta >= 0) st.drop_next = codel_time_to_us(delta); else st.drop_next = -codel_time_to_us(-delta); } return gnet_stats_copy_app(d, &st, sizeof(st)); } static void codel_reset(struct Qdisc *sch) { struct codel_sched_data *q = qdisc_priv(sch); qdisc_reset_queue(sch); codel_vars_init(&q->vars); } static struct Qdisc_ops codel_qdisc_ops __read_mostly = { .id = "codel", .priv_size = sizeof(struct codel_sched_data), .enqueue = codel_qdisc_enqueue, .dequeue = codel_qdisc_dequeue, .peek = qdisc_peek_dequeued, .init = codel_init, .reset = 
codel_reset, .change = codel_change, .dump = codel_dump, .dump_stats = codel_dump_stats, .owner = THIS_MODULE, }; MODULE_ALIAS_NET_SCH("codel"); static int __init codel_module_init(void) { return register_qdisc(&codel_qdisc_ops); } static void __exit codel_module_exit(void) { unregister_qdisc(&codel_qdisc_ops); } module_init(codel_module_init) module_exit(codel_module_exit) MODULE_DESCRIPTION("Controlled Delay queue discipline"); MODULE_AUTHOR("Dave Taht"); MODULE_AUTHOR("Eric Dumazet"); MODULE_LICENSE("Dual BSD/GPL");
5 2 2 5 6 6 6 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 // SPDX-License-Identifier: GPL-2.0-or-later /* * CMAC: Cipher Block Mode for Authentication * * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> * * Based on work by: * Copyright © 2013 Tom St Denis <tstdenis@elliptictech.com> * Based on crypto/xcbc.c: * Copyright © 2006 USAGI/WIDE Project, * Author: Kazunori Miyazawa <miyazawa@linux-ipv6.org> */ #include <crypto/internal/cipher.h> #include <crypto/internal/hash.h> #include <crypto/utils.h> #include <linux/err.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/string.h> /* * +------------------------ * | <parent tfm> * +------------------------ * | cmac_tfm_ctx * +------------------------ * | consts (block size * 2) * +------------------------ */ struct cmac_tfm_ctx { struct crypto_cipher *child; __be64 consts[]; }; static int crypto_cmac_digest_setkey(struct crypto_shash *parent, const u8 *inkey, unsigned int keylen) { struct cmac_tfm_ctx *ctx = crypto_shash_ctx(parent); unsigned int bs = crypto_shash_blocksize(parent); __be64 
*consts = ctx->consts; u64 _const[2]; int i, err = 0; u8 msb_mask, gfmask; err = crypto_cipher_setkey(ctx->child, inkey, keylen); if (err) return err; /* encrypt the zero block */ memset(consts, 0, bs); crypto_cipher_encrypt_one(ctx->child, (u8 *)consts, (u8 *)consts); switch (bs) { case 16: gfmask = 0x87; _const[0] = be64_to_cpu(consts[1]); _const[1] = be64_to_cpu(consts[0]); /* gf(2^128) multiply zero-ciphertext with u and u^2 */ for (i = 0; i < 4; i += 2) { msb_mask = ((s64)_const[1] >> 63) & gfmask; _const[1] = (_const[1] << 1) | (_const[0] >> 63); _const[0] = (_const[0] << 1) ^ msb_mask; consts[i + 0] = cpu_to_be64(_const[1]); consts[i + 1] = cpu_to_be64(_const[0]); } break; case 8: gfmask = 0x1B; _const[0] = be64_to_cpu(consts[0]); /* gf(2^64) multiply zero-ciphertext with u and u^2 */ for (i = 0; i < 2; i++) { msb_mask = ((s64)_const[0] >> 63) & gfmask; _const[0] = (_const[0] << 1) ^ msb_mask; consts[i] = cpu_to_be64(_const[0]); } break; } return 0; } static int crypto_cmac_digest_init(struct shash_desc *pdesc) { int bs = crypto_shash_blocksize(pdesc->tfm); u8 *prev = shash_desc_ctx(pdesc); memset(prev, 0, bs); return 0; } static int crypto_cmac_digest_update(struct shash_desc *pdesc, const u8 *p, unsigned int len) { struct crypto_shash *parent = pdesc->tfm; struct cmac_tfm_ctx *tctx = crypto_shash_ctx(parent); struct crypto_cipher *tfm = tctx->child; int bs = crypto_shash_blocksize(parent); u8 *prev = shash_desc_ctx(pdesc); do { crypto_xor(prev, p, bs); crypto_cipher_encrypt_one(tfm, prev, prev); p += bs; len -= bs; } while (len >= bs); return len; } static int crypto_cmac_digest_finup(struct shash_desc *pdesc, const u8 *src, unsigned int len, u8 *out) { struct crypto_shash *parent = pdesc->tfm; struct cmac_tfm_ctx *tctx = crypto_shash_ctx(parent); struct crypto_cipher *tfm = tctx->child; int bs = crypto_shash_blocksize(parent); u8 *prev = shash_desc_ctx(pdesc); unsigned int offset = 0; crypto_xor(prev, src, len); if (len != bs) { prev[len] ^= 0x80; offset 
+= bs; } crypto_xor(prev, (const u8 *)tctx->consts + offset, bs); crypto_cipher_encrypt_one(tfm, out, prev); return 0; } static int cmac_init_tfm(struct crypto_shash *tfm) { struct shash_instance *inst = shash_alg_instance(tfm); struct cmac_tfm_ctx *ctx = crypto_shash_ctx(tfm); struct crypto_cipher_spawn *spawn; struct crypto_cipher *cipher; spawn = shash_instance_ctx(inst); cipher = crypto_spawn_cipher(spawn); if (IS_ERR(cipher)) return PTR_ERR(cipher); ctx->child = cipher; return 0; } static int cmac_clone_tfm(struct crypto_shash *tfm, struct crypto_shash *otfm) { struct cmac_tfm_ctx *octx = crypto_shash_ctx(otfm); struct cmac_tfm_ctx *ctx = crypto_shash_ctx(tfm); struct crypto_cipher *cipher; cipher = crypto_clone_cipher(octx->child); if (IS_ERR(cipher)) return PTR_ERR(cipher); ctx->child = cipher; return 0; } static void cmac_exit_tfm(struct crypto_shash *tfm) { struct cmac_tfm_ctx *ctx = crypto_shash_ctx(tfm); crypto_free_cipher(ctx->child); } static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb) { struct shash_instance *inst; struct crypto_cipher_spawn *spawn; struct crypto_alg *alg; u32 mask; int err; err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SHASH, &mask); if (err) return err; inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); if (!inst) return -ENOMEM; spawn = shash_instance_ctx(inst); err = crypto_grab_cipher(spawn, shash_crypto_instance(inst), crypto_attr_alg_name(tb[1]), 0, mask); if (err) goto err_free_inst; alg = crypto_spawn_cipher_alg(spawn); switch (alg->cra_blocksize) { case 16: case 8: break; default: err = -EINVAL; goto err_free_inst; } err = crypto_inst_setname(shash_crypto_instance(inst), tmpl->name, alg); if (err) goto err_free_inst; inst->alg.base.cra_priority = alg->cra_priority; inst->alg.base.cra_blocksize = alg->cra_blocksize; inst->alg.base.cra_ctxsize = sizeof(struct cmac_tfm_ctx) + alg->cra_blocksize * 2; inst->alg.base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | CRYPTO_AHASH_ALG_FINAL_NONZERO; 
inst->alg.digestsize = alg->cra_blocksize; inst->alg.descsize = alg->cra_blocksize; inst->alg.init = crypto_cmac_digest_init; inst->alg.update = crypto_cmac_digest_update; inst->alg.finup = crypto_cmac_digest_finup; inst->alg.setkey = crypto_cmac_digest_setkey; inst->alg.init_tfm = cmac_init_tfm; inst->alg.clone_tfm = cmac_clone_tfm; inst->alg.exit_tfm = cmac_exit_tfm; inst->free = shash_free_singlespawn_instance; err = shash_register_instance(tmpl, inst); if (err) { err_free_inst: shash_free_singlespawn_instance(inst); } return err; } static struct crypto_template crypto_cmac_tmpl = { .name = "cmac", .create = cmac_create, .module = THIS_MODULE, }; static int __init crypto_cmac_module_init(void) { return crypto_register_template(&crypto_cmac_tmpl); } static void __exit crypto_cmac_module_exit(void) { crypto_unregister_template(&crypto_cmac_tmpl); } module_init(crypto_cmac_module_init); module_exit(crypto_cmac_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("CMAC keyed hash algorithm"); MODULE_ALIAS_CRYPTO("cmac"); MODULE_IMPORT_NS("CRYPTO_INTERNAL");
1 1 4 4 4 4 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 
505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 
1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 
1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. */ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> #include <linux/buffer_head.h> #include <linux/xattr.h> #include <linux/gfs2_ondisk.h> #include <linux/posix_acl_xattr.h> #include <linux/uaccess.h> #include "gfs2.h" #include "incore.h" #include "acl.h" #include "xattr.h" #include "glock.h" #include "inode.h" #include "meta_io.h" #include "quota.h" #include "rgrp.h" #include "super.h" #include "trans.h" #include "util.h" /* * ea_calc_size - returns the actual number of bytes the request will take up * (not counting any unstuffed data blocks) * * Returns: 1 if the EA should be stuffed */ static int ea_calc_size(struct gfs2_sbd *sdp, unsigned int nsize, size_t dsize, unsigned int *size) { unsigned int jbsize = sdp->sd_jbsize; /* Stuffed */ *size = ALIGN(sizeof(struct gfs2_ea_header) + nsize + dsize, 8); if (*size <= jbsize) return 1; /* Unstuffed */ *size = ALIGN(sizeof(struct gfs2_ea_header) + nsize + (sizeof(__be64) * DIV_ROUND_UP(dsize, jbsize)), 8); return 0; } static int ea_check_size(struct gfs2_sbd *sdp, unsigned int nsize, size_t dsize) { unsigned int size; if (dsize > GFS2_EA_MAX_DATA_LEN) return -ERANGE; ea_calc_size(sdp, nsize, dsize, &size); /* This can only happen with 512 byte blocks */ if (size > sdp->sd_jbsize) 
return -ERANGE; return 0; } static bool gfs2_eatype_valid(struct gfs2_sbd *sdp, u8 type) { switch(sdp->sd_sb.sb_fs_format) { case GFS2_FS_FORMAT_MAX: return true; case GFS2_FS_FORMAT_MIN: return type <= GFS2_EATYPE_SECURITY; default: return false; } } typedef int (*ea_call_t) (struct gfs2_inode *ip, struct buffer_head *bh, struct gfs2_ea_header *ea, struct gfs2_ea_header *prev, void *private); static int ea_foreach_i(struct gfs2_inode *ip, struct buffer_head *bh, ea_call_t ea_call, void *data) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_ea_header *ea, *prev = NULL; int error = 0; if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), bh, GFS2_METATYPE_EA)) return -EIO; for (ea = GFS2_EA_BH2FIRST(bh);; prev = ea, ea = GFS2_EA2NEXT(ea)) { if (!GFS2_EA_REC_LEN(ea)) { gfs2_consist_inode(ip); return -EIO; } if (!(bh->b_data <= (char *)ea && (char *)GFS2_EA2NEXT(ea) <= bh->b_data + bh->b_size)) { gfs2_consist_inode(ip); return -EIO; } if (!gfs2_eatype_valid(sdp, ea->ea_type)) { gfs2_consist_inode(ip); return -EIO; } error = ea_call(ip, bh, ea, prev, data); if (error) return error; if (GFS2_EA_IS_LAST(ea)) { if ((char *)GFS2_EA2NEXT(ea) != bh->b_data + bh->b_size) { gfs2_consist_inode(ip); return -EIO; } break; } } return error; } static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data) { struct buffer_head *bh, *eabh; __be64 *eablk, *end; int error; error = gfs2_meta_read(ip->i_gl, ip->i_eattr, DIO_WAIT, 0, &bh); if (error) return error; if (!(ip->i_diskflags & GFS2_DIF_EA_INDIRECT)) { error = ea_foreach_i(ip, bh, ea_call, data); goto out; } if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), bh, GFS2_METATYPE_IN)) { error = -EIO; goto out; } eablk = (__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)); end = eablk + GFS2_SB(&ip->i_inode)->sd_inptrs; for (; eablk < end; eablk++) { u64 bn; if (!*eablk) break; bn = be64_to_cpu(*eablk); error = gfs2_meta_read(ip->i_gl, bn, DIO_WAIT, 0, &eabh); if (error) break; error = ea_foreach_i(ip, eabh, 
ea_call, data); brelse(eabh); if (error) break; } out: brelse(bh); return error; } struct ea_find { int type; const char *name; size_t namel; struct gfs2_ea_location *ef_el; }; static int ea_find_i(struct gfs2_inode *ip, struct buffer_head *bh, struct gfs2_ea_header *ea, struct gfs2_ea_header *prev, void *private) { struct ea_find *ef = private; if (ea->ea_type == GFS2_EATYPE_UNUSED) return 0; if (ea->ea_type == ef->type) { if (ea->ea_name_len == ef->namel && !memcmp(GFS2_EA2NAME(ea), ef->name, ea->ea_name_len)) { struct gfs2_ea_location *el = ef->ef_el; get_bh(bh); el->el_bh = bh; el->el_ea = ea; el->el_prev = prev; return 1; } } return 0; } static int gfs2_ea_find(struct gfs2_inode *ip, int type, const char *name, struct gfs2_ea_location *el) { struct ea_find ef; int error; ef.type = type; ef.name = name; ef.namel = strlen(name); ef.ef_el = el; memset(el, 0, sizeof(struct gfs2_ea_location)); error = ea_foreach(ip, ea_find_i, &ef); if (error > 0) return 0; return error; } /* * ea_dealloc_unstuffed * * Take advantage of the fact that all unstuffed blocks are * allocated from the same RG. But watch, this may not always * be true. 
* * Returns: errno */ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, struct gfs2_ea_header *ea, struct gfs2_ea_header *prev, void *private) { int *leave = private; struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *rgd; struct gfs2_holder rg_gh; __be64 *dataptrs; u64 bn = 0; u64 bstart = 0; unsigned int blen = 0; unsigned int blks = 0; unsigned int x; int error; error = gfs2_rindex_update(sdp); if (error) return error; if (GFS2_EA_IS_STUFFED(ea)) return 0; dataptrs = GFS2_EA2DATAPTRS(ea); for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) { if (*dataptrs) { blks++; bn = be64_to_cpu(*dataptrs); } } if (!blks) return 0; rgd = gfs2_blk2rgrpd(sdp, bn, 1); if (!rgd) { gfs2_consist_inode(ip); return -EIO; } error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, LM_FLAG_NODE_SCOPE, &rg_gh); if (error) return error; error = gfs2_trans_begin(sdp, rgd->rd_length + RES_DINODE + RES_EATTR + RES_STATFS + RES_QUOTA, blks); if (error) goto out_gunlock; gfs2_trans_add_meta(ip->i_gl, bh); dataptrs = GFS2_EA2DATAPTRS(ea); for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) { if (!*dataptrs) break; bn = be64_to_cpu(*dataptrs); if (bstart + blen == bn) blen++; else { if (bstart) gfs2_free_meta(ip, rgd, bstart, blen); bstart = bn; blen = 1; } *dataptrs = 0; gfs2_add_inode_blocks(&ip->i_inode, -1); } if (bstart) gfs2_free_meta(ip, rgd, bstart, blen); if (prev && !leave) { u32 len; len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea); prev->ea_rec_len = cpu_to_be32(len); if (GFS2_EA_IS_LAST(ea)) prev->ea_flags |= GFS2_EAFLAG_LAST; } else { ea->ea_type = GFS2_EATYPE_UNUSED; ea->ea_num_ptrs = 0; } inode_set_ctime_current(&ip->i_inode); __mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC); gfs2_trans_end(sdp); out_gunlock: gfs2_glock_dq_uninit(&rg_gh); return error; } static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, struct gfs2_ea_header *ea, struct gfs2_ea_header *prev, int leave) { int error; error = 
gfs2_rindex_update(GFS2_SB(&ip->i_inode)); if (error) return error; error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); if (error) goto out_alloc; error = ea_dealloc_unstuffed(ip, bh, ea, prev, (leave) ? &error : NULL); gfs2_quota_unhold(ip); out_alloc: return error; } struct ea_list { struct gfs2_ea_request *ei_er; unsigned int ei_size; }; static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh, struct gfs2_ea_header *ea, struct gfs2_ea_header *prev, void *private) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct ea_list *ei = private; struct gfs2_ea_request *er = ei->ei_er; unsigned int ea_size; char *prefix; unsigned int l; if (ea->ea_type == GFS2_EATYPE_UNUSED) return 0; BUG_ON(ea->ea_type > GFS2_EATYPE_SECURITY && sdp->sd_sb.sb_fs_format == GFS2_FS_FORMAT_MIN); switch (ea->ea_type) { case GFS2_EATYPE_USR: prefix = "user."; l = 5; break; case GFS2_EATYPE_SYS: prefix = "system."; l = 7; break; case GFS2_EATYPE_SECURITY: prefix = "security."; l = 9; break; case GFS2_EATYPE_TRUSTED: prefix = "trusted."; l = 8; break; default: return 0; } ea_size = l + ea->ea_name_len + 1; if (er->er_data_len) { if (ei->ei_size + ea_size > er->er_data_len) return -ERANGE; memcpy(er->er_data + ei->ei_size, prefix, l); memcpy(er->er_data + ei->ei_size + l, GFS2_EA2NAME(ea), ea->ea_name_len); er->er_data[ei->ei_size + ea_size - 1] = 0; } ei->ei_size += ea_size; return 0; } /** * gfs2_listxattr - List gfs2 extended attributes * @dentry: The dentry whose inode we are interested in * @buffer: The buffer to write the results * @size: The size of the buffer * * Returns: actual size of data on success, -errno on error */ ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size) { struct gfs2_inode *ip = GFS2_I(d_inode(dentry)); struct gfs2_ea_request er; struct gfs2_holder i_gh; int error; memset(&er, 0, sizeof(struct gfs2_ea_request)); if (size) { er.er_data = buffer; er.er_data_len = size; } error = gfs2_glock_nq_init(ip->i_gl, 
LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (error) return error; if (ip->i_eattr) { struct ea_list ei = { .ei_er = &er, .ei_size = 0 }; error = ea_foreach(ip, ea_list_i, &ei); if (!error) error = ei.ei_size; } gfs2_glock_dq_uninit(&i_gh); return error; } /** * gfs2_iter_unstuffed - copies the unstuffed xattr data to/from the * request buffer * @ip: The GFS2 inode * @ea: The extended attribute header structure * @din: The data to be copied in * @dout: The data to be copied out (one of din,dout will be NULL) * * Returns: errno */ static int gfs2_iter_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, const char *din, char *dout) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head **bh; unsigned int amount = GFS2_EA_DATA_LEN(ea); unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize); __be64 *dataptrs = GFS2_EA2DATAPTRS(ea); unsigned int x; int error = 0; unsigned char *pos; unsigned cp_size; bh = kzalloc_objs(struct buffer_head *, nptrs, GFP_NOFS); if (!bh) return -ENOMEM; for (x = 0; x < nptrs; x++) { error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs), 0, 0, bh + x); if (error) { while (x--) brelse(bh[x]); goto out; } dataptrs++; } for (x = 0; x < nptrs; x++) { error = gfs2_meta_wait(sdp, bh[x]); if (error) { for (; x < nptrs; x++) brelse(bh[x]); goto out; } if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) { for (; x < nptrs; x++) brelse(bh[x]); error = -EIO; goto out; } pos = bh[x]->b_data + sizeof(struct gfs2_meta_header); cp_size = (sdp->sd_jbsize > amount) ? 
amount : sdp->sd_jbsize; if (dout) { memcpy(dout, pos, cp_size); dout += sdp->sd_jbsize; } if (din) { gfs2_trans_add_meta(ip->i_gl, bh[x]); memcpy(pos, din, cp_size); din += sdp->sd_jbsize; } amount -= sdp->sd_jbsize; brelse(bh[x]); } out: kfree(bh); return error; } static int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el, char *data, size_t size) { int ret; size_t len = GFS2_EA_DATA_LEN(el->el_ea); if (len > size) return -ERANGE; if (GFS2_EA_IS_STUFFED(el->el_ea)) { memcpy(data, GFS2_EA2DATA(el->el_ea), len); return len; } ret = gfs2_iter_unstuffed(ip, el->el_ea, NULL, data); if (ret < 0) return ret; return len; } int gfs2_xattr_acl_get(struct gfs2_inode *ip, const char *name, char **ppdata) { struct gfs2_ea_location el; int error; int len; char *data; error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, name, &el); if (error) return error; if (!el.el_ea) goto out; if (!GFS2_EA_DATA_LEN(el.el_ea)) goto out; len = GFS2_EA_DATA_LEN(el.el_ea); data = kmalloc(len, GFP_NOFS); error = -ENOMEM; if (data == NULL) goto out; error = gfs2_ea_get_copy(ip, &el, data, len); if (error < 0) kfree(data); else *ppdata = data; out: brelse(el.el_bh); return error; } /** * __gfs2_xattr_get - Get a GFS2 extended attribute * @inode: The inode * @name: The name of the extended attribute * @buffer: The buffer to write the result into * @size: The size of the buffer * @type: The type of extended attribute * * Returns: actual size of data on success, -errno on error */ static int __gfs2_xattr_get(struct inode *inode, const char *name, void *buffer, size_t size, int type) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_ea_location el; int error; if (!ip->i_eattr) return -ENODATA; if (strlen(name) > GFS2_EA_MAX_NAME_LEN) return -EINVAL; error = gfs2_ea_find(ip, type, name, &el); if (error) return error; if (!el.el_ea) return -ENODATA; if (size) error = gfs2_ea_get_copy(ip, &el, buffer, size); else error = GFS2_EA_DATA_LEN(el.el_ea); brelse(el.el_bh); return error; } static int 
gfs2_xattr_get(const struct xattr_handler *handler, struct dentry *unused, struct inode *inode, const char *name, void *buffer, size_t size) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; int ret; /* During lookup, SELinux calls this function with the glock locked. */ if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); if (ret) return ret; } else { gfs2_holder_mark_uninitialized(&gh); } ret = __gfs2_xattr_get(inode, name, buffer, size, handler->flags); if (gfs2_holder_initialized(&gh)) gfs2_glock_dq_uninit(&gh); return ret; } /** * ea_alloc_blk - allocates a new block for extended attributes. * @ip: A pointer to the inode that's getting extended attributes * @bhp: Pointer to pointer to a struct buffer_head * * Returns: errno */ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_ea_header *ea; unsigned int n = 1; u64 block; int error; error = gfs2_alloc_blocks(ip, &block, &n, 0); if (error) return error; gfs2_trans_remove_revoke(sdp, block, 1); *bhp = gfs2_meta_new(ip->i_gl, block); gfs2_trans_add_meta(ip->i_gl, *bhp); gfs2_metatype_set(*bhp, GFS2_METATYPE_EA, GFS2_FORMAT_EA); gfs2_buffer_clear_tail(*bhp, sizeof(struct gfs2_meta_header)); ea = GFS2_EA_BH2FIRST(*bhp); ea->ea_rec_len = cpu_to_be32(sdp->sd_jbsize); ea->ea_type = GFS2_EATYPE_UNUSED; ea->ea_flags = GFS2_EAFLAG_LAST; ea->ea_num_ptrs = 0; gfs2_add_inode_blocks(&ip->i_inode, 1); return 0; } /** * ea_write - writes the request info to an ea, creating new blocks if * necessary * @ip: inode that is being modified * @ea: the location of the new ea in a block * @er: the write request * * Note: does not update ea_rec_len or the GFS2_EAFLAG_LAST bin of ea_flags * * returns : errno */ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea, struct gfs2_ea_request *er) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); int error; ea->ea_data_len = 
cpu_to_be32(er->er_data_len); ea->ea_name_len = er->er_name_len; ea->ea_type = er->er_type; ea->__pad = 0; memcpy(GFS2_EA2NAME(ea), er->er_name, er->er_name_len); if (GFS2_EAREQ_SIZE_STUFFED(er) <= sdp->sd_jbsize) { ea->ea_num_ptrs = 0; memcpy(GFS2_EA2DATA(ea), er->er_data, er->er_data_len); } else { __be64 *dataptr = GFS2_EA2DATAPTRS(ea); const char *data = er->er_data; unsigned int data_len = er->er_data_len; unsigned int copy; unsigned int x; ea->ea_num_ptrs = DIV_ROUND_UP(er->er_data_len, sdp->sd_jbsize); for (x = 0; x < ea->ea_num_ptrs; x++) { struct buffer_head *bh; u64 block; int mh_size = sizeof(struct gfs2_meta_header); unsigned int n = 1; error = gfs2_alloc_blocks(ip, &block, &n, 0); if (error) return error; gfs2_trans_remove_revoke(sdp, block, 1); bh = gfs2_meta_new(ip->i_gl, block); gfs2_trans_add_meta(ip->i_gl, bh); gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED); gfs2_add_inode_blocks(&ip->i_inode, 1); copy = data_len > sdp->sd_jbsize ? sdp->sd_jbsize : data_len; memcpy(bh->b_data + mh_size, data, copy); if (copy < sdp->sd_jbsize) memset(bh->b_data + mh_size + copy, 0, sdp->sd_jbsize - copy); *dataptr++ = cpu_to_be64(bh->b_blocknr); data += copy; data_len -= copy; brelse(bh); } gfs2_assert_withdraw(sdp, !data_len); } return 0; } typedef int (*ea_skeleton_call_t) (struct gfs2_inode *ip, struct gfs2_ea_request *er, void *private); static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, unsigned int blks, ea_skeleton_call_t skeleton_call, void *private) { struct gfs2_alloc_parms ap = { .target = blks }; int error; error = gfs2_rindex_update(GFS2_SB(&ip->i_inode)); if (error) return error; error = gfs2_quota_lock_check(ip, &ap); if (error) return error; error = gfs2_inplace_reserve(ip, &ap); if (error) goto out_gunlock_q; error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), blks + gfs2_rg_blocks(ip, blks) + RES_DINODE + RES_STATFS + RES_QUOTA, 0); if (error) goto out_ipres; error = skeleton_call(ip, er, private); if (error) goto 
out_end_trans; inode_set_ctime_current(&ip->i_inode); __mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC); out_end_trans: gfs2_trans_end(GFS2_SB(&ip->i_inode)); out_ipres: gfs2_inplace_release(ip); out_gunlock_q: gfs2_quota_unlock(ip); return error; } static int ea_init_i(struct gfs2_inode *ip, struct gfs2_ea_request *er, void *private) { struct buffer_head *bh; int error; error = ea_alloc_blk(ip, &bh); if (error) return error; ip->i_eattr = bh->b_blocknr; error = ea_write(ip, GFS2_EA_BH2FIRST(bh), er); brelse(bh); return error; } /* * ea_init - initializes a new eattr block * * Returns: errno */ static int ea_init(struct gfs2_inode *ip, int type, const char *name, const void *data, size_t size) { struct gfs2_ea_request er; unsigned int jbsize = GFS2_SB(&ip->i_inode)->sd_jbsize; unsigned int blks = 1; er.er_type = type; er.er_name = name; er.er_name_len = strlen(name); er.er_data = (void *)data; er.er_data_len = size; if (GFS2_EAREQ_SIZE_STUFFED(&er) > jbsize) blks += DIV_ROUND_UP(er.er_data_len, jbsize); return ea_alloc_skeleton(ip, &er, blks, ea_init_i, NULL); } static struct gfs2_ea_header *ea_split_ea(struct gfs2_ea_header *ea) { u32 ea_size = GFS2_EA_SIZE(ea); struct gfs2_ea_header *new = (struct gfs2_ea_header *)((char *)ea + ea_size); u32 new_size = GFS2_EA_REC_LEN(ea) - ea_size; int last = ea->ea_flags & GFS2_EAFLAG_LAST; ea->ea_rec_len = cpu_to_be32(ea_size); ea->ea_flags ^= last; new->ea_rec_len = cpu_to_be32(new_size); new->ea_flags = last; return new; } static void ea_set_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el) { struct gfs2_ea_header *ea = el->el_ea; struct gfs2_ea_header *prev = el->el_prev; u32 len; gfs2_trans_add_meta(ip->i_gl, el->el_bh); if (!prev || !GFS2_EA_IS_STUFFED(ea)) { ea->ea_type = GFS2_EATYPE_UNUSED; return; } else if (GFS2_EA2NEXT(prev) != ea) { prev = GFS2_EA2NEXT(prev); gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), GFS2_EA2NEXT(prev) == ea); } len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea); prev->ea_rec_len 
= cpu_to_be32(len); if (GFS2_EA_IS_LAST(ea)) prev->ea_flags |= GFS2_EAFLAG_LAST; } struct ea_set { int ea_split; struct gfs2_ea_request *es_er; struct gfs2_ea_location *es_el; struct buffer_head *es_bh; struct gfs2_ea_header *es_ea; }; static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh, struct gfs2_ea_header *ea, struct ea_set *es) { struct gfs2_ea_request *er = es->es_er; int error; error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE + 2 * RES_EATTR, 0); if (error) return error; gfs2_trans_add_meta(ip->i_gl, bh); if (es->ea_split) ea = ea_split_ea(ea); ea_write(ip, ea, er); if (es->es_el) ea_set_remove_stuffed(ip, es->es_el); inode_set_ctime_current(&ip->i_inode); __mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC); gfs2_trans_end(GFS2_SB(&ip->i_inode)); return error; } static int ea_set_simple_alloc(struct gfs2_inode *ip, struct gfs2_ea_request *er, void *private) { struct ea_set *es = private; struct gfs2_ea_header *ea = es->es_ea; int error; gfs2_trans_add_meta(ip->i_gl, es->es_bh); if (es->ea_split) ea = ea_split_ea(ea); error = ea_write(ip, ea, er); if (error) return error; if (es->es_el) ea_set_remove_stuffed(ip, es->es_el); return 0; } static int ea_set_simple(struct gfs2_inode *ip, struct buffer_head *bh, struct gfs2_ea_header *ea, struct gfs2_ea_header *prev, void *private) { struct ea_set *es = private; unsigned int size; int stuffed; int error; stuffed = ea_calc_size(GFS2_SB(&ip->i_inode), es->es_er->er_name_len, es->es_er->er_data_len, &size); if (ea->ea_type == GFS2_EATYPE_UNUSED) { if (GFS2_EA_REC_LEN(ea) < size) return 0; if (!GFS2_EA_IS_STUFFED(ea)) { error = ea_remove_unstuffed(ip, bh, ea, prev, 1); if (error) return error; } es->ea_split = 0; } else if (GFS2_EA_REC_LEN(ea) - GFS2_EA_SIZE(ea) >= size) es->ea_split = 1; else return 0; if (stuffed) { error = ea_set_simple_noalloc(ip, bh, ea, es); if (error) return error; } else { unsigned int blks; es->es_bh = bh; es->es_ea = ea; blks = 2 + 
DIV_ROUND_UP(es->es_er->er_data_len, GFS2_SB(&ip->i_inode)->sd_jbsize); error = ea_alloc_skeleton(ip, es->es_er, blks, ea_set_simple_alloc, es); if (error) return error; } return 1; } static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er, void *private) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *indbh, *newbh; __be64 *eablk; int error; int mh_size = sizeof(struct gfs2_meta_header); if (ip->i_diskflags & GFS2_DIF_EA_INDIRECT) { __be64 *end; error = gfs2_meta_read(ip->i_gl, ip->i_eattr, DIO_WAIT, 0, &indbh); if (error) return error; if (gfs2_metatype_check(sdp, indbh, GFS2_METATYPE_IN)) { error = -EIO; goto out; } eablk = (__be64 *)(indbh->b_data + mh_size); end = eablk + sdp->sd_inptrs; for (; eablk < end; eablk++) if (!*eablk) break; if (eablk == end) { error = -ENOSPC; goto out; } gfs2_trans_add_meta(ip->i_gl, indbh); } else { u64 blk; unsigned int n = 1; error = gfs2_alloc_blocks(ip, &blk, &n, 0); if (error) return error; gfs2_trans_remove_revoke(sdp, blk, 1); indbh = gfs2_meta_new(ip->i_gl, blk); gfs2_trans_add_meta(ip->i_gl, indbh); gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN); gfs2_buffer_clear_tail(indbh, mh_size); eablk = (__be64 *)(indbh->b_data + mh_size); *eablk = cpu_to_be64(ip->i_eattr); ip->i_eattr = blk; ip->i_diskflags |= GFS2_DIF_EA_INDIRECT; gfs2_add_inode_blocks(&ip->i_inode, 1); eablk++; } error = ea_alloc_blk(ip, &newbh); if (error) goto out; *eablk = cpu_to_be64((u64)newbh->b_blocknr); error = ea_write(ip, GFS2_EA_BH2FIRST(newbh), er); brelse(newbh); if (error) goto out; if (private) ea_set_remove_stuffed(ip, private); out: brelse(indbh); return error; } static int ea_set_i(struct gfs2_inode *ip, int type, const char *name, const void *value, size_t size, struct gfs2_ea_location *el) { struct gfs2_ea_request er; struct ea_set es; unsigned int blks = 2; int error; er.er_type = type; er.er_name = name; er.er_data = (void *)value; er.er_name_len = strlen(name); er.er_data_len = size; 
memset(&es, 0, sizeof(struct ea_set)); es.es_er = &er; es.es_el = el; error = ea_foreach(ip, ea_set_simple, &es); if (error > 0) return 0; if (error) return error; if (!(ip->i_diskflags & GFS2_DIF_EA_INDIRECT)) blks++; if (GFS2_EAREQ_SIZE_STUFFED(&er) > GFS2_SB(&ip->i_inode)->sd_jbsize) blks += DIV_ROUND_UP(er.er_data_len, GFS2_SB(&ip->i_inode)->sd_jbsize); return ea_alloc_skeleton(ip, &er, blks, ea_set_block, el); } static int ea_set_remove_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el) { if (el->el_prev && GFS2_EA2NEXT(el->el_prev) != el->el_ea) { el->el_prev = GFS2_EA2NEXT(el->el_prev); gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), GFS2_EA2NEXT(el->el_prev) == el->el_ea); } return ea_remove_unstuffed(ip, el->el_bh, el->el_ea, el->el_prev, 0); } static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el) { struct gfs2_ea_header *ea = el->el_ea; struct gfs2_ea_header *prev = el->el_prev; int error; error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE + RES_EATTR, 0); if (error) return error; gfs2_trans_add_meta(ip->i_gl, el->el_bh); if (prev) { u32 len; len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea); prev->ea_rec_len = cpu_to_be32(len); if (GFS2_EA_IS_LAST(ea)) prev->ea_flags |= GFS2_EAFLAG_LAST; } else { ea->ea_type = GFS2_EATYPE_UNUSED; } inode_set_ctime_current(&ip->i_inode); __mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC); gfs2_trans_end(GFS2_SB(&ip->i_inode)); return error; } /** * gfs2_xattr_remove - Remove a GFS2 extended attribute * @ip: The inode * @type: The type of the extended attribute * @name: The name of the extended attribute * * This is not called directly by the VFS since we use the (common) * scheme of making a "set with NULL data" mean a remove request. Note * that this is different from a set with zero length data. 
* * Returns: 0, or errno on failure */ static int gfs2_xattr_remove(struct gfs2_inode *ip, int type, const char *name) { struct gfs2_ea_location el; int error; if (!ip->i_eattr) return -ENODATA; error = gfs2_ea_find(ip, type, name, &el); if (error) return error; if (!el.el_ea) return -ENODATA; if (GFS2_EA_IS_STUFFED(el.el_ea)) error = ea_remove_stuffed(ip, &el); else error = ea_remove_unstuffed(ip, el.el_bh, el.el_ea, el.el_prev, 0); brelse(el.el_bh); return error; } /** * __gfs2_xattr_set - Set (or remove) a GFS2 extended attribute * @inode: The inode * @name: The name of the extended attribute * @value: The value of the extended attribute (NULL for remove) * @size: The size of the @value argument * @flags: Create or Replace * @type: The type of the extended attribute * * See gfs2_xattr_remove() for details of the removal of xattrs. * * Returns: 0 or errno on failure */ int __gfs2_xattr_set(struct inode *inode, const char *name, const void *value, size_t size, int flags, int type) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_ea_location el; unsigned int namel = strlen(name); int error; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) return -EPERM; if (namel > GFS2_EA_MAX_NAME_LEN) return -ERANGE; if (value == NULL) { error = gfs2_xattr_remove(ip, type, name); if (error == -ENODATA && !(flags & XATTR_REPLACE)) error = 0; return error; } if (ea_check_size(sdp, namel, size)) return -ERANGE; if (!ip->i_eattr) { if (flags & XATTR_REPLACE) return -ENODATA; return ea_init(ip, type, name, value, size); } error = gfs2_ea_find(ip, type, name, &el); if (error) return error; if (el.el_ea) { if (ip->i_diskflags & GFS2_DIF_APPENDONLY) { brelse(el.el_bh); return -EPERM; } error = -EEXIST; if (!(flags & XATTR_CREATE)) { int unstuffed = !GFS2_EA_IS_STUFFED(el.el_ea); error = ea_set_i(ip, type, name, value, size, &el); if (!error && unstuffed) ea_set_remove_unstuffed(ip, &el); } brelse(el.el_bh); return error; } error = -ENODATA; if 
(!(flags & XATTR_REPLACE)) error = ea_set_i(ip, type, name, value, size, NULL); return error; } static int gfs2_xattr_set(const struct xattr_handler *handler, struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; int ret; ret = gfs2_qa_get(ip); if (ret) return ret; /* May be called from gfs_setattr with the glock locked. */ if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); if (ret) goto out; } else { if (WARN_ON_ONCE(ip->i_gl->gl_state != LM_ST_EXCLUSIVE)) { ret = -EIO; goto out; } gfs2_holder_mark_uninitialized(&gh); } ret = __gfs2_xattr_set(inode, name, value, size, flags, handler->flags); if (gfs2_holder_initialized(&gh)) gfs2_glock_dq_uninit(&gh); out: gfs2_qa_put(ip); return ret; } static int ea_dealloc_indirect(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrp_list rlist; struct gfs2_rgrpd *rgd; struct buffer_head *indbh, *dibh; __be64 *eablk, *end; unsigned int rg_blocks = 0; u64 bstart = 0; unsigned int blen = 0; unsigned int blks = 0; unsigned int x; int error; error = gfs2_rindex_update(sdp); if (error) return error; memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); error = gfs2_meta_read(ip->i_gl, ip->i_eattr, DIO_WAIT, 0, &indbh); if (error) return error; if (gfs2_metatype_check(sdp, indbh, GFS2_METATYPE_IN)) { error = -EIO; goto out; } eablk = (__be64 *)(indbh->b_data + sizeof(struct gfs2_meta_header)); end = eablk + sdp->sd_inptrs; for (; eablk < end; eablk++) { u64 bn; if (!*eablk) break; bn = be64_to_cpu(*eablk); if (bstart + blen == bn) blen++; else { if (bstart) gfs2_rlist_add(ip, &rlist, bstart); bstart = bn; blen = 1; } blks++; } if (bstart) gfs2_rlist_add(ip, &rlist, bstart); else goto out; gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, LM_FLAG_NODE_SCOPE); for (x = 0; x < rlist.rl_rgrps; x++) { rgd = 
gfs2_glock2rgrp(rlist.rl_ghs[x].gh_gl); rg_blocks += rgd->rd_length; } error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs); if (error) goto out_rlist_free; error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE + RES_INDIRECT + RES_STATFS + RES_QUOTA, blks); if (error) goto out_gunlock; gfs2_trans_add_meta(ip->i_gl, indbh); eablk = (__be64 *)(indbh->b_data + sizeof(struct gfs2_meta_header)); bstart = 0; rgd = NULL; blen = 0; for (; eablk < end; eablk++) { u64 bn; if (!*eablk) break; bn = be64_to_cpu(*eablk); if (bstart + blen == bn) blen++; else { if (bstart) gfs2_free_meta(ip, rgd, bstart, blen); bstart = bn; rgd = gfs2_blk2rgrpd(sdp, bstart, true); blen = 1; } *eablk = 0; gfs2_add_inode_blocks(&ip->i_inode, -1); } if (bstart) gfs2_free_meta(ip, rgd, bstart, blen); ip->i_diskflags &= ~GFS2_DIF_EA_INDIRECT; error = gfs2_meta_inode_buffer(ip, &dibh); if (!error) { gfs2_trans_add_meta(ip->i_gl, dibh); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); } gfs2_trans_end(sdp); out_gunlock: gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs); out_rlist_free: gfs2_rlist_free(&rlist); out: brelse(indbh); return error; } static int ea_dealloc_block(struct gfs2_inode *ip, bool initialized) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *rgd; struct buffer_head *dibh; struct gfs2_holder gh; int error; error = gfs2_rindex_update(sdp); if (error) return error; rgd = gfs2_blk2rgrpd(sdp, ip->i_eattr, 1); if (!rgd) { gfs2_consist_inode(ip); return -EIO; } error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, LM_FLAG_NODE_SCOPE, &gh); if (error) return error; error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_DINODE + RES_STATFS + RES_QUOTA, 1); if (error) goto out_gunlock; gfs2_free_meta(ip, rgd, ip->i_eattr, 1); ip->i_eattr = 0; gfs2_add_inode_blocks(&ip->i_inode, -1); if (initialized) { error = gfs2_meta_inode_buffer(ip, &dibh); if (!error) { gfs2_trans_add_meta(ip->i_gl, dibh); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); } } gfs2_trans_end(sdp); out_gunlock: 
gfs2_glock_dq_uninit(&gh); return error; } /** * gfs2_ea_dealloc - deallocate the extended attribute fork * @ip: the inode * @initialized: xattrs have been initialized * * Returns: errno */ int gfs2_ea_dealloc(struct gfs2_inode *ip, bool initialized) { int error; error = gfs2_rindex_update(GFS2_SB(&ip->i_inode)); if (error) return error; error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); if (error) return error; if (initialized) { error = ea_foreach(ip, ea_dealloc_unstuffed, NULL); if (error) goto out_quota; if (ip->i_diskflags & GFS2_DIF_EA_INDIRECT) { error = ea_dealloc_indirect(ip); if (error) goto out_quota; } } error = ea_dealloc_block(ip, initialized); out_quota: gfs2_quota_unhold(ip); return error; } static const struct xattr_handler gfs2_xattr_user_handler = { .prefix = XATTR_USER_PREFIX, .flags = GFS2_EATYPE_USR, .get = gfs2_xattr_get, .set = gfs2_xattr_set, }; static const struct xattr_handler gfs2_xattr_security_handler = { .prefix = XATTR_SECURITY_PREFIX, .flags = GFS2_EATYPE_SECURITY, .get = gfs2_xattr_get, .set = gfs2_xattr_set, }; static bool gfs2_xattr_trusted_list(struct dentry *dentry) { return capable(CAP_SYS_ADMIN); } static const struct xattr_handler gfs2_xattr_trusted_handler = { .prefix = XATTR_TRUSTED_PREFIX, .flags = GFS2_EATYPE_TRUSTED, .list = gfs2_xattr_trusted_list, .get = gfs2_xattr_get, .set = gfs2_xattr_set, }; const struct xattr_handler * const gfs2_xattr_handlers_max[] = { /* GFS2_FS_FORMAT_MAX */ &gfs2_xattr_trusted_handler, /* GFS2_FS_FORMAT_MIN */ &gfs2_xattr_user_handler, &gfs2_xattr_security_handler, NULL, }; const struct xattr_handler * const *gfs2_xattr_handlers_min = gfs2_xattr_handlers_max + 1;
5 3 4 4 2 2 5 6 3 5 4 2 2 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 // SPDX-License-Identifier: GPL-2.0-only /* * Xtables module for matching the value of the IPv4/IPv6 and TCP ECN bits * * (C) 2002 by Harald Welte <laforge@gnumonks.org> * (C) 2011 Patrick McHardy <kaber@trash.net> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/in.h> #include <linux/ip.h> #include <net/ip.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/tcp.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_ecn.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag match"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_ecn"); MODULE_ALIAS("ip6t_ecn"); static bool match_tcp(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_ecn_info *einfo = par->matchinfo; struct tcphdr _tcph; const struct tcphdr *th; /* In practice, TCP match does this, so can't fail. But let's * be good citizens. 
*/ th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph); if (th == NULL) return false; if (einfo->operation & XT_ECN_OP_MATCH_ECE) { if (einfo->invert & XT_ECN_OP_MATCH_ECE) { if (th->ece == 1) return false; } else { if (th->ece == 0) return false; } } if (einfo->operation & XT_ECN_OP_MATCH_CWR) { if (einfo->invert & XT_ECN_OP_MATCH_CWR) { if (th->cwr == 1) return false; } else { if (th->cwr == 0) return false; } } return true; } static inline bool match_ip(const struct sk_buff *skb, const struct xt_ecn_info *einfo) { return ((ip_hdr(skb)->tos & XT_ECN_IP_MASK) == einfo->ip_ect) ^ !!(einfo->invert & XT_ECN_OP_MATCH_IP); } static bool ecn_mt4(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_ecn_info *info = par->matchinfo; if (info->operation & XT_ECN_OP_MATCH_IP && !match_ip(skb, info)) return false; if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) && !match_tcp(skb, par)) return false; return true; } static int ecn_mt_check4(const struct xt_mtchk_param *par) { const struct xt_ecn_info *info = par->matchinfo; const struct ipt_ip *ip = par->entryinfo; if (info->operation & XT_ECN_OP_MATCH_MASK) return -EINVAL; if (info->invert & XT_ECN_OP_MATCH_MASK) return -EINVAL; if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) && (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) { pr_info_ratelimited("cannot match TCP bits for non-tcp packets\n"); return -EINVAL; } return 0; } static inline bool match_ipv6(const struct sk_buff *skb, const struct xt_ecn_info *einfo) { return (((ipv6_hdr(skb)->flow_lbl[0] >> 4) & XT_ECN_IP_MASK) == einfo->ip_ect) ^ !!(einfo->invert & XT_ECN_OP_MATCH_IP); } static bool ecn_mt6(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_ecn_info *info = par->matchinfo; if (info->operation & XT_ECN_OP_MATCH_IP && !match_ipv6(skb, info)) return false; if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) && !match_tcp(skb, par)) return false; 
return true; } static int ecn_mt_check6(const struct xt_mtchk_param *par) { const struct xt_ecn_info *info = par->matchinfo; const struct ip6t_ip6 *ip = par->entryinfo; if (info->operation & XT_ECN_OP_MATCH_MASK) return -EINVAL; if (info->invert & XT_ECN_OP_MATCH_MASK) return -EINVAL; if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) && (ip->proto != IPPROTO_TCP || ip->invflags & IP6T_INV_PROTO)) { pr_info_ratelimited("cannot match TCP bits for non-tcp packets\n"); return -EINVAL; } return 0; } static struct xt_match ecn_mt_reg[] __read_mostly = { { .name = "ecn", .family = NFPROTO_IPV4, .match = ecn_mt4, .matchsize = sizeof(struct xt_ecn_info), .checkentry = ecn_mt_check4, .me = THIS_MODULE, }, { .name = "ecn", .family = NFPROTO_IPV6, .match = ecn_mt6, .matchsize = sizeof(struct xt_ecn_info), .checkentry = ecn_mt_check6, .me = THIS_MODULE, }, }; static int __init ecn_mt_init(void) { return xt_register_matches(ecn_mt_reg, ARRAY_SIZE(ecn_mt_reg)); } static void __exit ecn_mt_exit(void) { xt_unregister_matches(ecn_mt_reg, ARRAY_SIZE(ecn_mt_reg)); } module_init(ecn_mt_init); module_exit(ecn_mt_exit);
155 154 116 115 52 368 116 922 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 // SPDX-License-Identifier: GPL-2.0-only /* * Aug 8, 2011 Bob Pearson with help from Joakim Tjernlund and George Spelvin * cleaned up code to current version of sparse and added the slicing-by-8 * algorithm to the closely similar existing slicing-by-4 algorithm. * * Oct 15, 2000 Matt Domsch <Matt_Domsch@dell.com> * Nicer crc32 functions/docs submitted by linux@horizon.com. Thanks! * Code was from the public domain, copyright abandoned. Code was * subsequently included in the kernel, thus was re-licensed under the * GNU GPL v2. * * Oct 12, 2000 Matt Domsch <Matt_Domsch@dell.com> * Same crc32 function was used in 5 other places in the kernel. * I made one version, and deleted the others. * There are various incantations of crc32(). Some use a seed of 0 or ~0. * Some xor at the end with ~0. The generic crc32() function takes * seed as an argument, and doesn't xor at the end. Then individual * users can do whatever they need. * drivers/net/smc9194.c uses seed ~0, doesn't xor with ~0. * fs/jffs2 uses seed 0, doesn't xor with ~0. * fs/partitions/efi.c uses seed ~0, xor's with ~0. 
*/ /* see: Documentation/staging/crc32.rst for a description of algorithms */ #include <linux/crc32.h> #include <linux/export.h> #include <linux/module.h> #include <linux/types.h> #include "crc32table.h" static inline u32 __maybe_unused crc32_le_base(u32 crc, const u8 *p, size_t len) { while (len--) crc = (crc >> 8) ^ crc32table_le[(crc & 255) ^ *p++]; return crc; } static inline u32 __maybe_unused crc32_be_base(u32 crc, const u8 *p, size_t len) { while (len--) crc = (crc << 8) ^ crc32table_be[(crc >> 24) ^ *p++]; return crc; } static inline u32 __maybe_unused crc32c_base(u32 crc, const u8 *p, size_t len) { while (len--) crc = (crc >> 8) ^ crc32ctable_le[(crc & 255) ^ *p++]; return crc; } #ifdef CONFIG_CRC32_ARCH #include "crc32.h" /* $(SRCARCH)/crc32.h */ u32 crc32_optimizations(void) { return crc32_optimizations_arch(); } EXPORT_SYMBOL(crc32_optimizations); #else #define crc32_le_arch crc32_le_base #define crc32_be_arch crc32_be_base #define crc32c_arch crc32c_base #endif u32 crc32_le(u32 crc, const void *p, size_t len) { return crc32_le_arch(crc, p, len); } EXPORT_SYMBOL(crc32_le); u32 crc32_be(u32 crc, const void *p, size_t len) { return crc32_be_arch(crc, p, len); } EXPORT_SYMBOL(crc32_be); u32 crc32c(u32 crc, const void *p, size_t len) { return crc32c_arch(crc, p, len); } EXPORT_SYMBOL(crc32c); #ifdef crc32_mod_init_arch static int __init crc32_mod_init(void) { crc32_mod_init_arch(); return 0; } subsys_initcall(crc32_mod_init); static void __exit crc32_mod_exit(void) { } module_exit(crc32_mod_exit); #endif MODULE_DESCRIPTION("CRC32 library functions"); MODULE_LICENSE("GPL");
13 9 14 13 14 5 5 1 9 9 9 9 14 1 14 13 13 5 2 4 1 13 13 13 9 9 4 4 9 16 16 16 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 // SPDX-License-Identifier: GPL-2.0-only /* * (C) 2007 Patrick McHardy <kaber@trash.net> */ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/gen_stats.h> #include <linux/jhash.h> #include <linux/rtnetlink.h> #include <linux/random.h> #include <linux/slab.h> #include <net/gen_stats.h> #include <net/netlink.h> #include <net/netns/generic.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_RATEEST.h> #include <net/netfilter/xt_rateest.h> #define RATEEST_HSIZE 16 struct xt_rateest_net { struct mutex hash_lock; struct hlist_head hash[RATEEST_HSIZE]; }; static unsigned int xt_rateest_id; static unsigned int jhash_rnd __read_mostly; static unsigned int xt_rateest_hash(const char *name) { return jhash(name, sizeof_field(struct xt_rateest, name), jhash_rnd) & (RATEEST_HSIZE - 1); } static void xt_rateest_hash_insert(struct xt_rateest_net *xn, struct xt_rateest *est) { unsigned int h; h = xt_rateest_hash(est->name); hlist_add_head(&est->list, &xn->hash[h]); } static struct xt_rateest 
*__xt_rateest_lookup(struct xt_rateest_net *xn, const char *name) { struct xt_rateest *est; unsigned int h; h = xt_rateest_hash(name); hlist_for_each_entry(est, &xn->hash[h], list) { if (strcmp(est->name, name) == 0) { est->refcnt++; return est; } } return NULL; } struct xt_rateest *xt_rateest_lookup(struct net *net, const char *name) { struct xt_rateest_net *xn = net_generic(net, xt_rateest_id); struct xt_rateest *est; mutex_lock(&xn->hash_lock); est = __xt_rateest_lookup(xn, name); mutex_unlock(&xn->hash_lock); return est; } EXPORT_SYMBOL_GPL(xt_rateest_lookup); void xt_rateest_put(struct net *net, struct xt_rateest *est) { struct xt_rateest_net *xn = net_generic(net, xt_rateest_id); mutex_lock(&xn->hash_lock); if (--est->refcnt == 0) { hlist_del(&est->list); gen_kill_estimator(&est->rate_est); /* * gen_estimator est_timer() might access est->lock or bstats, * wait a RCU grace period before freeing 'est' */ kfree_rcu(est, rcu); } mutex_unlock(&xn->hash_lock); } EXPORT_SYMBOL_GPL(xt_rateest_put); static unsigned int xt_rateest_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_rateest_target_info *info = par->targinfo; struct gnet_stats_basic_sync *stats = &info->est->bstats; spin_lock_bh(&info->est->lock); u64_stats_add(&stats->bytes, skb->len); u64_stats_inc(&stats->packets); spin_unlock_bh(&info->est->lock); return XT_CONTINUE; } static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) { struct xt_rateest_net *xn = net_generic(par->net, xt_rateest_id); struct xt_rateest_target_info *info = par->targinfo; struct xt_rateest *est; struct { struct nlattr opt; struct gnet_estimator est; } cfg; int ret; if (strnlen(info->name, sizeof(est->name)) >= sizeof(est->name)) return -ENAMETOOLONG; net_get_random_once(&jhash_rnd, sizeof(jhash_rnd)); mutex_lock(&xn->hash_lock); est = __xt_rateest_lookup(xn, info->name); if (est) { mutex_unlock(&xn->hash_lock); /* * If estimator parameters are specified, they must match the * existing 
estimator. */ if ((!info->interval && !info->ewma_log) || (info->interval != est->params.interval || info->ewma_log != est->params.ewma_log)) { xt_rateest_put(par->net, est); return -EINVAL; } info->est = est; return 0; } ret = -ENOMEM; est = kzalloc_obj(*est); if (!est) goto err1; gnet_stats_basic_sync_init(&est->bstats); strscpy(est->name, info->name, sizeof(est->name)); spin_lock_init(&est->lock); est->refcnt = 1; est->params.interval = info->interval; est->params.ewma_log = info->ewma_log; cfg.opt.nla_len = nla_attr_size(sizeof(cfg.est)); cfg.opt.nla_type = TCA_STATS_RATE_EST; cfg.est.interval = info->interval; cfg.est.ewma_log = info->ewma_log; ret = gen_new_estimator(&est->bstats, NULL, &est->rate_est, &est->lock, NULL, &cfg.opt); if (ret < 0) goto err2; info->est = est; xt_rateest_hash_insert(xn, est); mutex_unlock(&xn->hash_lock); return 0; err2: kfree(est); err1: mutex_unlock(&xn->hash_lock); return ret; } static void xt_rateest_tg_destroy(const struct xt_tgdtor_param *par) { struct xt_rateest_target_info *info = par->targinfo; xt_rateest_put(par->net, info->est); } static struct xt_target xt_rateest_tg_reg[] __read_mostly = { { .name = "RATEEST", .revision = 0, .family = NFPROTO_IPV4, .target = xt_rateest_tg, .checkentry = xt_rateest_tg_checkentry, .destroy = xt_rateest_tg_destroy, .targetsize = sizeof(struct xt_rateest_target_info), .usersize = offsetof(struct xt_rateest_target_info, est), .me = THIS_MODULE, }, #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "RATEEST", .revision = 0, .family = NFPROTO_IPV6, .target = xt_rateest_tg, .checkentry = xt_rateest_tg_checkentry, .destroy = xt_rateest_tg_destroy, .targetsize = sizeof(struct xt_rateest_target_info), .usersize = offsetof(struct xt_rateest_target_info, est), .me = THIS_MODULE, }, #endif }; static __net_init int xt_rateest_net_init(struct net *net) { struct xt_rateest_net *xn = net_generic(net, xt_rateest_id); int i; mutex_init(&xn->hash_lock); for (i = 0; i < ARRAY_SIZE(xn->hash); i++) 
INIT_HLIST_HEAD(&xn->hash[i]); return 0; } static struct pernet_operations xt_rateest_net_ops = { .init = xt_rateest_net_init, .id = &xt_rateest_id, .size = sizeof(struct xt_rateest_net), }; static int __init xt_rateest_tg_init(void) { int err = register_pernet_subsys(&xt_rateest_net_ops); if (err) return err; return xt_register_targets(xt_rateest_tg_reg, ARRAY_SIZE(xt_rateest_tg_reg)); } static void __exit xt_rateest_tg_fini(void) { xt_unregister_targets(xt_rateest_tg_reg, ARRAY_SIZE(xt_rateest_tg_reg)); unregister_pernet_subsys(&xt_rateest_net_ops); } MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Xtables: packet rate estimator"); MODULE_ALIAS("ipt_RATEEST"); MODULE_ALIAS("ip6t_RATEEST"); module_init(xt_rateest_tg_init); module_exit(xt_rateest_tg_fini);
96 96 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2023-2025 Christoph Hellwig. * Copyright (c) 2024-2025, Western Digital Corporation or its affiliates. */ #include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_trans.h" #include "xfs_icache.h" #include "xfs_rmap.h" #include "xfs_rtbitmap.h" #include "xfs_rtrmap_btree.h" #include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_zone_alloc.h" #include "xfs_zone_priv.h" #include "xfs_zones.h" #include "xfs_trace.h" /* * Implement Garbage Collection (GC) of partially used zoned. * * To support the purely sequential writes in each zone, zoned XFS needs to be * able to move data remaining in a zone out of it to reset the zone to prepare * for writing to it again. * * This is done by the GC thread implemented in this file. 
To support that a * number of zones (XFS_GC_ZONES) is reserved from the user visible capacity to * write the garbage collected data into. * * Whenever the available space is below the chosen threshold, the GC thread * looks for potential non-empty but not fully used zones that are worth * reclaiming. Once found the rmap for the victim zone is queried, and after * a bit of sorting to reduce fragmentation, the still live extents are read * into memory and written to the GC target zone, and the bmap btree of the * files is updated to point to the new location. To avoid taking the IOLOCK * and MMAPLOCK for the entire GC process and thus affecting the latency of * user reads and writes to the files, the GC writes are speculative and the * I/O completion checks that no other writes happened for the affected regions * before remapping. * * Once a zone does not contain any valid data, be that through GC or user * block removal, it is queued for for a zone reset. The reset operation * carefully ensures that the RT device cache is flushed and all transactions * referencing the rmap have been committed to disk. */ /* * Size of each GC scratch allocation, and the number of buffers. */ #define XFS_GC_BUF_SIZE SZ_1M #define XFS_GC_NR_BUFS 2 static_assert(XFS_GC_NR_BUFS < BIO_MAX_VECS); /* * Chunk that is read and written for each GC operation. * * Note that for writes to actual zoned devices, the chunk can be split when * reaching the hardware limit. */ struct xfs_gc_bio { struct xfs_zone_gc_data *data; /* * Entry into the reading/writing/resetting list. Only accessed from * the GC thread, so no locking needed. */ struct list_head entry; /* * State of this gc_bio. Done means the current I/O completed. * Set from the bio end I/O handler, read from the GC thread. */ enum { XFS_GC_BIO_NEW, XFS_GC_BIO_DONE, } state; /* * Pointer to the inode and byte range in the inode that this * GC chunk is operating on. 
*/ struct xfs_inode *ip; loff_t offset; unsigned int len; /* * Existing startblock (in the zone to be freed) and newly assigned * daddr in the zone GCed into. */ xfs_fsblock_t old_startblock; xfs_daddr_t new_daddr; /* Are we writing to a sequential write required zone? */ bool is_seq; /* Open Zone being written to */ struct xfs_open_zone *oz; struct xfs_rtgroup *victim_rtg; /* Bio used for reads and writes, including the bvec used by it */ struct bio bio; /* must be last */ }; #define XFS_ZONE_GC_RECS 1024 /* iterator, needs to be reinitialized for each victim zone */ struct xfs_zone_gc_iter { struct xfs_rtgroup *victim_rtg; unsigned int rec_count; unsigned int rec_idx; xfs_agblock_t next_startblock; struct xfs_rmap_irec *recs; }; /* * Per-mount GC state. */ struct xfs_zone_gc_data { struct xfs_mount *mp; /* bioset used to allocate the gc_bios */ struct bio_set bio_set; /* * Scratchpad to buffer GC data, organized as a ring buffer over * discontiguous folios. scratch_head is where the buffer is filled, * scratch_tail tracks the buffer space freed, and scratch_available * counts the space available in the ring buffer between the head and * the tail. */ struct folio *scratch_folios[XFS_GC_NR_BUFS]; unsigned int scratch_size; unsigned int scratch_available; unsigned int scratch_head; unsigned int scratch_tail; /* * List of bios currently being read, written and reset. * These lists are only accessed by the GC thread itself, and must only * be processed in order. */ struct list_head reading; struct list_head writing; struct list_head resetting; /* * Iterator for the victim zone. */ struct xfs_zone_gc_iter iter; }; /* * We aim to keep enough zones free in stock to fully use the open zone limit * for data placement purposes. Additionally, the m_zonegc_low_space tunable * can be set to make sure a fraction of the unused blocks are available for * writing. 
*/ bool xfs_zoned_need_gc( struct xfs_mount *mp) { s64 available, free, threshold; s32 remainder; if (!xfs_zoned_have_reclaimable(mp->m_zone_info)) return false; available = xfs_estimate_freecounter(mp, XC_FREE_RTAVAILABLE); if (available < xfs_rtgs_to_rfsbs(mp, mp->m_max_open_zones - XFS_OPEN_GC_ZONES)) return true; free = xfs_estimate_freecounter(mp, XC_FREE_RTEXTENTS); threshold = div_s64_rem(free, 100, &remainder); threshold = threshold * mp->m_zonegc_low_space + remainder * div_s64(mp->m_zonegc_low_space, 100); if (available < threshold) return true; return false; } static struct xfs_zone_gc_data * xfs_zone_gc_data_alloc( struct xfs_mount *mp) { struct xfs_zone_gc_data *data; int i; data = kzalloc_obj(*data); if (!data) return NULL; data->iter.recs = kzalloc_objs(*data->iter.recs, XFS_ZONE_GC_RECS); if (!data->iter.recs) goto out_free_data; if (bioset_init(&data->bio_set, 16, offsetof(struct xfs_gc_bio, bio), BIOSET_NEED_BVECS)) goto out_free_recs; for (i = 0; i < XFS_GC_NR_BUFS; i++) { data->scratch_folios[i] = folio_alloc(GFP_KERNEL, get_order(XFS_GC_BUF_SIZE)); if (!data->scratch_folios[i]) goto out_free_scratch; } data->scratch_size = XFS_GC_BUF_SIZE * XFS_GC_NR_BUFS; data->scratch_available = data->scratch_size; INIT_LIST_HEAD(&data->reading); INIT_LIST_HEAD(&data->writing); INIT_LIST_HEAD(&data->resetting); data->mp = mp; return data; out_free_scratch: while (--i >= 0) folio_put(data->scratch_folios[i]); bioset_exit(&data->bio_set); out_free_recs: kfree(data->iter.recs); out_free_data: kfree(data); return NULL; } static void xfs_zone_gc_data_free( struct xfs_zone_gc_data *data) { int i; for (i = 0; i < XFS_GC_NR_BUFS; i++) folio_put(data->scratch_folios[i]); bioset_exit(&data->bio_set); kfree(data->iter.recs); kfree(data); } static void xfs_zone_gc_iter_init( struct xfs_zone_gc_iter *iter, struct xfs_rtgroup *victim_rtg) { iter->next_startblock = 0; iter->rec_count = 0; iter->rec_idx = 0; iter->victim_rtg = victim_rtg; 
atomic_inc(&victim_rtg->rtg_gccount); } /* * Query the rmap of the victim zone to gather the records to evacuate. */ static int xfs_zone_gc_query_cb( struct xfs_btree_cur *cur, const struct xfs_rmap_irec *irec, void *private) { struct xfs_zone_gc_iter *iter = private; ASSERT(!XFS_RMAP_NON_INODE_OWNER(irec->rm_owner)); ASSERT(!xfs_is_sb_inum(cur->bc_mp, irec->rm_owner)); ASSERT(!(irec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))); iter->recs[iter->rec_count] = *irec; if (++iter->rec_count == XFS_ZONE_GC_RECS) { iter->next_startblock = irec->rm_startblock + irec->rm_blockcount; return 1; } return 0; } static int xfs_zone_gc_rmap_rec_cmp( const void *a, const void *b) { const struct xfs_rmap_irec *reca = a; const struct xfs_rmap_irec *recb = b; int diff; diff = cmp_int(reca->rm_owner, recb->rm_owner); if (diff) return diff; return cmp_int(reca->rm_offset, recb->rm_offset); } static int xfs_zone_gc_query( struct xfs_mount *mp, struct xfs_zone_gc_iter *iter) { struct xfs_rtgroup *rtg = iter->victim_rtg; struct xfs_rmap_irec ri_low = { }; struct xfs_rmap_irec ri_high; struct xfs_btree_cur *cur; struct xfs_trans *tp; int error; ASSERT(iter->next_startblock <= rtg_blocks(rtg)); if (iter->next_startblock == rtg_blocks(rtg)) goto done; ASSERT(iter->next_startblock < rtg_blocks(rtg)); ri_low.rm_startblock = iter->next_startblock; memset(&ri_high, 0xFF, sizeof(ri_high)); iter->rec_idx = 0; iter->rec_count = 0; tp = xfs_trans_alloc_empty(mp); xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); cur = xfs_rtrmapbt_init_cursor(tp, rtg); error = xfs_rmap_query_range(cur, &ri_low, &ri_high, xfs_zone_gc_query_cb, iter); xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP); xfs_btree_del_cursor(cur, error < 0 ? error : 0); xfs_trans_cancel(tp); if (error < 0) return error; /* * Sort the rmap records by inode number and increasing offset to * defragment the mappings. 
* * This could be further enhanced by an even bigger look ahead window, * but that's better left until we have better detection of changes to * inode mapping to avoid the potential of GCing already dead data. */ sort(iter->recs, iter->rec_count, sizeof(iter->recs[0]), xfs_zone_gc_rmap_rec_cmp, NULL); if (error == 0) { /* * We finished iterating through the zone. */ iter->next_startblock = rtg_blocks(rtg); if (iter->rec_count == 0) goto done; } return 0; done: atomic_dec(&iter->victim_rtg->rtg_gccount); xfs_rtgroup_rele(iter->victim_rtg); iter->victim_rtg = NULL; return 0; } static bool xfs_zone_gc_iter_next( struct xfs_mount *mp, struct xfs_zone_gc_iter *iter, struct xfs_rmap_irec *chunk_rec, struct xfs_inode **ipp) { struct xfs_rmap_irec *irec; int error; if (!iter->victim_rtg) return false; retry: if (iter->rec_idx == iter->rec_count) { error = xfs_zone_gc_query(mp, iter); if (error) goto fail; if (!iter->victim_rtg) return false; } irec = &iter->recs[iter->rec_idx]; error = xfs_iget(mp, NULL, irec->rm_owner, XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, ipp); if (error) { /* * If the inode was already deleted, skip over it. 
*/ if (error == -ENOENT) { iter->rec_idx++; goto retry; } goto fail; } if (!S_ISREG(VFS_I(*ipp)->i_mode) || !XFS_IS_REALTIME_INODE(*ipp)) { iter->rec_idx++; xfs_irele(*ipp); goto retry; } *chunk_rec = *irec; return true; fail: xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); return false; } static void xfs_zone_gc_iter_advance( struct xfs_zone_gc_iter *iter, xfs_extlen_t count_fsb) { struct xfs_rmap_irec *irec = &iter->recs[iter->rec_idx]; irec->rm_offset += count_fsb; irec->rm_startblock += count_fsb; irec->rm_blockcount -= count_fsb; if (!irec->rm_blockcount) iter->rec_idx++; } static struct xfs_rtgroup * xfs_zone_gc_pick_victim_from( struct xfs_mount *mp, uint32_t bucket) { struct xfs_zone_info *zi = mp->m_zone_info; uint32_t victim_used = U32_MAX; struct xfs_rtgroup *victim_rtg = NULL; uint32_t bit; if (!zi->zi_used_bucket_entries[bucket]) return NULL; for_each_set_bit(bit, zi->zi_used_bucket_bitmap[bucket], mp->m_sb.sb_rgcount) { struct xfs_rtgroup *rtg = xfs_rtgroup_grab(mp, bit); if (!rtg) continue; /* * If the zone is already undergoing GC, don't pick it again. * * This prevents us from picking one of the zones for which we * already submitted GC I/O, but for which the remapping hasn't * concluded yet. This won't cause data corruption, but * increases write amplification and slows down GC, so this is * a bad thing. */ if (atomic_read(&rtg->rtg_gccount)) { xfs_rtgroup_rele(rtg); continue; } /* skip zones that are just waiting for a reset */ if (rtg_rmap(rtg)->i_used_blocks == 0 || rtg_rmap(rtg)->i_used_blocks >= victim_used) { xfs_rtgroup_rele(rtg); continue; } if (victim_rtg) xfs_rtgroup_rele(victim_rtg); victim_rtg = rtg; victim_used = rtg_rmap(rtg)->i_used_blocks; /* * Any zone that is less than 1 percent used is fair game for * instant reclaim. All of these zones are in the last * bucket, so avoid the expensive division for the zones * in the other buckets. 
*/ if (bucket == 0 && rtg_rmap(rtg)->i_used_blocks < rtg_blocks(rtg) / 100) break; } return victim_rtg; } /* * Iterate through all zones marked as reclaimable and find a candidate to * reclaim. */ static bool xfs_zone_gc_select_victim( struct xfs_zone_gc_data *data) { struct xfs_zone_gc_iter *iter = &data->iter; struct xfs_mount *mp = data->mp; struct xfs_zone_info *zi = mp->m_zone_info; struct xfs_rtgroup *victim_rtg = NULL; unsigned int bucket; spin_lock(&zi->zi_used_buckets_lock); for (bucket = 0; bucket < XFS_ZONE_USED_BUCKETS; bucket++) { victim_rtg = xfs_zone_gc_pick_victim_from(mp, bucket); if (victim_rtg) break; } spin_unlock(&zi->zi_used_buckets_lock); if (!victim_rtg) return false; trace_xfs_zone_gc_select_victim(victim_rtg, bucket); xfs_zone_gc_iter_init(iter, victim_rtg); return true; } static struct xfs_open_zone * xfs_zone_gc_steal_open( struct xfs_zone_info *zi) { struct xfs_open_zone *oz, *found = NULL; spin_lock(&zi->zi_open_zones_lock); list_for_each_entry(oz, &zi->zi_open_zones, oz_entry) { if (!found || oz->oz_allocated < found->oz_allocated) found = oz; } if (found) { found->oz_is_gc = true; list_del_init(&found->oz_entry); zi->zi_nr_open_zones--; } spin_unlock(&zi->zi_open_zones_lock); return found; } static struct xfs_open_zone * xfs_zone_gc_select_target( struct xfs_mount *mp) { struct xfs_zone_info *zi = mp->m_zone_info; struct xfs_open_zone *oz = zi->zi_open_gc_zone; /* * We need to wait for pending writes to finish. */ if (oz && oz->oz_written < rtg_blocks(oz->oz_rtg)) return NULL; ASSERT(zi->zi_nr_open_zones <= mp->m_max_open_zones - XFS_OPEN_GC_ZONES); oz = xfs_open_zone(mp, WRITE_LIFE_NOT_SET, true); if (oz) trace_xfs_zone_gc_target_opened(oz->oz_rtg); spin_lock(&zi->zi_open_zones_lock); zi->zi_open_gc_zone = oz; spin_unlock(&zi->zi_open_zones_lock); return oz; } /* * Ensure we have a valid open zone to write the GC data to. 
* * If the current target zone has space keep writing to it, else first wait for * all pending writes and then pick a new one. */ static struct xfs_open_zone * xfs_zone_gc_ensure_target( struct xfs_mount *mp) { struct xfs_open_zone *oz = mp->m_zone_info->zi_open_gc_zone; if (!oz || oz->oz_allocated == rtg_blocks(oz->oz_rtg)) return xfs_zone_gc_select_target(mp); return oz; } static void xfs_zone_gc_end_io( struct bio *bio) { struct xfs_gc_bio *chunk = container_of(bio, struct xfs_gc_bio, bio); struct xfs_zone_gc_data *data = chunk->data; WRITE_ONCE(chunk->state, XFS_GC_BIO_DONE); wake_up_process(data->mp->m_zone_info->zi_gc_thread); } static struct xfs_open_zone * xfs_zone_gc_alloc_blocks( struct xfs_zone_gc_data *data, xfs_extlen_t *count_fsb, xfs_daddr_t *daddr, bool *is_seq) { struct xfs_mount *mp = data->mp; struct xfs_open_zone *oz; oz = xfs_zone_gc_ensure_target(mp); if (!oz) return NULL; *count_fsb = min(*count_fsb, XFS_B_TO_FSB(mp, data->scratch_available)); /* * Directly allocate GC blocks from the reserved pool. * * If we'd take them from the normal pool we could be stealing blocks * from a regular writer, which would then have to wait for GC and * deadlock. 
*/ spin_lock(&mp->m_sb_lock); *count_fsb = min(*count_fsb, rtg_blocks(oz->oz_rtg) - oz->oz_allocated); *count_fsb = min3(*count_fsb, mp->m_free[XC_FREE_RTEXTENTS].res_avail, mp->m_free[XC_FREE_RTAVAILABLE].res_avail); mp->m_free[XC_FREE_RTEXTENTS].res_avail -= *count_fsb; mp->m_free[XC_FREE_RTAVAILABLE].res_avail -= *count_fsb; spin_unlock(&mp->m_sb_lock); if (!*count_fsb) return NULL; *daddr = xfs_gbno_to_daddr(rtg_group(oz->oz_rtg), 0); *is_seq = bdev_zone_is_seq(mp->m_rtdev_targp->bt_bdev, *daddr); if (!*is_seq) *daddr += XFS_FSB_TO_BB(mp, oz->oz_allocated); oz->oz_allocated += *count_fsb; atomic_inc(&oz->oz_ref); return oz; } static void xfs_zone_gc_add_data( struct xfs_gc_bio *chunk) { struct xfs_zone_gc_data *data = chunk->data; unsigned int len = chunk->len; unsigned int off = data->scratch_head; do { unsigned int this_off = off % XFS_GC_BUF_SIZE; unsigned int this_len = min(len, XFS_GC_BUF_SIZE - this_off); bio_add_folio_nofail(&chunk->bio, data->scratch_folios[off / XFS_GC_BUF_SIZE], this_len, this_off); len -= this_len; off += this_len; if (off == data->scratch_size) off = 0; } while (len); } static bool xfs_zone_gc_start_chunk( struct xfs_zone_gc_data *data) { struct xfs_zone_gc_iter *iter = &data->iter; struct xfs_mount *mp = data->mp; struct block_device *bdev = mp->m_rtdev_targp->bt_bdev; struct xfs_open_zone *oz; struct xfs_rmap_irec irec; struct xfs_gc_bio *chunk; struct xfs_inode *ip; struct bio *bio; xfs_daddr_t daddr; unsigned int len; bool is_seq; if (xfs_is_shutdown(mp)) return false; if (!xfs_zone_gc_iter_next(mp, iter, &irec, &ip)) return false; oz = xfs_zone_gc_alloc_blocks(data, &irec.rm_blockcount, &daddr, &is_seq); if (!oz) { xfs_irele(ip); return false; } len = XFS_FSB_TO_B(mp, irec.rm_blockcount); bio = bio_alloc_bioset(bdev, min(howmany(len, XFS_GC_BUF_SIZE) + 1, XFS_GC_NR_BUFS), REQ_OP_READ, GFP_NOFS, &data->bio_set); chunk = container_of(bio, struct xfs_gc_bio, bio); chunk->ip = ip; chunk->offset = XFS_FSB_TO_B(mp, irec.rm_offset); 
chunk->len = len; chunk->old_startblock = xfs_rgbno_to_rtb(iter->victim_rtg, irec.rm_startblock); chunk->new_daddr = daddr; chunk->is_seq = is_seq; chunk->data = data; chunk->oz = oz; chunk->victim_rtg = iter->victim_rtg; atomic_inc(&rtg_group(chunk->victim_rtg)->xg_active_ref); atomic_inc(&chunk->victim_rtg->rtg_gccount); bio->bi_iter.bi_sector = xfs_rtb_to_daddr(mp, chunk->old_startblock); bio->bi_end_io = xfs_zone_gc_end_io; xfs_zone_gc_add_data(chunk); data->scratch_head = (data->scratch_head + len) % data->scratch_size; data->scratch_available -= len; XFS_STATS_INC(mp, xs_gc_read_calls); WRITE_ONCE(chunk->state, XFS_GC_BIO_NEW); list_add_tail(&chunk->entry, &data->reading); xfs_zone_gc_iter_advance(iter, irec.rm_blockcount); submit_bio(bio); return true; } static void xfs_zone_gc_free_chunk( struct xfs_gc_bio *chunk) { atomic_dec(&chunk->victim_rtg->rtg_gccount); xfs_rtgroup_rele(chunk->victim_rtg); list_del(&chunk->entry); xfs_open_zone_put(chunk->oz); xfs_irele(chunk->ip); bio_put(&chunk->bio); } static void xfs_zone_gc_submit_write( struct xfs_zone_gc_data *data, struct xfs_gc_bio *chunk) { if (chunk->is_seq) { chunk->bio.bi_opf &= ~REQ_OP_WRITE; chunk->bio.bi_opf |= REQ_OP_ZONE_APPEND; } chunk->bio.bi_iter.bi_sector = chunk->new_daddr; chunk->bio.bi_end_io = xfs_zone_gc_end_io; submit_bio(&chunk->bio); } static struct xfs_gc_bio * xfs_zone_gc_split_write( struct xfs_zone_gc_data *data, struct xfs_gc_bio *chunk) { struct queue_limits *lim = &bdev_get_queue(chunk->bio.bi_bdev)->limits; struct xfs_gc_bio *split_chunk; int split_sectors; unsigned int split_len; struct bio *split; unsigned int nsegs; if (!chunk->is_seq) return NULL; split_sectors = bio_split_rw_at(&chunk->bio, lim, &nsegs, lim->max_zone_append_sectors << SECTOR_SHIFT); if (!split_sectors) return NULL; /* ensure the split chunk is still block size aligned */ split_sectors = ALIGN_DOWN(split_sectors << SECTOR_SHIFT, data->mp->m_sb.sb_blocksize) >> SECTOR_SHIFT; split_len = split_sectors << 
SECTOR_SHIFT; split = bio_split(&chunk->bio, split_sectors, GFP_NOFS, &data->bio_set); split_chunk = container_of(split, struct xfs_gc_bio, bio); split_chunk->data = data; ihold(VFS_I(chunk->ip)); split_chunk->ip = chunk->ip; split_chunk->is_seq = chunk->is_seq; split_chunk->offset = chunk->offset; split_chunk->len = split_len; split_chunk->old_startblock = chunk->old_startblock; split_chunk->new_daddr = chunk->new_daddr; split_chunk->oz = chunk->oz; atomic_inc(&chunk->oz->oz_ref); split_chunk->victim_rtg = chunk->victim_rtg; atomic_inc(&rtg_group(chunk->victim_rtg)->xg_active_ref); atomic_inc(&chunk->victim_rtg->rtg_gccount); chunk->offset += split_len; chunk->len -= split_len; chunk->old_startblock += XFS_B_TO_FSB(data->mp, split_len); /* add right before the original chunk */ WRITE_ONCE(split_chunk->state, XFS_GC_BIO_NEW); list_add_tail(&split_chunk->entry, &chunk->entry); return split_chunk; } static void xfs_zone_gc_write_chunk( struct xfs_gc_bio *chunk) { struct xfs_zone_gc_data *data = chunk->data; struct xfs_mount *mp = chunk->ip->i_mount; struct xfs_gc_bio *split_chunk; if (chunk->bio.bi_status) xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); if (xfs_is_shutdown(mp)) { xfs_zone_gc_free_chunk(chunk); return; } XFS_STATS_INC(mp, xs_gc_write_calls); XFS_STATS_ADD(mp, xs_gc_bytes, chunk->len); WRITE_ONCE(chunk->state, XFS_GC_BIO_NEW); list_move_tail(&chunk->entry, &data->writing); bio_reuse(&chunk->bio, REQ_OP_WRITE); while ((split_chunk = xfs_zone_gc_split_write(data, chunk))) xfs_zone_gc_submit_write(data, split_chunk); xfs_zone_gc_submit_write(data, chunk); } static void xfs_zone_gc_finish_chunk( struct xfs_gc_bio *chunk) { uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; struct xfs_zone_gc_data *data = chunk->data; struct xfs_inode *ip = chunk->ip; struct xfs_mount *mp = ip->i_mount; int error; if (chunk->bio.bi_status) xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); if (xfs_is_shutdown(mp)) { xfs_zone_gc_free_chunk(chunk); return; } data->scratch_tail = 
(data->scratch_tail + chunk->len) % data->scratch_size;
	data->scratch_available += chunk->len;

	/*
	 * Cycle through the iolock and wait for direct I/O and layouts to
	 * ensure no one is reading from the old mapping before it goes away.
	 *
	 * Note that xfs_zoned_end_io() below checks that no other writer raced
	 * with us to update the mapping by checking that the old startblock
	 * didn't change.
	 */
	xfs_ilock(ip, iolock);
	error = xfs_break_layouts(VFS_I(ip), &iolock, BREAK_UNMAP);
	if (!error)
		inode_dio_wait(VFS_I(ip));
	xfs_iunlock(ip, iolock);
	if (error)
		goto free;

	if (chunk->is_seq)
		chunk->new_daddr = chunk->bio.bi_iter.bi_sector;
	error = xfs_zoned_end_io(ip, chunk->offset, chunk->len,
			chunk->new_daddr, chunk->oz, chunk->old_startblock);
free:
	if (error)
		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
	xfs_zone_gc_free_chunk(chunk);
}

/*
 * Finish a zone reset bio.  Called from xfs_zone_gc_handle_work() for chunks
 * on the resetting list whose state is XFS_GC_BIO_DONE.
 *
 * On a bio error the file system is shut down.  Otherwise the rtgroup is
 * marked XFS_RTG_FREE, the free zone counter and the available space are
 * increased and everybody sleeping on zi_zone_wait is woken.  In both cases
 * the chunk is unlinked from its list and the bio reference is dropped.
 */
static void
xfs_zone_gc_finish_reset(
	struct xfs_gc_bio	*chunk)
{
	/* bi_private carries the rtgroup, see xfs_zone_gc_reset_zones() */
	struct xfs_rtgroup	*rtg = chunk->bio.bi_private;
	struct xfs_mount	*mp = rtg_mount(rtg);
	struct xfs_zone_info	*zi = mp->m_zone_info;

	if (chunk->bio.bi_status) {
		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
		goto out;
	}

	xfs_group_set_mark(rtg_group(rtg), XFS_RTG_FREE);
	atomic_inc(&zi->zi_nr_free_zones);

	xfs_zoned_add_available(mp, rtg_blocks(rtg));

	wake_up_all(&zi->zi_zone_wait);
out:
	list_del(&chunk->entry);
	bio_put(&chunk->bio);
}

/*
 * Submit @bio to reset the zone backing @rtg.
 *
 * The bio is completed immediately with an error when the
 * XFS_ERRTAG_ZONE_RESET error tag fires.  If the start of the group is not a
 * sequential zone, the operation is turned into a discard covering the whole
 * group, or completed right away without I/O when the device does not support
 * discards either.
 */
static void
xfs_submit_zone_reset_bio(
	struct xfs_rtgroup	*rtg,
	struct bio		*bio)
{
	struct xfs_mount	*mp = rtg_mount(rtg);

	trace_xfs_zone_reset(rtg);

	ASSERT(rtg_rmap(rtg)->i_used_blocks == 0);

	if (XFS_TEST_ERROR(mp, XFS_ERRTAG_ZONE_RESET)) {
		bio_io_error(bio);
		return;
	}

	XFS_STATS_INC(mp, xs_gc_zone_reset_calls);

	bio->bi_iter.bi_sector = xfs_gbno_to_daddr(rtg_group(rtg), 0);
	if (!bdev_zone_is_seq(bio->bi_bdev, bio->bi_iter.bi_sector)) {
		/*
		 * Also use the bio to drive the state machine when neither
		 * zone reset nor discard is supported to keep things simple.
		 */
		if (!bdev_max_discard_sectors(bio->bi_bdev)) {
			bio_endio(bio);
			return;
		}
		bio->bi_opf &= ~REQ_OP_ZONE_RESET;
		bio->bi_opf |= REQ_OP_DISCARD;
		bio->bi_iter.bi_size = XFS_FSB_TO_B(mp, rtg_blocks(rtg));
	}

	submit_bio(bio);
}

/* Completion handler for xfs_zone_gc_reset_sync(): wake up the waiter. */
static void xfs_bio_wait_endio(struct bio *bio)
{
	complete(bio->bi_private);
}

/*
 * Reset the zone backing @rtg and wait for the result.
 *
 * Uses an on-stack bio and completion.  Returns 0 on success or the negative
 * errno that the bio status translates to.
 */
int
xfs_zone_gc_reset_sync(
	struct xfs_rtgroup	*rtg)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct bio		bio;
	int			error;

	bio_init(&bio, rtg_mount(rtg)->m_rtdev_targp->bt_bdev, NULL, 0,
			REQ_OP_ZONE_RESET | REQ_SYNC);
	bio.bi_private = &done;
	bio.bi_end_io = xfs_bio_wait_endio;
	xfs_submit_zone_reset_bio(rtg, &bio);
	wait_for_completion_io(&done);

	error = blk_status_to_errno(bio.bi_status);
	bio_uninit(&bio);
	return error;
}

/*
 * Start resetting every group on @reset_list, which is linked through
 * xg_next_reset.
 *
 * The RT device cache is flushed first; if that fails the file system is shut
 * down and nothing is reset.  For each group the rmap inode is forced to the
 * log, then a reset bio is allocated from data->bio_set, tracked on
 * data->resetting and submitted.
 */
static void
xfs_zone_gc_reset_zones(
	struct xfs_zone_gc_data	*data,
	struct xfs_group	*reset_list)
{
	struct xfs_group	*next = reset_list;

	if (blkdev_issue_flush(data->mp->m_rtdev_targp->bt_bdev) < 0) {
		xfs_force_shutdown(data->mp, SHUTDOWN_META_IO_ERROR);
		return;
	}

	do {
		struct xfs_rtgroup	*rtg = to_rtg(next);
		struct xfs_gc_bio	*chunk;
		struct bio		*bio;

		xfs_log_force_inode(rtg_rmap(rtg));

		/* unlink the group before the bio can complete */
		next = rtg_group(rtg)->xg_next_reset;
		rtg_group(rtg)->xg_next_reset = NULL;

		bio = bio_alloc_bioset(rtg_mount(rtg)->m_rtdev_targp->bt_bdev,
				0, REQ_OP_ZONE_RESET, GFP_NOFS, &data->bio_set);
		bio->bi_private = rtg;
		bio->bi_end_io = xfs_zone_gc_end_io;

		/* the bio is embedded in a struct xfs_gc_bio */
		chunk = container_of(bio, struct xfs_gc_bio, bio);
		chunk->data = data;
		WRITE_ONCE(chunk->state, XFS_GC_BIO_NEW);
		list_add_tail(&chunk->entry, &data->resetting);

		xfs_submit_zone_reset_bio(rtg, bio);
	} while (next);
}

/*
 * Decide whether new GC chunks should be started.
 *
 * Returns false if the file system is shut down, no scratch space is
 * available, or there is no GC target zone with room left.  If no victim is
 * currently selected, a new one is only picked when the thread is not asked
 * to stop or park and xfs_zoned_need_gc() reports that GC is needed.
 */
static bool
xfs_zone_gc_should_start_new_work(
	struct xfs_zone_gc_data	*data)
{
	struct xfs_open_zone	*oz;

	if (xfs_is_shutdown(data->mp))
		return false;
	if (!data->scratch_available)
		return false;

	oz = xfs_zone_gc_ensure_target(data->mp);
	if (!oz || oz->oz_allocated == rtg_blocks(oz->oz_rtg))
		return false;

	if (!data->iter.victim_rtg) {
		if (kthread_should_stop() || kthread_should_park())
			return false;
		if (!xfs_zoned_need_gc(data->mp))
			return false;
		if (!xfs_zone_gc_select_victim(data))
			return false;
	}

	return true;
}

/*
 * Handle the work to read and write data for GC and to reset the zones,
 * including handling all completions.
 *
 * Note that the order of the chunks is preserved so that we don't undo the
 * optimal order established by xfs_zone_gc_query().
 *
 * Every step that does something sets the task state to TASK_RUNNING, which
 * xfs_zoned_gcd() checks to decide whether to loop again instead of sleeping.
 */
static void
xfs_zone_gc_handle_work(
	struct xfs_zone_gc_data	*data)
{
	struct xfs_zone_info	*zi = data->mp->m_zone_info;
	struct xfs_gc_bio	*chunk, *next;
	struct xfs_group	*reset_list;
	struct blk_plug		plug;

	/* take over the whole pending reset list in one go */
	spin_lock(&zi->zi_reset_list_lock);
	reset_list = zi->zi_reset_list;
	zi->zi_reset_list = NULL;
	spin_unlock(&zi->zi_reset_list_lock);

	if (reset_list) {
		set_current_state(TASK_RUNNING);
		xfs_zone_gc_reset_zones(data, reset_list);
	}

	/* each list is processed in order, stopping at the first unfinished bio */
	list_for_each_entry_safe(chunk, next, &data->resetting, entry) {
		if (READ_ONCE(chunk->state) != XFS_GC_BIO_DONE)
			break;
		set_current_state(TASK_RUNNING);
		xfs_zone_gc_finish_reset(chunk);
	}

	list_for_each_entry_safe(chunk, next, &data->writing, entry) {
		if (READ_ONCE(chunk->state) != XFS_GC_BIO_DONE)
			break;
		set_current_state(TASK_RUNNING);
		xfs_zone_gc_finish_chunk(chunk);
	}

	blk_start_plug(&plug);
	list_for_each_entry_safe(chunk, next, &data->reading, entry) {
		if (READ_ONCE(chunk->state) != XFS_GC_BIO_DONE)
			break;
		set_current_state(TASK_RUNNING);
		xfs_zone_gc_write_chunk(chunk);
	}
	blk_finish_plug(&plug);

	if (xfs_zone_gc_should_start_new_work(data)) {
		set_current_state(TASK_RUNNING);
		blk_start_plug(&plug);
		while (xfs_zone_gc_start_chunk(data))
			;
		blk_finish_plug(&plug);
	}
}

/*
 * Note that the current GC algorithm would break reflinks and thus duplicate
 * data that was shared by multiple owners before.  Because of that reflinks
 * are currently not supported on zoned file systems and can't be created or
 * mounted.
*/ static int xfs_zoned_gcd( void *private) { struct xfs_zone_gc_data *data = private; struct xfs_mount *mp = data->mp; struct xfs_zone_info *zi = mp->m_zone_info; unsigned int nofs_flag; nofs_flag = memalloc_nofs_save(); set_freezable(); for (;;) { set_current_state(TASK_INTERRUPTIBLE | TASK_FREEZABLE); xfs_set_zonegc_running(mp); xfs_zone_gc_handle_work(data); /* * Only sleep if nothing set the state to running. Else check for * work again as someone might have queued up more work and woken * us in the meantime. */ if (get_current_state() == TASK_RUNNING) { try_to_freeze(); continue; } if (list_empty(&data->reading) && list_empty(&data->writing) && list_empty(&data->resetting) && !zi->zi_reset_list) { xfs_clear_zonegc_running(mp); xfs_zoned_resv_wake_all(mp); if (kthread_should_stop()) { __set_current_state(TASK_RUNNING); break; } if (kthread_should_park()) { __set_current_state(TASK_RUNNING); kthread_parkme(); continue; } } schedule(); } xfs_clear_zonegc_running(mp); if (data->iter.victim_rtg) xfs_rtgroup_rele(data->iter.victim_rtg); memalloc_nofs_restore(nofs_flag); xfs_zone_gc_data_free(data); return 0; } void xfs_zone_gc_start( struct xfs_mount *mp) { if (xfs_has_zoned(mp)) kthread_unpark(mp->m_zone_info->zi_gc_thread); } void xfs_zone_gc_stop( struct xfs_mount *mp) { if (xfs_has_zoned(mp)) kthread_park(mp->m_zone_info->zi_gc_thread); } int xfs_zone_gc_mount( struct xfs_mount *mp) { struct xfs_zone_info *zi = mp->m_zone_info; struct xfs_zone_gc_data *data; struct xfs_open_zone *oz; int error; /* * If there are no free zones available for GC, pick the open zone with * the least used space to GC into. This should only happen after an * unclean shutdown near ENOSPC while GC was ongoing. * * We also need to do this for the first gc zone allocation if we * unmounted while at the open limit. 
*/ if (!xfs_group_marked(mp, XG_TYPE_RTG, XFS_RTG_FREE) || zi->zi_nr_open_zones == mp->m_max_open_zones) oz = xfs_zone_gc_steal_open(zi); else oz = xfs_open_zone(mp, WRITE_LIFE_NOT_SET, true); if (!oz) { xfs_warn(mp, "unable to allocate a zone for gc"); error = -EIO; goto out; } trace_xfs_zone_gc_target_opened(oz->oz_rtg); zi->zi_open_gc_zone = oz; data = xfs_zone_gc_data_alloc(mp); if (!data) { error = -ENOMEM; goto out_put_gc_zone; } zi->zi_gc_thread = kthread_create(xfs_zoned_gcd, data, "xfs-zone-gc/%s", mp->m_super->s_id); if (IS_ERR(zi->zi_gc_thread)) { xfs_warn(mp, "unable to create zone gc thread"); error = PTR_ERR(zi->zi_gc_thread); goto out_free_gc_data; } /* xfs_zone_gc_start will unpark for rw mounts */ kthread_park(zi->zi_gc_thread); return 0; out_free_gc_data: kfree(data); out_put_gc_zone: xfs_open_zone_put(zi->zi_open_gc_zone); out: return error; } void xfs_zone_gc_unmount( struct xfs_mount *mp) { struct xfs_zone_info *zi = mp->m_zone_info; kthread_stop(zi->zi_gc_thread); if (zi->zi_open_gc_zone) xfs_open_zone_put(zi->zi_open_gc_zone); }
6 6 6 2 6 6 6 1 5 5 5 1 1 5 5 5 5 5 5 5 5 3 5 5 5 5 5 5 5 2 1 1 2 7 2 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 // SPDX-License-Identifier: GPL-2.0-or-later /* SCTP kernel implementation * (C) Copyright Red Hat Inc. 2017 * * This file is part of the SCTP kernel implementation * * These functions manipulate sctp stream queue/scheduling. * * Please send any bug reports or fixes you make to the * email addresched(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> */ #include <linux/list.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> #include <net/sctp/stream_sched.h> /* Priority handling * RFC DRAFT ndata section 3.2 */ static void sctp_sched_rr_unsched_all(struct sctp_stream *stream); static void sctp_sched_rr_next_stream(struct sctp_stream *stream) { struct list_head *pos; pos = stream->rr_next->rr_list.next; if (pos == &stream->rr_list) pos = pos->next; stream->rr_next = list_entry(pos, struct sctp_stream_out_ext, rr_list); } static void sctp_sched_rr_unsched(struct sctp_stream *stream, struct sctp_stream_out_ext *soute) { if (stream->rr_next == soute) /* Try to move to the next stream */ sctp_sched_rr_next_stream(stream); list_del_init(&soute->rr_list); /* If we have no other stream queued, clear next */ if (list_empty(&stream->rr_list)) stream->rr_next = NULL; } static 
void sctp_sched_rr_sched(struct sctp_stream *stream, struct sctp_stream_out_ext *soute) { if (!list_empty(&soute->rr_list)) /* Already scheduled. */ return; /* Schedule the stream */ list_add_tail(&soute->rr_list, &stream->rr_list); if (!stream->rr_next) stream->rr_next = soute; } static int sctp_sched_rr_set(struct sctp_stream *stream, __u16 sid, __u16 prio, gfp_t gfp) { return 0; } static int sctp_sched_rr_get(struct sctp_stream *stream, __u16 sid, __u16 *value) { return 0; } static int sctp_sched_rr_init(struct sctp_stream *stream) { INIT_LIST_HEAD(&stream->rr_list); stream->rr_next = NULL; return 0; } static int sctp_sched_rr_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp) { INIT_LIST_HEAD(&SCTP_SO(stream, sid)->ext->rr_list); return 0; } static void sctp_sched_rr_free_sid(struct sctp_stream *stream, __u16 sid) { } static void sctp_sched_rr_enqueue(struct sctp_outq *q, struct sctp_datamsg *msg) { struct sctp_stream *stream; struct sctp_chunk *ch; __u16 sid; ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list); sid = sctp_chunk_stream_no(ch); stream = &q->asoc->stream; sctp_sched_rr_sched(stream, SCTP_SO(stream, sid)->ext); } static struct sctp_chunk *sctp_sched_rr_dequeue(struct sctp_outq *q) { struct sctp_stream *stream = &q->asoc->stream; struct sctp_stream_out_ext *soute; struct sctp_chunk *ch = NULL; /* Bail out quickly if queue is empty */ if (list_empty(&q->out_chunk_list)) goto out; /* Find which chunk is next */ if (stream->out_curr) soute = stream->out_curr->ext; else soute = stream->rr_next; ch = list_entry(soute->outq.next, struct sctp_chunk, stream_list); sctp_sched_dequeue_common(q, ch); out: return ch; } static void sctp_sched_rr_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch) { struct sctp_stream_out_ext *soute; __u16 sid; /* Last chunk on that msg, move to the next stream */ sid = sctp_chunk_stream_no(ch); soute = SCTP_SO(&q->asoc->stream, sid)->ext; sctp_sched_rr_next_stream(&q->asoc->stream); if 
(list_empty(&soute->outq)) sctp_sched_rr_unsched(&q->asoc->stream, soute); } static void sctp_sched_rr_sched_all(struct sctp_stream *stream) { struct sctp_association *asoc; struct sctp_stream_out_ext *soute; struct sctp_chunk *ch; asoc = container_of(stream, struct sctp_association, stream); list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) { __u16 sid; sid = sctp_chunk_stream_no(ch); soute = SCTP_SO(stream, sid)->ext; if (soute) sctp_sched_rr_sched(stream, soute); } } static void sctp_sched_rr_unsched_all(struct sctp_stream *stream) { struct sctp_stream_out_ext *soute, *tmp; list_for_each_entry_safe(soute, tmp, &stream->rr_list, rr_list) sctp_sched_rr_unsched(stream, soute); } static const struct sctp_sched_ops sctp_sched_rr = { .set = sctp_sched_rr_set, .get = sctp_sched_rr_get, .init = sctp_sched_rr_init, .init_sid = sctp_sched_rr_init_sid, .free_sid = sctp_sched_rr_free_sid, .enqueue = sctp_sched_rr_enqueue, .dequeue = sctp_sched_rr_dequeue, .dequeue_done = sctp_sched_rr_dequeue_done, .sched_all = sctp_sched_rr_sched_all, .unsched_all = sctp_sched_rr_unsched_all, }; void sctp_sched_ops_rr_init(void) { sctp_sched_ops_register(SCTP_SS_RR, &sctp_sched_rr); }
2 5 5 1 4 4 4 2 4 1 3 3 5 1 1 1 77 72 3 1 1 1 68 1 1 7 5 1 3 3 3 2 1 1 1 1 1 1 2 3 3 3 3 3 3 8 7 6 8 4 4 3 3 3 3 3 2 2 2 3 2 2 2 8 146 145 144 144 145 1 270 266 38 20 12 12 4 3 3 3 3 3 1 3 5 5 4 4 4 3 5 4 3 3 3 2 5 4 13 1 146 8 2 266 9 748 741 748 80 78 65 65 740 541 541 537 2 540 4 539 2 1 78 77 77 77 1 183 1 18 15 1 8 104 106 102 535 158 154 149 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 
440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 // SPDX-License-Identifier: GPL-2.0 #include <linux/kmod.h> #include <linux/netdevice.h> #include <linux/inetdevice.h> #include <linux/etherdevice.h> #include <linux/rtnetlink.h> #include <linux/net_tstamp.h> #include <linux/phylib_stubs.h> #include <linux/ptp_clock_kernel.h> #include <linux/wireless.h> #include <linux/if_bridge.h> #include <net/dsa_stubs.h> 
#include <net/netdev_lock.h>
#include <net/wext.h>

#include "dev.h"

/*
 *	Map an interface index to its name (SIOCGIFNAME)
 */

/*
 *	We need this ioctl for efficient implementation of the
 *	if_indextoname() function required by the IPv6 API.  Without
 *	it, we would have to search all the interfaces to find a
 *	match.  --pb
 */

static int dev_ifname(struct net *net, struct ifreq *ifr)
{
	/* force NUL termination of the name buffer before it is filled in */
	ifr->ifr_name[IFNAMSIZ-1] = 0;
	return netdev_get_name(net, ifr->ifr_name, ifr->ifr_ifindex);
}

/*
 *	Perform a SIOCGIFCONF call. This structure will change
 *	size eventually, and there is nothing I can do about it.
 *	Thus we will need a 'compatibility mode'.
 *
 *	A NULL buffer pointer makes inet_gifconf() run with a NULL
 *	destination and zero length, so only the total is accumulated.
 *	The total is written back to uifc->ifc_len.  Returns -EFAULT
 *	if the request cannot be read or if inet_gifconf() fails,
 *	otherwise the result of put_user().
 */
int dev_ifconf(struct net *net, struct ifconf __user *uifc)
{
	struct net_device *dev;
	void __user *pos;
	size_t size;
	int len, total = 0, done;

	/* both the ifconf and the ifreq structures are slightly different */
	if (in_compat_syscall()) {
		struct compat_ifconf ifc32;

		if (copy_from_user(&ifc32, uifc, sizeof(struct compat_ifconf)))
			return -EFAULT;

		pos = compat_ptr(ifc32.ifcbuf);
		len = ifc32.ifc_len;
		size = sizeof(struct compat_ifreq);
	} else {
		struct ifconf ifc;

		if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
			return -EFAULT;

		pos = ifc.ifc_buf;
		len = ifc.ifc_len;
		size = sizeof(struct ifreq);
	}

	/* Loop over the interfaces, and write an info block for each. */
	rtnl_net_lock(net);
	for_each_netdev(net, dev) {
		if (!pos)
			done = inet_gifconf(dev, NULL, 0, size);
		else
			done = inet_gifconf(dev, pos + total,
					    len - total, size);
		if (done < 0) {
			rtnl_net_unlock(net);
			return -EFAULT;
		}
		total += done;
	}
	rtnl_net_unlock(net);

	return put_user(total, &uifc->ifc_len);
}

/*
 * Fill ifr->ifr_map from the device's memory/IRQ/DMA/port settings
 * (SIOCGIFMAP).  For compat syscalls the same storage is written through
 * the struct compat_ifmap layout instead.  Always returns 0.
 */
static int dev_getifmap(struct net_device *dev, struct ifreq *ifr)
{
	struct ifmap *ifmap = &ifr->ifr_map;

	if (in_compat_syscall()) {
		struct compat_ifmap *cifmap = (struct compat_ifmap *)ifmap;

		cifmap->mem_start = dev->mem_start;
		cifmap->mem_end   = dev->mem_end;
		cifmap->base_addr = dev->base_addr;
		cifmap->irq       = dev->irq;
		cifmap->dma       = dev->dma;
		cifmap->port      = dev->if_port;

		return 0;
	}

	ifmap->mem_start  = dev->mem_start;
	ifmap->mem_end    = dev->mem_end;
	ifmap->base_addr  = dev->base_addr;
	ifmap->irq        = dev->irq;
	ifmap->dma        = dev->dma;
	ifmap->port       = dev->if_port;

	return 0;
}

/*
 * Hand ifr->ifr_map to the driver's ndo_set_config() (SIOCSIFMAP).  For
 * compat syscalls the compat layout is first converted into a native
 * struct ifmap.  Returns -EOPNOTSUPP if the driver has no ndo_set_config().
 */
static int netif_setifmap(struct net_device *dev, struct ifreq *ifr)
{
	struct compat_ifmap *cifmap = (struct compat_ifmap *)&ifr->ifr_map;

	if (!dev->netdev_ops->ndo_set_config)
		return -EOPNOTSUPP;

	if (in_compat_syscall()) {
		struct ifmap ifmap = {
			.mem_start  = cifmap->mem_start,
			.mem_end    = cifmap->mem_end,
			.base_addr  = cifmap->base_addr,
			.irq        = cifmap->irq,
			.dma        = cifmap->dma,
			.port       = cifmap->port,
		};

		return dev->netdev_ops->ndo_set_config(dev, &ifmap);
	}

	return dev->netdev_ops->ndo_set_config(dev, &ifr->ifr_map);
}

/*
 *	Perform the SIOCxIFxxx calls, inside rcu_read_lock()
 *
 *	The device is looked up by ifr->ifr_name; -ENODEV is returned
 *	if it does not exist.
 */
static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
{
	int err;
	struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);

	if (!dev)
		return -ENODEV;

	switch (cmd) {
	case SIOCGIFFLAGS:	/* Get interface flags */
		ifr->ifr_flags = (short)netif_get_flags(dev);
		return 0;

	case SIOCGIFMETRIC:	/* Get the metric on the interface
				   (currently unused) */
		ifr->ifr_metric = 0;
		return 0;

	case SIOCGIFMTU:	/* Get the MTU of a device */
		ifr->ifr_mtu = dev->mtu;
		return 0;

	case SIOCGIFSLAVE:
		err = -EINVAL;
		break;

	case SIOCGIFMAP:
		return dev_getifmap(dev, ifr);

	case SIOCGIFINDEX:
		ifr->ifr_ifindex = dev->ifindex;
		return 0;

	case SIOCGIFTXQLEN:
		ifr->ifr_qlen = dev->tx_queue_len;
		return 0;

	default:
		/* dev_ioctl() should ensure this case
		 * is never reached
		 */
		WARN_ON(1);
		err = -ENOTTY;
		break;

	}
	return err;
}

/*
 * Validate a hardware timestamping request.
 *
 * Returns -EINVAL if @cfg->flags has bits outside HWTSTAMP_FLAG_MASK,
 * -ERANGE if tx_type or rx_filter is not one of the values listed below,
 * and 0 otherwise.
 */
int net_hwtstamp_validate(const struct kernel_hwtstamp_config *cfg)
{
	enum hwtstamp_tx_types tx_type;
	enum hwtstamp_rx_filters rx_filter;
	int tx_type_valid = 0;
	int rx_filter_valid = 0;

	if (cfg->flags & ~HWTSTAMP_FLAG_MASK)
		return -EINVAL;

	tx_type = cfg->tx_type;
	rx_filter = cfg->rx_filter;

	/* any value not listed leaves the corresponding *_valid flag at 0 */
	switch (tx_type) {
	case HWTSTAMP_TX_OFF:
	case HWTSTAMP_TX_ON:
	case HWTSTAMP_TX_ONESTEP_SYNC:
	case HWTSTAMP_TX_ONESTEP_P2P:
		tx_type_valid = 1;
		break;
	case __HWTSTAMP_TX_CNT:
		/* not a real value */
		break;
	}

	switch (rx_filter) {
	case HWTSTAMP_FILTER_NONE:
	case HWTSTAMP_FILTER_ALL:
	case HWTSTAMP_FILTER_SOME:
	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
	case HWTSTAMP_FILTER_PTP_V2_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
	case HWTSTAMP_FILTER_NTP_ALL:
		rx_filter_valid = 1;
		break;
	case __HWTSTAMP_FILTER_CNT:
		/* not a real value */
		break;
	}

	if (!tx_type_valid || !rx_filter_valid)
		return -ERANGE;

	return 0;
}

/**
 * dev_get_hwtstamp_phylib() - Get hardware timestamping settings of NIC
 *	or of attached phylib PHY
 * @dev: Network device
 * @cfg: Timestamping configuration structure
 *
 * Helper for calling the default hardware provider timestamping.
 *
 * Note: phy_mii_ioctl() only handles SIOCSHWTSTAMP (not SIOCGHWTSTAMP), but
 * phydev->mii_ts has both hwtstamp_get() and hwtstamp_set() methods.
* So this will return -EOPNOTSUPP for phylib only if hwtstamp_get() is not
 * implemented for now, which is still more accurate than letting the netdev
 * handle the GET request.
 */
int dev_get_hwtstamp_phylib(struct net_device *dev,
			    struct kernel_hwtstamp_config *cfg)
{
	struct hwtstamp_provider *hwprov;

	/* an explicitly selected provider decides who answers the request */
	hwprov = rtnl_dereference(dev->hwprov);
	if (hwprov) {
		cfg->qualifier = hwprov->desc.qualifier;
		if (hwprov->source == HWTSTAMP_SOURCE_PHYLIB &&
		    hwprov->phydev)
			return phy_hwtstamp_get(hwprov->phydev, cfg);

		if (hwprov->source == HWTSTAMP_SOURCE_NETDEV)
			return dev->netdev_ops->ndo_hwtstamp_get(dev, cfg);

		return -EOPNOTSUPP;
	}

	/* no provider selected: PHY if it is the default, else the netdev */
	if (phy_is_default_hwtstamp(dev->phydev))
		return phy_hwtstamp_get(dev->phydev, cfg);

	return dev->netdev_ops->ndo_hwtstamp_get(dev, cfg);
}

/*
 * SIOCGHWTSTAMP: read the timestamping configuration of @dev and copy it to
 * the user buffer at ifr->ifr_data.
 *
 * Returns -EOPNOTSUPP if the driver has no ndo_hwtstamp_get(), -ENODEV if
 * the device is not present, -EFAULT if the copy to user space fails, or the
 * error from dev_get_hwtstamp_phylib().
 */
static int dev_get_hwtstamp(struct net_device *dev, struct ifreq *ifr)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	struct kernel_hwtstamp_config kernel_cfg = {};
	struct hwtstamp_config cfg;
	int err;

	if (!ops->ndo_hwtstamp_get)
		return -EOPNOTSUPP;

	if (!netif_device_present(dev))
		return -ENODEV;

	kernel_cfg.ifr = ifr;
	netdev_lock_ops(dev);
	err = dev_get_hwtstamp_phylib(dev, &kernel_cfg);
	netdev_unlock_ops(dev);
	if (err)
		return err;

	/* If the request was resolved through an unconverted driver, omit
	 * the copy_to_user(), since the implementation has already done that
	 */
	if (!kernel_cfg.copied_to_user) {
		hwtstamp_config_from_kernel(&cfg, &kernel_cfg);

		if (copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)))
			return -EFAULT;
	}

	return 0;
}

/**
 * dev_set_hwtstamp_phylib() - Change hardware timestamping of NIC
 *	or of attached phylib PHY
 * @dev: Network device
 * @cfg: Timestamping configuration structure
 * @extack: Netlink extended ack message structure, for error reporting
 *
 * Helper for enforcing a common policy that phylib timestamping, if available,
 * should take precedence in front of hardware timestamping provided by the
 * netdev.
If the netdev driver needs to perform specific actions even for PHY * timestamping to work properly (a switch port must trap the timestamped * frames and not forward them), it must set dev->see_all_hwtstamp_requests. */ int dev_set_hwtstamp_phylib(struct net_device *dev, struct kernel_hwtstamp_config *cfg, struct netlink_ext_ack *extack) { const struct net_device_ops *ops = dev->netdev_ops; struct kernel_hwtstamp_config old_cfg = {}; struct hwtstamp_provider *hwprov; struct phy_device *phydev; bool changed = false; bool phy_ts; int err; hwprov = rtnl_dereference(dev->hwprov); if (hwprov) { if (hwprov->source == HWTSTAMP_SOURCE_PHYLIB && hwprov->phydev) { phy_ts = true; phydev = hwprov->phydev; } else if (hwprov->source == HWTSTAMP_SOURCE_NETDEV) { phy_ts = false; } else { return -EOPNOTSUPP; } cfg->qualifier = hwprov->desc.qualifier; } else { phy_ts = phy_is_default_hwtstamp(dev->phydev); if (phy_ts) phydev = dev->phydev; } cfg->source = phy_ts ? HWTSTAMP_SOURCE_PHYLIB : HWTSTAMP_SOURCE_NETDEV; if (phy_ts && dev->see_all_hwtstamp_requests) { err = ops->ndo_hwtstamp_get(dev, &old_cfg); if (err) return err; } if (!phy_ts || dev->see_all_hwtstamp_requests) { err = ops->ndo_hwtstamp_set(dev, cfg, extack); if (err) { if (extack->_msg) netdev_err(dev, "%s\n", extack->_msg); return err; } } if (phy_ts && dev->see_all_hwtstamp_requests) changed = kernel_hwtstamp_config_changed(&old_cfg, cfg); if (phy_ts) { err = phy_hwtstamp_set(phydev, cfg, extack); if (err) { if (changed) ops->ndo_hwtstamp_set(dev, &old_cfg, NULL); return err; } } return 0; } static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr) { const struct net_device_ops *ops = dev->netdev_ops; struct kernel_hwtstamp_config kernel_cfg = {}; struct netlink_ext_ack extack = {}; struct hwtstamp_config cfg; int err; if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) return -EFAULT; hwtstamp_config_to_kernel(&kernel_cfg, &cfg); kernel_cfg.ifr = ifr; err = net_hwtstamp_validate(&kernel_cfg); if (err) 
return err; err = dsa_conduit_hwtstamp_validate(dev, &kernel_cfg, &extack); if (err) { if (extack._msg) netdev_err(dev, "%s\n", extack._msg); return err; } if (!ops->ndo_hwtstamp_set) return -EOPNOTSUPP; if (!netif_device_present(dev)) return -ENODEV; netdev_lock_ops(dev); err = dev_set_hwtstamp_phylib(dev, &kernel_cfg, &extack); netdev_unlock_ops(dev); if (err) return err; /* The driver may have modified the configuration, so copy the * updated version of it back to user space */ if (!kernel_cfg.copied_to_user) { hwtstamp_config_from_kernel(&cfg, &kernel_cfg); if (copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg))) return -EFAULT; } return 0; } int generic_hwtstamp_get_lower(struct net_device *dev, struct kernel_hwtstamp_config *kernel_cfg) { const struct net_device_ops *ops = dev->netdev_ops; int err; if (!netif_device_present(dev)) return -ENODEV; if (!ops->ndo_hwtstamp_get) return -EOPNOTSUPP; netdev_lock_ops(dev); err = dev_get_hwtstamp_phylib(dev, kernel_cfg); netdev_unlock_ops(dev); return err; } EXPORT_SYMBOL(generic_hwtstamp_get_lower); int generic_hwtstamp_set_lower(struct net_device *dev, struct kernel_hwtstamp_config *kernel_cfg, struct netlink_ext_ack *extack) { const struct net_device_ops *ops = dev->netdev_ops; int err; if (!netif_device_present(dev)) return -ENODEV; if (!ops->ndo_hwtstamp_set) return -EOPNOTSUPP; netdev_lock_ops(dev); err = dev_set_hwtstamp_phylib(dev, kernel_cfg, extack); netdev_unlock_ops(dev); return err; } EXPORT_SYMBOL(generic_hwtstamp_set_lower); static int dev_siocbond(struct net_device *dev, struct ifreq *ifr, unsigned int cmd) { const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_siocbond) { int ret = -ENODEV; netdev_lock_ops(dev); if (netif_device_present(dev)) ret = ops->ndo_siocbond(dev, ifr, cmd); netdev_unlock_ops(dev); return ret; } return -EOPNOTSUPP; } static int dev_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, unsigned int cmd) { const struct net_device_ops *ops = 
dev->netdev_ops; if (ops->ndo_siocdevprivate) { int ret = -ENODEV; netdev_lock_ops(dev); if (netif_device_present(dev)) ret = ops->ndo_siocdevprivate(dev, ifr, data, cmd); netdev_unlock_ops(dev); return ret; } return -EOPNOTSUPP; } static int dev_siocwandev(struct net_device *dev, struct if_settings *ifs) { const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_siocwandev) { int ret = -ENODEV; netdev_lock_ops(dev); if (netif_device_present(dev)) ret = ops->ndo_siocwandev(dev, ifs); netdev_unlock_ops(dev); return ret; } return -EOPNOTSUPP; } /* * Perform the SIOCxIFxxx calls, inside rtnl_net_lock() */ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, unsigned int cmd) { int err; struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); const struct net_device_ops *ops; if (!dev) return -ENODEV; ops = dev->netdev_ops; switch (cmd) { case SIOCSIFFLAGS: /* Set interface flags */ return dev_change_flags(dev, ifr->ifr_flags, NULL); case SIOCSIFMETRIC: /* Set the metric on the interface (currently unused) */ return -EOPNOTSUPP; case SIOCSIFMTU: /* Set the MTU of a device */ return dev_set_mtu(dev, ifr->ifr_mtu); case SIOCSIFHWADDR: if (dev->addr_len > sizeof(ifr->ifr_hwaddr)) return -EINVAL; return dev_set_mac_address_user(dev, (struct sockaddr_storage *)&ifr->ifr_hwaddr, NULL); case SIOCSIFHWBROADCAST: if (ifr->ifr_hwaddr.sa_family != dev->type) return -EINVAL; memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, min(sizeof(ifr->ifr_hwaddr.sa_data), (size_t)dev->addr_len)); netdev_lock_ops(dev); call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); netdev_unlock_ops(dev); return 0; case SIOCSIFMAP: netdev_lock_ops(dev); err = netif_setifmap(dev, ifr); netdev_unlock_ops(dev); return err; case SIOCADDMULTI: if (!ops->ndo_set_rx_mode || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) return -ENODEV; netdev_lock_ops(dev); err = dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); netdev_unlock_ops(dev); 
return err; case SIOCDELMULTI: if (!ops->ndo_set_rx_mode || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) return -ENODEV; netdev_lock_ops(dev); err = dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data); netdev_unlock_ops(dev); return err; case SIOCSIFTXQLEN: if (ifr->ifr_qlen < 0) return -EINVAL; return dev_change_tx_queue_len(dev, ifr->ifr_qlen); case SIOCSIFNAME: ifr->ifr_newname[IFNAMSIZ-1] = '\0'; return dev_change_name(dev, ifr->ifr_newname); case SIOCWANDEV: return dev_siocwandev(dev, &ifr->ifr_settings); case SIOCDEVPRIVATE ... SIOCDEVPRIVATE + 15: return dev_siocdevprivate(dev, ifr, data, cmd); case SIOCSHWTSTAMP: return dev_set_hwtstamp(dev, ifr); case SIOCGHWTSTAMP: return dev_get_hwtstamp(dev, ifr); case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: return dev_eth_ioctl(dev, ifr, cmd); case SIOCBONDENSLAVE: case SIOCBONDRELEASE: case SIOCBONDSETHWADDR: case SIOCBONDSLAVEINFOQUERY: case SIOCBONDINFOQUERY: case SIOCBONDCHANGEACTIVE: return dev_siocbond(dev, ifr, cmd); /* Unknown ioctl */ default: err = -EINVAL; } return err; } /** * dev_load - load a network module * @net: the applicable net namespace * @name: name of interface * * If a network interface is not present and the process has suitable * privileges this function loads the module. If module loading is not * available in this kernel then it becomes a nop. */ void dev_load(struct net *net, const char *name) { struct net_device *dev; int no_module; rcu_read_lock(); dev = dev_get_by_name_rcu(net, name); rcu_read_unlock(); no_module = !dev; if (no_module && capable(CAP_NET_ADMIN)) no_module = request_module("netdev-%s", name); if (no_module && capable(CAP_SYS_MODULE)) request_module("%s", name); } EXPORT_SYMBOL(dev_load); /* * This function handles all "interface"-type I/O control requests. The actual * 'doing' part of this is dev_ifsioc above. 
*/

/**
 *	dev_ioctl	-	network device ioctl
 *	@net: the applicable net namespace
 *	@cmd: command to issue
 *	@ifr: pointer to a struct ifreq in user space
 *	@data: data exchanged with userspace
 *	@need_copyout: whether or not copy_to_user() should be called
 *
 *	Issue ioctl functions to devices. This is normally called by the
 *	user space syscall interfaces but can sometimes be useful for
 *	other purposes. The return value is the return from the syscall if
 *	positive or a negative errno code on error.
 *
 *	NOTE(review): @ifr is written directly by this function (name
 *	termination, alias colon), so it looks like a kernel copy rather
 *	than a user pointer - confirm and update the @ifr line above.
 */

int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr,
	      void __user *data, bool *need_copyout)
{
	int ret;
	char *colon;

	/* default: the caller copies the result back; cleared below for "set" calls */
	if (need_copyout)
		*need_copyout = true;
	if (cmd == SIOCGIFNAME)
		return dev_ifname(net, ifr);

	ifr->ifr_name[IFNAMSIZ-1] = 0;

	/*
	 * Cut the name at the first ':' for the device lookup.  The paths
	 * below that return a value put the colon back before returning.
	 */
	colon = strchr(ifr->ifr_name, ':');
	if (colon)
		*colon = 0;

	/*
	 *	See which interface the caller is talking about.
	 */

	switch (cmd) {
	case SIOCGIFHWADDR:
		dev_load(net, ifr->ifr_name);
		ret = netif_get_mac_address(&ifr->ifr_hwaddr, net,
					    ifr->ifr_name);
		if (colon)
			*colon = ':';
		return ret;
	/*
	 *	These ioctl calls:
	 *	- can be done by all.
	 *	- atomic and do not require locking.
	 *	- return a value
	 */
	case SIOCGIFFLAGS:
	case SIOCGIFMETRIC:
	case SIOCGIFMTU:
	case SIOCGIFSLAVE:
	case SIOCGIFMAP:
	case SIOCGIFINDEX:
	case SIOCGIFTXQLEN:
		dev_load(net, ifr->ifr_name);
		rcu_read_lock();
		ret = dev_ifsioc_locked(net, ifr, cmd);
		rcu_read_unlock();
		if (colon)
			*colon = ':';
		return ret;

	case SIOCETHTOOL:
		dev_load(net, ifr->ifr_name);
		ret = dev_ethtool(net, ifr, data);
		if (colon)
			*colon = ':';
		return ret;

	/*
	 *	These ioctl calls:
	 *	- require superuser power.
	 *	- require strict serialization.
	 *	- return a value
	 */
	case SIOCGMIIPHY:
	case SIOCGMIIREG:
	case SIOCSIFNAME:
		dev_load(net, ifr->ifr_name);
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EPERM;
		rtnl_net_lock(net);
		ret = dev_ifsioc(net, ifr, data, cmd);
		rtnl_net_unlock(net);
		if (colon)
			*colon = ':';
		return ret;

	/*
	 *	These ioctl calls:
	 *	- require superuser power.
	 *	- require strict serialization.
	 *	- do not return a value
	 */
	case SIOCSIFMAP:
	case SIOCSIFTXQLEN:
		/* checked with capable(), then again with ns_capable() below */
		if (!capable(CAP_NET_ADMIN))
			return -EPERM;
		fallthrough;
	/*
	 *	These ioctl calls:
	 *	- require local superuser power.
	 *	- require strict serialization.
	 *	- do not return a value
	 */
	case SIOCSIFFLAGS:
	case SIOCSIFMETRIC:
	case SIOCSIFMTU:
	case SIOCSIFHWADDR:
	case SIOCSIFSLAVE:
	case SIOCADDMULTI:
	case SIOCDELMULTI:
	case SIOCSIFHWBROADCAST:
	case SIOCSMIIREG:
	case SIOCBONDENSLAVE:
	case SIOCBONDRELEASE:
	case SIOCBONDSETHWADDR:
	case SIOCBONDCHANGEACTIVE:
	case SIOCSHWTSTAMP:
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EPERM;
		fallthrough;
	/* the two bonding queries get here without any capability check */
	case SIOCBONDSLAVEINFOQUERY:
	case SIOCBONDINFOQUERY:
		dev_load(net, ifr->ifr_name);
		rtnl_net_lock(net);
		ret = dev_ifsioc(net, ifr, data, cmd);
		rtnl_net_unlock(net);
		if (need_copyout)
			*need_copyout = false;
		return ret;

	case SIOCGIFMEM:
		/* Get the per device memory space. We can add this but
		 * currently do not support it */
	case SIOCSIFMEM:
		/* Set the per device memory buffer space.
		 * Not applicable in our case */
	case SIOCSIFLINK:
		return -ENOTTY;

	/*
	 *	Unknown or private ioctl.
	 */
	default:
		if (cmd == SIOCWANDEV ||
		    cmd == SIOCGHWTSTAMP ||
		    (cmd >= SIOCDEVPRIVATE &&
		     cmd <= SIOCDEVPRIVATE + 15)) {
			dev_load(net, ifr->ifr_name);
			rtnl_net_lock(net);
			ret = dev_ifsioc(net, ifr, data, cmd);
			rtnl_net_unlock(net);
			return ret;
		}
		return -ENOTTY;
	}
}
4 3 4 1 4 3 3 3 2 2 1 1 3 4 4 4 4 1286 1283 18 17 11 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 /* Copyright 2011, Siemens AG * written by Alexander Smirnov <alex.bluesman.smirnov@gmail.com> */ /* Based on patches from Jon Smirl <jonsmirl@gmail.com> * Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. */ /* Jon's code is based on 6lowpan implementation for Contiki which is: * Copyright (c) 2008, Swedish Institute of Computer Science. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the Institute nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include <linux/module.h> #include <linux/netdevice.h> #include <linux/ieee802154.h> #include <linux/if_arp.h> #include <net/ipv6.h> #include <net/netdev_lock.h> #include "6lowpan_i.h" static int open_count; static const struct header_ops lowpan_header_ops = { .create = lowpan_header_create, }; static int lowpan_dev_init(struct net_device *ldev) { netdev_lockdep_set_classes(ldev); return 0; } static int lowpan_open(struct net_device *dev) { if (!open_count) lowpan_rx_init(); open_count++; return 0; } static int lowpan_stop(struct net_device *dev) { open_count--; if (!open_count) lowpan_rx_exit(); return 0; } static int lowpan_neigh_construct(struct net_device *dev, struct neighbour *n) { struct lowpan_802154_neigh *neigh = lowpan_802154_neigh(neighbour_priv(n)); /* default no short_addr is available for a neighbour */ neigh->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC); return 0; } static int lowpan_get_iflink(const struct net_device *dev) { return READ_ONCE(lowpan_802154_dev(dev)->wdev->ifindex); } static const struct net_device_ops lowpan_netdev_ops = { .ndo_init = lowpan_dev_init, .ndo_start_xmit = lowpan_xmit, .ndo_open = lowpan_open, .ndo_stop = lowpan_stop, .ndo_neigh_construct = lowpan_neigh_construct, .ndo_get_iflink = lowpan_get_iflink, }; static void lowpan_setup(struct net_device *ldev) { memset(ldev->broadcast, 0xff, IEEE802154_ADDR_LEN); /* We need an ipv6hdr as minimum len when calling xmit */ ldev->hard_header_len = sizeof(struct ipv6hdr); ldev->flags = IFF_BROADCAST | IFF_MULTICAST; ldev->priv_flags |= IFF_NO_QUEUE; ldev->netdev_ops = &lowpan_netdev_ops; ldev->header_ops = &lowpan_header_ops; ldev->needs_free_netdev = true; ldev->netns_immutable = true; } static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { if (tb[IFLA_ADDRESS]) { if (nla_len(tb[IFLA_ADDRESS]) != IEEE802154_ADDR_LEN) return -EINVAL; } return 0; } static int lowpan_newlink(struct net_device *ldev, struct 
rtnl_newlink_params *params, struct netlink_ext_ack *extack) { struct nlattr **tb = params->tb; struct net_device *wdev; int ret; ASSERT_RTNL(); pr_debug("adding new link\n"); if (!tb[IFLA_LINK]) return -EINVAL; if (params->link_net && !net_eq(params->link_net, dev_net(ldev))) return -EINVAL; /* find and hold wpan device */ wdev = dev_get_by_index(dev_net(ldev), nla_get_u32(tb[IFLA_LINK])); if (!wdev) return -ENODEV; if (wdev->type != ARPHRD_IEEE802154) { dev_put(wdev); return -EINVAL; } if (wdev->ieee802154_ptr->lowpan_dev) { dev_put(wdev); return -EBUSY; } lowpan_802154_dev(ldev)->wdev = wdev; /* Set the lowpan hardware address to the wpan hardware address. */ __dev_addr_set(ldev, wdev->dev_addr, IEEE802154_ADDR_LEN); /* We need headroom for possible wpan_dev_hard_header call and tailroom * for encryption/fcs handling. The lowpan interface will replace * the IPv6 header with 6LoWPAN header. At worst case the 6LoWPAN * header has LOWPAN_IPHC_MAX_HEADER_LEN more bytes than the IPv6 * header. 
*/ ldev->needed_headroom = LOWPAN_IPHC_MAX_HEADER_LEN + wdev->needed_headroom; ldev->needed_tailroom = wdev->needed_tailroom; ldev->neigh_priv_len = sizeof(struct lowpan_802154_neigh); ret = lowpan_register_netdevice(ldev, LOWPAN_LLTYPE_IEEE802154); if (ret < 0) { dev_put(wdev); return ret; } wdev->ieee802154_ptr->lowpan_dev = ldev; return 0; } static void lowpan_dellink(struct net_device *ldev, struct list_head *head) { struct net_device *wdev = lowpan_802154_dev(ldev)->wdev; ASSERT_RTNL(); wdev->ieee802154_ptr->lowpan_dev = NULL; lowpan_unregister_netdevice(ldev); dev_put(wdev); } static struct rtnl_link_ops lowpan_link_ops __read_mostly = { .kind = "lowpan", .priv_size = LOWPAN_PRIV_SIZE(sizeof(struct lowpan_802154_dev)), .setup = lowpan_setup, .newlink = lowpan_newlink, .dellink = lowpan_dellink, .validate = lowpan_validate, }; static inline int __init lowpan_netlink_init(void) { return rtnl_link_register(&lowpan_link_ops); } static inline void lowpan_netlink_fini(void) { rtnl_link_unregister(&lowpan_link_ops); } static int lowpan_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *ndev = netdev_notifier_info_to_dev(ptr); struct wpan_dev *wpan_dev; if (ndev->type != ARPHRD_IEEE802154) return NOTIFY_DONE; wpan_dev = ndev->ieee802154_ptr; if (!wpan_dev) return NOTIFY_DONE; switch (event) { case NETDEV_UNREGISTER: /* Check if wpan interface is unregistered that we * also delete possible lowpan interfaces which belongs * to the wpan interface. 
*/ if (wpan_dev->lowpan_dev) lowpan_dellink(wpan_dev->lowpan_dev, NULL); break; default: return NOTIFY_DONE; } return NOTIFY_OK; } static struct notifier_block lowpan_dev_notifier = { .notifier_call = lowpan_device_event, }; static int __init lowpan_init_module(void) { int err = 0; err = lowpan_net_frag_init(); if (err < 0) goto out; err = lowpan_netlink_init(); if (err < 0) goto out_frag; err = register_netdevice_notifier(&lowpan_dev_notifier); if (err < 0) goto out_pack; return 0; out_pack: lowpan_netlink_fini(); out_frag: lowpan_net_frag_exit(); out: return err; } static void __exit lowpan_cleanup_module(void) { lowpan_netlink_fini(); lowpan_net_frag_exit(); unregister_netdevice_notifier(&lowpan_dev_notifier); } module_init(lowpan_init_module); module_exit(lowpan_cleanup_module); MODULE_DESCRIPTION("IPv6 over Low power Wireless Personal Area Network IEEE 802.15.4 core"); MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("lowpan");
4 3 4 3 2 3 22 23 20 23 72 13 2 72 6 5 5 5 1 1 1 1 2 1 1 1 1 1 1 1 1 1 4 23 3 3 2 2 1 2 2 1 1 1 1 2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 // SPDX-License-Identifier: GPL-2.0-or-later /* * OSS compatible sequencer driver * * OSS compatible i/o control * * Copyright (C) 1998,99 Takashi Iwai <tiwai@suse.de> */ #include "seq_oss_device.h" #include "seq_oss_readq.h" #include "seq_oss_writeq.h" #include "seq_oss_timer.h" #include "seq_oss_synth.h" #include "seq_oss_midi.h" #include "seq_oss_event.h" static int snd_seq_oss_synth_info_user(struct seq_oss_devinfo *dp, void __user *arg) { struct synth_info info; if (copy_from_user(&info, arg, sizeof(info))) return -EFAULT; if (snd_seq_oss_synth_make_info(dp, info.device, &info) < 0) return -EINVAL; if (copy_to_user(arg, &info, sizeof(info))) return -EFAULT; return 0; } static int snd_seq_oss_midi_info_user(struct seq_oss_devinfo *dp, void __user *arg) { struct midi_info info; if (copy_from_user(&info, arg, sizeof(info))) return -EFAULT; if (snd_seq_oss_midi_make_info(dp, info.device, &info) < 0) return -EINVAL; if (copy_to_user(arg, &info, sizeof(info))) return -EFAULT; return 0; } static int snd_seq_oss_oob_user(struct seq_oss_devinfo *dp, void __user *arg) { unsigned char ev[8]; struct snd_seq_event tmpev; if (copy_from_user(ev, arg, 8)) return -EFAULT; memset(&tmpev, 0, sizeof(tmpev)); snd_seq_oss_fill_addr(dp, &tmpev, dp->addr.client, dp->addr.port); 
tmpev.time.tick = 0; if (! snd_seq_oss_process_event(dp, (union evrec *)ev, &tmpev)) { snd_seq_oss_dispatch(dp, &tmpev, 0, 0); } return 0; } int snd_seq_oss_ioctl(struct seq_oss_devinfo *dp, unsigned int cmd, unsigned long carg) { int dev, val; void __user *arg = (void __user *)carg; int __user *p = arg; switch (cmd) { case SNDCTL_TMR_TIMEBASE: case SNDCTL_TMR_TEMPO: case SNDCTL_TMR_START: case SNDCTL_TMR_STOP: case SNDCTL_TMR_CONTINUE: case SNDCTL_TMR_METRONOME: case SNDCTL_TMR_SOURCE: case SNDCTL_TMR_SELECT: case SNDCTL_SEQ_CTRLRATE: return snd_seq_oss_timer_ioctl(dp->timer, cmd, arg); case SNDCTL_SEQ_PANIC: snd_seq_oss_reset(dp); return -EINVAL; case SNDCTL_SEQ_SYNC: if (! is_write_mode(dp->file_mode) || dp->writeq == NULL) return 0; while (snd_seq_oss_writeq_sync(dp->writeq)) ; if (signal_pending(current)) return -ERESTARTSYS; return 0; case SNDCTL_SEQ_RESET: snd_seq_oss_reset(dp); return 0; case SNDCTL_SEQ_TESTMIDI: if (get_user(dev, p)) return -EFAULT; return snd_seq_oss_midi_open(dp, dev, dp->file_mode); case SNDCTL_SEQ_GETINCOUNT: if (dp->readq == NULL || ! is_read_mode(dp->file_mode)) return 0; return put_user(dp->readq->qlen, p) ? -EFAULT : 0; case SNDCTL_SEQ_GETOUTCOUNT: if (! is_write_mode(dp->file_mode) || dp->writeq == NULL) return 0; return put_user(snd_seq_oss_writeq_get_free_size(dp->writeq), p) ? -EFAULT : 0; case SNDCTL_SEQ_GETTIME: return put_user(snd_seq_oss_timer_cur_tick(dp->timer), p) ? -EFAULT : 0; case SNDCTL_SEQ_RESETSAMPLES: if (get_user(dev, p)) return -EFAULT; return snd_seq_oss_synth_ioctl(dp, dev, cmd, carg); case SNDCTL_SEQ_NRSYNTHS: return put_user(dp->max_synthdev, p) ? -EFAULT : 0; case SNDCTL_SEQ_NRMIDIS: return put_user(dp->max_mididev, p) ? -EFAULT : 0; case SNDCTL_SYNTH_MEMAVL: if (get_user(dev, p)) return -EFAULT; val = snd_seq_oss_synth_ioctl(dp, dev, cmd, carg); return put_user(val, p) ? 
-EFAULT : 0; case SNDCTL_FM_4OP_ENABLE: if (get_user(dev, p)) return -EFAULT; snd_seq_oss_synth_ioctl(dp, dev, cmd, carg); return 0; case SNDCTL_SYNTH_INFO: case SNDCTL_SYNTH_ID: return snd_seq_oss_synth_info_user(dp, arg); case SNDCTL_SEQ_OUTOFBAND: return snd_seq_oss_oob_user(dp, arg); case SNDCTL_MIDI_INFO: return snd_seq_oss_midi_info_user(dp, arg); case SNDCTL_SEQ_THRESHOLD: if (! is_write_mode(dp->file_mode)) return 0; if (get_user(val, p)) return -EFAULT; if (val < 1) val = 1; if (val >= dp->writeq->maxlen) val = dp->writeq->maxlen - 1; snd_seq_oss_writeq_set_output(dp->writeq, val); return 0; case SNDCTL_MIDI_PRETIME: if (dp->readq == NULL || !is_read_mode(dp->file_mode)) return 0; if (get_user(val, p)) return -EFAULT; if (val <= 0) val = -1; else val = (HZ * val) / 10; dp->readq->pre_event_timeout = val; return put_user(val, p) ? -EFAULT : 0; default: if (! is_write_mode(dp->file_mode)) return -EIO; return snd_seq_oss_synth_ioctl(dp, 0, cmd, carg); } return 0; }
201 199 200 200 106 198 2 1 201 192 12 10 8 192 83 189 87 192 1 192 192 127 129 192 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/io_uring.h> #include <uapi/linux/io_uring.h> #include "io_uring.h" #include "rsrc.h" #include "nop.h" struct io_nop { /* NOTE: kiocb has the file as the first member, so don't do it here */ struct file *file; int result; int fd; unsigned int flags; __u64 extra1; __u64 extra2; }; #define NOP_FLAGS (IORING_NOP_INJECT_RESULT | IORING_NOP_FIXED_FILE | \ IORING_NOP_FIXED_BUFFER | IORING_NOP_FILE | \ IORING_NOP_TW | IORING_NOP_CQE32) int io_nop_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_nop *nop = io_kiocb_to_cmd(req, struct io_nop); nop->flags = READ_ONCE(sqe->nop_flags); if (nop->flags & ~NOP_FLAGS) return -EINVAL; if (nop->flags & IORING_NOP_INJECT_RESULT) nop->result = READ_ONCE(sqe->len); else nop->result = 0; if (nop->flags & IORING_NOP_FILE) nop->fd = READ_ONCE(sqe->fd); else nop->fd = -1; if (nop->flags & IORING_NOP_FIXED_BUFFER) req->buf_index = READ_ONCE(sqe->buf_index); if (nop->flags & IORING_NOP_CQE32) { struct io_ring_ctx *ctx = req->ctx; if (!(ctx->flags & (IORING_SETUP_CQE32|IORING_SETUP_CQE_MIXED))) return -EINVAL; nop->extra1 = READ_ONCE(sqe->off); nop->extra2 = READ_ONCE(sqe->addr); } return 0; } int io_nop(struct io_kiocb *req, unsigned int issue_flags) { struct io_nop *nop = io_kiocb_to_cmd(req, struct io_nop); int ret = nop->result; if (nop->flags & IORING_NOP_FILE) { if (nop->flags & IORING_NOP_FIXED_FILE) { req->file = io_file_get_fixed(req, nop->fd, issue_flags); req->flags |= REQ_F_FIXED_FILE; } else { req->file = 
io_file_get_normal(req, nop->fd); } if (!req->file) { ret = -EBADF; goto done; } } if (nop->flags & IORING_NOP_FIXED_BUFFER) { if (!io_find_buf_node(req, issue_flags)) ret = -EFAULT; } done: if (ret < 0) req_set_fail(req); if (nop->flags & IORING_NOP_CQE32) io_req_set_res32(req, nop->result, 0, nop->extra1, nop->extra2); else io_req_set_res(req, nop->result, 0); if (nop->flags & IORING_NOP_TW) { req->io_task_work.func = io_req_task_complete; io_req_task_work_add(req); return IOU_ISSUE_SKIP_COMPLETE; } return IOU_COMPLETE; }
16 16 14 7 6 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 // SPDX-License-Identifier: GPL-2.0-or-later /* * xt_connmark - Netfilter module to operate on connection marks * * Copyright (C) 2002,2004 MARA Systems AB <https://www.marasystems.com> * by Henrik Nordstrom <hno@marasystems.com> * Copyright © CC Computer Consultants GmbH, 2007 - 2008 * Jan Engelhardt <jengelh@medozas.de> */ #include <linux/module.h> #include <linux/skbuff.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_ecache.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_connmark.h> MODULE_AUTHOR("Henrik Nordstrom <hno@marasystems.com>"); MODULE_DESCRIPTION("Xtables: connection mark operations"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_CONNMARK"); MODULE_ALIAS("ip6t_CONNMARK"); MODULE_ALIAS("ipt_connmark"); MODULE_ALIAS("ip6t_connmark"); static unsigned int connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info) { enum ip_conntrack_info ctinfo; u_int32_t new_targetmark; struct nf_conn *ct; u_int32_t newmark; u_int32_t oldmark; ct = nf_ct_get(skb, &ctinfo); if (ct == NULL) return XT_CONTINUE; switch (info->mode) { case XT_CONNMARK_SET: oldmark = READ_ONCE(ct->mark); newmark = 
(oldmark & ~info->ctmask) ^ info->ctmark; if (info->shift_dir == D_SHIFT_RIGHT) newmark >>= info->shift_bits; else newmark <<= info->shift_bits; if (READ_ONCE(ct->mark) != newmark) { WRITE_ONCE(ct->mark, newmark); nf_conntrack_event_cache(IPCT_MARK, ct); } break; case XT_CONNMARK_SAVE: new_targetmark = (skb->mark & info->nfmask); if (info->shift_dir == D_SHIFT_RIGHT) new_targetmark >>= info->shift_bits; else new_targetmark <<= info->shift_bits; newmark = (READ_ONCE(ct->mark) & ~info->ctmask) ^ new_targetmark; if (READ_ONCE(ct->mark) != newmark) { WRITE_ONCE(ct->mark, newmark); nf_conntrack_event_cache(IPCT_MARK, ct); } break; case XT_CONNMARK_RESTORE: new_targetmark = (READ_ONCE(ct->mark) & info->ctmask); if (info->shift_dir == D_SHIFT_RIGHT) new_targetmark >>= info->shift_bits; else new_targetmark <<= info->shift_bits; newmark = (skb->mark & ~info->nfmask) ^ new_targetmark; skb->mark = newmark; break; } return XT_CONTINUE; } static unsigned int connmark_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_connmark_tginfo1 *info = par->targinfo; const struct xt_connmark_tginfo2 info2 = { .ctmark = info->ctmark, .ctmask = info->ctmask, .nfmask = info->nfmask, .mode = info->mode, }; return connmark_tg_shift(skb, &info2); } static unsigned int connmark_tg_v2(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_connmark_tginfo2 *info = par->targinfo; return connmark_tg_shift(skb, info); } static int connmark_tg_check(const struct xt_tgchk_param *par) { int ret; ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) pr_info_ratelimited("cannot load conntrack support for proto=%u\n", par->family); return ret; } static void connmark_tg_destroy(const struct xt_tgdtor_param *par) { nf_ct_netns_put(par->net, par->family); } static bool connmark_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_connmark_mtinfo1 *info = par->matchinfo; enum ip_conntrack_info ctinfo; const struct nf_conn *ct; ct = 
nf_ct_get(skb, &ctinfo); if (ct == NULL) return false; return ((READ_ONCE(ct->mark) & info->mask) == info->mark) ^ info->invert; } static int connmark_mt_check(const struct xt_mtchk_param *par) { int ret; ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) pr_info_ratelimited("cannot load conntrack support for proto=%u\n", par->family); return ret; } static void connmark_mt_destroy(const struct xt_mtdtor_param *par) { nf_ct_netns_put(par->net, par->family); } static struct xt_target connmark_tg_reg[] __read_mostly = { { .name = "CONNMARK", .revision = 1, .family = NFPROTO_IPV4, .checkentry = connmark_tg_check, .target = connmark_tg, .targetsize = sizeof(struct xt_connmark_tginfo1), .destroy = connmark_tg_destroy, .me = THIS_MODULE, }, { .name = "CONNMARK", .revision = 2, .family = NFPROTO_IPV4, .checkentry = connmark_tg_check, .target = connmark_tg_v2, .targetsize = sizeof(struct xt_connmark_tginfo2), .destroy = connmark_tg_destroy, .me = THIS_MODULE, }, #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "CONNMARK", .revision = 1, .family = NFPROTO_IPV6, .checkentry = connmark_tg_check, .target = connmark_tg, .targetsize = sizeof(struct xt_connmark_tginfo1), .destroy = connmark_tg_destroy, .me = THIS_MODULE, }, { .name = "CONNMARK", .revision = 2, .family = NFPROTO_IPV6, .checkentry = connmark_tg_check, .target = connmark_tg_v2, .targetsize = sizeof(struct xt_connmark_tginfo2), .destroy = connmark_tg_destroy, .me = THIS_MODULE, }, #endif }; static struct xt_match connmark_mt_reg __read_mostly = { .name = "connmark", .revision = 1, .family = NFPROTO_UNSPEC, .checkentry = connmark_mt_check, .match = connmark_mt, .matchsize = sizeof(struct xt_connmark_mtinfo1), .destroy = connmark_mt_destroy, .me = THIS_MODULE, }; static int __init connmark_mt_init(void) { int ret; ret = xt_register_targets(connmark_tg_reg, ARRAY_SIZE(connmark_tg_reg)); if (ret < 0) return ret; ret = xt_register_match(&connmark_mt_reg); if (ret < 0) { xt_unregister_targets(connmark_tg_reg, 
ARRAY_SIZE(connmark_tg_reg)); return ret; } return 0; } static void __exit connmark_mt_exit(void) { xt_unregister_match(&connmark_mt_reg); xt_unregister_targets(connmark_tg_reg, ARRAY_SIZE(connmark_tg_reg)); } module_init(connmark_mt_init); module_exit(connmark_mt_exit);
46 65 46 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 #undef TRACE_SYSTEM #define TRACE_SYSTEM irq_matrix #if !defined(_TRACE_IRQ_MATRIX_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_IRQ_MATRIX_H #include <linux/tracepoint.h> struct irq_matrix; struct cpumap; DECLARE_EVENT_CLASS(irq_matrix_global, TP_PROTO(struct irq_matrix *matrix), TP_ARGS(matrix), TP_STRUCT__entry( __field( unsigned int, online_maps ) __field( unsigned int, global_available ) __field( unsigned int, global_reserved ) __field( unsigned int, total_allocated ) ), TP_fast_assign( __entry->online_maps = matrix->online_maps; __entry->global_available = matrix->global_available; __entry->global_reserved = matrix->global_reserved; __entry->total_allocated = matrix->total_allocated; ), TP_printk("online_maps=%d global_avl=%u, global_rsvd=%u, total_alloc=%u", __entry->online_maps, __entry->global_available, __entry->global_reserved, __entry->total_allocated) ); DECLARE_EVENT_CLASS(irq_matrix_global_update, TP_PROTO(int bit, struct irq_matrix *matrix), TP_ARGS(bit, matrix), TP_STRUCT__entry( __field( int, bit ) __field( unsigned int, online_maps ) __field( unsigned int, global_available ) __field( unsigned int, global_reserved ) __field( unsigned int, total_allocated ) ), TP_fast_assign( __entry->bit = bit; __entry->online_maps = matrix->online_maps; __entry->global_available = 
matrix->global_available; __entry->global_reserved = matrix->global_reserved; __entry->total_allocated = matrix->total_allocated; ), TP_printk("bit=%d online_maps=%d global_avl=%u, global_rsvd=%u, total_alloc=%u", __entry->bit, __entry->online_maps, __entry->global_available, __entry->global_reserved, __entry->total_allocated) ); DECLARE_EVENT_CLASS(irq_matrix_cpu, TP_PROTO(int bit, unsigned int cpu, struct irq_matrix *matrix, struct cpumap *cmap), TP_ARGS(bit, cpu, matrix, cmap), TP_STRUCT__entry( __field( int, bit ) __field( unsigned int, cpu ) __field( bool, online ) __field( unsigned int, available ) __field( unsigned int, allocated ) __field( unsigned int, managed ) __field( unsigned int, online_maps ) __field( unsigned int, global_available ) __field( unsigned int, global_reserved ) __field( unsigned int, total_allocated ) ), TP_fast_assign( __entry->bit = bit; __entry->cpu = cpu; __entry->online = cmap->online; __entry->available = cmap->available; __entry->allocated = cmap->allocated; __entry->managed = cmap->managed; __entry->online_maps = matrix->online_maps; __entry->global_available = matrix->global_available; __entry->global_reserved = matrix->global_reserved; __entry->total_allocated = matrix->total_allocated; ), TP_printk("bit=%d cpu=%u online=%d avl=%u alloc=%u managed=%u online_maps=%u global_avl=%u, global_rsvd=%u, total_alloc=%u", __entry->bit, __entry->cpu, __entry->online, __entry->available, __entry->allocated, __entry->managed, __entry->online_maps, __entry->global_available, __entry->global_reserved, __entry->total_allocated) ); DEFINE_EVENT(irq_matrix_global, irq_matrix_online, TP_PROTO(struct irq_matrix *matrix), TP_ARGS(matrix) ); DEFINE_EVENT(irq_matrix_global, irq_matrix_offline, TP_PROTO(struct irq_matrix *matrix), TP_ARGS(matrix) ); DEFINE_EVENT(irq_matrix_global, irq_matrix_reserve, TP_PROTO(struct irq_matrix *matrix), TP_ARGS(matrix) ); DEFINE_EVENT(irq_matrix_global, irq_matrix_remove_reserved, TP_PROTO(struct irq_matrix *matrix), 
TP_ARGS(matrix) ); DEFINE_EVENT(irq_matrix_global_update, irq_matrix_assign_system, TP_PROTO(int bit, struct irq_matrix *matrix), TP_ARGS(bit, matrix) ); DEFINE_EVENT(irq_matrix_cpu, irq_matrix_reserve_managed, TP_PROTO(int bit, unsigned int cpu, struct irq_matrix *matrix, struct cpumap *cmap), TP_ARGS(bit, cpu, matrix, cmap) ); DEFINE_EVENT(irq_matrix_cpu, irq_matrix_remove_managed, TP_PROTO(int bit, unsigned int cpu, struct irq_matrix *matrix, struct cpumap *cmap), TP_ARGS(bit, cpu, matrix, cmap) ); DEFINE_EVENT(irq_matrix_cpu, irq_matrix_alloc_managed, TP_PROTO(int bit, unsigned int cpu, struct irq_matrix *matrix, struct cpumap *cmap), TP_ARGS(bit, cpu, matrix, cmap) ); DEFINE_EVENT(irq_matrix_cpu, irq_matrix_assign, TP_PROTO(int bit, unsigned int cpu, struct irq_matrix *matrix, struct cpumap *cmap), TP_ARGS(bit, cpu, matrix, cmap) ); DEFINE_EVENT(irq_matrix_cpu, irq_matrix_alloc, TP_PROTO(int bit, unsigned int cpu, struct irq_matrix *matrix, struct cpumap *cmap), TP_ARGS(bit, cpu, matrix, cmap) ); DEFINE_EVENT(irq_matrix_cpu, irq_matrix_free, TP_PROTO(int bit, unsigned int cpu, struct irq_matrix *matrix, struct cpumap *cmap), TP_ARGS(bit, cpu, matrix, cmap) ); #endif /* _TRACE_IRQ_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 /* * Copyright (c) 2004-2011 Atheros Communications Inc. * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ #ifndef HTC_OPS_H #define HTC_OPS_H #include "htc.h" #include "debug.h" static inline void *ath6kl_htc_create(struct ath6kl *ar) { return ar->htc_ops->create(ar); } static inline int ath6kl_htc_wait_target(struct htc_target *target) { return target->dev->ar->htc_ops->wait_target(target); } static inline int ath6kl_htc_start(struct htc_target *target) { return target->dev->ar->htc_ops->start(target); } static inline int ath6kl_htc_conn_service(struct htc_target *target, struct htc_service_connect_req *req, struct htc_service_connect_resp *resp) { return target->dev->ar->htc_ops->conn_service(target, req, resp); } static inline int ath6kl_htc_tx(struct htc_target *target, struct htc_packet *packet) { return target->dev->ar->htc_ops->tx(target, packet); } static inline void ath6kl_htc_stop(struct htc_target *target) { return target->dev->ar->htc_ops->stop(target); } static inline void ath6kl_htc_cleanup(struct htc_target *target) { return target->dev->ar->htc_ops->cleanup(target); } static inline void ath6kl_htc_flush_txep(struct htc_target *target, enum htc_endpoint_id endpoint, u16 tag) { return target->dev->ar->htc_ops->flush_txep(target, endpoint, tag); } static inline void ath6kl_htc_flush_rx_buf(struct htc_target *target) { return target->dev->ar->htc_ops->flush_rx_buf(target); } static inline void ath6kl_htc_activity_changed(struct htc_target *target, enum htc_endpoint_id endpoint, bool active) { return target->dev->ar->htc_ops->activity_changed(target, endpoint, active); } static inline int ath6kl_htc_get_rxbuf_num(struct htc_target *target, enum htc_endpoint_id endpoint) { return target->dev->ar->htc_ops->get_rxbuf_num(target, endpoint); } static inline int ath6kl_htc_add_rxbuf_multiple(struct htc_target *target, struct list_head *pktq) { return target->dev->ar->htc_ops->add_rxbuf_multiple(target, pktq); } static inline int ath6kl_htc_credit_setup(struct htc_target *target, struct ath6kl_htc_credit_info *info) { return 
target->dev->ar->htc_ops->credit_setup(target, info); } static inline void ath6kl_htc_tx_complete(struct ath6kl *ar, struct sk_buff *skb) { ar->htc_ops->tx_complete(ar, skb); } static inline void ath6kl_htc_rx_complete(struct ath6kl *ar, struct sk_buff *skb, u8 pipe) { ar->htc_ops->rx_complete(ar, skb, pipe); } #endif
26 5 5 5 2 3 5 5 1 8 4 12 12 12 12 12 28 29 29 13 12 16 14 29 29 29 22 29 7 2 3 3 3 2 2 2 1 1 1 1 8 7 1 7 5 2 7 8 2 8 8 6 10 11 11 11 11 2 11 11 1 6 11 11 11 11 2 9 1 8 11 11 1 10 11 3 3 3 3 9 9 9 9 9 9 9 8 8 3 3 8 5 5 2 8 5 16 15 15 15 15 15 14 12 13 13 2 2 12 12 12 12 12 3 12 11 12 12 8 8 8 8 8 5 5 4 5 12 12 7 2 1 2 19 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 
446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 // SPDX-License-Identifier: GPL-2.0 /* * Provide access to virtual console memory. * /dev/vcs: the screen as it is being viewed right now (possibly scrolled) * /dev/vcsN: the screen of /dev/ttyN (1 <= N <= 63) * [minor: N] * * /dev/vcsaN: idem, but including attributes, and prefixed with * the 4 bytes lines,columns,x,y (as screendump used to give). * Attribute/character pair is in native endianity. * [minor: N+128] * * /dev/vcsuN: similar to /dev/vcsaN but using 4-byte unicode values * instead of 1-byte screen glyph values. 
* [minor: N+64] * * /dev/vcsuaN: same idea as /dev/vcsaN for unicode (not yet implemented). * * This replaces screendump and part of selection, so that the system * administrator can control access using file system permissions. * * aeb@cwi.nl - efter Friedas begravelse - 950211 * * machek@k332.feld.cvut.cz - modified not to send characters to wrong console * - fixed some fatal off-by-one bugs (0-- no longer == -1 -> looping and looping and looping...) * - making it shorter - scr_readw are macros which expand in PRETTY long code */ #include <linux/kernel.h> #include <linux/major.h> #include <linux/errno.h> #include <linux/export.h> #include <linux/tty.h> #include <linux/interrupt.h> #include <linux/mm.h> #include <linux/init.h> #include <linux/vt_kern.h> #include <linux/selection.h> #include <linux/kbd_kern.h> #include <linux/console.h> #include <linux/device.h> #include <linux/sched.h> #include <linux/fs.h> #include <linux/poll.h> #include <linux/signal.h> #include <linux/slab.h> #include <linux/notifier.h> #include <linux/uaccess.h> #include <asm/byteorder.h> #include <linux/unaligned.h> /* Size in bytes of the lines,columns,x,y header that prefixes the attribute (vcsa) image. */ #define HEADER_SIZE 4u /* Upper bound on the number of bytes staged per round in the temporary con_buf used by vcs_read() and vcs_write(). */ #define CON_BUF_SIZE (IS_ENABLED(CONFIG_BASE_SMALL) ? 256 : PAGE_SIZE) /* Cleanup helper for __free(free_page_ptr): hands the pointer to free_page() unless it is NULL. */ DEFINE_FREE(free_page_ptr, void *, if (_T) free_page((unsigned long)_T)); /* * Our minor space: * * 0 ... 63 glyph mode without attributes * 64 ... 127 unicode mode without attributes * 128 ... 191 glyph mode with attributes * 192 ... 255 unused (reserved for unicode with attributes) * * This relies on MAX_NR_CONSOLES being <= 63, meaning 63 actual consoles * with minors 0, 64, 128 and 192 being proxies for the foreground console.
*/ #if MAX_NR_CONSOLES > 63 #warning "/dev/vcs* devices may not accommodate more than 63 consoles" #endif /* Minor number decoding: the low six bits select the console (0 is the foreground proxy), bit 6 selects unicode mode and bit 7 selects attribute mode. */ #define console(inode) (iminor(inode) & 63) #define use_unicode(inode) (iminor(inode) & 64) #define use_attributes(inode) (iminor(inode) & 128) /* Per-open-file state backing poll() and fasync(): the VT notifier hook, the console number decoded from the minor, the last notifier event recorded, and the wait queue and fasync list that are signalled for it. */ struct vcs_poll_data { struct notifier_block notifier; unsigned int cons_num; int event; wait_queue_head_t waitq; struct fasync_struct *fasync; }; /* VT notifier callback: for a VT_UPDATE or VT_DEALLOCATE event on the console this poll state refers to, record the event code and wake pollers and SIGIO listeners. Any other event code, or an event for another console, returns NOTIFY_DONE without side effects. */ static int vcs_notifier(struct notifier_block *nb, unsigned long code, void *_param) { struct vt_notifier_param *param = _param; struct vc_data *vc = param->vc; struct vcs_poll_data *poll = container_of(nb, struct vcs_poll_data, notifier); int currcons = poll->cons_num; int fa_band; switch (code) { case VT_UPDATE: fa_band = POLL_PRI; break; case VT_DEALLOCATE: fa_band = POLL_HUP; break; default: return NOTIFY_DONE; } /* cons_num 0 stands for the foreground console; any other value is the console index plus one */ if (currcons == 0) currcons = fg_console; else currcons--; if (currcons != vc->vc_num) return NOTIFY_DONE; poll->event = code; wake_up_interruptible(&poll->waitq); kill_fasync(&poll->fasync, SIGIO, fa_band); return NOTIFY_OK; } /* Unregister the VT notifier embedded in @poll and free @poll. */ static void vcs_poll_data_free(struct vcs_poll_data *poll) { unregister_vt_notifier(&poll->notifier); kfree(poll); } /* Return the poll state attached to @file, allocating it and registering its notifier on first use. Returns NULL if the allocation or the notifier registration fails. */ static struct vcs_poll_data * vcs_poll_data_get(struct file *file) { struct vcs_poll_data *poll = file->private_data, *kill = NULL; if (poll) return poll; poll = kzalloc_obj(*poll); if (!poll) return NULL; poll->cons_num = console(file_inode(file)); init_waitqueue_head(&poll->waitq); poll->notifier.notifier_call = vcs_notifier; /* * In order not to lose any update event, we must pretend one might * have occurred before we have a chance to register our notifier. * This is also how user space has come to detect which kernels * support POLLPRI on /dev/vcs* devices i.e. using poll() with * POLLPRI and a zero timeout. */ poll->event = VT_UPDATE; if (register_vt_notifier(&poll->notifier) != 0) { kfree(poll); return NULL; } /* * This code may be called either through ->poll() or ->fasync().
* If we have two threads using the same file descriptor, they could * both enter this function, both notice that the structure hasn't * been allocated yet and go ahead allocating it in parallel, but * only one of them must survive and be shared otherwise we'd leak * memory with a dangling notifier callback. */ spin_lock(&file->f_lock); if (!file->private_data) { file->private_data = poll; } else { /* someone else raced ahead of us */ kill = poll; poll = file->private_data; } spin_unlock(&file->f_lock); /* the losing copy is torn down only after f_lock has been released */ if (kill) vcs_poll_data_free(kill); return poll; } /** * vcs_vc - return VC for @inode * @inode: inode for which to return a VC * @viewed: if non-NULL, set to true when the console number encoded in * @inode is 0 (the foreground-console proxy) and to false otherwise * * Must be called with console_lock. * * Return: the vc_cons[] entry of the selected console; callers in this file * check it for NULL before use. */ static struct vc_data *vcs_vc(struct inode *inode, bool *viewed) { unsigned int currcons = console(inode); WARN_CONSOLE_UNLOCKED(); if (currcons == 0) { currcons = fg_console; if (viewed) *viewed = true; } else { currcons--; if (viewed) *viewed = false; } return vc_cons[currcons].d; } /** * vcs_size - return size for a VC in @vc * @vc: which VC * @attr: does it use attributes? * @unicode: is it unicode? * * Must be called with console_lock.
*/ static int vcs_size(const struct vc_data *vc, bool attr, bool unicode) { int size; WARN_CONSOLE_UNLOCKED(); size = vc->vc_rows * vc->vc_cols; if (attr) { if (unicode) return -EOPNOTSUPP; /* two bytes per cell plus the header */ size = 2 * size + HEADER_SIZE; } else if (unicode) /* four bytes per cell */ size *= 4; return size; } /* llseek handler: look up the console and its current image size under the console lock, then delegate to fixed_size_llseek() with that size. Returns -ENXIO if the console does not exist, or the negative value reported by vcs_size(). */ static loff_t vcs_lseek(struct file *file, loff_t offset, int orig) { struct inode *inode = file_inode(file); struct vc_data *vc; int size; scoped_guard(console_lock) { vc = vcs_vc(inode, NULL); if (!vc) return -ENXIO; size = vcs_size(vc, use_attributes(inode), use_unicode(inode)); } if (size < 0) return size; return fixed_size_llseek(file, offset, orig, size); } /* Unicode-mode read: fill @con_buf with @count bytes of 4-byte cells starting at byte offset @pos, copying one full or partial screen row per iteration with vc_uniscr_copy_line(). Returns 0, or the non-zero result of vc_uniscr_check(). vcs_read() only passes @pos and @count values that are multiples of 4. */ static int vcs_read_buf_uni(struct vc_data *vc, char *con_buf, unsigned int pos, unsigned int count, bool viewed) { unsigned int nr, row, col, maxcol = vc->vc_cols; int ret; ret = vc_uniscr_check(vc); if (ret) return ret; pos /= 4; row = pos / maxcol; col = pos % maxcol; nr = maxcol - col; do { if (nr > count / 4) nr = count / 4; vc_uniscr_copy_line(vc, con_buf, viewed, row, col, nr); con_buf += nr * 4; count -= nr * 4; row++; col = 0; nr = maxcol; } while (count); return 0; } /* Glyph-mode read: store the low byte of @count consecutive screen cells, starting at cell @pos, into @con_buf. The screen pointer is re-resolved with screen_pos() at the start of every row. */ static void vcs_read_buf_noattr(const struct vc_data *vc, char *con_buf, unsigned int pos, unsigned int count, bool viewed) { u16 *org; unsigned int col, maxcol = vc->vc_cols; org = screen_pos(vc, pos, viewed); col = pos % maxcol; pos += maxcol - col; while (count-- > 0) { *con_buf++ = (vcs_scr_readw(vc, org++) & 0xff); if (++col == maxcol) { org = screen_pos(vc, pos, viewed); col = 0; pos += maxcol; } } } /* Attribute-mode read: stage the header (when @pos falls inside it) and then whole 16-bit cells into @con_buf. The buffer is always filled from the start of the header or from an even offset, so *@skip is advanced by the number of leading bytes of @con_buf that the caller must skip when copying out. Returns the number of bytes that the caller may copy to user space. */ static unsigned int vcs_read_buf(const struct vc_data *vc, char *con_buf, unsigned int pos, unsigned int count, bool viewed, unsigned int *skip) { u16 *org, *con_buf16; unsigned int col, maxcol = vc->vc_cols; unsigned int filled = count; if (pos < HEADER_SIZE) { /* clamp header values if they don't fit */ con_buf[0] = min(vc->vc_rows, 0xFFu); con_buf[1] = min(vc->vc_cols, 0xFFu); getconsxy(vc, con_buf + 2); *skip += pos; count += pos; if (count > CON_BUF_SIZE) { count =
CON_BUF_SIZE; filled = count - pos; } /* Advance state pointers and move on. */ count -= min(HEADER_SIZE, count); pos = HEADER_SIZE; con_buf += HEADER_SIZE; /* If count >= 0, then pos is even... */ } else if (pos & 1) { /* * Skip first byte for output if start address is odd. Update * region sizes up/down depending on free space in buffer. */ (*skip)++; if (count < CON_BUF_SIZE) count++; else filled--; } if (!count) return filled; pos -= HEADER_SIZE; pos /= 2; col = pos % maxcol; org = screen_pos(vc, pos, viewed); pos += maxcol - col; /* * Buffer has even length, so we can always copy character + attribute. * We do not copy last byte to userspace if count is odd. */ count = (count + 1) / 2; con_buf16 = (u16 *)con_buf; while (count) { *con_buf16++ = vcs_scr_readw(vc, org++); count--; if (++col == maxcol) { org = screen_pos(vc, pos, viewed); col = 0; pos += maxcol; } } return filled; } /* read() handler: copy the console image to user space in rounds of at most CON_BUF_SIZE bytes, staging each round in a temporary page. The console and its size are looked up again on every round because the console lock is dropped around copy_to_user(). Returns the number of bytes read if any were read, otherwise 0 or a negative errno. */ static ssize_t vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct inode *inode = file_inode(file); struct vc_data *vc; struct vcs_poll_data *poll; unsigned int read; ssize_t ret; loff_t pos; bool viewed, attr, uni_mode; char *con_buf __free(free_page_ptr) = (char *)__get_free_page(GFP_KERNEL); if (!con_buf) return -ENOMEM; pos = *ppos; /* Select the proper current console and verify * sanity of the situation under the console lock. */ guard(console_lock)(); uni_mode = use_unicode(inode); attr = use_attributes(inode); if (pos < 0) return -EINVAL; /* we enforce 32-bit alignment for pos and count in unicode mode */ if (uni_mode && (pos | count) & 3) return -EINVAL; /* a non-empty read clears the event recorded for poll() */ poll = file->private_data; if (count && poll) poll->event = 0; read = 0; ret = 0; while (count) { unsigned int this_round, skip = 0; int size; vc = vcs_vc(inode, &viewed); if (!vc) { ret = -ENXIO; break; } /* Check whether we are above size each round, * as copy_to_user at the end of this loop * could sleep.
*/ size = vcs_size(vc, attr, uni_mode); if (size < 0) { ret = size; break; } if (pos >= size) break; if (count > size - pos) count = size - pos; this_round = count; if (this_round > CON_BUF_SIZE) this_round = CON_BUF_SIZE; /* Perform the whole read into the local con_buf. * Then we can drop the console lock and safely * attempt to move it to userspace. */ if (uni_mode) { ret = vcs_read_buf_uni(vc, con_buf, pos, this_round, viewed); if (ret) break; } else if (!attr) { vcs_read_buf_noattr(vc, con_buf, pos, this_round, viewed); } else { this_round = vcs_read_buf(vc, con_buf, pos, this_round, viewed, &skip); } /* Finally, release the console semaphore while we push * all the data to userspace from our temporary buffer. * * AKPM: Even though it's a semaphore, we should drop it because * the pagefault handling code may want to call printk(). */ console_unlock(); ret = copy_to_user(buf, con_buf + skip, this_round); console_lock(); if (ret) { /* a non-zero result is treated as the count of bytes left uncopied; account for the part that did reach user space */ read += this_round - ret; ret = -EFAULT; break; } buf += this_round; pos += this_round; read += this_round; count -= this_round; } *ppos += read; if (read) return read; return ret; } /* Glyph-mode write: store @count bytes from @con_buf into the low byte of consecutive screen cells starting at cell @pos, keeping the upper byte of every cell. *@org0 receives the first cell written and the return value is the cell pointer after the last write. */ static u16 *vcs_write_buf_noattr(struct vc_data *vc, const char *con_buf, unsigned int pos, unsigned int count, bool viewed, u16 **org0) { u16 *org; unsigned int col, maxcol = vc->vc_cols; *org0 = org = screen_pos(vc, pos, viewed); col = pos % maxcol; pos += maxcol - col; while (count > 0) { unsigned char c = *con_buf++; count--; vcs_scr_writew(vc, (vcs_scr_readw(vc, org) & 0xff00) | c, org); org++; if (++col == maxcol) { org = screen_pos(vc, pos, viewed); col = 0; pos += maxcol; } } return org; } /* * Compilers (gcc 10) are unable to optimize the swap in cpu_to_le16. So do it * the poor man way.
*/ static inline u16 vc_compile_le16(u8 hi, u8 lo) { #ifdef __BIG_ENDIAN return (lo << 8u) | hi; #else return (hi << 8u) | lo; #endif } static u16 *vcs_write_buf(struct vc_data *vc, const char *con_buf, unsigned int pos, unsigned int count, bool viewed, u16 **org0) { u16 *org; unsigned int col, maxcol = vc->vc_cols; unsigned char c; /* header */ if (pos < HEADER_SIZE) { char header[HEADER_SIZE]; getconsxy(vc, header + 2); while (pos < HEADER_SIZE && count > 0) { count--; header[pos++] = *con_buf++; } if (!viewed) putconsxy(vc, header + 2); } if (!count) return NULL; pos -= HEADER_SIZE; col = (pos/2) % maxcol; *org0 = org = screen_pos(vc, pos/2, viewed); /* odd pos -- the first single character */ if (pos & 1) { count--; c = *con_buf++; vcs_scr_writew(vc, vc_compile_le16(c, vcs_scr_readw(vc, org)), org); org++; pos++; if (++col == maxcol) { org = screen_pos(vc, pos/2, viewed); col = 0; } } pos /= 2; pos += maxcol - col; /* even pos -- handle attr+character pairs */ while (count > 1) { unsigned short w; w = get_unaligned(((unsigned short *)con_buf)); vcs_scr_writew(vc, w, org++); con_buf += 2; count -= 2; if (++col == maxcol) { org = screen_pos(vc, pos, viewed); col = 0; pos += maxcol; } } if (!count) return org; /* odd pos -- the remaining character */ c = *con_buf++; vcs_scr_writew(vc, vc_compile_le16(vcs_scr_readw(vc, org) >> 8, c), org); return org; } static ssize_t vcs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct inode *inode = file_inode(file); struct vc_data *vc; u16 *org0, *org; unsigned int written; int size; ssize_t ret; loff_t pos; bool viewed, attr; if (use_unicode(inode)) return -EOPNOTSUPP; char *con_buf __free(free_page_ptr) = (char *)__get_free_page(GFP_KERNEL); if (!con_buf) return -ENOMEM; pos = *ppos; /* Select the proper current console and verify * sanity of the situation under the console lock. 
*/ guard(console_lock)(); attr = use_attributes(inode); vc = vcs_vc(inode, &viewed); if (!vc) return -ENXIO; size = vcs_size(vc, attr, false); if (size < 0) return size; if (pos < 0 || pos > size) return -EINVAL; if (count > size - pos) count = size - pos; written = 0; while (count) { unsigned int this_round = count; if (this_round > CON_BUF_SIZE) this_round = CON_BUF_SIZE; /* Temporarily drop the console lock so that we can read * in the write data from userspace safely. */ console_unlock(); ret = copy_from_user(con_buf, buf, this_round); console_lock(); if (ret) { this_round -= ret; if (!this_round) { /* Abort loop if no data were copied. Otherwise * fail with -EFAULT. */ if (written) break; return -EFAULT; } } /* The vc might have been freed or vcs_size might have changed * while we slept to grab the user buffer, so recheck. * Return data written up to now on failure. */ vc = vcs_vc(inode, &viewed); if (!vc) { if (written) break; return -ENXIO; } size = vcs_size(vc, attr, false); if (size < 0) { if (written) break; return size; } if (pos >= size) break; if (this_round > size - pos) this_round = size - pos; /* OK, now actually push the write to the console * under the lock using the local kernel buffer. 
*/ if (attr) org = vcs_write_buf(vc, con_buf, pos, this_round, viewed, &org0); else org = vcs_write_buf_noattr(vc, con_buf, pos, this_round, viewed, &org0); count -= this_round; written += this_round; buf += this_round; pos += this_round; if (org) update_region(vc, (unsigned long)(org0), org - org0); } *ppos += written; ret = written; if (written) vcs_scr_updated(vc); return ret; } static __poll_t vcs_poll(struct file *file, poll_table *wait) { struct vcs_poll_data *poll = vcs_poll_data_get(file); __poll_t ret = DEFAULT_POLLMASK|EPOLLERR; if (poll) { poll_wait(file, &poll->waitq, wait); switch (poll->event) { case VT_UPDATE: ret = DEFAULT_POLLMASK|EPOLLPRI; break; case VT_DEALLOCATE: ret = DEFAULT_POLLMASK|EPOLLHUP|EPOLLERR; break; case 0: ret = DEFAULT_POLLMASK; break; } } return ret; } static int vcs_fasync(int fd, struct file *file, int on) { struct vcs_poll_data *poll = file->private_data; if (!poll) { /* don't allocate anything if all we want is disable fasync */ if (!on) return 0; poll = vcs_poll_data_get(file); if (!poll) return -ENOMEM; } return fasync_helper(fd, file, on, &poll->fasync); } static int vcs_open(struct inode *inode, struct file *filp) { unsigned int currcons = console(inode); bool attr = use_attributes(inode); bool uni_mode = use_unicode(inode); /* we currently don't support attributes in unicode mode */ if (attr && uni_mode) return -EOPNOTSUPP; guard(console_lock)(); if (currcons && !vc_cons_allocated(currcons - 1)) return -ENXIO; return 0; } static int vcs_release(struct inode *inode, struct file *file) { struct vcs_poll_data *poll = file->private_data; if (poll) vcs_poll_data_free(poll); return 0; } static const struct file_operations vcs_fops = { .llseek = vcs_lseek, .read = vcs_read, .write = vcs_write, .poll = vcs_poll, .fasync = vcs_fasync, .open = vcs_open, .release = vcs_release, }; static const struct class vc_class = { .name = "vc", }; void vcs_make_sysfs(int index) { device_create(&vc_class, NULL, MKDEV(VCS_MAJOR, index + 1), 
NULL, "vcs%u", index + 1); device_create(&vc_class, NULL, MKDEV(VCS_MAJOR, index + 65), NULL, "vcsu%u", index + 1); device_create(&vc_class, NULL, MKDEV(VCS_MAJOR, index + 129), NULL, "vcsa%u", index + 1); } void vcs_remove_sysfs(int index) { device_destroy(&vc_class, MKDEV(VCS_MAJOR, index + 1)); device_destroy(&vc_class, MKDEV(VCS_MAJOR, index + 65)); device_destroy(&vc_class, MKDEV(VCS_MAJOR, index + 129)); } int __init vcs_init(void) { unsigned int i; if (register_chrdev(VCS_MAJOR, "vcs", &vcs_fops)) panic("unable to get major %d for vcs device", VCS_MAJOR); if (class_register(&vc_class)) panic("unable to create vc_class"); device_create(&vc_class, NULL, MKDEV(VCS_MAJOR, 0), NULL, "vcs"); device_create(&vc_class, NULL, MKDEV(VCS_MAJOR, 64), NULL, "vcsu"); device_create(&vc_class, NULL, MKDEV(VCS_MAJOR, 128), NULL, "vcsa"); for (i = 0; i < MIN_NR_CONSOLES; i++) vcs_make_sysfs(i); return 0; }
10 10 10 7 7 7 7 7 10 1 10 7 7 10 8 1 10 10 5 1 10 10 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 
513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 
1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2022 Oracle. All Rights Reserved. 
* Author: Allison Henderson <allison.henderson@oracle.com> */ #include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_shared.h" #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_log_format.h" #include "xfs_trans.h" #include "xfs_bmap_btree.h" #include "xfs_trans_priv.h" #include "xfs_log.h" #include "xfs_inode.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_attr.h" #include "xfs_attr_item.h" #include "xfs_trace.h" #include "xfs_trans_space.h" #include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_log_priv.h" #include "xfs_log_recover.h" #include "xfs_parent.h" /* Slab caches used for the ATTRI and ATTRD log items handled in this file. */ struct kmem_cache *xfs_attri_cache; struct kmem_cache *xfs_attrd_cache; /* Forward declarations of the item operation tables. */ static const struct xfs_item_ops xfs_attri_item_ops; static const struct xfs_item_ops xfs_attrd_item_ops; /* Convert a generic log item pointer into the xfs_attri_log_item that embeds it. */ static inline struct xfs_attri_log_item *ATTRI_ITEM(struct xfs_log_item *lip) { return container_of(lip, struct xfs_attri_log_item, attri_item); } /* * Shared xattr name/value buffers for logged extended attribute operations * * When logging updates to extended attributes, we can create quite a few * attribute log intent items for a single xattr update. To avoid cycling the * memory allocator and memcpy overhead, the name (and value, for setxattr) * are kept in a refcounted object that is shared across all related log items * and the upper-level deferred work state structure. The shared buffer has * a control structure, followed by the name and then, when present, the new * name, the value and the new value.
*/ /* Take a reference on @nv. Returns @nv, or NULL if its refcount had already dropped to zero. */ static inline struct xfs_attri_log_nameval * xfs_attri_log_nameval_get( struct xfs_attri_log_nameval *nv) { if (!refcount_inc_not_zero(&nv->refcount)) return NULL; return nv; } /* Drop a reference on @nv (a NULL @nv is tolerated) and kvfree() the buffer once the last reference is gone. */ static inline void xfs_attri_log_nameval_put( struct xfs_attri_log_nameval *nv) { if (!nv) return; if (refcount_dec_and_test(&nv->refcount)) kvfree(nv); } /* Allocate a single buffer that holds the control structure followed by copies of @name, @new_name, @value and @new_value, in that order. The name is always copied; any other part with a zero length gets a NULL iov_base and a zero iov_len. The buffer is returned with its refcount set to 1. */ static inline struct xfs_attri_log_nameval * xfs_attri_log_nameval_alloc( const void *name, unsigned int name_len, const void *new_name, unsigned int new_name_len, const void *value, unsigned int value_len, const void *new_value, unsigned int new_value_len) { struct xfs_attri_log_nameval *nv; /* * This could be over 64kB in length, so we have to use kvmalloc() for * this. But kvmalloc() utterly sucks, so we use our own version. */ /* NOTE(review): the result is used without a NULL check; presumably xlog_kvmalloc() cannot fail -- confirm against its definition. */ nv = xlog_kvmalloc(sizeof(struct xfs_attri_log_nameval) + name_len + new_name_len + value_len + new_value_len); /* the payload starts directly behind the control structure */ nv->name.iov_base = nv + 1; nv->name.iov_len = name_len; memcpy(nv->name.iov_base, name, name_len); if (new_name_len) { nv->new_name.iov_base = nv->name.iov_base + name_len; nv->new_name.iov_len = new_name_len; memcpy(nv->new_name.iov_base, new_name, new_name_len); } else { nv->new_name.iov_base = NULL; nv->new_name.iov_len = 0; } if (value_len) { nv->value.iov_base = nv->name.iov_base + name_len + new_name_len; nv->value.iov_len = value_len; memcpy(nv->value.iov_base, value, value_len); } else { nv->value.iov_base = NULL; nv->value.iov_len = 0; } if (new_value_len) { nv->new_value.iov_base = nv->name.iov_base + name_len + new_name_len + value_len; nv->new_value.iov_len = new_value_len; memcpy(nv->new_value.iov_base, new_value, new_value_len); } else { nv->new_value.iov_base = NULL; nv->new_value.iov_len = 0; } refcount_set(&nv->refcount, 1); return nv; } /* Free an ATTRI: its li_lv_shadow buffer, its reference on the shared name/value buffer, and the item itself. */ STATIC void xfs_attri_item_free( struct xfs_attri_log_item *attrip) { kvfree(attrip->attri_item.li_lv_shadow); xfs_attri_log_nameval_put(attrip->attri_nameval); kmem_cache_free(xfs_attri_cache, attrip); } /* * Freeing the attrip requires that we
remove it from the AIL if it has already * been placed there. However, the ATTRI may not yet have been placed in the * AIL when called by xfs_attri_release() from ATTRD processing due to the * ordering of committed vs unpin operations in bulk insert operations. Hence * the reference count to ensure only the last caller frees the ATTRI. */ STATIC void xfs_attri_release( struct xfs_attri_log_item *attrip) { ASSERT(atomic_read(&attrip->attri_refcount) > 0); if (!atomic_dec_and_test(&attrip->attri_refcount)) return; xfs_trans_ail_delete(&attrip->attri_item, 0); xfs_attri_item_free(attrip); } /* Report how many log iovecs and bytes this ATTRI needs: the format structure and the name always, plus one vector each for the new name, the value and the new value when their lengths are non-zero. */ STATIC void xfs_attri_item_size( struct xfs_log_item *lip, int *nvecs, int *nbytes) { struct xfs_attri_log_item *attrip = ATTRI_ITEM(lip); struct xfs_attri_log_nameval *nv = attrip->attri_nameval; *nvecs += 2; *nbytes += sizeof(struct xfs_attri_log_format) + xlog_calc_iovec_len(nv->name.iov_len); if (nv->new_name.iov_len) { *nvecs += 1; *nbytes += xlog_calc_iovec_len(nv->new_name.iov_len); } if (nv->value.iov_len) { *nvecs += 1; *nbytes += xlog_calc_iovec_len(nv->value.iov_len); } if (nv->new_value.iov_len) { *nvecs += 1; *nbytes += xlog_calc_iovec_len(nv->new_value.iov_len); } } /* * This is called to fill in the log iovecs for the given attri log * item. We use 1 iovec for the attri_format_item, 1 for the name, and * one more for each of the new name, the value and the new value that * is present. alfi_size records the resulting number of iovecs. */ STATIC void xfs_attri_item_format( struct xfs_log_item *lip, struct xlog_format_buf *lfb) { struct xfs_attri_log_item *attrip = ATTRI_ITEM(lip); struct xfs_attri_log_nameval *nv = attrip->attri_nameval; attrip->attri_format.alfi_type = XFS_LI_ATTRI; attrip->attri_format.alfi_size = 1; /* * This size accounting must be done before copying the attrip into the * iovec. If we do it after, the wrong size will be recorded to the log * and we trip across assertion checks for bad region sizes later during * the log recovery.
*/ ASSERT(nv->name.iov_len > 0); attrip->attri_format.alfi_size++; if (nv->new_name.iov_len > 0) attrip->attri_format.alfi_size++; if (nv->value.iov_len > 0) attrip->attri_format.alfi_size++; if (nv->new_value.iov_len > 0) attrip->attri_format.alfi_size++; /* the regions are emitted in the same order as they were counted above */ xlog_format_copy(lfb, XLOG_REG_TYPE_ATTRI_FORMAT, &attrip->attri_format, sizeof(struct xfs_attri_log_format)); xlog_format_copy(lfb, XLOG_REG_TYPE_ATTR_NAME, nv->name.iov_base, nv->name.iov_len); if (nv->new_name.iov_len > 0) xlog_format_copy(lfb, XLOG_REG_TYPE_ATTR_NEWNAME, nv->new_name.iov_base, nv->new_name.iov_len); if (nv->value.iov_len > 0) xlog_format_copy(lfb, XLOG_REG_TYPE_ATTR_VALUE, nv->value.iov_base, nv->value.iov_len); if (nv->new_value.iov_len > 0) xlog_format_copy(lfb, XLOG_REG_TYPE_ATTR_NEWVALUE, nv->new_value.iov_base, nv->new_value.iov_len); } /* * The unpin operation is the last place an ATTRI is manipulated in the log. It * is either inserted in the AIL or aborted in the event of a log I/O error. In * either case, the ATTRI transaction has been successfully committed to make * it this far. Therefore, we expect whoever committed the ATTRI to either * construct and commit the ATTRD or drop the ATTRD's reference in the event of * error. Simply drop the log's ATTRI reference now that the log is done with * it. */ STATIC void xfs_attri_item_unpin( struct xfs_log_item *lip, int remove) { xfs_attri_release(ATTRI_ITEM(lip)); } /* Item release callback: drop one reference on the ATTRI. */ STATIC void xfs_attri_item_release( struct xfs_log_item *lip) { xfs_attri_release(ATTRI_ITEM(lip)); } /* * Allocate and initialize an attri item. The name and value are not stored * in the item itself; the item holds a reference on the shared @nv buffer. */ STATIC struct xfs_attri_log_item * xfs_attri_init( struct xfs_mount *mp, struct xfs_attri_log_nameval *nv) { struct xfs_attri_log_item *attrip; attrip = kmem_cache_zalloc(xfs_attri_cache, GFP_KERNEL | __GFP_NOFAIL); /* * Grab an extra reference to the name/value buffer for this log item. * The caller retains its own reference!
*/ attrip->attri_nameval = xfs_attri_log_nameval_get(nv); ASSERT(attrip->attri_nameval); xfs_log_item_init(mp, &attrip->attri_item, XFS_LI_ATTRI, &xfs_attri_item_ops); /* the address of the item doubles as its id in the log format */ attrip->attri_format.alfi_id = (uintptr_t)(void *)attrip; /* start with two references; the comments on xfs_attri_release() and xfs_attri_item_unpin() describe who drops them */ atomic_set(&attrip->attri_refcount, 2); return attrip; } /* Convert a generic log item pointer into the xfs_attrd_log_item that embeds it. */ static inline struct xfs_attrd_log_item *ATTRD_ITEM(struct xfs_log_item *lip) { return container_of(lip, struct xfs_attrd_log_item, attrd_item); } /* Free an ATTRD: its li_lv_shadow buffer and the item itself. */ STATIC void xfs_attrd_item_free(struct xfs_attrd_log_item *attrdp) { kvfree(attrdp->attrd_item.li_lv_shadow); kmem_cache_free(xfs_attrd_cache, attrdp); } /* An ATTRD always needs exactly one iovec holding the attrd log format structure. */ STATIC void xfs_attrd_item_size( struct xfs_log_item *lip, int *nvecs, int *nbytes) { *nvecs += 1; *nbytes += sizeof(struct xfs_attrd_log_format); } /* * This is called to fill in the log iovecs for the given attrd log item. We use * only 1 iovec for the attrd_format, and we point that at the attr_log_format * structure embedded in the attrd item. */ STATIC void xfs_attrd_item_format( struct xfs_log_item *lip, struct xlog_format_buf *lfb) { struct xfs_attrd_log_item *attrdp = ATTRD_ITEM(lip); attrdp->attrd_format.alfd_type = XFS_LI_ATTRD; attrdp->attrd_format.alfd_size = 1; xlog_format_copy(lfb, XLOG_REG_TYPE_ATTRD_FORMAT, &attrdp->attrd_format, sizeof(struct xfs_attrd_log_format)); } /* * The ATTRD is either committed or aborted if the transaction is canceled. If * the transaction is canceled, drop our reference to the ATTRI and free the * ATTRD. */ STATIC void xfs_attrd_item_release( struct xfs_log_item *lip) { struct xfs_attrd_log_item *attrdp = ATTRD_ITEM(lip); xfs_attri_release(attrdp->attrd_attrip); xfs_attrd_item_free(attrdp); } /* Return the log item of the ATTRI that this ATTRD points at. */ static struct xfs_log_item * xfs_attrd_item_intent( struct xfs_log_item *lip) { return &ATTRD_ITEM(lip)->attrd_attrip->attri_item; } /* Extract the operation type bits from the alfi_op_flags field of an ATTRI log format. */ static inline unsigned int xfs_attr_log_item_op(const struct xfs_attri_log_format *attrp) { return attrp->alfi_op_flags & XFS_ATTRI_OP_FLAGS_TYPE_MASK; } /* Log an attr to the intent item.
*/ STATIC void xfs_attr_log_item( struct xfs_trans *tp, struct xfs_attri_log_item *attrip, const struct xfs_attr_intent *attr) { struct xfs_attri_log_format *attrp; struct xfs_attri_log_nameval *nv = attr->xattri_nameval; struct xfs_da_args *args = attr->xattri_da_args; /* * At this point the xfs_attr_intent has been constructed, and we've * created the log intent. Fill in the attri log item and log format * structure with fields from this xfs_attr_intent */ attrp = &attrip->attri_format; attrp->alfi_ino = args->dp->i_ino; ASSERT(!(attr->xattri_op_flags & ~XFS_ATTRI_OP_FLAGS_TYPE_MASK)); attrp->alfi_op_flags = attr->xattri_op_flags; attrp->alfi_value_len = nv->value.iov_len; switch (xfs_attr_log_item_op(attrp)) { case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE: ASSERT(nv->value.iov_len == nv->new_value.iov_len); attrp->alfi_igen = VFS_I(args->dp)->i_generation; attrp->alfi_old_name_len = nv->name.iov_len; attrp->alfi_new_name_len = nv->new_name.iov_len; break; case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE: case XFS_ATTRI_OP_FLAGS_PPTR_SET: attrp->alfi_igen = VFS_I(args->dp)->i_generation; fallthrough; default: attrp->alfi_name_len = nv->name.iov_len; break; } ASSERT(!(args->attr_filter & ~XFS_ATTRI_FILTER_MASK)); attrp->alfi_attr_filter = args->attr_filter; } /* Get an ATTRI. */ static struct xfs_log_item * xfs_attr_create_intent( struct xfs_trans *tp, struct list_head *items, unsigned int count, bool sort) { struct xfs_mount *mp = tp->t_mountp; struct xfs_attri_log_item *attrip; struct xfs_attr_intent *attr; struct xfs_da_args *args; ASSERT(count == 1); /* * Each attr item only performs one attribute operation at a time, so * this is a list of one */ attr = list_first_entry_or_null(items, struct xfs_attr_intent, xattri_list); args = attr->xattri_da_args; if (!(args->op_flags & XFS_DA_OP_LOGGED)) return NULL; /* * Create a buffer to store the attribute name and value. This buffer * will be shared between the higher level deferred xattr work state * and the lower level xattr log items. 
*/ if (!attr->xattri_nameval) { /* * Transfer our reference to the name/value buffer to the * deferred work state structure. */ attr->xattri_nameval = xfs_attri_log_nameval_alloc( args->name, args->namelen, args->new_name, args->new_namelen, args->value, args->valuelen, args->new_value, args->new_valuelen); } attrip = xfs_attri_init(mp, attr->xattri_nameval); xfs_attr_log_item(tp, attrip, attr); return &attrip->attri_item; } static inline void xfs_attr_free_item( struct xfs_attr_intent *attr) { if (attr->xattri_da_state) xfs_da_state_free(attr->xattri_da_state); xfs_attri_log_nameval_put(attr->xattri_nameval); if (attr->xattri_da_args->op_flags & XFS_DA_OP_RECOVERY) kfree(attr); else kmem_cache_free(xfs_attr_intent_cache, attr); } static inline struct xfs_attr_intent *attri_entry(const struct list_head *e) { return list_entry(e, struct xfs_attr_intent, xattri_list); } /* Process an attr. */ STATIC int xfs_attr_finish_item( struct xfs_trans *tp, struct xfs_log_item *done, struct list_head *item, struct xfs_btree_cur **state) { struct xfs_attr_intent *attr = attri_entry(item); struct xfs_da_args *args; int error; args = attr->xattri_da_args; /* Reset trans after EAGAIN cycle since the transaction is new */ args->trans = tp; if (XFS_TEST_ERROR(args->dp->i_mount, XFS_ERRTAG_LARP)) { error = -EIO; goto out; } /* If an attr removal is trivially complete, we're done. */ if (attr->xattri_op_flags == XFS_ATTRI_OP_FLAGS_REMOVE && !xfs_inode_hasattr(args->dp)) { error = 0; goto out; } error = xfs_attr_set_iter(attr); if (!error && attr->xattri_dela_state != XFS_DAS_DONE) return -EAGAIN; out: xfs_attr_free_item(attr); return error; } /* Abort all pending ATTRs. 
*/ STATIC void xfs_attr_abort_intent( struct xfs_log_item *intent) { xfs_attri_release(ATTRI_ITEM(intent)); } /* Cancel an attr */ STATIC void xfs_attr_cancel_item( struct list_head *item) { struct xfs_attr_intent *attr = attri_entry(item); xfs_attr_free_item(attr); } STATIC bool xfs_attri_item_match( struct xfs_log_item *lip, uint64_t intent_id) { return ATTRI_ITEM(lip)->attri_format.alfi_id == intent_id; } static inline bool xfs_attri_validate_namelen(unsigned int namelen) { return namelen > 0 && namelen <= XATTR_NAME_MAX; } /* Is this recovered ATTRI format ok? */ static inline bool xfs_attri_validate( struct xfs_mount *mp, struct xfs_attri_log_format *attrp) { unsigned int op = xfs_attr_log_item_op(attrp); if (attrp->alfi_op_flags & ~XFS_ATTRI_OP_FLAGS_TYPE_MASK) return false; if (attrp->alfi_attr_filter & ~XFS_ATTRI_FILTER_MASK) return false; if (!xfs_attr_check_namespace(attrp->alfi_attr_filter & XFS_ATTR_NSP_ONDISK_MASK)) return false; switch (op) { case XFS_ATTRI_OP_FLAGS_PPTR_SET: case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE: if (!xfs_has_parent(mp)) return false; if (attrp->alfi_value_len != sizeof(struct xfs_parent_rec)) return false; if (!xfs_attri_validate_namelen(attrp->alfi_name_len)) return false; if (!(attrp->alfi_attr_filter & XFS_ATTR_PARENT)) return false; break; case XFS_ATTRI_OP_FLAGS_SET: case XFS_ATTRI_OP_FLAGS_REPLACE: if (!xfs_is_using_logged_xattrs(mp)) return false; if (attrp->alfi_value_len > XATTR_SIZE_MAX) return false; if (!xfs_attri_validate_namelen(attrp->alfi_name_len)) return false; break; case XFS_ATTRI_OP_FLAGS_REMOVE: if (!xfs_is_using_logged_xattrs(mp)) return false; if (attrp->alfi_value_len != 0) return false; if (!xfs_attri_validate_namelen(attrp->alfi_name_len)) return false; break; case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE: if (!xfs_has_parent(mp)) return false; if (!xfs_attri_validate_namelen(attrp->alfi_old_name_len)) return false; if (!xfs_attri_validate_namelen(attrp->alfi_new_name_len)) return false; if (attrp->alfi_value_len != 
sizeof(struct xfs_parent_rec)) return false; if (!(attrp->alfi_attr_filter & XFS_ATTR_PARENT)) return false; break; default: return false; } return xfs_verify_ino(mp, attrp->alfi_ino); } static int xfs_attri_iread_extents( struct xfs_inode *ip) { struct xfs_trans *tp; int error; tp = xfs_trans_alloc_empty(ip->i_mount); xfs_ilock(ip, XFS_ILOCK_EXCL); error = xfs_iread_extents(tp, ip, XFS_ATTR_FORK); xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_trans_cancel(tp); return error; } static inline struct xfs_attr_intent * xfs_attri_recover_work( struct xfs_mount *mp, struct xfs_defer_pending *dfp, struct xfs_attri_log_format *attrp, struct xfs_inode **ipp, struct xfs_attri_log_nameval *nv) { struct xfs_attr_intent *attr; struct xfs_da_args *args; struct xfs_inode *ip; int local; int error; /* * Parent pointer attr items record the generation but regular logged * xattrs do not; select the right iget function. */ switch (xfs_attr_log_item_op(attrp)) { case XFS_ATTRI_OP_FLAGS_PPTR_SET: case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE: case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE: error = xlog_recover_iget_handle(mp, attrp->alfi_ino, attrp->alfi_igen, &ip); break; default: error = xlog_recover_iget(mp, attrp->alfi_ino, &ip); break; } if (error) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attrp, sizeof(*attrp)); return ERR_PTR(-EFSCORRUPTED); } if (xfs_inode_has_attr_fork(ip)) { error = xfs_attri_iread_extents(ip); if (error) { xfs_irele(ip); return ERR_PTR(error); } } attr = kzalloc(sizeof(struct xfs_attr_intent) + sizeof(struct xfs_da_args), GFP_KERNEL | __GFP_NOFAIL); args = (struct xfs_da_args *)(attr + 1); attr->xattri_da_args = args; attr->xattri_op_flags = xfs_attr_log_item_op(attrp); /* * We're reconstructing the deferred work state structure from the * recovered log item. Grab a reference to the name/value buffer and * attach it to the new work state. 
*/ attr->xattri_nameval = xfs_attri_log_nameval_get(nv); ASSERT(attr->xattri_nameval); args->dp = ip; args->geo = mp->m_attr_geo; args->whichfork = XFS_ATTR_FORK; args->name = nv->name.iov_base; args->namelen = nv->name.iov_len; args->new_name = nv->new_name.iov_base; args->new_namelen = nv->new_name.iov_len; args->value = nv->value.iov_base; args->valuelen = nv->value.iov_len; args->new_value = nv->new_value.iov_base; args->new_valuelen = nv->new_value.iov_len; args->attr_filter = attrp->alfi_attr_filter & XFS_ATTRI_FILTER_MASK; args->op_flags = XFS_DA_OP_RECOVERY | XFS_DA_OP_OKNOENT | XFS_DA_OP_LOGGED; args->owner = args->dp->i_ino; xfs_attr_sethash(args); switch (xfs_attr_intent_op(attr)) { case XFS_ATTRI_OP_FLAGS_PPTR_SET: case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE: case XFS_ATTRI_OP_FLAGS_SET: case XFS_ATTRI_OP_FLAGS_REPLACE: args->total = xfs_attr_calc_size(args, &local); if (xfs_inode_hasattr(args->dp)) attr->xattri_dela_state = xfs_attr_init_replace_state(args); else attr->xattri_dela_state = xfs_attr_init_add_state(args); break; case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE: case XFS_ATTRI_OP_FLAGS_REMOVE: attr->xattri_dela_state = xfs_attr_init_remove_state(args); break; } xfs_defer_add_item(dfp, &attr->xattri_list); *ipp = ip; return attr; } /* * Process an attr intent item that was recovered from the log. We need to * delete the attr that it describes. */ STATIC int xfs_attr_recover_work( struct xfs_defer_pending *dfp, struct list_head *capture_list) { struct xfs_log_item *lip = dfp->dfp_intent; struct xfs_attri_log_item *attrip = ATTRI_ITEM(lip); struct xfs_attr_intent *attr; struct xfs_mount *mp = lip->li_log->l_mp; struct xfs_inode *ip = NULL; struct xfs_da_args *args; struct xfs_trans *tp; struct xfs_trans_res resv; struct xfs_attri_log_format *attrp; struct xfs_attri_log_nameval *nv = attrip->attri_nameval; int error; unsigned int total = 0; /* * First check the validity of the attr described by the ATTRI. 
If any * are bad, then assume that all are bad and just toss the ATTRI. */ attrp = &attrip->attri_format; if (!xfs_attri_validate(mp, attrp) || !xfs_attr_namecheck(attrp->alfi_attr_filter, nv->name.iov_base, nv->name.iov_len)) return -EFSCORRUPTED; attr = xfs_attri_recover_work(mp, dfp, attrp, &ip, nv); if (IS_ERR(attr)) return PTR_ERR(attr); args = attr->xattri_da_args; switch (xfs_attr_intent_op(attr)) { case XFS_ATTRI_OP_FLAGS_PPTR_SET: case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE: case XFS_ATTRI_OP_FLAGS_SET: case XFS_ATTRI_OP_FLAGS_REPLACE: resv = xfs_attr_set_resv(args); total = args->total; break; case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE: case XFS_ATTRI_OP_FLAGS_REMOVE: resv = M_RES(mp)->tr_attrrm; total = XFS_ATTRRM_SPACE_RES(mp); break; } resv = xlog_recover_resv(&resv); error = xfs_trans_alloc(mp, &resv, total, 0, XFS_TRANS_RESERVE, &tp); if (error) return error; args->trans = tp; xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); error = xlog_recover_finish_intent(tp, dfp); if (error == -EFSCORRUPTED) XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, &attrip->attri_format, sizeof(attrip->attri_format)); if (error) goto out_cancel; error = xfs_defer_ops_capture_and_commit(tp, capture_list); out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_irele(ip); return error; out_cancel: xfs_trans_cancel(tp); goto out_unlock; } /* Re-log an intent item to push the log tail forward. */ static struct xfs_log_item * xfs_attr_relog_intent( struct xfs_trans *tp, struct xfs_log_item *intent, struct xfs_log_item *done_item) { struct xfs_attri_log_item *old_attrip; struct xfs_attri_log_item *new_attrip; struct xfs_attri_log_format *new_attrp; struct xfs_attri_log_format *old_attrp; old_attrip = ATTRI_ITEM(intent); old_attrp = &old_attrip->attri_format; /* * Create a new log item that shares the same name/value buffer as the * old log item. 
*/ new_attrip = xfs_attri_init(tp->t_mountp, old_attrip->attri_nameval); new_attrp = &new_attrip->attri_format; new_attrp->alfi_ino = old_attrp->alfi_ino; new_attrp->alfi_igen = old_attrp->alfi_igen; new_attrp->alfi_op_flags = old_attrp->alfi_op_flags; new_attrp->alfi_value_len = old_attrp->alfi_value_len; switch (xfs_attr_log_item_op(old_attrp)) { case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE: new_attrp->alfi_new_name_len = old_attrp->alfi_new_name_len; new_attrp->alfi_old_name_len = old_attrp->alfi_old_name_len; break; default: new_attrp->alfi_name_len = old_attrp->alfi_name_len; break; } new_attrp->alfi_attr_filter = old_attrp->alfi_attr_filter; return &new_attrip->attri_item; } /* Get an ATTRD so we can process all the attrs. */ static struct xfs_log_item * xfs_attr_create_done( struct xfs_trans *tp, struct xfs_log_item *intent, unsigned int count) { struct xfs_attri_log_item *attrip; struct xfs_attrd_log_item *attrdp; attrip = ATTRI_ITEM(intent); attrdp = kmem_cache_zalloc(xfs_attrd_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(tp->t_mountp, &attrdp->attrd_item, XFS_LI_ATTRD, &xfs_attrd_item_ops); attrdp->attrd_attrip = attrip; attrdp->attrd_format.alfd_alf_id = attrip->attri_format.alfi_id; return &attrdp->attrd_item; } void xfs_attr_defer_add( struct xfs_da_args *args, enum xfs_attr_defer_op op) { struct xfs_attr_intent *new; unsigned int log_op = 0; bool is_pptr = args->attr_filter & XFS_ATTR_PARENT; if (is_pptr) { ASSERT(xfs_has_parent(args->dp->i_mount)); ASSERT((args->attr_filter & ~XFS_ATTR_PARENT) == 0); ASSERT(args->op_flags & XFS_DA_OP_LOGGED); ASSERT(args->valuelen == sizeof(struct xfs_parent_rec)); } new = kmem_cache_zalloc(xfs_attr_intent_cache, GFP_NOFS | __GFP_NOFAIL); new->xattri_da_args = args; /* Compute log operation from the higher level op and namespace. 
*/ switch (op) { case XFS_ATTR_DEFER_SET: if (is_pptr) log_op = XFS_ATTRI_OP_FLAGS_PPTR_SET; else log_op = XFS_ATTRI_OP_FLAGS_SET; break; case XFS_ATTR_DEFER_REPLACE: if (is_pptr) log_op = XFS_ATTRI_OP_FLAGS_PPTR_REPLACE; else log_op = XFS_ATTRI_OP_FLAGS_REPLACE; break; case XFS_ATTR_DEFER_REMOVE: if (is_pptr) log_op = XFS_ATTRI_OP_FLAGS_PPTR_REMOVE; else log_op = XFS_ATTRI_OP_FLAGS_REMOVE; break; default: ASSERT(0); break; } new->xattri_op_flags = log_op; /* Set up initial attr operation state. */ switch (log_op) { case XFS_ATTRI_OP_FLAGS_PPTR_SET: case XFS_ATTRI_OP_FLAGS_SET: new->xattri_dela_state = xfs_attr_init_add_state(args); break; case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE: ASSERT(args->new_valuelen == args->valuelen); new->xattri_dela_state = xfs_attr_init_replace_state(args); break; case XFS_ATTRI_OP_FLAGS_REPLACE: new->xattri_dela_state = xfs_attr_init_replace_state(args); break; case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE: case XFS_ATTRI_OP_FLAGS_REMOVE: new->xattri_dela_state = xfs_attr_init_remove_state(args); break; } xfs_defer_add(args->trans, &new->xattri_list, &xfs_attr_defer_type); trace_xfs_attr_defer_add(new->xattri_dela_state, args->dp); } const struct xfs_defer_op_type xfs_attr_defer_type = { .name = "attr", .max_items = 1, .create_intent = xfs_attr_create_intent, .abort_intent = xfs_attr_abort_intent, .create_done = xfs_attr_create_done, .finish_item = xfs_attr_finish_item, .cancel_item = xfs_attr_cancel_item, .recover_work = xfs_attr_recover_work, .relog_intent = xfs_attr_relog_intent, }; static inline void * xfs_attri_validate_name_iovec( struct xfs_mount *mp, struct xfs_attri_log_format *attri_formatp, const struct kvec *iovec, unsigned int name_len) { if (iovec->iov_len != xlog_calc_iovec_len(name_len)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attri_formatp, sizeof(*attri_formatp)); return NULL; } if (!xfs_attr_namecheck(attri_formatp->alfi_attr_filter, iovec->iov_base, name_len)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, 
attri_formatp, sizeof(*attri_formatp)); XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, iovec->iov_base, iovec->iov_len); return NULL; } return iovec->iov_base; } static inline void * xfs_attri_validate_value_iovec( struct xfs_mount *mp, struct xfs_attri_log_format *attri_formatp, const struct kvec *iovec, unsigned int value_len) { if (iovec->iov_len != xlog_calc_iovec_len(value_len)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attri_formatp, sizeof(*attri_formatp)); return NULL; } if ((attri_formatp->alfi_attr_filter & XFS_ATTR_PARENT) && !xfs_parent_valuecheck(mp, iovec->iov_base, value_len)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attri_formatp, sizeof(*attri_formatp)); XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, iovec->iov_base, iovec->iov_len); return NULL; } return iovec->iov_base; } STATIC int xlog_recover_attri_commit_pass2( struct xlog *log, struct list_head *buffer_list, struct xlog_recover_item *item, xfs_lsn_t lsn) { struct xfs_mount *mp = log->l_mp; struct xfs_attri_log_item *attrip; struct xfs_attri_log_format *attri_formatp; struct xfs_attri_log_nameval *nv; const void *attr_name; const void *attr_value = NULL; const void *attr_new_name = NULL; const void *attr_new_value = NULL; size_t len; unsigned int name_len = 0; unsigned int value_len = 0; unsigned int new_name_len = 0; unsigned int new_value_len = 0; unsigned int op, i = 0; /* Validate xfs_attri_log_format before the large memory allocation */ len = sizeof(struct xfs_attri_log_format); if (item->ri_buf[i].iov_len != len) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, item->ri_buf[0].iov_base, item->ri_buf[0].iov_len); return -EFSCORRUPTED; } attri_formatp = item->ri_buf[i].iov_base; if (!xfs_attri_validate(mp, attri_formatp)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attri_formatp, len); return -EFSCORRUPTED; } /* Check the number of log iovecs makes sense for the op code. 
*/ op = xfs_attr_log_item_op(attri_formatp); switch (op) { case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE: case XFS_ATTRI_OP_FLAGS_PPTR_SET: /* Log item, attr name, attr value */ if (item->ri_total != 3) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attri_formatp, len); return -EFSCORRUPTED; } name_len = attri_formatp->alfi_name_len; value_len = attri_formatp->alfi_value_len; break; case XFS_ATTRI_OP_FLAGS_SET: case XFS_ATTRI_OP_FLAGS_REPLACE: /* Log item, attr name, optional attr value */ if (item->ri_total != 2 + !!attri_formatp->alfi_value_len) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attri_formatp, len); return -EFSCORRUPTED; } name_len = attri_formatp->alfi_name_len; value_len = attri_formatp->alfi_value_len; break; case XFS_ATTRI_OP_FLAGS_REMOVE: /* Log item, attr name */ if (item->ri_total != 2) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attri_formatp, len); return -EFSCORRUPTED; } name_len = attri_formatp->alfi_name_len; break; case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE: /* * Log item, attr name, new attr name, attr value, new attr * value */ if (item->ri_total != 5) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attri_formatp, len); return -EFSCORRUPTED; } name_len = attri_formatp->alfi_old_name_len; new_name_len = attri_formatp->alfi_new_name_len; new_value_len = value_len = attri_formatp->alfi_value_len; break; default: XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attri_formatp, len); return -EFSCORRUPTED; } i++; /* Validate the attr name */ attr_name = xfs_attri_validate_name_iovec(mp, attri_formatp, &item->ri_buf[i], name_len); if (!attr_name) return -EFSCORRUPTED; i++; /* Validate the new attr name */ if (new_name_len > 0) { attr_new_name = xfs_attri_validate_name_iovec(mp, attri_formatp, &item->ri_buf[i], new_name_len); if (!attr_new_name) return -EFSCORRUPTED; i++; } /* Validate the attr value, if present */ if (value_len != 0) { attr_value = xfs_attri_validate_value_iovec(mp, attri_formatp, &item->ri_buf[i], 
value_len); if (!attr_value) return -EFSCORRUPTED; i++; } /* Validate the new attr value, if present */ if (new_value_len != 0) { attr_new_value = xfs_attri_validate_value_iovec(mp, attri_formatp, &item->ri_buf[i], new_value_len); if (!attr_new_value) return -EFSCORRUPTED; i++; } /* * Make sure we got the correct number of buffers for the operation * that we just loaded. */ if (i != item->ri_total) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attri_formatp, len); return -EFSCORRUPTED; } /* * Memory alloc failure will cause replay to abort. We attach the * name/value buffer to the recovered incore log item and drop our * reference. */ nv = xfs_attri_log_nameval_alloc(attr_name, name_len, attr_new_name, new_name_len, attr_value, value_len, attr_new_value, new_value_len); attrip = xfs_attri_init(mp, nv); memcpy(&attrip->attri_format, attri_formatp, len); xlog_recover_intent_item(log, &attrip->attri_item, lsn, &xfs_attr_defer_type); xfs_attri_log_nameval_put(nv); return 0; } /* * This routine is called when an ATTRD format structure is found in a committed * transaction in the log. Its purpose is to cancel the corresponding ATTRI if * it was still in the log. To do this it searches the AIL for the ATTRI with * an id equal to that in the ATTRD format structure. If we find it we drop * the ATTRD reference, which removes the ATTRI from the AIL and frees it. 
*/ STATIC int xlog_recover_attrd_commit_pass2( struct xlog *log, struct list_head *buffer_list, struct xlog_recover_item *item, xfs_lsn_t lsn) { struct xfs_attrd_log_format *attrd_formatp; attrd_formatp = item->ri_buf[0].iov_base; if (item->ri_buf[0].iov_len != sizeof(struct xfs_attrd_log_format)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, item->ri_buf[0].iov_base, item->ri_buf[0].iov_len); return -EFSCORRUPTED; } xlog_recover_release_intent(log, XFS_LI_ATTRI, attrd_formatp->alfd_alf_id); return 0; } static const struct xfs_item_ops xfs_attri_item_ops = { .flags = XFS_ITEM_INTENT, .iop_size = xfs_attri_item_size, .iop_format = xfs_attri_item_format, .iop_unpin = xfs_attri_item_unpin, .iop_release = xfs_attri_item_release, .iop_match = xfs_attri_item_match, }; const struct xlog_recover_item_ops xlog_attri_item_ops = { .item_type = XFS_LI_ATTRI, .commit_pass2 = xlog_recover_attri_commit_pass2, }; static const struct xfs_item_ops xfs_attrd_item_ops = { .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED | XFS_ITEM_INTENT_DONE, .iop_size = xfs_attrd_item_size, .iop_format = xfs_attrd_item_format, .iop_release = xfs_attrd_item_release, .iop_intent = xfs_attrd_item_intent, }; const struct xlog_recover_item_ops xlog_attrd_item_ops = { .item_type = XFS_LI_ATTRD, .commit_pass2 = xlog_recover_attrd_commit_pass2, };
8 8 7 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 1 1 1 1 1 1 1 1 1 1 1 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 
495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 
995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 
1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 
1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) International 
Business Machines Corp., 2006 * * Authors: Artem Bityutskiy (Битюцкий Артём), Thomas Gleixner */ /* * UBI wear-leveling sub-system. * * This sub-system is responsible for wear-leveling. It works in terms of * physical eraseblocks and erase counters and knows nothing about logical * eraseblocks, volumes, etc. From this sub-system's perspective all physical * eraseblocks are of two types - used and free. Used physical eraseblocks are * those that were "get" by the 'ubi_wl_get_peb()' function, and free physical * eraseblocks are those that were put by the 'ubi_wl_put_peb()' function. * * Physical eraseblocks returned by 'ubi_wl_get_peb()' have only erase counter * header. The rest of the physical eraseblock contains only %0xFF bytes. * * When physical eraseblocks are returned to the WL sub-system by means of the * 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is * done asynchronously in context of the per-UBI device background thread, * which is also managed by the WL sub-system. * * The wear-leveling is ensured by means of moving the contents of used * physical eraseblocks with low erase counter to free physical eraseblocks * with high erase counter. * * If the WL sub-system fails to erase a physical eraseblock, it marks it as * bad. * * This sub-system is also responsible for scrubbing. If a bit-flip is detected * in a physical eraseblock, it has to be moved. Technically this is the same * as moving it for wear-leveling reasons. * * As it was said, for the UBI sub-system all physical eraseblocks are either * "free" or "used". Free eraseblock are kept in the @wl->free RB-tree, while * used eraseblocks are kept in @wl->used, @wl->erroneous, or @wl->scrub * RB-trees, as well as (temporarily) in the @wl->pq queue. * * When the WL sub-system returns a physical eraseblock, the physical * eraseblock is protected from being moved for some "time". 
For this reason, * the physical eraseblock is not directly moved from the @wl->free tree to the * @wl->used tree. There is a protection queue in between where this * physical eraseblock is temporarily stored (@wl->pq). * * All this protection stuff is needed because: * o we don't want to move physical eraseblocks just after we have given them * to the user; instead, we first want to let users fill them up with data; * * o there is a chance that the user will put the physical eraseblock very * soon, so it makes sense not to move it for some time, but wait. * * Physical eraseblocks stay protected only for a limited time. But the "time" is * measured in erase cycles in this case. This is implemented with the help of the * protection queue. Eraseblocks are put to the tail of this queue when they * are returned by the 'ubi_wl_get_peb()', and eraseblocks are removed from the * head of the queue on each erase operation (for any eraseblock). So the * length of the queue defines how many (global) erase cycles PEBs are protected. * * To put it differently, each physical eraseblock has 2 main states: free and * used. The former state corresponds to the @wl->free tree. The latter state * is split up into several sub-states: * o the WL movement is allowed (@wl->used tree); * o the WL movement is disallowed (@wl->erroneous) because the PEB is * erroneous - e.g., there was a read error; * o the WL movement is temporarily prohibited (@wl->pq queue); * o scrubbing is needed (@wl->scrub tree). * * Depending on the sub-state, wear-leveling entries of the used physical * eraseblocks may be kept in one of those structures. * * Note, in this implementation, we keep a small in-RAM object for each physical * eraseblock. This is surely not a scalable solution. But it appears to be good * enough for moderately large flashes and it is simple. In future, one may * re-work this sub-system and make it more scalable.
* * At the moment this sub-system does not utilize the sequence number, which * was introduced relatively recently. But it would be wise to do this because * the sequence number of a logical eraseblock characterizes how old is it. For * example, when we move a PEB with low erase counter, and we need to pick the * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we * pick target PEB with an average EC if our PEB is not very "old". This is a * room for future re-works of the WL sub-system. */ #include <linux/slab.h> #include <linux/crc32.h> #include <linux/freezer.h> #include <linux/kthread.h> #include "ubi.h" #include "wl.h" /* Number of physical eraseblocks reserved for wear-leveling purposes */ #define WL_RESERVED_PEBS 1 /* * Maximum difference between two erase counters. If this threshold is * exceeded, the WL sub-system starts moving data from used physical * eraseblocks with low erase counter to free physical eraseblocks with high * erase counter. */ #define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD /* * When a physical eraseblock is moved, the WL sub-system has to pick the target * physical eraseblock to move to. The simplest way would be just to pick the * one with the highest erase counter. But in certain workloads this could lead * to an unlimited wear of one or few physical eraseblock. Indeed, imagine a * situation when the picked physical eraseblock is constantly erased after the * data is written to it. So, we have a constant which limits the highest erase * counter of the free physical eraseblock to pick. Namely, the WL sub-system * does not pick eraseblocks with erase counter greater than the lowest erase * counter plus %WL_FREE_MAX_DIFF. */ #define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD) /* * Maximum number of consecutive background thread failures which is enough to * switch to read-only mode. 
*/
#define WL_MAX_FAILURES 32

static int self_check_ec(struct ubi_device *ubi, int pnum, int ec);
static int self_check_in_wl_tree(const struct ubi_device *ubi,
				 struct ubi_wl_entry *e, struct rb_root *root);
static int self_check_in_pq(const struct ubi_device *ubi,
			    struct ubi_wl_entry *e);

/**
 * wl_tree_add - add a wear-leveling entry to a WL RB-tree.
 * @e: the wear-leveling entry to add
 * @root: the root of the tree
 *
 * Note, we use (erase counter, physical eraseblock number) pairs as keys in
 * the @ubi->used and @ubi->free RB-trees: entries are ordered by erase counter
 * first and by physical eraseblock number second.
 */
static void wl_tree_add(struct ubi_wl_entry *e, struct rb_root *root)
{
	struct rb_node **p, *parent = NULL;

	/* Descend from the root to the empty link where @e belongs */
	p = &root->rb_node;
	while (*p) {
		struct ubi_wl_entry *e1;

		parent = *p;
		e1 = rb_entry(parent, struct ubi_wl_entry, u.rb);

		if (e->ec < e1->ec)
			p = &(*p)->rb_left;
		else if (e->ec > e1->ec)
			p = &(*p)->rb_right;
		else {
			/* Equal erase counters: the PEB number breaks the tie */
			ubi_assert(e->pnum != e1->pnum);
			if (e->pnum < e1->pnum)
				p = &(*p)->rb_left;
			else
				p = &(*p)->rb_right;
		}
	}

	rb_link_node(&e->u.rb, parent, p);
	rb_insert_color(&e->u.rb, root);
}

/**
 * wl_entry_destroy - destroy a wear-leveling entry.
 * @ubi: UBI device description object
 * @e: the wear-leveling entry to destroy
 *
 * This function destroys a wear leveling entry and removes
 * the reference from the lookup table.
 */
static void wl_entry_destroy(struct ubi_device *ubi, struct ubi_wl_entry *e)
{
	/* Clear the lookup slot first so that @e is no longer reachable by pnum */
	ubi->lookuptbl[e->pnum] = NULL;
	kmem_cache_free(ubi_wl_entry_slab, e);
}

/**
 * do_work - do one pending work.
 * @ubi: UBI device description object
 * @executed: if not NULL, tells the caller whether a work was executed
 *
 * This function returns zero in case of success and a negative error code in
 * case of failure. If @executed is not NULL, it is set to %1 when a work was
 * taken from the queue and executed, and to %0 when the queue was empty.
 */
static int do_work(struct ubi_device *ubi, int *executed)
{
	int err;
	struct ubi_work *wrk;

	cond_resched();

	/*
	 * @ubi->work_sem is used to synchronize with the workers. Workers take
	 * it in read mode, so many of them may be doing works at a time. But
	 * the queue flush code has to be sure the whole queue of works is
	 * done, and it takes the semaphore in write mode.
	 */
	down_read(&ubi->work_sem);
	spin_lock(&ubi->wl_lock);
	if (list_empty(&ubi->works)) {
		/* Nothing is pending */
		spin_unlock(&ubi->wl_lock);
		up_read(&ubi->work_sem);
		if (executed)
			*executed = 0;
		return 0;
	}

	if (executed)
		*executed = 1;
	/* Dequeue the oldest work while still holding @ubi->wl_lock */
	wrk = list_entry(ubi->works.next, struct ubi_work, list);
	list_del(&wrk->list);
	ubi->works_count -= 1;
	ubi_assert(ubi->works_count >= 0);
	spin_unlock(&ubi->wl_lock);

	/*
	 * Call the worker function. Do not touch the work structure
	 * after this call as it will have been freed or reused by that
	 * time by the worker function.
	 */
	err = wrk->func(ubi, wrk, 0);
	if (err)
		ubi_err(ubi, "work failed with error code %d", err);
	up_read(&ubi->work_sem);

	return err;
}

/**
 * in_wl_tree - check if wear-leveling entry is present in a WL RB-tree.
 * @e: the wear-leveling entry to check
 * @root: the root of the tree
 *
 * This function returns non-zero if @e is in the @root RB-tree and zero if it
 * is not. The tree is searched by the (erase counter, PEB number) key of @e.
 */
static int in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root)
{
	struct rb_node *p;

	p = root->rb_node;
	while (p) {
		struct ubi_wl_entry *e1;

		e1 = rb_entry(p, struct ubi_wl_entry, u.rb);

		if (e->pnum == e1->pnum) {
			/* The same PEB number must mean the very same entry */
			ubi_assert(e == e1);
			return 1;
		}

		if (e->ec < e1->ec)
			p = p->rb_left;
		else if (e->ec > e1->ec)
			p = p->rb_right;
		else {
			ubi_assert(e->pnum != e1->pnum);
			if (e->pnum < e1->pnum)
				p = p->rb_left;
			else
				p = p->rb_right;
		}
	}

	return 0;
}

/**
 * in_pq - check if a wear-leveling entry is present in the protection queue.
 * @ubi: UBI device description object
 * @e: the wear-leveling entry to check
 *
 * This function returns non-zero if @e is in the protection queue and zero
 * if it is not.
*/ static inline int in_pq(const struct ubi_device *ubi, struct ubi_wl_entry *e) { struct ubi_wl_entry *p; int i; for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) list_for_each_entry(p, &ubi->pq[i], u.list) if (p == e) return 1; return 0; } /** * prot_queue_add - add physical eraseblock to the protection queue. * @ubi: UBI device description object * @e: the physical eraseblock to add * * This function adds @e to the tail of the protection queue @ubi->pq, where * @e will stay for %UBI_PROT_QUEUE_LEN erase operations and will be * temporarily protected from the wear-leveling worker. Note, @wl->lock has to * be locked. */ static void prot_queue_add(struct ubi_device *ubi, struct ubi_wl_entry *e) { int pq_tail = ubi->pq_head - 1; if (pq_tail < 0) pq_tail = UBI_PROT_QUEUE_LEN - 1; ubi_assert(pq_tail >= 0 && pq_tail < UBI_PROT_QUEUE_LEN); list_add_tail(&e->u.list, &ubi->pq[pq_tail]); dbg_wl("added PEB %d EC %d to the protection queue", e->pnum, e->ec); } /** * find_wl_entry - find wear-leveling entry closest to certain erase counter. * @ubi: UBI device description object * @root: the RB-tree where to look for * @diff: maximum possible difference from the smallest erase counter * @pick_max: pick PEB even its erase counter beyonds 'min_ec + @diff' * * This function looks for a wear leveling entry with erase counter closest to * min + @diff, where min is the smallest erase counter. */ static struct ubi_wl_entry *find_wl_entry(struct ubi_device *ubi, struct rb_root *root, int diff, int pick_max) { struct rb_node *p; struct ubi_wl_entry *e; int max; e = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb); max = e->ec + diff; p = root->rb_node; while (p) { struct ubi_wl_entry *e1; e1 = rb_entry(p, struct ubi_wl_entry, u.rb); if (e1->ec >= max) { if (pick_max) e = e1; p = p->rb_left; } else { p = p->rb_right; e = e1; } } return e; } /** * find_mean_wl_entry - find wear-leveling entry with medium erase counter. 
* @ubi: UBI device description object * @root: the RB-tree where to look for * * This function looks for a wear leveling entry with medium erase counter, * but not greater or equivalent than the lowest erase counter plus * %WL_FREE_MAX_DIFF/2. */ static struct ubi_wl_entry *find_mean_wl_entry(struct ubi_device *ubi, struct rb_root *root) { struct ubi_wl_entry *e, *first, *last; first = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb); last = rb_entry(rb_last(root), struct ubi_wl_entry, u.rb); if (last->ec - first->ec < WL_FREE_MAX_DIFF) { e = rb_entry(root->rb_node, struct ubi_wl_entry, u.rb); /* * If no fastmap has been written and fm_anchor is not * reserved and this WL entry can be used as anchor PEB * hold it back and return the second best WL entry such * that fastmap can use the anchor PEB later. */ e = may_reserve_for_fm(ubi, e, root); } else e = find_wl_entry(ubi, root, WL_FREE_MAX_DIFF/2, 0); return e; } /** * wl_get_wle - get a mean wl entry to be used by ubi_wl_get_peb() or * refill_wl_user_pool(). * @ubi: UBI device description object * * This function returns a wear leveling entry in case of success and * NULL in case of failure. */ static struct ubi_wl_entry *wl_get_wle(struct ubi_device *ubi) { struct ubi_wl_entry *e; e = find_mean_wl_entry(ubi, &ubi->free); if (!e) { ubi_err(ubi, "no free eraseblocks"); return NULL; } self_check_in_wl_tree(ubi, e, &ubi->free); /* * Move the physical eraseblock to the protection queue where it will * be protected from being moved for some time. */ rb_erase(&e->u.rb, &ubi->free); ubi->free_count--; dbg_wl("PEB %d EC %d", e->pnum, e->ec); return e; } /** * prot_queue_del - remove a physical eraseblock from the protection queue. * @ubi: UBI device description object * @pnum: the physical eraseblock to remove * * This function deletes PEB @pnum from the protection queue and returns zero * in case of success and %-ENODEV if the PEB was not found. 
*/ static int prot_queue_del(struct ubi_device *ubi, int pnum) { struct ubi_wl_entry *e; e = ubi->lookuptbl[pnum]; if (!e) return -ENODEV; if (self_check_in_pq(ubi, e)) return -ENODEV; list_del(&e->u.list); dbg_wl("deleted PEB %d from the protection queue", e->pnum); return 0; } /** * ubi_sync_erase - synchronously erase a physical eraseblock. * @ubi: UBI device description object * @e: the physical eraseblock to erase * @torture: if the physical eraseblock has to be tortured * * This function returns zero in case of success and a negative error code in * case of failure. */ int ubi_sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, int torture) { int err; struct ubi_ec_hdr *ec_hdr; unsigned long long ec = e->ec; dbg_wl("erase PEB %d, old EC %llu", e->pnum, ec); err = self_check_ec(ubi, e->pnum, e->ec); if (err) return -EINVAL; ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); if (!ec_hdr) return -ENOMEM; err = ubi_io_sync_erase(ubi, e->pnum, torture); if (err < 0) goto out_free; ec += err; if (ec > UBI_MAX_ERASECOUNTER) { /* * Erase counter overflow. Upgrade UBI and use 64-bit * erase counters internally. */ ubi_err(ubi, "erase counter overflow at PEB %d, EC %llu", e->pnum, ec); err = -EINVAL; goto out_free; } dbg_wl("erased PEB %d, new EC %llu", e->pnum, ec); ec_hdr->ec = cpu_to_be64(ec); err = ubi_io_write_ec_hdr(ubi, e->pnum, ec_hdr); if (err) goto out_free; e->ec = ec; spin_lock(&ubi->wl_lock); if (e->ec > ubi->max_ec) ubi->max_ec = e->ec; spin_unlock(&ubi->wl_lock); out_free: kfree(ec_hdr); return err; } /** * serve_prot_queue - check if it is time to stop protecting PEBs. * @ubi: UBI device description object * * This function is called after each erase operation and removes PEBs from the * tail of the protection queue. These PEBs have been protected for long enough * and should be moved to the used tree. 
*/
static void serve_prot_queue(struct ubi_device *ubi)
{
	struct ubi_wl_entry *e, *tmp;
	int count;

	/*
	 * There may be several protected physical eraseblocks to remove,
	 * process them all.
	 */
repeat:
	count = 0;
	spin_lock(&ubi->wl_lock);
	/* Only the slot at @ubi->pq_head is drained by one call */
	list_for_each_entry_safe(e, tmp, &ubi->pq[ubi->pq_head], u.list) {
		dbg_wl("PEB %d EC %d protection over, move to used tree",
			e->pnum, e->ec);

		list_del(&e->u.list);
		wl_tree_add(e, &ubi->used);
		if (count++ > 32) {
			/*
			 * Let's be nice and avoid holding the spinlock for
			 * too long. The walk is restarted from scratch because
			 * the list may change while the lock is dropped.
			 */
			spin_unlock(&ubi->wl_lock);
			cond_resched();
			goto repeat;
		}
	}

	/* The head slot is empty now: advance the head, wrapping around */
	ubi->pq_head += 1;
	if (ubi->pq_head == UBI_PROT_QUEUE_LEN)
		ubi->pq_head = 0;
	ubi_assert(ubi->pq_head >= 0 && ubi->pq_head < UBI_PROT_QUEUE_LEN);
	spin_unlock(&ubi->wl_lock);
}

/**
 * __schedule_ubi_work - schedule a work.
 * @ubi: UBI device description object
 * @wrk: the work to schedule
 *
 * This function adds a work defined by @wrk to the tail of the pending works
 * list. Can only be used if ubi->work_sem is already held in read mode!
 */
static void __schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk)
{
	spin_lock(&ubi->wl_lock);
	list_add_tail(&wrk->list, &ubi->works);
	ubi_assert(ubi->works_count >= 0);
	ubi->works_count += 1;
	/* Wake the background thread unless it is disabled */
	if (ubi->thread_enabled && !ubi_dbg_is_bgt_disabled(ubi))
		wake_up_process(ubi->bgt_thread);
	spin_unlock(&ubi->wl_lock);
}

/**
 * schedule_ubi_work - schedule a work.
 * @ubi: UBI device description object
 * @wrk: the work to schedule
 *
 * This function adds a work defined by @wrk to the tail of the pending works
 * list. It takes ubi->work_sem in read mode around the queueing itself.
 */
static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk)
{
	down_read(&ubi->work_sem);
	__schedule_ubi_work(ubi, wrk);
	up_read(&ubi->work_sem);
}

static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
			int shutdown);

/**
 * schedule_erase - schedule an erase work.
* @ubi: UBI device description object * @e: the WL entry of the physical eraseblock to erase * @vol_id: the volume ID that last used this PEB * @lnum: the last used logical eraseblock number for the PEB * @torture: if the physical eraseblock has to be tortured * @nested: denotes whether the work_sem is already held * * This function returns zero in case of success and a %-ENOMEM in case of * failure. */ static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, int vol_id, int lnum, int torture, bool nested) { struct ubi_work *wl_wrk; ubi_assert(e); dbg_wl("schedule erasure of PEB %d, EC %d, torture %d", e->pnum, e->ec, torture); wl_wrk = kmalloc_obj(struct ubi_work, GFP_NOFS); if (!wl_wrk) return -ENOMEM; wl_wrk->func = &erase_worker; wl_wrk->e = e; wl_wrk->vol_id = vol_id; wl_wrk->lnum = lnum; wl_wrk->torture = torture; if (nested) __schedule_ubi_work(ubi, wl_wrk); else schedule_ubi_work(ubi, wl_wrk); return 0; } static int __erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk); /** * do_sync_erase - run the erase worker synchronously. * @ubi: UBI device description object * @e: the WL entry of the physical eraseblock to erase * @vol_id: the volume ID that last used this PEB * @lnum: the last used logical eraseblock number for the PEB * @torture: if the physical eraseblock has to be tortured * */ static int do_sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, int vol_id, int lnum, int torture) { struct ubi_work wl_wrk; dbg_wl("sync erase of PEB %i", e->pnum); wl_wrk.e = e; wl_wrk.vol_id = vol_id; wl_wrk.lnum = lnum; wl_wrk.torture = torture; return __erase_worker(ubi, &wl_wrk); } static int ensure_wear_leveling(struct ubi_device *ubi, int nested); /** * wear_leveling_worker - wear-leveling worker function. 
* @ubi: UBI device description object * @wrk: the work object * @shutdown: non-zero if the worker has to free memory and exit * because the WL-subsystem is shutting down * * This function copies a more worn out physical eraseblock to a less worn out * one. Returns zero in case of success and a negative error code in case of * failure. */ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, int shutdown) { int err, scrubbing = 0, torture = 0, protect = 0, erroneous = 0; int erase = 0, keep = 0, vol_id = -1, lnum = -1; struct ubi_wl_entry *e1, *e2; struct ubi_vid_io_buf *vidb; struct ubi_vid_hdr *vid_hdr; int dst_leb_clean = 0; kfree(wrk); if (shutdown) return 0; vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS); if (!vidb) return -ENOMEM; vid_hdr = ubi_get_vid_hdr(vidb); down_read(&ubi->fm_eba_sem); mutex_lock(&ubi->move_mutex); spin_lock(&ubi->wl_lock); ubi_assert(!ubi->move_from && !ubi->move_to); ubi_assert(!ubi->move_to_put); #ifdef CONFIG_MTD_UBI_FASTMAP if (!next_peb_for_wl(ubi, true) || #else if (!ubi->free.rb_node || #endif (!ubi->used.rb_node && !ubi->scrub.rb_node)) { /* * No free physical eraseblocks? Well, they must be waiting in * the queue to be erased. Cancel movement - it will be * triggered again when a free physical eraseblock appears. * * No used physical eraseblocks? They must be temporarily * protected from being moved. They will be moved to the * @ubi->used tree later and the wear-leveling will be * triggered again. */ dbg_wl("cancel WL, a list is empty: free %d, used %d", !ubi->free.rb_node, !ubi->used.rb_node); goto out_cancel; } #ifdef CONFIG_MTD_UBI_FASTMAP e1 = find_anchor_wl_entry(&ubi->used); if (e1 && ubi->fm_anchor && (ubi->fm_anchor->ec - e1->ec >= UBI_WL_THRESHOLD)) { ubi->fm_do_produce_anchor = 1; /* * fm_anchor is no longer considered a good anchor. * NULL assignment also prevents multiple wear level checks * of this PEB. 
*/ wl_tree_add(ubi->fm_anchor, &ubi->free); ubi->fm_anchor = NULL; ubi->free_count++; } if (ubi->fm_do_produce_anchor) { if (!e1) goto out_cancel; e2 = get_peb_for_wl(ubi); if (!e2) goto out_cancel; self_check_in_wl_tree(ubi, e1, &ubi->used); rb_erase(&e1->u.rb, &ubi->used); dbg_wl("anchor-move PEB %d to PEB %d", e1->pnum, e2->pnum); ubi->fm_do_produce_anchor = 0; } else if (!ubi->scrub.rb_node) { #else if (!ubi->scrub.rb_node) { #endif /* * Now pick the least worn-out used physical eraseblock and a * highly worn-out free physical eraseblock. If the erase * counters differ much enough, start wear-leveling. */ e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb); e2 = get_peb_for_wl(ubi); if (!e2) goto out_cancel; if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) { dbg_wl("no WL needed: min used EC %d, max free EC %d", e1->ec, e2->ec); /* Give the unused PEB back */ wl_tree_add(e2, &ubi->free); ubi->free_count++; goto out_cancel; } self_check_in_wl_tree(ubi, e1, &ubi->used); rb_erase(&e1->u.rb, &ubi->used); dbg_wl("move PEB %d EC %d to PEB %d EC %d", e1->pnum, e1->ec, e2->pnum, e2->ec); } else { /* Perform scrubbing */ scrubbing = 1; e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, u.rb); e2 = get_peb_for_wl(ubi); if (!e2) goto out_cancel; self_check_in_wl_tree(ubi, e1, &ubi->scrub); rb_erase(&e1->u.rb, &ubi->scrub); dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum); } ubi->move_from = e1; ubi->move_to = e2; spin_unlock(&ubi->wl_lock); /* * Now we are going to copy physical eraseblock @e1->pnum to @e2->pnum. * We so far do not know which logical eraseblock our physical * eraseblock (@e1) belongs to. We have to read the volume identifier * header first. * * Note, we are protected from this PEB being unmapped and erased. The * 'ubi_wl_put_peb()' would wait for moving to be finished if the PEB * which is being moved was unmapped. 
*/ err = ubi_io_read_vid_hdr(ubi, e1->pnum, vidb, 0); if (err && err != UBI_IO_BITFLIPS) { dst_leb_clean = 1; if (err == UBI_IO_FF) { /* * We are trying to move PEB without a VID header. UBI * always write VID headers shortly after the PEB was * given, so we have a situation when it has not yet * had a chance to write it, because it was preempted. * So add this PEB to the protection queue so far, * because presumably more data will be written there * (including the missing VID header), and then we'll * move it. */ dbg_wl("PEB %d has no VID header", e1->pnum); protect = 1; goto out_not_moved; } else if (err == UBI_IO_FF_BITFLIPS) { /* * The same situation as %UBI_IO_FF, but bit-flips were * detected. It is better to schedule this PEB for * scrubbing. */ dbg_wl("PEB %d has no VID header but has bit-flips", e1->pnum); scrubbing = 1; goto out_not_moved; } else if (ubi->fast_attach && err == UBI_IO_BAD_HDR_EBADMSG) { /* * While a full scan would detect interrupted erasures * at attach time we can face them here when attached from * Fastmap. */ dbg_wl("PEB %d has ECC errors, maybe from an interrupted erasure", e1->pnum); erase = 1; goto out_not_moved; } ubi_err(ubi, "error %d while reading VID header from PEB %d", err, e1->pnum); goto out_error; } vol_id = be32_to_cpu(vid_hdr->vol_id); lnum = be32_to_cpu(vid_hdr->lnum); err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vidb); if (err) { if (err == MOVE_CANCEL_RACE) { /* * The LEB has not been moved because the volume is * being deleted or the PEB has been put meanwhile. We * should prevent this PEB from being selected for * wear-leveling movement again, so put it to the * protection queue. */ protect = 1; dst_leb_clean = 1; goto out_not_moved; } if (err == MOVE_RETRY) { /* * For source PEB: * 1. The scrubbing is set for scrub type PEB, it will * be put back into ubi->scrub list. * 2. Non-scrub type PEB will be put back into ubi->used * list. 
*/ keep = 1; dst_leb_clean = 1; goto out_not_moved; } if (err == MOVE_TARGET_BITFLIPS || err == MOVE_TARGET_WR_ERR || err == MOVE_TARGET_RD_ERR) { /* * Target PEB had bit-flips or write error - torture it. */ torture = 1; keep = 1; goto out_not_moved; } if (err == MOVE_SOURCE_RD_ERR) { /* * An error happened while reading the source PEB. Do * not switch to R/O mode in this case, and give the * upper layers a possibility to recover from this, * e.g. by unmapping corresponding LEB. Instead, just * put this PEB to the @ubi->erroneous list to prevent * UBI from trying to move it over and over again. */ if (ubi->erroneous_peb_count > ubi->max_erroneous) { ubi_err(ubi, "too many erroneous eraseblocks (%d)", ubi->erroneous_peb_count); goto out_error; } dst_leb_clean = 1; erroneous = 1; goto out_not_moved; } if (err < 0) goto out_error; ubi_assert(0); } /* The PEB has been successfully moved */ if (scrubbing) ubi_msg(ubi, "scrubbed PEB %d (LEB %d:%d), data moved to PEB %d", e1->pnum, vol_id, lnum, e2->pnum); ubi_free_vid_buf(vidb); spin_lock(&ubi->wl_lock); if (!ubi->move_to_put) { wl_tree_add(e2, &ubi->used); e2 = NULL; } ubi->move_from = ubi->move_to = NULL; ubi->move_to_put = ubi->wl_scheduled = 0; spin_unlock(&ubi->wl_lock); err = do_sync_erase(ubi, e1, vol_id, lnum, 0); if (err) { if (e2) { spin_lock(&ubi->wl_lock); wl_entry_destroy(ubi, e2); spin_unlock(&ubi->wl_lock); } goto out_ro; } if (e2) { /* * Well, the target PEB was put meanwhile, schedule it for * erasure. */ dbg_wl("PEB %d (LEB %d:%d) was put meanwhile, erase", e2->pnum, vol_id, lnum); err = do_sync_erase(ubi, e2, vol_id, lnum, 0); if (err) goto out_ro; } dbg_wl("done"); mutex_unlock(&ubi->move_mutex); up_read(&ubi->fm_eba_sem); return 0; /* * For some reasons the LEB was not moved, might be an error, might be * something else. @e1 was not changed, so return it back. @e2 might * have been changed, schedule it for erasure. 
*/ out_not_moved: if (vol_id != -1) dbg_wl("cancel moving PEB %d (LEB %d:%d) to PEB %d (%d)", e1->pnum, vol_id, lnum, e2->pnum, err); else dbg_wl("cancel moving PEB %d to PEB %d (%d)", e1->pnum, e2->pnum, err); spin_lock(&ubi->wl_lock); if (protect) prot_queue_add(ubi, e1); else if (erroneous) { wl_tree_add(e1, &ubi->erroneous); ubi->erroneous_peb_count += 1; } else if (scrubbing) wl_tree_add(e1, &ubi->scrub); else if (keep) wl_tree_add(e1, &ubi->used); if (dst_leb_clean) { wl_tree_add(e2, &ubi->free); ubi->free_count++; } ubi_assert(!ubi->move_to_put); ubi->move_from = ubi->move_to = NULL; ubi->wl_scheduled = 0; spin_unlock(&ubi->wl_lock); ubi_free_vid_buf(vidb); if (dst_leb_clean) { ensure_wear_leveling(ubi, 1); } else { err = do_sync_erase(ubi, e2, vol_id, lnum, torture); if (err) goto out_ro; } if (erase) { err = do_sync_erase(ubi, e1, vol_id, lnum, 1); if (err) goto out_ro; } mutex_unlock(&ubi->move_mutex); up_read(&ubi->fm_eba_sem); return 0; out_error: if (vol_id != -1) ubi_err(ubi, "error %d while moving PEB %d to PEB %d", err, e1->pnum, e2->pnum); else ubi_err(ubi, "error %d while moving PEB %d (LEB %d:%d) to PEB %d", err, e1->pnum, vol_id, lnum, e2->pnum); spin_lock(&ubi->wl_lock); ubi->move_from = ubi->move_to = NULL; ubi->move_to_put = ubi->wl_scheduled = 0; wl_entry_destroy(ubi, e1); wl_entry_destroy(ubi, e2); spin_unlock(&ubi->wl_lock); ubi_free_vid_buf(vidb); out_ro: ubi_ro_mode(ubi); mutex_unlock(&ubi->move_mutex); up_read(&ubi->fm_eba_sem); ubi_assert(err != 0); return err < 0 ? err : -EIO; out_cancel: ubi->wl_scheduled = 0; spin_unlock(&ubi->wl_lock); mutex_unlock(&ubi->move_mutex); up_read(&ubi->fm_eba_sem); ubi_free_vid_buf(vidb); return 0; } /** * ensure_wear_leveling - schedule wear-leveling if it is needed. * @ubi: UBI device description object * @nested: set to non-zero if this function is called from UBI worker * * This function checks if it is time to start wear-leveling and schedules it * if yes. 
This function returns zero in case of success and a negative error
 * code in case of failure.
 */
static int ensure_wear_leveling(struct ubi_device *ubi, int nested)
{
	int err = 0;
	struct ubi_work *wrk;

	spin_lock(&ubi->wl_lock);
	if (ubi->wl_scheduled)
		/* Wear-leveling is already in the work queue */
		goto out_unlock;

	/*
	 * If the ubi->scrub tree is not empty, scrubbing is needed, and the
	 * WL worker has to be scheduled anyway.
	 */
	if (!ubi->scrub.rb_node) {
#ifdef CONFIG_MTD_UBI_FASTMAP
		if (!need_wear_leveling(ubi))
			goto out_unlock;
#else
		struct ubi_wl_entry *e1;
		struct ubi_wl_entry *e2;

		if (!ubi->used.rb_node || !ubi->free.rb_node)
			/* No physical eraseblocks - no deal */
			goto out_unlock;

		/*
		 * We schedule wear-leveling only if the difference between the
		 * lowest erase counter of used physical eraseblocks and a high
		 * erase counter of free physical eraseblocks is greater than
		 * or equal to %UBI_WL_THRESHOLD.
		 */
		e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb);
		e2 = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF, 0);

		if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD))
			goto out_unlock;
#endif
		dbg_wl("schedule wear-leveling");
	} else
		dbg_wl("schedule scrubbing");

	/* Set the flag under the lock so that other callers back off above */
	ubi->wl_scheduled = 1;
	spin_unlock(&ubi->wl_lock);

	wrk = kmalloc_obj(struct ubi_work, GFP_NOFS);
	if (!wrk) {
		err = -ENOMEM;
		goto out_cancel;
	}

	wrk->func = &wear_leveling_worker;
	/* When called from a worker, the non-locking variant is used */
	if (nested)
		__schedule_ubi_work(ubi, wrk);
	else
		schedule_ubi_work(ubi, wrk);
	return err;

out_cancel:
	/* Nothing was queued: take the "scheduled" mark back */
	spin_lock(&ubi->wl_lock);
	ubi->wl_scheduled = 0;
out_unlock:
	spin_unlock(&ubi->wl_lock);
	return err;
}

/**
 * __erase_worker - physical eraseblock erase worker function.
 * @ubi: UBI device description object
 * @wl_wrk: the work object
 *
 * This function erases a physical eraseblock and perform torture testing if
 * needed. It also takes care about marking the physical eraseblock bad if
 * needed. Returns zero in case of success and a negative error code in case of
 * failure.
*/
static int __erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk)
{
	struct ubi_wl_entry *e = wl_wrk->e;
	/* Cached because @e may be destroyed before the messages below */
	int pnum = e->pnum;
	int vol_id = wl_wrk->vol_id;
	int lnum = wl_wrk->lnum;
	int err, available_consumed = 0;

	dbg_wl("erase PEB %d EC %d LEB %d:%d",
	       pnum, e->ec, wl_wrk->vol_id, wl_wrk->lnum);

	err = ubi_sync_erase(ubi, e, wl_wrk->torture);
	if (!err) {
		spin_lock(&ubi->wl_lock);

		if (!ubi->fm_disabled && !ubi->fm_anchor &&
		    e->pnum < UBI_FM_MAX_START) {
			/*
			 * Abort anchor production, if needed it will be
			 * enabled again in the wear leveling started below.
			 */
			ubi->fm_anchor = e;
			ubi->fm_do_produce_anchor = 0;
		} else {
			wl_tree_add(e, &ubi->free);
			ubi->free_count++;
		}

		spin_unlock(&ubi->wl_lock);

		/*
		 * One more erase operation has happened, take care about
		 * protected physical eraseblocks.
		 */
		serve_prot_queue(ubi);

		/* And take care about wear-leveling */
		err = ensure_wear_leveling(ubi, 1);
		return err;
	}

	ubi_err(ubi, "failed to erase PEB %d, error %d", pnum, err);

	if (err == -EINTR || err == -ENOMEM || err == -EAGAIN ||
	    err == -EBUSY) {
		int err1;

		/* Re-schedule the PEB for erasure */
		err1 = schedule_erase(ubi, e, vol_id, lnum, 0, true);
		if (err1) {
			spin_lock(&ubi->wl_lock);
			wl_entry_destroy(ubi, e);
			spin_unlock(&ubi->wl_lock);
			err = err1;
			goto out_ro;
		}
		/* The erasure is queued again; the original error is returned */
		return err;
	}

	spin_lock(&ubi->wl_lock);
	wl_entry_destroy(ubi, e);
	spin_unlock(&ubi->wl_lock);
	if (err != -EIO)
		/*
		 * If this is not %-EIO, we have no idea what to do. Scheduling
		 * this physical eraseblock for erasure again would cause
		 * errors again and again. Well, lets switch to R/O mode.
		 */
		goto out_ro;

	/* It is %-EIO, the PEB went bad */

	if (!ubi->bad_allowed) {
		ubi_err(ubi, "bad physical eraseblock %d detected", pnum);
		goto out_ro;
	}

	spin_lock(&ubi->volumes_lock);
	if (ubi->beb_rsvd_pebs == 0) {
		if (ubi->avail_pebs == 0) {
			spin_unlock(&ubi->volumes_lock);
			ubi_err(ubi, "no reserved/available physical eraseblocks");
			goto out_ro;
		}
		/* The reserve is empty: tentatively consume an available PEB */
		ubi->avail_pebs -= 1;
		available_consumed = 1;
	}
	spin_unlock(&ubi->volumes_lock);

	ubi_msg(ubi, "mark PEB %d as bad", pnum);
	err = ubi_io_mark_bad(ubi, pnum);
	if (err)
		goto out_ro;

	spin_lock(&ubi->volumes_lock);
	if (ubi->beb_rsvd_pebs > 0) {
		if (available_consumed) {
			/*
			 * The amount of reserved PEBs increased since we last
			 * checked.
			 */
			ubi->avail_pebs += 1;
			available_consumed = 0;
		}
		ubi->beb_rsvd_pebs -= 1;
	}
	ubi->bad_peb_count += 1;
	ubi->good_peb_count -= 1;
	ubi_calculate_reserved(ubi);
	if (available_consumed)
		ubi_warn(ubi, "no PEBs in the reserved pool, used an available PEB");
	else if (ubi->beb_rsvd_pebs)
		ubi_msg(ubi, "%d PEBs left in the reserve",
			ubi->beb_rsvd_pebs);
	else
		ubi_warn(ubi, "last PEB from the reserve was used");
	spin_unlock(&ubi->volumes_lock);

	return err;

out_ro:
	/* Give back the available PEB which was tentatively consumed above */
	if (available_consumed) {
		spin_lock(&ubi->volumes_lock);
		ubi->avail_pebs += 1;
		spin_unlock(&ubi->volumes_lock);
	}
	ubi_ro_mode(ubi);
	return err;
}

/**
 * erase_worker - work-queue wrapper around __erase_worker().
 * @ubi: UBI device description object
 * @wl_wrk: the work object, freed by this function
 * @shutdown: non-zero if the erasure has to be cancelled
 *
 * If @shutdown is non-zero, the work object is freed, the wear-leveling entry
 * it refers to is destroyed and zero is returned. Otherwise the return value
 * of __erase_worker() is passed on.
 */
static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
			  int shutdown)
{
	int ret;

	if (shutdown) {
		struct ubi_wl_entry *e = wl_wrk->e;

		dbg_wl("cancel erasure of PEB %d EC %d", e->pnum, e->ec);
		kfree(wl_wrk);
		wl_entry_destroy(ubi, e);
		return 0;
	}

	ret = __erase_worker(ubi, wl_wrk);
	kfree(wl_wrk);
	return ret;
}

/**
 * ubi_wl_put_peb - return a PEB to the wear-leveling sub-system.
* @ubi: UBI device description object
 * @vol_id: the volume ID that last used this PEB
 * @lnum: the last used logical eraseblock number for the PEB
 * @pnum: physical eraseblock to return
 * @torture: if this physical eraseblock has to be tortured
 *
 * This function is called to return physical eraseblock @pnum to the pool of
 * free physical eraseblocks. The @torture flag has to be set if an I/O error
 * occurred to this @pnum and it has to be tested. This function returns zero
 * in case of success, and a negative error code in case of failure.
 */
int ubi_wl_put_peb(struct ubi_device *ubi, int vol_id, int lnum,
		   int pnum, int torture)
{
	int err;
	struct ubi_wl_entry *e;

	dbg_wl("PEB %d", pnum);
	ubi_assert(pnum >= 0);
	ubi_assert(pnum < ubi->peb_count);

	down_read(&ubi->fm_protect);

retry:
	spin_lock(&ubi->wl_lock);
	e = ubi->lookuptbl[pnum];
	if (!e) {
		/*
		 * This wl entry has been removed for some errors by other
		 * process (eg. wear leveling worker), corresponding process
		 * (except __erase_worker, which cannot concurrent with
		 * ubi_wl_put_peb) will set ubi ro_mode at the same time,
		 * just ignore this wl entry.
		 */
		spin_unlock(&ubi->wl_lock);
		up_read(&ubi->fm_protect);
		return 0;
	}
	if (e == ubi->move_from) {
		/*
		 * User is putting the physical eraseblock which was selected to
		 * be moved. It will be scheduled for erasure in the
		 * wear-leveling worker.
		 */
		dbg_wl("PEB %d is being moved, wait", pnum);
		spin_unlock(&ubi->wl_lock);

		/* Wait for the WL worker by taking the @ubi->move_mutex */
		mutex_lock(&ubi->move_mutex);
		mutex_unlock(&ubi->move_mutex);
		/* Look the entry up again, its state may have changed */
		goto retry;
	} else if (e == ubi->move_to) {
		/*
		 * User is putting the physical eraseblock which was selected
		 * as the target the data is moved to. It may happen if the EBA
		 * sub-system already re-mapped the LEB in 'ubi_eba_copy_leb()'
		 * but the WL sub-system has not put the PEB to the "used" tree
		 * yet, but it is about to do this. So we just set a flag which
		 * will tell the WL worker that the PEB is not needed anymore
		 * and should be scheduled for erasure.
		 */
		dbg_wl("PEB %d is the target of data moving", pnum);
		ubi_assert(!ubi->move_to_put);
		ubi->move_to_put = 1;
		spin_unlock(&ubi->wl_lock);
		up_read(&ubi->fm_protect);
		return 0;
	} else {
		/* Detach the entry from whichever structure currently holds it */
		if (in_wl_tree(e, &ubi->used)) {
			self_check_in_wl_tree(ubi, e, &ubi->used);
			rb_erase(&e->u.rb, &ubi->used);
		} else if (in_wl_tree(e, &ubi->scrub)) {
			self_check_in_wl_tree(ubi, e, &ubi->scrub);
			rb_erase(&e->u.rb, &ubi->scrub);
		} else if (in_wl_tree(e, &ubi->erroneous)) {
			self_check_in_wl_tree(ubi, e, &ubi->erroneous);
			rb_erase(&e->u.rb, &ubi->erroneous);
			ubi->erroneous_peb_count -= 1;
			ubi_assert(ubi->erroneous_peb_count >= 0);
			/* Erroneous PEBs should be tortured */
			torture = 1;
		} else {
			/* Not in any tree, so it has to be in the protection queue */
			err = prot_queue_del(ubi, e->pnum);
			if (err) {
				ubi_err(ubi, "PEB %d not found", pnum);
				ubi_ro_mode(ubi);
				spin_unlock(&ubi->wl_lock);
				up_read(&ubi->fm_protect);
				return err;
			}
		}
	}
	spin_unlock(&ubi->wl_lock);

	err = schedule_erase(ubi, e, vol_id, lnum, torture, false);
	if (err) {
		/* The erasure could not be queued: put the entry to the used tree */
		spin_lock(&ubi->wl_lock);
		wl_tree_add(e, &ubi->used);
		spin_unlock(&ubi->wl_lock);
	}

	up_read(&ubi->fm_protect);
	return err;
}

/**
 * ubi_wl_scrub_peb - schedule a physical eraseblock for scrubbing.
 * @ubi: UBI device description object
 * @pnum: the physical eraseblock to schedule
 *
 * If a bit-flip in a physical eraseblock is detected, this physical eraseblock
 * needs scrubbing. This function schedules a physical eraseblock for
 * scrubbing which is done in background. This function returns zero in case of
 * success and a negative error code in case of failure.
*/ int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum) { struct ubi_wl_entry *e; ubi_msg(ubi, "schedule PEB %d for scrubbing", pnum); retry: spin_lock(&ubi->wl_lock); e = ubi->lookuptbl[pnum]; if (e == ubi->move_from || in_wl_tree(e, &ubi->scrub) || in_wl_tree(e, &ubi->erroneous)) { spin_unlock(&ubi->wl_lock); return 0; } if (e == ubi->move_to) { /* * This physical eraseblock was used to move data to. The data * was moved but the PEB was not yet inserted to the proper * tree. We should just wait a little and let the WL worker * proceed. */ spin_unlock(&ubi->wl_lock); dbg_wl("the PEB %d is not in proper tree, retry", pnum); yield(); goto retry; } if (in_wl_tree(e, &ubi->used)) { self_check_in_wl_tree(ubi, e, &ubi->used); rb_erase(&e->u.rb, &ubi->used); } else { int err; err = prot_queue_del(ubi, e->pnum); if (err) { ubi_err(ubi, "PEB %d not found", pnum); ubi_ro_mode(ubi); spin_unlock(&ubi->wl_lock); return err; } } wl_tree_add(e, &ubi->scrub); spin_unlock(&ubi->wl_lock); /* * Technically scrubbing is the same as wear-leveling, so it is done * by the WL worker. */ return ensure_wear_leveling(ubi, 0); } /** * ubi_wl_flush - flush all pending works. * @ubi: UBI device description object * @vol_id: the volume id to flush for * @lnum: the logical eraseblock number to flush for * * This function executes all pending works for a particular volume id / * logical eraseblock number pair. If either value is set to %UBI_ALL, then it * acts as a wildcard for all of the corresponding volume numbers or logical * eraseblock numbers. It returns zero in case of success and a negative error * code in case of failure. */ int ubi_wl_flush(struct ubi_device *ubi, int vol_id, int lnum) { int err = 0; int found = 1; /* * Erase while the pending works queue is not empty, but not more than * the number of currently pending works. 
*/ dbg_wl("flush pending work for LEB %d:%d (%d pending works)", vol_id, lnum, ubi->works_count); while (found) { struct ubi_work *wrk, *tmp; found = 0; down_read(&ubi->work_sem); spin_lock(&ubi->wl_lock); list_for_each_entry_safe(wrk, tmp, &ubi->works, list) { if ((vol_id == UBI_ALL || wrk->vol_id == vol_id) && (lnum == UBI_ALL || wrk->lnum == lnum)) { list_del(&wrk->list); ubi->works_count -= 1; ubi_assert(ubi->works_count >= 0); spin_unlock(&ubi->wl_lock); err = wrk->func(ubi, wrk, 0); if (err) { up_read(&ubi->work_sem); return err; } spin_lock(&ubi->wl_lock); found = 1; break; } } spin_unlock(&ubi->wl_lock); up_read(&ubi->work_sem); } /* * Make sure all the works which have been done in parallel are * finished. */ down_write(&ubi->work_sem); up_write(&ubi->work_sem); return err; } static bool scrub_possible(struct ubi_device *ubi, struct ubi_wl_entry *e) { if (in_wl_tree(e, &ubi->scrub)) return false; else if (in_wl_tree(e, &ubi->erroneous)) return false; else if (ubi->move_from == e) return false; else if (ubi->move_to == e) return false; return true; } /** * ubi_bitflip_check - Check an eraseblock for bitflips and scrub it if needed. * @ubi: UBI device description object * @pnum: the physical eraseblock to schedule * @force: don't read the block, assume bitflips happened and take action. * * This function reads the given eraseblock and checks if bitflips occured. * In case of bitflips, the eraseblock is scheduled for scrubbing. * If scrubbing is forced with @force, the eraseblock is not read, * but scheduled for scrubbing right away. 
* * Returns: * %EINVAL, PEB is out of range * %ENOENT, PEB is no longer used by UBI * %EBUSY, PEB cannot be checked now or a check is currently running on it * %EAGAIN, bit flips happened but scrubbing is currently not possible * %EUCLEAN, bit flips happened and PEB is scheduled for scrubbing * %0, no bit flips detected */ int ubi_bitflip_check(struct ubi_device *ubi, int pnum, int force) { int err = 0; struct ubi_wl_entry *e; if (pnum < 0 || pnum >= ubi->peb_count) { err = -EINVAL; goto out; } /* * Pause all parallel work, otherwise it can happen that the * erase worker frees a wl entry under us. */ down_write(&ubi->work_sem); /* * Make sure that the wl entry does not change state while * inspecting it. */ spin_lock(&ubi->wl_lock); e = ubi->lookuptbl[pnum]; if (!e) { spin_unlock(&ubi->wl_lock); err = -ENOENT; goto out_resume; } /* * Does it make sense to check this PEB? */ if (!scrub_possible(ubi, e)) { spin_unlock(&ubi->wl_lock); err = -EBUSY; goto out_resume; } spin_unlock(&ubi->wl_lock); if (!force) { mutex_lock(&ubi->buf_mutex); err = ubi_io_read(ubi, ubi->peb_buf, pnum, 0, ubi->peb_size); mutex_unlock(&ubi->buf_mutex); } if (force || err == UBI_IO_BITFLIPS) { /* * Okay, bit flip happened, let's figure out what we can do. */ spin_lock(&ubi->wl_lock); /* * Recheck. We released wl_lock, UBI might have killed the * wl entry under us. 
*/ e = ubi->lookuptbl[pnum]; if (!e) { spin_unlock(&ubi->wl_lock); err = -ENOENT; goto out_resume; } /* * Need to re-check state */ if (!scrub_possible(ubi, e)) { spin_unlock(&ubi->wl_lock); err = -EBUSY; goto out_resume; } if (in_pq(ubi, e)) { prot_queue_del(ubi, e->pnum); wl_tree_add(e, &ubi->scrub); spin_unlock(&ubi->wl_lock); err = ensure_wear_leveling(ubi, 1); } else if (in_wl_tree(e, &ubi->used)) { rb_erase(&e->u.rb, &ubi->used); wl_tree_add(e, &ubi->scrub); spin_unlock(&ubi->wl_lock); err = ensure_wear_leveling(ubi, 1); } else if (in_wl_tree(e, &ubi->free)) { rb_erase(&e->u.rb, &ubi->free); ubi->free_count--; spin_unlock(&ubi->wl_lock); /* * This PEB is empty we can schedule it for * erasure right away. No wear leveling needed. */ err = schedule_erase(ubi, e, UBI_UNKNOWN, UBI_UNKNOWN, force ? 0 : 1, true); } else { spin_unlock(&ubi->wl_lock); err = -EAGAIN; } if (!err && !force) err = -EUCLEAN; } else { err = 0; } out_resume: up_write(&ubi->work_sem); out: return err; } /** * tree_destroy - destroy an RB-tree. * @ubi: UBI device description object * @root: the root of the tree to destroy */ static void tree_destroy(struct ubi_device *ubi, struct rb_root *root) { struct rb_node *rb; struct ubi_wl_entry *e; rb = root->rb_node; while (rb) { if (rb->rb_left) rb = rb->rb_left; else if (rb->rb_right) rb = rb->rb_right; else { e = rb_entry(rb, struct ubi_wl_entry, u.rb); rb = rb_parent(rb); if (rb) { if (rb->rb_left == &e->u.rb) rb->rb_left = NULL; else rb->rb_right = NULL; } wl_entry_destroy(ubi, e); } } } /** * ubi_thread - UBI background thread. 
* @u: the UBI device description object pointer
 *
 * Runs pending works one at a time through do_work() until
 * kthread_should_stop() reports true. The thread sleeps while the works list
 * is empty, the device is in read-only mode, the thread is not enabled, or
 * ubi_dbg_is_bgt_disabled() returns true. Always returns zero.
 */
int ubi_thread(void *u)
{
	int failures = 0;
	struct ubi_device *ubi = u;

	ubi_msg(ubi, "background thread \"%s\" started, PID %d",
		ubi->bgt_name, task_pid_nr(current));

	set_freezable();
	for (;;) {
		int err;

		if (kthread_should_stop())
			break;

		if (try_to_freeze())
			continue;

		spin_lock(&ubi->wl_lock);
		if (list_empty(&ubi->works) || ubi->ro_mode ||
		    !ubi->thread_enabled || ubi_dbg_is_bgt_disabled(ubi)) {
			/* Nothing to run: mark the task sleeping before unlocking */
			set_current_state(TASK_INTERRUPTIBLE);
			spin_unlock(&ubi->wl_lock);

			/*
			 * Check kthread_should_stop() after we set the task
			 * state to guarantee that we either see the stop bit
			 * and exit or the task state is reset to runnable such
			 * that it's not scheduled out indefinitely and detects
			 * the stop bit at kthread_should_stop().
			 */
			if (kthread_should_stop()) {
				set_current_state(TASK_RUNNING);
				break;
			}

			schedule();
			continue;
		}
		spin_unlock(&ubi->wl_lock);

		err = do_work(ubi, NULL);
		if (err) {
			ubi_err(ubi, "%s: work failed with error code %d",
				ubi->bgt_name, err);
			/*
			 * Post-increment: the branch is taken once the count
			 * of earlier consecutive failures already exceeds
			 * WL_MAX_FAILURES.
			 */
			if (failures++ > WL_MAX_FAILURES) {
				/*
				 * Too many failures, disable the thread and
				 * switch to read-only mode.
				 */
				ubi_msg(ubi, "%s: %d consecutive failures",
					ubi->bgt_name, WL_MAX_FAILURES);
				ubi_ro_mode(ubi);
				ubi->thread_enabled = 0;
				continue;
			}
		} else
			failures = 0; /* a successful work resets the streak */

		cond_resched();
	}

	dbg_wl("background thread \"%s\" is killed", ubi->bgt_name);
	ubi->thread_enabled = 0;
	return 0;
}

/**
 * shutdown_work - shutdown all pending works.
 * @ubi: UBI device description object
 *
 * Removes every work from @ubi->works and invokes its callback with the
 * shutdown argument set to 1. The callback's return value is ignored.
 */
static void shutdown_work(struct ubi_device *ubi)
{
	while (!list_empty(&ubi->works)) {
		struct ubi_work *wrk;

		/* Always take the current head of the list */
		wrk = list_entry(ubi->works.next, struct ubi_work, list);
		list_del(&wrk->list);
		wrk->func(ubi, wrk, 1);
		ubi->works_count -= 1;
		ubi_assert(ubi->works_count >= 0);
	}
}

/**
 * erase_aeb - erase a PEB given in UBI attach info PEB
 * @ubi: UBI device description object
 * @aeb: UBI attach info PEB
 * @sync: If true, erase synchronously.
Otherwise schedule for erasure */ static int erase_aeb(struct ubi_device *ubi, struct ubi_ainf_peb *aeb, bool sync) { struct ubi_wl_entry *e; int err; e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); if (!e) return -ENOMEM; e->pnum = aeb->pnum; e->ec = aeb->ec; ubi->lookuptbl[e->pnum] = e; if (sync) { err = ubi_sync_erase(ubi, e, false); if (err) goto out_free; wl_tree_add(e, &ubi->free); ubi->free_count++; } else { err = schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false); if (err) goto out_free; } return 0; out_free: wl_entry_destroy(ubi, e); return err; } /** * ubi_wl_init - initialize the WL sub-system using attaching information. * @ubi: UBI device description object * @ai: attaching information * * This function returns zero in case of success, and a negative error code in * case of failure. */ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) { int err, i, reserved_pebs, found_pebs = 0; struct rb_node *rb1, *rb2; struct ubi_ainf_volume *av; struct ubi_ainf_peb *aeb, *tmp; struct ubi_wl_entry *e; ubi->used = ubi->erroneous = ubi->free = ubi->scrub = RB_ROOT; spin_lock_init(&ubi->wl_lock); mutex_init(&ubi->move_mutex); init_rwsem(&ubi->work_sem); ubi->max_ec = ai->max_ec; INIT_LIST_HEAD(&ubi->works); sprintf(ubi->bgt_name, UBI_BGT_NAME_PATTERN, ubi->ubi_num); err = -ENOMEM; ubi->lookuptbl = kcalloc(ubi->peb_count, sizeof(void *), GFP_KERNEL); if (!ubi->lookuptbl) return err; for (i = 0; i < UBI_PROT_QUEUE_LEN; i++) INIT_LIST_HEAD(&ubi->pq[i]); ubi->pq_head = 0; ubi->free_count = 0; list_for_each_entry_safe(aeb, tmp, &ai->erase, u.list) { cond_resched(); err = erase_aeb(ubi, aeb, false); if (err) goto out_free; found_pebs++; } list_for_each_entry(aeb, &ai->free, u.list) { cond_resched(); e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); if (!e) { err = -ENOMEM; goto out_free; } e->pnum = aeb->pnum; e->ec = aeb->ec; ubi_assert(e->ec >= 0); wl_tree_add(e, &ubi->free); ubi->free_count++; ubi->lookuptbl[e->pnum] = e; found_pebs++; } 
ubi_rb_for_each_entry(rb1, av, &ai->volumes, rb) { ubi_rb_for_each_entry(rb2, aeb, &av->root, u.rb) { cond_resched(); e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); if (!e) { err = -ENOMEM; goto out_free; } e->pnum = aeb->pnum; e->ec = aeb->ec; ubi->lookuptbl[e->pnum] = e; if (!aeb->scrub) { dbg_wl("add PEB %d EC %d to the used tree", e->pnum, e->ec); wl_tree_add(e, &ubi->used); } else { dbg_wl("add PEB %d EC %d to the scrub tree", e->pnum, e->ec); wl_tree_add(e, &ubi->scrub); } found_pebs++; } } list_for_each_entry(aeb, &ai->fastmap, u.list) { cond_resched(); e = ubi_find_fm_block(ubi, aeb->pnum); if (e) { ubi_assert(!ubi->lookuptbl[e->pnum]); ubi->lookuptbl[e->pnum] = e; } else { bool sync = false; /* * Usually old Fastmap PEBs are scheduled for erasure * and we don't have to care about them but if we face * an power cut before scheduling them we need to * take care of them here. */ if (ubi->lookuptbl[aeb->pnum]) continue; /* * The fastmap update code might not find a free PEB for * writing the fastmap anchor to and then reuses the * current fastmap anchor PEB. When this PEB gets erased * and a power cut happens before it is written again we * must make sure that the fastmap attach code doesn't * find any outdated fastmap anchors, hence we erase the * outdated fastmap anchor PEBs synchronously here. 
*/ if (aeb->vol_id == UBI_FM_SB_VOLUME_ID) sync = true; err = erase_aeb(ubi, aeb, sync); if (err) goto out_free; } found_pebs++; } dbg_wl("found %i PEBs", found_pebs); ubi_assert(ubi->good_peb_count == found_pebs); reserved_pebs = WL_RESERVED_PEBS; ubi_fastmap_init(ubi, &reserved_pebs); if (ubi->avail_pebs < reserved_pebs) { ubi_err(ubi, "no enough physical eraseblocks (%d, need %d)", ubi->avail_pebs, reserved_pebs); if (ubi->corr_peb_count) ubi_err(ubi, "%d PEBs are corrupted and not used", ubi->corr_peb_count); err = -ENOSPC; goto out_free; } ubi->avail_pebs -= reserved_pebs; ubi->rsvd_pebs += reserved_pebs; /* Schedule wear-leveling if needed */ err = ensure_wear_leveling(ubi, 0); if (err) goto out_free; #ifdef CONFIG_MTD_UBI_FASTMAP if (!ubi->ro_mode && !ubi->fm_disabled) ubi_ensure_anchor_pebs(ubi); #endif return 0; out_free: shutdown_work(ubi); tree_destroy(ubi, &ubi->used); tree_destroy(ubi, &ubi->free); tree_destroy(ubi, &ubi->scrub); kfree(ubi->lookuptbl); return err; } /** * protection_queue_destroy - destroy the protection queue. * @ubi: UBI device description object */ static void protection_queue_destroy(struct ubi_device *ubi) { int i; struct ubi_wl_entry *e, *tmp; for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) { list_for_each_entry_safe(e, tmp, &ubi->pq[i], u.list) { list_del(&e->u.list); wl_entry_destroy(ubi, e); } } } /** * ubi_wl_close - close the wear-leveling sub-system. * @ubi: UBI device description object */ void ubi_wl_close(struct ubi_device *ubi) { dbg_wl("close the WL sub-system"); ubi_fastmap_close(ubi); shutdown_work(ubi); protection_queue_destroy(ubi); tree_destroy(ubi, &ubi->used); tree_destroy(ubi, &ubi->erroneous); tree_destroy(ubi, &ubi->free); tree_destroy(ubi, &ubi->scrub); kfree(ubi->lookuptbl); } /** * self_check_ec - make sure that the erase counter of a PEB is correct. 
* @ubi: UBI device description object * @pnum: the physical eraseblock number to check * @ec: the erase counter to check * * This function returns zero if the erase counter of physical eraseblock @pnum * is equivalent to @ec, and a negative error code if not or if an error * occurred. */ static int self_check_ec(struct ubi_device *ubi, int pnum, int ec) { int err; long long read_ec; struct ubi_ec_hdr *ec_hdr; if (!ubi_dbg_chk_gen(ubi)) return 0; ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); if (!ec_hdr) return -ENOMEM; err = ubi_io_read_ec_hdr(ubi, pnum, ec_hdr, 0); if (err && err != UBI_IO_BITFLIPS) { /* The header does not have to exist */ err = 0; goto out_free; } read_ec = be64_to_cpu(ec_hdr->ec); if (ec != read_ec && read_ec - ec > 1) { ubi_err(ubi, "self-check failed for PEB %d", pnum); ubi_err(ubi, "read EC is %lld, should be %d", read_ec, ec); dump_stack(); err = 1; } else err = 0; out_free: kfree(ec_hdr); return err; } /** * self_check_in_wl_tree - check that wear-leveling entry is in WL RB-tree. * @ubi: UBI device description object * @e: the wear-leveling entry to check * @root: the root of the tree * * This function returns zero if @e is in the @root RB-tree and %-EINVAL if it * is not. */ static int self_check_in_wl_tree(const struct ubi_device *ubi, struct ubi_wl_entry *e, struct rb_root *root) { if (!ubi_dbg_chk_gen(ubi)) return 0; if (in_wl_tree(e, root)) return 0; ubi_err(ubi, "self-check failed for PEB %d, EC %d, RB-tree %p ", e->pnum, e->ec, root); dump_stack(); return -EINVAL; } /** * self_check_in_pq - check if wear-leveling entry is in the protection * queue. * @ubi: UBI device description object * @e: the wear-leveling entry to check * * This function returns zero if @e is in @ubi->pq and %-EINVAL if it is not. 
*/ static int self_check_in_pq(const struct ubi_device *ubi, struct ubi_wl_entry *e) { if (!ubi_dbg_chk_gen(ubi)) return 0; if (in_pq(ubi, e)) return 0; ubi_err(ubi, "self-check failed for PEB %d, EC %d, Protect queue", e->pnum, e->ec); dump_stack(); return -EINVAL; } #ifndef CONFIG_MTD_UBI_FASTMAP static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi) { struct ubi_wl_entry *e; e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF, 0); self_check_in_wl_tree(ubi, e, &ubi->free); ubi->free_count--; ubi_assert(ubi->free_count >= 0); rb_erase(&e->u.rb, &ubi->free); return e; } /** * produce_free_peb - produce a free physical eraseblock. * @ubi: UBI device description object * * This function tries to make a free PEB by means of synchronous execution of * pending works. This may be needed if, for example the background thread is * disabled. Returns zero in case of success and a negative error code in case * of failure. */ static int produce_free_peb(struct ubi_device *ubi) { int err; while (!ubi->free.rb_node && ubi->works_count) { spin_unlock(&ubi->wl_lock); dbg_wl("do one work synchronously"); err = do_work(ubi, NULL); spin_lock(&ubi->wl_lock); if (err) return err; } return 0; } /** * ubi_wl_get_peb - get a physical eraseblock. * @ubi: UBI device description object * * This function returns a physical eraseblock in case of success and a * negative error code in case of failure. * Returns with ubi->fm_eba_sem held in read mode! 
*/ int ubi_wl_get_peb(struct ubi_device *ubi) { int err; struct ubi_wl_entry *e; retry: down_read(&ubi->fm_eba_sem); spin_lock(&ubi->wl_lock); if (!ubi->free.rb_node) { if (ubi->works_count == 0) { ubi_err(ubi, "no free eraseblocks"); ubi_assert(list_empty(&ubi->works)); spin_unlock(&ubi->wl_lock); return -ENOSPC; } err = produce_free_peb(ubi); if (err < 0) { spin_unlock(&ubi->wl_lock); return err; } spin_unlock(&ubi->wl_lock); up_read(&ubi->fm_eba_sem); goto retry; } e = wl_get_wle(ubi); prot_queue_add(ubi, e); spin_unlock(&ubi->wl_lock); err = ubi_self_check_all_ff(ubi, e->pnum, ubi->vid_hdr_aloffset, ubi->peb_size - ubi->vid_hdr_aloffset); if (err) { ubi_err(ubi, "new PEB %d does not contain all 0xFF bytes", e->pnum); return err; } return e->pnum; } #else #include "fastmap-wl.c" #endif
2 1 1 1 1 4 1 1 1 3 3 3 3 3 3 3 1 3 3 2 2 3 3 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 3