Total coverage: 248398 (16%) of 1643536
2 4 3 1 4 2 2 2 2 2 2 2 2 2 2 2 1 1 3 3 3 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 
514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 // SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */ /* Kernel module implementing an IP set type: the hash:ip,port,net type */ #include <linux/jhash.h> #include <linux/module.h> #include <linux/ip.h> #include <linux/skbuff.h> #include <linux/errno.h> #include <linux/random.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/netlink.h> #include <net/tcp.h> #include <linux/netfilter.h> #include <linux/netfilter/ipset/pfxlen.h> #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_getport.h> #include <linux/netfilter/ipset/ip_set_hash.h> #define IPSET_TYPE_REV_MIN 0 /* 1 SCTP and UDPLITE support added */ /* 2 Range as input support for IPv4 added */ /* 3 nomatch flag support added */ /* 4 Counters support added */ /* 5 Comments support added */ /* 6 Forceadd support added */ /* 7 skbinfo support added */ #define IPSET_TYPE_REV_MAX 8 /* bucketsize, initval support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>"); IP_SET_MODULE_DESC("hash:ip,port,net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip,port,net"); /* Type specific function prefix */ #define HTYPE hash_ipportnet /* We squeeze the "nomatch" flag into cidr: we don't support cidr == 0 * However this way we have to store internally cidr - 1, * dancing back and forth. 
*/
#define IP_SET_HASH_WITH_NETS_PACKED
#define IP_SET_HASH_WITH_PROTO
#define IP_SET_HASH_WITH_NETS

/* IPv4 variant */

/* Member elements */
/* One IPv4 member of the set: ip, port/proto and the ip2 network.
 * cidr holds the ip2 prefix length minus one (see the "- 1" in
 * hash_ipportnet4_data_netmask() and the "+ 1" in
 * hash_ipportnet4_data_list()), so it fits in seven bits next to the
 * one-bit nomatch flag.
 */
struct hash_ipportnet4_elem {
	__be32 ip;
	__be32 ip2;
	__be16 port;
	u8 cidr:7;
	u8 nomatch:1;
	u8 proto;
};

/* Common functions */

/* Return true if the two elements agree on ip, ip2, cidr, port and proto.
 * The nomatch bit is not part of the comparison and @multi is not used.
 */
static bool
hash_ipportnet4_data_equal(const struct hash_ipportnet4_elem *ip1,
			   const struct hash_ipportnet4_elem *ip2,
			   u32 *multi)
{
	return ip1->ip == ip2->ip &&
	       ip1->ip2 == ip2->ip2 &&
	       ip1->cidr == ip2->cidr &&
	       ip1->port == ip2->port &&
	       ip1->proto == ip2->proto;
}

/* Return -ENOTEMPTY when @elem carries the nomatch flag, 1 otherwise. */
static int
hash_ipportnet4_do_data_match(const struct hash_ipportnet4_elem *elem)
{
	return elem->nomatch ? -ENOTEMPTY : 1;
}

/* Set the nomatch bit from IPSET_FLAG_NOMATCH as found in the upper 16 bits
 * of @flags (hash_ipportnet4_uadt() puts it there with "<< 16").
 */
static void
hash_ipportnet4_data_set_flags(struct hash_ipportnet4_elem *elem, u32 flags)
{
	elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}

/* Exchange *@flags with the element's nomatch bit. */
static void
hash_ipportnet4_data_reset_flags(struct hash_ipportnet4_elem *elem, u8 *flags)
{
	swap(*flags, elem->nomatch);
}

/* AND ip2 with ip_set_netmask(@cidr) and record the prefix as @cidr - 1. */
static void
hash_ipportnet4_data_netmask(struct hash_ipportnet4_elem *elem, u8 cidr)
{
	elem->ip2 &= ip_set_netmask(cidr);
	elem->cidr = cidr - 1;
}

/* Append the element to @skb as netlink attributes.
 *
 * IPSET_ATTR_CIDR2 carries the real prefix length (cidr + 1) and
 * IPSET_ATTR_CADT_FLAGS is only emitted when the nomatch bit is set.
 *
 * Returns false on success, true if any of the nla_put_*() calls failed.
 */
static bool
hash_ipportnet4_data_list(struct sk_buff *skb,
			  const struct hash_ipportnet4_elem *data)
{
	u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;

	if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) ||
	    nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip2) ||
	    nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
	    nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr + 1) ||
	    nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) ||
	    (flags &&
	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
		goto nla_put_failure;
	return false;

nla_put_failure:
	return true;
}

/* Copy ip, port and ip2 of @d into @next. hash_ipportnet4_uadt() uses this
 * to remember in h->next where a range operation stopped, and reads the
 * same fields back when it is called again with retried set.
 */
static void
hash_ipportnet4_data_next(struct hash_ipportnet4_elem *next,
			  const struct hash_ipportnet4_elem *d)
{
	next->ip = d->ip;
	next->port = d->port;
	next->ip2 = d->ip2;
}

#define MTYPE		hash_ipportnet4
#define HOST_MASK	32
#include "ip_set_hash_gen.h"

/* Add/delete/test an element built from the packet @skb.
 *
 * The element's cidr starts as INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) and
 * is forced to HOST_MASK - 1 for IPSET_TEST. Port and protocol come from
 * ip_set_get_ip4_port(); -EINVAL is returned when that call fails. The
 * IPSET_DIM_ONE_SRC/TWO_SRC/THREE_SRC bits of opt->flags are handed to the
 * helpers that fill ip, port and ip2 respectively. ip2 is masked with
 * ip_set_netmask(cidr + 1) before the variant's adt function is called;
 * that function's result is returned. @par is not used.
 */
static int
hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
		     const struct xt_action_param *par,
		     enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
	const struct hash_ipportnet4 *h = set->data;
	ipset_adtfn adtfn = set->variant->adt[adt];
	struct hash_ipportnet4_elem e = {
		.cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
	};
	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);

	if (adt == IPSET_TEST)
		e.cidr = HOST_MASK - 1;

	if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
				 &e.port, &e.proto))
		return -EINVAL;

	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
	ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2);
	e.ip2 &= ip_set_netmask(e.cidr + 1);

	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}

/* Add/delete/test elements described by the netlink attributes in @tb.
 *
 * IPSET_ATTR_IP and IPSET_ATTR_IP2 must be present and the port and
 * CADT_FLAGS attributes must pass the netorder checks, otherwise
 * -IPSET_ERR_PROTOCOL is returned. IPSET_ATTR_PROTO is required
 * (-IPSET_ERR_MISSING_PROTO) and must be non-zero
 * (-IPSET_ERR_INVALID_PROTO). A CIDR/CIDR2 value of 0 or above HOST_MASK
 * gives -IPSET_ERR_INVALID_CIDR.
 *
 * For IPSET_TEST, or when none of IPSET_ATTR_CIDR, IPSET_ATTR_IP_TO,
 * IPSET_ATTR_IP2_TO or a usable IPSET_ATTR_PORT_TO is given, exactly one
 * element is processed. Otherwise every ip/port combination in the
 * requested ranges is visited and, for each, the ip2 range is handed to
 * the set one prefix at a time.
 *
 * A counter limits the work done per call: once it exceeds
 * IPSET_MAX_RANGE, the current element is saved in h->next and -ERANGE is
 * returned; when called with @retried set, ip, port and ip2 restart from
 * h->next.
 *
 * Returns 0 on success or a negative error code. In the single-element
 * path, a result for which ip_set_enomatch() is true is returned negated.
 */
static int
hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
		     enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
	struct hash_ipportnet4 *h = set->data;
	ipset_adtfn adtfn = set->variant->adt[adt];
	struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 };
	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
	u32 ip = 0, ip_to = 0, p = 0, port, port_to;
	u32 ip2_from = 0, ip2_to = 0, ip2, i = 0;
	bool with_ports = false;
	u8 cidr;
	int ret;

	if (tb[IPSET_ATTR_LINENO])
		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);

	if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
		return -IPSET_ERR_PROTOCOL;

	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
	if (ret)
		return ret;

	ret = ip_set_get_extensions(set, tb, &ext);
	if (ret)
		return ret;

	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from);
	if (ret)
		return ret;

	if (tb[IPSET_ATTR_CIDR2]) {
		cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
		if (!cidr || cidr > HOST_MASK)
			return -IPSET_ERR_INVALID_CIDR;
		/* Stored form is prefix length minus one */
		e.cidr = cidr - 1;
	}

	e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);

	if (tb[IPSET_ATTR_PROTO]) {
		e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
		with_ports = ip_set_proto_with_ports(e.proto);

		if (e.proto == 0)
			return -IPSET_ERR_INVALID_PROTO;
	} else {
		return -IPSET_ERR_MISSING_PROTO;
	}

	/* The port value is kept only for protocols with ports and for ICMP */
	if (!(with_ports || e.proto == IPPROTO_ICMP))
		e.port = 0;

	if (tb[IPSET_ATTR_CADT_FLAGS]) {
		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);

		if (cadt_flags & IPSET_FLAG_NOMATCH)
			flags |= (IPSET_FLAG_NOMATCH << 16);
	}

	/* From here on with_ports means "a port range was requested" */
	with_ports = with_ports && tb[IPSET_ATTR_PORT_TO];
	if (adt == IPSET_TEST ||
	    !(tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_IP_TO] || with_ports ||
	      tb[IPSET_ATTR_IP2_TO])) {
		/* Single element: no range walking needed */
		e.ip = htonl(ip);
		e.ip2 = htonl(ip2_from & ip_set_hostmask(e.cidr + 1));
		ret = adtfn(set, &e, &ext, &ext, flags);
		return ip_set_enomatch(ret, flags, adt, set) ? -ret :
		       ip_set_eexist(ret, flags) ? 0 : ret;
	}

	/* Work out the [ip, ip_to] range of the first address */
	ip_to = ip;
	if (tb[IPSET_ATTR_IP_TO]) {
		ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
		if (ret)
			return ret;
		if (ip > ip_to)
			swap(ip, ip_to);
	} else if (tb[IPSET_ATTR_CIDR]) {
		cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);

		if (!cidr || cidr > HOST_MASK)
			return -IPSET_ERR_INVALID_CIDR;
		ip_set_mask_from_to(ip, ip_to, cidr);
	}

	/* Work out the [port, port_to] range (host byte order) */
	port_to = port = ntohs(e.port);
	if (tb[IPSET_ATTR_PORT_TO]) {
		port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
		if (port > port_to)
			swap(port, port_to);
	}

	/* Work out the [ip2_from, ip2_to] range of the second address */
	ip2_to = ip2_from;
	if (tb[IPSET_ATTR_IP2_TO]) {
		ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to);
		if (ret)
			return ret;
		if (ip2_from > ip2_to)
			swap(ip2_from, ip2_to);
		/* With ip2_from <= ip2_to and u32 wrap-around this only
		 * holds for ip2_from == 0 and ip2_to == UINT_MAX, i.e. a
		 * range spanning the whole address space is rejected.
		 */
		if (ip2_from + UINT_MAX == ip2_to)
			return -IPSET_ERR_HASH_RANGE;
	} else {
		ip_set_mask_from_to(ip2_from, ip2_to, e.cidr + 1);
	}

	if (retried) {
		/* Resume from the position saved before -ERANGE */
		ip = ntohl(h->next.ip);
		p = ntohs(h->next.port);
		ip2 = ntohl(h->next.ip2);
	} else {
		p = port;
		ip2 = ip2_from;
	}
	for (; ip <= ip_to; ip++) {
		e.ip = htonl(ip);
		for (; p <= port_to; p++) {
			e.port = htons(p);
			do {
				i++;
				e.ip2 = htonl(ip2);
				/* Picks the prefix for the block starting at
				 * ip2; the returned value is advanced by the
				 * ++ in the loop condition (presumably it is
				 * the last address of that block - confirm
				 * against ip_set_range_to_cidr()).
				 */
				ip2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr);
				e.cidr = cidr - 1;
				if (i > IPSET_MAX_RANGE) {
					hash_ipportnet4_data_next(&h->next,
								  &e);
					return -ERANGE;
				}
				ret = adtfn(set, &e, &ext, &ext, flags);

				if (ret && !ip_set_eexist(ret, flags))
					return ret;

				ret = 0;
			} while (ip2++ < ip2_to);
			/* Next port starts again at the first ip2 block */
			ip2 = ip2_from;
		}
		/* Next ip starts again at the first port */
		p = port;
	}
	return ret;
}

/* IPv6 variant */

/* One IPv6 member of the set; same layout as the IPv4 element except that
 * both addresses are union nf_inet_addr. cidr again holds the ip2 prefix
 * length minus one (see hash_ipportnet6_data_netmask()).
 */
struct hash_ipportnet6_elem {
	union nf_inet_addr ip;
	union nf_inet_addr ip2;
	__be16 port;
	u8 cidr:7;
	u8 nomatch:1;
	u8 proto;
};

/* Common functions */

/* Return true if the two elements agree on ip, ip2, cidr, port and proto.
 * The nomatch bit is not part of the comparison and @multi is not used.
 */
static bool
hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1,
			   const struct hash_ipportnet6_elem *ip2,
			   u32 *multi)
{
	return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) &&
	       ipv6_addr_equal(&ip1->ip2.in6, &ip2->ip2.in6) &&
	       ip1->cidr == ip2->cidr &&
	       ip1->port == ip2->port &&
	       ip1->proto == ip2->proto;
}

/* Return -ENOTEMPTY when @elem carries the nomatch flag, 1 otherwise. */
static int
hash_ipportnet6_do_data_match(const struct hash_ipportnet6_elem *elem)
{
	return elem->nomatch ? -ENOTEMPTY : 1;
}

/* Set the nomatch bit from IPSET_FLAG_NOMATCH as found in the upper 16 bits
 * of @flags (hash_ipportnet6_uadt() puts it there with "<< 16").
 */
static void
hash_ipportnet6_data_set_flags(struct hash_ipportnet6_elem *elem, u32 flags)
{
	elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}

/* Exchange *@flags with the element's nomatch bit. */
static void
hash_ipportnet6_data_reset_flags(struct hash_ipportnet6_elem *elem, u8 *flags)
{
	swap(*flags, elem->nomatch);
}

/* Apply ip6_netmask() with @cidr to ip2 and record the prefix as @cidr - 1. */
static void
hash_ipportnet6_data_netmask(struct hash_ipportnet6_elem *elem, u8 cidr)
{
	ip6_netmask(&elem->ip2, cidr);
	elem->cidr = cidr - 1;
}

/* Append the element to @skb as netlink attributes.
 *
 * IPSET_ATTR_CIDR2 carries the real prefix length (cidr + 1) and
 * IPSET_ATTR_CADT_FLAGS is only emitted when the nomatch bit is set.
 *
 * Returns false on success, true if any of the nla_put_*() calls failed.
 */
static bool
hash_ipportnet6_data_list(struct sk_buff *skb,
			  const struct hash_ipportnet6_elem *data)
{
	u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;

	if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) ||
	    nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip2.in6) ||
	    nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
	    nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr + 1) ||
	    nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) ||
	    (flags &&
	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
		goto nla_put_failure;
	return false;

nla_put_failure:
	return true;
}

/* Copy only the port of @d into @next; the port is the only dimension
 * hash_ipportnet6_uadt() iterates over and reads back on retry.
 */
static void
hash_ipportnet6_data_next(struct hash_ipportnet6_elem *next,
			  const struct hash_ipportnet6_elem *d)
{
	next->port = d->port;
}

#undef MTYPE
#undef HOST_MASK

#define MTYPE		hash_ipportnet6
#define HOST_MASK	128
#define IP_SET_EMIT_CREATE
#include "ip_set_hash_gen.h"

/* Add/delete/test an element built from the IPv6 packet @skb.
 *
 * Mirrors hash_ipportnet4_kadt(): cidr starts as
 * INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) and is forced to HOST_MASK - 1
 * for IPSET_TEST; -EINVAL is returned when ip_set_get_ip6_port() fails;
 * ip2 is masked with ip6_netmask() to cidr + 1 bits before the variant's
 * adt function is called. @par is not used.
 */
static int
hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
		     const struct xt_action_param *par,
		     enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
	const struct hash_ipportnet6 *h = set->data;
	ipset_adtfn adtfn = set->variant->adt[adt];
	struct hash_ipportnet6_elem e = {
		.cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
	};
	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);

	if (adt == IPSET_TEST)
		e.cidr = HOST_MASK - 1;

	if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
				 &e.port, &e.proto))
		return -EINVAL;

	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
	ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2.in6);
	ip6_netmask(&e.ip2, e.cidr + 1);

	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}

/* Add/delete/test IPv6 elements described by the netlink attributes in @tb.
 *
 * The attribute checks match the IPv4 function, with two differences
 * visible here: IPSET_ATTR_IP_TO is refused with
 * -IPSET_ERR_HASH_RANGE_UNSUPPORTED, and IPSET_ATTR_CIDR is only accepted
 * when it equals HOST_MASK (else -IPSET_ERR_INVALID_CIDR). The only range
 * that is expanded is the port range [PORT, PORT_TO]; with @retried set
 * the walk restarts from h->next.port.
 *
 * Returns 0 on success or a negative error code. In the single-element
 * path, a result for which ip_set_enomatch() is true is returned negated.
 */
static int
hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
		     enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
	const struct hash_ipportnet6 *h = set->data;
	ipset_adtfn adtfn = set->variant->adt[adt];
	struct hash_ipportnet6_elem e = { .cidr = HOST_MASK - 1 };
	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
	u32 port, port_to;
	bool with_ports = false;
	u8 cidr;
	int ret;

	if (tb[IPSET_ATTR_LINENO])
		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);

	if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
		return -IPSET_ERR_PROTOCOL;
	if (unlikely(tb[IPSET_ATTR_IP_TO]))
		return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
	if (unlikely(tb[IPSET_ATTR_CIDR])) {
		cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);

		if (cidr != HOST_MASK)
			return -IPSET_ERR_INVALID_CIDR;
	}

	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
	if (ret)
		return ret;

	ret = ip_set_get_extensions(set, tb, &ext);
	if (ret)
		return ret;

	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip2);
	if (ret)
		return ret;

	if (tb[IPSET_ATTR_CIDR2]) {
		cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
		if (!cidr || cidr > HOST_MASK)
			return -IPSET_ERR_INVALID_CIDR;
		/* Stored form is prefix length minus one */
		e.cidr = cidr - 1;
	}

	ip6_netmask(&e.ip2, e.cidr + 1);

	e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);

	if (tb[IPSET_ATTR_PROTO]) {
		e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
		with_ports = ip_set_proto_with_ports(e.proto);

		if (e.proto == 0)
			return -IPSET_ERR_INVALID_PROTO;
	} else {
		return -IPSET_ERR_MISSING_PROTO;
	}

	/* The port value is kept only for protocols with ports and ICMPv6 */
	if (!(with_ports || e.proto == IPPROTO_ICMPV6))
		e.port = 0;

	if (tb[IPSET_ATTR_CADT_FLAGS]) {
		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);

		if (cadt_flags & IPSET_FLAG_NOMATCH)
			flags |= (IPSET_FLAG_NOMATCH << 16);
	}

	if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
		/* Single element: no port range to walk */
		ret = adtfn(set, &e, &ext, &ext, flags);
		return ip_set_enomatch(ret, flags, adt, set) ? -ret :
		       ip_set_eexist(ret, flags) ? 0 : ret;
	}

	port = ntohs(e.port);
	port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
	if (port > port_to)
		swap(port, port_to);

	if (retried)
		port = ntohs(h->next.port);
	for (; port <= port_to; port++) {
		e.port = htons(port);
		ret = adtfn(set, &e, &ext, &ext, flags);

		/* Results accepted by ip_set_eexist() do not stop the walk */
		if (ret && !ip_set_eexist(ret, flags))
			return ret;

		ret = 0;
	}
	return ret;
}

/* Descriptor of the "hash:ip,port,net" set type, registered by
 * hash_ipportnet_init(). create_policy and adt_policy give the netlink
 * attribute types accepted at set creation and for add/del/test requests.
 * hash_ipportnet_create is not defined in the visible text; presumably it
 * is emitted by the ip_set_hash_gen.h include under IP_SET_EMIT_CREATE.
 */
static struct ip_set_type hash_ipportnet_type __read_mostly = {
	.name		= "hash:ip,port,net",
	.protocol	= IPSET_PROTOCOL,
	.features	= IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2 |
			  IPSET_TYPE_NOMATCH,
	.dimension	= IPSET_DIM_THREE,
	.family		= NFPROTO_UNSPEC,
	.revision_min	= IPSET_TYPE_REV_MIN,
	.revision_max	= IPSET_TYPE_REV_MAX,
	.create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
	.create		= hash_ipportnet_create,
	.create_policy	= {
		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
		[IPSET_ATTR_INITVAL]	= { .type = NLA_U32 },
		[IPSET_ATTR_BUCKETSIZE]	= { .type = NLA_U8 },
		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
	},
	.adt_policy	= {
		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
		[IPSET_ATTR_IP_TO]	= { .type = NLA_NESTED },
		[IPSET_ATTR_IP2]	= { .type = NLA_NESTED },
		[IPSET_ATTR_IP2_TO]	= { .type = NLA_NESTED },
		[IPSET_ATTR_PORT]	= { .type = NLA_U16 },
		[IPSET_ATTR_PORT_TO]	= { .type = NLA_U16 },
		[IPSET_ATTR_CIDR]	= { .type = NLA_U8 },
		[IPSET_ATTR_CIDR2]	= { .type = NLA_U8 },
		[IPSET_ATTR_PROTO]	= { .type = NLA_U8 },
		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING,
					    .len  = IPSET_MAX_COMMENT_SIZE },
		[IPSET_ATTR_SKBMARK]	= { .type = NLA_U64 },
		[IPSET_ATTR_SKBPRIO]	= { .type = NLA_U32 },
		[IPSET_ATTR_SKBQUEUE]	= { .type = NLA_U16 },
	},
	.me		= THIS_MODULE,
};
/* Module entry point: register the hash:ip,port,net set type and hand the
 * result of the registration back to the module loader unchanged.
 */
static int __init hash_ipportnet_init(void)
{
	int err = ip_set_type_register(&hash_ipportnet_type);

	return err;
}

/* Module exit point: run rcu_barrier() first, then unregister the type. */
static void __exit hash_ipportnet_fini(void)
{
	struct ip_set_type *type = &hash_ipportnet_type;

	rcu_barrier();
	ip_set_type_unregister(type);
}

module_init(hash_ipportnet_init);
module_exit(hash_ipportnet_fini);
31 7 96 15 20 20 23 9 9 19 12 12 20 20 9 109 55 2 101 103 57 106 17 9 23 20 20 5 31 31 31 22 2 11 31 5 42 29 39 34 28 14 20 15 22 17 1 23 31 19 13 1 30 13 53 16 38 15 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 // SPDX-License-Identifier: GPL-2.0 /* Generic part */ typedef struct { block_t *p; block_t key; struct buffer_head *bh; } Indirect; static DEFINE_RWLOCK(pointers_lock); static inline void add_chain(Indirect *p, struct buffer_head *bh, block_t *v) { p->key = *(p->p = v); p->bh = bh; } static inline int verify_chain(Indirect *from, Indirect *to) { while (from <= to && from->key == *from->p) from++; return (from > to); } static inline block_t 
/*
 * The long line above is kept exactly as found (hit counts, line numbers
 * and the start of the file share one physical line). It carries:
 *
 *   Indirect       - one step of a lookup chain: p points at the slot that
 *                    holds a block number, key is the value read from that
 *                    slot, bh is the buffer containing the slot (NULL for
 *                    the first step, see get_branch()).
 *   pointers_lock  - rwlock taken around reads and updates of the slots.
 *   add_chain()    - fills one Indirect: p->p = v, p->key = *v, p->bh = bh.
 *   verify_chain() - returns non-zero when every element in [from, to]
 *                    still has key equal to the current value of *p.
 *
 * block_end() below returns a pointer just past the last byte of the
 * buffer's data (b_data + b_size), typed as block_t *.
 */
*block_end(struct buffer_head *bh)
{
	return (block_t *)((char*)bh->b_data + bh->b_size);
}

/*
 * Follow the chain of block pointers described by @offsets, @depth levels
 * deep, recording each step in @chain.
 *
 * Returns NULL when all @depth steps were read and none of the keys was
 * zero. Otherwise returns a pointer to the last element of @chain that was
 * filled in, with *@err telling why the walk stopped:
 *   0       - that element's key is zero (nothing allocated there),
 *   -EIO    - sb_bread() of the next block failed,
 *   -EAGAIN - verify_chain() found that the chain changed while reading;
 *             the buffer just read has already been released.
 */
static inline Indirect *get_branch(struct inode *inode,
					int depth,
					int *offsets,
					Indirect chain[DEPTH],
					int *err)
{
	struct super_block *sb = inode->i_sb;
	Indirect *p = chain;
	struct buffer_head *bh;

	*err = 0;
	/* i_data is not going away, no lock needed */
	add_chain (chain, NULL, i_data(inode) + *offsets);
	if (!p->key)
		goto no_block;
	while (--depth) {
		bh = sb_bread(sb, block_to_cpu(p->key));
		if (!bh)
			goto failure;
		read_lock(&pointers_lock);
		/* Re-check everything read so far before trusting bh */
		if (!verify_chain(chain, p))
			goto changed;
		add_chain(++p, bh, (block_t *)bh->b_data + *++offsets);
		read_unlock(&pointers_lock);
		if (!p->key)
			goto no_block;
	}
	return NULL;

changed:
	read_unlock(&pointers_lock);
	brelse(bh);
	*err = -EAGAIN;
	goto no_block;
failure:
	*err = -EIO;
no_block:
	return p;
}

/*
 * Allocate @num new blocks and link them into a chain: block n is written
 * into slot offsets[n] of the zero-filled block n - 1. branch[n].key gets
 * the block numbers; for n >= 1 branch[n].bh and branch[n].p describe the
 * slot inside the parent buffer. branch[0].p and branch[0].bh are not
 * touched here.
 *
 * Returns 0 when all @num blocks were set up. On failure everything
 * allocated so far is released and -ENOSPC (no block available) or
 * -ENOMEM (sb_getblk() failed) is returned.
 */
static int alloc_branch(struct inode *inode,
			     int num,
			     int *offsets,
			     Indirect *branch)
{
	int n = 0;
	int i;
	int parent = minix_new_block(inode);
	int err = -ENOSPC;

	branch[0].key = cpu_to_block(parent);
	if (parent) for (n = 1; n < num; n++) {
		struct buffer_head *bh;
		/* Allocate the next block */
		int nr = minix_new_block(inode);
		if (!nr)
			break;
		branch[n].key = cpu_to_block(nr);
		bh = sb_getblk(inode->i_sb, parent);
		if (!bh) {
			/* nr is not counted in n yet, so free it here */
			minix_free_block(inode, nr);
			err = -ENOMEM;
			break;
		}
		lock_buffer(bh);
		memset(bh->b_data, 0, bh->b_size);
		branch[n].bh = bh;
		branch[n].p = (block_t*) bh->b_data + offsets[n];
		*branch[n].p = branch[n].key;
		set_buffer_uptodate(bh);
		unlock_buffer(bh);
		mark_buffer_dirty_inode(bh, inode);
		parent = nr;
	}
	if (n == num)
		return 0;

	/* Allocation failed, free what we already allocated */
	for (i = 1; i < n; i++)
		bforget(branch[i].bh);
	for (i = 0; i < n; i++)
		minix_free_block(inode, block_to_cpu(branch[i].key));
	return err;
}

/*
 * Attach a freshly allocated branch by storing where->key into the slot
 * where->p, under the write side of pointers_lock.
 *
 * The store only happens if the chain up to where - 1 is still valid and
 * the slot is still zero. Returns 0 on success. Otherwise the @num
 * elements starting at @where are undone (buffers of elements 1..num-1
 * forgotten, all @num blocks freed) and -EAGAIN is returned.
 */
static inline int splice_branch(struct inode *inode,
				     Indirect chain[DEPTH],
				     Indirect *where,
				     int num)
{
	int i;

	write_lock(&pointers_lock);

	/* Verify that place we are splicing to is still there and vacant */
	if (!verify_chain(chain, where-1) || *where->p)
		goto changed;

	*where->p = where->key;

	write_unlock(&pointers_lock);

	/* We are done with atomic stuff, now do the rest of housekeeping */

	inode_set_ctime_current(inode);

	/* had we spliced it onto indirect block? */
	if (where->bh)
		mark_buffer_dirty_inode(where->bh, inode);

	mark_inode_dirty(inode);
	return 0;

changed:
	write_unlock(&pointers_lock);
	for (i = 1; i < num; i++)
		bforget(where[i].bh);
	for (i = 0; i < num; i++)
		minix_free_block(inode, block_to_cpu(where[i].key));
	return -EAGAIN;
}

/*
 * Map file block @block of @inode into @bh, allocating the missing part of
 * the chain when @create is non-zero.
 *
 * Returns -EIO when block_to_path() reports depth 0 or an indirect block
 * could not be read, the error from alloc_branch() when allocation fails,
 * and otherwise the err value left by get_branch() (0 both for a mapped
 * block and for a hole that was looked up without @create). When the chain
 * changes underneath us (-EAGAIN from get_branch() or splice_branch()),
 * the buffers held so far are released and the lookup starts over.
 */
static int get_block(struct inode * inode, sector_t block,
			struct buffer_head *bh, int create)
{
	int err = -EIO;
	int offsets[DEPTH];
	Indirect chain[DEPTH];
	Indirect *partial;
	int left;
	int depth = block_to_path(inode, block, offsets);

	if (depth == 0)
		goto out;

reread:
	partial = get_branch(inode, depth, offsets, chain, &err);

	/* Simplest case - block found, no allocation needed */
	if (!partial) {
got_it:
		map_bh(bh, inode->i_sb, block_to_cpu(chain[depth-1].key));
		/* Clean up and exit */
		partial = chain+depth-1; /* the whole chain */
		goto cleanup;
	}

	/* Next simple case - plain lookup or failed read of indirect block */
	if (!create || err == -EIO) {
cleanup:
		/* chain[0] has no buffer, so stop before reaching it */
		while (partial > chain) {
			brelse(partial->bh);
			partial--;
		}
out:
		return err;
	}

	/*
	 * Indirect block might be removed by truncate while we were
	 * reading it. Handling of that case (forget what we've got and
	 * reread) is taken out of the main path.
	 */
	if (err == -EAGAIN)
		goto changed;

	/* Number of chain elements still missing, starting at partial */
	left = (chain + depth) - partial;
	err = alloc_branch(inode, left, offsets+(partial-chain), partial);
	if (err)
		goto cleanup;

	if (splice_branch(inode, chain, partial, left) < 0)
		goto changed;

	set_buffer_new(bh);
	goto got_it;

changed:
	while (partial > chain) {
		brelse(partial->bh);
		partial--;
	}
	goto reread;
}

/* Return 1 if every block_t in [p, q) is zero, 0 otherwise. */
static inline int all_zeroes(block_t *p, block_t *q)
{
	while (p < q)
		if (*p++)
			return 0;
	return 1;
}

/*
 * Helper for truncate(): locate the part of the chain for @offsets that
 * must survive the truncation.
 *
 * Trailing zero offsets are dropped first (k is the resulting depth), then
 * the chain is read with get_branch(). Under the write lock the function
 * walks back over indirect blocks whose slots before the chain's slot are
 * all zero. It then either steps the slot pointer back by one (when it
 * never moved off the deepest element and that element is not chain[0]),
 * or detaches the subtree by copying its block number to *@top and
 * zeroing the slot. *@top is 0 when nothing was detached.
 *
 * Returns the chain element at which the caller continues; buffers of
 * deeper elements have been released.
 * NOTE(review): the error left in err by get_branch() is not examined
 * here - confirm that is intended.
 */
static Indirect *find_shared(struct inode *inode,
				int depth,
				int offsets[DEPTH],
				Indirect chain[DEPTH],
				block_t *top)
{
	Indirect *partial, *p;
	int k, err;

	*top = 0;
	for (k = depth; k > 1 && !offsets[k-1]; k--)
		;
	partial = get_branch(inode, k, offsets, chain, &err);

	write_lock(&pointers_lock);
	if (!partial)
		partial = chain + k-1;
	/* Slot was filled in after get_branch() read it as zero */
	if (!partial->key && *partial->p) {
		write_unlock(&pointers_lock);
		goto no_top;
	}
	for (p=partial;p>chain && all_zeroes((block_t*)p->bh->b_data,p->p);p--)
		;
	if (p == chain + k - 1 && p > chain) {
		p->p--;
	} else {
		*top = *p->p;
		*p->p = 0;
	}
	write_unlock(&pointers_lock);

	while(partial > p)
	{
		brelse(partial->bh);
		partial--;
	}
no_top:
	return partial;
}

/* Zero every non-zero slot in [p, q) and free the block it referred to. */
static inline void free_data(struct inode *inode, block_t *p, block_t *q)
{
	unsigned long nr;

	for ( ; p < q ; p++) {
		nr = block_to_cpu(*p);
		if (nr) {
			*p = 0;
			minix_free_block(inode, nr);
		}
	}
}

/*
 * Free the subtrees referenced from the slots in [p, q). @depth is the
 * number of indirection levels below these slots; at depth 0 the slots
 * refer to data blocks and free_data() does the work. Each slot is zeroed
 * before its block is read; a block that cannot be read is skipped
 * without being freed.
 */
static void free_branches(struct inode *inode, block_t *p, block_t *q, int depth)
{
	struct buffer_head * bh;
	unsigned long nr;

	if (depth--) {
		for ( ; p < q ; p++) {
			nr = block_to_cpu(*p);
			if (!nr)
				continue;
			*p = 0;
			bh = sb_bread(inode->i_sb, nr);
			if (!bh)
				continue;
			free_branches(inode, (block_t*)bh->b_data,
				      block_end(bh), depth);
			bforget(bh);
			minix_free_block(inode, nr);
			mark_inode_dirty(inode);
		}
	} else
		free_data(inode, p, q);
}

/*
 * Release all blocks of @inode beyond its current i_size.
 *
 * iblock is the index of the first block lying entirely past i_size. The
 * partial page at i_size is handled by block_truncate_page(). If iblock
 * falls in the direct area, the direct slots from there on are freed;
 * otherwise the partially used branch is trimmed with find_shared() and
 * free_branches(). In both cases the indirect trees that lie wholly
 * beyond the new end are then freed, and mtime/ctime are updated.
 */
static inline void truncate (struct inode * inode)
{
	struct super_block *sb = inode->i_sb;
	block_t *idata = i_data(inode);
	int offsets[DEPTH];
	Indirect chain[DEPTH];
	Indirect *partial;
	block_t nr = 0;
	int n;
	int first_whole;
	long iblock;

	/* Round i_size up to a whole number of blocks */
	iblock = (inode->i_size + sb->s_blocksize -1) >> sb->s_blocksize_bits;
	block_truncate_page(inode->i_mapping, inode->i_size, get_block);

	n = block_to_path(inode, iblock, offsets);
	if (!n)
		return;

	if (n == 1) {
		free_data(inode, idata+offsets[0], idata + DIRECT);
		first_whole = 0;
		goto do_indirects;
	}

	first_whole = offsets[0] + 1 - DIRECT;
	partial = find_shared(inode, n, offsets, chain, &nr);
	if (nr) {
		/* The slot zeroed by find_shared() lives in the inode when
		 * partial is chain[0], otherwise in partial's buffer.
		 */
		if (partial == chain)
			mark_inode_dirty(inode);
		else
			mark_buffer_dirty_inode(partial->bh, inode);
		free_branches(inode, &nr, &nr+1, (chain+n-1) - partial);
	}
	/* Clear the ends of indirect blocks on the shared branch */
	while (partial > chain) {
		free_branches(inode, partial->p + 1, block_end(partial->bh),
				(chain+n-1) - partial);
		mark_buffer_dirty_inode(partial->bh, inode);
		brelse (partial->bh);
		partial--;
	}
do_indirects:
	/* Kill the remaining (whole) subtrees */
	while (first_whole < DEPTH-1) {
		nr = idata[DIRECT+first_whole];
		if (nr) {
			idata[DIRECT+first_whole] = 0;
			mark_inode_dirty(inode);
			free_branches(inode, &nr, &nr+1, first_whole+1);
		}
		first_whole++;
	}
	inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
	mark_inode_dirty(inode);
}

/*
 * Return the number of blocks a file of @size bytes occupies: the data
 * blocks plus, level by level, the indirect blocks needed to reference
 * whatever does not fit in the DIRECT slots.
 * NOTE(review): the shift uses BLOCK_SIZE_BITS + k with
 * k = s_blocksize_bits - 10, which equals s_blocksize_bits only if
 * BLOCK_SIZE_BITS is 10 - confirm against its definition.
 */
static inline unsigned nblocks(loff_t size, struct super_block *sb)
{
	int k = sb->s_blocksize_bits - 10;
	unsigned blocks, res, direct = DIRECT, i = DEPTH;
	blocks = (size + sb->s_blocksize - 1) >> (BLOCK_SIZE_BITS + k);
	res = blocks;
	while (--i && blocks > direct) {
		/* Blocks at this level that need a pointer one level up,
		 * divided (rounding up) by the pointers per block.
		 */
		blocks -= direct;
		blocks += sb->s_blocksize/sizeof(block_t) - 1;
		blocks /= sb->s_blocksize/sizeof(block_t);
		res += blocks;
		direct = 1;
	}
	return res;
}
38 38 48 1 48 1 1 26 24 1 6 11 2 24 12 34 29 5 6 1 32 24 18 15 29 26 14 27 13 2 26 2 15 7 6 2 47 48 48 48 28 6 1 3 3 17 1 14 5 30 30 30 30 47 43 27 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 
491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 // SPDX-License-Identifier: GPL-2.0 /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * The IP fragmentation functionality. * * Authors: Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG> * Alan Cox <alan@lxorguk.ukuu.org.uk> * * Fixes: * Alan Cox : Split from ip.c , see ip_input.c for history. * David S. Miller : Begin massive cleanup... * Andi Kleen : Add sysctls. * xxxx : Overlapfrag bug. * Ultima : ip_expire() kernel panic. * Bill Hawes : Frag accounting and evictor fixes. * John McDonald : 0 length frag bug. * Alexey Kuznetsov: SMP races, threading, cleanup. * Patrick McHardy : LRU queue of frag heads for evictor. 
*/ #define pr_fmt(fmt) "IPv4: " fmt #include <linux/compiler.h> #include <linux/module.h> #include <linux/types.h> #include <linux/mm.h> #include <linux/jiffies.h> #include <linux/skbuff.h> #include <linux/list.h> #include <linux/ip.h> #include <linux/icmp.h> #include <linux/netdevice.h> #include <linux/jhash.h> #include <linux/random.h> #include <linux/slab.h> #include <net/route.h> #include <net/dst.h> #include <net/sock.h> #include <net/ip.h> #include <net/icmp.h> #include <net/checksum.h> #include <net/inetpeer.h> #include <net/inet_frag.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/inet.h> #include <linux/netfilter_ipv4.h> #include <net/inet_ecn.h> #include <net/l3mdev.h> /* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6 * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c * as well. Or notify me, at least. --ANK */ static const char ip_frag_cache_name[] = "ip4-frags"; /* Describe an entry in the "incomplete datagrams" queue. 
*/
/* Per-datagram reassembly state. The generic queue @q is embedded so that
 * container_of() can recover the ipq from an inet_frag_queue pointer.
 * @iif is used by ip_expire() to look up the device for the ICMP reply;
 * @rid and @peer are consumed by ip_frag_too_far().
 */
struct ipq {
	struct inet_frag_queue q;

	u8		ecn; /* RFC3168 support */
	u16		max_df_size; /* largest frag with DF set seen */
	int             iif;
	unsigned int    rid;
	struct inet_peer *peer;
};

/* Turn the ECN field of @tos into a one-hot bit: 1 << (tos & INET_ECN_MASK). */
static u8 ip4_frag_ecn(u8 tos)
{
	return 1 << (tos & INET_ECN_MASK);
}

static struct inet_frags ip4_frags;

static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
			 struct sk_buff *prev_tail, struct net_device *dev,
			 int *refs);


/* Initialise a new queue from the lookup key @a (a frag_v4_compare_key).
 *
 * The key is copied into the queue and ecn is cleared. When the fqdir's
 * max_dist is non-zero, the peer for (saddr, vif) is looked up under RCU
 * and kept only if a reference could be taken with
 * refcount_inc_not_zero(); otherwise qp->peer is NULL.
 */
static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
{
	struct ipq *qp = container_of(q, struct ipq, q);
	const struct frag_v4_compare_key *key = a;
	struct net *net = q->fqdir->net;
	struct inet_peer *p = NULL;

	q->key.v4 = *key;
	qp->ecn = 0;
	if (q->fqdir->max_dist) {
		rcu_read_lock();
		p = inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif);
		if (p && !refcount_inc_not_zero(&p->refcnt))
			p = NULL;
		rcu_read_unlock();
	}
	qp->peer = p;
}

/* Release the peer held by the queue, if there is one. */
static void ip4_frag_free(struct inet_frag_queue *q)
{
	struct ipq *qp;

	qp = container_of(q, struct ipq, q);
	if (qp->peer)
		inet_putpeer(qp->peer);
}

/* Return true for defrag users for which ip_expire() sends the ICMP error
 * only when the route of the head fragment is RTN_LOCAL: AF_PACKET and the
 * two conntrack ranges.
 * NOTE(review): the upper bound of the bridge range is spelled without an
 * _END suffix, unlike the CONNTRACK_IN one; presumably that is how the
 * enumerator is named - verify against the enum definition.
 */
static bool frag_expire_skip_icmp(u32 user)
{
	return user == IP_DEFRAG_AF_PACKET ||
	       ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_IN,
					 __IP_DEFRAG_CONNTRACK_IN_END) ||
	       ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_BRIDGE_IN,
					 __IP_DEFRAG_CONNTRACK_BRIDGE_IN);
}

/*
 * Oops, a fragment queue timed out.  Kill it and send an ICMP reply.
 *
 * Timer callback for the queue containing @t. Runs under rcu_read_lock()
 * and the queue spinlock. Nothing is done if the queue already completed.
 * Otherwise the queue is marked INET_FRAG_DROP and killed; if the fqdir is
 * dead the queue is just flushed. The failure counters are bumped, and an
 * ICMP "time exceeded / fragment reassembly" message is sent for the head
 * fragment when the first fragment was received, its input device still
 * exists, the route lookup succeeds and frag_expire_skip_icmp() together
 * with the route type does not veto it. The queue lock is dropped before
 * icmp_send(). On every path the head skb (possibly NULL) is passed to
 * kfree_skb_reason() with the recorded reason and refs references are
 * dropped with inet_frag_putn().
 */
static void ip_expire(struct timer_list *t)
{
	enum skb_drop_reason reason = SKB_DROP_REASON_FRAG_REASM_TIMEOUT;
	struct inet_frag_queue *frag = timer_container_of(frag, t, timer);
	const struct iphdr *iph;
	struct sk_buff *head = NULL;
	struct net *net;
	struct ipq *qp;
	int refs = 1;

	qp = container_of(frag, struct ipq, q);
	net = qp->q.fqdir->net;

	rcu_read_lock();

	spin_lock(&qp->q.lock);

	if (qp->q.flags & INET_FRAG_COMPLETE)
		goto out;

	qp->q.flags |= INET_FRAG_DROP;
	inet_frag_kill(&qp->q, &refs);

	/* Paired with WRITE_ONCE() in fqdir_pre_exit(). */
	if (READ_ONCE(qp->q.fqdir->dead)) {
		inet_frag_queue_flush(&qp->q, 0);
		goto out;
	}

	__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
	__IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT);

	if (!(qp->q.flags & INET_FRAG_FIRST_IN))
		goto out;

	/* sk_buff::dev and sk_buff::rbnode are unionized. So we
	 * pull the head out of the tree in order to be able to
	 * deal with head->dev.
	 */
	head = inet_frag_pull_head(&qp->q);
	if (!head)
		goto out;
	head->dev = dev_get_by_index_rcu(net, qp->iif);
	if (!head->dev)
		goto out;


	/* skb has no dst, perform route lookup again */
	iph = ip_hdr(head);
	reason = ip_route_input_noref(head, iph->daddr, iph->saddr,
				      ip4h_dscp(iph), head->dev);
	if (reason)
		goto out;

	/* Only an end host needs to send an ICMP
	 * "Fragment Reassembly Timeout" message, per RFC792.
	 */
	reason = SKB_DROP_REASON_FRAG_REASM_TIMEOUT;
	if (frag_expire_skip_icmp(qp->q.key.v4.user) &&
	    (skb_rtable(head)->rt_type != RTN_LOCAL))
		goto out;

	spin_unlock(&qp->q.lock);
	icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
	goto out_rcu_unlock;

out:
	spin_unlock(&qp->q.lock);
out_rcu_unlock:
	rcu_read_unlock();
	kfree_skb_reason(head, reason);
	inet_frag_putn(&qp->q, refs);
}

/* Find the correct entry in the "incomplete datagrams" queue for
 * this IP datagram, and create new one, if nothing is found.
 *
 * The lookup key is built from the header's saddr, daddr, id and protocol
 * plus @user and @vif. Returns NULL when inet_frag_find() returns NULL.
 */
static struct ipq *ip_find(struct net *net, struct iphdr *iph,
			   u32 user, int vif)
{
	struct frag_v4_compare_key key = {
		.saddr = iph->saddr,
		.daddr = iph->daddr,
		.user = user,
		.vif = vif,
		.id = iph->id,
		.protocol = iph->protocol,
	};
	struct inet_frag_queue *q;

	q = inet_frag_find(net->ipv4.fqdir, &key);
	if (!q)
		return NULL;

	return container_of(q, struct ipq, q);
}

/* Is the fragment too far ahead to be part of ipq? 
*/
/* Returns non-zero when the queue already holds fragments and the peer's
 * rid counter has advanced by more than fqdir->max_dist since the previous
 * fragment of this queue was seen. Always returns 0 when there is no peer
 * or max_dist is 0. Every call increments peer->rid and stores the new
 * value in qp->rid; a positive result also bumps REASMFAILS.
 */
static int ip_frag_too_far(struct ipq *qp)
{
	struct inet_peer *peer = qp->peer;
	unsigned int max = qp->q.fqdir->max_dist;
	unsigned int start, end;

	int rc;

	if (!peer || !max)
		return 0;

	start = qp->rid;
	end = atomic_inc_return(&peer->rid);
	qp->rid = end;

	/* Unsigned subtraction, so counter wrap-around is harmless. */
	rc = qp->q.fragments_tail && (end - start) > max;

	if (rc)
		__IP_INC_STATS(qp->q.fqdir->net, IPSTATS_MIB_REASMFAILS);

	return rc;
}

/* Reset a queue so it can start collecting fragments from scratch.
 *
 * Re-arms the expiry timer first; if mod_timer_pending() returns 0 the
 * queue is left untouched and -ETIMEDOUT is returned. Otherwise all queued
 * fragments are flushed (drop reason FRAG_TOO_FAR) and the per-queue
 * bookkeeping is cleared. Returns 0 on success.
 */
static int ip_frag_reinit(struct ipq *qp)
{
	if (!mod_timer_pending(&qp->q.timer, jiffies + qp->q.fqdir->timeout))
		return -ETIMEDOUT;

	inet_frag_queue_flush(&qp->q, SKB_DROP_REASON_FRAG_TOO_FAR);

	qp->q.flags = 0;
	qp->q.len = 0;
	qp->q.meat = 0;
	qp->q.rb_fragments = RB_ROOT;
	qp->q.fragments_tail = NULL;
	qp->q.last_run_head = NULL;
	qp->iif = 0;
	qp->ecn = 0;

	return 0;
}

/* Add new segment to existing queue.
 *
 * @qp:   queue the fragment belongs to (ip_defrag() calls this with
 *        qp->q.lock held)
 * @skb:  the fragment; consumed on every path (queued, handed to
 *        ip_frag_reasm(), or freed)
 * @refs: reference counter passed through to inet_frag_kill() and
 *        ip_frag_reasm()
 *
 * Returns the result of ip_frag_reasm() when this fragment completes the
 * datagram (0 on success), -EINPROGRESS when the fragment was queued and
 * more are needed, or a negative errno after freeing @skb.
 */
static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb, int *refs)
{
	struct net *net = qp->q.fqdir->net;
	int ihl, end, flags, offset;
	struct sk_buff *prev_tail;
	struct net_device *dev;
	unsigned int fragsize;
	int err = -ENOENT;
	SKB_DR(reason);
	u8 ecn;

	/* If reassembly is already done, @skb must be a duplicate frag. */
	if (qp->q.flags & INET_FRAG_COMPLETE) {
		SKB_DR_SET(reason, DUP_FRAG);
		goto err;
	}

	/* The queue is only killed here when the fragment is too far ahead
	 * AND the attempt to reinitialise the queue failed.
	 */
	if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
	    unlikely(ip_frag_too_far(qp)) &&
	    unlikely(err = ip_frag_reinit(qp))) {
		inet_frag_kill(&qp->q, refs);
		goto err;
	}

	ecn = ip4_frag_ecn(ip_hdr(skb)->tos);
	offset = ntohs(ip_hdr(skb)->frag_off);
	flags = offset & ~IP_OFFSET;
	offset &= IP_OFFSET;
	offset <<= 3;		/* offset is in 8-byte chunks */
	ihl = ip_hdrlen(skb);

	/* Determine the position of this fragment. */
	end = offset + skb->len - skb_network_offset(skb) - ihl;
	err = -EINVAL;

	/* Is this the final fragment? */
	if ((flags & IP_MF) == 0) {
		/* If we already have some bits beyond end
		 * or have different end, the segment is corrupted.
		 */
		if (end < qp->q.len ||
		    ((qp->q.flags & INET_FRAG_LAST_IN) && end != qp->q.len))
			goto discard_qp;
		qp->q.flags |= INET_FRAG_LAST_IN;
		qp->q.len = end;
	} else {
		/* A non-final fragment is truncated down to a multiple of
		 * 8 bytes, which invalidates a computed checksum.
		 */
		if (end&7) {
			end &= ~7;
			if (skb->ip_summed != CHECKSUM_UNNECESSARY)
				skb->ip_summed = CHECKSUM_NONE;
		}
		if (end > qp->q.len) {
			/* Some bits beyond end -> corruption. */
			if (qp->q.flags & INET_FRAG_LAST_IN)
				goto discard_qp;
			qp->q.len = end;
		}
	}
	/* Zero-length payload. */
	if (end == offset)
		goto discard_qp;

	err = -ENOMEM;
	if (!pskb_pull(skb, skb_network_offset(skb) + ihl))
		goto discard_qp;

	err = pskb_trim_rcsum(skb, end - offset);
	if (err)
		goto discard_qp;

	/* Note : skb->rbnode and skb->dev share the same location. */
	dev = skb->dev;
	/* Makes sure the compiler won't do silly aliasing games */
	barrier();

	/* Remember the tail as it was before this insertion; it is passed
	 * on to ip_frag_reasm().
	 */
	prev_tail = qp->q.fragments_tail;
	err = inet_frag_queue_insert(&qp->q, skb, offset, end);
	if (err)
		goto insert_error;

	if (dev)
		qp->iif = dev->ifindex;

	qp->q.stamp = skb->tstamp;
	qp->q.tstamp_type = skb->tstamp_type;
	qp->q.meat += skb->len;
	qp->ecn |= ecn;
	add_frag_mem_limit(qp->q.fqdir, skb->truesize);
	if (offset == 0)
		qp->q.flags |= INET_FRAG_FIRST_IN;

	fragsize = skb->len + ihl;

	if (fragsize > qp->q.max_size)
		qp->q.max_size = fragsize;

	if (ip_hdr(skb)->frag_off & htons(IP_DF) &&
	    fragsize > qp->max_df_size)
		qp->max_df_size = fragsize;

	/* First and last fragments seen and no holes left: reassemble. */
	if (qp->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
	    qp->q.meat == qp->q.len) {
		unsigned long orefdst = skb->_skb_refdst;

		/* Hide the dst while reassembling, restore it afterwards. */
		skb->_skb_refdst = 0UL;
		err = ip_frag_reasm(qp, skb, prev_tail, dev, refs);
		skb->_skb_refdst = orefdst;
		if (err)
			inet_frag_kill(&qp->q, refs);
		return err;
	}

	skb_dst_drop(skb);
	skb_orphan(skb);
	return -EINPROGRESS;

insert_error:
	/* An exact duplicate is dropped without killing the queue; any
	 * other insertion error counts as an overlap and falls through to
	 * discard the whole queue.
	 */
	if (err == IPFRAG_DUP) {
		SKB_DR_SET(reason, DUP_FRAG);
		err = -EINVAL;
		goto err;
	}
	err = -EINVAL;
	__IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS);
discard_qp:
	inet_frag_kill(&qp->q, refs);
	__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
err:
	kfree_skb_reason(skb, reason);
	return err;
}

/* Coalescing during reassembly is allowed only for queues created for
 * local delivery (user == IP_DEFRAG_LOCAL_DELIVER).
 */
static bool ip_frag_coalesce_ok(const struct ipq *qp)
{
	return qp->q.key.v4.user == IP_DEFRAG_LOCAL_DELIVER;
}

/* Build a new IP datagram from all its fragments.
 *
 * @skb is the fragment that completed the queue and becomes the head of
 * the reassembled datagram; @dev is written back to skb->dev.
 *
 * Returns 0 on success. Failure codes: -EINVAL when the combined ECN bits
 * map to 0xff in ip_frag_ecn_table, -ENOMEM when
 * inet_frag_reasm_prepare() fails, -E2BIG when header plus payload would
 * exceed 65535 bytes. Every failure bumps REASMFAILS.
 */
static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
			 struct sk_buff *prev_tail, struct net_device *dev,
			 int *refs)
{
	struct net *net = qp->q.fqdir->net;
	struct iphdr *iph;
	void *reasm_data;
	int len, err;
	u8 ecn;

	inet_frag_kill(&qp->q, refs);

	ecn = ip_frag_ecn_table[qp->ecn];
	if (unlikely(ecn == 0xff)) {
		err = -EINVAL;
		goto out_fail;
	}

	/* Make the one we just received the head. */
	reasm_data = inet_frag_reasm_prepare(&qp->q, skb, prev_tail);
	if (!reasm_data)
		goto out_nomem;

	len = ip_hdrlen(skb) + qp->q.len;
	err = -E2BIG;
	if (len > 65535)
		goto out_oversize;

	inet_frag_reasm_finish(&qp->q, skb, reasm_data,
			       ip_frag_coalesce_ok(qp));

	skb->dev = dev;
	IPCB(skb)->frag_max_size = max(qp->max_df_size, qp->q.max_size);

	iph = ip_hdr(skb);
	iph->tot_len = htons(len);
	iph->tos |= ecn;

	/* When we set IP_DF on a refragmented skb we must also force a
	 * call to ip_fragment to avoid forwarding a DF-skb of size s while
	 * original sender only sent fragments of size f (where f < s).
	 *
	 * We only set DF/IPSKB_FRAG_PMTU if such DF fragment was the largest
	 * frag seen to avoid sending tiny DF-fragments in case skb was built
	 * from one very small df-fragment and one large non-df frag.
	 */
	if (qp->max_df_size == qp->q.max_size) {
		IPCB(skb)->flags |= IPSKB_FRAG_PMTU;
		iph->frag_off = htons(IP_DF);
	} else {
		iph->frag_off = 0;
	}

	/* Header fields changed above, so recompute the header checksum. */
	ip_send_check(iph);

	__IP_INC_STATS(net, IPSTATS_MIB_REASMOKS);
	qp->q.rb_fragments = RB_ROOT;
	qp->q.fragments_tail = NULL;
	qp->q.last_run_head = NULL;
	return 0;

out_nomem:
	net_dbg_ratelimited("queue_glue: no memory for gluing queue %p\n", qp);
	err = -ENOMEM;
	goto out_fail;
out_oversize:
	net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->q.key.v4.saddr);
out_fail:
	__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
	return err;
}

/* Process an incoming IP datagram fragment.
*/
/* @net:  namespace whose fragment directory is used
 * @skb:  the fragment; always consumed
 * @user: defrag user id, part of the queue lookup key
 *
 * Returns whatever ip_frag_queue() returns when a queue was found or
 * created (0 once @skb holds the fully reassembled datagram,
 * -EINPROGRESS while fragments are still missing, another negative errno
 * on error). When no queue could be obtained the skb is freed, REASMFAILS
 * is bumped and -ENOMEM is returned.
 */
int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
{
	struct net_device *dev;
	struct ipq *qp;
	int vif;

	__IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS);

	/* Lookup (or create) queue header */
	rcu_read_lock();
	/* Fall back to the dst device when the skb carries no device. */
	dev = skb->dev ? : skb_dst_dev_rcu(skb);
	vif = l3mdev_master_ifindex_rcu(dev);
	qp = ip_find(net, ip_hdr(skb), user, vif);
	if (qp) {
		int ret, refs = 0;

		spin_lock(&qp->q.lock);

		ret = ip_frag_queue(qp, skb, &refs);

		spin_unlock(&qp->q.lock);
		rcu_read_unlock();
		inet_frag_putn(&qp->q, refs);
		return ret;
	}
	rcu_read_unlock();

	__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
	kfree_skb(skb);
	return -ENOMEM;
}
EXPORT_SYMBOL(ip_defrag);

/* Defragment @skb if, and only if, it is an IPv4 fragment.
 *
 * Anything that is not ETH_P_IP, cannot be parsed as a sane IPv4 header,
 * or is not a fragment is returned unchanged. For a fragment the skb is
 * unshared, trimmed to the IP total length and passed to ip_defrag().
 *
 * Returns the (possibly reassembled) skb, or NULL when the skb was
 * consumed: queued awaiting more fragments, dropped on error, or
 * skb_share_check() failed.
 */
struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
{
	struct iphdr iph;
	int netoff;
	u32 len;

	if (skb->protocol != htons(ETH_P_IP))
		return skb;

	netoff = skb_network_offset(skb);

	/* Work on a local copy of the header; the skb may be non-linear. */
	if (skb_copy_bits(skb, netoff, &iph, sizeof(iph)) < 0)
		return skb;

	if (iph.ihl < 5 || iph.version != 4)
		return skb;

	len = ntohs(iph.tot_len);
	if (skb->len < netoff + len || len < (iph.ihl * 4))
		return skb;

	if (ip_is_fragment(&iph)) {
		skb = skb_share_check(skb, GFP_ATOMIC);
		if (skb) {
			if (!pskb_may_pull(skb, netoff + iph.ihl * 4)) {
				kfree_skb(skb);
				return NULL;
			}
			if (pskb_trim_rcsum(skb, netoff + len)) {
				kfree_skb(skb);
				return NULL;
			}
			memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
			/* Non-zero covers both -EINPROGRESS and errors; in
			 * either case the skb is no longer ours.
			 */
			if (ip_defrag(net, skb, user))
				return NULL;
			skb_clear_hash(skb);
		}
	}
	return skb;
}
EXPORT_SYMBOL(ip_check_defrag);

#ifdef CONFIG_SYSCTL
/* Lower bound (0) for ipfrag_max_dist. */
static int dist_min;

/* Per-namespace sysctls. The .data pointers (and the thresh cross-limits)
 * are filled in by ip4_frags_ns_ctl_register(), which relies on the order
 * of the entries below.
 */
static struct ctl_table ip4_frags_ns_ctl_table[] = {
	{
		.procname	= "ipfrag_high_thresh",
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{
		.procname	= "ipfrag_low_thresh",
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{
		.procname	= "ipfrag_time",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "ipfrag_max_dist",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &dist_min,
	},
};

/* secret interval has been deprecated */
static int ip4_frags_secret_interval_unused;
static struct ctl_table ip4_frags_ctl_table[] = {
	{
		.procname	= "ipfrag_secret_interval",
		.data		= &ip4_frags_secret_interval_unused,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
};

/* Register the per-namespace sysctls for @net.
 *
 * init_net uses the static table directly; every other namespace gets its
 * own kmemdup() copy. Returns 0 on success, -ENOMEM when the copy or the
 * registration fails (the copy is freed in the latter case).
 */
static int __net_init ip4_frags_ns_ctl_register(struct net *net)
{
	struct ctl_table *table;
	struct ctl_table_header *hdr;

	table = ip4_frags_ns_ctl_table;
	if (!net_eq(net, &init_net)) {
		table = kmemdup(table, sizeof(ip4_frags_ns_ctl_table), GFP_KERNEL);
		if (!table)
			goto err_alloc;

	}
	/* high_thresh is bounded below by low_thresh and vice versa. */
	table[0].data	= &net->ipv4.fqdir->high_thresh;
	table[0].extra1	= &net->ipv4.fqdir->low_thresh;
	table[1].data	= &net->ipv4.fqdir->low_thresh;
	table[1].extra2	= &net->ipv4.fqdir->high_thresh;
	table[2].data	= &net->ipv4.fqdir->timeout;
	table[3].data	= &net->ipv4.fqdir->max_dist;

	hdr = register_net_sysctl_sz(net, "net/ipv4", table,
				     ARRAY_SIZE(ip4_frags_ns_ctl_table));
	if (!hdr)
		goto err_reg;

	net->ipv4.frags_hdr = hdr;
	return 0;

err_reg:
	if (!net_eq(net, &init_net))
		kfree(table);
err_alloc:
	return -ENOMEM;
}

/* Unregister the per-namespace sysctls and free the table that was
 * registered for @net.
 * NOTE(review): the table is freed unconditionally, so this presumably is
 * never reached for init_net (whose table is static) - confirm.
 */
static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net)
{
	const struct ctl_table *table;

	table = net->ipv4.frags_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv4.frags_hdr);
	kfree(table);
}

/* Register the global (init_net only) deprecated sysctl. */
static void __init ip4_frags_ctl_register(void)
{
	register_net_sysctl(&init_net, "net/ipv4", ip4_frags_ctl_table);
}
#else
/* !CONFIG_SYSCTL: no-op stubs. */
static int ip4_frags_ns_ctl_register(struct net *net)
{
	return 0;
}

static void ip4_frags_ns_ctl_unregister(struct net *net)
{
}

static void __init ip4_frags_ctl_register(void)
{
}
#endif

/* Per-namespace init: create the fqdir, set the default limits and
 * register the sysctls. On sysctl registration failure the fqdir is torn
 * down again. Returns 0 or a negative errno.
 */
static int __net_init ipv4_frags_init_net(struct net *net)
{
	int res;

	res = fqdir_init(&net->ipv4.fqdir, &ip4_frags, net);
	if (res < 0)
		return res;
	/* Fragment cache limits.
	 *
	 * The fragment memory accounting code, (tries to) account for
	 * the real memory usage, by measuring both the size of frag
	 * queue struct (inet_frag_queue (ipv4:ipq/ipv6:frag_queue))
	 * and the SKB's truesize.
	 *
	 * A 64K fragment consumes 129736 bytes (44*2944)+200
	 * (1500 truesize == 2944, sizeof(struct ipq) == 200)
	 *
	 * We will commit 4MB at one time. Should we cross that limit
	 * we will prune down to 3MB, making room for approx 8 big 64K
	 * fragments 8x128k.
	 */
	net->ipv4.fqdir->high_thresh = 4 * 1024 * 1024;
	net->ipv4.fqdir->low_thresh  = 3 * 1024 * 1024;
	/*
	 * Important NOTE! Fragment queue must be destroyed before MSL expires.
	 * RFC791 is wrong in proposing to prolong the timer by TTL on each
	 * fragment arrival.
	 */
	net->ipv4.fqdir->timeout = IP_FRAG_TIME;

	net->ipv4.fqdir->max_dist = 64;

	res = ip4_frags_ns_ctl_register(net);
	if (res < 0)
		fqdir_exit(net->ipv4.fqdir);
	return res;
}

/* Per-namespace pre-exit hook: forwards to fqdir_pre_exit(). */
static void __net_exit ipv4_frags_pre_exit_net(struct net *net)
{
	fqdir_pre_exit(net->ipv4.fqdir);
}

/* Per-namespace exit: sysctls go first, then the fqdir. */
static void __net_exit ipv4_frags_exit_net(struct net *net)
{
	ip4_frags_ns_ctl_unregister(net);
	fqdir_exit(net->ipv4.fqdir);
}

static struct pernet_operations ip4_frags_ops = {
	.init		= ipv4_frags_init_net,
	.pre_exit	= ipv4_frags_pre_exit_net,
	.exit		= ipv4_frags_exit_net,
};


/* Hash a lookup key. @len is ignored; the key size is taken from
 * struct frag_v4_compare_key, hashed as an array of u32 words.
 */
static u32 ip4_key_hashfn(const void *data, u32 len, u32 seed)
{
	return jhash2(data,
		      sizeof(struct frag_v4_compare_key) / sizeof(u32), seed);
}

/* Hash a stored queue: same computation as ip4_key_hashfn(), applied to
 * the key embedded in the inet_frag_queue.
 */
static u32 ip4_obj_hashfn(const void *data, u32 len, u32 seed)
{
	const struct inet_frag_queue *fq = data;

	return jhash2((const u32 *)&fq->key.v4,
		      sizeof(struct frag_v4_compare_key) / sizeof(u32), seed);
}

/* Compare callback: returns 0 when the queue's key equals the lookup key,
 * 1 otherwise (memcmp() result normalised with !!).
 */
static int ip4_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
{
	const struct frag_v4_compare_key *key = arg->key;
	const struct inet_frag_queue *fq = ptr;

	return !!memcmp(&fq->key, key, sizeof(*key));
}

static const struct rhashtable_params ip4_rhash_params = {
	.head_offset		= offsetof(struct inet_frag_queue, node),
	.key_offset		= offsetof(struct inet_frag_queue, key),
	.key_len		= sizeof(struct frag_v4_compare_key),
	.hashfn			= ip4_key_hashfn,
	.obj_hashfn		= ip4_obj_hashfn,
	.obj_cmpfn		= ip4_obj_cmpfn,
	.automatic_shrinking	= true,
};

/* Boot-time setup: fill in the ip4_frags callbacks and parameters,
 * initialise the frag infrastructure (panics on failure), then register
 * the global sysctl and the per-namespace operations.
 */
void __init ipfrag_init(void)
{
	ip4_frags.constructor = ip4_frag_init;
	ip4_frags.destructor = ip4_frag_free;
	ip4_frags.qsize = sizeof(struct ipq);
	ip4_frags.frag_expire = ip_expire;
	ip4_frags.frags_cache_name = ip_frag_cache_name;
	ip4_frags.rhash_params = ip4_rhash_params;
	if (inet_frags_init(&ip4_frags))
		panic("IP: failed to allocate ip4_frags cache\n");
	ip4_frags_ctl_register();
	register_pernet_subsys(&ip4_frags_ops);
}
17 3 14 15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 // SPDX-License-Identifier: GPL-2.0 /* * 32 bit compatibility code for System V IPC * * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com> * Copyright (C) 2000 VA Linux Co * Copyright (C) 2000 Don Dugger <n0ano@valinux.com> * Copyright (C) 2000 Hewlett-Packard Co. * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com> * Copyright (C) 2000 Gerhard Tonn (ton@de.ibm.com) * Copyright (C) 2000-2002 Andi Kleen, SuSE Labs (x86-64 port) * Copyright (C) 2000 Silicon Graphics, Inc. * Copyright (C) 2001 IBM * Copyright (C) 2004 IBM Deutschland Entwicklung GmbH, IBM Corporation * Copyright (C) 2004 Arnd Bergmann (arnd@arndb.de) * * This code is collected from the versions for sparc64, mips64, s390x, ia64, * ppc64 and x86_64, all of which are based on the original sparc64 version * by Jakub Jelinek. 
* */ #include <linux/compat.h> #include <linux/errno.h> #include <linux/highuid.h> #include <linux/init.h> #include <linux/msg.h> #include <linux/shm.h> #include <linux/syscalls.h> #include <linux/ptrace.h> #include <linux/mutex.h> #include <linux/uaccess.h> #include "util.h" int get_compat_ipc64_perm(struct ipc64_perm *to, struct compat_ipc64_perm __user *from) { struct compat_ipc64_perm v; if (copy_from_user(&v, from, sizeof(v))) return -EFAULT; to->uid = v.uid; to->gid = v.gid; to->mode = v.mode; return 0; } int get_compat_ipc_perm(struct ipc64_perm *to, struct compat_ipc_perm __user *from) { struct compat_ipc_perm v; if (copy_from_user(&v, from, sizeof(v))) return -EFAULT; to->uid = v.uid; to->gid = v.gid; to->mode = v.mode; return 0; } void to_compat_ipc64_perm(struct compat_ipc64_perm *to, struct ipc64_perm *from) { to->key = from->key; to->uid = from->uid; to->gid = from->gid; to->cuid = from->cuid; to->cgid = from->cgid; to->mode = from->mode; to->seq = from->seq; } void to_compat_ipc_perm(struct compat_ipc_perm *to, struct ipc64_perm *from) { to->key = from->key; SET_UID(to->uid, from->uid); SET_GID(to->gid, from->gid); SET_UID(to->cuid, from->cuid); SET_GID(to->cgid, from->cgid); to->mode = from->mode; to->seq = from->seq; }
1 13 2 11 11 11 1 3 9 1 1 3 1 7 8 3 1 4 1 1 3 1 2 1 1 67 68 67 6 33 4 35 28 79 75 21 3 21 7 18 1 14 1 14 1 5 5 18 18 17 1 5 18 6 5 2 21 19 1 4 4 4 19 18 21 11 11 11 11 11 18 73 73 49 28 11 11 21 21 18 18 21 19 21 15 15 2 4 20 9 3 9 4 9 36 13 31 18 6 7 8 8 6 2 8 1 8 8 2 6 8 13 13 13 3 3 67 67 43 28 8 4 2 3 1 3 3 1 4 4 4 4 8 8 8 1 88 82 19 2 2 2 49 49 99 8 6 14 8 6 76 14 1 76 14 1 3 2 2 3 1 11 49 19 86 7 88 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 
424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 
924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 
1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 // SPDX-License-Identifier: GPL-2.0 /* 
Multipath TCP
 *
 * Copyright (c) 2017 - 2019, Intel Corporation.
 */

#define pr_fmt(fmt) "MPTCP: " fmt

#include <linux/kernel.h>
#include <crypto/sha2.h>
#include <net/tcp.h>
#include <net/mptcp.h>
#include "protocol.h"
#include "mib.h"

#include <trace/events/mptcp.h>

/* True when the crypto-algorithm bits of the MP_CAPABLE @flags select
 * exactly HMAC-SHA256.
 */
static bool mptcp_cap_flag_sha256(u8 flags)
{
	return (flags & MPTCP_CAP_FLAG_MASK) == MPTCP_CAP_HMAC_SHA256;
}

/* Decode one MPTCP TCP option into @mp_opt.
 *
 * @skb:    segment carrying the option (its TCP flags and length are used
 *          to decide which option sizes are acceptable)
 * @ptr:    first byte after the TCP option kind/length pair; its high
 *          nibble is the MPTCP subtype
 * @opsize: total option length as announced on the wire
 * @mp_opt: output; the matching OPTION_MPTCP_* bit is set in ->suboptions
 *          only when the option passes the size/flag checks
 *
 * Options with an unexpected size are ignored (the case simply breaks);
 * nothing is reported to the caller.
 */
static void mptcp_parse_option(const struct sk_buff *skb,
			       const unsigned char *ptr, int opsize,
			       struct mptcp_options_received *mp_opt)
{
	u8 subtype = *ptr >> 4;
	int expected_opsize;
	u16 subopt;
	u8 version;
	u8 flags;
	u8 i;

	switch (subtype) {
	case MPTCPOPT_MP_CAPABLE:
		/* strict size checking */
		if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
			/* Non-SYN: the expected size depends on whether the
			 * segment carries payload beyond the TCP header.
			 */
			if (skb->len > tcp_hdr(skb)->doff << 2)
				expected_opsize = TCPOLEN_MPTCP_MPC_ACK_DATA;
			else
				expected_opsize = TCPOLEN_MPTCP_MPC_ACK;
			subopt = OPTION_MPTCP_MPC_ACK;
		} else {
			if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK) {
				expected_opsize = TCPOLEN_MPTCP_MPC_SYNACK;
				subopt = OPTION_MPTCP_MPC_SYNACK;
			} else {
				expected_opsize = TCPOLEN_MPTCP_MPC_SYN;
				subopt = OPTION_MPTCP_MPC_SYN;
			}
		}

		/* Cfr RFC 8684 Section 3.3.0:
		 * If a checksum is present but its use had
		 * not been negotiated in the MP_CAPABLE handshake, the receiver MUST
		 * close the subflow with a RST, as it is not behaving as negotiated.
		 * If a checksum is not present when its use has been negotiated, the
		 * receiver MUST close the subflow with a RST, as it is considered
		 * broken
		 * We parse even option with mismatching csum presence, so that
		 * later in subflow_data_ready we can trigger the reset.
		 */
		if (opsize != expected_opsize &&
		    (expected_opsize != TCPOLEN_MPTCP_MPC_ACK_DATA ||
		     opsize != TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM))
			break;

		/* try to be gentle vs future versions on the initial syn */
		version = *ptr++ & MPTCP_VERSION_MASK;
		if (opsize != TCPOLEN_MPTCP_MPC_SYN) {
			if (version != MPTCP_SUPPORTED_VERSION)
				break;
		} else if (version < MPTCP_SUPPORTED_VERSION) {
			break;
		}

		flags = *ptr++;
		if (!mptcp_cap_flag_sha256(flags) ||
		    (flags & MPTCP_CAP_EXTENSIBILITY))
			break;

		/* RFC 6824, Section 3.1:
		 * "For the Checksum Required bit (labeled "A"), if either
		 * host requires the use of checksums, checksums MUST be used.
		 * In other words, the only way for checksums not to be used
		 * is if both hosts in their SYNs set A=0."
		 */
		if (flags & MPTCP_CAP_CHECKSUM_REQD)
			mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD;

		mp_opt->deny_join_id0 = !!(flags & MPTCP_CAP_DENY_JOIN_ID0);

		mp_opt->suboptions |= subopt;
		/* The remaining fields are present cumulatively, depending
		 * on how long the option is.
		 */
		if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
			mp_opt->sndr_key = get_unaligned_be64(ptr);
			ptr += 8;
		}
		if (opsize >= TCPOLEN_MPTCP_MPC_ACK) {
			mp_opt->rcvr_key = get_unaligned_be64(ptr);
			ptr += 8;
		}
		if (opsize >= TCPOLEN_MPTCP_MPC_ACK_DATA) {
			/* Section 3.1.:
			 * "the data parameters in a MP_CAPABLE are semantically
			 * equivalent to those in a DSS option and can be used
			 * interchangeably."
			 */
			mp_opt->suboptions |= OPTION_MPTCP_DSS;
			mp_opt->use_map = 1;
			mp_opt->mpc_map = 1;
			mp_opt->data_len = get_unaligned_be16(ptr);
			ptr += 2;
		}
		if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM) {
			mp_opt->csum = get_unaligned((__force __sum16 *)ptr);
			mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD;
			ptr += 2;
		}
		pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d csum=%u\n",
			 version, flags, opsize, mp_opt->sndr_key,
			 mp_opt->rcvr_key, mp_opt->data_len, mp_opt->csum);
		break;

	case MPTCPOPT_MP_JOIN:
		/* The option length alone tells SYN, SYN/ACK and ACK apart. */
		if (opsize == TCPOLEN_MPTCP_MPJ_SYN) {
			mp_opt->suboptions |= OPTION_MPTCP_MPJ_SYN;
			mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP;
			mp_opt->join_id = *ptr++;
			mp_opt->token = get_unaligned_be32(ptr);
			ptr += 4;
			mp_opt->nonce = get_unaligned_be32(ptr);
			ptr += 4;
			pr_debug("MP_JOIN bkup=%u, id=%u, token=%u, nonce=%u\n",
				 mp_opt->backup, mp_opt->join_id,
				 mp_opt->token, mp_opt->nonce);
		} else if (opsize == TCPOLEN_MPTCP_MPJ_SYNACK) {
			mp_opt->suboptions |= OPTION_MPTCP_MPJ_SYNACK;
			mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP;
			mp_opt->join_id = *ptr++;
			mp_opt->thmac = get_unaligned_be64(ptr);
			ptr += 8;
			mp_opt->nonce = get_unaligned_be32(ptr);
			ptr += 4;
			pr_debug("MP_JOIN bkup=%u, id=%u, thmac=%llu, nonce=%u\n",
				 mp_opt->backup, mp_opt->join_id,
				 mp_opt->thmac, mp_opt->nonce);
		} else if (opsize == TCPOLEN_MPTCP_MPJ_ACK) {
			mp_opt->suboptions |= OPTION_MPTCP_MPJ_ACK;
			/* Skip two bytes before the HMAC. */
			ptr += 2;
			memcpy(mp_opt->hmac, ptr, MPTCPOPT_HMAC_LEN);
			pr_debug("MP_JOIN hmac\n");
		}
		break;

	case MPTCPOPT_DSS:
		pr_debug("DSS\n");
		ptr++;

		flags = (*ptr++) & MPTCP_DSS_FLAG_MASK;
		mp_opt->data_fin = (flags & MPTCP_DSS_DATA_FIN) != 0;
		mp_opt->dsn64 = (flags & MPTCP_DSS_DSN64) != 0;
		mp_opt->use_map = (flags & MPTCP_DSS_HAS_MAP) != 0;
		mp_opt->ack64 = (flags & MPTCP_DSS_ACK64) != 0;
		mp_opt->use_ack = (flags & MPTCP_DSS_HAS_ACK);

		pr_debug("data_fin=%d dsn64=%d use_map=%d ack64=%d use_ack=%d\n",
			 mp_opt->data_fin, mp_opt->dsn64,
			 mp_opt->use_map, mp_opt->ack64,
			 mp_opt->use_ack);

		/* Derive the only acceptable length from the flag bits. */
		expected_opsize = TCPOLEN_MPTCP_DSS_BASE;

		if (mp_opt->use_ack) {
			if (mp_opt->ack64)
				expected_opsize += TCPOLEN_MPTCP_DSS_ACK64;
			else
				expected_opsize += TCPOLEN_MPTCP_DSS_ACK32;
		}

		if (mp_opt->use_map) {
			if (mp_opt->dsn64)
				expected_opsize += TCPOLEN_MPTCP_DSS_MAP64;
			else
				expected_opsize += TCPOLEN_MPTCP_DSS_MAP32;
		}

		/* Always parse any csum presence combination, we will enforce
		 * RFC 8684 Section 3.3.0 checks later in subflow_data_ready
		 */
		if (opsize != expected_opsize &&
		    opsize != expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM)
			break;

		mp_opt->suboptions |= OPTION_MPTCP_DSS;
		if (mp_opt->use_ack) {
			if (mp_opt->ack64) {
				mp_opt->data_ack = get_unaligned_be64(ptr);
				ptr += 8;
			} else {
				mp_opt->data_ack = get_unaligned_be32(ptr);
				ptr += 4;
			}

			pr_debug("data_ack=%llu\n", mp_opt->data_ack);
		}

		if (mp_opt->use_map) {
			if (mp_opt->dsn64) {
				mp_opt->data_seq = get_unaligned_be64(ptr);
				ptr += 8;
			} else {
				mp_opt->data_seq = get_unaligned_be32(ptr);
				ptr += 4;
			}

			mp_opt->subflow_seq = get_unaligned_be32(ptr);
			ptr += 4;

			mp_opt->data_len = get_unaligned_be16(ptr);
			ptr += 2;

			if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) {
				mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD;
				mp_opt->csum = get_unaligned((__force __sum16 *)ptr);
				ptr += 2;
			}

			pr_debug("data_seq=%llu subflow_seq=%u data_len=%u csum=%d:%u\n",
				 mp_opt->data_seq, mp_opt->subflow_seq,
				 mp_opt->data_len, !!(mp_opt->suboptions & OPTION_MPTCP_CSUMREQD),
				 mp_opt->csum);
		}

		break;

	case MPTCPOPT_ADD_ADDR:
		mp_opt->echo = (*ptr++) & MPTCP_ADDR_ECHO;
		/* Address family (and port presence) is inferred from the
		 * option length; echoes use the *_BASE lengths.
		 */
		if (!mp_opt->echo) {
			if (opsize == TCPOLEN_MPTCP_ADD_ADDR ||
			    opsize == TCPOLEN_MPTCP_ADD_ADDR_PORT)
				mp_opt->addr.family = AF_INET;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
			else if (opsize == TCPOLEN_MPTCP_ADD_ADDR6 ||
				 opsize == TCPOLEN_MPTCP_ADD_ADDR6_PORT)
				mp_opt->addr.family = AF_INET6;
#endif
			else
				break;
		} else {
			if (opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE ||
			    opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT)
				mp_opt->addr.family = AF_INET;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
			else if (opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE ||
				 opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT)
				mp_opt->addr.family = AF_INET6;
#endif
			else
				break;
		}

		mp_opt->suboptions |= OPTION_MPTCP_ADD_ADDR;
		mp_opt->addr.id = *ptr++;
		mp_opt->addr.port = 0;
		mp_opt->ahmac = 0;
		if (mp_opt->addr.family == AF_INET) {
			memcpy((u8 *)&mp_opt->addr.addr.s_addr, (u8 *)ptr, 4);
			ptr += 4;
			if (opsize == TCPOLEN_MPTCP_ADD_ADDR_PORT ||
			    opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT) {
				mp_opt->addr.port = htons(get_unaligned_be16(ptr));
				ptr += 2;
			}
		}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
		else {
			memcpy(mp_opt->addr.addr6.s6_addr, (u8 *)ptr, 16);
			ptr += 16;
			if (opsize == TCPOLEN_MPTCP_ADD_ADDR6_PORT ||
			    opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT) {
				mp_opt->addr.port = htons(get_unaligned_be16(ptr));
				ptr += 2;
			}
		}
#endif
		/* Only a non-echo ADD_ADDR carries the truncated HMAC. */
		if (!mp_opt->echo) {
			mp_opt->ahmac = get_unaligned_be64(ptr);
			ptr += 8;
		}
		pr_debug("ADD_ADDR%s: id=%d, ahmac=%llu, echo=%d, port=%d\n",
			 (mp_opt->addr.family == AF_INET6) ? "6" : "",
			 mp_opt->addr.id, mp_opt->ahmac, mp_opt->echo, ntohs(mp_opt->addr.port));
		break;

	case MPTCPOPT_RM_ADDR:
		/* Between 1 and MPTCP_RM_IDS_MAX address ids must follow. */
		if (opsize < TCPOLEN_MPTCP_RM_ADDR_BASE + 1 ||
		    opsize > TCPOLEN_MPTCP_RM_ADDR_BASE + MPTCP_RM_IDS_MAX)
			break;

		ptr++;

		mp_opt->suboptions |= OPTION_MPTCP_RM_ADDR;
		mp_opt->rm_list.nr = opsize - TCPOLEN_MPTCP_RM_ADDR_BASE;
		for (i = 0; i < mp_opt->rm_list.nr; i++)
			mp_opt->rm_list.ids[i] = *ptr++;
		pr_debug("RM_ADDR: rm_list_nr=%d\n", mp_opt->rm_list.nr);
		break;

	case MPTCPOPT_MP_PRIO:
		if (opsize != TCPOLEN_MPTCP_PRIO)
			break;

		mp_opt->suboptions |= OPTION_MPTCP_PRIO;
		mp_opt->backup = *ptr++ & MPTCP_PRIO_BKUP;
		pr_debug("MP_PRIO: prio=%d\n", mp_opt->backup);
		break;

	case MPTCPOPT_MP_FASTCLOSE:
		if (opsize != TCPOLEN_MPTCP_FASTCLOSE)
			break;

		ptr += 2;
		mp_opt->rcvr_key = get_unaligned_be64(ptr);
		ptr += 8;
		mp_opt->suboptions |= OPTION_MPTCP_FASTCLOSE;
		pr_debug("MP_FASTCLOSE: recv_key=%llu\n", mp_opt->rcvr_key);
		break;

	case MPTCPOPT_RST:
		if (opsize != TCPOLEN_MPTCP_RST)
			break;

		/* Only honoured on segments that actually carry TCP RST. */
		if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST))
			break;

		mp_opt->suboptions |= OPTION_MPTCP_RST;
		flags = *ptr++;
		mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT;
		mp_opt->reset_reason = *ptr;
		pr_debug("MP_RST: transient=%u reason=%u\n",
			 mp_opt->reset_transient, mp_opt->reset_reason);
		break;

	case MPTCPOPT_MP_FAIL:
		if (opsize != TCPOLEN_MPTCP_FAIL)
			break;

		ptr += 2;
		mp_opt->suboptions |= OPTION_MPTCP_FAIL;
		mp_opt->fail_seq = get_unaligned_be64(ptr);
		pr_debug("MP_FAIL: data_seq=%llu\n", mp_opt->fail_seq);
		break;

	default:
		break;
	}
}

/* Walk the TCP options of @skb and feed every MPTCP option to
 * mptcp_parse_option().
 *
 * The whole 32-bit status word of @mp_opt is zeroed first. Parsing stops
 * silently at End-of-List, at an option length below 2, or at an option
 * that would run past the TCP header.
 */
void mptcp_get_options(const struct sk_buff *skb,
		       struct mptcp_options_received *mp_opt)
{
	const struct tcphdr *th = tcp_hdr(skb);
	const unsigned char *ptr;
	int length;

	/* Ensure that casting the whole status to u32 is efficient and safe */
	BUILD_BUG_ON(sizeof_field(struct mptcp_options_received, status) != sizeof(u32));
	BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct mptcp_options_received, status),
				 sizeof(u32)));
	*(u32 *)&mp_opt->status = 0;

	length = (th->doff * 4) - sizeof(struct tcphdr);
	ptr = (const unsigned char *)(th + 1);

	while (length > 0) {
		int opcode = *ptr++;
		int opsize;

		switch (opcode) {
		case TCPOPT_EOL:
			return;
		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
			length--;
			continue;
		default:
			if (length < 2)
				return;
			opsize = *ptr++;
			if (opsize < 2) /* "silly options" */
				return;
			if (opsize > length)
				return;	/* don't parse partial options */
			if (opcode == TCPOPT_MPTCP)
				mptcp_parse_option(skb, ptr, opsize, mp_opt);
			/* kind and length bytes were already consumed */
			ptr += opsize - 2;
			length -= opsize;
		}
	}
}

/* Choose the MPTCP option for an outgoing SYN.
 *
 * Returns true with @opts and *@size filled in for either MP_CAPABLE
 * (subflow->request_mptcp) or MP_JOIN (subflow->request_join). Returns
 * false when neither applies, or when a simultaneous connect forces a
 * fallback. subflow->snd_isn is recorded in every case.
 */
bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
		       unsigned int *size, struct mptcp_out_options *opts)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	/* we will use snd_isn to detect first pkt [re]transmission
	 * in mptcp_established_options_mp()
	 */
	subflow->snd_isn = TCP_SKB_CB(skb)->end_seq;
	if (subflow->request_mptcp) {
		if (unlikely(subflow_simultaneous_connect(sk))) {
			WARN_ON_ONCE(!mptcp_try_fallback(sk, MPTCP_MIB_SIMULTCONNFALLBACK));

			/* Ensure mptcp_finish_connect() will not process the
			 * MPC handshake.
			 */
			subflow->request_mptcp = 0;
			return false;
		}

		opts->suboptions = OPTION_MPTCP_MPC_SYN;
		opts->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk));
		opts->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk));
		*size = TCPOLEN_MPTCP_MPC_SYN;
		return true;
	} else if (subflow->request_join) {
		pr_debug("remote_token=%u, nonce=%u\n", subflow->remote_token,
			 subflow->local_nonce);
		opts->suboptions = OPTION_MPTCP_MPJ_SYN;
		opts->join_id = subflow->local_id;
		opts->token = subflow->remote_token;
		opts->nonce = subflow->local_nonce;
		opts->backup = subflow->request_bkup;
		*size = TCPOLEN_MPTCP_MPJ_SYN;
		return true;
	}
	return false;
}

/* Stop the delayed-ACK timer of @sk and clear the related pending-ACK
 * state (ato and the SCHED/TIMER pending bits).
 */
static void clear_3rdack_retransmission(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	sk_stop_timer(sk, &icsk->icsk_delack_timer);
	icsk->icsk_ack.ato = 0;
	icsk->icsk_ack.pending &= ~(ICSK_ACK_SCHED | ICSK_ACK_TIMER);
}

static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
					 bool snd_data_fin_enable,
					 unsigned int *size,
					 struct mptcp_out_options *opts)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
	struct mptcp_ext *mpext;
	unsigned int data_len;
	u8 len;

	/* When skb is not available, we better over-estimate the emitted
	 * options len. A full DSS option (28 bytes) is longer than
	 * TCPOLEN_MPTCP_MPC_ACK_DATA(22) or TCPOLEN_MPTCP_MPJ_ACK(24), so
	 * tell the caller to defer the estimate to
	 * mptcp_established_options_dss(), which will reserve enough space.
	 */
	if (!skb)
		return false;

	/* MPC/MPJ needed only on 3rd ack packet, DATA_FIN and TCP shutdown take precedence */
	if (READ_ONCE(subflow->fully_established) || snd_data_fin_enable ||
	    subflow->snd_isn != TCP_SKB_CB(skb)->seq ||
	    sk->sk_state != TCP_ESTABLISHED)
		return false;

	if (subflow->mp_capable) {
		mpext = mptcp_get_ext(skb);
		data_len = mpext ?
mpext->data_len : 0; /* we will check ops->data_len in mptcp_write_options() to * discriminate between TCPOLEN_MPTCP_MPC_ACK_DATA and * TCPOLEN_MPTCP_MPC_ACK */ opts->data_len = data_len; opts->suboptions = OPTION_MPTCP_MPC_ACK; opts->sndr_key = subflow->local_key; opts->rcvr_key = subflow->remote_key; opts->csum_reqd = READ_ONCE(msk->csum_enabled); opts->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk)); /* Section 3.1. * The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK * packets that start the first subflow of an MPTCP connection, * as well as the first packet that carries data */ if (data_len > 0) { len = TCPOLEN_MPTCP_MPC_ACK_DATA; if (opts->csum_reqd) { /* we need to propagate more info to csum the pseudo hdr */ opts->data_seq = mpext->data_seq; opts->subflow_seq = mpext->subflow_seq; opts->csum = mpext->csum; len += TCPOLEN_MPTCP_DSS_CHECKSUM; } *size = ALIGN(len, 4); } else { *size = TCPOLEN_MPTCP_MPC_ACK; } pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d\n", subflow, subflow->local_key, subflow->remote_key, data_len); return true; } else if (subflow->mp_join) { opts->suboptions = OPTION_MPTCP_MPJ_ACK; memcpy(opts->hmac, subflow->hmac, MPTCPOPT_HMAC_LEN); *size = TCPOLEN_MPTCP_MPJ_ACK; pr_debug("subflow=%p\n", subflow); /* we can use the full delegate action helper only from BH context * If we are in process context - sk is flushing the backlog at * socket lock release time - just set the appropriate flag, will * be handled by the release callback */ if (sock_owned_by_user(sk)) set_bit(MPTCP_DELEGATE_ACK, &subflow->delegated_status); else mptcp_subflow_delegate(subflow, MPTCP_DELEGATE_ACK); return true; } return false; } static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow, struct sk_buff *skb, struct mptcp_ext *ext) { /* The write_seq value has already been incremented, so the actual * sequence number for the DATA_FIN is one less. 
*/ u64 data_fin_tx_seq = READ_ONCE(mptcp_sk(subflow->conn)->write_seq) - 1; if (!ext->use_map || !skb->len) { /* RFC6824 requires a DSS mapping with specific values * if DATA_FIN is set but no data payload is mapped */ ext->data_fin = 1; ext->use_map = 1; ext->dsn64 = 1; ext->data_seq = data_fin_tx_seq; ext->subflow_seq = 0; ext->data_len = 1; } else if (ext->data_seq + ext->data_len == data_fin_tx_seq) { /* If there's an existing DSS mapping and it is the * final mapping, DATA_FIN consumes 1 additional byte of * mapping space. */ ext->data_fin = 1; ext->data_len++; } } static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, bool snd_data_fin_enable, unsigned int *size, struct mptcp_out_options *opts) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); unsigned int dss_size = 0; struct mptcp_ext *mpext; unsigned int ack_size; bool ret = false; u64 ack_seq; opts->csum_reqd = READ_ONCE(msk->csum_enabled); mpext = skb ? 
mptcp_get_ext(skb) : NULL; if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) { unsigned int map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64; if (mpext) { if (opts->csum_reqd) map_size += TCPOLEN_MPTCP_DSS_CHECKSUM; opts->ext_copy = *mpext; } dss_size = map_size; if (skb && snd_data_fin_enable) mptcp_write_data_fin(subflow, skb, &opts->ext_copy); opts->suboptions = OPTION_MPTCP_DSS; ret = true; } /* passive sockets msk will set the 'can_ack' after accept(), even * if the first subflow may have the already the remote key handy */ opts->ext_copy.use_ack = 0; if (!READ_ONCE(msk->can_ack)) { *size = ALIGN(dss_size, 4); return ret; } ack_seq = READ_ONCE(msk->ack_seq); if (READ_ONCE(msk->use_64bit_ack)) { ack_size = TCPOLEN_MPTCP_DSS_ACK64; opts->ext_copy.data_ack = ack_seq; opts->ext_copy.ack64 = 1; } else { ack_size = TCPOLEN_MPTCP_DSS_ACK32; opts->ext_copy.data_ack32 = (uint32_t)ack_seq; opts->ext_copy.ack64 = 0; } opts->ext_copy.use_ack = 1; opts->suboptions = OPTION_MPTCP_DSS; /* Add kind/length/subtype/flag overhead if mapping is not populated */ if (dss_size == 0) ack_size += TCPOLEN_MPTCP_DSS_BASE; dss_size += ack_size; *size = ALIGN(dss_size, 4); return true; } static u64 add_addr_generate_hmac(u64 key1, u64 key2, struct mptcp_addr_info *addr) { u16 port = ntohs(addr->port); u8 hmac[SHA256_DIGEST_SIZE]; u8 msg[19]; int i = 0; msg[i++] = addr->id; if (addr->family == AF_INET) { memcpy(&msg[i], &addr->addr.s_addr, 4); i += 4; } #if IS_ENABLED(CONFIG_MPTCP_IPV6) else if (addr->family == AF_INET6) { memcpy(&msg[i], &addr->addr6.s6_addr, 16); i += 16; } #endif msg[i++] = port >> 8; msg[i++] = port & 0xFF; mptcp_crypto_hmac_sha(key1, key2, msg, i, hmac); return get_unaligned_be64(&hmac[SHA256_DIGEST_SIZE - sizeof(u64)]); } static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *skb, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); 
struct mptcp_sock *msk = mptcp_sk(subflow->conn); bool drop_other_suboptions = false; unsigned int opt_size = *size; struct mptcp_addr_info addr; bool echo; int len; /* add addr will strip the existing options, be sure to avoid breaking * MPC/MPJ handshakes */ if (!mptcp_pm_should_add_signal(msk) || (opts->suboptions & (OPTION_MPTCP_MPJ_ACK | OPTION_MPTCP_MPC_ACK)) || !mptcp_pm_add_addr_signal(msk, skb, opt_size, remaining, &addr, &echo, &drop_other_suboptions)) return false; /* * Later on, mptcp_write_options() will enforce mutually exclusion with * DSS, bail out if such option is set and we can't drop it. */ if (drop_other_suboptions) remaining += opt_size; else if (opts->suboptions & OPTION_MPTCP_DSS) return false; len = mptcp_add_addr_len(addr.family, echo, !!addr.port); if (remaining < len) return false; *size = len; if (drop_other_suboptions) { pr_debug("drop other suboptions\n"); opts->suboptions = 0; /* note that e.g. DSS could have written into the memory * aliased by ahmac, we must reset the field here * to avoid appending the hmac even for ADD_ADDR echo * options */ opts->ahmac = 0; *size -= opt_size; } opts->addr = addr; opts->suboptions |= OPTION_MPTCP_ADD_ADDR; if (!echo) { MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDRTX); opts->ahmac = add_addr_generate_hmac(READ_ONCE(msk->local_key), READ_ONCE(msk->remote_key), &opts->addr); } else { MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADDTX); } pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d\n", opts->addr.id, opts->ahmac, echo, ntohs(opts->addr.port)); return true; } static bool mptcp_established_options_rm_addr(struct sock *sk, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); struct mptcp_rm_list rm_list; int i, len; if (!mptcp_pm_should_rm_signal(msk) || !(mptcp_pm_rm_addr_signal(msk, remaining, &rm_list))) return false; len = mptcp_rm_addr_len(&rm_list); if 
(len < 0) return false; if (remaining < len) return false; *size = len; opts->suboptions |= OPTION_MPTCP_RM_ADDR; opts->rm_list = rm_list; for (i = 0; i < opts->rm_list.nr; i++) pr_debug("rm_list_ids[%d]=%d\n", i, opts->rm_list.ids[i]); MPTCP_ADD_STATS(sock_net(sk), MPTCP_MIB_RMADDRTX, opts->rm_list.nr); return true; } static bool mptcp_established_options_mp_prio(struct sock *sk, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); /* can't send MP_PRIO with MPC, as they share the same option space: * 'backup'. Also it makes no sense at all */ if (!subflow->send_mp_prio || (opts->suboptions & OPTIONS_MPTCP_MPC)) return false; /* account for the trailing 'nop' option */ if (remaining < TCPOLEN_MPTCP_PRIO_ALIGN) return false; *size = TCPOLEN_MPTCP_PRIO_ALIGN; opts->suboptions |= OPTION_MPTCP_PRIO; opts->backup = subflow->request_bkup; pr_debug("prio=%d\n", opts->backup); return true; } static noinline bool mptcp_established_options_rst(struct sock *sk, struct sk_buff *skb, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts) { const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); if (remaining < TCPOLEN_MPTCP_RST) return false; *size = TCPOLEN_MPTCP_RST; opts->suboptions |= OPTION_MPTCP_RST; opts->reset_transient = subflow->reset_transient; opts->reset_reason = subflow->reset_reason; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPRSTTX); return true; } static bool mptcp_established_options_fastclose(struct sock *sk, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); if (likely(!subflow->send_fastclose)) return false; if (remaining < TCPOLEN_MPTCP_FASTCLOSE) return false; *size = TCPOLEN_MPTCP_FASTCLOSE; opts->suboptions |= OPTION_MPTCP_FASTCLOSE; opts->rcvr_key = READ_ONCE(msk->remote_key); pr_debug("FASTCLOSE 
key=%llu\n", opts->rcvr_key); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSETX); return true; } static bool mptcp_established_options_mp_fail(struct sock *sk, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); if (likely(!subflow->send_mp_fail)) return false; if (remaining < TCPOLEN_MPTCP_FAIL) return false; *size = TCPOLEN_MPTCP_FAIL; opts->suboptions |= OPTION_MPTCP_FAIL; opts->fail_seq = subflow->map_seq; pr_debug("MP_FAIL fail_seq=%llu\n", opts->fail_seq); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX); return true; } bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); unsigned int opt_size = 0; bool snd_data_fin; bool ret = false; opts->suboptions = 0; /* Force later mptcp_write_options(), but do not use any actual * option space. 
*/ if (unlikely(__mptcp_check_fallback(msk) && !mptcp_check_infinite_map(skb))) return true; if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) { if (mptcp_established_options_fastclose(sk, &opt_size, remaining, opts) || mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) { *size += opt_size; remaining -= opt_size; } /* MP_RST can be used with MP_FASTCLOSE and MP_FAIL if there is room */ if (mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts)) { *size += opt_size; remaining -= opt_size; } return true; } snd_data_fin = mptcp_data_fin_enabled(msk); if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, opts)) ret = true; else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, opts)) { unsigned int mp_fail_size; ret = true; if (mptcp_established_options_mp_fail(sk, &mp_fail_size, remaining - opt_size, opts)) { *size += opt_size + mp_fail_size; remaining -= opt_size - mp_fail_size; return true; } } /* we reserved enough space for the above options, and exceeding the * TCP option space would be fatal */ if (WARN_ON_ONCE(opt_size > remaining)) return false; *size += opt_size; remaining -= opt_size; if (mptcp_established_options_add_addr(sk, skb, &opt_size, remaining, opts)) { *size += opt_size; remaining -= opt_size; ret = true; } else if (mptcp_established_options_rm_addr(sk, &opt_size, remaining, opts)) { *size += opt_size; remaining -= opt_size; ret = true; } if (mptcp_established_options_mp_prio(sk, &opt_size, remaining, opts)) { *size += opt_size; remaining -= opt_size; ret = true; } return ret; } bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, struct mptcp_out_options *opts) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); if (subflow_req->mp_capable) { opts->suboptions = OPTION_MPTCP_MPC_SYNACK; opts->sndr_key = subflow_req->local_key; opts->csum_reqd = subflow_req->csum_reqd; opts->allow_join_id0 = subflow_req->allow_join_id0; *size = 
TCPOLEN_MPTCP_MPC_SYNACK; pr_debug("subflow_req=%p, local_key=%llu\n", subflow_req, subflow_req->local_key); return true; } else if (subflow_req->mp_join) { opts->suboptions = OPTION_MPTCP_MPJ_SYNACK; opts->backup = subflow_req->request_bkup; opts->join_id = subflow_req->local_id; opts->thmac = subflow_req->thmac; opts->nonce = subflow_req->local_nonce; pr_debug("req=%p, bkup=%u, id=%u, thmac=%llu, nonce=%u\n", subflow_req, opts->backup, opts->join_id, opts->thmac, opts->nonce); *size = TCPOLEN_MPTCP_MPJ_SYNACK; return true; } return false; } static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, struct mptcp_subflow_context *subflow, struct sk_buff *skb, struct mptcp_options_received *mp_opt) { /* here we can process OoO, in-window pkts, only in-sequence 4th ack * will make the subflow fully established */ if (likely(READ_ONCE(subflow->fully_established))) { /* on passive sockets, check for 3rd ack retransmission * note that msk is always set by subflow_syn_recv_sock() * for mp_join subflows */ if (TCP_SKB_CB(skb)->seq == subflow->ssn_offset + 1 && TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq && subflow->mp_join && (mp_opt->suboptions & OPTIONS_MPTCP_MPJ) && !subflow->request_join) tcp_send_ack(ssk); goto check_notify; } /* we must process OoO packets before the first subflow is fully * established. OoO packets are instead a protocol violation * for MP_JOIN subflows as the peer must not send any data * before receiving the forth ack - cfr. RFC 8684 section 3.2. 
*/ if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1) { if (subflow->mp_join) goto reset; if (subflow->is_mptfo && mp_opt->suboptions & OPTION_MPTCP_MPC_ACK) goto set_fully_established; return subflow->mp_capable; } if (subflow->remote_key_valid && (((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) || ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && (!mp_opt->echo || subflow->mp_join)))) { /* subflows are fully established as soon as we get any * additional ack, including ADD_ADDR. */ goto set_fully_established; } /* If the first established packet does not contain MP_CAPABLE + data * then fallback to TCP. Fallback scenarios requires a reset for * MP_JOIN subflows. */ if (!(mp_opt->suboptions & OPTIONS_MPTCP_MPC)) { if (subflow->mp_join) goto reset; subflow->mp_capable = 0; if (!mptcp_try_fallback(ssk, MPTCP_MIB_MPCAPABLEDATAFALLBACK)) { MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_FALLBACKFAILED); goto reset; } return false; } if (unlikely(!READ_ONCE(msk->pm.server_side))) pr_warn_once("bogus mpc option on established client sk"); set_fully_established: if (mp_opt->deny_join_id0) WRITE_ONCE(msk->pm.remote_deny_join_id0, true); mptcp_data_lock((struct sock *)msk); __mptcp_subflow_fully_established(msk, subflow, mp_opt); mptcp_data_unlock((struct sock *)msk); check_notify: /* if the subflow is not already linked into the conn_list, we can't * notify the PM: this subflow is still on the listener queue * and the PM possibly acquiring the subflow lock could race with * the listener close */ if (likely(subflow->pm_notified) || list_empty(&subflow->node)) return true; subflow->pm_notified = 1; if (subflow->mp_join) { clear_3rdack_retransmission(ssk); mptcp_pm_subflow_established(msk); } else { mptcp_pm_fully_established(msk, ssk); } return true; reset: mptcp_subflow_reset(ssk); return false; } u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq) { u32 old_seq32, cur_seq32; old_seq32 = (u32)old_seq; cur_seq32 = (u32)cur_seq; cur_seq = (old_seq & GENMASK_ULL(63, 32)) 
+ cur_seq32; if (unlikely(cur_seq32 < old_seq32 && before(old_seq32, cur_seq32))) return cur_seq + (1LL << 32); /* reverse wrap could happen, too */ if (unlikely(cur_seq32 > old_seq32 && after(old_seq32, cur_seq32))) return cur_seq - (1LL << 32); return cur_seq; } static void __mptcp_snd_una_update(struct mptcp_sock *msk, u64 new_snd_una) { msk->bytes_acked += new_snd_una - msk->snd_una; WRITE_ONCE(msk->snd_una, new_snd_una); } static void rwin_update(struct mptcp_sock *msk, struct sock *ssk, struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct tcp_sock *tp = tcp_sk(ssk); u64 mptcp_rcv_wnd; /* Avoid touching extra cachelines if TCP is going to accept this * skb without filling the TCP-level window even with a possibly * outdated mptcp-level rwin. */ if (!skb->len || skb->len < tcp_receive_window(tp)) return; mptcp_rcv_wnd = atomic64_read(&msk->rcv_wnd_sent); if (!after64(mptcp_rcv_wnd, subflow->rcv_wnd_sent)) return; /* Some other subflow grew the mptcp-level rwin since rcv_wup, * resync. */ tp->rcv_wnd += mptcp_rcv_wnd - subflow->rcv_wnd_sent; subflow->rcv_wnd_sent = mptcp_rcv_wnd; } static void ack_update_msk(struct mptcp_sock *msk, struct sock *ssk, struct mptcp_options_received *mp_opt) { u64 new_wnd_end, new_snd_una, snd_nxt = READ_ONCE(msk->snd_nxt); struct sock *sk = (struct sock *)msk; u64 old_snd_una; mptcp_data_lock(sk); /* avoid ack expansion on update conflict, to reduce the risk of * wrongly expanding to a future ack sequence number, which is way * more dangerous than missing an ack */ old_snd_una = msk->snd_una; new_snd_una = mptcp_expand_seq(old_snd_una, mp_opt->data_ack, mp_opt->ack64); /* ACK for data not even sent yet? 
Ignore.*/ if (unlikely(after64(new_snd_una, snd_nxt))) new_snd_una = old_snd_una; new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd; if (after64(new_wnd_end, msk->wnd_end)) WRITE_ONCE(msk->wnd_end, new_wnd_end); /* this assumes mptcp_incoming_options() is invoked after tcp_ack() */ if (after64(msk->wnd_end, snd_nxt)) __mptcp_check_push(sk, ssk); if (after64(new_snd_una, old_snd_una)) { __mptcp_snd_una_update(msk, new_snd_una); __mptcp_data_acked(sk); } msk->last_ack_recv = tcp_jiffies32; mptcp_data_unlock(sk); trace_ack_update_msk(mp_opt->data_ack, old_snd_una, new_snd_una, new_wnd_end, READ_ONCE(msk->wnd_end)); } bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit) { /* Skip if DATA_FIN was already received. * If updating simultaneously with the recvmsg loop, values * should match. If they mismatch, the peer is misbehaving and * we will prefer the most recent information. */ if (READ_ONCE(msk->rcv_data_fin)) return false; WRITE_ONCE(msk->rcv_data_fin_seq, mptcp_expand_seq(READ_ONCE(msk->ack_seq), data_fin_seq, use_64bit)); WRITE_ONCE(msk->rcv_data_fin, 1); return true; } static bool add_addr_hmac_valid(struct mptcp_sock *msk, struct mptcp_options_received *mp_opt) { u64 hmac = 0; if (mp_opt->echo) return true; hmac = add_addr_generate_hmac(READ_ONCE(msk->remote_key), READ_ONCE(msk->local_key), &mp_opt->addr); pr_debug("msk=%p, ahmac=%llu, mp_opt->ahmac=%llu\n", msk, hmac, mp_opt->ahmac); return hmac == mp_opt->ahmac; } /* Return false in case of error (or subflow has been reset), * else return true. */ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); struct mptcp_options_received mp_opt; struct mptcp_ext *mpext; if (__mptcp_check_fallback(msk)) { /* Keep it simple and unconditionally trigger send data cleanup and * pending queue spooling. 
We will need to acquire the data lock * for more accurate checks, and once the lock is acquired, such * helpers are cheap. */ mptcp_data_lock(subflow->conn); if (sk_stream_memory_free(sk)) __mptcp_check_push(subflow->conn, sk); /* on fallback we just need to ignore the msk-level snd_una, as * this is really plain TCP */ __mptcp_snd_una_update(msk, READ_ONCE(msk->snd_nxt)); __mptcp_data_acked(subflow->conn); mptcp_data_unlock(subflow->conn); return true; } mptcp_get_options(skb, &mp_opt); /* The subflow can be in close state only if check_fully_established() * just sent a reset. If so, tell the caller to ignore the current packet. */ if (!check_fully_established(msk, sk, subflow, skb, &mp_opt)) return sk->sk_state != TCP_CLOSE; if (unlikely(mp_opt.suboptions != OPTION_MPTCP_DSS)) { if ((mp_opt.suboptions & OPTION_MPTCP_FASTCLOSE) && READ_ONCE(msk->local_key) == mp_opt.rcvr_key) { WRITE_ONCE(msk->rcv_fastclose, true); mptcp_schedule_work((struct sock *)msk); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSERX); } if ((mp_opt.suboptions & OPTION_MPTCP_ADD_ADDR) && add_addr_hmac_valid(msk, &mp_opt)) { if (!mp_opt.echo) { mptcp_pm_add_addr_received(sk, &mp_opt.addr); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR); } else { mptcp_pm_add_addr_echoed(msk, &mp_opt.addr); mptcp_pm_del_add_timer(msk, &mp_opt.addr, true); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD); } if (mp_opt.addr.port) MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_PORTADD); } if (mp_opt.suboptions & OPTION_MPTCP_RM_ADDR) mptcp_pm_rm_addr_received(msk, &mp_opt.rm_list); if (mp_opt.suboptions & OPTION_MPTCP_PRIO) { mptcp_pm_mp_prio_received(sk, mp_opt.backup); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIORX); } if (mp_opt.suboptions & OPTION_MPTCP_FAIL) { mptcp_pm_mp_fail_received(sk, mp_opt.fail_seq); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILRX); } if (mp_opt.suboptions & OPTION_MPTCP_RST) { subflow->reset_seen = 1; subflow->reset_reason = mp_opt.reset_reason; subflow->reset_transient = 
mp_opt.reset_transient; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPRSTRX); } if (!(mp_opt.suboptions & OPTION_MPTCP_DSS)) return true; } /* we can't wait for recvmsg() to update the ack_seq, otherwise * monodirectional flows will stuck */ if (mp_opt.use_ack) ack_update_msk(msk, sk, &mp_opt); rwin_update(msk, sk, skb); /* Zero-data-length packets are dropped by the caller and not * propagated to the MPTCP layer, so the skb extension does not * need to be allocated or populated. DATA_FIN information, if * present, needs to be updated here before the skb is freed. */ if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { if (mp_opt.data_fin && mp_opt.data_len == 1 && mptcp_update_rcv_data_fin(msk, mp_opt.data_seq, mp_opt.dsn64)) mptcp_schedule_work((struct sock *)msk); return true; } mpext = skb_ext_add(skb, SKB_EXT_MPTCP); if (!mpext) return false; memset(mpext, 0, sizeof(*mpext)); if (likely(mp_opt.use_map)) { if (mp_opt.mpc_map) { /* this is an MP_CAPABLE carrying MPTCP data * we know this map the first chunk of data */ mptcp_crypto_key_sha(subflow->remote_key, NULL, &mpext->data_seq); mpext->data_seq++; mpext->subflow_seq = 1; mpext->dsn64 = 1; mpext->mpc_map = 1; mpext->data_fin = 0; } else { mpext->data_seq = mp_opt.data_seq; mpext->subflow_seq = mp_opt.subflow_seq; mpext->dsn64 = mp_opt.dsn64; mpext->data_fin = mp_opt.data_fin; } mpext->data_len = mp_opt.data_len; mpext->use_map = 1; mpext->csum_reqd = !!(mp_opt.suboptions & OPTION_MPTCP_CSUMREQD); if (mpext->csum_reqd) mpext->csum = mp_opt.csum; } return true; } static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th) { const struct sock *ssk = (const struct sock *)tp; struct mptcp_subflow_context *subflow; u64 ack_seq, rcv_wnd_old, rcv_wnd_new; struct mptcp_sock *msk; u32 new_win; u64 win; subflow = mptcp_subflow_ctx(ssk); msk = mptcp_sk(subflow->conn); ack_seq = READ_ONCE(msk->ack_seq); rcv_wnd_new = ack_seq + tp->rcv_wnd; rcv_wnd_old = atomic64_read(&msk->rcv_wnd_sent); if (after64(rcv_wnd_new, 
rcv_wnd_old)) { u64 rcv_wnd; for (;;) { rcv_wnd = atomic64_cmpxchg(&msk->rcv_wnd_sent, rcv_wnd_old, rcv_wnd_new); if (rcv_wnd == rcv_wnd_old) break; rcv_wnd_old = rcv_wnd; if (before64(rcv_wnd_new, rcv_wnd_old)) { MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICTUPDATE); goto raise_win; } MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICT); } goto update_wspace; } if (rcv_wnd_new != rcv_wnd_old) { raise_win: /* The msk-level rcv wnd is after the tcp level one, * sync the latter. */ rcv_wnd_new = rcv_wnd_old; win = rcv_wnd_old - ack_seq; tp->rcv_wnd = min_t(u64, win, U32_MAX); new_win = tp->rcv_wnd; /* Make sure we do not exceed the maximum possible * scaled window. */ if (unlikely(th->syn)) new_win = min(new_win, 65535U) << tp->rx_opt.rcv_wscale; if (!tp->rx_opt.rcv_wscale && READ_ONCE(sock_net(ssk)->ipv4.sysctl_tcp_workaround_signed_windows)) new_win = min(new_win, MAX_TCP_WINDOW); else new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale)); /* RFC1323 scaling applied */ new_win >>= tp->rx_opt.rcv_wscale; th->window = htons(new_win); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDSHARED); } update_wspace: WRITE_ONCE(msk->old_wspace, tp->rcv_wnd); subflow->rcv_wnd_sent = rcv_wnd_new; } static void mptcp_track_rwin(struct tcp_sock *tp) { const struct sock *ssk = (const struct sock *)tp; struct mptcp_subflow_context *subflow; struct mptcp_sock *msk; if (!ssk) return; subflow = mptcp_subflow_ctx(ssk); msk = mptcp_sk(subflow->conn); WRITE_ONCE(msk->old_wspace, tp->rcv_wnd); } __sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum) { struct csum_pseudo_header header; __wsum csum; /* cfr RFC 8684 3.3.1.: * the data sequence number used in the pseudo-header is * always the 64-bit value, irrespective of what length is used in the * DSS option itself. 
*/ header.data_seq = cpu_to_be64(data_seq); header.subflow_seq = htonl(subflow_seq); header.data_len = htons(data_len); header.csum = 0; csum = csum_partial(&header, sizeof(header), sum); return csum_fold(csum); } static __sum16 mptcp_make_csum(const struct mptcp_ext *mpext) { return __mptcp_make_csum(mpext->data_seq, mpext->subflow_seq, mpext->data_len, ~csum_unfold(mpext->csum)); } static void put_len_csum(u16 len, __sum16 csum, void *data) { __sum16 *sumptr = data + 2; __be16 *ptr = data; put_unaligned_be16(len, ptr); put_unaligned(csum, sumptr); } void mptcp_write_options(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp, struct mptcp_out_options *opts) { const struct sock *ssk = (const struct sock *)tp; struct mptcp_subflow_context *subflow; /* Which options can be used together? * * X: mutually exclusive * O: often used together * C: can be used together in some cases * P: could be used together but we prefer not to (optimisations) * * Opt: | MPC | MPJ | DSS | ADD | RM | PRIO | FAIL | FC | * ------|------|------|------|------|------|------|------|------| * MPC |------|------|------|------|------|------|------|------| * MPJ | X |------|------|------|------|------|------|------| * DSS | X | X |------|------|------|------|------|------| * ADD | X | X | P |------|------|------|------|------| * RM | C | C | C | P |------|------|------|------| * PRIO | X | C | C | C | C |------|------|------| * FAIL | X | X | C | X | X | X |------|------| * FC | X | X | X | X | X | X | X |------| * RST | X | X | X | X | X | X | O | O | * ------|------|------|------|------|------|------|------|------| * * The same applies in mptcp_established_options() function. 
*/ if (likely(OPTION_MPTCP_DSS & opts->suboptions)) { struct mptcp_ext *mpext = &opts->ext_copy; u8 len = TCPOLEN_MPTCP_DSS_BASE; u8 flags = 0; if (mpext->use_ack) { flags = MPTCP_DSS_HAS_ACK; if (mpext->ack64) { len += TCPOLEN_MPTCP_DSS_ACK64; flags |= MPTCP_DSS_ACK64; } else { len += TCPOLEN_MPTCP_DSS_ACK32; } } if (mpext->use_map) { len += TCPOLEN_MPTCP_DSS_MAP64; /* Use only 64-bit mapping flags for now, add * support for optional 32-bit mappings later. */ flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64; if (mpext->data_fin) flags |= MPTCP_DSS_DATA_FIN; if (opts->csum_reqd) len += TCPOLEN_MPTCP_DSS_CHECKSUM; } *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags); if (mpext->use_ack) { if (mpext->ack64) { put_unaligned_be64(mpext->data_ack, ptr); ptr += 2; } else { put_unaligned_be32(mpext->data_ack32, ptr); ptr += 1; } } if (mpext->use_map) { put_unaligned_be64(mpext->data_seq, ptr); ptr += 2; put_unaligned_be32(mpext->subflow_seq, ptr); ptr += 1; if (opts->csum_reqd) { /* data_len == 0 is reserved for the infinite mapping, * the checksum will also be set to 0. */ put_len_csum(mpext->data_len, (mpext->data_len ? 
mptcp_make_csum(mpext) : 0), ptr); } else { put_unaligned_be32(mpext->data_len << 16 | TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); } ptr += 1; } /* We might need to add MP_FAIL options in rare cases */ if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions)) goto mp_fail; } else if (OPTIONS_MPTCP_MPC & opts->suboptions) { u8 len, flag = MPTCP_CAP_HMAC_SHA256; if (OPTION_MPTCP_MPC_SYN & opts->suboptions) { len = TCPOLEN_MPTCP_MPC_SYN; } else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) { len = TCPOLEN_MPTCP_MPC_SYNACK; } else if (opts->data_len) { len = TCPOLEN_MPTCP_MPC_ACK_DATA; if (opts->csum_reqd) len += TCPOLEN_MPTCP_DSS_CHECKSUM; } else { len = TCPOLEN_MPTCP_MPC_ACK; } if (opts->csum_reqd) flag |= MPTCP_CAP_CHECKSUM_REQD; if (!opts->allow_join_id0) flag |= MPTCP_CAP_DENY_JOIN_ID0; *ptr++ = mptcp_option(MPTCPOPT_MP_CAPABLE, len, MPTCP_SUPPORTED_VERSION, flag); if (!((OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) & opts->suboptions)) goto mp_capable_done; put_unaligned_be64(opts->sndr_key, ptr); ptr += 2; if (!((OPTION_MPTCP_MPC_ACK) & opts->suboptions)) goto mp_capable_done; put_unaligned_be64(opts->rcvr_key, ptr); ptr += 2; if (!opts->data_len) goto mp_capable_done; if (opts->csum_reqd) { put_len_csum(opts->data_len, __mptcp_make_csum(opts->data_seq, opts->subflow_seq, opts->data_len, ~csum_unfold(opts->csum)), ptr); } else { put_unaligned_be32(opts->data_len << 16 | TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); } ptr += 1; /* MPC is additionally mutually exclusive with MP_PRIO */ goto mp_capable_done; } else if (OPTIONS_MPTCP_MPJ & opts->suboptions) { if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) { *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN, TCPOLEN_MPTCP_MPJ_SYN, opts->backup, opts->join_id); put_unaligned_be32(opts->token, ptr); ptr += 1; put_unaligned_be32(opts->nonce, ptr); ptr += 1; } else if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) { *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN, TCPOLEN_MPTCP_MPJ_SYNACK, opts->backup, opts->join_id); put_unaligned_be64(opts->thmac, ptr); ptr += 
2; put_unaligned_be32(opts->nonce, ptr); ptr += 1; } else { *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN, TCPOLEN_MPTCP_MPJ_ACK, 0, 0); memcpy(ptr, opts->hmac, MPTCPOPT_HMAC_LEN); ptr += 5; } } else if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) { u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE; u8 echo = MPTCP_ADDR_ECHO; #if IS_ENABLED(CONFIG_MPTCP_IPV6) if (opts->addr.family == AF_INET6) len = TCPOLEN_MPTCP_ADD_ADDR6_BASE; #endif if (opts->addr.port) len += TCPOLEN_MPTCP_PORT_LEN; if (opts->ahmac) { len += sizeof(opts->ahmac); echo = 0; } *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR, len, echo, opts->addr.id); if (opts->addr.family == AF_INET) { memcpy((u8 *)ptr, (u8 *)&opts->addr.addr.s_addr, 4); ptr += 1; } #if IS_ENABLED(CONFIG_MPTCP_IPV6) else if (opts->addr.family == AF_INET6) { memcpy((u8 *)ptr, opts->addr.addr6.s6_addr, 16); ptr += 4; } #endif if (!opts->addr.port) { if (opts->ahmac) { put_unaligned_be64(opts->ahmac, ptr); ptr += 2; } } else { u16 port = ntohs(opts->addr.port); if (opts->ahmac) { u8 *bptr = (u8 *)ptr; put_unaligned_be16(port, bptr); bptr += 2; put_unaligned_be64(opts->ahmac, bptr); bptr += 8; put_unaligned_be16(TCPOPT_NOP << 8 | TCPOPT_NOP, bptr); ptr += 3; } else { put_unaligned_be32(port << 16 | TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); ptr += 1; } } } else if (unlikely(OPTION_MPTCP_FASTCLOSE & opts->suboptions)) { /* FASTCLOSE is mutually exclusive with others except RST */ *ptr++ = mptcp_option(MPTCPOPT_MP_FASTCLOSE, TCPOLEN_MPTCP_FASTCLOSE, 0, 0); put_unaligned_be64(opts->rcvr_key, ptr); ptr += 2; if (OPTION_MPTCP_RST & opts->suboptions) goto mp_rst; return; } else if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions)) { mp_fail: /* MP_FAIL is mutually exclusive with others except RST */ subflow = mptcp_subflow_ctx(ssk); subflow->send_mp_fail = 0; *ptr++ = mptcp_option(MPTCPOPT_MP_FAIL, TCPOLEN_MPTCP_FAIL, 0, 0); put_unaligned_be64(opts->fail_seq, ptr); ptr += 2; if (OPTION_MPTCP_RST & opts->suboptions) goto mp_rst; return; } else if (unlikely(OPTION_MPTCP_RST & 
opts->suboptions)) {
mp_rst:
		*ptr++ = mptcp_option(MPTCPOPT_RST,
				      TCPOLEN_MPTCP_RST,
				      opts->reset_transient,
				      opts->reset_reason);
		return;
	} else if (unlikely(!opts->suboptions)) {
		/* Fallback to TCP */
		mptcp_track_rwin(tp);
		return;
	}

	if (OPTION_MPTCP_PRIO & opts->suboptions) {
		subflow = mptcp_subflow_ctx(ssk);
		subflow->send_mp_prio = 0;

		*ptr++ = mptcp_option(MPTCPOPT_MP_PRIO,
				      TCPOLEN_MPTCP_PRIO,
				      opts->backup, TCPOPT_NOP);

		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPPRIOTX);
	}

mp_capable_done:
	if (OPTION_MPTCP_RM_ADDR & opts->suboptions) {
		u8 i = 1;

		*ptr++ = mptcp_option(MPTCPOPT_RM_ADDR,
				      TCPOLEN_MPTCP_RM_ADDR_BASE + opts->rm_list.nr,
				      0, opts->rm_list.ids[0]);

		while (i < opts->rm_list.nr) {
			u8 id1, id2, id3, id4;

			id1 = opts->rm_list.ids[i];
			id2 = i + 1 < opts->rm_list.nr ? opts->rm_list.ids[i + 1] : TCPOPT_NOP;
			id3 = i + 2 < opts->rm_list.nr ? opts->rm_list.ids[i + 2] : TCPOPT_NOP;
			id4 = i + 3 < opts->rm_list.nr ? opts->rm_list.ids[i + 3] : TCPOPT_NOP;
			put_unaligned_be32(id1 << 24 | id2 << 16 | id3 << 8 | id4, ptr);
			ptr += 1;
			i += 4;
		}
	}

	if (tp)
		mptcp_set_rwin(tp, th);
}

/**
 * mptcp_get_reset_option - build the MP_RST option word for an skb
 * @skb: buffer whose MPTCP extension, if present, supplies the reset fields
 *
 * Reads reset_transient and reset_reason from the extension returned by
 * mptcp_get_ext() and hands them to mptcp_option() together with
 * MPTCPOPT_RST and TCPOLEN_MPTCP_RST.
 *
 * Return: the value produced by mptcp_option(), or htonl(0u) when the skb
 * carries no MPTCP extension.
 */
__be32 mptcp_get_reset_option(const struct sk_buff *skb)
{
	const struct mptcp_ext *ext = mptcp_get_ext(skb);
	u8 flags, reason;

	if (ext) {
		flags = ext->reset_transient;
		reason = ext->reset_reason;

		return mptcp_option(MPTCPOPT_RST, TCPOLEN_MPTCP_RST,
				    flags, reason);
	}

	/* No extension attached to this skb: nothing to encode. */
	return htonl(0u);
}
EXPORT_SYMBOL_GPL(mptcp_get_reset_option);
2 3 5 2 2 66 62 63 62 1 6 5 1 1 1 1 1 1 67 67 4 63 6 98 94 543 455 94 65 66 1 4 67 67 2 67 66 63 5 1 6 6 10 6 6 6 4 5 4 5 5 5 220 220 220 220 7 1 4 2 2 11 11 4 7 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 
487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 
987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 
1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 // SPDX-License-Identifier: GPL-2.0 /* * Basic worker thread pool for io_uring * * Copyright (C) 2019 Jens Axboe * */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/errno.h> #include <linux/sched/signal.h> #include <linux/percpu.h> #include <linux/slab.h> #include <linux/rculist_nulls.h> #include <linux/cpu.h> #include <linux/cpuset.h> #include <linux/task_work.h> #include <linux/audit.h> #include <linux/mmu_context.h> #include <linux/sched/sysctl.h> #include <uapi/linux/io_uring.h> #include "io-wq.h" #include "slist.h" #include "io_uring.h" #define WORKER_IDLE_TIMEOUT (5 * HZ) #define WORKER_INIT_LIMIT 3 enum { IO_WORKER_F_UP = 0, /* up and active */ IO_WORKER_F_RUNNING = 1, /* account as running */ IO_WORKER_F_FREE = 2, /* worker on free list */ }; enum { IO_WQ_BIT_EXIT = 0, /* wq exiting */ IO_WQ_BIT_EXIT_ON_IDLE = 1, /* allow all workers to exit on idle */ }; enum { IO_ACCT_STALLED_BIT = 0, /* stalled on hash */ }; /* * One for each thread in a wq pool */ struct io_worker { refcount_t ref; unsigned long flags; struct hlist_nulls_node nulls_node; struct list_head all_list; struct task_struct *task; struct io_wq *wq; struct io_wq_acct *acct; struct io_wq_work *cur_work; raw_spinlock_t lock; struct completion ref_done; unsigned long create_state; struct callback_head create_work; int 
init_retries;
	union {
		struct rcu_head rcu;
		struct delayed_work work;
	};
};

#if BITS_PER_LONG == 64
#define IO_WQ_HASH_ORDER	6
#else
#define IO_WQ_HASH_ORDER	5
#endif

/* Number of hash buckets; sizes io_wq::hash_tail[]. */
#define IO_WQ_NR_HASH_BUCKETS	(1u << IO_WQ_HASH_ORDER)

/*
 * Per accounting class state. An io_wq embeds one of these for each
 * IO_WQ_ACCT_* index.
 */
struct io_wq_acct {
	/**
	 * Protects access to the worker lists.
	 */
	raw_spinlock_t workers_lock;

	unsigned nr_workers;
	unsigned max_workers;
	atomic_t nr_running;

	/**
	 * The list of free workers.  Protected by #workers_lock
	 * (write) and RCU (read).
	 */
	struct hlist_nulls_head free_list;

	/**
	 * The list of all workers.  Protected by #workers_lock
	 * (write) and RCU (read).
	 */
	struct list_head all_list;

	/* Taken by the code in this file around accesses to work_list. */
	raw_spinlock_t lock;
	struct io_wq_work_list work_list;
	/* Bit field; holds IO_ACCT_STALLED_BIT. */
	unsigned long flags;
};

/* Indices into io_wq::acct[]. */
enum {
	IO_WQ_ACCT_BOUND,
	IO_WQ_ACCT_UNBOUND,
	IO_WQ_ACCT_NR,
};

/*
 * Per io_wq state
 */
struct io_wq {
	unsigned long state;

	struct io_wq_hash *hash;

	atomic_t worker_refs;
	struct completion worker_done;

	struct hlist_node cpuhp_node;

	struct task_struct *task;

	struct io_wq_acct acct[IO_WQ_ACCT_NR];

	struct wait_queue_entry wait;

	/* Last queued work item of each hash bucket, or NULL if none. */
	struct io_wq_work *hash_tail[IO_WQ_NR_HASH_BUCKETS];

	cpumask_var_t cpu_mask;
};

static enum cpuhp_state io_wq_online;

/*
 * Argument block for the cancel helpers: @fn is called with (work, @data)
 * to decide whether a work item matches. @nr_running and @nr_pending are
 * bumped by the helpers for each running/pending match, and @cancel_all
 * selects whether iteration stops after the first running match.
 */
struct io_cb_cancel_data {
	work_cancel_fn *fn;
	void *data;
	int nr_running;
	int nr_pending;
	bool cancel_all;
};

static bool create_io_worker(struct io_wq *wq, struct io_wq_acct *acct);
static void io_wq_dec_running(struct io_worker *worker);
static bool io_acct_cancel_pending_work(struct io_wq *wq,
					struct io_wq_acct *acct,
					struct io_cb_cancel_data *match);
static void create_worker_cb(struct callback_head *cb);
static void io_wq_cancel_tw_create(struct io_wq *wq);

/* Extract the bucket index stored above IO_WQ_HASH_SHIFT in a flags word. */
static inline unsigned int __io_get_work_hash(unsigned int work_flags)
{
	return work_flags >> IO_WQ_HASH_SHIFT;
}

/* Same as __io_get_work_hash(), reading the flags from @work. */
static inline unsigned int io_get_work_hash(struct io_wq_work *work)
{
	return __io_get_work_hash(atomic_read(&work->flags));
}

/* Take a worker reference; fails if the refcount has already hit zero. */
static bool io_worker_get(struct io_worker *worker)
{
	return refcount_inc_not_zero(&worker->ref);
}

/* Drop a worker reference; the final put completes ->ref_done. */
static void io_worker_release(struct io_worker *worker)
{
	if (refcount_dec_and_test(&worker->ref))
		complete(&worker->ref_done);
}

/* Return the bound or unbound accounting slot of @wq. */
static inline struct io_wq_acct *io_get_acct(struct io_wq *wq, bool bound)
{
	return &wq->acct[bound ? IO_WQ_ACCT_BOUND : IO_WQ_ACCT_UNBOUND];
}

/* Pick the accounting slot from a work flags word (IO_WQ_WORK_UNBOUND). */
static inline struct io_wq_acct *io_work_get_acct(struct io_wq *wq,
						  unsigned int work_flags)
{
	return io_get_acct(wq, !(work_flags & IO_WQ_WORK_UNBOUND));
}

/* Return the accounting slot @worker was created for. */
static inline struct io_wq_acct *io_wq_get_acct(struct io_worker *worker)
{
	return worker->acct;
}

/* Drop a wq->worker_refs reference; the final put completes ->worker_done. */
static void io_worker_ref_put(struct io_wq *wq)
{
	if (atomic_dec_and_test(&wq->worker_refs))
		complete(&wq->worker_done);
}

/*
 * Report whether the wq owning the current worker has IO_WQ_BIT_EXIT set.
 * Warns once and returns true if the caller is not an io-wq worker.
 */
bool io_wq_worker_stopped(void)
{
	struct io_worker *worker = current->worker_private;

	if (WARN_ON_ONCE(!io_wq_current_is_worker()))
		return true;

	return test_bit(IO_WQ_BIT_EXIT, &worker->wq->state);
}

/*
 * Accounting rollback for a create task_work item that was canceled before
 * it ran: drops nr_running, nr_workers, one wq worker ref, the create_state
 * bit and one worker reference.
 */
static void io_worker_cancel_cb(struct io_worker *worker)
{
	struct io_wq_acct *acct = io_wq_get_acct(worker);
	struct io_wq *wq = worker->wq;

	atomic_dec(&acct->nr_running);
	raw_spin_lock(&acct->workers_lock);
	acct->nr_workers--;
	raw_spin_unlock(&acct->workers_lock);
	io_worker_ref_put(wq);
	clear_bit_unlock(0, &worker->create_state);
	io_worker_release(worker);
}

/*
 * task_work match callback: true for a create_worker_cb item embedded in
 * the worker passed as @data.
 */
static bool io_task_worker_match(struct callback_head *cb, void *data)
{
	struct io_worker *worker;

	if (cb->func != create_worker_cb)
		return false;
	worker = container_of(cb, struct io_worker, create_work);
	return worker == data;
}

/*
 * Tear down the calling worker: cancel any create task_work it still has
 * queued on wq->task, wait for all other references to go away, unlink it
 * from the worker lists, free it via RCU and exit the thread.
 */
static void io_worker_exit(struct io_worker *worker)
{
	struct io_wq *wq = worker->wq;
	struct io_wq_acct *acct = io_wq_get_acct(worker);

	while (1) {
		struct callback_head *cb = task_work_cancel_match(wq->task,
						io_task_worker_match, worker);

		if (!cb)
			break;
		io_worker_cancel_cb(worker);
	}

	/* Drop our own reference, then wait for the remaining holders. */
	io_worker_release(worker);
	wait_for_completion(&worker->ref_done);

	raw_spin_lock(&acct->workers_lock);
	if (test_bit(IO_WORKER_F_FREE, &worker->flags))
		hlist_nulls_del_rcu(&worker->nulls_node);
	list_del_rcu(&worker->all_list);
	raw_spin_unlock(&acct->workers_lock);
	io_wq_dec_running(worker);
	/*
	 * this worker is a goner, clear ->worker_private to avoid any
	 * inc/dec running calls that could happen as part of exit from
	 * touching 'worker'.
	 */
	current->worker_private = NULL;

	kfree_rcu(worker, rcu);
	io_worker_ref_put(wq);
	do_exit(0);
}

/* True if @acct is not marked stalled and has queued work. */
static inline bool __io_acct_run_queue(struct io_wq_acct *acct)
{
	return !test_bit(IO_ACCT_STALLED_BIT, &acct->flags) &&
		!wq_list_empty(&acct->work_list);
}

/*
 * If there's work to do, returns true with acct->lock acquired. If not,
 * returns false with no lock held.
 */
static inline bool io_acct_run_queue(struct io_wq_acct *acct)
	__acquires(&acct->lock)
{
	raw_spin_lock(&acct->lock);
	if (__io_acct_run_queue(acct))
		return true;

	raw_spin_unlock(&acct->lock);
	return false;
}

/*
 * Check head of free list for an available worker. If one isn't available,
 * caller must create one.
 */
static bool io_acct_activate_free_worker(struct io_wq_acct *acct)
	__must_hold(RCU)
{
	struct hlist_nulls_node *n;
	struct io_worker *worker;

	/*
	 * Iterate free_list and see if we can find an idle worker to
	 * activate. If a given worker is on the free_list but in the process
	 * of exiting, keep trying.
	 */
	hlist_nulls_for_each_entry_rcu(worker, n, &acct->free_list, nulls_node) {
		if (!io_worker_get(worker))
			continue;
		/*
		 * If the worker is already running, it's either already
		 * starting work or finishing work. In either case, if it does
		 * go to sleep, we'll kick off a new task for this work anyway.
		 */
		wake_up_process(worker->task);
		io_worker_release(worker);
		return true;
	}

	return false;
}

/*
 * We need a worker. If we find a free one, we're good. If not, and we're
 * below the max number of workers, create one.
 *
 * Returns true without creating anything when the account is already at
 * max_workers; otherwise returns the result of create_io_worker().
 */
static bool io_wq_create_worker(struct io_wq *wq, struct io_wq_acct *acct)
{
	/*
	 * Most likely an attempt to queue unbounded work on an io_wq that
	 * wasn't setup with any unbounded workers.
	 */
	if (unlikely(!acct->max_workers))
		pr_warn_once("io-wq is not configured for unbound workers");

	raw_spin_lock(&acct->workers_lock);
	if (acct->nr_workers >= acct->max_workers) {
		raw_spin_unlock(&acct->workers_lock);
		return true;
	}
	acct->nr_workers++;
	raw_spin_unlock(&acct->workers_lock);
	/* Accounted up front; create_io_worker() undoes this on failure. */
	atomic_inc(&acct->nr_running);
	atomic_inc(&wq->worker_refs);
	return create_io_worker(wq, acct);
}

/* Account @worker as running in its accounting slot. */
static void io_wq_inc_running(struct io_worker *worker)
{
	struct io_wq_acct *acct = io_wq_get_acct(worker);

	atomic_inc(&acct->nr_running);
}

/*
 * task_work callback queued by io_wq_dec_running(). Wakes a free worker if
 * there is one, else creates a new worker when below max_workers. If
 * neither a wake-up nor a create happens, or a free worker was woken, the
 * nr_running and worker_refs counts are dropped again. Always releases the
 * create_state bit and a worker reference.
 */
static void create_worker_cb(struct callback_head *cb)
{
	struct io_worker *worker;
	struct io_wq *wq;

	struct io_wq_acct *acct;
	bool activated_free_worker, do_create = false;

	worker = container_of(cb, struct io_worker, create_work);
	wq = worker->wq;
	acct = worker->acct;

	rcu_read_lock();
	activated_free_worker = io_acct_activate_free_worker(acct);
	rcu_read_unlock();
	if (activated_free_worker)
		goto no_need_create;

	raw_spin_lock(&acct->workers_lock);

	if (acct->nr_workers < acct->max_workers) {
		acct->nr_workers++;
		do_create = true;
	}
	raw_spin_unlock(&acct->workers_lock);
	if (do_create) {
		create_io_worker(wq, acct);
	} else {
no_need_create:
		atomic_dec(&acct->nr_running);
		io_worker_ref_put(wq);
	}
	clear_bit_unlock(0, &worker->create_state);
	io_worker_release(worker);
}

/*
 * Queue @func as task_work on wq->task on behalf of @worker.
 *
 * Returns true once the task_work has been added. On every failure path
 * the function drops acct->nr_running and one wq->worker_refs reference
 * and returns false.
 */
static bool io_queue_worker_create(struct io_worker *worker,
				   struct io_wq_acct *acct,
				   task_work_func_t func)
{
	struct io_wq *wq = worker->wq;

	/* raced with exit, just ignore create call */
	if (test_bit(IO_WQ_BIT_EXIT, &wq->state))
		goto fail;
	if (!io_worker_get(worker))
		goto fail;
	/*
	 * create_state manages ownership of create_work/index. We should
	 * only need one entry per worker, as the worker going to sleep
	 * will trigger the condition, and waking will clear it once it
	 * runs the task_work.
	 */
	if (test_bit(0, &worker->create_state) ||
	    test_and_set_bit_lock(0, &worker->create_state))
		goto fail_release;

	/* Temporary reference held across the task_work_add() call. */
	atomic_inc(&wq->worker_refs);
	init_task_work(&worker->create_work, func);
	if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL)) {
		/*
		 * EXIT may have been set after checking it above, check after
		 * adding the task_work and remove any creation item if it is
		 * now set. wq exit does that too, but we can have added this
		 * work item after we canceled in io_wq_exit_workers().
		 */
		if (test_bit(IO_WQ_BIT_EXIT, &wq->state))
			io_wq_cancel_tw_create(wq);
		io_worker_ref_put(wq);
		return true;
	}
	io_worker_ref_put(wq);
	clear_bit_unlock(0, &worker->create_state);
fail_release:
	io_worker_release(worker);
fail:
	atomic_dec(&acct->nr_running);
	io_worker_ref_put(wq);
	return false;
}

/* Defer if current and next work are both hashed to the same chain */
static bool io_wq_hash_defer(struct io_wq_work *work, struct io_wq_acct *acct)
{
	unsigned int hash, work_flags;
	struct io_wq_work *next;

	lockdep_assert_held(&acct->lock);

	work_flags = atomic_read(&work->flags);
	if (!__io_wq_is_hashed(work_flags))
		return false;

	/* should not happen, io_acct_run_queue() said we had work */
	if (wq_list_empty(&acct->work_list))
		return true;

	hash = __io_get_work_hash(work_flags);
	next = container_of(acct->work_list.first, struct io_wq_work, list);
	work_flags = atomic_read(&next->flags);
	if (!__io_wq_is_hashed(work_flags))
		return false;
	return hash == __io_get_work_hash(work_flags);
}

/*
 * Drop @worker from the running count. If it was the last running worker,
 * it has current work, and the queue is runnable and not deferred by
 * io_wq_hash_defer(), queue a create_worker_cb task_work item.
 */
static void io_wq_dec_running(struct io_worker *worker)
{
	struct io_wq_acct *acct = io_wq_get_acct(worker);
	struct io_wq *wq = worker->wq;

	if (!test_bit(IO_WORKER_F_UP, &worker->flags))
		return;

	if (!atomic_dec_and_test(&acct->nr_running))
		return;
	if (!worker->cur_work)
		return;
	/* On success this returns with acct->lock held. */
	if (!io_acct_run_queue(acct))
		return;
	if (io_wq_hash_defer(worker->cur_work, acct)) {
		raw_spin_unlock(&acct->lock);
		return;
	}

	raw_spin_unlock(&acct->lock);
	/* io_queue_worker_create() drops both counts again if it fails. */
	atomic_inc(&acct->nr_running);
	atomic_inc(&wq->worker_refs);
	io_queue_worker_create(worker, acct, create_worker_cb);
}

/*
 * Worker will start processing some work. Move it to the busy list, if
 * it's currently on the freelist
 */
static void __io_worker_busy(struct io_wq_acct *acct, struct io_worker *worker)
{
	if (test_bit(IO_WORKER_F_FREE, &worker->flags)) {
		clear_bit(IO_WORKER_F_FREE, &worker->flags);
		raw_spin_lock(&acct->workers_lock);
		hlist_nulls_del_init_rcu(&worker->nulls_node);
		raw_spin_unlock(&acct->workers_lock);
	}
}

/*
 * No work, worker going to sleep. Move to freelist.
 */
static void __io_worker_idle(struct io_wq_acct *acct, struct io_worker *worker)
	__must_hold(acct->workers_lock)
{
	if (!test_bit(IO_WORKER_F_FREE, &worker->flags)) {
		set_bit(IO_WORKER_F_FREE, &worker->flags);
		hlist_nulls_add_head_rcu(&worker->nulls_node, &acct->free_list);
	}
}

/*
 * Put wq->wait on the shared hash wait queue unless it is already queued.
 * If bucket @hash turns out to be clear after queuing, undo the wait and
 * return true so the caller can retry immediately; otherwise return false.
 */
static bool io_wait_on_hash(struct io_wq *wq, unsigned int hash)
{
	bool ret = false;

	spin_lock_irq(&wq->hash->wait.lock);
	if (list_empty(&wq->wait.entry)) {
		__add_wait_queue(&wq->hash->wait, &wq->wait);
		if (!test_bit(hash, &wq->hash->map)) {
			__set_current_state(TASK_RUNNING);
			list_del_init(&wq->wait.entry);
			ret = true;
		}
	}
	spin_unlock_irq(&wq->hash->wait.lock);
	return ret;
}

/*
 * Pick the next runnable work item from @acct's list.
 *
 * Unhashed work is returned directly. For hashed work the whole
 * [work, tail] run of that bucket is cut from the list once the bucket bit
 * in wq->hash->map has been claimed. If only busy buckets were found, the
 * account is marked stalled and NULL is returned; acct->lock is dropped and
 * re-taken around io_wait_on_hash() in that case.
 */
static struct io_wq_work *io_get_next_work(struct io_wq_acct *acct,
					   struct io_wq *wq)
	__must_hold(acct->lock)
{
	struct io_wq_work_node *node, *prev;
	struct io_wq_work *work, *tail;
	unsigned int stall_hash = -1U;

	wq_list_for_each(node, prev, &acct->work_list) {
		unsigned int work_flags;
		unsigned int hash;

		work = container_of(node, struct io_wq_work, list);

		/* not hashed, can run anytime */
		work_flags = atomic_read(&work->flags);
		if (!__io_wq_is_hashed(work_flags)) {
			wq_list_del(&acct->work_list, node, prev);
			return work;
		}

		hash = __io_get_work_hash(work_flags);
		/* all items with this hash lie in [work, tail] */
		tail = wq->hash_tail[hash];

		/* hashed, can run if not already running */
		if (!test_and_set_bit(hash, &wq->hash->map)) {
			wq->hash_tail[hash] = NULL;
			wq_list_cut(&acct->work_list, &tail->list, prev);
			return work;
		}
		if (stall_hash == -1U)
			stall_hash = hash;
		/* fast forward to a next hash, for-each will fix up @prev */
		node = &tail->list;
	}

	if (stall_hash != -1U) {
		bool unstalled;

		/*
		 * Set this before dropping the lock to avoid racing with new
		 * work being added and clearing the stalled bit.
		 */
		set_bit(IO_ACCT_STALLED_BIT, &acct->flags);
		raw_spin_unlock(&acct->lock);
		unstalled = io_wait_on_hash(wq, stall_hash);
		raw_spin_lock(&acct->lock);
		if (unstalled) {
			clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
			if (wq_has_sleeper(&wq->hash->wait))
				wake_up(&wq->hash->wait);
		}
	}

	return NULL;
}

/*
 * Publish @work as the worker's current item under worker->lock. When a
 * new item is being assigned, pending task_work is run and the CPU may be
 * yielded first.
 */
static void io_assign_current_work(struct io_worker *worker,
				   struct io_wq_work *work)
{
	if (work) {
		io_run_task_work();
		cond_resched();
	}

	raw_spin_lock(&worker->lock);
	worker->cur_work = work;
	raw_spin_unlock(&worker->lock);
}

/*
 * Called with acct->lock held, drops it before returning
 */
static void io_worker_handle_work(struct io_wq_acct *acct,
				  struct io_worker *worker)
	__releases(&acct->lock)
{
	struct io_wq *wq = worker->wq;

	do {
		bool do_kill = test_bit(IO_WQ_BIT_EXIT, &wq->state);
		struct io_wq_work *work;

		/*
		 * If we got some work, mark us as busy. If we didn't, but
		 * the list isn't empty, it means we stalled on hashed work.
		 * Mark us stalled so we don't keep looking for work when we
		 * can't make progress, any work completion or insertion will
		 * clear the stalled flag.
		 */
		work = io_get_next_work(acct, wq);
		if (work) {
			/*
			 * Make sure cancelation can find this, even before
			 * it becomes the active work. That avoids a window
			 * where the work has been removed from our general
			 * work list, but isn't yet discoverable as the
			 * current work item for this worker.
			 */
			raw_spin_lock(&worker->lock);
			worker->cur_work = work;
			raw_spin_unlock(&worker->lock);
		}

		raw_spin_unlock(&acct->lock);

		if (!work)
			break;

		__io_worker_busy(acct, worker);

		io_assign_current_work(worker, work);
		__set_current_state(TASK_RUNNING);

		/* handle a whole dependent link */
		do {
			struct io_wq_work *next_hashed, *linked;
			unsigned int work_flags = atomic_read(&work->flags);
			unsigned int hash = __io_wq_is_hashed(work_flags)
				? __io_get_work_hash(work_flags)
				: -1U;

			next_hashed = wq_next_work(work);

			/* wq is exiting: flag unbound work as canceled before running it */
			if (do_kill &&
			    (work_flags & IO_WQ_WORK_UNBOUND))
				atomic_or(IO_WQ_WORK_CANCEL, &work->flags);
			io_wq_submit_work(work);
			io_assign_current_work(worker, NULL);

			linked = io_wq_free_work(work);
			work = next_hashed;
			/* Run an unhashed linked item inline; otherwise requeue it below. */
			if (!work && linked && !io_wq_is_hashed(linked)) {
				work = linked;
				linked = NULL;
			}
			io_assign_current_work(worker, work);
			if (linked)
				io_wq_enqueue(wq, linked);

			if (hash != -1U && !next_hashed) {
				/* serialize hash clear with wake_up() */
				spin_lock_irq(&wq->hash->wait.lock);
				clear_bit(hash, &wq->hash->map);
				clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
				spin_unlock_irq(&wq->hash->wait.lock);
				if (wq_has_sleeper(&wq->hash->wait))
					wake_up(&wq->hash->wait);
			}
		} while (work);

		if (!__io_acct_run_queue(acct))
			break;
		raw_spin_lock(&acct->lock);
	} while (1);
}

/*
 * Thread function of an io-wq worker. Loops processing queued work and
 * sleeping for up to WORKER_IDLE_TIMEOUT until the wq is marked exiting or
 * the idle-exit conditions below are met, then tears the worker down via
 * io_worker_exit().
 */
static int io_wq_worker(void *data)
{
	struct io_worker *worker = data;
	struct io_wq_acct *acct = io_wq_get_acct(worker);
	struct io_wq *wq = worker->wq;
	bool exit_mask = false, last_timeout = false;
	char buf[TASK_COMM_LEN] = {};

	set_mask_bits(&worker->flags, 0,
		      BIT(IO_WORKER_F_UP) | BIT(IO_WORKER_F_RUNNING));

	snprintf(buf, sizeof(buf), "iou-wrk-%d", wq->task->pid);
	set_task_comm(current, buf);

	while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
		long ret;

		set_current_state(TASK_INTERRUPTIBLE);

		/*
		 * If we have work to do, io_acct_run_queue() returns with
		 * the acct->lock held. If not, it will drop it.
		 */
		while (io_acct_run_queue(acct))
			io_worker_handle_work(acct, worker);

		raw_spin_lock(&acct->workers_lock);
		/*
		 * Last sleep timed out. Exit if we're not the last worker,
		 * or if someone modified our affinity. If wq is marked
		 * idle-exit, drop the worker as well. This is used to avoid
		 * keeping io-wq workers around for tasks that no longer have
		 * any active io_uring instances.
		 */
		if ((last_timeout && (exit_mask || acct->nr_workers > 1)) ||
		    test_bit(IO_WQ_BIT_EXIT_ON_IDLE, &wq->state)) {
			acct->nr_workers--;
			raw_spin_unlock(&acct->workers_lock);
			__set_current_state(TASK_RUNNING);
			break;
		}
		last_timeout = false;
		__io_worker_idle(acct, worker);
		raw_spin_unlock(&acct->workers_lock);
		if (io_run_task_work())
			continue;
		ret = schedule_timeout(WORKER_IDLE_TIMEOUT);
		if (signal_pending(current)) {
			struct ksignal ksig;

			if (!get_signal(&ksig))
				continue;
			break;
		}
		if (!ret) {
			last_timeout = true;
			exit_mask = !cpumask_test_cpu(raw_smp_processor_id(),
							wq->cpu_mask);
		}
	}

	/* On wq exit, make one more pass over any work that is still queued. */
	if (test_bit(IO_WQ_BIT_EXIT, &wq->state) && io_acct_run_queue(acct))
		io_worker_handle_work(acct, worker);

	io_worker_exit(worker);
	return 0;
}

/*
 * Called when a worker is scheduled in. Mark us as currently running.
 */
void io_wq_worker_running(struct task_struct *tsk)
{
	struct io_worker *worker = tsk->worker_private;

	if (!worker)
		return;
	if (!test_bit(IO_WORKER_F_UP, &worker->flags))
		return;
	if (test_bit(IO_WORKER_F_RUNNING, &worker->flags))
		return;
	set_bit(IO_WORKER_F_RUNNING, &worker->flags);
	io_wq_inc_running(worker);
}

/*
 * Called when worker is going to sleep. If there are no workers currently
 * running and we have work pending, wake up a free one or create a new one.
*/
void io_wq_worker_sleeping(struct task_struct *tsk)
{
	struct io_worker *worker = tsk->worker_private;

	if (!worker)
		return;
	if (!test_bit(IO_WORKER_F_UP, &worker->flags))
		return;
	if (!test_bit(IO_WORKER_F_RUNNING, &worker->flags))
		return;

	clear_bit(IO_WORKER_F_RUNNING, &worker->flags);
	io_wq_dec_running(worker);
}

/*
 * Bind a freshly created task to @worker, apply the wq CPU mask, publish
 * the worker on the account's free list and all-workers list, then start
 * the task.
 */
static void io_init_new_worker(struct io_wq *wq, struct io_wq_acct *acct, struct io_worker *worker,
			       struct task_struct *tsk)
{
	tsk->worker_private = worker;
	worker->task = tsk;
	set_cpus_allowed_ptr(tsk, wq->cpu_mask);

	raw_spin_lock(&acct->workers_lock);
	hlist_nulls_add_head_rcu(&worker->nulls_node, &acct->free_list);
	list_add_tail_rcu(&worker->all_list, &acct->all_list);
	set_bit(IO_WORKER_F_FREE, &worker->flags);
	raw_spin_unlock(&acct->workers_lock);
	wake_up_new_task(tsk);
}

/* Match callback that accepts every work item. */
static bool io_wq_work_match_all(struct io_wq_work *work, void *data)
{
	return true;
}

/*
 * Decide whether a failed thread creation should be retried. Counts the
 * attempt in worker->init_retries unless a fatal signal is pending.
 */
static inline bool io_should_retry_thread(struct io_worker *worker, long err)
{
	/*
	 * Prevent perpetual task_work retry, if the task (or its group) is
	 * exiting.
	 */
	if (fatal_signal_pending(current))
		return false;

	worker->init_retries++;
	switch (err) {
	case -EAGAIN:
		/* Bounded retry: give up after WORKER_INIT_LIMIT attempts. */
		return worker->init_retries <= WORKER_INIT_LIMIT;
	/* Analogous to a fork() syscall, always retry on a restartable error */
	case -ERESTARTSYS:
	case -ERESTARTNOINTR:
	case -ERESTARTNOHAND:
		return true;
	default:
		return false;
	}
}

/*
 * Schedule worker->work to run again after init_retries * 5 milliseconds.
 */
static void queue_create_worker_retry(struct io_worker *worker)
{
	/*
	 * We only bother retrying because there's a chance that the
	 * failure to create a worker is due to some temporary condition
	 * in the forking task (e.g. outstanding signal); give the task
	 * some time to clear that condition.
	 */
	schedule_delayed_work(&worker->work,
			      msecs_to_jiffies(worker->init_retries * 5));
}

/*
 * task_work callback for a creation retry. Tries create_io_thread() again;
 * on success the worker is initialised and started, on a non-retryable
 * error the accounting is rolled back and the worker freed, otherwise
 * another delayed retry is queued.
 */
static void create_worker_cont(struct callback_head *cb)
{
	struct io_worker *worker;
	struct task_struct *tsk;
	struct io_wq *wq;
	struct io_wq_acct *acct;

	worker = container_of(cb, struct io_worker, create_work);
	clear_bit_unlock(0, &worker->create_state);
	wq = worker->wq;
	acct = io_wq_get_acct(worker);
	tsk = create_io_thread(io_wq_worker, worker, NUMA_NO_NODE);
	if (!IS_ERR(tsk)) {
		io_init_new_worker(wq, acct, worker, tsk);
		io_worker_release(worker);
		return;
	} else if (!io_should_retry_thread(worker, PTR_ERR(tsk))) {
		atomic_dec(&acct->nr_running);
		raw_spin_lock(&acct->workers_lock);
		acct->nr_workers--;
		if (!acct->nr_workers) {
			struct io_cb_cancel_data match = {
				.fn		= io_wq_work_match_all,
				.cancel_all	= true,
			};

			raw_spin_unlock(&acct->workers_lock);
			/* No workers left for this account: cancel everything pending. */
			while (io_acct_cancel_pending_work(wq, acct, &match))
				;
		} else {
			raw_spin_unlock(&acct->workers_lock);
		}
		io_worker_ref_put(wq);
		kfree(worker);
		return;
	}

	/* re-create attempts grab a new worker ref, drop the existing one */
	io_worker_release(worker);
	queue_create_worker_retry(worker);
}

/*
 * Delayed-work handler for creation retries: queues create_worker_cont as
 * task_work and frees the worker if that queuing fails.
 */
static void io_workqueue_create(struct work_struct *work)
{
	struct io_worker *worker = container_of(work, struct io_worker,
						work.work);
	struct io_wq_acct *acct = io_wq_get_acct(worker);

	if (!io_queue_worker_create(worker, acct, create_worker_cont))
		kfree(worker);
}

/*
 * Allocate a worker and start its thread.
 *
 * Returns true when the thread was started or a retry was scheduled.
 * Returns false on allocation failure or a non-retryable thread creation
 * error, after dropping acct->nr_running, acct->nr_workers and one
 * wq->worker_refs reference.
 */
static bool create_io_worker(struct io_wq *wq, struct io_wq_acct *acct)
{
	struct io_worker *worker;
	struct task_struct *tsk;

	__set_current_state(TASK_RUNNING);

	worker = kzalloc_obj(*worker);
	if (!worker) {
fail:
		atomic_dec(&acct->nr_running);
		raw_spin_lock(&acct->workers_lock);
		acct->nr_workers--;
		raw_spin_unlock(&acct->workers_lock);
		io_worker_ref_put(wq);
		return false;
	}

	refcount_set(&worker->ref, 1);
	worker->wq = wq;
	worker->acct = acct;
	raw_spin_lock_init(&worker->lock);
	init_completion(&worker->ref_done);

	tsk = create_io_thread(io_wq_worker, worker, NUMA_NO_NODE);
	if (!IS_ERR(tsk)) {
		io_init_new_worker(wq, acct, worker, tsk);
	} else if (!io_should_retry_thread(worker, PTR_ERR(tsk))) {
		kfree(worker);
		goto fail;
	} else {
		INIT_DELAYED_WORK(&worker->work, io_workqueue_create);
		queue_create_worker_retry(worker);
	}

	return true;
}

/*
 * Iterate the passed in list and call the specific function for each
 * worker that isn't exiting
 *
 * Stops early and returns true as soon as @func returns true.
 */
static bool io_acct_for_each_worker(struct io_wq_acct *acct,
				    bool (*func)(struct io_worker *, void *),
				    void *data)
{
	struct io_worker *worker;
	bool ret = false;

	list_for_each_entry_rcu(worker, &acct->all_list, all_list) {
		if (io_worker_get(worker)) {
			/* no task if node is/was offline */
			if (worker->task)
				ret = func(worker, data);
			io_worker_release(worker);
			if (ret)
				break;
		}
	}

	return ret;
}

/*
 * Run @func over the workers of every accounting slot, stopping at the
 * first slot whose iteration reports true.
 */
static void io_wq_for_each_worker(struct io_wq *wq,
				  bool (*func)(struct io_worker *, void *),
				  void *data)
{
	for (int i = 0; i < IO_WQ_ACCT_NR; i++)
		if (io_acct_for_each_worker(&wq->acct[i], func, data))
			break;
}

/* Per-worker callback: signal and wake the worker; never stops iteration. */
static bool io_wq_worker_wake(struct io_worker *worker, void *data)
{
	__set_notify_signal(worker->task);
	wake_up_process(worker->task);
	return false;
}

/*
 * Set or clear IO_WQ_BIT_EXIT_ON_IDLE on @wq. When the bit goes from clear
 * to set, every worker is woken. Does nothing if the wq has no task.
 */
void io_wq_set_exit_on_idle(struct io_wq *wq, bool enable)
{
	if (!wq->task)
		return;

	if (!enable) {
		clear_bit(IO_WQ_BIT_EXIT_ON_IDLE, &wq->state);
		return;
	}

	/* Already set: workers were woken when it was first enabled. */
	if (test_and_set_bit(IO_WQ_BIT_EXIT_ON_IDLE, &wq->state))
		return;

	rcu_read_lock();
	io_wq_for_each_worker(wq, io_wq_worker_wake, NULL);
	rcu_read_unlock();
}

/*
 * Mark @work canceled, submit it, and repeat for each follow-up item that
 * io_wq_free_work() hands back.
 */
static void io_run_cancel(struct io_wq_work *work, struct io_wq *wq)
{
	do {
		atomic_or(IO_WQ_WORK_CANCEL, &work->flags);
		io_wq_submit_work(work);
		work = io_wq_free_work(work);
	} while (work);
}

/*
 * Add @work to @acct's list. Unhashed work, and the first item of a hash
 * bucket, go to the tail of the list; further items of a bucket are placed
 * directly after that bucket's current tail. wq->hash_tail[] is updated
 * for hashed work.
 */
static void io_wq_insert_work(struct io_wq *wq, struct io_wq_acct *acct,
			      struct io_wq_work *work, unsigned int work_flags)
{
	unsigned int hash;
	struct io_wq_work *tail;

	if (!__io_wq_is_hashed(work_flags)) {
append:
		wq_list_add_tail(&work->list, &acct->work_list);
		return;
	}

	hash = __io_get_work_hash(work_flags);
	tail = wq->hash_tail[hash];
	wq->hash_tail[hash] = work;
	if (!tail)
		goto append;

	wq_list_add_after(&work->list, &tail->list, &acct->work_list);
}

/* Match callback: true only for the work item passed as @data. */
static bool io_wq_work_match_item(struct io_wq_work *work, void *data)
{
	return work == data;
}

/*
 * Queue @work on @wq. The item is canceled immediately if the wq is
 * exiting or the item is already flagged for cancel. Otherwise it is
 * inserted, a free worker is woken, and if none was available a new worker
 * may be created.
 */
void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
{
	unsigned int work_flags = atomic_read(&work->flags);
	struct io_wq_acct *acct = io_work_get_acct(wq, work_flags);
	struct io_cb_cancel_data match = {
		.fn		= io_wq_work_match_item,
		.data		= work,
		.cancel_all	= false,
	};
	bool do_create;

	/*
	 * If io-wq is exiting for this task, or if the request has explicitly
	 * been marked as one that should not get executed, cancel it here.
	 */
	if (test_bit(IO_WQ_BIT_EXIT, &wq->state) ||
	    (work_flags & IO_WQ_WORK_CANCEL)) {
		io_run_cancel(work, wq);
		return;
	}

	raw_spin_lock(&acct->lock);
	io_wq_insert_work(wq, acct, work, work_flags);
	clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
	raw_spin_unlock(&acct->lock);

	rcu_read_lock();
	do_create = !io_acct_activate_free_worker(acct);
	rcu_read_unlock();

	if (do_create && ((work_flags & IO_WQ_WORK_CONCURRENT) ||
	    !atomic_read(&acct->nr_running))) {
		bool did_create;

		did_create = io_wq_create_worker(wq, acct);
		if (likely(did_create))
			return;

		/* Creation failed; another worker, if any, will pick the work up. */
		raw_spin_lock(&acct->workers_lock);
		if (acct->nr_workers) {
			raw_spin_unlock(&acct->workers_lock);
			return;
		}
		raw_spin_unlock(&acct->workers_lock);

		/* fatal condition, failed to create the first worker */
		io_acct_cancel_pending_work(wq, acct, &match);
	}
}

/*
 * Work items that hash to the same value will not be done in parallel.
 * Used to limit concurrent writes, generally hashed by inode.
*/ void io_wq_hash_work(struct io_wq_work *work, void *val) { unsigned int bit; bit = hash_ptr(val, IO_WQ_HASH_ORDER); atomic_or(IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT), &work->flags); } static bool __io_wq_worker_cancel(struct io_worker *worker, struct io_cb_cancel_data *match, struct io_wq_work *work) { if (work && match->fn(work, match->data)) { atomic_or(IO_WQ_WORK_CANCEL, &work->flags); __set_notify_signal(worker->task); return true; } return false; } static bool io_wq_worker_cancel(struct io_worker *worker, void *data) { struct io_cb_cancel_data *match = data; /* * Hold the lock to avoid ->cur_work going out of scope, caller * may dereference the passed in work. */ raw_spin_lock(&worker->lock); if (__io_wq_worker_cancel(worker, match, worker->cur_work)) match->nr_running++; raw_spin_unlock(&worker->lock); return match->nr_running && !match->cancel_all; } static inline void io_wq_remove_pending(struct io_wq *wq, struct io_wq_acct *acct, struct io_wq_work *work, struct io_wq_work_node *prev) { unsigned int hash = io_get_work_hash(work); struct io_wq_work *prev_work = NULL; if (io_wq_is_hashed(work) && work == wq->hash_tail[hash]) { if (prev) prev_work = container_of(prev, struct io_wq_work, list); if (prev_work && io_get_work_hash(prev_work) == hash) wq->hash_tail[hash] = prev_work; else wq->hash_tail[hash] = NULL; } wq_list_del(&acct->work_list, &work->list, prev); } static bool io_acct_cancel_pending_work(struct io_wq *wq, struct io_wq_acct *acct, struct io_cb_cancel_data *match) { struct io_wq_work_node *node, *prev; struct io_wq_work *work; raw_spin_lock(&acct->lock); wq_list_for_each(node, prev, &acct->work_list) { work = container_of(node, struct io_wq_work, list); if (!match->fn(work, match->data)) continue; io_wq_remove_pending(wq, acct, work, prev); raw_spin_unlock(&acct->lock); io_run_cancel(work, wq); match->nr_pending++; /* not safe to continue after unlock */ return true; } raw_spin_unlock(&acct->lock); return false; } static void 
io_wq_cancel_pending_work(struct io_wq *wq, struct io_cb_cancel_data *match) { int i; retry: for (i = 0; i < IO_WQ_ACCT_NR; i++) { struct io_wq_acct *acct = io_get_acct(wq, i == 0); if (io_acct_cancel_pending_work(wq, acct, match)) { if (match->cancel_all) goto retry; break; } } } static void io_acct_cancel_running_work(struct io_wq_acct *acct, struct io_cb_cancel_data *match) { raw_spin_lock(&acct->workers_lock); io_acct_for_each_worker(acct, io_wq_worker_cancel, match); raw_spin_unlock(&acct->workers_lock); } static void io_wq_cancel_running_work(struct io_wq *wq, struct io_cb_cancel_data *match) { rcu_read_lock(); for (int i = 0; i < IO_WQ_ACCT_NR; i++) io_acct_cancel_running_work(&wq->acct[i], match); rcu_read_unlock(); } enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel, void *data, bool cancel_all) { struct io_cb_cancel_data match = { .fn = cancel, .data = data, .cancel_all = cancel_all, }; /* * First check pending list, if we're lucky we can just remove it * from there. CANCEL_OK means that the work is returned as-new, * no completion will be posted for it. * * Then check if a free (going busy) or busy worker has the work * currently running. If we find it there, we'll return CANCEL_RUNNING * as an indication that we attempt to signal cancellation. The * completion will run normally in this case. * * Do both of these while holding the acct->workers_lock, to ensure that * we'll find a work item regardless of state. 
*/ io_wq_cancel_pending_work(wq, &match); if (match.nr_pending && !match.cancel_all) return IO_WQ_CANCEL_OK; io_wq_cancel_running_work(wq, &match); if (match.nr_running && !match.cancel_all) return IO_WQ_CANCEL_RUNNING; if (match.nr_running) return IO_WQ_CANCEL_RUNNING; if (match.nr_pending) return IO_WQ_CANCEL_OK; return IO_WQ_CANCEL_NOTFOUND; } static int io_wq_hash_wake(struct wait_queue_entry *wait, unsigned mode, int sync, void *key) { struct io_wq *wq = container_of(wait, struct io_wq, wait); int i; list_del_init(&wait->entry); rcu_read_lock(); for (i = 0; i < IO_WQ_ACCT_NR; i++) { struct io_wq_acct *acct = &wq->acct[i]; if (test_and_clear_bit(IO_ACCT_STALLED_BIT, &acct->flags)) io_acct_activate_free_worker(acct); } rcu_read_unlock(); return 1; } struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) { int ret, i; struct io_wq *wq; if (WARN_ON_ONCE(!bounded)) return ERR_PTR(-EINVAL); wq = kzalloc_obj(struct io_wq); if (!wq) return ERR_PTR(-ENOMEM); refcount_inc(&data->hash->refs); wq->hash = data->hash; ret = -ENOMEM; if (!alloc_cpumask_var(&wq->cpu_mask, GFP_KERNEL)) goto err; cpuset_cpus_allowed(data->task, wq->cpu_mask); wq->acct[IO_WQ_ACCT_BOUND].max_workers = bounded; wq->acct[IO_WQ_ACCT_UNBOUND].max_workers = task_rlimit(current, RLIMIT_NPROC); INIT_LIST_HEAD(&wq->wait.entry); wq->wait.func = io_wq_hash_wake; for (i = 0; i < IO_WQ_ACCT_NR; i++) { struct io_wq_acct *acct = &wq->acct[i]; atomic_set(&acct->nr_running, 0); raw_spin_lock_init(&acct->workers_lock); INIT_HLIST_NULLS_HEAD(&acct->free_list, 0); INIT_LIST_HEAD(&acct->all_list); INIT_WQ_LIST(&acct->work_list); raw_spin_lock_init(&acct->lock); } wq->task = get_task_struct(data->task); atomic_set(&wq->worker_refs, 1); init_completion(&wq->worker_done); ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node); if (ret) { put_task_struct(wq->task); goto err; } return wq; err: io_wq_put_hash(data->hash); free_cpumask_var(wq->cpu_mask); kfree(wq); return ERR_PTR(ret); } 
/*
 * task_work_cancel_match() predicate: true for a queued worker-creation
 * callback (create_worker_cb or create_worker_cont) whose worker belongs to
 * the io_wq passed as @data.
 */
static bool io_task_work_match(struct callback_head *cb, void *data)
{
	struct io_worker *worker;

	if (cb->func != create_worker_cb && cb->func != create_worker_cont)
		return false;
	worker = container_of(cb, struct io_worker, create_work);
	return worker->wq == data;
}

/*
 * Mark @wq as exiting by setting IO_WQ_BIT_EXIT. io_wq_put_and_exit()
 * warns if this bit is not set when it is called.
 */
void io_wq_exit_start(struct io_wq *wq)
{
	set_bit(IO_WQ_BIT_EXIT, &wq->state);
}

/*
 * Cancel every worker-creation task work still queued on wq->task for this
 * wq, running io_worker_cancel_cb() for each one that was removed.
 */
static void io_wq_cancel_tw_create(struct io_wq *wq)
{
	struct callback_head *cb;

	while ((cb = task_work_cancel_match(wq->task,
					    io_task_work_match, wq)) != NULL) {
		struct io_worker *worker;

		worker = container_of(cb, struct io_worker, create_work);
		io_worker_cancel_cb(worker);
		/*
		 * Only the worker continuation helper has worker allocated and
		 * hence needs freeing.
		 */
		if (cb->func == create_worker_cont)
			kfree(worker);
	}
}

/*
 * Stop all workers of @wq and wait for them to finish.
 *
 * Pending creation task work is cancelled, every worker is woken, the wq's
 * own worker reference is dropped and wq->worker_done is waited on. After
 * that the wq's hash wait entry is unlinked and the task reference taken
 * in io_wq_create() is dropped. Does nothing if wq->task is already NULL.
 */
static void io_wq_exit_workers(struct io_wq *wq)
{
	unsigned long timeout, warn_timeout;

	if (!wq->task)
		return;

	io_wq_cancel_tw_create(wq);

	rcu_read_lock();
	io_wq_for_each_worker(wq, io_wq_worker_wake, NULL);
	rcu_read_unlock();
	io_worker_ref_put(wq);

	/*
	 * Shut up hung task complaint, see for example
	 *
	 * https://lore.kernel.org/all/696fc9e7.a70a0220.111c58.0006.GAE@google.com/
	 *
	 * where completely overloading the system with tons of long running
	 * io-wq items can easily trigger the hung task timeout. Only sleep
	 * uninterruptibly for half that time, and warn if we end up waiting
	 * more than IO_URING_EXIT_WAIT_MAX.
	 */
	timeout = sysctl_hung_task_timeout_secs * HZ / 2;
	/* a zero hung-task timeout yields 0 here; wait without a time limit then */
	if (!timeout)
		timeout = MAX_SCHEDULE_TIMEOUT;
	warn_timeout = jiffies + IO_URING_EXIT_WAIT_MAX;
	do {
		if (wait_for_completion_timeout(&wq->worker_done, timeout))
			break;
		WARN_ON_ONCE(time_after(jiffies, warn_timeout));
	} while (1);

	spin_lock_irq(&wq->hash->wait.lock);
	list_del_init(&wq->wait.entry);
	spin_unlock_irq(&wq->hash->wait.lock);

	put_task_struct(wq->task);
	wq->task = NULL;
}

/*
 * Free @wq: unregister it from CPU hotplug, cancel all still-pending work,
 * then release the cpumask, the hash reference and the wq itself.
 */
static void io_wq_destroy(struct io_wq *wq)
{
	struct io_cb_cancel_data match = {
		.fn		= io_wq_work_match_all,
		.cancel_all	= true,
	};

	cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
	io_wq_cancel_pending_work(wq, &match);
	free_cpumask_var(wq->cpu_mask);
	io_wq_put_hash(wq->hash);
	kfree(wq);
}

/*
 * Tear down @wq: stop its workers, then destroy it. IO_WQ_BIT_EXIT is
 * expected to be set already (see io_wq_exit_start()); a warning is issued
 * once if it is not.
 */
void io_wq_put_and_exit(struct io_wq *wq)
{
	WARN_ON_ONCE(!test_bit(IO_WQ_BIT_EXIT, &wq->state));

	io_wq_exit_workers(wq);
	io_wq_destroy(wq);
}

/* Argument bundle handed to io_wq_worker_affinity() on CPU hotplug. */
struct online_data {
	unsigned int cpu;	/* CPU that changed state */
	bool online;		/* true: came online, false: went offline */
};

/*
 * Per-worker callback: set or clear od->cpu in the cpu_mask of the
 * worker's wq. Always returns false so every worker is visited.
 */
static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
{
	struct online_data *od = data;

	if (od->online)
		cpumask_set_cpu(od->cpu, worker->wq->cpu_mask);
	else
		cpumask_clear_cpu(od->cpu, worker->wq->cpu_mask);
	return false;
}

/*
 * Apply a CPU online/offline transition to @wq by running
 * io_wq_worker_affinity() over its workers. Always returns 0.
 */
static int __io_wq_cpu_online(struct io_wq *wq, unsigned int cpu, bool online)
{
	struct online_data od = {
		.cpu = cpu,
		.online = online
	};

	rcu_read_lock();
	io_wq_for_each_worker(wq, io_wq_worker_affinity, &od);
	rcu_read_unlock();
	return 0;
}

/* CPU hotplug "online" callback registered by io_wq_init(). */
static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);

	return __io_wq_cpu_online(wq, cpu, true);
}

/* CPU hotplug "offline" callback registered by io_wq_init(). */
static int io_wq_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);

	return __io_wq_cpu_online(wq, cpu, false);
}

/*
 * Set the CPU mask of the io_wq belonging to @tctx.
 *
 * With a non-NULL @mask, the mask is installed only if it is a subset of
 * the CPUs cpuset_cpus_allowed() reports for the wq's task; otherwise
 * -EINVAL is returned. With a NULL @mask the wq mask is reset to that
 * allowed set.
 *
 * Returns 0 on success, -EINVAL if @tctx or its io_wq is NULL or @mask is
 * not permitted, -ENOMEM if the temporary mask cannot be allocated.
 */
int io_wq_cpu_affinity(struct io_uring_task *tctx, cpumask_var_t mask)
{
	cpumask_var_t allowed_mask;
	int ret = 0;

	if (!tctx || !tctx->io_wq)
		return -EINVAL;

	if (!alloc_cpumask_var(&allowed_mask, GFP_KERNEL))
		return -ENOMEM;

	rcu_read_lock();
	cpuset_cpus_allowed(tctx->io_wq->task, allowed_mask);
	if (mask) {
		if (cpumask_subset(mask, allowed_mask))
			cpumask_copy(tctx->io_wq->cpu_mask, mask);
		else
			ret = -EINVAL;
	} else {
		cpumask_copy(tctx->io_wq->cpu_mask, allowed_mask);
	}
	rcu_read_unlock();

	free_cpumask_var(allowed_mask);
	return ret;
}

/*
 * Set the max number of workers for each acct and return the old values.
 *
 * @new_count is an array of IO_WQ_ACCT_NR entries, indexed like wq->acct[].
 * Entries larger than the caller's RLIMIT_NPROC are clamped to it. A zero
 * entry leaves that acct's limit unchanged. On return every entry holds
 * the limit that was in place before the call. Always returns 0.
 */
int io_wq_max_workers(struct io_wq *wq, int *new_count)
{
	struct io_wq_acct *acct;
	int prev[IO_WQ_ACCT_NR];
	int i;

	/* the acct indices must line up with the IO_WQ_BOUND/UNBOUND values */
	BUILD_BUG_ON((int) IO_WQ_ACCT_BOUND   != (int) IO_WQ_BOUND);
	BUILD_BUG_ON((int) IO_WQ_ACCT_UNBOUND != (int) IO_WQ_UNBOUND);
	BUILD_BUG_ON((int) IO_WQ_ACCT_NR      != 2);

	for (i = 0; i < IO_WQ_ACCT_NR; i++) {
		if (new_count[i] > task_rlimit(current, RLIMIT_NPROC))
			new_count[i] = task_rlimit(current, RLIMIT_NPROC);
	}

	for (i = 0; i < IO_WQ_ACCT_NR; i++)
		prev[i] = 0;

	rcu_read_lock();

	for (i = 0; i < IO_WQ_ACCT_NR; i++) {
		acct = &wq->acct[i];
		raw_spin_lock(&acct->workers_lock);
		prev[i] = max_t(int, acct->max_workers, prev[i]);
		if (new_count[i])
			acct->max_workers = new_count[i];
		raw_spin_unlock(&acct->workers_lock);
	}
	rcu_read_unlock();

	for (i = 0; i < IO_WQ_ACCT_NR; i++)
		new_count[i] = prev[i];

	return 0;
}

/*
 * Boot-time init: register the dynamic multi-instance CPU hotplug state
 * "io-wq/online" and remember its id in io_wq_online. Returns 0 on success
 * or the negative error from cpuhp_setup_state_multi().
 */
static __init int io_wq_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online",
					io_wq_cpu_online, io_wq_cpu_offline);
	if (ret < 0)
		return ret;
	io_wq_online = ret;
	return 0;
}
subsys_initcall(io_wq_init);
182 307 307 306 307 267 52 49 308 267 54 49 302 13 308 307 307 306 291 15 19 15 308 1689 1683 308 307 307 308 308 49 308 93 93 631 373 186 181 5 130 106 43 40 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 
488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 
988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 
1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 
1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 
2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 
2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 
2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 
3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 
3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 // SPDX-License-Identifier: GPL-2.0-or-later /* * net/dsa/user.c - user device handling * Copyright (c) 2008-2009 Marvell Semiconductor */ #include <linux/list.h> #include <linux/etherdevice.h> #include <linux/netdevice.h> #include <linux/phy.h> #include <linux/phy_fixed.h> #include <linux/phylink.h> #include <linux/of_net.h> #include <linux/of_mdio.h> #include <linux/mdio.h> #include <net/rtnetlink.h> #include <net/pkt_cls.h> #include <net/selftests.h> #include <net/tc_act/tc_mirred.h> #include <linux/if_bridge.h> #include <linux/if_hsr.h> #include <net/dcbnl.h> #include <linux/netpoll.h> #include <linux/string.h> #include "conduit.h" #include "dsa.h" #include "netlink.h" #include "port.h" #include "switch.h" #include "tag.h" #include "user.h" struct dsa_switchdev_event_work { struct net_device *dev; struct net_device *orig_dev; struct work_struct work; unsigned long event; /* Specific for SWITCHDEV_FDB_ADD_TO_DEVICE and * SWITCHDEV_FDB_DEL_TO_DEVICE */ unsigned char addr[ETH_ALEN]; u16 vid; bool host_addr; }; enum dsa_standalone_event { DSA_UC_ADD, DSA_UC_DEL, DSA_MC_ADD, DSA_MC_DEL, }; struct dsa_standalone_event_work { struct work_struct work; struct net_device *dev; enum dsa_standalone_event event; unsigned char addr[ETH_ALEN]; u16 vid; }; struct dsa_host_vlan_rx_filtering_ctx { struct net_device *dev; const unsigned char *addr; enum dsa_standalone_event event; }; static bool dsa_switch_supports_uc_filtering(struct dsa_switch *ds) { return ds->ops->port_fdb_add && ds->ops->port_fdb_del && ds->fdb_isolation && 
!ds->vlan_filtering_is_global && !ds->needs_standalone_vlan_filtering; } static bool dsa_switch_supports_mc_filtering(struct dsa_switch *ds) { return ds->ops->port_mdb_add && ds->ops->port_mdb_del && ds->fdb_isolation && !ds->vlan_filtering_is_global && !ds->needs_standalone_vlan_filtering; } static void dsa_user_standalone_event_work(struct work_struct *work) { struct dsa_standalone_event_work *standalone_work = container_of(work, struct dsa_standalone_event_work, work); const unsigned char *addr = standalone_work->addr; struct net_device *dev = standalone_work->dev; struct dsa_port *dp = dsa_user_to_port(dev); struct switchdev_obj_port_mdb mdb; struct dsa_switch *ds = dp->ds; u16 vid = standalone_work->vid; int err; switch (standalone_work->event) { case DSA_UC_ADD: err = dsa_port_standalone_host_fdb_add(dp, addr, vid); if (err) { dev_err(ds->dev, "port %d failed to add %pM vid %d to fdb: %d\n", dp->index, addr, vid, err); break; } break; case DSA_UC_DEL: err = dsa_port_standalone_host_fdb_del(dp, addr, vid); if (err) { dev_err(ds->dev, "port %d failed to delete %pM vid %d from fdb: %d\n", dp->index, addr, vid, err); } break; case DSA_MC_ADD: ether_addr_copy(mdb.addr, addr); mdb.vid = vid; err = dsa_port_standalone_host_mdb_add(dp, &mdb); if (err) { dev_err(ds->dev, "port %d failed to add %pM vid %d to mdb: %d\n", dp->index, addr, vid, err); break; } break; case DSA_MC_DEL: ether_addr_copy(mdb.addr, addr); mdb.vid = vid; err = dsa_port_standalone_host_mdb_del(dp, &mdb); if (err) { dev_err(ds->dev, "port %d failed to delete %pM vid %d from mdb: %d\n", dp->index, addr, vid, err); } break; } kfree(standalone_work); } static int dsa_user_schedule_standalone_work(struct net_device *dev, enum dsa_standalone_event event, const unsigned char *addr, u16 vid) { struct dsa_standalone_event_work *standalone_work; standalone_work = kzalloc_obj(*standalone_work, GFP_ATOMIC); if (!standalone_work) return -ENOMEM; INIT_WORK(&standalone_work->work, 
dsa_user_standalone_event_work);
	standalone_work->event = event;
	standalone_work->dev = dev;

	/* The address is copied, so the caller's buffer need not outlive us. */
	ether_addr_copy(standalone_work->addr, addr);
	standalone_work->vid = vid;

	dsa_schedule_work(&standalone_work->work);

	return 0;
}

/* dsa_user_vlan_for_each() callback: schedule the event described by the
 * struct dsa_host_vlan_rx_filtering_ctx in @arg for VLAN @vid.
 */
static int dsa_user_host_vlan_rx_filtering(void *arg, int vid)
{
	struct dsa_host_vlan_rx_filtering_ctx *ctx = arg;

	return dsa_user_schedule_standalone_work(ctx->dev, ctx->event,
						 ctx->addr, vid);
}

/* Invoke @cb(@arg, vid) first for VID 0 and then for every entry of the
 * port's user_vlans list, stopping at and returning the first non-zero result.
 * The caller must hold dev->addr_list_lock (asserted via lockdep).
 */
static int dsa_user_vlan_for_each(struct net_device *dev,
				  int (*cb)(void *arg, int vid), void *arg)
{
	struct dsa_port *dp = dsa_user_to_port(dev);
	struct dsa_vlan *v;
	int err;

	lockdep_assert_held(&dev->addr_list_lock);

	err = cb(arg, 0);
	if (err)
		return err;

	list_for_each_entry(v, &dp->user_vlans, list) {
		err = cb(arg, v->vid);
		if (err)
			return err;
	}

	return 0;
}

/* Add unicast @addr to the conduit's address list and, when the switch
 * supports unicast filtering, schedule a DSA_UC_ADD for VID 0 and each user
 * VLAN. The result of dev_uc_add() is not checked.
 */
static int dsa_user_sync_uc(struct net_device *dev,
			    const unsigned char *addr)
{
	struct net_device *conduit = dsa_user_to_conduit(dev);
	struct dsa_port *dp = dsa_user_to_port(dev);
	struct dsa_host_vlan_rx_filtering_ctx ctx = {
		.dev = dev,
		.addr = addr,
		.event = DSA_UC_ADD,
	};

	dev_uc_add(conduit, addr);

	if (!dsa_switch_supports_uc_filtering(dp->ds))
		return 0;

	return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering,
				      &ctx);
}

/* Inverse of dsa_user_sync_uc(): remove @addr from the conduit and schedule
 * DSA_UC_DEL per VLAN when unicast filtering is supported.
 */
static int dsa_user_unsync_uc(struct net_device *dev,
			      const unsigned char *addr)
{
	struct net_device *conduit = dsa_user_to_conduit(dev);
	struct dsa_port *dp = dsa_user_to_port(dev);
	struct dsa_host_vlan_rx_filtering_ctx ctx = {
		.dev = dev,
		.addr = addr,
		.event = DSA_UC_DEL,
	};

	dev_uc_del(conduit, addr);

	if (!dsa_switch_supports_uc_filtering(dp->ds))
		return 0;

	return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering,
				      &ctx);
}

/* Multicast variant of dsa_user_sync_uc(): adds @addr to the conduit's
 * multicast list and prepares a DSA_MC_ADD context.
 */
static int dsa_user_sync_mc(struct net_device *dev,
			    const unsigned char *addr)
{
	struct net_device *conduit = dsa_user_to_conduit(dev);
	struct dsa_port *dp = dsa_user_to_port(dev);
	struct dsa_host_vlan_rx_filtering_ctx ctx = {
		.dev = dev,
		.addr = addr,
		.event = DSA_MC_ADD,
	};

	dev_mc_add(conduit, addr);

	if
(!dsa_switch_supports_mc_filtering(dp->ds)) return 0; return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering, &ctx); } static int dsa_user_unsync_mc(struct net_device *dev, const unsigned char *addr) { struct net_device *conduit = dsa_user_to_conduit(dev); struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_host_vlan_rx_filtering_ctx ctx = { .dev = dev, .addr = addr, .event = DSA_MC_DEL, }; dev_mc_del(conduit, addr); if (!dsa_switch_supports_mc_filtering(dp->ds)) return 0; return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering, &ctx); } void dsa_user_sync_ha(struct net_device *dev) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; struct netdev_hw_addr *ha; netif_addr_lock_bh(dev); netdev_for_each_synced_mc_addr(ha, dev) dsa_user_sync_mc(dev, ha->addr); netdev_for_each_synced_uc_addr(ha, dev) dsa_user_sync_uc(dev, ha->addr); netif_addr_unlock_bh(dev); if (dsa_switch_supports_uc_filtering(ds) || dsa_switch_supports_mc_filtering(ds)) dsa_flush_workqueue(); } void dsa_user_unsync_ha(struct net_device *dev) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; struct netdev_hw_addr *ha; netif_addr_lock_bh(dev); netdev_for_each_synced_uc_addr(ha, dev) dsa_user_unsync_uc(dev, ha->addr); netdev_for_each_synced_mc_addr(ha, dev) dsa_user_unsync_mc(dev, ha->addr); netif_addr_unlock_bh(dev); if (dsa_switch_supports_uc_filtering(ds) || dsa_switch_supports_mc_filtering(ds)) dsa_flush_workqueue(); } /* user mii_bus handling ***************************************************/ static int dsa_user_phy_read(struct mii_bus *bus, int addr, int reg) { struct dsa_switch *ds = bus->priv; if (ds->phys_mii_mask & (1 << addr)) return ds->ops->phy_read(ds, addr, reg); return 0xffff; } static int dsa_user_phy_write(struct mii_bus *bus, int addr, int reg, u16 val) { struct dsa_switch *ds = bus->priv; if (ds->phys_mii_mask & (1 << addr)) return ds->ops->phy_write(ds, addr, reg, val); return 0; } void 
dsa_user_mii_bus_init(struct dsa_switch *ds)
{
	/* Fill in the switch's user MII bus: private data, name, accessors,
	 * a "dsa-<tree>.<switch>" id, parent device, and a PHY mask that is the
	 * complement of the addresses the switch claims.
	 */
	ds->user_mii_bus->priv = (void *)ds;
	ds->user_mii_bus->name = "dsa user smi";
	ds->user_mii_bus->read = dsa_user_phy_read;
	ds->user_mii_bus->write = dsa_user_phy_write;
	snprintf(ds->user_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d.%d",
		 ds->dst->index, ds->index);
	ds->user_mii_bus->parent = ds->dev;
	ds->user_mii_bus->phy_mask = ~ds->phys_mii_mask;
}

/* user device handling ****************************************************/

/* Report the conduit's ifindex as this device's link interface. */
static int dsa_user_get_iflink(const struct net_device *dev)
{
	return READ_ONCE(dsa_user_to_conduit(dev)->ifindex);
}

/* Make @addr reachable on the host side of user port @dev.
 *
 * Adds a standalone host FDB entry (VID 0) when the switch supports unicast
 * filtering, then adds @addr to the conduit's unicast list unless it equals
 * the conduit's own address. If the conduit step fails, the FDB entry is
 * removed again. Returns 0 or the first error.
 */
int dsa_user_host_uc_install(struct net_device *dev, const u8 *addr)
{
	struct net_device *conduit = dsa_user_to_conduit(dev);
	struct dsa_port *dp = dsa_user_to_port(dev);
	struct dsa_switch *ds = dp->ds;
	int err;

	if (dsa_switch_supports_uc_filtering(ds)) {
		err = dsa_port_standalone_host_fdb_add(dp, addr, 0);
		if (err)
			goto out;
	}

	if (!ether_addr_equal(addr, conduit->dev_addr)) {
		err = dev_uc_add(conduit, addr);
		if (err < 0)
			goto del_host_addr;
	}

	return 0;

del_host_addr:
	if (dsa_switch_supports_uc_filtering(ds))
		dsa_port_standalone_host_fdb_del(dp, addr, 0);
out:
	return err;
}

/* Undo dsa_user_host_uc_install() for the device's current dev_addr, in the
 * reverse order (conduit list first, then the host FDB entry).
 */
void dsa_user_host_uc_uninstall(struct net_device *dev)
{
	struct net_device *conduit = dsa_user_to_conduit(dev);
	struct dsa_port *dp = dsa_user_to_port(dev);
	struct dsa_switch *ds = dp->ds;

	if (!ether_addr_equal(dev->dev_addr, conduit->dev_addr))
		dev_uc_del(conduit, dev->dev_addr);

	if (dsa_switch_supports_uc_filtering(ds))
		dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0);
}

/* Open handler: bring the conduit up, install the port's MAC address on the
 * host side, then enable the port.
 */
static int dsa_user_open(struct net_device *dev)
{
	struct net_device *conduit = dsa_user_to_conduit(dev);
	struct dsa_port *dp = dsa_user_to_port(dev);
	int err;

	err = dev_open(conduit, NULL);
	if (err < 0) {
		netdev_err(dev, "failed to open conduit %s\n", conduit->name);
		goto out;
	}

	err = dsa_user_host_uc_install(dev, dev->dev_addr);
	if (err)
		goto out;

	err = dsa_port_enable_rt(dp, dev->phydev);
	if (err)
		goto out_del_host_uc;

	return 0;
out_del_host_uc:
	dsa_user_host_uc_uninstall(dev);
out:
	return err;
}

/* Stop handler: disable the port, then remove its host-side MAC address.
 * Always returns 0.
 */
static int dsa_user_close(struct net_device *dev)
{
	struct dsa_port *dp = dsa_user_to_port(dev);

	dsa_port_disable_rt(dp);

	dsa_user_host_uc_uninstall(dev);

	return 0;
}

/* Derive host flooding from the device flags: unknown unicast is flooded when
 * IFF_PROMISC is set, multicast when IFF_PROMISC or IFF_ALLMULTI is set.
 */
static void dsa_user_manage_host_flood(struct net_device *dev)
{
	bool mc = dev->flags & (IFF_PROMISC | IFF_ALLMULTI);
	struct dsa_port *dp = dsa_user_to_port(dev);
	bool uc = dev->flags & IFF_PROMISC;

	dsa_port_set_host_flood(dp, uc, mc);
}

/* Propagate changed IFF_ALLMULTI / IFF_PROMISC bits to the conduit (+1 when
 * the flag is now set, -1 when cleared) and refresh host flooding, the latter
 * only when the switch filters both unicast and multicast.
 */
static void dsa_user_change_rx_flags(struct net_device *dev, int change)
{
	struct net_device *conduit = dsa_user_to_conduit(dev);
	struct dsa_port *dp = dsa_user_to_port(dev);
	struct dsa_switch *ds = dp->ds;

	if (change & IFF_ALLMULTI)
		dev_set_allmulti(conduit,
				 dev->flags & IFF_ALLMULTI ? 1 : -1);
	if (change & IFF_PROMISC)
		dev_set_promiscuity(conduit,
				    dev->flags & IFF_PROMISC ? 1 : -1);

	if (dsa_switch_supports_uc_filtering(ds) &&
	    dsa_switch_supports_mc_filtering(ds))
		dsa_user_manage_host_flood(dev);
}

/* RX mode handler: run the multicast list, then the unicast list, through the
 * sync/unsync callbacks defined in this file.
 */
static void dsa_user_set_rx_mode(struct net_device *dev)
{
	__dev_mc_sync(dev, dsa_user_sync_mc, dsa_user_unsync_mc);
	__dev_uc_sync(dev, dsa_user_sync_uc, dsa_user_unsync_uc);
}

/* Change the MAC address of a user port. @a is treated as a struct sockaddr.
 * Returns -EADDRNOTAVAIL for an invalid Ethernet address, or the error of the
 * driver's optional port_set_mac_address() hook.
 */
static int dsa_user_set_mac_address(struct net_device *dev, void *a)
{
	struct dsa_port *dp = dsa_user_to_port(dev);
	struct dsa_switch *ds = dp->ds;
	struct sockaddr *addr = a;
	int err;

	if (!is_valid_ether_addr(addr->sa_data))
		return -EADDRNOTAVAIL;

	if (ds->ops->port_set_mac_address) {
		err = ds->ops->port_set_mac_address(ds, dp->index,
						    addr->sa_data);
		if (err)
			return err;
	}

	/* If the port is down, the address isn't synced yet to hardware or
	 * to the DSA conduit, so there is nothing to change.
*/
	if (!(dev->flags & IFF_UP))
		goto out_change_dev_addr;

	/* Install the new address before removing the old one; dev->dev_addr
	 * still holds the old address when the uninstall helper reads it.
	 */
	err = dsa_user_host_uc_install(dev, addr->sa_data);
	if (err)
		return err;

	dsa_user_host_uc_uninstall(dev);

out_change_dev_addr:
	eth_hw_addr_set(dev, addr->sa_data);

	return 0;
}

/* State threaded through dsa_user_port_fdb_do_dump(): the device being
 * dumped, the netlink skb being filled, the dump callback, and the running
 * entry index.
 */
struct dsa_user_dump_ctx {
	struct net_device *dev;
	struct sk_buff *skb;
	struct netlink_callback *cb;
	int idx;
};

/* Per-entry FDB dump callback; @data is a struct dsa_user_dump_ctx.
 *
 * Entries whose index is below the resume point ctx->fdb_idx are only
 * counted. Otherwise an RTM_NEWNEIGH message for the AF_BRIDGE family is
 * started; -EMSGSIZE is returned if the message header does not fit.
 */
static int
dsa_user_port_fdb_do_dump(const unsigned char *addr, u16 vid,
			  bool is_static, void *data)
{
	struct dsa_user_dump_ctx *dump = data;
	struct ndo_fdb_dump_context *ctx = (void *)dump->cb->ctx;
	u32 portid = NETLINK_CB(dump->cb->skb).portid;
	u32 seq = dump->cb->nlh->nlmsg_seq;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	if (dump->idx < ctx->fdb_idx)
		goto skip;

	nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
			sizeof(*ndm), NLM_F_MULTI);
	if (!nlh)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family = AF_BRIDGE;
	ndm->ndm_pad1 = 0;
	ndm->ndm_pad2 = 0;
	ndm->ndm_flags = NTF_SELF;
	ndm->ndm_type = 0;
	ndm->ndm_ifindex = dump->dev->ifindex;
	ndm->ndm_state = is_static ?
NUD_NOARP : NUD_REACHABLE;

	if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, addr))
		goto nla_put_failure;

	/* The VLAN attribute is only emitted for a non-zero VID. */
	if (vid && nla_put_u16(dump->skb, NDA_VLAN, vid))
		goto nla_put_failure;

	nlmsg_end(dump->skb, nlh);

skip:
	dump->idx++;
	return 0;

nla_put_failure:
	nlmsg_cancel(dump->skb, nlh);
	return -EMSGSIZE;
}

/* FDB dump handler: walk the port's FDB with dsa_user_port_fdb_do_dump() and
 * write the updated entry index back through @idx. @filter_dev is not used.
 */
static int
dsa_user_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
		  struct net_device *dev, struct net_device *filter_dev,
		  int *idx)
{
	struct dsa_port *dp = dsa_user_to_port(dev);
	struct dsa_user_dump_ctx dump = {
		.dev = dev,
		.skb = skb,
		.cb = cb,
		.idx = *idx,
	};
	int err;

	err = dsa_port_fdb_dump(dp, dsa_user_port_fdb_do_dump, &dump);
	*idx = dump.idx;

	return err;
}

/* ioctl handler: delegate to phylink's MII ioctl for this port. */
static int dsa_user_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	struct dsa_user_priv *p = netdev_priv(dev);

	return phylink_mii_ioctl(p->dp->pl, ifr, cmd);
}

/* switchdev attribute setter for a user port.
 *
 * A non-NULL @ctx that is not this port makes the call a no-op returning 0.
 * Each attribute is applied only if this port offloads the originating bridge
 * port or bridge device (as appropriate), otherwise -EOPNOTSUPP is returned.
 */
static int dsa_user_port_attr_set(struct net_device *dev, const void *ctx,
				  const struct switchdev_attr *attr,
				  struct netlink_ext_ack *extack)
{
	struct dsa_port *dp = dsa_user_to_port(dev);
	int ret;

	if (ctx && ctx != dp)
		return 0;

	switch (attr->id) {
	case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
		if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev))
			return -EOPNOTSUPP;

		ret = dsa_port_set_state(dp, attr->u.stp_state, true);
		break;
	case SWITCHDEV_ATTR_ID_PORT_MST_STATE:
		if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev))
			return -EOPNOTSUPP;

		ret = dsa_port_set_mst_state(dp, &attr->u.mst_state, extack);
		break;
	case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
		if (!dsa_port_offloads_bridge_dev(dp, attr->orig_dev))
			return -EOPNOTSUPP;

		ret = dsa_port_vlan_filtering(dp, attr->u.vlan_filtering,
					      extack);
		break;
	case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
		if (!dsa_port_offloads_bridge_dev(dp, attr->orig_dev))
			return -EOPNOTSUPP;

		ret = dsa_port_ageing_time(dp, attr->u.ageing_time);
		break;
	case SWITCHDEV_ATTR_ID_BRIDGE_MST:
		if (!dsa_port_offloads_bridge_dev(dp, attr->orig_dev))
			return -EOPNOTSUPP;

		ret = dsa_port_mst_enable(dp, attr->u.mst, extack);
		break;
case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS:
		if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev))
			return -EOPNOTSUPP;

		ret = dsa_port_pre_bridge_flags(dp, attr->u.brport_flags,
						extack);
		break;
	case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
		if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev))
			return -EOPNOTSUPP;

		ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, extack);
		break;
	case SWITCHDEV_ATTR_ID_VLAN_MSTI:
		if (!dsa_port_offloads_bridge_dev(dp, attr->orig_dev))
			return -EOPNOTSUPP;

		ret = dsa_port_vlan_msti(dp, &attr->u.vlan_msti);
		break;
	default:
		/* Attributes not listed above are not offloaded. */
		ret = -EOPNOTSUPP;
		break;
	}

	return ret;
}

/* Must be called under rcu_read_lock() */
/* Returns -EBUSY if any upper device of @user is a VLAN device whose VLAN ID
 * equals vlan->vid, 0 otherwise.
 */
static int
dsa_user_vlan_check_for_8021q_uppers(struct net_device *user,
				     const struct switchdev_obj_port_vlan *vlan)
{
	struct net_device *upper_dev;
	struct list_head *iter;

	netdev_for_each_upper_dev_rcu(user, upper_dev, iter) {
		u16 vid;

		if (!is_vlan_dev(upper_dev))
			continue;

		vid = vlan_dev_vlan_id(upper_dev);
		if (vid == vlan->vid)
			return -EBUSY;
	}

	return 0;
}

/* Add a bridge port VLAN to this user port.
 *
 * Returns 0 without doing anything (but with an extack message) when VLAN
 * configuration is skipped for the port, and the checker's error when a VLAN
 * upper with the same VID exists on a VLAN-aware bridge.
 */
static int dsa_user_vlan_add(struct net_device *dev,
			     const struct switchdev_obj *obj,
			     struct netlink_ext_ack *extack)
{
	struct dsa_port *dp = dsa_user_to_port(dev);
	struct switchdev_obj_port_vlan *vlan;
	int err;

	if (dsa_port_skip_vlan_configuration(dp)) {
		NL_SET_ERR_MSG_MOD(extack, "skipping configuration of VLAN");
		return 0;
	}

	vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);

	/* Deny adding a bridge VLAN when there is already an 802.1Q upper with
	 * the same VID.
	 */
	if (br_vlan_enabled(dsa_port_bridge_dev_get(dp))) {
		rcu_read_lock();
		err = dsa_user_vlan_check_for_8021q_uppers(dev, vlan);
		rcu_read_unlock();
		if (err) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Port already has a VLAN upper with this VID");
			return err;
		}
	}

	return dsa_port_vlan_add(dp, vlan, extack);
}

/* Offload a VLAN installed on the bridge or on a foreign interface by
 * installing it as a VLAN towards the CPU port.
*/
static int dsa_user_host_vlan_add(struct net_device *dev,
				  const struct switchdev_obj *obj,
				  struct netlink_ext_ack *extack)
{
	struct dsa_port *dp = dsa_user_to_port(dev);
	struct switchdev_obj_port_vlan vlan;

	/* Do nothing if this is a software bridge */
	if (!dp->bridge)
		return -EOPNOTSUPP;

	if (dsa_port_skip_vlan_configuration(dp)) {
		NL_SET_ERR_MSG_MOD(extack, "skipping configuration of VLAN");
		return 0;
	}

	/* Work on a local copy so the caller's object keeps its flags. */
	vlan = *SWITCHDEV_OBJ_PORT_VLAN(obj);

	/* Even though drivers often handle CPU membership in special ways,
	 * it doesn't make sense to program a PVID, so clear this flag.
	 */
	vlan.flags &= ~BRIDGE_VLAN_INFO_PVID;

	return dsa_port_host_vlan_add(dp, &vlan, extack);
}

/* switchdev object-add handler for a user port.
 *
 * A non-NULL @ctx that is not this port makes the call a no-op returning 0.
 * MDB and MRP objects require that this port offloads the originating bridge
 * port/device; a port VLAN is added as a port VLAN when the port offloads the
 * originating bridge port and as a host VLAN otherwise. Unknown object ids
 * yield -EOPNOTSUPP.
 */
static int dsa_user_port_obj_add(struct net_device *dev, const void *ctx,
				 const struct switchdev_obj *obj,
				 struct netlink_ext_ack *extack)
{
	struct dsa_port *dp = dsa_user_to_port(dev);
	int err;

	if (ctx && ctx != dp)
		return 0;

	switch (obj->id) {
	case SWITCHDEV_OBJ_ID_PORT_MDB:
		if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev))
			return -EOPNOTSUPP;

		err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
		break;
	case SWITCHDEV_OBJ_ID_HOST_MDB:
		if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev))
			return -EOPNOTSUPP;

		err = dsa_port_bridge_host_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
		break;
	case SWITCHDEV_OBJ_ID_PORT_VLAN:
		if (dsa_port_offloads_bridge_port(dp, obj->orig_dev))
			err = dsa_user_vlan_add(dev, obj, extack);
		else
			err = dsa_user_host_vlan_add(dev, obj, extack);
		break;
	case SWITCHDEV_OBJ_ID_MRP:
		if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev))
			return -EOPNOTSUPP;

		err = dsa_port_mrp_add(dp, SWITCHDEV_OBJ_MRP(obj));
		break;
	case SWITCHDEV_OBJ_ID_RING_ROLE_MRP:
		if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev))
			return -EOPNOTSUPP;

		err = dsa_port_mrp_add_ring_role(dp,
						 SWITCHDEV_OBJ_RING_ROLE_MRP(obj));
		break;
	default:
		err = -EOPNOTSUPP;
		break;
	}

	return err;
}

/* Remove a bridge port VLAN from this user port. */
static int dsa_user_vlan_del(struct net_device *dev,
			     const struct switchdev_obj *obj)
{
	struct dsa_port *dp = dsa_user_to_port(dev);
struct switchdev_obj_port_vlan *vlan;

	if (dsa_port_skip_vlan_configuration(dp))
		return 0;

	vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);

	return dsa_port_vlan_del(dp, vlan);
}

/* Remove a host (CPU-facing) VLAN. Returns -EOPNOTSUPP when the port has no
 * offloaded bridge and 0 when VLAN configuration is skipped for the port.
 */
static int dsa_user_host_vlan_del(struct net_device *dev,
				  const struct switchdev_obj *obj)
{
	struct dsa_port *dp = dsa_user_to_port(dev);
	struct switchdev_obj_port_vlan *vlan;

	/* Do nothing if this is a software bridge */
	if (!dp->bridge)
		return -EOPNOTSUPP;

	if (dsa_port_skip_vlan_configuration(dp))
		return 0;

	vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);

	return dsa_port_host_vlan_del(dp, vlan);
}

/* switchdev object-delete handler; mirrors dsa_user_port_obj_add() with the
 * corresponding *_del helpers and the same offload checks.
 */
static int dsa_user_port_obj_del(struct net_device *dev, const void *ctx,
				 const struct switchdev_obj *obj)
{
	struct dsa_port *dp = dsa_user_to_port(dev);
	int err;

	if (ctx && ctx != dp)
		return 0;

	switch (obj->id) {
	case SWITCHDEV_OBJ_ID_PORT_MDB:
		if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev))
			return -EOPNOTSUPP;

		err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
		break;
	case SWITCHDEV_OBJ_ID_HOST_MDB:
		if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev))
			return -EOPNOTSUPP;

		err = dsa_port_bridge_host_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
		break;
	case SWITCHDEV_OBJ_ID_PORT_VLAN:
		if (dsa_port_offloads_bridge_port(dp, obj->orig_dev))
			err = dsa_user_vlan_del(dev, obj);
		else
			err = dsa_user_host_vlan_del(dev, obj);
		break;
	case SWITCHDEV_OBJ_ID_MRP:
		if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev))
			return -EOPNOTSUPP;

		err = dsa_port_mrp_del(dp, SWITCHDEV_OBJ_MRP(obj));
		break;
	case SWITCHDEV_OBJ_ID_RING_ROLE_MRP:
		if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev))
			return -EOPNOTSUPP;

		err = dsa_port_mrp_del_ring_role(dp,
						 SWITCHDEV_OBJ_RING_ROLE_MRP(obj));
		break;
	default:
		err = -EOPNOTSUPP;
		break;
	}

	return err;
}

/* Send @skb through the port's netpoll instance. Without
 * CONFIG_NET_POLL_CONTROLLER this path is a BUG().
 */
static netdev_tx_t dsa_user_netpoll_send_skb(struct net_device *dev,
					     struct sk_buff *skb)
{
#ifdef CONFIG_NET_POLL_CONTROLLER
	struct dsa_user_priv *p = netdev_priv(dev);

	return netpoll_send_skb(p->netpoll, skb);
#else
	BUG();
	return NETDEV_TX_OK;
#endif
}

static void dsa_skb_tx_timestamp(struct
dsa_user_priv *p,
				 struct sk_buff *skb)
{
	struct dsa_switch *ds = p->dp->ds;

	/* Only skbs flagged with SKBTX_HW_TSTAMP_NOBPF are passed on, and only
	 * when the driver provides port_txtstamp().
	 */
	if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NOBPF))
		return;

	if (!ds->ops->port_txtstamp)
		return;

	ds->ops->port_txtstamp(ds, p->dp->index, skb);
}

/* Hand a (tagged) skb to the conduit for transmission. Always reports
 * NETDEV_TX_OK on the regular path; the netpoll path returns whatever
 * dsa_user_netpoll_send_skb() returns.
 */
netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev)
{
	/* SKB for netpoll still need to be mangled with the protocol-specific
	 * tag to be successfully transmitted
	 */
	if (unlikely(netpoll_tx_running(dev)))
		return dsa_user_netpoll_send_skb(dev, skb);

	/* Queue the SKB for transmission on the parent interface, but
	 * do not modify its EtherType
	 */
	skb->dev = dsa_user_to_conduit(dev);
	dev_queue_xmit(skb);

	return NETDEV_TX_OK;
}
EXPORT_SYMBOL_GPL(dsa_enqueue_skb);

/* Transmit handler for a user port: account the packet, clear skb->cb, offer
 * the skb for TX timestamping, make head/tail writable, pad when tailroom is
 * needed, then run the tagger's xmit hook.
 */
static netdev_tx_t dsa_user_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct dsa_user_priv *p = netdev_priv(dev);
	struct sk_buff *nskb;

	dev_sw_netstats_tx_add(dev, 1, skb->len);

	memset(skb->cb, 0, sizeof(skb->cb));

	/* Handle tx timestamp if any */
	dsa_skb_tx_timestamp(p, skb);

	/* On failure the skb is dropped but still reported as transmitted. */
	if (skb_ensure_writable_head_tail(skb, dev)) {
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}

	/* needed_tailroom should still be 'warm' in the cache line from
	 * skb_ensure_writable_head_tail(), which has also ensured that
	 * padding is safe.
	 */
	if (dev->needed_tailroom)
		eth_skb_pad(skb);

	/* Transmit function may have to reallocate the original SKB,
	 * in which case it must have freed it. Only free it here on error.
*/ nskb = p->xmit(skb, dev); if (!nskb) { kfree_skb(skb); return NETDEV_TX_OK; } return dsa_enqueue_skb(nskb, dev); } /* ethtool operations *******************************************************/ static void dsa_user_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) { strscpy(drvinfo->driver, "dsa", sizeof(drvinfo->driver)); strscpy(drvinfo->fw_version, "N/A", sizeof(drvinfo->fw_version)); strscpy(drvinfo->bus_info, "platform", sizeof(drvinfo->bus_info)); } static int dsa_user_get_regs_len(struct net_device *dev) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (ds->ops->get_regs_len) return ds->ops->get_regs_len(ds, dp->index); return -EOPNOTSUPP; } static void dsa_user_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (ds->ops->get_regs) ds->ops->get_regs(ds, dp->index, regs, _p); } static int dsa_user_nway_reset(struct net_device *dev) { struct dsa_port *dp = dsa_user_to_port(dev); return phylink_ethtool_nway_reset(dp->pl); } static int dsa_user_get_eeprom_len(struct net_device *dev) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (ds->cd && ds->cd->eeprom_len) return ds->cd->eeprom_len; if (ds->ops->get_eeprom_len) return ds->ops->get_eeprom_len(ds); return 0; } static int dsa_user_get_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, u8 *data) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (ds->ops->get_eeprom) return ds->ops->get_eeprom(ds, eeprom, data); return -EOPNOTSUPP; } static int dsa_user_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, u8 *data) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (ds->ops->set_eeprom) return ds->ops->set_eeprom(ds, eeprom, data); return -EOPNOTSUPP; } static void dsa_user_get_strings(struct net_device *dev, uint32_t stringset, uint8_t 
*data)
{
	struct dsa_port *dp = dsa_user_to_port(dev);
	struct dsa_switch *ds = dp->ds;

	if (stringset == ETH_SS_STATS) {
		/* Four software counter names come first; the driver's own
		 * names, if any, follow at the advanced @data position.
		 */
		ethtool_puts(&data, "tx_packets");
		ethtool_puts(&data, "tx_bytes");
		ethtool_puts(&data, "rx_packets");
		ethtool_puts(&data, "rx_bytes");
		if (ds->ops->get_strings)
			ds->ops->get_strings(ds, dp->index, stringset, data);
	} else if (stringset ==  ETH_SS_TEST) {
		net_selftest_get_strings(data);
	}

}

/* Fill @data with the four per-CPU software counters (summed over all
 * possible CPUs) followed, from index 4, by the driver's statistics.
 */
static void dsa_user_get_ethtool_stats(struct net_device *dev,
				       struct ethtool_stats *stats,
				       uint64_t *data)
{
	struct dsa_port *dp = dsa_user_to_port(dev);
	struct dsa_switch *ds = dp->ds;
	struct pcpu_sw_netstats *s;
	unsigned int start;
	int i;

	for_each_possible_cpu(i) {
		u64 tx_packets, tx_bytes, rx_packets, rx_bytes;

		s = per_cpu_ptr(dev->tstats, i);
		/* Re-read the snapshot until the sequence check reports that
		 * no update raced with it.
		 */
		do {
			start = u64_stats_fetch_begin(&s->syncp);
			tx_packets = u64_stats_read(&s->tx_packets);
			tx_bytes = u64_stats_read(&s->tx_bytes);
			rx_packets = u64_stats_read(&s->rx_packets);
			rx_bytes = u64_stats_read(&s->rx_bytes);
		} while (u64_stats_fetch_retry(&s->syncp, start));
		/* NOTE(review): accumulated with +=, so this presumably relies
		 * on the caller passing a zeroed buffer - confirm.
		 */
		data[0] += tx_packets;
		data[1] += tx_bytes;
		data[2] += rx_packets;
		data[3] += rx_bytes;
	}
	if (ds->ops->get_ethtool_stats)
		ds->ops->get_ethtool_stats(ds, dp->index, data + 4);
}

/* Number of entries in string set @sset: the driver's count plus the four
 * software counters for ETH_SS_STATS, the selftest count for ETH_SS_TEST,
 * -EOPNOTSUPP otherwise. A negative driver count is returned unchanged.
 */
static int dsa_user_get_sset_count(struct net_device *dev, int sset)
{
	struct dsa_port *dp = dsa_user_to_port(dev);
	struct dsa_switch *ds = dp->ds;

	if (sset == ETH_SS_STATS) {
		int count = 0;

		if (ds->ops->get_sset_count) {
			count = ds->ops->get_sset_count(ds, dp->index, sset);
			if (count < 0)
				return count;
		}

		return count + 4;
	} else if (sset ==  ETH_SS_TEST) {
		return net_selftest_get_count();
	}

	return -EOPNOTSUPP;
}

/* Forward the Ethernet PHY statistics request to the driver, if implemented. */
static void dsa_user_get_eth_phy_stats(struct net_device *dev,
				       struct ethtool_eth_phy_stats *phy_stats)
{
	struct dsa_port *dp = dsa_user_to_port(dev);
	struct dsa_switch *ds = dp->ds;

	if (ds->ops->get_eth_phy_stats)
		ds->ops->get_eth_phy_stats(ds, dp->index, phy_stats);
}

/* Forward the Ethernet MAC statistics request to the driver, if implemented. */
static void dsa_user_get_eth_mac_stats(struct net_device *dev,
				       struct ethtool_eth_mac_stats *mac_stats)
{
struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (ds->ops->get_eth_mac_stats) ds->ops->get_eth_mac_stats(ds, dp->index, mac_stats); } static void dsa_user_get_eth_ctrl_stats(struct net_device *dev, struct ethtool_eth_ctrl_stats *ctrl_stats) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (ds->ops->get_eth_ctrl_stats) ds->ops->get_eth_ctrl_stats(ds, dp->index, ctrl_stats); } static void dsa_user_get_rmon_stats(struct net_device *dev, struct ethtool_rmon_stats *rmon_stats, const struct ethtool_rmon_hist_range **ranges) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (ds->ops->get_rmon_stats) ds->ops->get_rmon_stats(ds, dp->index, rmon_stats, ranges); } static void dsa_user_get_ts_stats(struct net_device *dev, struct ethtool_ts_stats *ts_stats) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (ds->ops->get_ts_stats) ds->ops->get_ts_stats(ds, dp->index, ts_stats); } static void dsa_user_net_selftest(struct net_device *ndev, struct ethtool_test *etest, u64 *buf) { struct dsa_port *dp = dsa_user_to_port(ndev); struct dsa_switch *ds = dp->ds; if (ds->ops->self_test) { ds->ops->self_test(ds, dp->index, etest, buf); return; } net_selftest(ndev, etest, buf); } static int dsa_user_get_mm(struct net_device *dev, struct ethtool_mm_state *state) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (!ds->ops->get_mm) return -EOPNOTSUPP; return ds->ops->get_mm(ds, dp->index, state); } static int dsa_user_set_mm(struct net_device *dev, struct ethtool_mm_cfg *cfg, struct netlink_ext_ack *extack) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (!ds->ops->set_mm) return -EOPNOTSUPP; return ds->ops->set_mm(ds, dp->index, cfg, extack); } static void dsa_user_get_mm_stats(struct net_device *dev, struct ethtool_mm_stats *stats) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = 
dp->ds;

	if (ds->ops->get_mm_stats)
		ds->ops->get_mm_stats(ds, dp->index, stats);
}

/* Report Wake-on-LAN settings: phylink fills @w first, then the driver's
 * optional get_wol() hook is given the same structure.
 */
static void dsa_user_get_wol(struct net_device *dev, struct ethtool_wolinfo *w)
{
	struct dsa_port *dp = dsa_user_to_port(dev);
	struct dsa_switch *ds = dp->ds;

	phylink_ethtool_get_wol(dp->pl, w);

	if (ds->ops->get_wol)
		ds->ops->get_wol(ds, dp->index, w);
}

/* Apply Wake-on-LAN settings through phylink and the driver.
 *
 * The result is the driver hook's return value, or -EOPNOTSUPP when the
 * driver has no set_wol() hook.
 * NOTE(review): the return value of phylink_ethtool_set_wol() is discarded,
 * so a phylink-only success still reports -EOPNOTSUPP - confirm this is
 * intended.
 */
static int dsa_user_set_wol(struct net_device *dev, struct ethtool_wolinfo *w)
{
	struct dsa_port *dp = dsa_user_to_port(dev);
	struct dsa_switch *ds = dp->ds;
	int ret = -EOPNOTSUPP;

	phylink_ethtool_set_wol(dp->pl, w);

	if (ds->ops->set_wol)
		ret = ds->ops->set_wol(ds, dp->index, w);

	return ret;
}

/* Configure Energy Efficient Ethernet. Returns -EOPNOTSUPP unless the driver
 * has a support_eee() hook that reports support for this port.
 */
static int dsa_user_set_eee(struct net_device *dev, struct ethtool_keee *e)
{
	struct dsa_port *dp = dsa_user_to_port(dev);
	struct dsa_switch *ds = dp->ds;
	int ret;

	/* Check whether the switch supports EEE */
	if (!ds->ops->support_eee || !ds->ops->support_eee(ds, dp->index))
		return -EOPNOTSUPP;

	/* If the port is using phylink managed EEE, then an unimplemented
	 * set_mac_eee() is permissible.
*/
	if (!phylink_mac_implements_lpi(ds->phylink_mac_ops)) {
		/* Port's PHY and MAC both need to be EEE capable */
		if (!dev->phydev)
			return -ENODEV;

		if (!ds->ops->set_mac_eee)
			return -EOPNOTSUPP;

		ret = ds->ops->set_mac_eee(ds, dp->index, e);
		if (ret)
			return ret;
	} else if (ds->ops->set_mac_eee) {
		/* Optional in this branch, but its errors still abort. */
		ret = ds->ops->set_mac_eee(ds, dp->index, e);
		if (ret)
			return ret;
	}

	return phylink_ethtool_set_eee(dp->pl, e);
}

/* Read EEE settings via phylink. Returns -EOPNOTSUPP when the switch does not
 * report EEE support for the port and -ENODEV when the device has no phydev.
 */
static int dsa_user_get_eee(struct net_device *dev, struct ethtool_keee *e)
{
	struct dsa_port *dp = dsa_user_to_port(dev);
	struct dsa_switch *ds = dp->ds;

	/* Check whether the switch supports EEE */
	if (!ds->ops->support_eee || !ds->ops->support_eee(ds, dp->index))
		return -EOPNOTSUPP;

	/* Port's PHY and MAC both need to be EEE capable */
	if (!dev->phydev)
		return -ENODEV;

	return phylink_ethtool_get_eee(dp->pl, e);
}

/* Link settings query, delegated to phylink. */
static int dsa_user_get_link_ksettings(struct net_device *dev,
				       struct ethtool_link_ksettings *cmd)
{
	struct dsa_port *dp = dsa_user_to_port(dev);

	return phylink_ethtool_ksettings_get(dp->pl, cmd);
}

/* Link settings update, delegated to phylink. */
static int dsa_user_set_link_ksettings(struct net_device *dev,
				       const struct ethtool_link_ksettings *cmd)
{
	struct dsa_port *dp = dsa_user_to_port(dev);

	return phylink_ethtool_ksettings_set(dp->pl, cmd);
}

/* Forward the pause statistics request to the driver, if implemented. */
static void dsa_user_get_pause_stats(struct net_device *dev,
				     struct ethtool_pause_stats *pause_stats)
{
	struct dsa_port *dp = dsa_user_to_port(dev);
	struct dsa_switch *ds = dp->ds;

	if (ds->ops->get_pause_stats)
		ds->ops->get_pause_stats(ds, dp->index, pause_stats);
}

/* Pause parameter query, delegated to phylink. */
static void dsa_user_get_pauseparam(struct net_device *dev,
				    struct ethtool_pauseparam *pause)
{
	struct dsa_port *dp = dsa_user_to_port(dev);

	phylink_ethtool_get_pauseparam(dp->pl, pause);
}

/* Pause parameter update, delegated to phylink. */
static int dsa_user_set_pauseparam(struct net_device *dev,
				   struct ethtool_pauseparam *pause)
{
	struct dsa_port *dp = dsa_user_to_port(dev);

	return phylink_ethtool_set_pauseparam(dp->pl, pause);
}

#ifdef CONFIG_NET_POLL_CONTROLLER
/* Allocate a netpoll instance and set it up on the conduit device. */
static int dsa_user_netpoll_setup(struct net_device *dev)
{
	struct net_device
*conduit = dsa_user_to_conduit(dev);
	struct dsa_user_priv *p = netdev_priv(dev);
	struct netpoll *netpoll;
	int err = 0;

	netpoll = kzalloc_obj(*netpoll);
	if (!netpoll)
		return -ENOMEM;

	err = __netpoll_setup(netpoll, conduit);
	if (err) {
		/* Setup failed: the instance was never published, free it. */
		kfree(netpoll);
		goto out;
	}

	p->netpoll = netpoll;
out:
	return err;
}

/* Detach and release the port's netpoll instance, if one is set. The pointer
 * is cleared before the instance is freed.
 */
static void dsa_user_netpoll_cleanup(struct net_device *dev)
{
	struct dsa_user_priv *p = netdev_priv(dev);
	struct netpoll *netpoll = p->netpoll;

	if (!netpoll)
		return;

	p->netpoll = NULL;

	__netpoll_free(netpoll);
}

/* Intentionally empty poll controller. */
static void dsa_user_poll_controller(struct net_device *dev)
{
}
#endif

/* Look up the matchall entry with the given @cookie in the port's
 * mall_tc_list; returns NULL when there is none.
 */
static struct dsa_mall_tc_entry *
dsa_user_mall_tc_entry_find(struct net_device *dev, unsigned long cookie)
{
	struct dsa_user_priv *p = netdev_priv(dev);
	struct dsa_mall_tc_entry *mall_tc_entry;

	list_for_each_entry(mall_tc_entry, &p->mall_tc_list, list)
		if (mall_tc_entry->cookie == cookie)
			return mall_tc_entry;

	return NULL;
}

/* Offload a matchall "mirred" action as a port mirror.
 *
 * Rejects (-EOPNOTSUPP) filters that are not "protocol all", switches without
 * port_mirror_add(), and actions failing the basic HW stats check; returns
 * -EINVAL when the action has no target device.
 */
static int
dsa_user_add_cls_matchall_mirred(struct net_device *dev,
				 struct tc_cls_matchall_offload *cls,
				 bool ingress, bool ingress_target)
{
	struct netlink_ext_ack *extack = cls->common.extack;
	struct dsa_port *dp = dsa_user_to_port(dev);
	struct dsa_user_priv *p = netdev_priv(dev);
	struct dsa_mall_mirror_tc_entry *mirror;
	struct dsa_mall_tc_entry *mall_tc_entry;
	struct dsa_switch *ds = dp->ds;
	struct flow_action_entry *act;
	struct dsa_port *to_dp;
	int err;

	if (cls->common.protocol != htons(ETH_P_ALL)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Can only offload \"protocol all\" matchall filter");
		return -EOPNOTSUPP;
	}

	if (!ds->ops->port_mirror_add) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Switch does not support mirroring operation");
		return -EOPNOTSUPP;
	}

	if (!flow_action_basic_hw_stats_check(&cls->rule->action, extack))
		return -EOPNOTSUPP;

	act = &cls->rule->action.entries[0];

	if (!act->dev)
		return -EINVAL;

	if (dsa_user_dev_check(act->dev)) {
		if (ingress_target) {
			/* We can only fulfill this using software assist */
			if (cls->common.skip_sw) {
				NL_SET_ERR_MSG_MOD(extack,
						   "Can only mirred to
ingress of DSA user port if filter also runs in software");
				return -EOPNOTSUPP;
			}
			to_dp = dp->cpu_dp;
		} else {
			to_dp = dsa_user_to_port(act->dev);
		}
	} else {
		/* Handle mirroring to foreign target ports as a mirror towards
		 * the CPU. The software tc rule will take the packets from
		 * there.
		 */
		if (cls->common.skip_sw) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Can only mirred to CPU if filter also runs in software");
			return -EOPNOTSUPP;
		}
		to_dp = dp->cpu_dp;
	}

	/* Source and destination must live on the same switch. */
	if (dp->ds != to_dp->ds) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Cross-chip mirroring not implemented");
		return -EOPNOTSUPP;
	}

	mall_tc_entry = kzalloc_obj(*mall_tc_entry);
	if (!mall_tc_entry)
		return -ENOMEM;

	mall_tc_entry->cookie = cls->cookie;
	mall_tc_entry->type = DSA_PORT_MALL_MIRROR;
	mirror = &mall_tc_entry->mirror;
	mirror->to_local_port = to_dp->index;
	mirror->ingress = ingress;

	err = ds->ops->port_mirror_add(ds, dp->index, mirror, ingress, extack);
	if (err) {
		kfree(mall_tc_entry);
		return err;
	}

	/* Tracked only after the driver accepted it; err is 0 here. */
	list_add_tail(&mall_tc_entry->list, &p->mall_tc_list);

	return err;
}

/* Offload a matchall "police" action as a port policer.
 *
 * Rejects (-EOPNOTSUPP) switches without port_policer_add(), non-ingress
 * filters and actions failing the basic HW stats check; returns -EEXIST when
 * the port already has a policer entry.
 */
static int
dsa_user_add_cls_matchall_police(struct net_device *dev,
				 struct tc_cls_matchall_offload *cls,
				 bool ingress)
{
	struct netlink_ext_ack *extack = cls->common.extack;
	struct dsa_port *dp = dsa_user_to_port(dev);
	struct dsa_user_priv *p = netdev_priv(dev);
	struct dsa_mall_tc_entry *mall_tc_entry;
	struct flow_action_police *policer;
	struct dsa_switch *ds = dp->ds;
	struct flow_action_entry *act;
	int err;

	if (!ds->ops->port_policer_add) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Policing offload not implemented");
		return -EOPNOTSUPP;
	}

	if (!ingress) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Only supported on ingress qdisc");
		return -EOPNOTSUPP;
	}

	if (!flow_action_basic_hw_stats_check(&cls->rule->action, extack))
		return -EOPNOTSUPP;

	list_for_each_entry(mall_tc_entry, &p->mall_tc_list, list) {
		if (mall_tc_entry->type == DSA_PORT_MALL_POLICER) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Only one port policer allowed");
			return -EEXIST;
		}
	}

	act = &cls->rule->action.entries[0];

	mall_tc_entry =
kzalloc_obj(*mall_tc_entry);
	if (!mall_tc_entry)
		return -ENOMEM;

	mall_tc_entry->cookie = cls->cookie;
	mall_tc_entry->type = DSA_PORT_MALL_POLICER;
	policer = &mall_tc_entry->policer;
	/* The policer parameters are copied out of the action. */
	*policer = act->police;

	err = ds->ops->port_policer_add(ds, dp->index, policer);
	if (err) {
		kfree(mall_tc_entry);
		return err;
	}

	list_add_tail(&mall_tc_entry->list, &p->mall_tc_list);

	return err;
}

/* Dispatch a matchall filter to the mirred or police offload according to
 * its single action. Filters with more than one action, or with any other
 * action id, get -EOPNOTSUPP and an extack message.
 */
static int dsa_user_add_cls_matchall(struct net_device *dev,
				     struct tc_cls_matchall_offload *cls,
				     bool ingress)
{
	const struct flow_action *action = &cls->rule->action;
	struct netlink_ext_ack *extack = cls->common.extack;

	if (!flow_offload_has_one_action(action)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Cannot offload matchall filter with more than one action");
		return -EOPNOTSUPP;
	}

	switch (action->entries[0].id) {
	case FLOW_ACTION_MIRRED:
		return dsa_user_add_cls_matchall_mirred(dev, cls, ingress,
							false);
	case FLOW_ACTION_MIRRED_INGRESS:
		return dsa_user_add_cls_matchall_mirred(dev, cls, ingress,
							true);
	case FLOW_ACTION_POLICE:
		return dsa_user_add_cls_matchall_police(dev, cls, ingress);
	default:
		NL_SET_ERR_MSG_MOD(extack, "Unknown action");
		break;
	}

	return -EOPNOTSUPP;
}

/* Remove the matchall entry identified by cls->cookie: unlink it, ask the
 * driver to undo the mirror or policer (when it has the matching hook), and
 * free it. Does nothing when no entry with that cookie exists.
 */
static void dsa_user_del_cls_matchall(struct net_device *dev,
				      struct tc_cls_matchall_offload *cls)
{
	struct dsa_port *dp = dsa_user_to_port(dev);
	struct dsa_mall_tc_entry *mall_tc_entry;
	struct dsa_switch *ds = dp->ds;

	mall_tc_entry = dsa_user_mall_tc_entry_find(dev, cls->cookie);
	if (!mall_tc_entry)
		return;

	list_del(&mall_tc_entry->list);

	switch (mall_tc_entry->type) {
	case DSA_PORT_MALL_MIRROR:
		if (ds->ops->port_mirror_del)
			ds->ops->port_mirror_del(ds, dp->index,
						 &mall_tc_entry->mirror);
		break;
	case DSA_PORT_MALL_POLICER:
		if (ds->ops->port_policer_del)
			ds->ops->port_policer_del(ds, dp->index);
		break;
	default:
		/* An entry of any other type should never be in the list. */
		WARN_ON(1);
	}

	kfree(mall_tc_entry);
}

/* Entry point for matchall offload commands; only chain 0 is supported. */
static int dsa_user_setup_tc_cls_matchall(struct net_device *dev,
					  struct tc_cls_matchall_offload *cls,
					  bool ingress)
{
	if (cls->common.chain_index)
		return -EOPNOTSUPP;

	switch (cls->command) {
case TC_CLSMATCHALL_REPLACE: return dsa_user_add_cls_matchall(dev, cls, ingress); case TC_CLSMATCHALL_DESTROY: dsa_user_del_cls_matchall(dev, cls); return 0; default: return -EOPNOTSUPP; } } static int dsa_user_add_cls_flower(struct net_device *dev, struct flow_cls_offload *cls, bool ingress) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; int port = dp->index; if (!ds->ops->cls_flower_add) return -EOPNOTSUPP; return ds->ops->cls_flower_add(ds, port, cls, ingress); } static int dsa_user_del_cls_flower(struct net_device *dev, struct flow_cls_offload *cls, bool ingress) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; int port = dp->index; if (!ds->ops->cls_flower_del) return -EOPNOTSUPP; return ds->ops->cls_flower_del(ds, port, cls, ingress); } static int dsa_user_stats_cls_flower(struct net_device *dev, struct flow_cls_offload *cls, bool ingress) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; int port = dp->index; if (!ds->ops->cls_flower_stats) return -EOPNOTSUPP; return ds->ops->cls_flower_stats(ds, port, cls, ingress); } static int dsa_user_setup_tc_cls_flower(struct net_device *dev, struct flow_cls_offload *cls, bool ingress) { switch (cls->command) { case FLOW_CLS_REPLACE: return dsa_user_add_cls_flower(dev, cls, ingress); case FLOW_CLS_DESTROY: return dsa_user_del_cls_flower(dev, cls, ingress); case FLOW_CLS_STATS: return dsa_user_stats_cls_flower(dev, cls, ingress); default: return -EOPNOTSUPP; } } static int dsa_user_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv, bool ingress) { struct net_device *dev = cb_priv; if (!tc_can_offload(dev)) return -EOPNOTSUPP; switch (type) { case TC_SETUP_CLSMATCHALL: return dsa_user_setup_tc_cls_matchall(dev, type_data, ingress); case TC_SETUP_CLSFLOWER: return dsa_user_setup_tc_cls_flower(dev, type_data, ingress); default: return -EOPNOTSUPP; } } static int dsa_user_setup_tc_block_cb_ig(enum tc_setup_type 
type, void *type_data, void *cb_priv) { return dsa_user_setup_tc_block_cb(type, type_data, cb_priv, true); } static int dsa_user_setup_tc_block_cb_eg(enum tc_setup_type type, void *type_data, void *cb_priv) { return dsa_user_setup_tc_block_cb(type, type_data, cb_priv, false); } static LIST_HEAD(dsa_user_block_cb_list); static int dsa_user_setup_tc_block(struct net_device *dev, struct flow_block_offload *f) { struct flow_block_cb *block_cb; flow_setup_cb_t *cb; if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) cb = dsa_user_setup_tc_block_cb_ig; else if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) cb = dsa_user_setup_tc_block_cb_eg; else return -EOPNOTSUPP; f->driver_block_list = &dsa_user_block_cb_list; switch (f->command) { case FLOW_BLOCK_BIND: if (flow_block_cb_is_busy(cb, dev, &dsa_user_block_cb_list)) return -EBUSY; block_cb = flow_block_cb_alloc(cb, dev, dev, NULL); if (IS_ERR(block_cb)) return PTR_ERR(block_cb); flow_block_cb_add(block_cb, f); list_add_tail(&block_cb->driver_list, &dsa_user_block_cb_list); return 0; case FLOW_BLOCK_UNBIND: block_cb = flow_block_cb_lookup(f->block, cb, dev); if (!block_cb) return -ENOENT; flow_block_cb_remove(block_cb, f); list_del(&block_cb->driver_list); return 0; default: return -EOPNOTSUPP; } } static int dsa_user_setup_ft_block(struct dsa_switch *ds, int port, void *type_data) { struct net_device *conduit = dsa_port_to_conduit(dsa_to_port(ds, port)); if (!conduit->netdev_ops->ndo_setup_tc) return -EOPNOTSUPP; return conduit->netdev_ops->ndo_setup_tc(conduit, TC_SETUP_FT, type_data); } static int dsa_user_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; switch (type) { case TC_SETUP_BLOCK: return dsa_user_setup_tc_block(dev, type_data); case TC_SETUP_FT: return dsa_user_setup_ft_block(ds, dp->index, type_data); default: break; } if (!ds->ops->port_setup_tc) return -EOPNOTSUPP; return 
ds->ops->port_setup_tc(ds, dp->index, type, type_data); } static int dsa_user_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *nfc, u32 *rule_locs) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (!ds->ops->get_rxnfc) return -EOPNOTSUPP; return ds->ops->get_rxnfc(ds, dp->index, nfc, rule_locs); } static int dsa_user_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *nfc) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (!ds->ops->set_rxnfc) return -EOPNOTSUPP; return ds->ops->set_rxnfc(ds, dp->index, nfc); } static int dsa_user_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *ts) { struct dsa_user_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->dp->ds; if (!ds->ops->get_ts_info) return -EOPNOTSUPP; return ds->ops->get_ts_info(ds, p->dp->index, ts); } static int dsa_user_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { struct dsa_port *dp = dsa_user_to_port(dev); struct switchdev_obj_port_vlan vlan = { .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, .vid = vid, /* This API only allows programming tagged, non-PVID VIDs */ .flags = 0, }; struct netlink_ext_ack extack = {0}; struct dsa_switch *ds = dp->ds; struct netdev_hw_addr *ha; struct dsa_vlan *v; int ret; /* User port... */ ret = dsa_port_vlan_add(dp, &vlan, &extack); if (ret) { if (extack._msg) netdev_err(dev, "%s\n", extack._msg); return ret; } /* And CPU port... 
*/ ret = dsa_port_host_vlan_add(dp, &vlan, &extack); if (ret) { if (extack._msg) netdev_err(dev, "CPU port %d: %s\n", dp->cpu_dp->index, extack._msg); return ret; } if (!dsa_switch_supports_uc_filtering(ds) && !dsa_switch_supports_mc_filtering(ds)) return 0; v = kzalloc_obj(*v); if (!v) { ret = -ENOMEM; goto rollback; } netif_addr_lock_bh(dev); v->vid = vid; list_add_tail(&v->list, &dp->user_vlans); if (dsa_switch_supports_mc_filtering(ds)) { netdev_for_each_synced_mc_addr(ha, dev) { dsa_user_schedule_standalone_work(dev, DSA_MC_ADD, ha->addr, vid); } } if (dsa_switch_supports_uc_filtering(ds)) { netdev_for_each_synced_uc_addr(ha, dev) { dsa_user_schedule_standalone_work(dev, DSA_UC_ADD, ha->addr, vid); } } netif_addr_unlock_bh(dev); dsa_flush_workqueue(); return 0; rollback: dsa_port_host_vlan_del(dp, &vlan); dsa_port_vlan_del(dp, &vlan); return ret; } static int dsa_user_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) { struct dsa_port *dp = dsa_user_to_port(dev); struct switchdev_obj_port_vlan vlan = { .vid = vid, /* This API only allows programming tagged, non-PVID VIDs */ .flags = 0, }; struct dsa_switch *ds = dp->ds; struct netdev_hw_addr *ha; struct dsa_vlan *v; int err; err = dsa_port_vlan_del(dp, &vlan); if (err) return err; err = dsa_port_host_vlan_del(dp, &vlan); if (err) return err; if (!dsa_switch_supports_uc_filtering(ds) && !dsa_switch_supports_mc_filtering(ds)) return 0; netif_addr_lock_bh(dev); v = dsa_vlan_find(&dp->user_vlans, &vlan); if (!v) { netif_addr_unlock_bh(dev); return -ENOENT; } list_del(&v->list); kfree(v); if (dsa_switch_supports_mc_filtering(ds)) { netdev_for_each_synced_mc_addr(ha, dev) { dsa_user_schedule_standalone_work(dev, DSA_MC_DEL, ha->addr, vid); } } if (dsa_switch_supports_uc_filtering(ds)) { netdev_for_each_synced_uc_addr(ha, dev) { dsa_user_schedule_standalone_work(dev, DSA_UC_DEL, ha->addr, vid); } } netif_addr_unlock_bh(dev); dsa_flush_workqueue(); return 0; } static int dsa_user_restore_vlan(struct 
net_device *vdev, int vid, void *arg) { __be16 proto = vdev ? vlan_dev_vlan_proto(vdev) : htons(ETH_P_8021Q); return dsa_user_vlan_rx_add_vid(arg, proto, vid); } static int dsa_user_clear_vlan(struct net_device *vdev, int vid, void *arg) { __be16 proto = vdev ? vlan_dev_vlan_proto(vdev) : htons(ETH_P_8021Q); return dsa_user_vlan_rx_kill_vid(arg, proto, vid); } /* Keep the VLAN RX filtering list in sync with the hardware only if VLAN * filtering is enabled. The baseline is that only ports that offload a * VLAN-aware bridge are VLAN-aware, and standalone ports are VLAN-unaware, * but there are exceptions for quirky hardware. * * If ds->vlan_filtering_is_global = true, then standalone ports which share * the same switch with other ports that offload a VLAN-aware bridge are also * inevitably VLAN-aware. * * To summarize, a DSA switch port offloads: * * - If standalone (this includes software bridge, software LAG): * - if ds->needs_standalone_vlan_filtering = true, OR if * (ds->vlan_filtering_is_global = true AND there are bridges spanning * this switch chip which have vlan_filtering=1) * - the 8021q upper VLANs * - else (standalone VLAN filtering is not needed, VLAN filtering is not * global, or it is, but no port is under a VLAN-aware bridge): * - no VLAN (any 8021q upper is a software VLAN) * * - If under a vlan_filtering=0 bridge which it offload: * - if ds->configure_vlan_while_not_filtering = true (default): * - the bridge VLANs. These VLANs are committed to hardware but inactive. * - else (deprecated): * - no VLAN. The bridge VLANs are not restored when VLAN awareness is * enabled, so this behavior is broken and discouraged. 
* * - If under a vlan_filtering=1 bridge which it offload: * - the bridge VLANs * - the 8021q upper VLANs */ int dsa_user_manage_vlan_filtering(struct net_device *user, bool vlan_filtering) { int err; if (vlan_filtering) { user->features |= NETIF_F_HW_VLAN_CTAG_FILTER; err = vlan_for_each(user, dsa_user_restore_vlan, user); if (err) { vlan_for_each(user, dsa_user_clear_vlan, user); user->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; return err; } } else { err = vlan_for_each(user, dsa_user_clear_vlan, user); if (err) return err; user->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; } return 0; } struct dsa_hw_port { struct list_head list; struct net_device *dev; int old_mtu; }; static int dsa_hw_port_list_set_mtu(struct list_head *hw_port_list, int mtu) { const struct dsa_hw_port *p; int err; list_for_each_entry(p, hw_port_list, list) { if (p->dev->mtu == mtu) continue; err = dev_set_mtu(p->dev, mtu); if (err) goto rollback; } return 0; rollback: list_for_each_entry_continue_reverse(p, hw_port_list, list) { if (p->dev->mtu == p->old_mtu) continue; if (dev_set_mtu(p->dev, p->old_mtu)) netdev_err(p->dev, "Failed to restore MTU\n"); } return err; } static void dsa_hw_port_list_free(struct list_head *hw_port_list) { struct dsa_hw_port *p, *n; list_for_each_entry_safe(p, n, hw_port_list, list) kfree(p); } /* Make the hardware datapath to/from @dev limited to a common MTU */ static void dsa_bridge_mtu_normalization(struct dsa_port *dp) { struct list_head hw_port_list; struct dsa_switch_tree *dst; int min_mtu = ETH_MAX_MTU; struct dsa_port *other_dp; int err; if (!dp->ds->mtu_enforcement_ingress) return; if (!dp->bridge) return; INIT_LIST_HEAD(&hw_port_list); /* Populate the list of ports that are part of the same bridge * as the newly added/modified port */ list_for_each_entry(dst, &dsa_tree_list, list) { list_for_each_entry(other_dp, &dst->ports, list) { struct dsa_hw_port *hw_port; struct net_device *user; if (other_dp->type != DSA_PORT_TYPE_USER) continue; if 
(!dsa_port_bridge_same(dp, other_dp)) continue; if (!other_dp->ds->mtu_enforcement_ingress) continue; user = other_dp->user; if (min_mtu > user->mtu) min_mtu = user->mtu; hw_port = kzalloc_obj(*hw_port); if (!hw_port) goto out; hw_port->dev = user; hw_port->old_mtu = user->mtu; list_add(&hw_port->list, &hw_port_list); } } /* Attempt to configure the entire hardware bridge to the newly added * interface's MTU first, regardless of whether the intention of the * user was to raise or lower it. */ err = dsa_hw_port_list_set_mtu(&hw_port_list, dp->user->mtu); if (!err) goto out; /* Clearly that didn't work out so well, so just set the minimum MTU on * all hardware bridge ports now. If this fails too, then all ports will * still have their old MTU rolled back anyway. */ dsa_hw_port_list_set_mtu(&hw_port_list, min_mtu); out: dsa_hw_port_list_free(&hw_port_list); } int dsa_user_change_mtu(struct net_device *dev, int new_mtu) { struct net_device *conduit = dsa_user_to_conduit(dev); struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_port *cpu_dp = dp->cpu_dp; struct dsa_switch *ds = dp->ds; struct dsa_port *other_dp; int largest_mtu = 0; int new_conduit_mtu; int old_conduit_mtu; int mtu_limit; int overhead; int cpu_mtu; int err; if (!ds->ops->port_change_mtu) return -EOPNOTSUPP; dsa_tree_for_each_user_port(other_dp, ds->dst) { int user_mtu; /* During probe, this function will be called for each user * device, while not all of them have been allocated. That's * ok, it doesn't change what the maximum is, so ignore it. 
*/ if (!other_dp->user) continue; /* Pretend that we already applied the setting, which we * actually haven't (still haven't done all integrity checks) */ if (dp == other_dp) user_mtu = new_mtu; else user_mtu = other_dp->user->mtu; if (largest_mtu < user_mtu) largest_mtu = user_mtu; } overhead = dsa_tag_protocol_overhead(cpu_dp->tag_ops); mtu_limit = min_t(int, conduit->max_mtu, dev->max_mtu + overhead); old_conduit_mtu = conduit->mtu; new_conduit_mtu = largest_mtu + overhead; if (new_conduit_mtu > mtu_limit) return -ERANGE; /* If the conduit MTU isn't over limit, there's no need to check the CPU * MTU, since that surely isn't either. */ cpu_mtu = largest_mtu; /* Start applying stuff */ if (new_conduit_mtu != old_conduit_mtu) { err = dev_set_mtu(conduit, new_conduit_mtu); if (err < 0) goto out_conduit_failed; /* We only need to propagate the MTU of the CPU port to * upstream switches, so emit a notifier which updates them. */ err = dsa_port_mtu_change(cpu_dp, cpu_mtu); if (err) goto out_cpu_failed; } err = ds->ops->port_change_mtu(ds, dp->index, new_mtu); if (err) goto out_port_failed; WRITE_ONCE(dev->mtu, new_mtu); dsa_bridge_mtu_normalization(dp); return 0; out_port_failed: if (new_conduit_mtu != old_conduit_mtu) dsa_port_mtu_change(cpu_dp, old_conduit_mtu - overhead); out_cpu_failed: if (new_conduit_mtu != old_conduit_mtu) dev_set_mtu(conduit, old_conduit_mtu); out_conduit_failed: return err; } static int __maybe_unused dsa_user_dcbnl_set_apptrust(struct net_device *dev, u8 *sel, int nsel) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; int port = dp->index; if (!ds->ops->port_set_apptrust) return -EOPNOTSUPP; return ds->ops->port_set_apptrust(ds, port, sel, nsel); } static int __maybe_unused dsa_user_dcbnl_get_apptrust(struct net_device *dev, u8 *sel, int *nsel) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; int port = dp->index; if (!ds->ops->port_get_apptrust) return -EOPNOTSUPP; return 
ds->ops->port_get_apptrust(ds, port, sel, nsel); } static int __maybe_unused dsa_user_dcbnl_set_default_prio(struct net_device *dev, struct dcb_app *app) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; unsigned long mask, new_prio; int err, port = dp->index; if (!ds->ops->port_set_default_prio) return -EOPNOTSUPP; err = dcb_ieee_setapp(dev, app); if (err) return err; mask = dcb_ieee_getapp_mask(dev, app); new_prio = __fls(mask); err = ds->ops->port_set_default_prio(ds, port, new_prio); if (err) { dcb_ieee_delapp(dev, app); return err; } return 0; } /* Update the DSCP prio entries on all user ports of the switch in case * the switch supports global DSCP prio instead of per port DSCP prios. */ static int dsa_user_dcbnl_ieee_global_dscp_setdel(struct net_device *dev, struct dcb_app *app, bool del) { int (*setdel)(struct net_device *dev, struct dcb_app *app); struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; struct dsa_port *other_dp; int err, restore_err; if (del) setdel = dcb_ieee_delapp; else setdel = dcb_ieee_setapp; dsa_switch_for_each_user_port(other_dp, ds) { struct net_device *user = other_dp->user; if (!user || user == dev) continue; err = setdel(user, app); if (err) goto err_try_to_restore; } return 0; err_try_to_restore: /* Revert logic to restore previous state of app entries */ if (!del) setdel = dcb_ieee_delapp; else setdel = dcb_ieee_setapp; dsa_switch_for_each_user_port_continue_reverse(other_dp, ds) { struct net_device *user = other_dp->user; if (!user || user == dev) continue; restore_err = setdel(user, app); if (restore_err) netdev_err(user, "Failed to restore DSCP prio entry configuration\n"); } return err; } static int __maybe_unused dsa_user_dcbnl_add_dscp_prio(struct net_device *dev, struct dcb_app *app) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; unsigned long mask, new_prio; int err, port = dp->index; u8 dscp = app->protocol; if 
(!ds->ops->port_add_dscp_prio) return -EOPNOTSUPP; if (dscp >= 64) { netdev_err(dev, "DSCP APP entry with protocol value %u is invalid\n", dscp); return -EINVAL; } err = dcb_ieee_setapp(dev, app); if (err) return err; mask = dcb_ieee_getapp_mask(dev, app); new_prio = __fls(mask); err = ds->ops->port_add_dscp_prio(ds, port, dscp, new_prio); if (err) { dcb_ieee_delapp(dev, app); return err; } if (!ds->dscp_prio_mapping_is_global) return 0; err = dsa_user_dcbnl_ieee_global_dscp_setdel(dev, app, false); if (err) { if (ds->ops->port_del_dscp_prio) ds->ops->port_del_dscp_prio(ds, port, dscp, new_prio); dcb_ieee_delapp(dev, app); return err; } return 0; } static int __maybe_unused dsa_user_dcbnl_ieee_setapp(struct net_device *dev, struct dcb_app *app) { switch (app->selector) { case IEEE_8021QAZ_APP_SEL_ETHERTYPE: switch (app->protocol) { case 0: return dsa_user_dcbnl_set_default_prio(dev, app); default: return -EOPNOTSUPP; } break; case IEEE_8021QAZ_APP_SEL_DSCP: return dsa_user_dcbnl_add_dscp_prio(dev, app); default: return -EOPNOTSUPP; } } static int __maybe_unused dsa_user_dcbnl_del_default_prio(struct net_device *dev, struct dcb_app *app) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; unsigned long mask, new_prio; int err, port = dp->index; if (!ds->ops->port_set_default_prio) return -EOPNOTSUPP; err = dcb_ieee_delapp(dev, app); if (err) return err; mask = dcb_ieee_getapp_mask(dev, app); new_prio = mask ? 
__fls(mask) : 0; err = ds->ops->port_set_default_prio(ds, port, new_prio); if (err) { dcb_ieee_setapp(dev, app); return err; } return 0; } static int __maybe_unused dsa_user_dcbnl_del_dscp_prio(struct net_device *dev, struct dcb_app *app) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; int err, port = dp->index; u8 dscp = app->protocol; if (!ds->ops->port_del_dscp_prio) return -EOPNOTSUPP; err = dcb_ieee_delapp(dev, app); if (err) return err; err = ds->ops->port_del_dscp_prio(ds, port, dscp, app->priority); if (err) { dcb_ieee_setapp(dev, app); return err; } if (!ds->dscp_prio_mapping_is_global) return 0; err = dsa_user_dcbnl_ieee_global_dscp_setdel(dev, app, true); if (err) { if (ds->ops->port_add_dscp_prio) ds->ops->port_add_dscp_prio(ds, port, dscp, app->priority); dcb_ieee_setapp(dev, app); return err; } return 0; } static int __maybe_unused dsa_user_dcbnl_ieee_delapp(struct net_device *dev, struct dcb_app *app) { switch (app->selector) { case IEEE_8021QAZ_APP_SEL_ETHERTYPE: switch (app->protocol) { case 0: return dsa_user_dcbnl_del_default_prio(dev, app); default: return -EOPNOTSUPP; } break; case IEEE_8021QAZ_APP_SEL_DSCP: return dsa_user_dcbnl_del_dscp_prio(dev, app); default: return -EOPNOTSUPP; } } /* Pre-populate the DCB application priority table with the priorities * configured during switch setup, which we read from hardware here. 
*/ static int dsa_user_dcbnl_init(struct net_device *dev) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; int port = dp->index; int err; if (ds->ops->port_get_default_prio) { int prio = ds->ops->port_get_default_prio(ds, port); struct dcb_app app = { .selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE, .protocol = 0, .priority = prio, }; if (prio < 0) return prio; err = dcb_ieee_setapp(dev, &app); if (err) return err; } if (ds->ops->port_get_dscp_prio) { int protocol; for (protocol = 0; protocol < 64; protocol++) { struct dcb_app app = { .selector = IEEE_8021QAZ_APP_SEL_DSCP, .protocol = protocol, }; int prio; prio = ds->ops->port_get_dscp_prio(ds, port, protocol); if (prio == -EOPNOTSUPP) continue; if (prio < 0) return prio; app.priority = prio; err = dcb_ieee_setapp(dev, &app); if (err) return err; } } return 0; } static const struct ethtool_ops dsa_user_ethtool_ops = { .get_drvinfo = dsa_user_get_drvinfo, .get_regs_len = dsa_user_get_regs_len, .get_regs = dsa_user_get_regs, .nway_reset = dsa_user_nway_reset, .get_link = ethtool_op_get_link, .get_eeprom_len = dsa_user_get_eeprom_len, .get_eeprom = dsa_user_get_eeprom, .set_eeprom = dsa_user_set_eeprom, .get_strings = dsa_user_get_strings, .get_ethtool_stats = dsa_user_get_ethtool_stats, .get_sset_count = dsa_user_get_sset_count, .get_eth_phy_stats = dsa_user_get_eth_phy_stats, .get_eth_mac_stats = dsa_user_get_eth_mac_stats, .get_eth_ctrl_stats = dsa_user_get_eth_ctrl_stats, .get_rmon_stats = dsa_user_get_rmon_stats, .get_ts_stats = dsa_user_get_ts_stats, .set_wol = dsa_user_set_wol, .get_wol = dsa_user_get_wol, .set_eee = dsa_user_set_eee, .get_eee = dsa_user_get_eee, .get_link_ksettings = dsa_user_get_link_ksettings, .set_link_ksettings = dsa_user_set_link_ksettings, .get_pause_stats = dsa_user_get_pause_stats, .get_pauseparam = dsa_user_get_pauseparam, .set_pauseparam = dsa_user_set_pauseparam, .get_rxnfc = dsa_user_get_rxnfc, .set_rxnfc = dsa_user_set_rxnfc, .get_ts_info = 
dsa_user_get_ts_info, .self_test = dsa_user_net_selftest, .get_mm = dsa_user_get_mm, .set_mm = dsa_user_set_mm, .get_mm_stats = dsa_user_get_mm_stats, }; static const struct dcbnl_rtnl_ops __maybe_unused dsa_user_dcbnl_ops = { .ieee_setapp = dsa_user_dcbnl_ieee_setapp, .ieee_delapp = dsa_user_dcbnl_ieee_delapp, .dcbnl_setapptrust = dsa_user_dcbnl_set_apptrust, .dcbnl_getapptrust = dsa_user_dcbnl_get_apptrust, }; static void dsa_user_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *s) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (ds->ops->get_stats64) ds->ops->get_stats64(ds, dp->index, s); else dev_get_tstats64(dev, s); } static int dsa_user_fill_forward_path(struct net_device_path_ctx *ctx, struct net_device_path *path) { struct dsa_port *dp = dsa_user_to_port(ctx->dev); struct net_device *conduit = dsa_port_to_conduit(dp); struct dsa_port *cpu_dp = dp->cpu_dp; path->dev = ctx->dev; path->type = DEV_PATH_DSA; path->dsa.proto = cpu_dp->tag_ops->proto; path->dsa.port = dp->index; ctx->dev = conduit; return 0; } static int dsa_user_hwtstamp_get(struct net_device *dev, struct kernel_hwtstamp_config *cfg) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (!ds->ops->port_hwtstamp_get) return -EOPNOTSUPP; return ds->ops->port_hwtstamp_get(ds, dp->index, cfg); } static int dsa_user_hwtstamp_set(struct net_device *dev, struct kernel_hwtstamp_config *cfg, struct netlink_ext_ack *extack) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (!ds->ops->port_hwtstamp_set) return -EOPNOTSUPP; return ds->ops->port_hwtstamp_set(ds, dp->index, cfg, extack); } static const struct net_device_ops dsa_user_netdev_ops = { .ndo_open = dsa_user_open, .ndo_stop = dsa_user_close, .ndo_start_xmit = dsa_user_xmit, .ndo_change_rx_flags = dsa_user_change_rx_flags, .ndo_set_rx_mode = dsa_user_set_rx_mode, .ndo_set_mac_address = dsa_user_set_mac_address, .ndo_fdb_dump = 
dsa_user_fdb_dump, .ndo_eth_ioctl = dsa_user_ioctl, .ndo_get_iflink = dsa_user_get_iflink, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_netpoll_setup = dsa_user_netpoll_setup, .ndo_netpoll_cleanup = dsa_user_netpoll_cleanup, .ndo_poll_controller = dsa_user_poll_controller, #endif .ndo_setup_tc = dsa_user_setup_tc, .ndo_get_stats64 = dsa_user_get_stats64, .ndo_vlan_rx_add_vid = dsa_user_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = dsa_user_vlan_rx_kill_vid, .ndo_change_mtu = dsa_user_change_mtu, .ndo_fill_forward_path = dsa_user_fill_forward_path, .ndo_hwtstamp_get = dsa_user_hwtstamp_get, .ndo_hwtstamp_set = dsa_user_hwtstamp_set, }; static const struct device_type dsa_type = { .name = "dsa", }; void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up) { const struct dsa_port *dp = dsa_to_port(ds, port); if (dp->pl) phylink_mac_change(dp->pl, up); } EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_change); static void dsa_user_phylink_fixed_state(struct phylink_config *config, struct phylink_link_state *state) { struct dsa_port *dp = dsa_phylink_to_port(config); struct dsa_switch *ds = dp->ds; /* No need to check that this operation is valid, the callback would * not be called if it was not. 
*/ ds->ops->phylink_fixed_state(ds, dp->index, state); } /* user device setup *******************************************************/ static int dsa_user_phy_connect(struct net_device *user_dev, int addr, u32 flags) { struct dsa_port *dp = dsa_user_to_port(user_dev); struct dsa_switch *ds = dp->ds; user_dev->phydev = mdiobus_get_phy(ds->user_mii_bus, addr); if (!user_dev->phydev) { netdev_err(user_dev, "no phy at %d\n", addr); return -ENODEV; } user_dev->phydev->dev_flags |= flags; return phylink_connect_phy(dp->pl, user_dev->phydev); } static int dsa_user_phy_setup(struct net_device *user_dev) { struct dsa_port *dp = dsa_user_to_port(user_dev); struct device_node *port_dn = dp->dn; struct dsa_switch *ds = dp->ds; u32 phy_flags = 0; int ret; dp->pl_config.dev = &user_dev->dev; dp->pl_config.type = PHYLINK_NETDEV; /* The get_fixed_state callback takes precedence over polling the * link GPIO in PHYLINK (see phylink_get_fixed_state). Only set * this if the switch provides such a callback. 
*/ if (ds->ops->phylink_fixed_state) { dp->pl_config.get_fixed_state = dsa_user_phylink_fixed_state; dp->pl_config.poll_fixed_state = true; } ret = dsa_port_phylink_create(dp); if (ret) return ret; if (ds->ops->get_phy_flags) phy_flags = ds->ops->get_phy_flags(ds, dp->index); ret = phylink_of_phy_connect(dp->pl, port_dn, phy_flags); if (ret == -ENODEV && ds->user_mii_bus) { /* We could not connect to a designated PHY or SFP, so try to * use the switch internal MDIO bus instead */ ret = dsa_user_phy_connect(user_dev, dp->index, phy_flags); } if (ret) { netdev_err(user_dev, "failed to connect to PHY: %pe\n", ERR_PTR(ret)); dsa_port_phylink_destroy(dp); } return ret; } void dsa_user_setup_tagger(struct net_device *user) { struct dsa_port *dp = dsa_user_to_port(user); struct net_device *conduit = dsa_port_to_conduit(dp); struct dsa_user_priv *p = netdev_priv(user); const struct dsa_port *cpu_dp = dp->cpu_dp; const struct dsa_switch *ds = dp->ds; user->needed_headroom = cpu_dp->tag_ops->needed_headroom; user->needed_tailroom = cpu_dp->tag_ops->needed_tailroom; /* Try to save one extra realloc later in the TX path (in the conduit) * by also inheriting the conduit's needed headroom and tailroom. * The 8021q driver also does this. 
*/ user->needed_headroom += conduit->needed_headroom; user->needed_tailroom += conduit->needed_tailroom; p->xmit = cpu_dp->tag_ops->xmit; user->features = conduit->vlan_features | NETIF_F_HW_TC; user->hw_features |= NETIF_F_HW_TC; if (user->needed_tailroom) user->features &= ~(NETIF_F_SG | NETIF_F_FRAGLIST); if (ds->needs_standalone_vlan_filtering) user->features |= NETIF_F_HW_VLAN_CTAG_FILTER; user->lltx = true; } int dsa_user_suspend(struct net_device *user_dev) { struct dsa_port *dp = dsa_user_to_port(user_dev); if (!netif_running(user_dev)) return 0; netif_device_detach(user_dev); rtnl_lock(); phylink_stop(dp->pl); rtnl_unlock(); return 0; } int dsa_user_resume(struct net_device *user_dev) { struct dsa_port *dp = dsa_user_to_port(user_dev); if (!netif_running(user_dev)) return 0; netif_device_attach(user_dev); rtnl_lock(); phylink_start(dp->pl); rtnl_unlock(); return 0; } int dsa_user_create(struct dsa_port *port) { struct net_device *conduit = dsa_port_to_conduit(port); struct dsa_switch *ds = port->ds; struct net_device *user_dev; struct dsa_user_priv *p; const char *name; int assign_type; int ret; if (!ds->num_tx_queues) ds->num_tx_queues = 1; if (port->name) { name = port->name; assign_type = NET_NAME_PREDICTABLE; } else { name = "eth%d"; assign_type = NET_NAME_ENUM; } user_dev = alloc_netdev_mqs(sizeof(struct dsa_user_priv), name, assign_type, ether_setup, ds->num_tx_queues, 1); if (user_dev == NULL) return -ENOMEM; user_dev->rtnl_link_ops = &dsa_link_ops; user_dev->ethtool_ops = &dsa_user_ethtool_ops; #if IS_ENABLED(CONFIG_DCB) user_dev->dcbnl_ops = &dsa_user_dcbnl_ops; #endif if (!is_zero_ether_addr(port->mac)) eth_hw_addr_set(user_dev, port->mac); else eth_hw_addr_inherit(user_dev, conduit); user_dev->priv_flags |= IFF_NO_QUEUE; if (dsa_switch_supports_uc_filtering(ds)) user_dev->priv_flags |= IFF_UNICAST_FLT; user_dev->netdev_ops = &dsa_user_netdev_ops; if (ds->ops->port_max_mtu) user_dev->max_mtu = ds->ops->port_max_mtu(ds, port->index); 
SET_NETDEV_DEVTYPE(user_dev, &dsa_type); SET_NETDEV_DEV(user_dev, port->ds->dev); SET_NETDEV_DEVLINK_PORT(user_dev, &port->devlink_port); user_dev->dev.of_node = port->dn; user_dev->vlan_features = conduit->vlan_features; p = netdev_priv(user_dev); user_dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; ret = gro_cells_init(&p->gcells, user_dev); if (ret) goto out_free; p->dp = port; INIT_LIST_HEAD(&p->mall_tc_list); port->user = user_dev; dsa_user_setup_tagger(user_dev); netif_carrier_off(user_dev); ret = dsa_user_phy_setup(user_dev); if (ret) { netdev_err(user_dev, "error %d setting up PHY for tree %d, switch %d, port %d\n", ret, ds->dst->index, ds->index, port->index); goto out_gcells; } rtnl_lock(); ret = dsa_user_change_mtu(user_dev, ETH_DATA_LEN); if (ret && ret != -EOPNOTSUPP) dev_warn(ds->dev, "nonfatal error %d setting MTU to %d on port %d\n", ret, ETH_DATA_LEN, port->index); ret = register_netdevice(user_dev); if (ret) { netdev_err(conduit, "error %d registering interface %s\n", ret, user_dev->name); rtnl_unlock(); goto out_phy; } if (IS_ENABLED(CONFIG_DCB)) { ret = dsa_user_dcbnl_init(user_dev); if (ret) { netdev_err(user_dev, "failed to initialize DCB: %pe\n", ERR_PTR(ret)); rtnl_unlock(); goto out_unregister; } } ret = netdev_upper_dev_link(conduit, user_dev, NULL); rtnl_unlock(); if (ret) goto out_unregister; return 0; out_unregister: unregister_netdev(user_dev); out_phy: rtnl_lock(); phylink_disconnect_phy(p->dp->pl); rtnl_unlock(); dsa_port_phylink_destroy(p->dp); out_gcells: gro_cells_destroy(&p->gcells); out_free: free_netdev(user_dev); port->user = NULL; return ret; } void dsa_user_destroy(struct net_device *user_dev) { struct net_device *conduit = dsa_user_to_conduit(user_dev); struct dsa_port *dp = dsa_user_to_port(user_dev); struct dsa_user_priv *p = netdev_priv(user_dev); netif_carrier_off(user_dev); rtnl_lock(); netdev_upper_dev_unlink(conduit, user_dev); unregister_netdevice(user_dev); phylink_disconnect_phy(dp->pl); rtnl_unlock(); 
dsa_port_phylink_destroy(dp); gro_cells_destroy(&p->gcells); free_netdev(user_dev); } int dsa_user_change_conduit(struct net_device *dev, struct net_device *conduit, struct netlink_ext_ack *extack) { struct net_device *old_conduit = dsa_user_to_conduit(dev); struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; struct net_device *upper; struct list_head *iter; int err; if (conduit == old_conduit) return 0; if (!ds->ops->port_change_conduit) { NL_SET_ERR_MSG_MOD(extack, "Driver does not support changing DSA conduit"); return -EOPNOTSUPP; } if (!netdev_uses_dsa(conduit)) { NL_SET_ERR_MSG_MOD(extack, "Interface not eligible as DSA conduit"); return -EOPNOTSUPP; } netdev_for_each_upper_dev_rcu(conduit, upper, iter) { if (dsa_user_dev_check(upper)) continue; if (netif_is_bridge_master(upper)) continue; NL_SET_ERR_MSG_MOD(extack, "Cannot join conduit with unknown uppers"); return -EOPNOTSUPP; } /* Since we allow live-changing the DSA conduit, plus we auto-open the * DSA conduit when the user port opens => we need to ensure that the * new DSA conduit is open too. 
*/ if (dev->flags & IFF_UP) { err = dev_open(conduit, extack); if (err) return err; } netdev_upper_dev_unlink(old_conduit, dev); err = netdev_upper_dev_link(conduit, dev, extack); if (err) goto out_revert_old_conduit_unlink; err = dsa_port_change_conduit(dp, conduit, extack); if (err) goto out_revert_conduit_link; /* Update the MTU of the new CPU port through cross-chip notifiers */ err = dsa_user_change_mtu(dev, dev->mtu); if (err && err != -EOPNOTSUPP) { netdev_warn(dev, "nonfatal error updating MTU with new conduit: %pe\n", ERR_PTR(err)); } return 0; out_revert_conduit_link: netdev_upper_dev_unlink(conduit, dev); out_revert_old_conduit_unlink: netdev_upper_dev_link(old_conduit, dev, NULL); return err; } bool dsa_user_dev_check(const struct net_device *dev) { return dev->netdev_ops == &dsa_user_netdev_ops; } EXPORT_SYMBOL_GPL(dsa_user_dev_check); static int dsa_user_changeupper(struct net_device *dev, struct netdev_notifier_changeupper_info *info) { struct netlink_ext_ack *extack; int err = NOTIFY_DONE; struct dsa_port *dp; if (!dsa_user_dev_check(dev)) return err; dp = dsa_user_to_port(dev); extack = netdev_notifier_info_to_extack(&info->info); if (netif_is_bridge_master(info->upper_dev)) { if (info->linking) { err = dsa_port_bridge_join(dp, info->upper_dev, extack); if (!err) dsa_bridge_mtu_normalization(dp); if (err == -EOPNOTSUPP) { NL_SET_ERR_MSG_WEAK_MOD(extack, "Offloading not supported"); err = 0; } err = notifier_from_errno(err); } else { dsa_port_bridge_leave(dp, info->upper_dev); err = NOTIFY_OK; } } else if (netif_is_lag_master(info->upper_dev)) { if (info->linking) { err = dsa_port_lag_join(dp, info->upper_dev, info->upper_info, extack); if (err == -EOPNOTSUPP) { NL_SET_ERR_MSG_WEAK_MOD(extack, "Offloading not supported"); err = 0; } err = notifier_from_errno(err); } else { dsa_port_lag_leave(dp, info->upper_dev); err = NOTIFY_OK; } } else if (is_hsr_master(info->upper_dev)) { if (info->linking) { err = dsa_port_hsr_join(dp, info->upper_dev, extack); 
if (err == -EOPNOTSUPP) { NL_SET_ERR_MSG_WEAK_MOD(extack, "Offloading not supported"); err = 0; } err = notifier_from_errno(err); } else { dsa_port_hsr_leave(dp, info->upper_dev); err = NOTIFY_OK; } } return err; } static int dsa_user_prechangeupper(struct net_device *dev, struct netdev_notifier_changeupper_info *info) { struct dsa_port *dp; if (!dsa_user_dev_check(dev)) return NOTIFY_DONE; dp = dsa_user_to_port(dev); if (netif_is_bridge_master(info->upper_dev) && !info->linking) dsa_port_pre_bridge_leave(dp, info->upper_dev); else if (netif_is_lag_master(info->upper_dev) && !info->linking) dsa_port_pre_lag_leave(dp, info->upper_dev); /* dsa_port_pre_hsr_leave is not yet necessary since hsr devices cannot * meaningfully placed under a bridge yet */ return NOTIFY_DONE; } static int dsa_user_lag_changeupper(struct net_device *dev, struct netdev_notifier_changeupper_info *info) { struct net_device *lower; struct list_head *iter; int err = NOTIFY_DONE; struct dsa_port *dp; if (!netif_is_lag_master(dev)) return err; netdev_for_each_lower_dev(dev, lower, iter) { if (!dsa_user_dev_check(lower)) continue; dp = dsa_user_to_port(lower); if (!dp->lag) /* Software LAG */ continue; err = dsa_user_changeupper(lower, info); if (notifier_to_errno(err)) break; } return err; } /* Same as dsa_user_lag_changeupper() except that it calls * dsa_user_prechangeupper() */ static int dsa_user_lag_prechangeupper(struct net_device *dev, struct netdev_notifier_changeupper_info *info) { struct net_device *lower; struct list_head *iter; int err = NOTIFY_DONE; struct dsa_port *dp; if (!netif_is_lag_master(dev)) return err; netdev_for_each_lower_dev(dev, lower, iter) { if (!dsa_user_dev_check(lower)) continue; dp = dsa_user_to_port(lower); if (!dp->lag) /* Software LAG */ continue; err = dsa_user_prechangeupper(lower, info); if (notifier_to_errno(err)) break; } return err; } static int dsa_prevent_bridging_8021q_upper(struct net_device *dev, struct netdev_notifier_changeupper_info *info) { struct 
netlink_ext_ack *ext_ack; struct net_device *user, *br; struct dsa_port *dp; ext_ack = netdev_notifier_info_to_extack(&info->info); if (!is_vlan_dev(dev)) return NOTIFY_DONE; user = vlan_dev_real_dev(dev); if (!dsa_user_dev_check(user)) return NOTIFY_DONE; dp = dsa_user_to_port(user); br = dsa_port_bridge_dev_get(dp); if (!br) return NOTIFY_DONE; /* Deny enslaving a VLAN device into a VLAN-aware bridge */ if (br_vlan_enabled(br) && netif_is_bridge_master(info->upper_dev) && info->linking) { NL_SET_ERR_MSG_MOD(ext_ack, "Cannot make VLAN device join VLAN-aware bridge"); return notifier_from_errno(-EINVAL); } return NOTIFY_DONE; } static int dsa_user_check_8021q_upper(struct net_device *dev, struct netdev_notifier_changeupper_info *info) { struct dsa_port *dp = dsa_user_to_port(dev); struct net_device *br = dsa_port_bridge_dev_get(dp); struct bridge_vlan_info br_info; struct netlink_ext_ack *extack; int err = NOTIFY_DONE; u16 vid; if (!br || !br_vlan_enabled(br)) return NOTIFY_DONE; extack = netdev_notifier_info_to_extack(&info->info); vid = vlan_dev_vlan_id(info->upper_dev); /* br_vlan_get_info() returns -EINVAL or -ENOENT if the * device, respectively the VID is not found, returning * 0 means success, which is a failure for us here. 
*/ err = br_vlan_get_info(br, vid, &br_info); if (err == 0) { NL_SET_ERR_MSG_MOD(extack, "This VLAN is already configured by the bridge"); return notifier_from_errno(-EBUSY); } return NOTIFY_DONE; } static int dsa_user_prechangeupper_sanity_check(struct net_device *dev, struct netdev_notifier_changeupper_info *info) { struct dsa_switch *ds; struct dsa_port *dp; int err; if (!dsa_user_dev_check(dev)) return dsa_prevent_bridging_8021q_upper(dev, info); dp = dsa_user_to_port(dev); ds = dp->ds; if (ds->ops->port_prechangeupper) { err = ds->ops->port_prechangeupper(ds, dp->index, info); if (err) return notifier_from_errno(err); } if (is_vlan_dev(info->upper_dev)) return dsa_user_check_8021q_upper(dev, info); return NOTIFY_DONE; } /* To be eligible as a DSA conduit, a LAG must have all lower interfaces be * eligible DSA conduits. Additionally, all LAG slaves must be DSA conduits of * switches in the same switch tree. */ static int dsa_lag_conduit_validate(struct net_device *lag_dev, struct netlink_ext_ack *extack) { struct net_device *lower1, *lower2; struct list_head *iter1, *iter2; netdev_for_each_lower_dev(lag_dev, lower1, iter1) { netdev_for_each_lower_dev(lag_dev, lower2, iter2) { if (!netdev_uses_dsa(lower1) || !netdev_uses_dsa(lower2)) { NL_SET_ERR_MSG_MOD(extack, "All LAG ports must be eligible as DSA conduits"); return notifier_from_errno(-EINVAL); } if (lower1 == lower2) continue; if (!dsa_port_tree_same(lower1->dsa_ptr, lower2->dsa_ptr)) { NL_SET_ERR_MSG_MOD(extack, "LAG contains DSA conduits of disjoint switch trees"); return notifier_from_errno(-EINVAL); } } } return NOTIFY_DONE; } static int dsa_conduit_prechangeupper_sanity_check(struct net_device *conduit, struct netdev_notifier_changeupper_info *info) { struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(&info->info); if (!netdev_uses_dsa(conduit)) return NOTIFY_DONE; if (!info->linking) return NOTIFY_DONE; /* Allow DSA switch uppers */ if (dsa_user_dev_check(info->upper_dev)) return 
NOTIFY_DONE; /* Allow bridge uppers of DSA conduits, subject to further * restrictions in dsa_bridge_prechangelower_sanity_check() */ if (netif_is_bridge_master(info->upper_dev)) return NOTIFY_DONE; /* Allow LAG uppers, subject to further restrictions in * dsa_lag_conduit_prechangelower_sanity_check() */ if (netif_is_lag_master(info->upper_dev)) return dsa_lag_conduit_validate(info->upper_dev, extack); NL_SET_ERR_MSG_MOD(extack, "DSA conduit cannot join unknown upper interfaces"); return notifier_from_errno(-EBUSY); } static int dsa_lag_conduit_prechangelower_sanity_check(struct net_device *dev, struct netdev_notifier_changeupper_info *info) { struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(&info->info); struct net_device *lag_dev = info->upper_dev; struct net_device *lower; struct list_head *iter; if (!netdev_uses_dsa(lag_dev) || !netif_is_lag_master(lag_dev)) return NOTIFY_DONE; if (!info->linking) return NOTIFY_DONE; if (!netdev_uses_dsa(dev)) { NL_SET_ERR_MSG(extack, "Only DSA conduits can join a LAG DSA conduit"); return notifier_from_errno(-EINVAL); } netdev_for_each_lower_dev(lag_dev, lower, iter) { if (!dsa_port_tree_same(dev->dsa_ptr, lower->dsa_ptr)) { NL_SET_ERR_MSG(extack, "Interface is DSA conduit for a different switch tree than this LAG"); return notifier_from_errno(-EINVAL); } break; } return NOTIFY_DONE; } /* Don't allow bridging of DSA conduits, since the bridge layer rx_handler * prevents the DSA fake ethertype handler to be invoked, so we don't get the * chance to strip off and parse the DSA switch tag protocol header (the bridge * layer just returns RX_HANDLER_CONSUMED, stopping RX processing for these * frames). * The only case where that would not be an issue is when bridging can already * be offloaded, such as when the DSA conduit is itself a DSA or plain switchdev * port, and is bridged only with other ports from the same hardware device. 
*/ static int dsa_bridge_prechangelower_sanity_check(struct net_device *new_lower, struct netdev_notifier_changeupper_info *info) { struct net_device *br = info->upper_dev; struct netlink_ext_ack *extack; struct net_device *lower; struct list_head *iter; if (!netif_is_bridge_master(br)) return NOTIFY_DONE; if (!info->linking) return NOTIFY_DONE; extack = netdev_notifier_info_to_extack(&info->info); netdev_for_each_lower_dev(br, lower, iter) { if (!netdev_uses_dsa(new_lower) && !netdev_uses_dsa(lower)) continue; if (!netdev_port_same_parent_id(lower, new_lower)) { NL_SET_ERR_MSG(extack, "Cannot do software bridging with a DSA conduit"); return notifier_from_errno(-EINVAL); } } return NOTIFY_DONE; } static void dsa_tree_migrate_ports_from_lag_conduit(struct dsa_switch_tree *dst, struct net_device *lag_dev) { struct net_device *new_conduit = dsa_tree_find_first_conduit(dst); struct dsa_port *dp; int err; dsa_tree_for_each_user_port(dp, dst) { if (dsa_port_to_conduit(dp) != lag_dev) continue; err = dsa_user_change_conduit(dp->user, new_conduit, NULL); if (err) { netdev_err(dp->user, "failed to restore conduit to %s: %pe\n", new_conduit->name, ERR_PTR(err)); } } } static int dsa_conduit_lag_join(struct net_device *conduit, struct net_device *lag_dev, struct netdev_lag_upper_info *uinfo, struct netlink_ext_ack *extack) { struct dsa_port *cpu_dp = conduit->dsa_ptr; struct dsa_switch_tree *dst = cpu_dp->dst; struct dsa_port *dp; int err; err = dsa_conduit_lag_setup(lag_dev, cpu_dp, uinfo, extack); if (err) return err; dsa_tree_for_each_user_port(dp, dst) { if (dsa_port_to_conduit(dp) != conduit) continue; err = dsa_user_change_conduit(dp->user, lag_dev, extack); if (err) goto restore; } return 0; restore: dsa_tree_for_each_user_port_continue_reverse(dp, dst) { if (dsa_port_to_conduit(dp) != lag_dev) continue; err = dsa_user_change_conduit(dp->user, conduit, NULL); if (err) { netdev_err(dp->user, "failed to restore conduit to %s: %pe\n", conduit->name, ERR_PTR(err)); } } 
dsa_conduit_lag_teardown(lag_dev, conduit->dsa_ptr); return err; } static void dsa_conduit_lag_leave(struct net_device *conduit, struct net_device *lag_dev) { struct dsa_port *dp, *cpu_dp = lag_dev->dsa_ptr; struct dsa_switch_tree *dst = cpu_dp->dst; struct dsa_port *new_cpu_dp = NULL; struct net_device *lower; struct list_head *iter; netdev_for_each_lower_dev(lag_dev, lower, iter) { if (netdev_uses_dsa(lower)) { new_cpu_dp = lower->dsa_ptr; break; } } if (new_cpu_dp) { /* Update the CPU port of the user ports still under the LAG * so that dsa_port_to_conduit() continues to work properly */ dsa_tree_for_each_user_port(dp, dst) if (dsa_port_to_conduit(dp) == lag_dev) dp->cpu_dp = new_cpu_dp; /* Update the index of the virtual CPU port to match the lowest * physical CPU port */ lag_dev->dsa_ptr = new_cpu_dp; wmb(); } else { /* If the LAG DSA conduit has no ports left, migrate back all * user ports to the first physical CPU port */ dsa_tree_migrate_ports_from_lag_conduit(dst, lag_dev); } /* This DSA conduit has left its LAG in any case, so let * the CPU port leave the hardware LAG as well */ dsa_conduit_lag_teardown(lag_dev, conduit->dsa_ptr); } static int dsa_conduit_changeupper(struct net_device *dev, struct netdev_notifier_changeupper_info *info) { struct netlink_ext_ack *extack; int err = NOTIFY_DONE; if (!netdev_uses_dsa(dev)) return err; extack = netdev_notifier_info_to_extack(&info->info); if (netif_is_lag_master(info->upper_dev)) { if (info->linking) { err = dsa_conduit_lag_join(dev, info->upper_dev, info->upper_info, extack); err = notifier_from_errno(err); } else { dsa_conduit_lag_leave(dev, info->upper_dev); err = NOTIFY_OK; } } return err; } static int dsa_user_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (event) { case NETDEV_PRECHANGEUPPER: { struct netdev_notifier_changeupper_info *info = ptr; int err; err = dsa_user_prechangeupper_sanity_check(dev, info); 
if (notifier_to_errno(err)) return err; err = dsa_conduit_prechangeupper_sanity_check(dev, info); if (notifier_to_errno(err)) return err; err = dsa_lag_conduit_prechangelower_sanity_check(dev, info); if (notifier_to_errno(err)) return err; err = dsa_bridge_prechangelower_sanity_check(dev, info); if (notifier_to_errno(err)) return err; err = dsa_user_prechangeupper(dev, ptr); if (notifier_to_errno(err)) return err; err = dsa_user_lag_prechangeupper(dev, ptr); if (notifier_to_errno(err)) return err; break; } case NETDEV_CHANGEUPPER: { int err; err = dsa_user_changeupper(dev, ptr); if (notifier_to_errno(err)) return err; err = dsa_user_lag_changeupper(dev, ptr); if (notifier_to_errno(err)) return err; err = dsa_conduit_changeupper(dev, ptr); if (notifier_to_errno(err)) return err; break; } case NETDEV_CHANGELOWERSTATE: { struct netdev_notifier_changelowerstate_info *info = ptr; struct dsa_port *dp; int err = 0; if (dsa_user_dev_check(dev)) { dp = dsa_user_to_port(dev); err = dsa_port_lag_change(dp, info->lower_state_info); } /* Mirror LAG port events on DSA conduits that are in * a LAG towards their respective switch CPU ports */ if (netdev_uses_dsa(dev)) { dp = dev->dsa_ptr; err = dsa_port_lag_change(dp, info->lower_state_info); } return notifier_from_errno(err); } case NETDEV_CHANGE: case NETDEV_UP: { /* Track state of conduit port. * DSA driver may require the conduit port (and indirectly * the tagger) to be available for some special operation. */ if (netdev_uses_dsa(dev)) { struct dsa_port *cpu_dp = dev->dsa_ptr; struct dsa_switch_tree *dst = cpu_dp->ds->dst; /* Track when the conduit port is UP */ dsa_tree_conduit_oper_state_change(dst, dev, netif_oper_up(dev)); /* Track when the conduit port is ready and can accept * packet. * NETDEV_UP event is not enough to flag a port as ready. * We also have to wait for linkwatch_do_dev to dev_activate * and emit a NETDEV_CHANGE event. 
* We check if a conduit port is ready by checking if the dev * have a qdisc assigned and is not noop. */ dsa_tree_conduit_admin_state_change(dst, dev, !qdisc_tx_is_noop(dev)); return NOTIFY_OK; } return NOTIFY_DONE; } case NETDEV_GOING_DOWN: { struct dsa_port *dp, *cpu_dp; struct dsa_switch_tree *dst; LIST_HEAD(close_list); if (!netdev_uses_dsa(dev)) return NOTIFY_DONE; cpu_dp = dev->dsa_ptr; dst = cpu_dp->ds->dst; dsa_tree_conduit_admin_state_change(dst, dev, false); list_for_each_entry(dp, &dst->ports, list) { if (!dsa_port_is_user(dp)) continue; if (dp->cpu_dp != cpu_dp) continue; list_add(&dp->user->close_list, &close_list); } netif_close_many(&close_list, true); return NOTIFY_OK; } default: break; } return NOTIFY_DONE; } static void dsa_fdb_offload_notify(struct dsa_switchdev_event_work *switchdev_work) { struct switchdev_notifier_fdb_info info = {}; info.addr = switchdev_work->addr; info.vid = switchdev_work->vid; info.offloaded = true; call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, switchdev_work->orig_dev, &info.info, NULL); } static void dsa_user_switchdev_event_work(struct work_struct *work) { struct dsa_switchdev_event_work *switchdev_work = container_of(work, struct dsa_switchdev_event_work, work); const unsigned char *addr = switchdev_work->addr; struct net_device *dev = switchdev_work->dev; u16 vid = switchdev_work->vid; struct dsa_switch *ds; struct dsa_port *dp; int err; dp = dsa_user_to_port(dev); ds = dp->ds; switch (switchdev_work->event) { case SWITCHDEV_FDB_ADD_TO_DEVICE: if (switchdev_work->host_addr) err = dsa_port_bridge_host_fdb_add(dp, addr, vid); else if (dp->lag) err = dsa_port_lag_fdb_add(dp, addr, vid); else err = dsa_port_fdb_add(dp, addr, vid); if (err) { dev_err(ds->dev, "port %d failed to add %pM vid %d to fdb: %d\n", dp->index, addr, vid, err); break; } dsa_fdb_offload_notify(switchdev_work); break; case SWITCHDEV_FDB_DEL_TO_DEVICE: if (switchdev_work->host_addr) err = dsa_port_bridge_host_fdb_del(dp, addr, vid); else if 
(dp->lag) err = dsa_port_lag_fdb_del(dp, addr, vid); else err = dsa_port_fdb_del(dp, addr, vid); if (err) { dev_err(ds->dev, "port %d failed to delete %pM vid %d from fdb: %d\n", dp->index, addr, vid, err); } break; } kfree(switchdev_work); } static bool dsa_foreign_dev_check(const struct net_device *dev, const struct net_device *foreign_dev) { const struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch_tree *dst = dp->ds->dst; if (netif_is_bridge_master(foreign_dev)) return !dsa_tree_offloads_bridge_dev(dst, foreign_dev); if (netif_is_bridge_port(foreign_dev)) return !dsa_tree_offloads_bridge_port(dst, foreign_dev); /* Everything else is foreign */ return true; } static int dsa_user_fdb_event(struct net_device *dev, struct net_device *orig_dev, unsigned long event, const void *ctx, const struct switchdev_notifier_fdb_info *fdb_info) { struct dsa_switchdev_event_work *switchdev_work; struct dsa_port *dp = dsa_user_to_port(dev); bool host_addr = fdb_info->is_local; struct dsa_switch *ds = dp->ds; if (ctx && ctx != dp) return 0; if (!dp->bridge) return 0; if (switchdev_fdb_is_dynamically_learned(fdb_info)) { if (dsa_port_offloads_bridge_port(dp, orig_dev)) return 0; /* FDB entries learned by the software bridge or by foreign * bridge ports should be installed as host addresses only if * the driver requests assisted learning. */ if (!ds->assisted_learning_on_cpu_port) return 0; } /* Also treat FDB entries on foreign interfaces bridged with us as host * addresses. */ if (dsa_foreign_dev_check(dev, orig_dev)) host_addr = true; /* Check early that we're not doing work in vain. * Host addresses on LAG ports still require regular FDB ops, * since the CPU port isn't in a LAG. 
*/ if (dp->lag && !host_addr) { if (!ds->ops->lag_fdb_add || !ds->ops->lag_fdb_del) return -EOPNOTSUPP; } else { if (!ds->ops->port_fdb_add || !ds->ops->port_fdb_del) return -EOPNOTSUPP; } switchdev_work = kzalloc_obj(*switchdev_work, GFP_ATOMIC); if (!switchdev_work) return -ENOMEM; netdev_dbg(dev, "%s FDB entry towards %s, addr %pM vid %d%s\n", event == SWITCHDEV_FDB_ADD_TO_DEVICE ? "Adding" : "Deleting", orig_dev->name, fdb_info->addr, fdb_info->vid, host_addr ? " as host address" : ""); INIT_WORK(&switchdev_work->work, dsa_user_switchdev_event_work); switchdev_work->event = event; switchdev_work->dev = dev; switchdev_work->orig_dev = orig_dev; ether_addr_copy(switchdev_work->addr, fdb_info->addr); switchdev_work->vid = fdb_info->vid; switchdev_work->host_addr = host_addr; dsa_schedule_work(&switchdev_work->work); return 0; } /* Called under rcu_read_lock() */ static int dsa_user_switchdev_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = switchdev_notifier_info_to_dev(ptr); int err; switch (event) { case SWITCHDEV_PORT_ATTR_SET: err = switchdev_handle_port_attr_set(dev, ptr, dsa_user_dev_check, dsa_user_port_attr_set); return notifier_from_errno(err); case SWITCHDEV_FDB_ADD_TO_DEVICE: case SWITCHDEV_FDB_DEL_TO_DEVICE: err = switchdev_handle_fdb_event_to_device(dev, event, ptr, dsa_user_dev_check, dsa_foreign_dev_check, dsa_user_fdb_event); return notifier_from_errno(err); default: return NOTIFY_DONE; } return NOTIFY_OK; } static int dsa_user_switchdev_blocking_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = switchdev_notifier_info_to_dev(ptr); int err; switch (event) { case SWITCHDEV_PORT_OBJ_ADD: err = switchdev_handle_port_obj_add_foreign(dev, ptr, dsa_user_dev_check, dsa_foreign_dev_check, dsa_user_port_obj_add); return notifier_from_errno(err); case SWITCHDEV_PORT_OBJ_DEL: err = switchdev_handle_port_obj_del_foreign(dev, ptr, dsa_user_dev_check, 
dsa_foreign_dev_check, dsa_user_port_obj_del); return notifier_from_errno(err); case SWITCHDEV_PORT_ATTR_SET: err = switchdev_handle_port_attr_set(dev, ptr, dsa_user_dev_check, dsa_user_port_attr_set); return notifier_from_errno(err); } return NOTIFY_DONE; } static struct notifier_block dsa_user_nb __read_mostly = { .notifier_call = dsa_user_netdevice_event, }; struct notifier_block dsa_user_switchdev_notifier = { .notifier_call = dsa_user_switchdev_event, }; struct notifier_block dsa_user_switchdev_blocking_notifier = { .notifier_call = dsa_user_switchdev_blocking_event, }; int dsa_user_register_notifier(void) { struct notifier_block *nb; int err; err = register_netdevice_notifier(&dsa_user_nb); if (err) return err; err = register_switchdev_notifier(&dsa_user_switchdev_notifier); if (err) goto err_switchdev_nb; nb = &dsa_user_switchdev_blocking_notifier; err = register_switchdev_blocking_notifier(nb); if (err) goto err_switchdev_blocking_nb; return 0; err_switchdev_blocking_nb: unregister_switchdev_notifier(&dsa_user_switchdev_notifier); err_switchdev_nb: unregister_netdevice_notifier(&dsa_user_nb); return err; } void dsa_user_unregister_notifier(void) { struct notifier_block *nb; int err; nb = &dsa_user_switchdev_blocking_notifier; err = unregister_switchdev_blocking_notifier(nb); if (err) pr_err("DSA: failed to unregister switchdev blocking notifier (%d)\n", err); err = unregister_switchdev_notifier(&dsa_user_switchdev_notifier); if (err) pr_err("DSA: failed to unregister switchdev notifier (%d)\n", err); err = unregister_netdevice_notifier(&dsa_user_nb); if (err) pr_err("DSA: failed to unregister user notifier (%d)\n", err); }
1 3 3 3 3 3 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 
520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 
1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 /* * Parallel-port resource manager code. * * Authors: David Campbell <campbell@tirian.che.curtin.edu.au> * Tim Waugh <tim@cyberelk.demon.co.uk> * Jose Renau <renau@acm.org> * Philip Blundell <philb@gnu.org> * Andrea Arcangeli * * based on work by Grant Guenther <grant@torque.net> * and Philip Blundell * * Any part of this program may be used in documents licensed under * the GNU Free Documentation License, Version 1.1 or any later version * published by the Free Software Foundation. 
*/ #undef PARPORT_DEBUG_SHARING /* undef for production */ #include <linux/module.h> #include <linux/string.h> #include <linux/threads.h> #include <linux/parport.h> #include <linux/delay.h> #include <linux/errno.h> #include <linux/interrupt.h> #include <linux/ioport.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/sched/signal.h> #include <linux/kmod.h> #include <linux/device.h> #include <linux/spinlock.h> #include <linux/mutex.h> #include <asm/irq.h> #undef PARPORT_PARANOID #define PARPORT_DEFAULT_TIMESLICE (HZ/5) unsigned long parport_default_timeslice = PARPORT_DEFAULT_TIMESLICE; int parport_default_spintime = DEFAULT_SPIN_TIME; static LIST_HEAD(portlist); static DEFINE_SPINLOCK(parportlist_lock); /* list of all allocated ports, sorted by ->number */ static LIST_HEAD(all_ports); static DEFINE_SPINLOCK(full_list_lock); static DEFINE_MUTEX(registration_lock); /* What you can do to a port that's gone away.. */ static void dead_write_lines(struct parport *p, unsigned char b){} static unsigned char dead_read_lines(struct parport *p) { return 0; } static unsigned char dead_frob_lines(struct parport *p, unsigned char b, unsigned char c) { return 0; } static void dead_onearg(struct parport *p){} static void dead_initstate(struct pardevice *d, struct parport_state *s) { } static void dead_state(struct parport *p, struct parport_state *s) { } static size_t dead_write(struct parport *p, const void *b, size_t l, int f) { return 0; } static size_t dead_read(struct parport *p, void *b, size_t l, int f) { return 0; } static struct parport_operations dead_ops = { .write_data = dead_write_lines, /* data */ .read_data = dead_read_lines, .write_control = dead_write_lines, /* control */ .read_control = dead_read_lines, .frob_control = dead_frob_lines, .read_status = dead_read_lines, /* status */ .enable_irq = dead_onearg, /* enable_irq */ .disable_irq = dead_onearg, /* disable_irq */ .data_forward = dead_onearg, /* data_forward */ .data_reverse = dead_onearg, /* 
data_reverse */ .init_state = dead_initstate, /* init_state */ .save_state = dead_state, .restore_state = dead_state, .epp_write_data = dead_write, /* epp */ .epp_read_data = dead_read, .epp_write_addr = dead_write, .epp_read_addr = dead_read, .ecp_write_data = dead_write, /* ecp */ .ecp_read_data = dead_read, .ecp_write_addr = dead_write, .compat_write_data = dead_write, /* compat */ .nibble_read_data = dead_read, /* nibble */ .byte_read_data = dead_read, /* byte */ .owner = NULL, }; static struct device_type parport_device_type = { .name = "parport", }; static int is_parport(struct device *dev) { return dev->type == &parport_device_type; } static int parport_probe(struct device *dev) { struct parport_driver *drv; if (is_parport(dev)) return -ENODEV; drv = to_parport_driver(dev->driver); if (!drv->probe) { /* if driver has not defined a custom probe */ struct pardevice *par_dev = to_pardevice(dev); if (strcmp(par_dev->name, drv->name)) return -ENODEV; return 0; } /* if driver defined its own probe */ return drv->probe(to_pardevice(dev)); } static const struct bus_type parport_bus_type = { .name = "parport", .probe = parport_probe, }; int parport_bus_init(void) { return bus_register(&parport_bus_type); } void parport_bus_exit(void) { bus_unregister(&parport_bus_type); } /* * iterates through all the drivers registered with the bus and sends the port * details to the match_port callback of the driver, so that the driver can * know about the new port that just registered with the bus and decide if it * wants to use this new port. */ static int driver_check(struct device_driver *dev_drv, void *_port) { struct parport *port = _port; struct parport_driver *drv = to_parport_driver(dev_drv); if (drv->match_port) drv->match_port(port); return 0; } /* Call attach(port) for each registered driver. 
*/ static void attach_driver_chain(struct parport *port) { /* caller has exclusive registration_lock */ /* * call the driver_check function of the drivers registered in * new device model */ bus_for_each_drv(&parport_bus_type, NULL, port, driver_check); } static int driver_detach(struct device_driver *_drv, void *_port) { struct parport *port = _port; struct parport_driver *drv = to_parport_driver(_drv); if (drv->detach) drv->detach(port); return 0; } /* Call detach(port) for each registered driver. */ static void detach_driver_chain(struct parport *port) { /* caller has exclusive registration_lock */ /* * call the detach function of the drivers registered in * new device model */ bus_for_each_drv(&parport_bus_type, NULL, port, driver_detach); } /* Ask kmod for some lowlevel drivers. */ static void get_lowlevel_driver(void) { /* * There is no actual module called this: you should set * up an alias for modutils. */ request_module("parport_lowlevel"); } /* * iterates through all the devices connected to the bus and sends the device * details to the match_port callback of the driver, so that the driver can * know what are all the ports that are connected to the bus and choose the * port to which it wants to register its device. */ static int port_check(struct device *dev, void *dev_drv) { struct parport_driver *drv = dev_drv; /* only send ports, do not send other devices connected to bus */ if (is_parport(dev)) drv->match_port(to_parport_dev(dev)); return 0; } /* * Iterates through all the devices connected to the bus and return 1 * if the device is a parallel port. 
*/
static int port_detect(struct device *dev, void *dev_drv)
{
	/* @dev_drv is unused; only the device type is examined. */
	if (is_parport(dev))
		return 1;
	return 0;
}

/**
 * __parport_register_driver - register a parallel port device driver
 * @drv: structure describing the driver
 * @owner: owner module of drv
 * @mod_name: module name string
 *
 * This can be called by a parallel port device driver in order
 * to receive notifications about ports being found in the
 * system, as well as ports no longer available.
 *
 * The driver is always registered through the device model, with
 * driver_register() on the parport bus.  If no parallel port is
 * found on the bus afterwards, the lowlevel driver is requested.
 * Finally, if @drv provides a match_port() callback, it is called
 * for every port currently on the bus with registration_lock held.
 *
 * The @drv structure is allocated by the caller and must not be
 * deallocated until after calling parport_unregister_driver().
 *
 * Notes inherited from the non device model interface:
 * The driver's attach() function may block.  The port that
 * attach() is given will be valid for the duration of the
 * callback, but if the driver wants to take a copy of the
 * pointer it must call parport_get_port() to do so.  Calling
 * parport_register_device() on that port will do this for you.
 *
 * The driver's detach() function may block.  The port that
 * detach() is given will be valid for the duration of the
 * callback, but if the driver wants to take a copy of the
 * pointer it must call parport_get_port() to do so.
 *
 * Returns 0 on success, or the error code returned by
 * driver_register() on failure.
 **/

int __parport_register_driver(struct parport_driver *drv, struct module *owner,
			      const char *mod_name)
{
	/* using device model */
	int ret;

	/* initialize common driver fields */
	drv->driver.name = drv->name;
	drv->driver.bus = &parport_bus_type;
	drv->driver.owner = owner;
	drv->driver.mod_name = mod_name;
	ret = driver_register(&drv->driver);
	if (ret)
		return ret;

	/*
	 * check if bus has any parallel port registered, if
	 * none is found then load the lowlevel driver.
	 */
	ret = bus_for_each_dev(&parport_bus_type, NULL, NULL,
			       port_detect);
	if (!ret)
		get_lowlevel_driver();

	/* Report the ports already on the bus to the new driver. */
	mutex_lock(&registration_lock);
	if (drv->match_port)
		bus_for_each_dev(&parport_bus_type, NULL, drv,
				 port_check);
	mutex_unlock(&registration_lock);

	return 0;
}
EXPORT_SYMBOL(__parport_register_driver);

/*
 * bus_for_each_dev() callback: call the driver's detach() hook, if any,
 * for each device on the bus that is a port.  Always returns 0.
 */
static int port_detach(struct device *dev, void *_drv)
{
	struct parport_driver *drv = _drv;

	if (is_parport(dev) && drv->detach)
		drv->detach(to_parport_dev(dev));

	return 0;
}

/**
 * parport_unregister_driver - deregister a parallel port device driver
 * @drv: structure describing the driver that was given to
 *       parport_register_driver()
 *
 * This should be called by a parallel port device driver that
 * has registered itself using parport_register_driver() when it
 * is about to be unloaded.
 *
 * When it returns, the driver's attach() routine will no longer
 * be called, and for each port that attach() was called for, the
 * detach() routine will have been called.
 *
 * All the driver's attach() and detach() calls are guaranteed to have
 * finished by the time this function returns.
 **/

void parport_unregister_driver(struct parport_driver *drv)
{
	/*
	 * detach() is invoked for every port on the bus and the driver is
	 * unregistered, all under registration_lock.
	 */
	mutex_lock(&registration_lock);
	bus_for_each_dev(&parport_bus_type, NULL, drv, port_detach);
	driver_unregister(&drv->driver);
	mutex_unlock(&registration_lock);
}
EXPORT_SYMBOL(parport_unregister_driver);

/*
 * Release callback installed as bus_dev.release by parport_register_port():
 * unlinks the port from the full port list and frees the probe strings
 * and the port structure itself.
 */
static void free_port(struct device *dev)
{
	int d;
	struct parport *port = to_parport_dev(dev);

	spin_lock(&full_list_lock);
	list_del(&port->full_list);
	spin_unlock(&full_list_lock);
	/* NOTE(review): 5 presumably matches the size of probe_info[] - confirm */
	for (d = 0; d < 5; d++) {
		kfree(port->probe_info[d].class_name);
		kfree(port->probe_info[d].mfr);
		kfree(port->probe_info[d].model);
		kfree(port->probe_info[d].cmdset);
		kfree(port->probe_info[d].description);
	}

	kfree(port);
}

/**
 * parport_get_port - increment a port's reference count
 * @port: the port
 *
 * This ensures that a struct parport pointer remains valid
 * until the matching parport_put_port() call.
**/ struct parport *parport_get_port(struct parport *port) { struct device *dev = get_device(&port->bus_dev); return to_parport_dev(dev); } EXPORT_SYMBOL(parport_get_port); void parport_del_port(struct parport *port) { device_unregister(&port->bus_dev); } EXPORT_SYMBOL(parport_del_port); /** * parport_put_port - decrement a port's reference count * @port: the port * * This should be called once for each call to parport_get_port(), * once the port is no longer needed. When the reference count reaches * zero (port is no longer used), free_port is called. **/ void parport_put_port(struct parport *port) { put_device(&port->bus_dev); } EXPORT_SYMBOL(parport_put_port); /** * parport_register_port - register a parallel port * @base: base I/O address * @irq: IRQ line * @dma: DMA channel * @ops: pointer to the port driver's port operations structure * * When a parallel port (lowlevel) driver finds a port that * should be made available to parallel port device drivers, it * should call parport_register_port(). The @base, @irq, and * @dma parameters are for the convenience of port drivers, and * for ports where they aren't meaningful needn't be set to * anything special. They can be altered afterwards by adjusting * the relevant members of the parport structure that is returned * and represents the port. They should not be tampered with * after calling parport_announce_port, however. * * If there are parallel port device drivers in the system that * have registered themselves using parport_register_driver(), * they are not told about the port at this time; that is done by * parport_announce_port(). * * The @ops structure is allocated by the caller, and must not be * deallocated before calling parport_remove_port(). * * If there is no memory to allocate a new parport structure, * this function will return %NULL. 
**/

struct parport *parport_register_port(unsigned long base, int irq, int dma,
				      struct parport_operations *ops)
{
	struct list_head *l;
	struct parport *tmp;
	int num;
	int device;
	int ret;

	tmp = kzalloc_obj(struct parport);
	if (!tmp)
		return NULL;

	/* Init our structure */
	tmp->base = base;
	tmp->irq = irq;
	tmp->dma = dma;
	tmp->muxport = tmp->daisy = tmp->muxsel = -1;
	INIT_LIST_HEAD(&tmp->list);
	tmp->ops = ops;
	/* A freshly registered port is its own physical port. */
	tmp->physport = tmp;
	rwlock_init(&tmp->cad_lock);
	spin_lock_init(&tmp->waitlist_lock);
	spin_lock_init(&tmp->pardevice_lock);
	tmp->ieee1284.mode = IEEE1284_MODE_COMPAT;
	tmp->ieee1284.phase = IEEE1284_PH_FWD_IDLE;
	sema_init(&tmp->ieee1284.irq, 0);
	tmp->spintime = parport_default_spintime;
	atomic_set(&tmp->ref_count, 1);

	/*
	 * Search for the lowest free parport number.
	 *
	 * all_ports is walked in order while num counts up from 0; the
	 * first entry whose number differs from num marks a gap.  num
	 * must only advance past entries that matched: bumping it on
	 * the mismatching entry too would hand out that entry's own
	 * (already used) number.  On exit l is either the entry in
	 * front of which the new port belongs, or the list head.
	 */
	spin_lock(&full_list_lock);
	num = 0;
	list_for_each(l, &all_ports) {
		struct parport *p = list_entry(l, struct parport, full_list);

		if (p->number != num)
			break;
		num++;
	}
	tmp->portnum = tmp->number = num;
	/* Insert in front of l so the list stays sorted by number. */
	list_add_tail(&tmp->full_list, l);
	spin_unlock(&full_list_lock);

	/*
	 * Now that the portnum is known finish doing the Init.
	 */
	dev_set_name(&tmp->bus_dev, "parport%d", tmp->portnum);
	tmp->bus_dev.bus = &parport_bus_type;
	tmp->bus_dev.release = free_port;
	tmp->bus_dev.type = &parport_device_type;
	tmp->name = dev_name(&tmp->bus_dev);

	for (device = 0; device < 5; device++)
		/* assume the worst */
		tmp->probe_info[device].class = PARPORT_CLASS_LEGACY;

	ret = device_register(&tmp->bus_dev);
	if (ret) {
		/*
		 * Dropping the reference ends up in free_port(), which
		 * unlinks the port from all_ports and frees it.
		 */
		put_device(&tmp->bus_dev);
		return NULL;
	}

	return tmp;
}
EXPORT_SYMBOL(parport_register_port);

/**
 * parport_announce_port - tell device drivers about a parallel port
 * @port: parallel port to announce
 *
 * After a port driver has registered a parallel port with
 * parport_register_port, and performed any necessary
 * initialisation or adjustments, it should call
 * parport_announce_port() in order to notify all device drivers
 * that have called parport_register_driver().
Their attach()
 * functions will be called, with @port as the parameter.
 **/

void parport_announce_port(struct parport *port)
{
	int i;

#ifdef CONFIG_PARPORT_1284
	/* Analyse the IEEE1284.3 topology of the port. */
	parport_daisy_init(port);
#endif

	if (!port->dev)
		pr_warn("%s: fix this legacy no-device port driver!\n",
			port->name);

	parport_proc_register(port);
	mutex_lock(&registration_lock);
	/* Publish the port and its slaves (slaves[0] and slaves[1]) on portlist. */
	spin_lock_irq(&parportlist_lock);
	list_add_tail(&port->list, &portlist);
	for (i = 1; i < 3; i++) {
		struct parport *slave = port->slaves[i-1];
		if (slave)
			list_add_tail(&slave->list, &portlist);
	}
	spin_unlock_irq(&parportlist_lock);

	/* Let drivers know that new port(s) has arrived. */
	attach_driver_chain(port);
	for (i = 1; i < 3; i++) {
		struct parport *slave = port->slaves[i-1];
		if (slave)
			attach_driver_chain(slave);
	}
	mutex_unlock(&registration_lock);
}
EXPORT_SYMBOL(parport_announce_port);

/**
 * parport_remove_port - deregister a parallel port
 * @port: parallel port to deregister
 *
 * When a parallel port driver is forcibly unloaded, or a
 * parallel port becomes inaccessible, the port driver must call
 * this function in order to deal with device drivers that still
 * want to use it.
 *
 * The parport structure associated with the port has its
 * operations structure replaced with one containing 'null'
 * operations that return errors or just don't do anything.
 *
 * Any drivers that have registered themselves using
 * parport_register_driver() are notified that the port is no
 * longer accessible by having their detach() routines called
 * with @port as the parameter.
 **/

void parport_remove_port(struct parport *port)
{
	int i;

	mutex_lock(&registration_lock);

	/* Spread the word. */
	detach_driver_chain(port);

#ifdef CONFIG_PARPORT_1284
	/* Forget the IEEE1284.3 topology of the port. */
	parport_daisy_fini(port);
	for (i = 1; i < 3; i++) {
		struct parport *slave = port->slaves[i-1];
		if (!slave)
			continue;
		detach_driver_chain(slave);
		parport_daisy_fini(slave);
	}
#endif

	/* From here on every port operation goes to the dead_ops table. */
	port->ops = &dead_ops;
	/*
	 * NOTE(review): parport_announce_port() takes parportlist_lock with
	 * spin_lock_irq(), this path uses plain spin_lock() - confirm which
	 * variant is intended.
	 */
	spin_lock(&parportlist_lock);
	list_del_init(&port->list);
	for (i = 1; i < 3; i++) {
		struct parport *slave = port->slaves[i-1];
		if (slave)
			list_del_init(&slave->list);
	}
	spin_unlock(&parportlist_lock);

	mutex_unlock(&registration_lock);

	parport_proc_unregister(port);

	/* Drop one reference on each slave port. */
	for (i = 1; i < 3; i++) {
		struct parport *slave = port->slaves[i-1];
		if (slave)
			parport_put_port(slave);
	}
}
EXPORT_SYMBOL(parport_remove_port);

/*
 * Release callback installed as dev.release by
 * parport_register_dev_model(): frees the device name and the
 * pardevice itself.
 */
static void free_pardevice(struct device *dev)
{
	struct pardevice *par_dev = to_pardevice(dev);

	kfree_const(par_dev->name);
	kfree(par_dev);
}

/**
 * parport_register_dev_model - register a device on a parallel port
 * @port: port to which the device is attached
 * @name: a name to refer to the device
 * @par_dev_cb: struct containing callbacks
 * @id: device number to be given to the device
 *
 * This function, called by parallel port device drivers,
 * declares that a device is connected to a port, and tells the
 * system all it needs to know.
 *
 * The struct pardev_cb contains pointer to callbacks. preemption
 * callback function, @preempt, is called when this device driver
 * has claimed access to the port but another device driver wants
 * to use it.  It is given, @private, as its parameter, and should
 * return zero if it is willing for the system to release the port
 * to another driver on its behalf.  If it wants to keep control of
 * the port it should return non-zero, and no action will be taken.
 * It is good manners for the driver to try to release the port at
 * the earliest opportunity after its preemption callback rejects a
 * preemption attempt.  Note that if a preemption callback is happy
 * for preemption to go ahead, there is no need to release the
 * port; it is done automatically.  This function may not block, as
 * it may be called from interrupt context.
If the device driver * does not support preemption, @preempt can be %NULL. * * The wake-up ("kick") callback function, @wakeup, is called when * the port is available to be claimed for exclusive access; that * is, parport_claim() is guaranteed to succeed when called from * inside the wake-up callback function. If the driver wants to * claim the port it should do so; otherwise, it need not take * any action. This function may not block, as it may be called * from interrupt context. If the device driver does not want to * be explicitly invited to claim the port in this way, @wakeup can * be %NULL. * * The interrupt handler, @irq_func, is called when an interrupt * arrives from the parallel port. Note that if a device driver * wants to use interrupts it should use parport_enable_irq(), * and can also check the irq member of the parport structure * representing the port. * * The parallel port (lowlevel) driver is the one that has called * request_irq() and whose interrupt handler is called first. * This handler does whatever needs to be done to the hardware to * acknowledge the interrupt (for PC-style ports there is nothing * special to be done). It then tells the IEEE 1284 code about * the interrupt, which may involve reacting to an IEEE 1284 * event depending on the current IEEE 1284 phase. After this, * it calls @irq_func. Needless to say, @irq_func will be called * from interrupt context, and may not block. * * The %PARPORT_DEV_EXCL flag is for preventing port sharing, and * so should only be used when sharing the port with other device * drivers is impossible and would lead to incorrect behaviour. * Use it sparingly! Normally, @flags will be zero. * * This function returns a pointer to a structure that represents * the device on the port, or %NULL if there is not enough memory * to allocate space for that structure. 
**/

struct pardevice *
parport_register_dev_model(struct parport *port, const char *name,
			   const struct pardev_cb *par_dev_cb, int id)
{
	struct pardevice *par_dev;
	const char *devname;
	int ret;

	if (port->physport->flags & PARPORT_FLAG_EXCL) {
		/* An exclusive device is registered. */
		pr_err("%s: no more devices allowed\n", port->name);
		return NULL;
	}

	/* A lurking device needs both callbacks. */
	if (par_dev_cb->flags & PARPORT_DEV_LURK) {
		if (!par_dev_cb->preempt || !par_dev_cb->wakeup) {
			pr_info("%s: refused to register lurking device (%s) without callbacks\n",
				port->name, name);
			return NULL;
		}
	}

	if (par_dev_cb->flags & PARPORT_DEV_EXCL) {
		if (port->physport->devices) {
			/*
			 * If a device is already registered and this new
			 * device wants exclusive access, then no need to
			 * continue as we can not grant exclusive access to
			 * this device.
			 */
			pr_err("%s: cannot grant exclusive access for device %s\n",
			       port->name, name);
			return NULL;
		}
	}

	/* Pin the port driver's module and the port for the device's lifetime. */
	if (!try_module_get(port->ops->owner))
		return NULL;

	parport_get_port(port);

	par_dev = kzalloc_obj(*par_dev);
	if (!par_dev)
		goto err_put_port;

	par_dev->state = kzalloc_obj(*par_dev->state);
	if (!par_dev->state)
		goto err_put_par_dev;

	devname = kstrdup_const(name, GFP_KERNEL);
	if (!devname)
		goto err_free_par_dev;

	par_dev->name = devname;
	par_dev->port = port;
	par_dev->daisy = -1;
	par_dev->preempt = par_dev_cb->preempt;
	par_dev->wakeup = par_dev_cb->wakeup;
	par_dev->private = par_dev_cb->private;
	par_dev->flags = par_dev_cb->flags;
	par_dev->irq_func = par_dev_cb->irq_func;
	par_dev->waiting = 0;
	par_dev->timeout = 5 * HZ;

	par_dev->dev.parent = &port->bus_dev;
	par_dev->dev.bus = &parport_bus_type;
	ret = dev_set_name(&par_dev->dev, "%s.%d", devname, id);
	if (ret)
		goto err_free_devname;
	par_dev->dev.release = free_pardevice;
	/*
	 * From here on par_dev and its name are owned by the embedded
	 * struct device: they are released through free_pardevice(), and
	 * the error labels below must no longer kfree() par_dev.
	 */
	par_dev->devmodel = true;
	ret = device_register(&par_dev->dev);
	if (ret) {
		kfree(par_dev->state);
		put_device(&par_dev->dev);
		goto err_put_port;
	}

	/* Chain this onto the list */
	par_dev->prev = NULL;
	/*
	 * This function must not run from an irq handler so we don't need
	 * to clear irq on the local CPU. -arca
	 */
	spin_lock(&port->physport->pardevice_lock);

	if (par_dev_cb->flags & PARPORT_DEV_EXCL) {
		/* Re-check under the lock: somebody may have raced us. */
		if (port->physport->devices) {
			spin_unlock(&port->physport->pardevice_lock);
			pr_debug("%s: cannot grant exclusive access for device %s\n",
				 port->name, name);
			kfree(par_dev->state);
			device_unregister(&par_dev->dev);
			goto err_put_port;
		}
		/*
		 * Record exclusivity on the physical port: that is where
		 * the check at the top of this function looks for it and
		 * where parport_unregister_device() clears it.
		 */
		port->physport->flags |= PARPORT_FLAG_EXCL;
	}

	par_dev->next = port->physport->devices;
	wmb();	/*
		 * Make sure that tmp->next is written before it's
		 * added to the list; see comments marked 'no locking
		 * required'
		 */
	if (port->physport->devices)
		port->physport->devices->prev = par_dev;
	port->physport->devices = par_dev;
	spin_unlock(&port->physport->pardevice_lock);

	init_waitqueue_head(&par_dev->wait_q);
	par_dev->timeslice = parport_default_timeslice;
	par_dev->waitnext = NULL;
	par_dev->waitprev = NULL;

	/*
	 * This has to be run as last thing since init_state may need other
	 * pardevice fields. -arca
	 */
	port->ops->init_state(par_dev, par_dev->state);
	/* Only the first device to get here is exported through proc. */
	if (!test_and_set_bit(PARPORT_DEVPROC_REGISTERED, &port->devflags)) {
		port->proc_device = par_dev;
		parport_device_proc_register(par_dev);
	}

	return par_dev;

err_free_devname:
	kfree_const(devname);
err_free_par_dev:
	kfree(par_dev->state);
err_put_par_dev:
	if (!par_dev->devmodel)
		kfree(par_dev);
err_put_port:
	parport_put_port(port);
	module_put(port->ops->owner);

	return NULL;
}
EXPORT_SYMBOL(parport_register_dev_model);

/**
 * parport_unregister_device - deregister a device on a parallel port
 * @dev: pointer to structure representing device
 *
 * This undoes the effect of parport_register_dev_model().
**/

void parport_unregister_device(struct pardevice *dev)
{
	struct parport *port;

#ifdef PARPORT_PARANOID
	if (!dev) {
		pr_err("%s: passed NULL\n", __func__);
		return;
	}
#endif

	/* All bookkeeping below is done on the physical port. */
	port = dev->port->physport;

	/* Remove the proc entry if this device is the one that owns it. */
	if (port->proc_device == dev) {
		port->proc_device = NULL;
		clear_bit(PARPORT_DEVPROC_REGISTERED, &port->devflags);
		parport_device_proc_unregister(dev);
	}

	/* Release the port on behalf of a device that still holds it. */
	if (port->cad == dev) {
		printk(KERN_DEBUG "%s: %s forgot to release port\n",
		       port->name, dev->name);
		parport_release(dev);
	}

	/* Unlink the device from the port's doubly linked device list. */
	spin_lock(&port->pardevice_lock);
	if (dev->next)
		dev->next->prev = dev->prev;
	if (dev->prev)
		dev->prev->next = dev->next;
	else
		port->devices = dev->next;

	if (dev->flags & PARPORT_DEV_EXCL)
		port->flags &= ~PARPORT_FLAG_EXCL;

	spin_unlock(&port->pardevice_lock);

	/*
	 * Make sure we haven't left any pointers around in the wait
	 * list.
	 */
	spin_lock_irq(&port->waitlist_lock);
	if (dev->waitprev || dev->waitnext || port->waithead == dev) {
		if (dev->waitprev)
			dev->waitprev->waitnext = dev->waitnext;
		else
			port->waithead = dev->waitnext;
		if (dev->waitnext)
			dev->waitnext->waitprev = dev->waitprev;
		else
			port->waittail = dev->waitprev;
	}
	spin_unlock_irq(&port->waitlist_lock);

	kfree(dev->state);
	device_unregister(&dev->dev);

	/*
	 * NOTE(review): the module and port references are dropped on the
	 * physical port here, whereas parport_register_dev_model() takes
	 * them on the port it was given - confirm these are always the
	 * same object, or that the asymmetry is intended.
	 */
	module_put(port->ops->owner);
	parport_put_port(port);
}
EXPORT_SYMBOL(parport_unregister_device);

/**
 * parport_find_number - find a parallel port by number
 * @number: parallel port number
 *
 * This returns the parallel port with the specified number, or
 * %NULL if there is none.
 *
 * There is an implicit parport_get_port() done already; to throw
 * away the reference to the port that parport_find_number()
 * gives you, use parport_put_port().
*/ struct parport *parport_find_number(int number) { struct parport *port, *result = NULL; if (list_empty(&portlist)) get_lowlevel_driver(); spin_lock(&parportlist_lock); list_for_each_entry(port, &portlist, list) { if (port->number == number) { result = parport_get_port(port); break; } } spin_unlock(&parportlist_lock); return result; } EXPORT_SYMBOL(parport_find_number); /** * parport_find_base - find a parallel port by base address * @base: base I/O address * * This returns the parallel port with the specified base * address, or %NULL if there is none. * * There is an implicit parport_get_port() done already; to throw * away the reference to the port that parport_find_base() * gives you, use parport_put_port(). */ struct parport *parport_find_base(unsigned long base) { struct parport *port, *result = NULL; if (list_empty(&portlist)) get_lowlevel_driver(); spin_lock(&parportlist_lock); list_for_each_entry(port, &portlist, list) { if (port->base == base) { result = parport_get_port(port); break; } } spin_unlock(&parportlist_lock); return result; } EXPORT_SYMBOL(parport_find_base); /** * parport_claim - claim access to a parallel port device * @dev: pointer to structure representing a device on the port * * This function will not block and so can be used from interrupt * context. If parport_claim() succeeds in claiming access to * the port it returns zero and the port is available to use. It * may fail (returning non-zero) if the port is in use by another * driver and that driver is not willing to relinquish control of * the port. 
**/ int parport_claim(struct pardevice *dev) { struct pardevice *oldcad; struct parport *port = dev->port->physport; unsigned long flags; if (port->cad == dev) { pr_info("%s: %s already owner\n", dev->port->name, dev->name); return 0; } /* Preempt any current device */ write_lock_irqsave(&port->cad_lock, flags); oldcad = port->cad; if (oldcad) { if (oldcad->preempt) { if (oldcad->preempt(oldcad->private)) goto blocked; port->ops->save_state(port, dev->state); } else goto blocked; if (port->cad != oldcad) { /* * I think we'll actually deadlock rather than * get here, but just in case.. */ pr_warn("%s: %s released port when preempted!\n", port->name, oldcad->name); if (port->cad) goto blocked; } } /* Can't fail from now on, so mark ourselves as no longer waiting. */ if (dev->waiting & 1) { dev->waiting = 0; /* Take ourselves out of the wait list again. */ spin_lock_irq(&port->waitlist_lock); if (dev->waitprev) dev->waitprev->waitnext = dev->waitnext; else port->waithead = dev->waitnext; if (dev->waitnext) dev->waitnext->waitprev = dev->waitprev; else port->waittail = dev->waitprev; spin_unlock_irq(&port->waitlist_lock); dev->waitprev = dev->waitnext = NULL; } /* Now we do the change of devices */ port->cad = dev; #ifdef CONFIG_PARPORT_1284 /* If it's a mux port, select it. */ if (dev->port->muxport >= 0) { /* FIXME */ port->muxsel = dev->port->muxport; } /* If it's a daisy chain device, select it. */ if (dev->daisy >= 0) { /* This could be lazier. */ if (!parport_daisy_select(port, dev->daisy, IEEE1284_MODE_COMPAT)) port->daisy = dev->daisy; } #endif /* IEEE1284.3 support */ /* Restore control registers */ port->ops->restore_state(port, dev->state); write_unlock_irqrestore(&port->cad_lock, flags); dev->time = jiffies; return 0; blocked: /* * If this is the first time we tried to claim the port, register an * interest. This is only allowed for devices sleeping in * parport_claim_or_block(), or those with a wakeup function. 
*/ /* The cad_lock is still held for writing here */ if (dev->waiting & 2 || dev->wakeup) { spin_lock(&port->waitlist_lock); if (test_and_set_bit(0, &dev->waiting) == 0) { /* First add ourselves to the end of the wait list. */ dev->waitnext = NULL; dev->waitprev = port->waittail; if (port->waittail) { port->waittail->waitnext = dev; port->waittail = dev; } else port->waithead = port->waittail = dev; } spin_unlock(&port->waitlist_lock); } write_unlock_irqrestore(&port->cad_lock, flags); return -EAGAIN; } EXPORT_SYMBOL(parport_claim); /** * parport_claim_or_block - claim access to a parallel port device * @dev: pointer to structure representing a device on the port * * This behaves like parport_claim(), but will block if necessary * to wait for the port to be free. A return value of 1 * indicates that it slept; 0 means that it succeeded without * needing to sleep. A negative error code indicates failure. **/ int parport_claim_or_block(struct pardevice *dev) { int r; /* * Signal to parport_claim() that we can wait even without a * wakeup function. */ dev->waiting = 2; /* Try to claim the port. If this fails, we need to sleep. */ r = parport_claim(dev); if (r == -EAGAIN) { #ifdef PARPORT_DEBUG_SHARING printk(KERN_DEBUG "%s: parport_claim() returned -EAGAIN\n", dev->name); #endif /* * FIXME!!! Use the proper locking for dev->waiting, * and make this use the "wait_event_interruptible()" * interfaces. The cli/sti that used to be here * did nothing. * * See also parport_release() */ /* * If dev->waiting is clear now, an interrupt * gave us the port and we would deadlock if we slept. 
*/ if (dev->waiting) { wait_event_interruptible(dev->wait_q, !dev->waiting); if (signal_pending(current)) return -EINTR; r = 1; } else { r = 0; #ifdef PARPORT_DEBUG_SHARING printk(KERN_DEBUG "%s: didn't sleep in parport_claim_or_block()\n", dev->name); #endif } #ifdef PARPORT_DEBUG_SHARING if (dev->port->physport->cad != dev) printk(KERN_DEBUG "%s: exiting parport_claim_or_block but %s owns port!\n", dev->name, dev->port->physport->cad ? dev->port->physport->cad->name : "nobody"); #endif } dev->waiting = 0; return r; } EXPORT_SYMBOL(parport_claim_or_block); /** * parport_release - give up access to a parallel port device * @dev: pointer to structure representing parallel port device * * This function cannot fail, but it should not be called without * the port claimed. Similarly, if the port is already claimed * you should not try claiming it again. **/ void parport_release(struct pardevice *dev) { struct parport *port = dev->port->physport; struct pardevice *pd; unsigned long flags; /* Make sure that dev is the current device */ write_lock_irqsave(&port->cad_lock, flags); if (port->cad != dev) { write_unlock_irqrestore(&port->cad_lock, flags); pr_warn("%s: %s tried to release parport when not owner\n", port->name, dev->name); return; } #ifdef CONFIG_PARPORT_1284 /* If this is on a mux port, deselect it. */ if (dev->port->muxport >= 0) { /* FIXME */ port->muxsel = -1; } /* If this is a daisy device, deselect it. */ if (dev->daisy >= 0) { parport_daisy_deselect_all(port); port->daisy = -1; } #endif port->cad = NULL; write_unlock_irqrestore(&port->cad_lock, flags); /* Save control registers */ port->ops->save_state(port, dev->state); /* * If anybody is waiting, find out who's been there longest and * then wake them up. (Note: no locking required) */ /* !!! 
LOCKING IS NEEDED HERE */ for (pd = port->waithead; pd; pd = pd->waitnext) { if (pd->waiting & 2) { /* sleeping in claim_or_block */ parport_claim(pd); if (waitqueue_active(&pd->wait_q)) wake_up_interruptible(&pd->wait_q); return; } else if (pd->wakeup) { pd->wakeup(pd->private); if (dev->port->cad) /* racy but no matter */ return; } else { pr_err("%s: don't know how to wake %s\n", port->name, pd->name); } } /* * Nobody was waiting, so walk the list to see if anyone is * interested in being woken up. (Note: no locking required) */ /* !!! LOCKING IS NEEDED HERE */ for (pd = port->devices; !port->cad && pd; pd = pd->next) { if (pd->wakeup && pd != dev) pd->wakeup(pd->private); } } EXPORT_SYMBOL(parport_release); irqreturn_t parport_irq_handler(int irq, void *dev_id) { struct parport *port = dev_id; parport_generic_irq(port); return IRQ_HANDLED; } EXPORT_SYMBOL(parport_irq_handler); MODULE_DESCRIPTION("Parallel-port resource manager"); MODULE_LICENSE("GPL");
69 109 7 3 78 2 56 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 /* SPDX-License-Identifier: GPL-2.0+ */ /* * Driver for 8250/16550-type serial ports * * Based on drivers/char/serial.c, by Linus Torvalds, Theodore Ts'o. * * Copyright (C) 2001 Russell King. 
*/

#include <linux/bits.h>
#include <linux/serial_8250.h>
#include <linux/serial_core.h>
#include <linux/dmaengine.h>

#include "../serial_mctrl_gpio.h"

/*
 * DMA state for an 8250 port: optional per-driver hooks, channel
 * configuration, buffer addresses and transfer status.
 */
struct uart_8250_dma {
	/* Driver hooks; all take the owning port. */
	int (*tx_dma)(struct uart_8250_port *p);
	int (*rx_dma)(struct uart_8250_port *p);
	void (*prepare_tx_dma)(struct uart_8250_port *p);
	void (*prepare_rx_dma)(struct uart_8250_port *p);

	/* Filter function */
	dma_filter_fn		fn;
	/* Parameter to the filter function */
	void			*rx_param;
	void			*tx_param;

	struct dma_slave_config	rxconf;
	struct dma_slave_config	txconf;

	struct dma_chan		*rxchan;
	struct dma_chan		*txchan;

	/* Device address base for DMA operations */
	phys_addr_t		rx_dma_addr;
	phys_addr_t		tx_dma_addr;

	/* DMA address of the buffer in memory */
	dma_addr_t		rx_addr;
	dma_addr_t		tx_addr;

	dma_cookie_t		rx_cookie;
	dma_cookie_t		tx_cookie;

	void			*rx_buf;

	size_t			rx_size;
	size_t			tx_size;

	/* Status flags; tx_running is tested by the tx DMA helpers. */
	unsigned char		tx_running;
	unsigned char		tx_err;
	unsigned char		rx_running;
};

/* Description of a legacy serial port (presumably a static table entry - confirm). */
struct old_serial_port {
	unsigned int uart;
	unsigned int baud_base;
	unsigned int port;
	unsigned int irq;
	upf_t        flags;
	unsigned char io_type;
	unsigned char __iomem *iomem_base;
	unsigned short iomem_reg_shift;
};

/* Per-UART-type configuration: name, FIFO sizes, FCR value and flags. */
struct serial8250_config {
	const char	*name;
	unsigned short	fifo_size;
	unsigned short	tx_loadsz;
	unsigned char	fcr;
	unsigned char	rxtrig_bytes[UART_FCR_R_TRIG_MAX_STATE];
	unsigned int	flags;
};

/* UART capability bits (bit 8 and up). */
#define UART_CAP_FIFO	BIT(8)	/* UART has FIFO */
#define UART_CAP_EFR	BIT(9)	/* UART has EFR */
#define UART_CAP_SLEEP	BIT(10)	/* UART has IER sleep */
#define UART_CAP_AFE	BIT(11)	/* MCR-based hw flow control */
#define UART_CAP_UUE	BIT(12)	/* UART needs IER bit 6 set (Xscale) */
#define UART_CAP_RTOIE	BIT(13)	/* UART needs IER bit 4 set (Xscale, Tegra) */
#define UART_CAP_HFIFO	BIT(14)	/* UART has a "hidden" FIFO */
#define UART_CAP_RPM	BIT(15)	/* Runtime PM is active while idle */
#define UART_CAP_IRDA	BIT(16)	/* UART supports IrDA line discipline */
#define UART_CAP_MINI	BIT(17)	/* Mini UART on BCM283X family lacks:
					 * STOP PARITY EPAR SPAR WLEN5 WLEN6
*/
#define UART_CAP_NOTEMT	BIT(18)	/* UART without interrupt on TEMT available */

/* UART bug/quirk bits (low bits; bit 4 is not defined here). */
#define UART_BUG_QUOT	BIT(0)	/* UART has buggy quot LSB */
#define UART_BUG_TXEN	BIT(1)	/* UART has buggy TX IIR status */
#define UART_BUG_NOMSR	BIT(2)	/* UART has buggy MSR status bits (Au1x00) */
#define UART_BUG_THRE	BIT(3)	/* UART has buggy THRE reassertion */
#define UART_BUG_TXRACE BIT(5)	/* UART Tx fails to set remote DR */

/* Module parameters */
#define UART_NR	CONFIG_SERIAL_8250_NR_UARTS

extern unsigned int nr_uarts;

/*
 * Initialiser for a port-I/O UART at @_base/@_irq with a 1843200 Hz
 * clock; UPF_BOOT_AUTOCONF is always set in addition to @_flags.
 */
#define SERIAL8250_PORT_FLAGS(_base, _irq, _flags)		\
	{							\
		.iobase		= _base,			\
		.irq		= _irq,				\
		.uartclk	= 1843200,			\
		.iotype		= UPIO_PORT,			\
		.flags		= UPF_BOOT_AUTOCONF | (_flags),	\
	}

/* Same as SERIAL8250_PORT_FLAGS() with no extra flags. */
#define SERIAL8250_PORT(_base, _irq) SERIAL8250_PORT_FLAGS(_base, _irq, 0)

extern struct uart_driver serial8250_reg;
void serial8250_register_ports(struct uart_driver *drv, struct device *dev);

/* Legacy ISA bus related APIs */
typedef void (*serial8250_isa_config_fn)(int, struct uart_port *, u32 *);
extern serial8250_isa_config_fn serial8250_isa_config;

void serial8250_isa_init_ports(void);

extern struct platform_device *serial8250_isa_devs;

extern const struct uart_ops *univ8250_port_base_ops;
extern struct uart_ops univ8250_port_ops;

/* Read the register at @offset through the port's serial_in() accessor. */
static inline int serial_in(struct uart_8250_port *up, int offset)
{
	return up->port.serial_in(&up->port, offset);
}

/* Write @value to the register at @offset through the port's serial_out() accessor. */
static inline void serial_out(struct uart_8250_port *up, int offset, int value)
{
	up->port.serial_out(&up->port, offset, value);
}

/**
 *	serial_lsr_in - Read LSR register and preserve flags across reads
 *	@up:	uart 8250 port
 *
 *	Read LSR register and handle saving non-preserved flags across reads.
 *	The flags that are not preserved across reads are stored into
 *	up->lsr_saved_flags.
 *
 *	Returns LSR value or'ed with the preserved flags (if any).
*/ static inline u16 serial_lsr_in(struct uart_8250_port *up) { u16 lsr = up->lsr_saved_flags; lsr |= serial_in(up, UART_LSR); up->lsr_saved_flags = lsr & up->lsr_save_mask; return lsr; } /* * For the 16C950 */ static void serial_icr_write(struct uart_8250_port *up, int offset, int value) { serial_out(up, UART_SCR, offset); serial_out(up, UART_ICR, value); } static unsigned int __maybe_unused serial_icr_read(struct uart_8250_port *up, int offset) { unsigned int value; serial_icr_write(up, UART_ACR, up->acr | UART_ACR_ICRRD); serial_out(up, UART_SCR, offset); value = serial_in(up, UART_ICR); serial_icr_write(up, UART_ACR, up->acr); return value; } void serial8250_clear_fifos(struct uart_8250_port *p); void serial8250_clear_and_reinit_fifos(struct uart_8250_port *p); void serial8250_fifo_wait_for_lsr_thre(struct uart_8250_port *up, unsigned int count); void serial8250_rpm_get(struct uart_8250_port *p); void serial8250_rpm_put(struct uart_8250_port *p); DEFINE_GUARD(serial8250_rpm, struct uart_8250_port *, serial8250_rpm_get(_T), serial8250_rpm_put(_T)); static inline u32 serial_dl_read(struct uart_8250_port *up) { return up->dl_read(up); } static inline void serial_dl_write(struct uart_8250_port *up, u32 value) { up->dl_write(up, value); } static inline bool serial8250_set_THRI(struct uart_8250_port *up) { /* Port locked to synchronize UART_IER access against the console. */ lockdep_assert_held_once(&up->port.lock); if (up->ier & UART_IER_THRI) return false; up->ier |= UART_IER_THRI; serial_out(up, UART_IER, up->ier); return true; } static inline bool serial8250_clear_THRI(struct uart_8250_port *up) { /* Port locked to synchronize UART_IER access against the console. 
*/ lockdep_assert_held_once(&up->port.lock); if (!(up->ier & UART_IER_THRI)) return false; up->ier &= ~UART_IER_THRI; serial_out(up, UART_IER, up->ier); return true; } struct uart_8250_port *serial8250_setup_port(int index); struct uart_8250_port *serial8250_get_port(int line); int serial8250_em485_config(struct uart_port *port, struct ktermios *termios, struct serial_rs485 *rs485); void serial8250_em485_start_tx(struct uart_8250_port *p, bool toggle_ier); void serial8250_em485_stop_tx(struct uart_8250_port *p, bool toggle_ier); void serial8250_em485_destroy(struct uart_8250_port *p); extern struct serial_rs485 serial8250_em485_supported; /* MCR <-> TIOCM conversion */ static inline int serial8250_TIOCM_to_MCR(int tiocm) { int mcr = 0; if (tiocm & TIOCM_RTS) mcr |= UART_MCR_RTS; if (tiocm & TIOCM_DTR) mcr |= UART_MCR_DTR; if (tiocm & TIOCM_OUT1) mcr |= UART_MCR_OUT1; if (tiocm & TIOCM_OUT2) mcr |= UART_MCR_OUT2; if (tiocm & TIOCM_LOOP) mcr |= UART_MCR_LOOP; return mcr; } static inline int serial8250_MCR_to_TIOCM(int mcr) { int tiocm = 0; if (mcr & UART_MCR_RTS) tiocm |= TIOCM_RTS; if (mcr & UART_MCR_DTR) tiocm |= TIOCM_DTR; if (mcr & UART_MCR_OUT1) tiocm |= TIOCM_OUT1; if (mcr & UART_MCR_OUT2) tiocm |= TIOCM_OUT2; if (mcr & UART_MCR_LOOP) tiocm |= TIOCM_LOOP; return tiocm; } /* MSR <-> TIOCM conversion */ static inline int serial8250_MSR_to_TIOCM(int msr) { int tiocm = 0; if (msr & UART_MSR_DCD) tiocm |= TIOCM_CAR; if (msr & UART_MSR_RI) tiocm |= TIOCM_RNG; if (msr & UART_MSR_DSR) tiocm |= TIOCM_DSR; if (msr & UART_MSR_CTS) tiocm |= TIOCM_CTS; return tiocm; } static inline void serial8250_out_MCR(struct uart_8250_port *up, int value) { serial_out(up, UART_MCR, value); if (up->gpios) mctrl_gpio_set(up->gpios, serial8250_MCR_to_TIOCM(value)); } static inline int serial8250_in_MCR(struct uart_8250_port *up) { int mctrl; mctrl = serial_in(up, UART_MCR); if (up->gpios) { unsigned int mctrl_gpio = 0; mctrl_gpio = mctrl_gpio_get_outputs(up->gpios, &mctrl_gpio); mctrl |= 
serial8250_TIOCM_to_MCR(mctrl_gpio); } return mctrl; } #ifdef CONFIG_SERIAL_8250_PNP int serial8250_pnp_init(void); void serial8250_pnp_exit(void); #else static inline int serial8250_pnp_init(void) { return 0; } static inline void serial8250_pnp_exit(void) { } #endif #ifdef CONFIG_SERIAL_8250_RSA void univ8250_rsa_support(struct uart_ops *ops, const struct uart_ops *core_ops); void rsa_enable(struct uart_8250_port *up); void rsa_disable(struct uart_8250_port *up); void rsa_autoconfig(struct uart_8250_port *up); void rsa_reset(struct uart_8250_port *up); #else static inline void univ8250_rsa_support(struct uart_ops *ops, const struct uart_ops *core_ops) { } static inline void rsa_enable(struct uart_8250_port *up) {} static inline void rsa_disable(struct uart_8250_port *up) {} static inline void rsa_autoconfig(struct uart_8250_port *up) {} static inline void rsa_reset(struct uart_8250_port *up) {} #endif #ifdef CONFIG_SERIAL_8250_FINTEK int fintek_8250_probe(struct uart_8250_port *uart); #else static inline int fintek_8250_probe(struct uart_8250_port *uart) { return 0; } #endif #ifdef CONFIG_ARCH_OMAP1 #include <linux/soc/ti/omap1-soc.h> static inline int is_omap1_8250(struct uart_8250_port *pt) { int res; switch (pt->port.mapbase) { case OMAP1_UART1_BASE: case OMAP1_UART2_BASE: case OMAP1_UART3_BASE: res = 1; break; default: res = 0; break; } return res; } static inline int is_omap1510_8250(struct uart_8250_port *pt) { if (!cpu_is_omap1510()) return 0; return is_omap1_8250(pt); } #else static inline int is_omap1_8250(struct uart_8250_port *pt) { return 0; } static inline int is_omap1510_8250(struct uart_8250_port *pt) { return 0; } #endif #ifdef CONFIG_SERIAL_8250_DMA extern int serial8250_tx_dma(struct uart_8250_port *); extern void serial8250_tx_dma_flush(struct uart_8250_port *); extern int serial8250_rx_dma(struct uart_8250_port *); extern void serial8250_rx_dma_flush(struct uart_8250_port *); extern int serial8250_request_dma(struct uart_8250_port *); extern 
void serial8250_release_dma(struct uart_8250_port *); /* Call the optional dma->prepare_tx_dma() hook. p->dma is dereferenced without a NULL check. */ static inline void serial8250_do_prepare_tx_dma(struct uart_8250_port *p) { struct uart_8250_dma *dma = p->dma; if (dma->prepare_tx_dma) dma->prepare_tx_dma(p); } /* Call the optional dma->prepare_rx_dma() hook. p->dma is dereferenced without a NULL check. */ static inline void serial8250_do_prepare_rx_dma(struct uart_8250_port *p) { struct uart_8250_dma *dma = p->dma; if (dma->prepare_rx_dma) dma->prepare_rx_dma(p); } /* True only when p->dma is non-NULL and its tx_running flag is set; this is the one helper here that tolerates p->dma == NULL. */ static inline bool serial8250_tx_dma_running(struct uart_8250_port *p) { struct uart_8250_dma *dma = p->dma; return dma && dma->tx_running; } /* Pause the TX channel, but only while tx_running is set. p->dma is dereferenced without a NULL check. */ static inline void serial8250_tx_dma_pause(struct uart_8250_port *p) { struct uart_8250_dma *dma = p->dma; if (!dma->tx_running) return; dmaengine_pause(dma->txchan); } /* Resume the TX channel, but only while tx_running is set. p->dma is dereferenced without a NULL check. */ static inline void serial8250_tx_dma_resume(struct uart_8250_port *p) { struct uart_8250_dma *dma = p->dma; if (!dma->tx_running) return; dmaengine_resume(dma->txchan); } #else /* !CONFIG_SERIAL_8250_DMA: stubs; the int-returning ones report -1 and tx_dma_running() is always false */ static inline int serial8250_tx_dma(struct uart_8250_port *p) { return -1; } static inline void serial8250_tx_dma_flush(struct uart_8250_port *p) { } static inline int serial8250_rx_dma(struct uart_8250_port *p) { return -1; } static inline void serial8250_rx_dma_flush(struct uart_8250_port *p) { } static inline int serial8250_request_dma(struct uart_8250_port *p) { return -1; } static inline void serial8250_release_dma(struct uart_8250_port *p) { } static inline bool serial8250_tx_dma_running(struct uart_8250_port *p) { return false; } static inline void serial8250_tx_dma_pause(struct uart_8250_port *p) { } static inline void serial8250_tx_dma_resume(struct uart_8250_port *p) { } #endif /* CONFIG_SERIAL_8250_DMA */ /* Select the high-speed prescaler via register 0x04 (EXCR2). Returns 0 when PRESL already reads back as 0x10 and nothing is written, 1 after the register has been rewritten. */ static inline int ns16550a_goto_highspeed(struct uart_8250_port *up) { unsigned char status; status = serial_in(up, 0x04); /* EXCR2 */ #define PRESL(x) ((x) & 0x30) if (PRESL(status) == 0x10) { /* already in high speed mode */ return 0; } else { status &= ~0xB0; /* Disable LOCK, mask out PRESL[01] */ status |= 0x10; /* 1.625 divisor for baud_base --> 921600 */ serial_out(up, 0x04, status); } return 1; } /* Port index computed as port->minor - 64. NOTE(review): 64 is presumably the first minor number used by this driver -- confirm against the uart_driver definition. */ static inline int 
serial_index(struct uart_port *port) { return port->minor - 64; }
3 6805 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 /* SPDX-License-Identifier: GPL-2.0-only */ /* * include/linux/idr.h * * 2002-10-18 written by Jim Houston jim.houston@ccur.com * Copyright (C) 2002 by Concurrent Computer Corporation * * Small id to pointer translation service avoiding fixed sized * tables. */ #ifndef __IDR_H__ #define __IDR_H__ #include <linux/radix-tree.h> #include <linux/gfp.h> #include <linux/percpu.h> #include <linux/cleanup.h> /* struct idr - ID to pointer map. idr_rt is the backing radix tree, idr_base is the value from which IDs are allocated (see idr_init_base()), and idr_next is the cyclic allocator's cursor (see idr_get_cursor() and idr_set_cursor()). */ struct idr { struct radix_tree_root idr_rt; unsigned int idr_base; unsigned int idr_next; }; /* * The IDR API does not expose the tagging functionality of the radix tree * to users. Use tag 0 to track whether a node has free space below it. 
*/ #define IDR_FREE 0 /* Set the IDR flag and the IDR_FREE tag */ #define IDR_RT_MARKER (ROOT_IS_IDR | (__force gfp_t) \ (1 << (ROOT_TAG_SHIFT + IDR_FREE))) /** * IDR_INIT_BASE() - Initialise an IDR with a base. * @name: Name of IDR. * @base: Value stored in the idr_base field. * * The cyclic allocator cursor (idr_next) starts at 0. */ #define IDR_INIT_BASE(name, base) { \ .idr_rt = RADIX_TREE_INIT(name, IDR_RT_MARKER), \ .idr_base = (base), \ .idr_next = 0, \ } /** * IDR_INIT() - Initialise an IDR. * @name: Name of IDR. * * A freshly-initialised IDR contains no IDs. Equivalent to * IDR_INIT_BASE() with a base of 0. */ #define IDR_INIT(name) IDR_INIT_BASE(name, 0) /** * DEFINE_IDR() - Define a statically-allocated IDR. * @name: Name of IDR. * * An IDR defined using this macro is ready for use with no additional * initialisation required. It contains no IDs. */ #define DEFINE_IDR(name) struct idr name = IDR_INIT(name) /** * idr_get_cursor - Return the current position of the cyclic allocator * @idr: idr handle * * The value returned is the value that will be returned next from * idr_alloc_cyclic() if it is free (otherwise the search will start from * this position). * * Return: the idr_next field, read with READ_ONCE(). */ static inline unsigned int idr_get_cursor(const struct idr *idr) { return READ_ONCE(idr->idr_next); } /** * idr_set_cursor - Set the current position of the cyclic allocator * @idr: idr handle * @val: new position * * The next call to idr_alloc_cyclic() will return @val if it is free * (otherwise the search will start from this position). The field is * written with WRITE_ONCE(). */ static inline void idr_set_cursor(struct idr *idr, unsigned int val) { WRITE_ONCE(idr->idr_next, val); } /** * DOC: idr sync * idr synchronization (stolen from radix-tree.h) * * idr_find() is able to be called locklessly, using RCU. The caller must * ensure calls to this function are made within rcu_read_lock() regions. * Other readers (lock-free or otherwise) and modifications may be running * concurrently. * * It is still required that the caller manage the synchronization and * lifetimes of the items. 
So if RCU lock-free lookups are used, typically * this would mean that the items have their own locks, or are amenable to * lock-free access; and that the items are freed by RCU (or only freed after * having been deleted from the idr tree *and* a synchronize_rcu() grace * period). */ /* Locking helpers: each one forwards to the like-named xa_lock*() or xa_unlock*() call on the IDR's embedded idr_rt. */ #define idr_lock(idr) xa_lock(&(idr)->idr_rt) #define idr_unlock(idr) xa_unlock(&(idr)->idr_rt) #define idr_lock_bh(idr) xa_lock_bh(&(idr)->idr_rt) #define idr_unlock_bh(idr) xa_unlock_bh(&(idr)->idr_rt) #define idr_lock_irq(idr) xa_lock_irq(&(idr)->idr_rt) #define idr_unlock_irq(idr) xa_unlock_irq(&(idr)->idr_rt) #define idr_lock_irqsave(idr, flags) \ xa_lock_irqsave(&(idr)->idr_rt, flags) #define idr_unlock_irqrestore(idr, flags) \ xa_unlock_irqrestore(&(idr)->idr_rt, flags) void idr_preload(gfp_t gfp_mask); int idr_alloc(struct idr *, void *ptr, int start, int end, gfp_t); int __must_check idr_alloc_u32(struct idr *, void *ptr, u32 *id, unsigned long max, gfp_t); int idr_alloc_cyclic(struct idr *, void *ptr, int start, int end, gfp_t); void *idr_remove(struct idr *, unsigned long id); void *idr_find(const struct idr *, unsigned long id); int idr_for_each(const struct idr *, int (*fn)(int id, void *p, void *data), void *data); void *idr_get_next(struct idr *, int *nextid); void *idr_get_next_ul(struct idr *, unsigned long *nextid); void *idr_replace(struct idr *, void *, unsigned long id); void idr_destroy(struct idr *); /* Scope-managed ID allocation. The idr_alloc class below is constructed by calling idr_alloc() and recording the IDR together with the returned id; its cleanup expression calls idr_remove() only when id >= 0, so idr_null (id == -1) is skipped. NOTE(review): take_idr_id() presumably hands the value to the caller and leaves idr_null behind so that no removal happens -- confirm against __get_and_null(). */ struct __class_idr { struct idr *idr; int id; }; #define idr_null ((struct __class_idr){ NULL, -1 }) #define take_idr_id(id) __get_and_null(id, idr_null) DEFINE_CLASS(idr_alloc, struct __class_idr, if (_T.id >= 0) idr_remove(_T.idr, _T.id), ((struct __class_idr){ .idr = idr, .id = idr_alloc(idr, ptr, start, end, gfp), }), struct idr *idr, void *ptr, int start, int end, gfp_t gfp); /** * idr_init_base() - Initialise an IDR. * @idr: IDR handle. * @base: The base value for the IDR. 
* * This variation of idr_init() creates an IDR which will allocate IDs * starting at %base. */ static inline void idr_init_base(struct idr *idr, int base) { INIT_RADIX_TREE(&idr->idr_rt, IDR_RT_MARKER); idr->idr_base = base; idr->idr_next = 0; } /** * idr_init() - Initialise an IDR. * @idr: IDR handle. * * Initialise a dynamically allocated IDR. To initialise a * statically allocated IDR, use DEFINE_IDR(). */ static inline void idr_init(struct idr *idr) { idr_init_base(idr, 0); } /** * idr_is_empty() - Check whether the IDR has no IDs allocated. * @idr: IDR handle. * * Return: %true if no IDs are currently allocated from this IDR, * %false otherwise. */ static inline bool idr_is_empty(const struct idr *idr) { return radix_tree_empty(&idr->idr_rt) && radix_tree_tagged(&idr->idr_rt, IDR_FREE); } /** * idr_preload_end - end preload section started with idr_preload() * * Each idr_preload() should be matched with an invocation of this * function. See idr_preload() for details. */ static inline void idr_preload_end(void) { local_unlock(&radix_tree_preloads.lock); } /** * idr_for_each_entry() - Iterate over an IDR's elements of a given type. * @idr: IDR handle. * @entry: The type * to use as cursor. * @id: Entry ID. * * @entry and @id do not need to be initialized before the loop, and * after normal termination @entry is left with the value NULL. This * is convenient for a "not found" value. */ #define idr_for_each_entry(idr, entry, id) \ for (id = 0; ((entry) = idr_get_next(idr, &(id))) != NULL; id += 1U) /** * idr_for_each_entry_ul() - Iterate over an IDR's elements of a given type. * @idr: IDR handle. * @entry: The type * to use as cursor. * @tmp: A temporary placeholder for ID. * @id: Entry ID. * * @entry and @id do not need to be initialized before the loop, and * after normal termination @entry is left with the value NULL. This * is convenient for a "not found" value. The loop also stops once * @id is no longer at least @tmp (the previously visited ID). */ #define idr_for_each_entry_ul(idr, entry, tmp, id) \ for (tmp = 0, id = 0; \ ((entry) = tmp <= id ? 
idr_get_next_ul(idr, &(id)) : NULL) != NULL; \ tmp = id, ++id) /** * idr_for_each_entry_continue() - Continue iteration over an IDR's elements of a given type * @idr: IDR handle. * @entry: The type * to use as a cursor. * @id: Entry ID. * * Continue to iterate over entries, continuing after the current position. * Unlike idr_for_each_entry(), @id must be initialized before the loop. */ #define idr_for_each_entry_continue(idr, entry, id) \ for ((entry) = idr_get_next((idr), &(id)); \ entry; \ ++id, (entry) = idr_get_next((idr), &(id))) /** * idr_for_each_entry_continue_ul() - Continue iteration over an IDR's elements of a given type * @idr: IDR handle. * @entry: The type * to use as a cursor. * @tmp: A temporary placeholder for ID. * @id: Entry ID. * * Continue to iterate over entries, continuing after the current position. * After normal termination @entry is left with the value NULL. This * is convenient for a "not found" value. @id must be initialized before * the loop; @tmp is initialized from it. */ #define idr_for_each_entry_continue_ul(idr, entry, tmp, id) \ for (tmp = id; \ ((entry) = tmp <= id ? idr_get_next_ul(idr, &(id)) : NULL) != NULL; \ tmp = id, ++id) /* * IDA - ID Allocator, use when translation from id to pointer isn't necessary. */ #define IDA_CHUNK_SIZE 128 /* 128 bytes per chunk */ #define IDA_BITMAP_LONGS (IDA_CHUNK_SIZE / sizeof(long)) #define IDA_BITMAP_BITS (IDA_BITMAP_LONGS * sizeof(long) * 8) /* One bitmap chunk: IDA_BITMAP_LONGS longs, i.e. IDA_BITMAP_BITS bits. */ struct ida_bitmap { unsigned long bitmap[IDA_BITMAP_LONGS]; }; /* An IDA is a thin wrapper around a single xarray. */ struct ida { struct xarray xa; }; /* IDA_INIT() is the static initialiser and DEFINE_IDA() defines a ready-to-use IDA named @name; both set up the xarray with IDA_INIT_FLAGS. */ #define IDA_INIT_FLAGS (XA_FLAGS_LOCK_IRQ | XA_FLAGS_ALLOC) #define IDA_INIT(name) { \ .xa = XARRAY_INIT(name, IDA_INIT_FLAGS) \ } #define DEFINE_IDA(name) struct ida name = IDA_INIT(name) int ida_alloc_range(struct ida *, unsigned int min, unsigned int max, gfp_t); void ida_free(struct ida *, unsigned int id); void ida_destroy(struct ida *ida); int ida_find_first_range(struct ida *ida, unsigned int min, unsigned int max); /** * ida_alloc() - Allocate an unused ID. * @ida: IDA handle. * @gfp: Memory allocation flags. * * Allocate an ID between 0 and %INT_MAX, inclusive. * * Context: Any context. 
It is safe to call this function without * locking in your code. * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, * or %-ENOSPC if there are no free IDs. */ static inline int ida_alloc(struct ida *ida, gfp_t gfp) { return ida_alloc_range(ida, 0, ~0, gfp); } /** * ida_alloc_min() - Allocate an unused ID. * @ida: IDA handle. * @min: Lowest ID to allocate. * @gfp: Memory allocation flags. * * Allocate an ID between @min and %INT_MAX, inclusive. * * Context: Any context. It is safe to call this function without * locking in your code. * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, * or %-ENOSPC if there are no free IDs. */ static inline int ida_alloc_min(struct ida *ida, unsigned int min, gfp_t gfp) { return ida_alloc_range(ida, min, ~0, gfp); } /** * ida_alloc_max() - Allocate an unused ID. * @ida: IDA handle. * @max: Highest ID to allocate. * @gfp: Memory allocation flags. * * Allocate an ID between 0 and @max, inclusive. * * Context: Any context. It is safe to call this function without * locking in your code. * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, * or %-ENOSPC if there are no free IDs. */ static inline int ida_alloc_max(struct ida *ida, unsigned int max, gfp_t gfp) { return ida_alloc_range(ida, 0, max, gfp); } /** * ida_init() - Initialise an IDA. * @ida: IDA handle. * * Initialise a dynamically allocated IDA with IDA_INIT_FLAGS. To define * a statically allocated IDA, use DEFINE_IDA(). */ static inline void ida_init(struct ida *ida) { xa_init_flags(&ida->xa, IDA_INIT_FLAGS); } /** * ida_is_empty() - Check whether the IDA's xarray is empty. * @ida: IDA handle. * * Return: the result of xa_empty() on the underlying xarray. */ static inline bool ida_is_empty(const struct ida *ida) { return xa_empty(&ida->xa); } /** * ida_exists() - Check whether a given ID is allocated. * @ida: IDA handle. * @id: ID to look for. * * Return: %true if ida_find_first_range() restricted to [@id, @id] * returns @id. */ static inline bool ida_exists(struct ida *ida, unsigned int id) { return ida_find_first_range(ida, id, id) == id; } /** * ida_find_first() - Find the first ID over the whole range. * @ida: IDA handle. * * Return: whatever ida_find_first_range() returns for min 0 and max ~0. */ static inline int ida_find_first(struct ida *ida) { return ida_find_first_range(ida, 0, ~0); } #endif /* __IDR_H__ */
1 11 47 57 57 14 8 11 11 11 11 9 8 9 5 5 2 6 4 1 61 57 56 34 100 99 48 48 47 47 48 13 13 13 61 60 9 9 9 9 9 9 9 4 4 17 2 1 2 1 10 10 5 4 4 2 2 3 3 3 2 1 5 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 
486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 
986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 
1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2006 - 2007 Ivo van Doorn * Copyright (C) 2007 Dmitry Torokhov * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> #include <linux/workqueue.h> #include <linux/capability.h> #include <linux/list.h> #include <linux/mutex.h> #include <linux/rfkill.h> #include <linux/sched.h> #include <linux/spinlock.h> #include <linux/device.h> #include <linux/miscdevice.h> #include <linux/wait.h> #include <linux/poll.h> #include <linux/fs.h> #include <linux/slab.h> #include "rfkill.h" #define POLL_INTERVAL (5 * HZ) /* Bits in rfkill->state. RFKILL_BLOCK_SW_PREV holds the soft-block value that rfkill_set_block() falls back to when ops->set_block() fails. RFKILL_BLOCK_SW_SETCALL is set while ops->set_block() is being called, so that __rfkill_set_sw_state() updates SW_PREV instead of SW during that window. */ #define RFKILL_BLOCK_HW BIT(0) #define RFKILL_BLOCK_SW BIT(1) #define RFKILL_BLOCK_SW_PREV BIT(2) #define RFKILL_BLOCK_ANY (RFKILL_BLOCK_HW |\ RFKILL_BLOCK_SW |\ RFKILL_BLOCK_SW_PREV) #define RFKILL_BLOCK_SW_SETCALL BIT(31) /* Per-switch state. In this file, ->state and ->hard_block_reasons are modified under the ->lock spinlock. */ struct rfkill { spinlock_t lock; enum rfkill_type type; unsigned long state; unsigned long hard_block_reasons; u32 idx; bool registered; bool persistent; bool polling_paused; bool suspended; bool need_sync; const struct rfkill_ops *ops; void *data; #ifdef CONFIG_RFKILL_LEDS struct led_trigger led_trigger; const char *ledtrigname; #endif struct device dev; struct list_head node; struct delayed_work poll_work; struct work_struct uevent_work; struct work_struct sync_work; char name[]; }; #define to_rfkill(d) container_of(d, struct rfkill, dev) /* One queued event; linked into rfkill_data.events by rfkill_send_events(). */ struct rfkill_int_event { struct list_head list; struct rfkill_event_ext ev; }; /* Per-entry state of the rfkill_fds list: ->mtx is taken around additions to ->events, and ->read_wait is woken after an event is queued. */ struct rfkill_data { struct list_head list; struct list_head events; struct mutex mtx; wait_queue_head_t read_wait; bool input_handler; u8 
max_size; }; MODULE_AUTHOR("Ivo van Doorn <IvDoorn@gmail.com>"); MODULE_AUTHOR("Johannes Berg <johannes@sipsolutions.net>"); MODULE_DESCRIPTION("RF switch support"); MODULE_LICENSE("GPL"); /* * The locking here should be made much smarter, we currently have * a bit of a stupid situation because drivers might want to register * the rfkill struct under their own lock, and take this lock during * rfkill method calls -- which will cause an AB-BA deadlock situation. * * To fix that, we need to rework this code here to be mostly lock-free * and only use the mutex for list manipulations, not to protect the * various other global variables. Then we can avoid holding the mutex * around driver operations, and all is happy. */ static LIST_HEAD(rfkill_list); /* list of registered rf switches */ static DEFINE_MUTEX(rfkill_global_mutex); static LIST_HEAD(rfkill_fds); /* list of open fds of /dev/rfkill */ static unsigned int rfkill_default_state = 1; module_param_named(default_state, rfkill_default_state, uint, 0444); MODULE_PARM_DESC(default_state, "Default initial state for all radio types, 0 = radio off"); /* Global soft-block state per type: .cur is the current value, .sav the copy of .cur that rfkill_epo() stores and rfkill_restore_states() reapplies. */ static struct { bool cur, sav; } rfkill_global_states[NUM_RFKILL_TYPES]; static bool rfkill_epo_lock_active; #ifdef CONFIG_RFKILL_LEDS /* Update this switch's own LED trigger: LED_OFF while any RFKILL_BLOCK_ANY bit is set, LED_FULL otherwise. Does nothing until the switch is registered. */ static void rfkill_led_trigger_event(struct rfkill *rfkill) { struct led_trigger *trigger; if (!rfkill->registered) return; trigger = &rfkill->led_trigger; if (rfkill->state & RFKILL_BLOCK_ANY) led_trigger_event(trigger, LED_OFF); else led_trigger_event(trigger, LED_FULL); } /* led_trigger ->activate callback: push the current state to the trigger; always returns 0. */ static int rfkill_led_trigger_activate(struct led_classdev *led) { struct rfkill *rfkill; rfkill = container_of(led->trigger, struct rfkill, led_trigger); rfkill_led_trigger_event(rfkill); return 0; } /* Returns the name currently stored in the switch's led_trigger. */ const char *rfkill_get_led_trigger_name(struct rfkill *rfkill) { return rfkill->led_trigger.name; } EXPORT_SYMBOL(rfkill_get_led_trigger_name); /* Store @name in ->ledtrigname; rfkill_led_trigger_register() uses it as the trigger name when it is non-NULL. The pointer is stored as-is, not copied. */ void rfkill_set_led_trigger_name(struct rfkill *rfkill, const char *name) { BUG_ON(!rfkill); rfkill->ledtrigname = name; } 
EXPORT_SYMBOL(rfkill_set_led_trigger_name); /* Name the trigger after ->ledtrigname when set, otherwise after the device name, install the activate callback and register the trigger. Returns the result of led_trigger_register(). */ static int rfkill_led_trigger_register(struct rfkill *rfkill) { rfkill->led_trigger.name = rfkill->ledtrigname ? : dev_name(&rfkill->dev); rfkill->led_trigger.activate = rfkill_led_trigger_activate; return led_trigger_register(&rfkill->led_trigger); } static void rfkill_led_trigger_unregister(struct rfkill *rfkill) { led_trigger_unregister(&rfkill->led_trigger); } static struct led_trigger rfkill_any_led_trigger; static struct led_trigger rfkill_none_led_trigger; static struct work_struct rfkill_global_led_trigger_work; /* Work item that recomputes the two global triggers. Under rfkill_global_mutex it looks for a switch in rfkill_list with no RFKILL_BLOCK_ANY bit set: the "any" trigger gets LED_FULL if one exists and LED_OFF otherwise, and the "none" trigger gets the opposite value. */ static void rfkill_global_led_trigger_worker(struct work_struct *work) { enum led_brightness brightness = LED_OFF; struct rfkill *rfkill; mutex_lock(&rfkill_global_mutex); list_for_each_entry(rfkill, &rfkill_list, node) { if (!(rfkill->state & RFKILL_BLOCK_ANY)) { brightness = LED_FULL; break; } } mutex_unlock(&rfkill_global_mutex); led_trigger_event(&rfkill_any_led_trigger, brightness); led_trigger_event(&rfkill_none_led_trigger, brightness == LED_OFF ? 
LED_FULL : LED_OFF); } /* Request a refresh of the global triggers by scheduling rfkill_global_led_trigger_work. */ static void rfkill_global_led_trigger_event(void) { schedule_work(&rfkill_global_led_trigger_work); } /* Register the "rfkill-any" and "rfkill-none" triggers. If the second registration fails the first is unregistered again. Returns 0 or the error from led_trigger_register(). */ static int rfkill_global_led_trigger_register(void) { int ret; INIT_WORK(&rfkill_global_led_trigger_work, rfkill_global_led_trigger_worker); rfkill_any_led_trigger.name = "rfkill-any"; ret = led_trigger_register(&rfkill_any_led_trigger); if (ret) return ret; rfkill_none_led_trigger.name = "rfkill-none"; ret = led_trigger_register(&rfkill_none_led_trigger); if (ret) led_trigger_unregister(&rfkill_any_led_trigger); else /* Delay activation until all global triggers are registered */ rfkill_global_led_trigger_event(); return ret; } /* Unregister both global triggers, then cancel the refresh work item and wait for it via cancel_work_sync(). */ static void rfkill_global_led_trigger_unregister(void) { led_trigger_unregister(&rfkill_none_led_trigger); led_trigger_unregister(&rfkill_any_led_trigger); cancel_work_sync(&rfkill_global_led_trigger_work); } #else /* !CONFIG_RFKILL_LEDS: empty stubs, the register functions report success (0) */ static void rfkill_led_trigger_event(struct rfkill *rfkill) { } static inline int rfkill_led_trigger_register(struct rfkill *rfkill) { return 0; } static inline void rfkill_led_trigger_unregister(struct rfkill *rfkill) { } static void rfkill_global_led_trigger_event(void) { } static int rfkill_global_led_trigger_register(void) { return 0; } static void rfkill_global_led_trigger_unregister(void) { } #endif /* CONFIG_RFKILL_LEDS */ /* Fill @ev from @rfkill: idx, type and @op are copied directly; hard, soft and hard_block_reasons are read together under rfkill->lock. "soft" is true when either RFKILL_BLOCK_SW or RFKILL_BLOCK_SW_PREV is set. */ static void rfkill_fill_event(struct rfkill_event_ext *ev, struct rfkill *rfkill, enum rfkill_operation op) { unsigned long flags; ev->idx = rfkill->idx; ev->type = rfkill->type; ev->op = op; spin_lock_irqsave(&rfkill->lock, flags); ev->hard = !!(rfkill->state & RFKILL_BLOCK_HW); ev->soft = !!(rfkill->state & (RFKILL_BLOCK_SW | RFKILL_BLOCK_SW_PREV)); ev->hard_block_reasons = rfkill->hard_block_reasons; spin_unlock_irqrestore(&rfkill->lock, flags); } /* Queue one freshly allocated event on every entry of rfkill_fds and wake its readers. An entry whose allocation fails is skipped without any error being reported. */ static void rfkill_send_events(struct rfkill *rfkill, enum rfkill_operation op) { struct rfkill_data *data; struct rfkill_int_event *ev; list_for_each_entry(data, &rfkill_fds, list) { ev = kzalloc_obj(*ev); if (!ev) continue; 
rfkill_fill_event(&ev->ev, rfkill, op); mutex_lock(&data->mtx); list_add_tail(&ev->list, &data->events); mutex_unlock(&data->mtx); wake_up_interruptible(&data->read_wait); } } /* Announce a state change of a registered switch: a KOBJ_CHANGE uevent plus an RFKILL_OP_CHANGE event. Does nothing until the switch is registered. */ static void rfkill_event(struct rfkill *rfkill) { if (!rfkill->registered) return; kobject_uevent(&rfkill->dev.kobj, KOBJ_CHANGE); /* also send event to /dev/rfkill */ rfkill_send_events(rfkill, RFKILL_OP_CHANGE); } /** * rfkill_set_block - wrapper for set_block method * * @rfkill: the rfkill struct to use * @blocked: the new software state * * Calls the set_block method (when applicable) and handles notifications * etc. as well. * * Returns early, without touching any state, while the device's power * state has PM_EVENT_SLEEP set. rfkill->lock is released around the * ops->set_block() call; if that call fails the soft-block bit is reset * from RFKILL_BLOCK_SW_PREV. An event is sent only when the soft-block * bit ends up different from its value on entry. */ static void rfkill_set_block(struct rfkill *rfkill, bool blocked) { unsigned long flags; bool prev, curr; int err; if (unlikely(rfkill->dev.power.power_state.event & PM_EVENT_SLEEP)) return; /* * Some platforms (...!) generate input events which affect the * _hard_ kill state -- whenever something tries to change the * current software state query the hardware state too. */ if (rfkill->ops->query) rfkill->ops->query(rfkill, rfkill->data); spin_lock_irqsave(&rfkill->lock, flags); /* copy the old SW bit into SW_PREV, apply the requested state, then flag that set_block() is in flight */ prev = rfkill->state & RFKILL_BLOCK_SW; if (prev) rfkill->state |= RFKILL_BLOCK_SW_PREV; else rfkill->state &= ~RFKILL_BLOCK_SW_PREV; if (blocked) rfkill->state |= RFKILL_BLOCK_SW; else rfkill->state &= ~RFKILL_BLOCK_SW; rfkill->state |= RFKILL_BLOCK_SW_SETCALL; spin_unlock_irqrestore(&rfkill->lock, flags); /* the driver callback runs without rfkill->lock held */ err = rfkill->ops->set_block(rfkill->data, blocked); spin_lock_irqsave(&rfkill->lock, flags); if (err) { /* * Failed -- reset status to _PREV, which may be different * from what we have set _PREV to earlier in this function * if rfkill_set_sw_state was invoked. 
*/ if (rfkill->state & RFKILL_BLOCK_SW_PREV) rfkill->state |= RFKILL_BLOCK_SW; else rfkill->state &= ~RFKILL_BLOCK_SW; } rfkill->state &= ~RFKILL_BLOCK_SW_SETCALL; rfkill->state &= ~RFKILL_BLOCK_SW_PREV; curr = rfkill->state & RFKILL_BLOCK_SW; spin_unlock_irqrestore(&rfkill->lock, flags); rfkill_led_trigger_event(rfkill); rfkill_global_led_trigger_event(); if (prev != curr) rfkill_event(rfkill); } /* Apply the current global state of the switch's type to a switch flagged need_sync, then clear the flag. Caller must hold rfkill_global_mutex. */ static void rfkill_sync(struct rfkill *rfkill) { lockdep_assert_held(&rfkill_global_mutex); if (!rfkill->need_sync) return; rfkill_set_block(rfkill, rfkill_global_states[rfkill->type].cur); rfkill->need_sync = false; } /* Record @blocked as the current global state of @type, or of every type when @type is RFKILL_TYPE_ALL. */ static void rfkill_update_global_state(enum rfkill_type type, bool blocked) { int i; if (type != RFKILL_TYPE_ALL) { rfkill_global_states[type].cur = blocked; return; } for (i = 0; i < NUM_RFKILL_TYPES; i++) rfkill_global_states[i].cur = blocked; } #ifdef CONFIG_RFKILL_INPUT static atomic_t rfkill_input_disabled = ATOMIC_INIT(0); /** * __rfkill_switch_all - Set state of all switches of given type * @type: type of interfaces to be affected * @blocked: the new state * * This function sets the state of all switches of given type, * unless a specific switch is suspended. %RFKILL_TYPE_ALL matches * every switch. The global state for @type is updated first. * * Caller must have acquired rfkill_global_mutex. */ static void __rfkill_switch_all(const enum rfkill_type type, bool blocked) { struct rfkill *rfkill; rfkill_update_global_state(type, blocked); list_for_each_entry(rfkill, &rfkill_list, node) { if (rfkill->type != type && type != RFKILL_TYPE_ALL) continue; rfkill_set_block(rfkill, blocked); } } /** * rfkill_switch_all - Set state of all switches of given type * @type: type of interfaces to be affected * @blocked: the new state * * Acquires rfkill_global_mutex and calls __rfkill_switch_all(@type, @blocked). * Please refer to __rfkill_switch_all() for details. * * Does nothing if the EPO lock is active or if rfkill_input_disabled * is non-zero. 
*/ void rfkill_switch_all(enum rfkill_type type, bool blocked) { if (atomic_read(&rfkill_input_disabled)) return; mutex_lock(&rfkill_global_mutex); if (!rfkill_epo_lock_active) __rfkill_switch_all(type, blocked); mutex_unlock(&rfkill_global_mutex); } /** * rfkill_epo - emergency power off all transmitters * * This kicks all non-suspended rfkill devices to the soft-blocked state, * ignoring everything in its path but rfkill_global_mutex and rfkill->lock. * It also activates the EPO lock. * * The global state before the EPO is saved and can be restored later * using rfkill_restore_states(). * * Does nothing if rfkill_input_disabled is non-zero. */ void rfkill_epo(void) { struct rfkill *rfkill; int i; if (atomic_read(&rfkill_input_disabled)) return; mutex_lock(&rfkill_global_mutex); rfkill_epo_lock_active = true; list_for_each_entry(rfkill, &rfkill_list, node) rfkill_set_block(rfkill, true); for (i = 0; i < NUM_RFKILL_TYPES; i++) { rfkill_global_states[i].sav = rfkill_global_states[i].cur; rfkill_global_states[i].cur = true; } mutex_unlock(&rfkill_global_mutex); } /** * rfkill_restore_states - restore global states * * Restore (and sync switches to) the global state from the * states saved in rfkill_global_states[].sav, and clear the EPO lock. * This can undo the effects of a call to rfkill_epo(). * * Does nothing if rfkill_input_disabled is non-zero. */ void rfkill_restore_states(void) { int i; if (atomic_read(&rfkill_input_disabled)) return; mutex_lock(&rfkill_global_mutex); rfkill_epo_lock_active = false; for (i = 0; i < NUM_RFKILL_TYPES; i++) __rfkill_switch_all(i, rfkill_global_states[i].sav); mutex_unlock(&rfkill_global_mutex); } /** * rfkill_remove_epo_lock - unlock state changes * * Used by rfkill-input to manually unlock state changes, when * the EPO switch is deactivated. Does nothing if rfkill_input_disabled * is non-zero. 
*/ void rfkill_remove_epo_lock(void) { if (atomic_read(&rfkill_input_disabled)) return; mutex_lock(&rfkill_global_mutex); rfkill_epo_lock_active = false; mutex_unlock(&rfkill_global_mutex); } /** * rfkill_is_epo_lock_active - returns true if EPO is active * * Returns 0 (false) if there is NOT an active EPO condition, * and 1 (true) if there is an active EPO condition, which * locks all radios in one of the BLOCKED states. * * Can be called in atomic context. */ bool rfkill_is_epo_lock_active(void) { return rfkill_epo_lock_active; } /** * rfkill_get_global_sw_state - returns global state for a type * @type: the type to get the global state of * * Returns the current global state for a given wireless * device type. */ bool rfkill_get_global_sw_state(const enum rfkill_type type) { return rfkill_global_states[type].cur; } #endif /* CONFIG_RFKILL_INPUT */ /* Set or clear one hard-block @reason. RFKILL_BLOCK_HW is set whenever a reason is added and cleared only once no reasons remain. The LED triggers are refreshed, and uevent_work is scheduled when the switch is registered and this reason's previous state differs from @blocked. Returns true if any RFKILL_BLOCK_ANY bit is set afterwards. */ bool rfkill_set_hw_state_reason(struct rfkill *rfkill, bool blocked, enum rfkill_hard_block_reasons reason) { unsigned long flags; bool ret, prev; BUG_ON(!rfkill); spin_lock_irqsave(&rfkill->lock, flags); prev = !!(rfkill->hard_block_reasons & reason); if (blocked) { rfkill->state |= RFKILL_BLOCK_HW; rfkill->hard_block_reasons |= reason; } else { rfkill->hard_block_reasons &= ~reason; if (!rfkill->hard_block_reasons) rfkill->state &= ~RFKILL_BLOCK_HW; } ret = !!(rfkill->state & RFKILL_BLOCK_ANY); spin_unlock_irqrestore(&rfkill->lock, flags); rfkill_led_trigger_event(rfkill); rfkill_global_led_trigger_event(); if (rfkill->registered && prev != blocked) schedule_work(&rfkill->uevent_work); return ret; } EXPORT_SYMBOL(rfkill_set_hw_state_reason); /* Set or clear the soft-block bit according to @blocked. The callers in this file hold rfkill->lock. */ static void __rfkill_set_sw_state(struct rfkill *rfkill, bool blocked) { u32 bit = RFKILL_BLOCK_SW; /* if in a ops->set_block right now, use other bit */ if (rfkill->state & RFKILL_BLOCK_SW_SETCALL) bit = RFKILL_BLOCK_SW_PREV; if (blocked) rfkill->state |= bit; else rfkill->state &= ~bit; } /* Record the soft-block state @blocked for the switch. Returns true if the switch is then blocked by software or by hardware. */ bool rfkill_set_sw_state(struct rfkill *rfkill, bool blocked) { unsigned long flags; bool prev, hwblock; 
BUG_ON(!rfkill); spin_lock_irqsave(&rfkill->lock, flags); prev = !!(rfkill->state & RFKILL_BLOCK_SW); __rfkill_set_sw_state(rfkill, blocked); hwblock = !!(rfkill->state & RFKILL_BLOCK_HW); blocked = blocked || hwblock; spin_unlock_irqrestore(&rfkill->lock, flags); if (!rfkill->registered) return blocked; if (prev != blocked && !hwblock) schedule_work(&rfkill->uevent_work); rfkill_led_trigger_event(rfkill); rfkill_global_led_trigger_event(); return blocked; } EXPORT_SYMBOL(rfkill_set_sw_state); void rfkill_init_sw_state(struct rfkill *rfkill, bool blocked) { unsigned long flags; BUG_ON(!rfkill); BUG_ON(rfkill->registered); spin_lock_irqsave(&rfkill->lock, flags); __rfkill_set_sw_state(rfkill, blocked); rfkill->persistent = true; spin_unlock_irqrestore(&rfkill->lock, flags); } EXPORT_SYMBOL(rfkill_init_sw_state); void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw) { unsigned long flags; bool swprev, hwprev; BUG_ON(!rfkill); spin_lock_irqsave(&rfkill->lock, flags); /* * No need to care about prev/setblock ... this is for uevent only * and that will get triggered by rfkill_set_block anyway. 
*/ swprev = !!(rfkill->state & RFKILL_BLOCK_SW); hwprev = !!(rfkill->state & RFKILL_BLOCK_HW); __rfkill_set_sw_state(rfkill, sw); if (hw) rfkill->state |= RFKILL_BLOCK_HW; else rfkill->state &= ~RFKILL_BLOCK_HW; spin_unlock_irqrestore(&rfkill->lock, flags); if (!rfkill->registered) { rfkill->persistent = true; } else { if (swprev != sw || hwprev != hw) schedule_work(&rfkill->uevent_work); rfkill_led_trigger_event(rfkill); rfkill_global_led_trigger_event(); } } EXPORT_SYMBOL(rfkill_set_states); static const char * const rfkill_types[] = { NULL, /* RFKILL_TYPE_ALL */ "wlan", "bluetooth", "ultrawideband", "wimax", "wwan", "gps", "fm", "nfc", }; enum rfkill_type rfkill_find_type(const char *name) { int i; BUILD_BUG_ON(ARRAY_SIZE(rfkill_types) != NUM_RFKILL_TYPES); if (!name) return RFKILL_TYPE_ALL; for (i = 1; i < NUM_RFKILL_TYPES; i++) if (!strcmp(name, rfkill_types[i])) return i; return RFKILL_TYPE_ALL; } EXPORT_SYMBOL(rfkill_find_type); static ssize_t name_show(struct device *dev, struct device_attribute *attr, char *buf) { struct rfkill *rfkill = to_rfkill(dev); return sysfs_emit(buf, "%s\n", rfkill->name); } static DEVICE_ATTR_RO(name); static ssize_t type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct rfkill *rfkill = to_rfkill(dev); return sysfs_emit(buf, "%s\n", rfkill_types[rfkill->type]); } static DEVICE_ATTR_RO(type); static ssize_t index_show(struct device *dev, struct device_attribute *attr, char *buf) { struct rfkill *rfkill = to_rfkill(dev); return sysfs_emit(buf, "%d\n", rfkill->idx); } static DEVICE_ATTR_RO(index); static ssize_t persistent_show(struct device *dev, struct device_attribute *attr, char *buf) { struct rfkill *rfkill = to_rfkill(dev); return sysfs_emit(buf, "%d\n", rfkill->persistent); } static DEVICE_ATTR_RO(persistent); static ssize_t hard_show(struct device *dev, struct device_attribute *attr, char *buf) { struct rfkill *rfkill = to_rfkill(dev); return sysfs_emit(buf, "%d\n", (rfkill->state & 
RFKILL_BLOCK_HW) ? 1 : 0); } static DEVICE_ATTR_RO(hard); static ssize_t soft_show(struct device *dev, struct device_attribute *attr, char *buf) { struct rfkill *rfkill = to_rfkill(dev); mutex_lock(&rfkill_global_mutex); rfkill_sync(rfkill); mutex_unlock(&rfkill_global_mutex); return sysfs_emit(buf, "%d\n", (rfkill->state & RFKILL_BLOCK_SW) ? 1 : 0); } static ssize_t soft_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct rfkill *rfkill = to_rfkill(dev); unsigned long state; int err; if (!capable(CAP_NET_ADMIN)) return -EPERM; err = kstrtoul(buf, 0, &state); if (err) return err; if (state > 1 ) return -EINVAL; mutex_lock(&rfkill_global_mutex); rfkill_sync(rfkill); rfkill_set_block(rfkill, state); mutex_unlock(&rfkill_global_mutex); return count; } static DEVICE_ATTR_RW(soft); static ssize_t hard_block_reasons_show(struct device *dev, struct device_attribute *attr, char *buf) { struct rfkill *rfkill = to_rfkill(dev); return sysfs_emit(buf, "0x%lx\n", rfkill->hard_block_reasons); } static DEVICE_ATTR_RO(hard_block_reasons); static u8 user_state_from_blocked(unsigned long state) { if (state & RFKILL_BLOCK_HW) return RFKILL_USER_STATE_HARD_BLOCKED; if (state & RFKILL_BLOCK_SW) return RFKILL_USER_STATE_SOFT_BLOCKED; return RFKILL_USER_STATE_UNBLOCKED; } static ssize_t state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct rfkill *rfkill = to_rfkill(dev); mutex_lock(&rfkill_global_mutex); rfkill_sync(rfkill); mutex_unlock(&rfkill_global_mutex); return sysfs_emit(buf, "%d\n", user_state_from_blocked(rfkill->state)); } static ssize_t state_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct rfkill *rfkill = to_rfkill(dev); unsigned long state; int err; if (!capable(CAP_NET_ADMIN)) return -EPERM; err = kstrtoul(buf, 0, &state); if (err) return err; if (state != RFKILL_USER_STATE_SOFT_BLOCKED && state != RFKILL_USER_STATE_UNBLOCKED) return -EINVAL; 
mutex_lock(&rfkill_global_mutex); rfkill_sync(rfkill); rfkill_set_block(rfkill, state == RFKILL_USER_STATE_SOFT_BLOCKED); mutex_unlock(&rfkill_global_mutex); return count; } static DEVICE_ATTR_RW(state); static struct attribute *rfkill_dev_attrs[] = { &dev_attr_name.attr, &dev_attr_type.attr, &dev_attr_index.attr, &dev_attr_persistent.attr, &dev_attr_state.attr, &dev_attr_soft.attr, &dev_attr_hard.attr, &dev_attr_hard_block_reasons.attr, NULL, }; ATTRIBUTE_GROUPS(rfkill_dev); static void rfkill_release(struct device *dev) { struct rfkill *rfkill = to_rfkill(dev); kfree(rfkill); } static int rfkill_dev_uevent(const struct device *dev, struct kobj_uevent_env *env) { struct rfkill *rfkill = to_rfkill(dev); unsigned long flags; unsigned long reasons; u32 state; int error; error = add_uevent_var(env, "RFKILL_NAME=%s", rfkill->name); if (error) return error; error = add_uevent_var(env, "RFKILL_TYPE=%s", rfkill_types[rfkill->type]); if (error) return error; spin_lock_irqsave(&rfkill->lock, flags); state = rfkill->state; reasons = rfkill->hard_block_reasons; spin_unlock_irqrestore(&rfkill->lock, flags); error = add_uevent_var(env, "RFKILL_STATE=%d", user_state_from_blocked(state)); if (error) return error; return add_uevent_var(env, "RFKILL_HW_BLOCK_REASON=0x%lx", reasons); } void rfkill_pause_polling(struct rfkill *rfkill) { BUG_ON(!rfkill); if (!rfkill->ops->poll) return; rfkill->polling_paused = true; cancel_delayed_work_sync(&rfkill->poll_work); } EXPORT_SYMBOL(rfkill_pause_polling); void rfkill_resume_polling(struct rfkill *rfkill) { BUG_ON(!rfkill); if (!rfkill->ops->poll) return; rfkill->polling_paused = false; if (rfkill->suspended) return; queue_delayed_work(system_power_efficient_wq, &rfkill->poll_work, 0); } EXPORT_SYMBOL(rfkill_resume_polling); #ifdef CONFIG_PM_SLEEP static int rfkill_suspend(struct device *dev) { struct rfkill *rfkill = to_rfkill(dev); rfkill->suspended = true; cancel_delayed_work_sync(&rfkill->poll_work); return 0; } static int 
rfkill_resume(struct device *dev) { struct rfkill *rfkill = to_rfkill(dev); bool cur; rfkill->suspended = false; if (!rfkill->registered) return 0; if (!rfkill->persistent) { cur = !!(rfkill->state & RFKILL_BLOCK_SW); rfkill_set_block(rfkill, cur); } if (rfkill->ops->poll && !rfkill->polling_paused) queue_delayed_work(system_power_efficient_wq, &rfkill->poll_work, 0); return 0; } static SIMPLE_DEV_PM_OPS(rfkill_pm_ops, rfkill_suspend, rfkill_resume); #define RFKILL_PM_OPS (&rfkill_pm_ops) #else #define RFKILL_PM_OPS NULL #endif static struct class rfkill_class = { .name = "rfkill", .dev_release = rfkill_release, .dev_groups = rfkill_dev_groups, .dev_uevent = rfkill_dev_uevent, .pm = RFKILL_PM_OPS, }; bool rfkill_blocked(struct rfkill *rfkill) { unsigned long flags; u32 state; spin_lock_irqsave(&rfkill->lock, flags); state = rfkill->state; spin_unlock_irqrestore(&rfkill->lock, flags); return !!(state & RFKILL_BLOCK_ANY); } EXPORT_SYMBOL(rfkill_blocked); bool rfkill_soft_blocked(struct rfkill *rfkill) { unsigned long flags; u32 state; spin_lock_irqsave(&rfkill->lock, flags); state = rfkill->state; spin_unlock_irqrestore(&rfkill->lock, flags); return !!(state & RFKILL_BLOCK_SW); } EXPORT_SYMBOL(rfkill_soft_blocked); struct rfkill * __must_check rfkill_alloc(const char *name, struct device *parent, const enum rfkill_type type, const struct rfkill_ops *ops, void *ops_data) { struct rfkill *rfkill; struct device *dev; if (WARN_ON(!ops)) return NULL; if (WARN_ON(!ops->set_block)) return NULL; if (WARN_ON(!name)) return NULL; if (WARN_ON(type == RFKILL_TYPE_ALL || type >= NUM_RFKILL_TYPES)) return NULL; rfkill = kzalloc(sizeof(*rfkill) + strlen(name) + 1, GFP_KERNEL); if (!rfkill) return NULL; spin_lock_init(&rfkill->lock); INIT_LIST_HEAD(&rfkill->node); rfkill->type = type; strcpy(rfkill->name, name); rfkill->ops = ops; rfkill->data = ops_data; dev = &rfkill->dev; dev->class = &rfkill_class; dev->parent = parent; device_initialize(dev); return rfkill; } 
EXPORT_SYMBOL(rfkill_alloc);

/* Delayed-work handler: poll the driver once, then re-arm the poll work. */
static void rfkill_poll(struct work_struct *work)
{
	struct rfkill *rfkill;

	rfkill = container_of(work, struct rfkill, poll_work.work);

	/*
	 * Poll hardware state -- driver will use one of the
	 * rfkill_set{,_hw,_sw}_state functions and use its
	 * return value to update the current status.
	 */
	rfkill->ops->poll(rfkill, rfkill->data);

	queue_delayed_work(system_power_efficient_wq,
		&rfkill->poll_work,
		round_jiffies_relative(POLL_INTERVAL));
}

/* Work handler: emit an rfkill event under rfkill_global_mutex. */
static void rfkill_uevent_work(struct work_struct *work)
{
	struct rfkill *rfkill;

	rfkill = container_of(work, struct rfkill, uevent_work);

	mutex_lock(&rfkill_global_mutex);
	rfkill_event(rfkill);
	mutex_unlock(&rfkill_global_mutex);
}

/* Work handler: run rfkill_sync() under rfkill_global_mutex. */
static void rfkill_sync_work(struct work_struct *work)
{
	struct rfkill *rfkill = container_of(work, struct rfkill, sync_work);

	mutex_lock(&rfkill_global_mutex);
	rfkill_sync(rfkill);
	mutex_unlock(&rfkill_global_mutex);
}

/*
 * Register @rfkill: assign it the next index, add it to rfkill_list, add
 * the device and register its LED trigger, all under rfkill_global_mutex.
 *
 * Returns 0 on success, -EINVAL for a NULL @rfkill, -EALREADY if it is
 * already registered, or the error from device_add() /
 * rfkill_led_trigger_register(), in which case the steps already done are
 * undone (rfkill_no is not rolled back).
 */
int __must_check rfkill_register(struct rfkill *rfkill)
{
	static unsigned long rfkill_no;
	struct device *dev;
	int error;

	if (!rfkill)
		return -EINVAL;

	dev = &rfkill->dev;

	mutex_lock(&rfkill_global_mutex);

	if (rfkill->registered) {
		error = -EALREADY;
		goto unlock;
	}

	rfkill->idx = rfkill_no;
	dev_set_name(dev, "rfkill%lu", rfkill_no);
	rfkill_no++;

	list_add_tail(&rfkill->node, &rfkill_list);

	error = device_add(dev);
	if (error)
		goto remove;

	error = rfkill_led_trigger_register(rfkill);
	if (error)
		goto devdel;

	rfkill->registered = true;

	INIT_DELAYED_WORK(&rfkill->poll_work, rfkill_poll);
	INIT_WORK(&rfkill->uevent_work, rfkill_uevent_work);
	INIT_WORK(&rfkill->sync_work, rfkill_sync_work);

	if (rfkill->ops->poll)
		queue_delayed_work(system_power_efficient_wq,
			&rfkill->poll_work,
			round_jiffies_relative(POLL_INTERVAL));

	/*
	 * Non-persistent devices, and every device while the EPO lock is
	 * active, are flagged for a deferred sync. Otherwise a persistent
	 * device's own soft state is applied to all devices of its type
	 * (only when rfkill-input is built in and not disabled).
	 */
	if (!rfkill->persistent || rfkill_epo_lock_active) {
		rfkill->need_sync = true;
		schedule_work(&rfkill->sync_work);
	} else {
#ifdef CONFIG_RFKILL_INPUT
		bool soft_blocked = !!(rfkill->state & RFKILL_BLOCK_SW);

		if (!atomic_read(&rfkill_input_disabled))
			__rfkill_switch_all(rfkill->type, soft_blocked);
#endif
	}

	rfkill_global_led_trigger_event();
	rfkill_send_events(rfkill, RFKILL_OP_ADD);

	mutex_unlock(&rfkill_global_mutex);
	return 0;

 devdel:
	device_del(&rfkill->dev);
 remove:
	list_del_init(&rfkill->node);
 unlock:
	mutex_unlock(&rfkill_global_mutex);
	return error;
}
EXPORT_SYMBOL(rfkill_register);

/*
 * Unregister @rfkill: cancel its outstanding work, delete the device, send
 * an RFKILL_OP_DEL event, take it off rfkill_list and unregister its LED
 * trigger. Releasing the memory is left to rfkill_destroy().
 */
void rfkill_unregister(struct rfkill *rfkill)
{
	BUG_ON(!rfkill);

	if (rfkill->ops->poll)
		cancel_delayed_work_sync(&rfkill->poll_work);

	cancel_work_sync(&rfkill->uevent_work);
	cancel_work_sync(&rfkill->sync_work);

	rfkill->registered = false;

	device_del(&rfkill->dev);

	mutex_lock(&rfkill_global_mutex);
	rfkill_send_events(rfkill, RFKILL_OP_DEL);
	list_del_init(&rfkill->node);
	rfkill_global_led_trigger_event();
	mutex_unlock(&rfkill_global_mutex);

	rfkill_led_trigger_unregister(rfkill);
}
EXPORT_SYMBOL(rfkill_unregister);

/* Drop the device reference taken at allocation; a NULL @rfkill is a no-op. */
void rfkill_destroy(struct rfkill *rfkill)
{
	if (rfkill)
		put_device(&rfkill->dev);
}
EXPORT_SYMBOL(rfkill_destroy);

/*
 * open() for /dev/rfkill: allocate the per-file state and queue one
 * RFKILL_OP_ADD event for every device currently on rfkill_list, then add
 * the file to rfkill_fds. Returns -ENOMEM if any allocation fails; events
 * queued so far are freed in that case.
 */
static int rfkill_fop_open(struct inode *inode, struct file *file)
{
	struct rfkill_data *data;
	struct rfkill *rfkill;
	struct rfkill_int_event *ev, *tmp;

	data = kzalloc_obj(*data);
	if (!data)
		return -ENOMEM;

	data->max_size = RFKILL_EVENT_SIZE_V1;

	INIT_LIST_HEAD(&data->events);
	mutex_init(&data->mtx);
	init_waitqueue_head(&data->read_wait);

	mutex_lock(&rfkill_global_mutex);
	/*
	 * start getting events from elsewhere but hold mtx to get
	 * startup events added first
	 */

	list_for_each_entry(rfkill, &rfkill_list, node) {
		ev = kzalloc_obj(*ev);
		if (!ev)
			goto free;
		rfkill_sync(rfkill);
		rfkill_fill_event(&ev->ev, rfkill, RFKILL_OP_ADD);
		mutex_lock(&data->mtx);
		list_add_tail(&ev->list, &data->events);
		mutex_unlock(&data->mtx);
	}
	list_add(&data->list, &rfkill_fds);
	mutex_unlock(&rfkill_global_mutex);

	file->private_data = data;

	return stream_open(inode, file);

 free:
	mutex_unlock(&rfkill_global_mutex);
	mutex_destroy(&data->mtx);
	list_for_each_entry_safe(ev, tmp, &data->events, list)
		kfree(ev);
	kfree(data);
	return -ENOMEM;
}

/*
 * poll() for /dev/rfkill: reports EPOLLOUT | EPOLLWRNORM when no event is
 * queued; when events are queued the result is replaced (not OR-ed) with
 * EPOLLIN | EPOLLRDNORM.
 */
static __poll_t rfkill_fop_poll(struct file *file, poll_table *wait)
{
	struct rfkill_data *data = file->private_data;
	__poll_t res = EPOLLOUT | EPOLLWRNORM;

	poll_wait(file, &data->read_wait, wait);

	mutex_lock(&data->mtx);
	if (!list_empty(&data->events))
		res = EPOLLIN | EPOLLRDNORM;
	mutex_unlock(&data->mtx);

	return res;
}

/*
 * read() for /dev/rfkill: deliver the oldest queued event.
 *
 * Blocks until an event arrives unless O_NONBLOCK is set (-EAGAIN). At most
 * min(sizeof(ev->ev), @count, data->max_size) bytes are copied. The event
 * is removed from the queue and freed even when copy_to_user() fails, in
 * which case -EFAULT is returned.
 */
static ssize_t rfkill_fop_read(struct file *file, char __user *buf,
			       size_t count, loff_t *pos)
{
	struct rfkill_data *data = file->private_data;
	struct rfkill_int_event *ev;
	unsigned long sz;
	int ret;

	mutex_lock(&data->mtx);

	while (list_empty(&data->events)) {
		if (file->f_flags & O_NONBLOCK) {
			ret = -EAGAIN;
			goto out;
		}
		mutex_unlock(&data->mtx);
		/* since we re-check and it just compares pointers,
		 * using !list_empty() without locking isn't a problem
		 */
		ret = wait_event_interruptible(data->read_wait,
					       !list_empty(&data->events));
		mutex_lock(&data->mtx);

		if (ret)
			goto out;
	}

	ev = list_first_entry(&data->events, struct rfkill_int_event,
				list);

	sz = min_t(unsigned long, sizeof(ev->ev), count);
	sz = min_t(unsigned long, sz, data->max_size);
	ret = sz;
	if (copy_to_user(buf, &ev->ev, sz))
		ret = -EFAULT;

	list_del(&ev->list);
	kfree(ev);
 out:
	mutex_unlock(&data->mtx);
	return ret;
}

/*
 * write() for /dev/rfkill: apply an RFKILL_OP_CHANGE or
 * RFKILL_OP_CHANGE_ALL request.
 *
 * Returns the number of bytes consumed on success, -EINVAL for a short
 * write, an out-of-range type or an unsupported op, and -EFAULT if the
 * user buffer cannot be read.
 */
static ssize_t rfkill_fop_write(struct file *file, const char __user *buf,
				size_t count, loff_t *pos)
{
	struct rfkill_data *data = file->private_data;
	struct rfkill *rfkill;
	struct rfkill_event_ext ev;
	int ret;

	/* we don't need the 'hard' variable but accept it */
	if (count < RFKILL_EVENT_SIZE_V1 - 1)
		return -EINVAL;

	/*
	 * Copy as much data as we can accept into our 'ev' buffer,
	 * but tell userspace how much we've copied so it can determine
	 * our API version even in a write() call, if it cares.
	 */
	count = min(count, sizeof(ev));
	count = min_t(size_t, count, data->max_size);
	if (copy_from_user(&ev, buf, count))
		return -EFAULT;

	if (ev.type >= NUM_RFKILL_TYPES)
		return -EINVAL;

	mutex_lock(&rfkill_global_mutex);

	switch (ev.op) {
	case RFKILL_OP_CHANGE_ALL:
		/* Update the global state, then every device of that type. */
		rfkill_update_global_state(ev.type, ev.soft);
		list_for_each_entry(rfkill, &rfkill_list, node)
			if (rfkill->type == ev.type ||
			    ev.type == RFKILL_TYPE_ALL)
				rfkill_set_block(rfkill, ev.soft);
		ret = 0;
		break;
	case RFKILL_OP_CHANGE:
		/* Only the device with a matching index (and type) changes. */
		list_for_each_entry(rfkill, &rfkill_list, node)
			if (rfkill->idx == ev.idx &&
			    (rfkill->type == ev.type ||
			     ev.type == RFKILL_TYPE_ALL))
				rfkill_set_block(rfkill, ev.soft);
		ret = 0;
		break;
	default:
		ret = -EINVAL;
		break;
	}

	mutex_unlock(&rfkill_global_mutex);

	return ret ?: count;
}

/*
 * release() for /dev/rfkill: unlink the file from rfkill_fds, free any
 * undelivered events and, if this file had disabled the input handler,
 * drop its count on rfkill_input_disabled.
 */
static int rfkill_fop_release(struct inode *inode, struct file *file)
{
	struct rfkill_data *data = file->private_data;
	struct rfkill_int_event *ev, *tmp;

	mutex_lock(&rfkill_global_mutex);
	list_del(&data->list);
	mutex_unlock(&rfkill_global_mutex);

	mutex_destroy(&data->mtx);
	list_for_each_entry_safe(ev, tmp, &data->events, list)
		kfree(ev);

#ifdef CONFIG_RFKILL_INPUT
	if (data->input_handler)
		if (atomic_dec_return(&rfkill_input_disabled) == 0)
			printk(KERN_DEBUG "rfkill: input handler enabled\n");
#endif

	kfree(data);

	return 0;
}

/*
 * ioctl() for /dev/rfkill.
 *
 * RFKILL_IOC_NOINPUT (only with CONFIG_RFKILL_INPUT) disables the input
 * handler once per open file. RFKILL_IOC_MAX_SIZE sets the per-file event
 * size limit, which must lie in [RFKILL_EVENT_SIZE_V1, U8_MAX]. Any other
 * command, or a wrong ioctl magic, yields -ENOTTY.
 */
static long rfkill_fop_ioctl(struct file *file, unsigned int cmd,
			     unsigned long arg)
{
	struct rfkill_data *data = file->private_data;
	int ret = -ENOTTY;
	u32 size;

	if (_IOC_TYPE(cmd) != RFKILL_IOC_MAGIC)
		return -ENOTTY;

	mutex_lock(&data->mtx);
	switch (_IOC_NR(cmd)) {
#ifdef CONFIG_RFKILL_INPUT
	case RFKILL_IOC_NOINPUT:
		if (!data->input_handler) {
			if (atomic_inc_return(&rfkill_input_disabled) == 1)
				printk(KERN_DEBUG "rfkill: input handler disabled\n");
			data->input_handler = true;
		}
		ret = 0;
		break;
#endif
	case RFKILL_IOC_MAX_SIZE:
		if (get_user(size, (__u32 __user *)arg)) {
			ret = -EFAULT;
			break;
		}
		if (size < RFKILL_EVENT_SIZE_V1 || size > U8_MAX) {
			ret = -EINVAL;
			break;
		}
		data->max_size = size;
		ret = 0;
		break;
	default:
		break;
	}
	mutex_unlock(&data->mtx);

	return ret;
}

/* File operations backing the /dev/rfkill misc device. */
static const struct file_operations rfkill_fops = {
	.owner		= THIS_MODULE,
	.open		= rfkill_fop_open,
	.read		= rfkill_fop_read,
	.write		= rfkill_fop_write,
	.poll		= rfkill_fop_poll,
	.release	= rfkill_fop_release,
	.unlocked_ioctl	= rfkill_fop_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
};

#define RFKILL_NAME "rfkill"

static struct miscdevice rfkill_miscdev = {
	.fops	= &rfkill_fops,
	.name	= RFKILL_NAME,
	.minor	= RFKILL_MINOR,
};

/*
 * Subsystem init: seed the global state from rfkill_default_state, then
 * register the class, the misc device, the global LED trigger and (with
 * CONFIG_RFKILL_INPUT) the input handler. On failure, everything
 * registered so far is torn down in reverse order.
 */
static int __init rfkill_init(void)
{
	int error;

	rfkill_update_global_state(RFKILL_TYPE_ALL, !rfkill_default_state);

	error = class_register(&rfkill_class);
	if (error)
		goto error_class;

	error = misc_register(&rfkill_miscdev);
	if (error)
		goto error_misc;

	error = rfkill_global_led_trigger_register();
	if (error)
		goto error_led_trigger;

#ifdef CONFIG_RFKILL_INPUT
	error = rfkill_handler_init();
	if (error)
		goto error_input;
#endif

	return 0;

#ifdef CONFIG_RFKILL_INPUT
error_input:
	rfkill_global_led_trigger_unregister();
#endif
error_led_trigger:
	misc_deregister(&rfkill_miscdev);
error_misc:
	class_unregister(&rfkill_class);
error_class:
	return error;
}
subsys_initcall(rfkill_init);

/* Module exit: undo rfkill_init() in reverse order. */
static void __exit rfkill_exit(void)
{
#ifdef CONFIG_RFKILL_INPUT
	rfkill_handler_exit();
#endif
	rfkill_global_led_trigger_unregister();
	misc_deregister(&rfkill_miscdev);
	class_unregister(&rfkill_class);
}
module_exit(rfkill_exit);

MODULE_ALIAS_MISCDEV(RFKILL_MINOR);
MODULE_ALIAS("devname:" RFKILL_NAME);
333 333 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
// SPDX-License-Identifier: GPL-2.0
/*
 * x86 specific code for irq_work
 *
 * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra
 */

#include <linux/kernel.h>
#include <linux/irq_work.h>
#include <linux/hardirq.h>
#include <asm/apic.h>
#include <asm/idtentry.h>
#include <asm/trace/irq_vectors.h>
#include <linux/interrupt.h>

#ifdef CONFIG_X86_LOCAL_APIC
/*
 * Handler for the irq_work system vector: signal EOI first, then run the
 * pending irq_work between the entry/exit tracepoints and bump the
 * apic_irq_work_irqs statistic.
 */
DEFINE_IDTENTRY_SYSVEC(sysvec_irq_work)
{
	apic_eoi();
	trace_irq_work_entry(IRQ_WORK_VECTOR);
	inc_irq_stat(apic_irq_work_irqs);
	irq_work_run();
	trace_irq_work_exit(IRQ_WORK_VECTOR);
}

/*
 * Raise the irq_work interrupt by sending a self-IPI on IRQ_WORK_VECTOR and
 * waiting for the ICR to go idle. Does nothing when
 * arch_irq_work_has_interrupt() reports false.
 */
void arch_irq_work_raise(void)
{
	if (!arch_irq_work_has_interrupt())
		return;

	__apic_send_IPI_self(IRQ_WORK_VECTOR);
	apic_wait_icr_idle();
}
#endif
19 18 19 17 1 3 1 3 3 3 3 1 1 15 2 1 7 2 1 1 1 4 4 1 2 1 1 18 3 15 19 3 1 3 1 1 1 11 1 1 1 1 11 1 1 2 18 3 15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 
500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 
1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 
1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 
1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Realtek RTL28xxU DVB USB driver
 *
 * Copyright (C) 2009 Antti Palosaari <crope@iki.fi>
 * Copyright (C) 2011 Antti Palosaari <crope@iki.fi>
 * Copyright (C) 2012 Thomas Mair <thomas.mair86@googlemail.com>
 */

#include "rtl28xxu.h"

static int rtl28xxu_disable_rc;
module_param_named(disable_rc, rtl28xxu_disable_rc, int, 0644);
MODULE_PARM_DESC(disable_rc, "disable RTL2832U remote controller");

DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr);

/*
 * Issue one vendor control transfer described by @req.
 *
 * The payload is bounced through dev->buf under d->usb_mutex: write
 * requests (CMD_WR_FLAG set in req->index) copy req->data into the buffer
 * before the transfer, read requests copy the buffer back afterwards.
 *
 * Returns 0 on success, -EINVAL if req->size exceeds dev->buf, or the
 * negative error from usb_control_msg().
 */
static int rtl28xxu_ctrl_msg(struct dvb_usb_device *d, struct rtl28xxu_req *req)
{
	struct rtl28xxu_dev *dev = d->priv;
	int ret;
	unsigned int pipe;
	u8 requesttype;

	mutex_lock(&d->usb_mutex);

	if (req->size > sizeof(dev->buf)) {
		dev_err(&d->intf->dev, "too large message %u\n", req->size);
		ret = -EINVAL;
		goto err_mutex_unlock;
	}

	if (req->index & CMD_WR_FLAG) {
		/* write */
		memcpy(dev->buf, req->data, req->size);
		requesttype = (USB_TYPE_VENDOR | USB_DIR_OUT);
		pipe = usb_sndctrlpipe(d->udev, 0);
	} else {
		/* read */
		requesttype = (USB_TYPE_VENDOR | USB_DIR_IN);

		/*
		 * Zero-length transfers must use usb_sndctrlpipe() and
		 * rtl28xxu_identify_state() uses a zero-length i2c read
		 * command to determine the chip type.
		 */
		if (req->size)
			pipe = usb_rcvctrlpipe(d->udev, 0);
		else
			pipe = usb_sndctrlpipe(d->udev, 0);
	}

	ret = usb_control_msg(d->udev, pipe, 0, requesttype, req->value,
			req->index, dev->buf, req->size, 1000);
	dvb_usb_dbg_usb_control_msg(d->udev, 0, requesttype, req->value,
			req->index, dev->buf, req->size);
	if (ret < 0)
		goto err_mutex_unlock;

	/* read request, copy returned data to return buf */
	if (requesttype == (USB_TYPE_VENDOR | USB_DIR_IN))
		memcpy(req->data, dev->buf, req->size);

	mutex_unlock(&d->usb_mutex);

	return 0;
err_mutex_unlock:
	mutex_unlock(&d->usb_mutex);
	dev_dbg(&d->intf->dev, "failed=%d\n", ret);
	return ret;
}

/*
 * Write @len bytes from @val starting at register @reg. The command is
 * chosen by address range: below 0x3000 CMD_USB_WR, below 0x4000
 * CMD_SYS_WR, otherwise CMD_IR_WR.
 */
static int rtl28xxu_wr_regs(struct dvb_usb_device *d, u16 reg, u8 *val, int len)
{
	struct rtl28xxu_req req;

	if (reg < 0x3000)
		req.index = CMD_USB_WR;
	else if (reg < 0x4000)
		req.index = CMD_SYS_WR;
	else
		req.index = CMD_IR_WR;

	req.value = reg;
	req.size = len;
	req.data = val;

	return rtl28xxu_ctrl_msg(d, &req);
}

/*
 * Read @len bytes into @val starting at register @reg. The command is
 * chosen by the same address ranges as rtl28xxu_wr_regs().
 */
static int rtl28xxu_rd_regs(struct dvb_usb_device *d, u16 reg, u8 *val, int len)
{
	struct rtl28xxu_req req;

	if (reg < 0x3000)
		req.index = CMD_USB_RD;
	else if (reg < 0x4000)
		req.index = CMD_SYS_RD;
	else
		req.index = CMD_IR_RD;

	req.value = reg;
	req.size = len;
	req.data = val;

	return rtl28xxu_ctrl_msg(d, &req);
}

/* Write a single register. */
static int rtl28xxu_wr_reg(struct dvb_usb_device *d, u16 reg, u8 val)
{
	return rtl28xxu_wr_regs(d, reg, &val, 1);
}

/* Read a single register. */
static int rtl28xxu_rd_reg(struct dvb_usb_device *d, u16 reg, u8 *val)
{
	return rtl28xxu_rd_regs(d, reg, val, 1);
}

/*
 * Read-modify-write: replace only the bits selected by @mask with the
 * corresponding bits of @val. A read failure is returned without writing.
 */
static int rtl28xxu_wr_reg_mask(struct dvb_usb_device *d, u16 reg, u8 val,
		u8 mask)
{
	int ret;
	u8 tmp;

	/* no need for read if whole reg is written */
	if (mask != 0xff) {
		ret = rtl28xxu_rd_reg(d, reg, &tmp);
		if (ret)
			return ret;

		val &= mask;
		tmp &= ~mask;
		val |= tmp;
	}

	return rtl28xxu_wr_reg(d, reg, val);
}

/* I2C */
/*
 * I2C transfer callback.
 *
 * Supported shapes are write+read (num == 2), a single write and a single
 * read; anything else yields -EOPNOTSUPP. Returns @num on success,
 * -EAGAIN if the mutex wait is interrupted or the transfer failed with
 * -EPIPE, otherwise the negative error from the control transfer.
 */
static int rtl28xxu_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[],
	int num)
{
	int ret;
	struct dvb_usb_device *d = i2c_get_adapdata(adap);
	struct rtl28xxu_dev *dev = d->priv;
	struct rtl28xxu_req req;

	/*
	 * It is not known which are real I2C bus xfer limits, but testing
	 * with RTL2831U + MT2060 gives max RD 24 and max WR 22 bytes.
	 * TODO: find out RTL2832U lens
	 */

	/*
	 * I2C adapter logic looks rather complicated due to the fact that it
	 * handles three different access methods. Those methods are:
	 * 1) integrated demod access
	 * 2) old I2C access
	 * 3) new I2C access
	 *
	 * Used method is selected in order 1, 2, 3. Method 3 can handle all
	 * requests but there are two reasons why it is not always used:
	 * 1) It is most expensive, usually two USB messages are needed
	 * 2) At least RTL2831U does not support it
	 *
	 * Method 3 is needed in case of I2C write+read (typical register read)
	 * where write is more than one byte.
	 */

	if (mutex_lock_interruptible(&d->i2c_mutex) < 0)
		return -EAGAIN;

	if (num == 2 && !(msg[0].flags & I2C_M_RD) &&
		(msg[1].flags & I2C_M_RD)) {
		if (msg[0].len > 24 || msg[1].len > 24) {
			/* TODO: check msg[0].len max */
			ret = -EOPNOTSUPP;
			goto err_mutex_unlock;
		} else if (msg[0].addr == 0x10) {
			/* Both buffers are indexed at [0] below. */
			if (msg[0].len < 1 || msg[1].len < 1) {
				ret = -EOPNOTSUPP;
				goto err_mutex_unlock;
			}
			/* method 1 - integrated demod */
			if (msg[0].buf[0] == 0x00) {
				/* return demod page from driver cache */
				msg[1].buf[0] = dev->page;
				ret = 0;
			} else {
				req.value = (msg[0].buf[0] << 8) | (msg[0].addr << 1);
				req.index = CMD_DEMOD_RD | dev->page;
				req.size = msg[1].len;
				req.data = &msg[1].buf[0];
				ret = rtl28xxu_ctrl_msg(d, &req);
			}
		} else if (msg[0].len < 2) {
			if (msg[0].len < 1) {
				ret = -EOPNOTSUPP;
				goto err_mutex_unlock;
			}
			/* method 2 - old I2C */
			req.value = (msg[0].buf[0] << 8) | (msg[0].addr << 1);
			req.index = CMD_I2C_RD;
			req.size = msg[1].len;
			req.data = &msg[1].buf[0];
			ret = rtl28xxu_ctrl_msg(d, &req);
		} else {
			/* method 3 - new I2C */
			req.value = (msg[0].addr << 1);
			req.index = CMD_I2C_DA_WR;
			req.size = msg[0].len;
			req.data = msg[0].buf;
			ret = rtl28xxu_ctrl_msg(d, &req);
			if (ret)
				goto err_mutex_unlock;

			req.value = (msg[0].addr << 1);
			req.index = CMD_I2C_DA_RD;
			req.size = msg[1].len;
			req.data = msg[1].buf;
			ret = rtl28xxu_ctrl_msg(d, &req);
		}
	} else if (num == 1 && !(msg[0].flags & I2C_M_RD)) {
		if (msg[0].len > 22) {
			/* TODO: check msg[0].len max */
			ret = -EOPNOTSUPP;
			goto err_mutex_unlock;
		} else if (msg[0].addr == 0x10) {
			if (msg[0].len < 1) {
				ret = -EOPNOTSUPP;
				goto err_mutex_unlock;
			}
			/* method 1 - integrated demod */
			if (msg[0].buf[0] == 0x00) {
				/* The page value is read from buf[1]. */
				if (msg[0].len < 2) {
					ret = -EOPNOTSUPP;
					goto err_mutex_unlock;
				}
				/* save demod page for later demod access */
				dev->page = msg[0].buf[1];
				ret = 0;
			} else {
				req.value = (msg[0].buf[0] << 8) |
					(msg[0].addr << 1);
				req.index = CMD_DEMOD_WR | dev->page;
				req.size = msg[0].len-1;
				req.data = &msg[0].buf[1];
				ret = rtl28xxu_ctrl_msg(d, &req);
			}
		} else if ((msg[0].len < 23) && (!dev->new_i2c_write)) {
			if (msg[0].len < 1) {
				ret = -EOPNOTSUPP;
				goto err_mutex_unlock;
			}
			/* method 2 - old I2C */
			req.value = (msg[0].buf[0] << 8) | (msg[0].addr << 1);
			req.index = CMD_I2C_WR;
			req.size = msg[0].len-1;
			req.data = &msg[0].buf[1];
			ret = rtl28xxu_ctrl_msg(d, &req);
		} else {
			/* method 3 - new I2C */
			req.value = (msg[0].addr << 1);
			req.index = CMD_I2C_DA_WR;
			req.size = msg[0].len;
			req.data = msg[0].buf;
			ret = rtl28xxu_ctrl_msg(d, &req);
		}
	} else if (num == 1 && (msg[0].flags & I2C_M_RD)) {
		req.value = (msg[0].addr << 1);
		req.index = CMD_I2C_DA_RD;
		req.size = msg[0].len;
		req.data = msg[0].buf;
		ret = rtl28xxu_ctrl_msg(d, &req);
	} else {
		ret = -EOPNOTSUPP;
	}

	/* Retry failed I2C messages */
	if (ret == -EPIPE)
		ret = -EAGAIN;

err_mutex_unlock:
	mutex_unlock(&d->i2c_mutex);

	return ret ? ret : num;
}

/* The adapter advertises plain I2C functionality only. */
static u32 rtl28xxu_i2c_func(struct i2c_adapter *adapter)
{
	return I2C_FUNC_I2C;
}

static const struct i2c_algorithm rtl28xxu_i2c_algo = {
	.master_xfer   = rtl28xxu_i2c_xfer,
	.functionality = rtl28xxu_i2c_func,
};

static int rtl2831u_read_config(struct dvb_usb_device *d)
{
	struct rtl28xxu_dev *dev = d_to_priv(d);
	int ret;
	u8 buf[1];
	/* open RTL2831U/RTL2830 I2C gate */
	struct rtl28xxu_req req_gate_open = {0x0120, 0x0011, 0x0001, "\x08"};
	/* tuner probes */
	struct rtl28xxu_req req_mt2060 = {0x00c0, CMD_I2C_RD, 1, buf};
	struct rtl28xxu_req req_qt1010 = {0x0fc4, CMD_I2C_RD, 1, buf};

	dev_dbg(&d->intf->dev, "\n");

	/*
	 * RTL2831U GPIOs
	 * =========================================================
	 * GPIO0 | tuner#0 | 0 off | 1 on  | MXL5005S (?)
	 * GPIO2 | LED     | 0 off | 1 on  |
	 * GPIO4 | tuner#1 | 0 on  | 1 off | MT2060
	 */

	/* GPIO direction */
	ret = rtl28xxu_wr_reg(d, SYS_GPIO_DIR, 0x0a);
	if (ret)
		goto err;

	/* enable as output GPIO0, GPIO2, GPIO4 */
	ret = rtl28xxu_wr_reg(d, SYS_GPIO_OUT_EN, 0x15);
	if (ret)
		goto err;

	/*
	 * Probe used tuner. We need to know used tuner before demod attach
	 * since there is some demod params needed to set according to tuner.
	 */

	/* demod needs some time to wake up */
	msleep(20);

	dev->tuner_name = "NONE";

	/* open demod I2C gate */
	ret = rtl28xxu_ctrl_msg(d, &req_gate_open);
	if (ret)
		goto err;

	/* check QT1010 ID(?) 
register; reg=0f val=2c */ ret = rtl28xxu_ctrl_msg(d, &req_qt1010); if (ret == 0 && buf[0] == 0x2c) { dev->tuner = TUNER_RTL2830_QT1010; dev->tuner_name = "QT1010"; goto found; } /* open demod I2C gate */ ret = rtl28xxu_ctrl_msg(d, &req_gate_open); if (ret) goto err; /* check MT2060 ID register; reg=00 val=63 */ ret = rtl28xxu_ctrl_msg(d, &req_mt2060); if (ret == 0 && buf[0] == 0x63) { dev->tuner = TUNER_RTL2830_MT2060; dev->tuner_name = "MT2060"; goto found; } /* assume MXL5005S */ dev->tuner = TUNER_RTL2830_MXL5005S; dev->tuner_name = "MXL5005S"; goto found; found: dev_dbg(&d->intf->dev, "tuner=%s\n", dev->tuner_name); return 0; err: dev_dbg(&d->intf->dev, "failed=%d\n", ret); return ret; } static int rtl2832u_read_config(struct dvb_usb_device *d) { struct rtl28xxu_dev *dev = d_to_priv(d); int ret; u8 buf[2]; /* open RTL2832U/RTL2832 I2C gate */ struct rtl28xxu_req req_gate_open = {0x0120, 0x0011, 0x0001, "\x18"}; /* close RTL2832U/RTL2832 I2C gate */ struct rtl28xxu_req req_gate_close = {0x0120, 0x0011, 0x0001, "\x10"}; /* tuner probes */ struct rtl28xxu_req req_fc0012 = {0x00c6, CMD_I2C_RD, 1, buf}; struct rtl28xxu_req req_fc0013 = {0x00c6, CMD_I2C_RD, 1, buf}; struct rtl28xxu_req req_mt2266 = {0x00c0, CMD_I2C_RD, 1, buf}; struct rtl28xxu_req req_fc2580 = {0x01ac, CMD_I2C_RD, 1, buf}; struct rtl28xxu_req req_mt2063 = {0x00c0, CMD_I2C_RD, 1, buf}; struct rtl28xxu_req req_max3543 = {0x00c0, CMD_I2C_RD, 1, buf}; struct rtl28xxu_req req_tua9001 = {0x7ec0, CMD_I2C_RD, 2, buf}; struct rtl28xxu_req req_mxl5007t = {0xd9c0, CMD_I2C_RD, 1, buf}; struct rtl28xxu_req req_e4000 = {0x02c8, CMD_I2C_RD, 1, buf}; struct rtl28xxu_req req_tda18272 = {0x00c0, CMD_I2C_RD, 2, buf}; struct rtl28xxu_req req_r820t = {0x0034, CMD_I2C_RD, 1, buf}; struct rtl28xxu_req req_r828d = {0x0074, CMD_I2C_RD, 1, buf}; struct rtl28xxu_req req_mn88472 = {0xff38, CMD_I2C_RD, 1, buf}; struct rtl28xxu_req req_mn88473 = {0xff38, CMD_I2C_RD, 1, buf}; struct rtl28xxu_req req_cxd2837er = {0xfdd8, 
CMD_I2C_RD, 1, buf}; struct rtl28xxu_req req_si2157 = {0x00c0, CMD_I2C_RD, 1, buf}; struct rtl28xxu_req req_si2168 = {0x00c8, CMD_I2C_RD, 1, buf}; dev_dbg(&d->intf->dev, "\n"); /* enable GPIO3 and GPIO6 as output */ ret = rtl28xxu_wr_reg_mask(d, SYS_GPIO_DIR, 0x00, 0x40); if (ret) goto err; ret = rtl28xxu_wr_reg_mask(d, SYS_GPIO_OUT_EN, 0x48, 0x48); if (ret) goto err; /* * Probe used tuner. We need to know used tuner before demod attach * since there is some demod params needed to set according to tuner. */ /* open demod I2C gate */ ret = rtl28xxu_ctrl_msg(d, &req_gate_open); if (ret) goto err; dev->tuner_name = "NONE"; /* check FC0012 ID register; reg=00 val=a1 */ ret = rtl28xxu_ctrl_msg(d, &req_fc0012); if (ret == 0 && buf[0] == 0xa1) { dev->tuner = TUNER_RTL2832_FC0012; dev->tuner_name = "FC0012"; goto tuner_found; } /* check FC0013 ID register; reg=00 val=a3 */ ret = rtl28xxu_ctrl_msg(d, &req_fc0013); if (ret == 0 && buf[0] == 0xa3) { dev->tuner = TUNER_RTL2832_FC0013; dev->tuner_name = "FC0013"; goto tuner_found; } /* check MT2266 ID register; reg=00 val=85 */ ret = rtl28xxu_ctrl_msg(d, &req_mt2266); if (ret == 0 && buf[0] == 0x85) { dev->tuner = TUNER_RTL2832_MT2266; dev->tuner_name = "MT2266"; goto tuner_found; } /* check FC2580 ID register; reg=01 val=56 */ ret = rtl28xxu_ctrl_msg(d, &req_fc2580); if (ret == 0 && buf[0] == 0x56) { dev->tuner = TUNER_RTL2832_FC2580; dev->tuner_name = "FC2580"; goto tuner_found; } /* check MT2063 ID register; reg=00 val=9e || 9c */ ret = rtl28xxu_ctrl_msg(d, &req_mt2063); if (ret == 0 && (buf[0] == 0x9e || buf[0] == 0x9c)) { dev->tuner = TUNER_RTL2832_MT2063; dev->tuner_name = "MT2063"; goto tuner_found; } /* check MAX3543 ID register; reg=00 val=38 */ ret = rtl28xxu_ctrl_msg(d, &req_max3543); if (ret == 0 && buf[0] == 0x38) { dev->tuner = TUNER_RTL2832_MAX3543; dev->tuner_name = "MAX3543"; goto tuner_found; } /* check TUA9001 ID register; reg=7e val=2328 */ ret = rtl28xxu_ctrl_msg(d, &req_tua9001); if (ret == 0 && buf[0] == 
0x23 && buf[1] == 0x28) { dev->tuner = TUNER_RTL2832_TUA9001; dev->tuner_name = "TUA9001"; goto tuner_found; } /* check MXL5007T ID register; reg=d9 val=14 */ ret = rtl28xxu_ctrl_msg(d, &req_mxl5007t); if (ret == 0 && buf[0] == 0x14) { dev->tuner = TUNER_RTL2832_MXL5007T; dev->tuner_name = "MXL5007T"; goto tuner_found; } /* check E4000 ID register; reg=02 val=40 */ ret = rtl28xxu_ctrl_msg(d, &req_e4000); if (ret == 0 && buf[0] == 0x40) { dev->tuner = TUNER_RTL2832_E4000; dev->tuner_name = "E4000"; goto tuner_found; } /* check TDA18272 ID register; reg=00 val=c760 */ /* NOTE(review): the ID above is two bytes (c7 60) but the condition below accepts a match on either byte alone - confirm that '||' is intended */ ret = rtl28xxu_ctrl_msg(d, &req_tda18272); if (ret == 0 && (buf[0] == 0xc7 || buf[1] == 0x60)) { dev->tuner = TUNER_RTL2832_TDA18272; dev->tuner_name = "TDA18272"; goto tuner_found; } /* check R820T ID register; reg=00 val=69 */ ret = rtl28xxu_ctrl_msg(d, &req_r820t); if (ret == 0 && buf[0] == 0x69) { dev->tuner = TUNER_RTL2832_R820T; dev->tuner_name = "R820T"; goto tuner_found; } /* check R828D ID register; reg=00 val=69 */ ret = rtl28xxu_ctrl_msg(d, &req_r828d); if (ret == 0 && buf[0] == 0x69) { dev->tuner = TUNER_RTL2832_R828D; dev->tuner_name = "R828D"; goto tuner_found; } /* GPIO0 and GPIO5 to reset Si2157/Si2168 tuner and demod: clear bits 0x21 in OUT_VAL and OUT_EN, wait 50 ms, set them again, wait 50 ms */ ret = rtl28xxu_wr_reg_mask(d, SYS_GPIO_OUT_VAL, 0x00, 0x21); if (ret) goto err; ret = rtl28xxu_wr_reg_mask(d, SYS_GPIO_OUT_EN, 0x00, 0x21); if (ret) goto err; msleep(50); ret = rtl28xxu_wr_reg_mask(d, SYS_GPIO_OUT_VAL, 0x21, 0x21); if (ret) goto err; ret = rtl28xxu_wr_reg_mask(d, SYS_GPIO_OUT_EN, 0x21, 0x21); if (ret) goto err; msleep(50); /* check Si2157 ID register; reg=c0 val=80 (only bit 7 of the reply is tested) */ ret = rtl28xxu_ctrl_msg(d, &req_si2157); if (ret == 0 && ((buf[0] & 0x80) == 0x80)) { dev->tuner = TUNER_RTL2832_SI2157; dev->tuner_name = "SI2157"; goto tuner_found; } /* this label is also reached by fall-through when the Si2157 probe above does not match */ tuner_found: dev_dbg(&d->intf->dev, "tuner=%s\n", dev->tuner_name); /* probe slave demod */ if (dev->tuner == TUNER_RTL2832_R828D) { /* power off slave demod on GPIO0 to reset CXD2837ER */ ret = rtl28xxu_wr_reg_mask(d, 
SYS_GPIO_OUT_VAL, 0x00, 0x01); if (ret) goto err; ret = rtl28xxu_wr_reg_mask(d, SYS_GPIO_OUT_EN, 0x00, 0x01); if (ret) goto err; msleep(50); /* power on slave demod on GPIO0 */ ret = rtl28xxu_wr_reg_mask(d, SYS_GPIO_OUT_VAL, 0x01, 0x01); if (ret) goto err; ret = rtl28xxu_wr_reg_mask(d, SYS_GPIO_DIR, 0x00, 0x01); if (ret) goto err; ret = rtl28xxu_wr_reg_mask(d, SYS_GPIO_OUT_EN, 0x01, 0x01); if (ret) goto err; /* slave demod needs some time to wake up */ msleep(20); /* check slave answers */ ret = rtl28xxu_ctrl_msg(d, &req_mn88472); if (ret == 0 && buf[0] == 0x02) { dev_dbg(&d->intf->dev, "MN88472 found\n"); dev->slave_demod = SLAVE_DEMOD_MN88472; goto demod_found; } ret = rtl28xxu_ctrl_msg(d, &req_mn88473); if (ret == 0 && buf[0] == 0x03) { dev_dbg(&d->intf->dev, "MN88473 found\n"); dev->slave_demod = SLAVE_DEMOD_MN88473; goto demod_found; } ret = rtl28xxu_ctrl_msg(d, &req_cxd2837er); if (ret == 0 && buf[0] == 0xb1) { dev_dbg(&d->intf->dev, "CXD2837ER found\n"); dev->slave_demod = SLAVE_DEMOD_CXD2837ER; goto demod_found; } } if (dev->tuner == TUNER_RTL2832_SI2157) { /* check Si2168 ID register; reg=c8 val=80 */ ret = rtl28xxu_ctrl_msg(d, &req_si2168); if (ret == 0 && ((buf[0] & 0x80) == 0x80)) { dev_dbg(&d->intf->dev, "Si2168 found\n"); dev->slave_demod = SLAVE_DEMOD_SI2168; goto demod_found; } } demod_found: /* close demod I2C gate */ ret = rtl28xxu_ctrl_msg(d, &req_gate_close); if (ret < 0) goto err; return 0; err: dev_dbg(&d->intf->dev, "failed=%d\n", ret); return ret; } static int rtl28xxu_read_config(struct dvb_usb_device *d) { struct rtl28xxu_dev *dev = d_to_priv(d); if (dev->chip_id == CHIP_ID_RTL2831U) return rtl2831u_read_config(d); else return rtl2832u_read_config(d); } static int rtl28xxu_identify_state(struct dvb_usb_device *d, const char **name) { struct rtl28xxu_dev *dev = d_to_priv(d); int ret; struct rtl28xxu_req req_demod_i2c = {0x0020, CMD_I2C_DA_RD, 0, NULL}; dev_dbg(&d->intf->dev, "\n"); /* * Detect chip type using I2C command that is not 
supported * by old RTL2831U. */ ret = rtl28xxu_ctrl_msg(d, &req_demod_i2c); if (ret == -EPIPE) { dev->chip_id = CHIP_ID_RTL2831U; } else if (ret == 0) { dev->chip_id = CHIP_ID_RTL2832U; } else { dev_err(&d->intf->dev, "chip type detection failed %d\n", ret); goto err; } dev_dbg(&d->intf->dev, "chip_id=%u\n", dev->chip_id); /* Retry failed I2C messages */ d->i2c_adap.retries = 3; d->i2c_adap.timeout = msecs_to_jiffies(10); return WARM; err: dev_dbg(&d->intf->dev, "failed=%d\n", ret); return ret; } static const struct rtl2830_platform_data rtl2830_mt2060_platform_data = { .clk = 28800000, .spec_inv = 1, .vtop = 0x20, .krf = 0x04, .agc_targ_val = 0x2d, }; static const struct rtl2830_platform_data rtl2830_qt1010_platform_data = { .clk = 28800000, .spec_inv = 1, .vtop = 0x20, .krf = 0x04, .agc_targ_val = 0x2d, }; static const struct rtl2830_platform_data rtl2830_mxl5005s_platform_data = { .clk = 28800000, .spec_inv = 0, .vtop = 0x3f, .krf = 0x04, .agc_targ_val = 0x3e, }; static int rtl2831u_frontend_attach(struct dvb_usb_adapter *adap) { struct dvb_usb_device *d = adap_to_d(adap); struct rtl28xxu_dev *dev = d_to_priv(d); struct rtl2830_platform_data *pdata = &dev->rtl2830_platform_data; struct i2c_board_info board_info; struct i2c_client *client; int ret; dev_dbg(&d->intf->dev, "\n"); switch (dev->tuner) { case TUNER_RTL2830_QT1010: *pdata = rtl2830_qt1010_platform_data; break; case TUNER_RTL2830_MT2060: *pdata = rtl2830_mt2060_platform_data; break; case TUNER_RTL2830_MXL5005S: *pdata = rtl2830_mxl5005s_platform_data; break; default: dev_err(&d->intf->dev, "unknown tuner %s\n", dev->tuner_name); ret = -ENODEV; goto err; } /* attach demodulator */ memset(&board_info, 0, sizeof(board_info)); strscpy(board_info.type, "rtl2830", I2C_NAME_SIZE); board_info.addr = 0x10; board_info.platform_data = pdata; request_module("%s", board_info.type); client = i2c_new_client_device(&d->i2c_adap, &board_info); if (!i2c_client_has_driver(client)) { ret = -ENODEV; goto err; } if 
(!try_module_get(client->dev.driver->owner)) { i2c_unregister_device(client); ret = -ENODEV; goto err; } adap->fe[0] = pdata->get_dvb_frontend(client); dev->demod_i2c_adapter = pdata->get_i2c_adapter(client); dev->i2c_client_demod = client; return 0; err: dev_dbg(&d->intf->dev, "failed=%d\n", ret); return ret; } static const struct rtl2832_platform_data rtl2832_fc2580_platform_data = { .clk = 28800000, .tuner = TUNER_RTL2832_FC2580, }; static const struct rtl2832_platform_data rtl2832_fc0012_platform_data = { .clk = 28800000, .tuner = TUNER_RTL2832_FC0012 }; static const struct rtl2832_platform_data rtl2832_fc0013_platform_data = { .clk = 28800000, .tuner = TUNER_RTL2832_FC0013 }; static const struct rtl2832_platform_data rtl2832_tua9001_platform_data = { .clk = 28800000, .tuner = TUNER_RTL2832_TUA9001, }; static const struct rtl2832_platform_data rtl2832_e4000_platform_data = { .clk = 28800000, .tuner = TUNER_RTL2832_E4000, }; static const struct rtl2832_platform_data rtl2832_r820t_platform_data = { .clk = 28800000, .tuner = TUNER_RTL2832_R820T, }; static const struct rtl2832_platform_data rtl2832_si2157_platform_data = { .clk = 28800000, .tuner = TUNER_RTL2832_SI2157, }; static int rtl2832u_fc0012_tuner_callback(struct dvb_usb_device *d, int cmd, int arg) { int ret; u8 val; dev_dbg(&d->intf->dev, "cmd=%d arg=%d\n", cmd, arg); switch (cmd) { case FC_FE_CALLBACK_VHF_ENABLE: /* set output values */ ret = rtl28xxu_rd_reg(d, SYS_GPIO_OUT_VAL, &val); if (ret) goto err; if (arg) val &= 0xbf; /* set GPIO6 low */ else val |= 0x40; /* set GPIO6 high */ ret = rtl28xxu_wr_reg(d, SYS_GPIO_OUT_VAL, val); if (ret) goto err; break; default: ret = -EINVAL; goto err; } return 0; err: dev_dbg(&d->intf->dev, "failed=%d\n", ret); return ret; } static int rtl2832u_tua9001_tuner_callback(struct dvb_usb_device *d, int cmd, int arg) { int ret; u8 val; dev_dbg(&d->intf->dev, "cmd=%d arg=%d\n", cmd, arg); /* * CEN always enabled by hardware wiring * RESETN GPIO4 * RXEN GPIO1 */ switch 
(cmd) { case TUA9001_CMD_RESETN: if (arg) val = (1 << 4); else val = (0 << 4); ret = rtl28xxu_wr_reg_mask(d, SYS_GPIO_OUT_VAL, val, 0x10); if (ret) goto err; break; case TUA9001_CMD_RXEN: if (arg) val = (1 << 1); else val = (0 << 1); ret = rtl28xxu_wr_reg_mask(d, SYS_GPIO_OUT_VAL, val, 0x02); if (ret) goto err; break; } return 0; err: dev_dbg(&d->intf->dev, "failed=%d\n", ret); return ret; } static int rtl2832u_frontend_callback(void *adapter_priv, int component, int cmd, int arg) { struct i2c_adapter *adapter = adapter_priv; struct device *parent = adapter->dev.parent; struct i2c_adapter *parent_adapter; struct dvb_usb_device *d; struct rtl28xxu_dev *dev; /* * All tuners are connected to demod muxed I2C adapter. We have to * resolve its parent adapter in order to get handle for this driver * private data. That is a bit hackish solution, GPIO or direct driver * callback would be better... */ if (parent != NULL && parent->type == &i2c_adapter_type) parent_adapter = to_i2c_adapter(parent); else return -EINVAL; d = i2c_get_adapdata(parent_adapter); dev = d->priv; dev_dbg(&d->intf->dev, "component=%d cmd=%d arg=%d\n", component, cmd, arg); switch (component) { case DVB_FRONTEND_COMPONENT_TUNER: switch (dev->tuner) { case TUNER_RTL2832_FC0012: return rtl2832u_fc0012_tuner_callback(d, cmd, arg); case TUNER_RTL2832_TUA9001: return rtl2832u_tua9001_tuner_callback(d, cmd, arg); } } return 0; } static int rtl2832u_frontend_attach(struct dvb_usb_adapter *adap) { struct dvb_usb_device *d = adap_to_d(adap); struct rtl28xxu_dev *dev = d_to_priv(d); struct rtl2832_platform_data *pdata = &dev->rtl2832_platform_data; struct i2c_board_info board_info; struct i2c_client *client; int ret; dev_dbg(&d->intf->dev, "\n"); switch (dev->tuner) { case TUNER_RTL2832_FC0012: *pdata = rtl2832_fc0012_platform_data; break; case TUNER_RTL2832_FC0013: *pdata = rtl2832_fc0013_platform_data; break; case TUNER_RTL2832_FC2580: *pdata = rtl2832_fc2580_platform_data; break; case TUNER_RTL2832_TUA9001: 
*pdata = rtl2832_tua9001_platform_data; break; case TUNER_RTL2832_E4000: *pdata = rtl2832_e4000_platform_data; break; case TUNER_RTL2832_R820T: case TUNER_RTL2832_R828D: *pdata = rtl2832_r820t_platform_data; break; case TUNER_RTL2832_SI2157: *pdata = rtl2832_si2157_platform_data; break; default: dev_err(&d->intf->dev, "unknown tuner %s\n", dev->tuner_name); ret = -ENODEV; goto err; } /* attach demodulator */ memset(&board_info, 0, sizeof(board_info)); strscpy(board_info.type, "rtl2832", I2C_NAME_SIZE); board_info.addr = 0x10; board_info.platform_data = pdata; request_module("%s", board_info.type); client = i2c_new_client_device(&d->i2c_adap, &board_info); if (!i2c_client_has_driver(client)) { ret = -ENODEV; goto err; } if (!try_module_get(client->dev.driver->owner)) { i2c_unregister_device(client); ret = -ENODEV; goto err; } adap->fe[0] = pdata->get_dvb_frontend(client); dev->demod_i2c_adapter = pdata->get_i2c_adapter(client); dev->i2c_client_demod = client; /* set fe callback */ adap->fe[0]->callback = rtl2832u_frontend_callback; if (dev->slave_demod) { struct i2c_board_info info = {}; /* attach slave demodulator */ if (dev->slave_demod == SLAVE_DEMOD_MN88472) { struct mn88472_config mn88472_config = {}; mn88472_config.fe = &adap->fe[1]; mn88472_config.i2c_wr_max = 22; strscpy(info.type, "mn88472", I2C_NAME_SIZE); mn88472_config.xtal = 20500000; mn88472_config.ts_mode = SERIAL_TS_MODE; mn88472_config.ts_clock = VARIABLE_TS_CLOCK; info.addr = 0x18; info.platform_data = &mn88472_config; request_module(info.type); client = i2c_new_client_device(&d->i2c_adap, &info); if (!i2c_client_has_driver(client)) goto err_slave_demod_failed; if (!try_module_get(client->dev.driver->owner)) { i2c_unregister_device(client); goto err_slave_demod_failed; } dev->i2c_client_slave_demod = client; } else if (dev->slave_demod == SLAVE_DEMOD_MN88473) { struct mn88473_config mn88473_config = {}; mn88473_config.fe = &adap->fe[1]; mn88473_config.i2c_wr_max = 22; strscpy(info.type, "mn88473", 
I2C_NAME_SIZE); info.addr = 0x18; info.platform_data = &mn88473_config; request_module(info.type); client = i2c_new_client_device(&d->i2c_adap, &info); if (!i2c_client_has_driver(client)) goto err_slave_demod_failed; if (!try_module_get(client->dev.driver->owner)) { i2c_unregister_device(client); goto err_slave_demod_failed; } dev->i2c_client_slave_demod = client; } else if (dev->slave_demod == SLAVE_DEMOD_CXD2837ER) { struct cxd2841er_config cxd2837er_config = {}; cxd2837er_config.i2c_addr = 0xd8; cxd2837er_config.xtal = SONY_XTAL_20500; cxd2837er_config.flags = (CXD2841ER_AUTO_IFHZ | CXD2841ER_NO_AGCNEG | CXD2841ER_TSBITS | CXD2841ER_EARLY_TUNE | CXD2841ER_TS_SERIAL); adap->fe[1] = dvb_attach(cxd2841er_attach_t_c, &cxd2837er_config, &d->i2c_adap); if (!adap->fe[1]) goto err_slave_demod_failed; adap->fe[1]->id = 1; dev->i2c_client_slave_demod = NULL; } else { struct si2168_config si2168_config = {}; struct i2c_adapter *adapter; si2168_config.i2c_adapter = &adapter; si2168_config.fe = &adap->fe[1]; si2168_config.ts_mode = SI2168_TS_SERIAL; si2168_config.ts_clock_inv = false; si2168_config.ts_clock_gapped = true; strscpy(info.type, "si2168", I2C_NAME_SIZE); info.addr = 0x64; info.platform_data = &si2168_config; request_module(info.type); client = i2c_new_client_device(&d->i2c_adap, &info); if (!i2c_client_has_driver(client)) goto err_slave_demod_failed; if (!try_module_get(client->dev.driver->owner)) { i2c_unregister_device(client); goto err_slave_demod_failed; } dev->i2c_client_slave_demod = client; /* for Si2168 devices use only new I2C write method */ dev->new_i2c_write = true; } } return 0; err: dev_dbg(&d->intf->dev, "failed=%d\n", ret); return ret; err_slave_demod_failed: /* * We continue on reduced mode, without DVB-T2/C, using master * demod, when slave demod fails. 
*/ dev->slave_demod = SLAVE_DEMOD_NONE; return 0; } static int rtl28xxu_frontend_attach(struct dvb_usb_adapter *adap) { struct rtl28xxu_dev *dev = adap_to_priv(adap); if (dev->chip_id == CHIP_ID_RTL2831U) return rtl2831u_frontend_attach(adap); else return rtl2832u_frontend_attach(adap); } static int rtl28xxu_frontend_detach(struct dvb_usb_adapter *adap) { struct dvb_usb_device *d = adap_to_d(adap); struct rtl28xxu_dev *dev = d_to_priv(d); struct i2c_client *client; dev_dbg(&d->intf->dev, "\n"); /* remove I2C slave demod */ client = dev->i2c_client_slave_demod; if (client) { module_put(client->dev.driver->owner); i2c_unregister_device(client); } /* remove I2C demod */ client = dev->i2c_client_demod; if (client) { module_put(client->dev.driver->owner); i2c_unregister_device(client); } return 0; } static struct qt1010_config rtl28xxu_qt1010_config = { .i2c_address = 0x62, /* 0xc4 */ }; static struct mt2060_config rtl28xxu_mt2060_config = { .i2c_address = 0x60, /* 0xc0 */ .clock_out = 0, }; static struct mxl5005s_config rtl28xxu_mxl5005s_config = { .i2c_address = 0x63, /* 0xc6 */ .if_freq = IF_FREQ_4570000HZ, .xtal_freq = CRYSTAL_FREQ_16000000HZ, .agc_mode = MXL_SINGLE_AGC, .tracking_filter = MXL_TF_C_H, .rssi_enable = MXL_RSSI_ENABLE, .cap_select = MXL_CAP_SEL_ENABLE, .div_out = MXL_DIV_OUT_4, .clock_out = MXL_CLOCK_OUT_DISABLE, .output_load = MXL5005S_IF_OUTPUT_LOAD_200_OHM, .top = MXL5005S_TOP_25P2, .mod_mode = MXL_DIGITAL_MODE, .if_mode = MXL_ZERO_IF, .AgcMasterByte = 0x00, }; static int rtl2831u_tuner_attach(struct dvb_usb_adapter *adap) { int ret; struct dvb_usb_device *d = adap_to_d(adap); struct rtl28xxu_dev *dev = d_to_priv(d); struct dvb_frontend *fe; dev_dbg(&d->intf->dev, "\n"); switch (dev->tuner) { case TUNER_RTL2830_QT1010: fe = dvb_attach(qt1010_attach, adap->fe[0], dev->demod_i2c_adapter, &rtl28xxu_qt1010_config); break; case TUNER_RTL2830_MT2060: fe = dvb_attach(mt2060_attach, adap->fe[0], dev->demod_i2c_adapter, &rtl28xxu_mt2060_config, 1220); break; 
case TUNER_RTL2830_MXL5005S: fe = dvb_attach(mxl5005s_attach, adap->fe[0], dev->demod_i2c_adapter, &rtl28xxu_mxl5005s_config); break; default: fe = NULL; dev_err(&d->intf->dev, "unknown tuner %d\n", dev->tuner); } if (fe == NULL) { ret = -ENODEV; goto err; } return 0; err: dev_dbg(&d->intf->dev, "failed=%d\n", ret); return ret; } static const struct fc0012_config rtl2832u_fc0012_config = { .i2c_address = 0x63, /* 0xc6 >> 1 */ .xtal_freq = FC_XTAL_28_8_MHZ, }; static const struct r820t_config rtl2832u_r820t_config = { .i2c_addr = 0x1a, .xtal = 28800000, .max_i2c_msg_len = 2, .rafael_chip = CHIP_R820T, }; static const struct r820t_config rtl2832u_r828d_config = { .i2c_addr = 0x3a, .xtal = 16000000, .max_i2c_msg_len = 2, .rafael_chip = CHIP_R828D, }; static int rtl2832u_tuner_attach(struct dvb_usb_adapter *adap) { int ret; struct dvb_usb_device *d = adap_to_d(adap); struct rtl28xxu_dev *dev = d_to_priv(d); struct dvb_frontend *fe = NULL; struct i2c_board_info info; struct i2c_client *client; struct v4l2_subdev *subdev = NULL; struct platform_device *pdev; struct rtl2832_sdr_platform_data pdata; dev_dbg(&d->intf->dev, "\n"); memset(&info, 0, sizeof(struct i2c_board_info)); memset(&pdata, 0, sizeof(pdata)); switch (dev->tuner) { case TUNER_RTL2832_FC0012: fe = dvb_attach(fc0012_attach, adap->fe[0], dev->demod_i2c_adapter, &rtl2832u_fc0012_config); /* since fc0012 includs reading the signal strength delegate * that to the tuner driver */ adap->fe[0]->ops.read_signal_strength = adap->fe[0]->ops.tuner_ops.get_rf_strength; break; case TUNER_RTL2832_FC0013: fe = dvb_attach(fc0013_attach, adap->fe[0], dev->demod_i2c_adapter, 0xc6>>1, 0, FC_XTAL_28_8_MHZ); /* fc0013 also supports signal strength reading */ adap->fe[0]->ops.read_signal_strength = adap->fe[0]->ops.tuner_ops.get_rf_strength; break; case TUNER_RTL2832_E4000: { struct e4000_config e4000_config = { .fe = adap->fe[0], .clock = 28800000, }; strscpy(info.type, "e4000", I2C_NAME_SIZE); info.addr = 0x64; 
info.platform_data = &e4000_config; request_module(info.type); client = i2c_new_client_device(dev->demod_i2c_adapter, &info); if (!i2c_client_has_driver(client)) break; if (!try_module_get(client->dev.driver->owner)) { i2c_unregister_device(client); break; } dev->i2c_client_tuner = client; subdev = i2c_get_clientdata(client); } break; case TUNER_RTL2832_FC2580: { struct fc2580_platform_data fc2580_pdata = { .dvb_frontend = adap->fe[0], }; struct i2c_board_info board_info = {}; strscpy(board_info.type, "fc2580", I2C_NAME_SIZE); board_info.addr = 0x56; board_info.platform_data = &fc2580_pdata; request_module("fc2580"); client = i2c_new_client_device(dev->demod_i2c_adapter, &board_info); if (!i2c_client_has_driver(client)) break; if (!try_module_get(client->dev.driver->owner)) { i2c_unregister_device(client); break; } dev->i2c_client_tuner = client; subdev = fc2580_pdata.get_v4l2_subdev(client); } break; case TUNER_RTL2832_TUA9001: { struct tua9001_platform_data tua9001_pdata = { .dvb_frontend = adap->fe[0], }; struct i2c_board_info board_info = {}; /* enable GPIO1 and GPIO4 as output */ ret = rtl28xxu_wr_reg_mask(d, SYS_GPIO_DIR, 0x00, 0x12); if (ret) goto err; ret = rtl28xxu_wr_reg_mask(d, SYS_GPIO_OUT_EN, 0x12, 0x12); if (ret) goto err; strscpy(board_info.type, "tua9001", I2C_NAME_SIZE); board_info.addr = 0x60; board_info.platform_data = &tua9001_pdata; request_module("tua9001"); client = i2c_new_client_device(dev->demod_i2c_adapter, &board_info); if (!i2c_client_has_driver(client)) break; if (!try_module_get(client->dev.driver->owner)) { i2c_unregister_device(client); break; } dev->i2c_client_tuner = client; break; } case TUNER_RTL2832_R820T: fe = dvb_attach(r820t_attach, adap->fe[0], dev->demod_i2c_adapter, &rtl2832u_r820t_config); /* Use tuner to get the signal strength */ adap->fe[0]->ops.read_signal_strength = adap->fe[0]->ops.tuner_ops.get_rf_strength; break; case TUNER_RTL2832_R828D: fe = dvb_attach(r820t_attach, adap->fe[0], dev->demod_i2c_adapter, 
&rtl2832u_r828d_config); adap->fe[0]->ops.read_signal_strength = adap->fe[0]->ops.tuner_ops.get_rf_strength; if (adap->fe[1]) { fe = dvb_attach(r820t_attach, adap->fe[1], dev->demod_i2c_adapter, &rtl2832u_r828d_config); adap->fe[1]->ops.read_signal_strength = adap->fe[1]->ops.tuner_ops.get_rf_strength; } break; case TUNER_RTL2832_SI2157: { struct si2157_config si2157_config = { .fe = adap->fe[0], .if_port = 0, .inversion = false, }; strscpy(info.type, "si2157", I2C_NAME_SIZE); info.addr = 0x60; info.platform_data = &si2157_config; request_module(info.type); client = i2c_new_client_device(&d->i2c_adap, &info); if (!i2c_client_has_driver(client)) break; if (!try_module_get(client->dev.driver->owner)) { i2c_unregister_device(client); break; } dev->i2c_client_tuner = client; subdev = i2c_get_clientdata(client); /* copy tuner ops for 2nd FE as tuner is shared */ if (adap->fe[1]) { adap->fe[1]->tuner_priv = adap->fe[0]->tuner_priv; memcpy(&adap->fe[1]->ops.tuner_ops, &adap->fe[0]->ops.tuner_ops, sizeof(struct dvb_tuner_ops)); } } break; default: dev_err(&d->intf->dev, "unknown tuner %d\n", dev->tuner); } if (fe == NULL && dev->i2c_client_tuner == NULL) { ret = -ENODEV; goto err; } /* register SDR */ switch (dev->tuner) { case TUNER_RTL2832_FC2580: case TUNER_RTL2832_FC0012: case TUNER_RTL2832_FC0013: case TUNER_RTL2832_E4000: case TUNER_RTL2832_R820T: case TUNER_RTL2832_R828D: pdata.clk = dev->rtl2832_platform_data.clk; pdata.tuner = dev->tuner; pdata.regmap = dev->rtl2832_platform_data.regmap; pdata.dvb_frontend = adap->fe[0]; pdata.dvb_usb_device = d; pdata.v4l2_subdev = subdev; request_module("%s", "rtl2832_sdr"); pdev = platform_device_register_data(&d->intf->dev, "rtl2832_sdr", PLATFORM_DEVID_AUTO, &pdata, sizeof(pdata)); if (IS_ERR(pdev) || pdev->dev.driver == NULL) break; dev->platform_device_sdr = pdev; break; default: dev_dbg(&d->intf->dev, "no SDR for tuner=%d\n", dev->tuner); } return 0; err: dev_dbg(&d->intf->dev, "failed=%d\n", ret); return ret; } static int 
rtl28xxu_tuner_attach(struct dvb_usb_adapter *adap) { /* dispatch to the chip-specific tuner attach routine according to dev->chip_id */ struct rtl28xxu_dev *dev = adap_to_priv(adap); if (dev->chip_id == CHIP_ID_RTL2831U) return rtl2831u_tuner_attach(adap); else return rtl2832u_tuner_attach(adap); } /* Undo tuner attach: unregister the SDR platform device if one was recorded, then drop the module reference of the tuner I2C client and unregister it if one was recorded. Always returns 0. */ static int rtl28xxu_tuner_detach(struct dvb_usb_adapter *adap) { struct dvb_usb_device *d = adap_to_d(adap); struct rtl28xxu_dev *dev = d_to_priv(d); struct i2c_client *client; struct platform_device *pdev; dev_dbg(&d->intf->dev, "\n"); /* remove platform SDR */ pdev = dev->platform_device_sdr; if (pdev) platform_device_unregister(pdev); /* remove I2C tuner */ client = dev->i2c_client_tuner; if (client) { module_put(client->dev.driver->owner); i2c_unregister_device(client); } return 0; } /* Set up the USB streaming endpoint: set bits 0x09 in USB_SYSCTL_0, then write the EPA maximum packet size and FIFO configuration. Returns 0 on success or the error of the first failing register access. */ static int rtl28xxu_init(struct dvb_usb_device *d) { int ret; u8 val; dev_dbg(&d->intf->dev, "\n"); /* init USB endpoints */ ret = rtl28xxu_rd_reg(d, USB_SYSCTL_0, &val); if (ret) goto err; /* enable DMA and Full Packet Mode */ val |= 0x09; ret = rtl28xxu_wr_reg(d, USB_SYSCTL_0, val); if (ret) goto err; /* set EPA maximum packet size to 0x0200 */ ret = rtl28xxu_wr_regs(d, USB_EPA_MAXPKT, "\x00\x02\x00\x00", 4); if (ret) goto err; /* change EPA FIFO length */ ret = rtl28xxu_wr_regs(d, USB_EPA_FIFO_CFG, "\x14\x00\x00\x00", 4); if (ret) goto err; return ret; err: dev_dbg(&d->intf->dev, "failed=%d\n", ret); return ret; } /* RTL2831U power control. Reads SYS_SYS0 and SYS_GPIO_OUT_VAL, modifies GPIO0/GPIO2/GPIO4 and the upper SYS0 bits according to onoff, writes both registers back and finally writes the two stall/reset bytes to USB_EPA_CTL. Returns 0 on success or the error of the first failing register access. */ static int rtl2831u_power_ctrl(struct dvb_usb_device *d, int onoff) { int ret; u8 gpio, sys0, epa_ctl[2]; dev_dbg(&d->intf->dev, "onoff=%d\n", onoff); /* demod adc */ ret = rtl28xxu_rd_reg(d, SYS_SYS0, &sys0); if (ret) goto err; /* tuner power, read GPIOs */ ret = rtl28xxu_rd_reg(d, SYS_GPIO_OUT_VAL, &gpio); if (ret) goto err; dev_dbg(&d->intf->dev, "RD SYS0=%02x GPIO_OUT_VAL=%02x\n", sys0, gpio); if (onoff) { gpio |= 0x01; /* GPIO0 = 1 */ gpio &= (~0x10); /* GPIO4 = 0 */ gpio |= 0x04; /* GPIO2 = 1, LED on */ /* keep the low nibble of SYS0 and set bits 7..5 */ sys0 = sys0 & 0x0f; sys0 |= 0xe0; epa_ctl[0] = 0x00; /* clear stall */ epa_ctl[1] = 0x00; /* clear reset */ } else { gpio &= (~0x01); /* GPIO0 = 0 */ 
gpio |= 0x10; /* GPIO4 = 1 */ gpio &= (~0x04); /* GPIO2 = 0, LED off */ /* clear bits 7..6 of SYS0 */ sys0 = sys0 & (~0xc0); epa_ctl[0] = 0x10; /* set stall */ epa_ctl[1] = 0x02; /* set reset */ } dev_dbg(&d->intf->dev, "WR SYS0=%02x GPIO_OUT_VAL=%02x\n", sys0, gpio); /* demod adc */ ret = rtl28xxu_wr_reg(d, SYS_SYS0, sys0); if (ret) goto err; /* tuner power, write GPIOs */ ret = rtl28xxu_wr_reg(d, SYS_GPIO_OUT_VAL, gpio); if (ret) goto err; /* streaming EP: write the stall & reset bytes chosen above */ ret = rtl28xxu_wr_regs(d, USB_EPA_CTL, epa_ctl, 2); if (ret) goto err; /* NOTE(review): the result of usb_clear_halt() is ignored here - confirm this is deliberate */ if (onoff) usb_clear_halt(d->udev, usb_rcvbulkpipe(d->udev, 0x81)); return ret; err: dev_dbg(&d->intf->dev, "failed=%d\n", ret); return ret; } /* RTL2832U power control. Power-on: set GPIO3 and clear GPIO4, clear bit 0x10 of SYS_DEMOD_CTL1, set bits 0x80 and 0x20 of SYS_DEMOD_CTL, clear the streaming endpoint stall/reset bytes and clear the endpoint halt. Power-off: set GPIO4, clear bit 0x80 of SYS_DEMOD_CTL and write the stall/reset bytes. Returns 0 on success or the error of the first failing step. */ static int rtl2832u_power_ctrl(struct dvb_usb_device *d, int onoff) { int ret; dev_dbg(&d->intf->dev, "onoff=%d\n", onoff); if (onoff) { /* GPIO3=1, GPIO4=0 */ ret = rtl28xxu_wr_reg_mask(d, SYS_GPIO_OUT_VAL, 0x08, 0x18); if (ret) goto err; /* suspend? */ ret = rtl28xxu_wr_reg_mask(d, SYS_DEMOD_CTL1, 0x00, 0x10); if (ret) goto err; /* enable PLL */ ret = rtl28xxu_wr_reg_mask(d, SYS_DEMOD_CTL, 0x80, 0x80); if (ret) goto err; /* disable reset */ ret = rtl28xxu_wr_reg_mask(d, SYS_DEMOD_CTL, 0x20, 0x20); if (ret) goto err; /* streaming EP: clear stall & reset */ ret = rtl28xxu_wr_regs(d, USB_EPA_CTL, "\x00\x00", 2); if (ret) goto err; ret = usb_clear_halt(d->udev, usb_rcvbulkpipe(d->udev, 0x81)); if (ret) goto err; } else { /* GPIO4=1 */ ret = rtl28xxu_wr_reg_mask(d, SYS_GPIO_OUT_VAL, 0x10, 0x10); if (ret) goto err; /* disable PLL */ ret = rtl28xxu_wr_reg_mask(d, SYS_DEMOD_CTL, 0x00, 0x80); if (ret) goto err; /* streaming EP: set stall & reset */ ret = rtl28xxu_wr_regs(d, USB_EPA_CTL, "\x10\x02", 2); if (ret) goto err; } return ret; err: dev_dbg(&d->intf->dev, "failed=%d\n", ret); return ret; } /* Dispatch power control to the chip-specific routine according to dev->chip_id. */ static int rtl28xxu_power_ctrl(struct dvb_usb_device *d, int onoff) { struct rtl28xxu_dev *dev = d_to_priv(d); if (dev->chip_id == CHIP_ID_RTL2831U) return rtl2831u_power_ctrl(d, onoff); else return rtl2832u_power_ctrl(d, 
onoff); } static int rtl28xxu_frontend_ctrl(struct dvb_frontend *fe, int onoff) { struct dvb_usb_device *d = fe_to_d(fe); struct rtl28xxu_dev *dev = fe_to_priv(fe); struct rtl2832_platform_data *pdata = &dev->rtl2832_platform_data; int ret; u8 val; dev_dbg(&d->intf->dev, "fe=%d onoff=%d\n", fe->id, onoff); if (dev->chip_id == CHIP_ID_RTL2831U) return 0; if (fe->id == 0) { /* control internal demod ADC */ if (onoff) val = 0x48; /* enable ADC */ else val = 0x00; /* disable ADC */ ret = rtl28xxu_wr_reg_mask(d, SYS_DEMOD_CTL, val, 0x48); if (ret) goto err; } else if (fe->id == 1) { /* bypass slave demod TS through master demod */ ret = pdata->slave_ts_ctrl(dev->i2c_client_demod, onoff); if (ret) goto err; } return 0; err: dev_dbg(&d->intf->dev, "failed=%d\n", ret); return ret; } #if IS_ENABLED(CONFIG_RC_CORE) static int rtl2831u_rc_query(struct dvb_usb_device *d) { int ret, i; struct rtl28xxu_dev *dev = d->priv; u8 buf[5]; u32 rc_code; static const struct rtl28xxu_reg_val rc_nec_tab[] = { { 0x3033, 0x80 }, { 0x3020, 0x43 }, { 0x3021, 0x16 }, { 0x3022, 0x16 }, { 0x3023, 0x5a }, { 0x3024, 0x2d }, { 0x3025, 0x16 }, { 0x3026, 0x01 }, { 0x3028, 0xb0 }, { 0x3029, 0x04 }, { 0x302c, 0x88 }, { 0x302e, 0x13 }, { 0x3030, 0xdf }, { 0x3031, 0x05 }, }; /* init remote controller */ if (!dev->rc_active) { for (i = 0; i < ARRAY_SIZE(rc_nec_tab); i++) { ret = rtl28xxu_wr_reg(d, rc_nec_tab[i].reg, rc_nec_tab[i].val); if (ret) goto err; } dev->rc_active = true; } ret = rtl28xxu_rd_regs(d, SYS_IRRC_RP, buf, 5); if (ret) goto err; if (buf[4] & 0x01) { enum rc_proto proto; if (buf[2] == (u8) ~buf[3]) { if (buf[0] == (u8) ~buf[1]) { /* NEC standard (16 bit) */ rc_code = RC_SCANCODE_NEC(buf[0], buf[2]); proto = RC_PROTO_NEC; } else { /* NEC extended (24 bit) */ rc_code = RC_SCANCODE_NECX(buf[0] << 8 | buf[1], buf[2]); proto = RC_PROTO_NECX; } } else { /* NEC full (32 bit) */ rc_code = RC_SCANCODE_NEC32(buf[0] << 24 | buf[1] << 16 | buf[2] << 8 | buf[3]); proto = RC_PROTO_NEC32; } 
rc_keydown(d->rc_dev, proto, rc_code, 0); ret = rtl28xxu_wr_reg(d, SYS_IRRC_SR, 1); if (ret) goto err; /* repeated intentionally to avoid extra keypress */ ret = rtl28xxu_wr_reg(d, SYS_IRRC_SR, 1); if (ret) goto err; } return ret; err: dev_dbg(&d->intf->dev, "failed=%d\n", ret); return ret; } static int rtl2831u_get_rc_config(struct dvb_usb_device *d, struct dvb_usb_rc *rc) { rc->map_name = RC_MAP_EMPTY; rc->allowed_protos = RC_PROTO_BIT_NEC | RC_PROTO_BIT_NECX | RC_PROTO_BIT_NEC32; rc->query = rtl2831u_rc_query; rc->interval = 400; return 0; } static int rtl2832u_rc_query(struct dvb_usb_device *d) { int ret, i, len; struct rtl28xxu_dev *dev = d->priv; struct ir_raw_event ev = {}; u8 buf[128]; static const struct rtl28xxu_reg_val_mask refresh_tab[] = { {IR_RX_IF, 0x03, 0xff}, {IR_RX_BUF_CTRL, 0x80, 0xff}, {IR_RX_CTRL, 0x80, 0xff}, }; /* init remote controller */ if (!dev->rc_active) { static const struct rtl28xxu_reg_val_mask init_tab[] = { {SYS_DEMOD_CTL1, 0x00, 0x04}, {SYS_DEMOD_CTL1, 0x00, 0x08}, {USB_CTRL, 0x20, 0x20}, {SYS_GPIO_DIR, 0x00, 0x08}, {SYS_GPIO_OUT_EN, 0x08, 0x08}, {SYS_GPIO_OUT_VAL, 0x08, 0x08}, {IR_MAX_DURATION0, 0xd0, 0xff}, {IR_MAX_DURATION1, 0x07, 0xff}, {IR_IDLE_LEN0, 0xc0, 0xff}, {IR_IDLE_LEN1, 0x00, 0xff}, {IR_GLITCH_LEN, 0x03, 0xff}, {IR_RX_CLK, 0x09, 0xff}, {IR_RX_CFG, 0x1c, 0xff}, {IR_MAX_H_TOL_LEN, 0x1e, 0xff}, {IR_MAX_L_TOL_LEN, 0x1e, 0xff}, {IR_RX_CTRL, 0x80, 0xff}, }; for (i = 0; i < ARRAY_SIZE(init_tab); i++) { ret = rtl28xxu_wr_reg_mask(d, init_tab[i].reg, init_tab[i].val, init_tab[i].mask); if (ret) goto err; } dev->rc_active = true; } ret = rtl28xxu_rd_reg(d, IR_RX_IF, &buf[0]); if (ret) goto err; if (buf[0] != 0x83) goto exit; ret = rtl28xxu_rd_reg(d, IR_RX_BC, &buf[0]); if (ret || buf[0] > sizeof(buf)) goto err; len = buf[0]; /* read raw code from hw */ ret = rtl28xxu_rd_regs(d, IR_RX_BUF, buf, len); if (ret) goto err; /* let hw receive new code */ for (i = 0; i < ARRAY_SIZE(refresh_tab); i++) { ret = rtl28xxu_wr_reg_mask(d, 
refresh_tab[i].reg, refresh_tab[i].val, refresh_tab[i].mask); if (ret) goto err; } /* pass data to Kernel IR decoder */ for (i = 0; i < len; i++) { ev.pulse = buf[i] >> 7; ev.duration = 51 * (buf[i] & 0x7f); ir_raw_event_store_with_filter(d->rc_dev, &ev); } /* 'flush' ir_raw_event_store_with_filter() */ ir_raw_event_handle(d->rc_dev); exit: return ret; err: dev_dbg(&d->intf->dev, "failed=%d\n", ret); return ret; } static int rtl2832u_get_rc_config(struct dvb_usb_device *d, struct dvb_usb_rc *rc) { /* disable IR interrupts in order to avoid SDR sample loss */ if (rtl28xxu_disable_rc) return rtl28xxu_wr_reg(d, IR_RX_IE, 0x00); /* load empty to enable rc */ if (!rc->map_name) rc->map_name = RC_MAP_EMPTY; rc->allowed_protos = RC_PROTO_BIT_ALL_IR_DECODER; rc->driver_type = RC_DRIVER_IR_RAW; rc->query = rtl2832u_rc_query; rc->interval = 200; /* we program idle len to 0xc0, set timeout to one less */ rc->timeout = 0xbf * 51; return 0; } static int rtl28xxu_get_rc_config(struct dvb_usb_device *d, struct dvb_usb_rc *rc) { struct rtl28xxu_dev *dev = d_to_priv(d); if (dev->chip_id == CHIP_ID_RTL2831U) return rtl2831u_get_rc_config(d, rc); else return rtl2832u_get_rc_config(d, rc); } #else #define rtl28xxu_get_rc_config NULL #endif static int rtl28xxu_pid_filter_ctrl(struct dvb_usb_adapter *adap, int onoff) { struct rtl28xxu_dev *dev = adap_to_priv(adap); if (dev->chip_id == CHIP_ID_RTL2831U) { struct rtl2830_platform_data *pdata = &dev->rtl2830_platform_data; return pdata->pid_filter_ctrl(adap->fe[0], onoff); } else { struct rtl2832_platform_data *pdata = &dev->rtl2832_platform_data; return pdata->pid_filter_ctrl(adap->fe[0], onoff); } } static int rtl28xxu_pid_filter(struct dvb_usb_adapter *adap, int index, u16 pid, int onoff) { struct rtl28xxu_dev *dev = adap_to_priv(adap); if (dev->chip_id == CHIP_ID_RTL2831U) { struct rtl2830_platform_data *pdata = &dev->rtl2830_platform_data; return pdata->pid_filter(adap->fe[0], index, pid, onoff); } else { struct rtl2832_platform_data 
*pdata = &dev->rtl2832_platform_data; return pdata->pid_filter(adap->fe[0], index, pid, onoff); } } static const struct dvb_usb_device_properties rtl28xxu_props = { .driver_name = KBUILD_MODNAME, .owner = THIS_MODULE, .adapter_nr = adapter_nr, .size_of_priv = sizeof(struct rtl28xxu_dev), .identify_state = rtl28xxu_identify_state, .power_ctrl = rtl28xxu_power_ctrl, .frontend_ctrl = rtl28xxu_frontend_ctrl, .i2c_algo = &rtl28xxu_i2c_algo, .read_config = rtl28xxu_read_config, .frontend_attach = rtl28xxu_frontend_attach, .frontend_detach = rtl28xxu_frontend_detach, .tuner_attach = rtl28xxu_tuner_attach, .tuner_detach = rtl28xxu_tuner_detach, .init = rtl28xxu_init, .get_rc_config = rtl28xxu_get_rc_config, .num_adapters = 1, .adapter = { { .caps = DVB_USB_ADAP_HAS_PID_FILTER | DVB_USB_ADAP_PID_FILTER_CAN_BE_TURNED_OFF, .pid_filter_count = 32, .pid_filter_ctrl = rtl28xxu_pid_filter_ctrl, .pid_filter = rtl28xxu_pid_filter, .stream = DVB_USB_STREAM_BULK(0x81, 6, 8 * 512), }, }, }; static const struct usb_device_id rtl28xxu_id_table[] = { /* RTL2831U devices: */ { DVB_USB_DEVICE(USB_VID_REALTEK, USB_PID_REALTEK_RTL2831U, &rtl28xxu_props, "Realtek RTL2831U reference design", NULL) }, { DVB_USB_DEVICE(USB_VID_WIDEVIEW, USB_PID_FREECOM_DVBT, &rtl28xxu_props, "Freecom USB2.0 DVB-T", NULL) }, { DVB_USB_DEVICE(USB_VID_WIDEVIEW, USB_PID_FREECOM_DVBT_2, &rtl28xxu_props, "Freecom USB2.0 DVB-T", NULL) }, /* RTL2832U devices: */ { DVB_USB_DEVICE(USB_VID_REALTEK, 0x2832, &rtl28xxu_props, "Realtek RTL2832U reference design", NULL) }, { DVB_USB_DEVICE(USB_VID_REALTEK, 0x2838, &rtl28xxu_props, "Realtek RTL2832U reference design", NULL) }, { DVB_USB_DEVICE(USB_VID_TERRATEC, USB_PID_TERRATEC_CINERGY_T_STICK_BLACK_REV1, &rtl28xxu_props, "TerraTec Cinergy T Stick Black", RC_MAP_TERRATEC_SLIM) }, { DVB_USB_DEVICE(USB_VID_GTEK, USB_PID_DELOCK_USB2_DVBT, &rtl28xxu_props, "G-Tek Electronics Group Lifeview LV5TDLX DVB-T", NULL) }, { DVB_USB_DEVICE(USB_VID_TERRATEC, USB_PID_NOXON_DAB_STICK, 
&rtl28xxu_props, "TerraTec NOXON DAB Stick", NULL) }, { DVB_USB_DEVICE(USB_VID_TERRATEC, USB_PID_NOXON_DAB_STICK_REV2, &rtl28xxu_props, "TerraTec NOXON DAB Stick (rev 2)", NULL) }, { DVB_USB_DEVICE(USB_VID_TERRATEC, USB_PID_NOXON_DAB_STICK_REV3, &rtl28xxu_props, "TerraTec NOXON DAB Stick (rev 3)", NULL) }, { DVB_USB_DEVICE(USB_VID_GTEK, USB_PID_TREKSTOR_TERRES_2_0, &rtl28xxu_props, "Trekstor DVB-T Stick Terres 2.0", NULL) }, { DVB_USB_DEVICE(USB_VID_DEXATEK, 0x1101, &rtl28xxu_props, "Dexatek DK DVB-T Dongle", NULL) }, { DVB_USB_DEVICE(USB_VID_LEADTEK, 0x6680, &rtl28xxu_props, "DigitalNow Quad DVB-T Receiver", NULL) }, { DVB_USB_DEVICE(USB_VID_LEADTEK, USB_PID_WINFAST_DTV_DONGLE_MINID, &rtl28xxu_props, "Leadtek Winfast DTV Dongle Mini D", NULL) }, { DVB_USB_DEVICE(USB_VID_LEADTEK, USB_PID_WINFAST_DTV2000DS_PLUS, &rtl28xxu_props, "Leadtek WinFast DTV2000DS Plus", RC_MAP_LEADTEK_Y04G0051) }, { DVB_USB_DEVICE(USB_VID_TERRATEC, 0x00d3, &rtl28xxu_props, "TerraTec Cinergy T Stick RC (Rev. 3)", NULL) }, { DVB_USB_DEVICE(USB_VID_DEXATEK, 0x1102, &rtl28xxu_props, "Dexatek DK mini DVB-T Dongle", NULL) }, { DVB_USB_DEVICE(USB_VID_TERRATEC, 0x00d7, &rtl28xxu_props, "TerraTec Cinergy T Stick+", NULL) }, { DVB_USB_DEVICE(USB_VID_KWORLD_2, 0xd3a8, &rtl28xxu_props, "ASUS My Cinema-U3100Mini Plus V2", NULL) }, { DVB_USB_DEVICE(USB_VID_KWORLD_2, 0xd393, &rtl28xxu_props, "GIGABYTE U7300", NULL) }, { DVB_USB_DEVICE(USB_VID_DEXATEK, 0x1104, &rtl28xxu_props, "MSI DIGIVOX Micro HD", NULL) }, { DVB_USB_DEVICE(USB_VID_COMPRO, 0x0620, &rtl28xxu_props, "Compro VideoMate U620F", NULL) }, { DVB_USB_DEVICE(USB_VID_COMPRO, 0x0650, &rtl28xxu_props, "Compro VideoMate U650F", NULL) }, { DVB_USB_DEVICE(USB_VID_KWORLD_2, 0xd394, &rtl28xxu_props, "MaxMedia HU394-T", NULL) }, { DVB_USB_DEVICE(USB_VID_LEADTEK, 0x6a03, &rtl28xxu_props, "Leadtek WinFast DTV Dongle mini", NULL) }, { DVB_USB_DEVICE(USB_VID_GTEK, USB_PID_CPYTO_REDI_PC50A, &rtl28xxu_props, "Crypto ReDi PC 50 A", NULL) }, { 
DVB_USB_DEVICE(USB_VID_KYE, 0x707f, &rtl28xxu_props, "Genius TVGo DVB-T03", NULL) }, { DVB_USB_DEVICE(USB_VID_KWORLD_2, 0xd395, &rtl28xxu_props, "Peak DVB-T USB", NULL) }, { DVB_USB_DEVICE(USB_VID_KWORLD_2, USB_PID_SVEON_STV20_RTL2832U, &rtl28xxu_props, "Sveon STV20", NULL) }, { DVB_USB_DEVICE(USB_VID_KWORLD_2, USB_PID_SVEON_STV21, &rtl28xxu_props, "Sveon STV21", NULL) }, { DVB_USB_DEVICE(USB_VID_KWORLD_2, USB_PID_SVEON_STV27, &rtl28xxu_props, "Sveon STV27", NULL) }, { DVB_USB_DEVICE(USB_VID_KWORLD_2, USB_PID_TURBOX_DTT_2000, &rtl28xxu_props, "TURBO-X Pure TV Tuner DTT-2000", NULL) }, { DVB_USB_DEVICE(USB_VID_GTEK, USB_PID_PROLECTRIX_DV107669, &rtl28xxu_props, "PROlectrix DV107669", NULL) }, /* RTL2832P devices: */ { DVB_USB_DEVICE(USB_VID_HANFTEK, 0x0131, &rtl28xxu_props, "Astrometa DVB-T2", RC_MAP_ASTROMETA_T2HYBRID) }, { DVB_USB_DEVICE(0x5654, 0xca42, &rtl28xxu_props, "GoTView MasterHD 3", NULL) }, { } }; MODULE_DEVICE_TABLE(usb, rtl28xxu_id_table); static struct usb_driver rtl28xxu_usb_driver = { .name = KBUILD_MODNAME, .id_table = rtl28xxu_id_table, .probe = dvb_usbv2_probe, .disconnect = dvb_usbv2_disconnect, .suspend = dvb_usbv2_suspend, .resume = dvb_usbv2_resume, .reset_resume = dvb_usbv2_reset_resume, .no_dynamic_id = 1, .soft_unbind = 1, }; module_usb_driver(rtl28xxu_usb_driver); MODULE_DESCRIPTION("Realtek RTL28xxU DVB USB driver"); MODULE_AUTHOR("Antti Palosaari <crope@iki.fi>"); MODULE_AUTHOR("Thomas Mair <thomas.mair86@googlemail.com>"); MODULE_LICENSE("GPL");
11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> * Copyright (c) 2002 David S. Miller (davem@redhat.com) * Copyright (c) 2005 Herbert Xu <herbert@gondor.apana.org.au> * * Portions derived from Cryptoapi, by Alexander Kjeldaas <astor@fast.no> * and Nettle, by Niels Möller. */ #ifndef _CRYPTO_INTERNAL_CIPHER_H #define _CRYPTO_INTERNAL_CIPHER_H #include <crypto/algapi.h> /* Single block cipher handle: a wrapper whose only member is the generic transform. */ struct crypto_cipher { struct crypto_tfm base; }; /** * DOC: Single Block Cipher API * * The single block cipher API is used with the ciphers of type * CRYPTO_ALG_TYPE_CIPHER (listed as type "cipher" in /proc/crypto). * * Using the single block cipher API calls, operations with the basic cipher * primitive can be implemented. These cipher primitives exclude any block * chaining operations including IV handling. * * The purpose of this single block cipher API is to support the implementation * of templates or other concepts that only need to perform the cipher operation * on one block at a time. Templates invoke the underlying cipher primitive * block-wise and process either the input or the output data of these cipher * operations. 
*/ /* Reinterpret a generic transform pointer as a cipher handle; this is a plain pointer cast with no checking. */ static inline struct crypto_cipher *__crypto_cipher_cast(struct crypto_tfm *tfm) { return (struct crypto_cipher *)tfm; } /** * crypto_alloc_cipher() - allocate single block cipher handle * @alg_name: is the cra_name / name or cra_driver_name / driver name of the * single block cipher * @type: specifies the type of the cipher * @mask: specifies the mask for the cipher * * Allocate a cipher handle for a single block cipher. The returned struct * crypto_cipher is the cipher handle that is required for any subsequent API * invocation for that single block cipher. * * Return: allocated cipher handle in case of success; IS_ERR() is true in case * of an error, PTR_ERR() returns the error code. */ static inline struct crypto_cipher *crypto_alloc_cipher(const char *alg_name, u32 type, u32 mask) { /* replace whatever algorithm type the caller passed with CIPHER and make the type bits part of the match mask */ type &= ~CRYPTO_ALG_TYPE_MASK; type |= CRYPTO_ALG_TYPE_CIPHER; mask |= CRYPTO_ALG_TYPE_MASK; return __crypto_cipher_cast(crypto_alloc_base(alg_name, type, mask)); } /* Return the generic transform embedded in the cipher handle. */ static inline struct crypto_tfm *crypto_cipher_tfm(struct crypto_cipher *tfm) { return &tfm->base; } /** * crypto_free_cipher() - zeroize and free the single block cipher handle * @tfm: cipher handle to be freed */ static inline void crypto_free_cipher(struct crypto_cipher *tfm) { crypto_free_tfm(crypto_cipher_tfm(tfm)); } /** * crypto_has_cipher() - Search for the availability of a single block cipher * @alg_name: is the cra_name / name or cra_driver_name / driver name of the * single block cipher * @type: specifies the type of the cipher * @mask: specifies the mask for the cipher * * Return: true when the single block cipher is known to the kernel crypto API; * false otherwise */ static inline int crypto_has_cipher(const char *alg_name, u32 type, u32 mask) { /* same type/mask normalisation as crypto_alloc_cipher() */ type &= ~CRYPTO_ALG_TYPE_MASK; type |= CRYPTO_ALG_TYPE_CIPHER; mask |= CRYPTO_ALG_TYPE_MASK; return crypto_has_alg(alg_name, type, mask); } /** * crypto_cipher_blocksize() - obtain block size for cipher * @tfm: cipher handle * * The block size for the single 
block cipher referenced with the cipher handle * tfm is returned. The caller may use that information to allocate appropriate * memory for the data returned by the encryption or decryption operation * * Return: block size of cipher */ static inline unsigned int crypto_cipher_blocksize(struct crypto_cipher *tfm) { return crypto_tfm_alg_blocksize(crypto_cipher_tfm(tfm)); } /* Return the value of crypto_tfm_alg_alignmask() for the handle's base transform. */ static inline unsigned int crypto_cipher_alignmask(struct crypto_cipher *tfm) { return crypto_tfm_alg_alignmask(crypto_cipher_tfm(tfm)); } /* Flag accessors: the next three helpers forward to crypto_tfm_get_flags(), crypto_tfm_set_flags() and crypto_tfm_clear_flags() on the base transform. */ static inline u32 crypto_cipher_get_flags(struct crypto_cipher *tfm) { return crypto_tfm_get_flags(crypto_cipher_tfm(tfm)); } static inline void crypto_cipher_set_flags(struct crypto_cipher *tfm, u32 flags) { crypto_tfm_set_flags(crypto_cipher_tfm(tfm), flags); } static inline void crypto_cipher_clear_flags(struct crypto_cipher *tfm, u32 flags) { crypto_tfm_clear_flags(crypto_cipher_tfm(tfm), flags); } /** * crypto_cipher_setkey() - set key for cipher * @tfm: cipher handle * @key: buffer holding the key * @keylen: length of the key in bytes * * The caller provided key is set for the single block cipher referenced by the * cipher handle. * * Note, the key length determines the cipher type. Many block ciphers implement * different cipher modes depending on the key size, such as AES-128 vs AES-192 * vs. AES-256. When providing a 16 byte key for an AES cipher handle, AES-128 * is performed. * * Return: 0 if the setting of the key was successful; < 0 if an error occurred */ int crypto_cipher_setkey(struct crypto_cipher *tfm, const u8 *key, unsigned int keylen); /** * crypto_cipher_encrypt_one() - encrypt one block of plaintext * @tfm: cipher handle * @dst: points to the buffer that will be filled with the ciphertext * @src: buffer holding the plaintext to be encrypted * * Invoke the encryption operation of one block. The caller must ensure that * the plaintext and ciphertext buffers are at least one block in size. 
*/ void crypto_cipher_encrypt_one(struct crypto_cipher *tfm, u8 *dst, const u8 *src); /** * crypto_cipher_decrypt_one() - decrypt one block of ciphertext * @tfm: cipher handle * @dst: points to the buffer that will be filled with the plaintext * @src: buffer holding the ciphertext to be decrypted * * Invoke the decryption operation of one block. The caller must ensure that * the plaintext and ciphertext buffers are at least one block in size. */ void crypto_cipher_decrypt_one(struct crypto_cipher *tfm, u8 *dst, const u8 *src); /* Declared only; the definition is not in this header. NOTE(review): presumably returns a new handle duplicating @cipher - confirm against the implementation. */ struct crypto_cipher *crypto_clone_cipher(struct crypto_cipher *cipher); /* Cipher-typed wrapper around the generic struct crypto_spawn. */ struct crypto_cipher_spawn { struct crypto_spawn base; }; /* Grab a spawn for algorithm @name on behalf of @inst: the type is forced to CRYPTO_ALG_TYPE_CIPHER and CRYPTO_ALG_TYPE_MASK is added to @mask before calling crypto_grab_spawn(), whose result is returned. */ static inline int crypto_grab_cipher(struct crypto_cipher_spawn *spawn, struct crypto_instance *inst, const char *name, u32 type, u32 mask) { type &= ~CRYPTO_ALG_TYPE_MASK; type |= CRYPTO_ALG_TYPE_CIPHER; mask |= CRYPTO_ALG_TYPE_MASK; return crypto_grab_spawn(&spawn->base, inst, name, type, mask); } /* Release the spawn through crypto_drop_spawn(). */ static inline void crypto_drop_cipher(struct crypto_cipher_spawn *spawn) { crypto_drop_spawn(&spawn->base); } /* Return the algorithm pointer stored in the spawn. */ static inline struct crypto_alg *crypto_spawn_cipher_alg( struct crypto_cipher_spawn *spawn) { return spawn->base.alg; } /* Create a cipher handle from the spawn via crypto_spawn_tfm(), matching only cipher-type algorithms. */ static inline struct crypto_cipher *crypto_spawn_cipher( struct crypto_cipher_spawn *spawn) { u32 type = CRYPTO_ALG_TYPE_CIPHER; u32 mask = CRYPTO_ALG_TYPE_MASK; return __crypto_cipher_cast(crypto_spawn_tfm(&spawn->base, type, mask)); } /* Return the cra_cipher member of the algorithm (__crt_alg) behind @tfm. */ static inline struct cipher_alg *crypto_cipher_alg(struct crypto_cipher *tfm) { return &crypto_cipher_tfm(tfm)->__crt_alg->cra_cipher; } #endif
30 27 1 8 29 7 4 23 23 36 75 75 4 69 5 66 2 2 4 2 1 1 3 4 8 6 55 51 4 65 1 65 34 26 3 2 6 4 2 2 3 6 2 6 5 2 2 1 1 1 2 14 10 4 7 4 4 56 1 10 45 55 55 11 2 2 33 1 3 1 3 1 4 20 16 1 3 6 1 5 118 105 5 1 3 4 4 69 1 7 7 45 21 19 2 20 1 46 2 45 45 45 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 
467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1991, 1992 Linus Torvalds * * Added support for a Unix98-style ptmx device. * -- C. 
Scott Ananian <cananian@alumni.princeton.edu>, 14-Jan-1998 * */ #include <linux/module.h> #include <linux/errno.h> #include <linux/interrupt.h> #include <linux/tty.h> #include <linux/tty_flip.h> #include <linux/fcntl.h> #include <linux/sched/signal.h> #include <linux/string.h> #include <linux/major.h> #include <linux/mm.h> #include <linux/init.h> #include <linux/device.h> #include <linux/uaccess.h> #include <linux/bitops.h> #include <linux/devpts_fs.h> #include <linux/slab.h> #include <linux/mutex.h> #include <linux/poll.h> #include <linux/mount.h> #include <linux/file.h> #include <linux/ioctl.h> #include <linux/compat.h> #include "tty.h" #undef TTY_DEBUG_HANGUP #ifdef TTY_DEBUG_HANGUP # define tty_debug_hangup(tty, f, args...) tty_debug(tty, f, ##args) #else # define tty_debug_hangup(tty, f, args...) do {} while (0) #endif #ifdef CONFIG_UNIX98_PTYS static struct tty_driver *ptm_driver; static struct tty_driver *pts_driver; static DEFINE_MUTEX(devpts_mutex); #endif static void pty_close(struct tty_struct *tty, struct file *filp) { if (tty->driver->subtype == PTY_TYPE_MASTER) WARN_ON(tty->count > 1); else { if (tty_io_error(tty)) return; if (tty->count > 2) return; } set_bit(TTY_IO_ERROR, &tty->flags); wake_up_interruptible(&tty->read_wait); wake_up_interruptible(&tty->write_wait); scoped_guard(spinlock_irq, &tty->ctrl.lock) tty->ctrl.packet = false; /* Review - krefs on tty_link ?? */ if (!tty->link) return; set_bit(TTY_OTHER_CLOSED, &tty->link->flags); wake_up_interruptible(&tty->link->read_wait); wake_up_interruptible(&tty->link->write_wait); if (tty->driver->subtype == PTY_TYPE_MASTER) { set_bit(TTY_OTHER_CLOSED, &tty->flags); #ifdef CONFIG_UNIX98_PTYS if (tty->driver == ptm_driver) { guard(mutex)(&devpts_mutex); if (tty->link->driver_data) devpts_pty_kill(tty->link->driver_data); } #endif tty_vhangup(tty->link); } } /* * The unthrottle routine is called by the line discipline to signal * that it can receive more characters. 
For PTY's, the TTY_THROTTLED * flag is always set, to force the line discipline to always call the * unthrottle routine when there are fewer than TTY_THRESHOLD_UNTHROTTLE * characters in the queue. This is necessary since each time this * happens, we need to wake up any sleeping processes that could be * (1) trying to send data to the pty, or (2) waiting in wait_until_sent() * for the pty buffer to be drained. */ static void pty_unthrottle(struct tty_struct *tty) { tty_wakeup(tty->link); set_bit(TTY_THROTTLED, &tty->flags); } /** * pty_write - write to a pty * @tty: the tty we write from * @buf: kernel buffer of data * @c: bytes to write * * Our "hardware" write method. Data is coming from the ldisc which * may be in a non sleeping state. We simply throw this at the other * end of the link as if we were an IRQ handler receiving stuff for * the other side of the pty/tty pair. */ static ssize_t pty_write(struct tty_struct *tty, const u8 *buf, size_t c) { struct tty_struct *to = tty->link; if (tty->flow.stopped || !c) return 0; return tty_insert_flip_string_and_push_buffer(to->port, buf, c); } /** * pty_write_room - write space * @tty: tty we are writing from * * Report how many bytes the ldisc can send into the queue for * the other device. 
*/ static unsigned int pty_write_room(struct tty_struct *tty) { if (tty->flow.stopped) return 0; return tty_buffer_space_avail(tty->link->port); } /* Set the lock flag on a pty */ static int pty_set_lock(struct tty_struct *tty, int __user *arg) { int val; if (get_user(val, arg)) return -EFAULT; if (val) set_bit(TTY_PTY_LOCK, &tty->flags); else clear_bit(TTY_PTY_LOCK, &tty->flags); return 0; } static int pty_get_lock(struct tty_struct *tty, int __user *arg) { int locked = test_bit(TTY_PTY_LOCK, &tty->flags); return put_user(locked, arg); } /* Set the packet mode on a pty */ static int pty_set_pktmode(struct tty_struct *tty, int __user *arg) { int want_pktmode; if (get_user(want_pktmode, arg)) return -EFAULT; guard(spinlock_irq)(&tty->ctrl.lock); if (!want_pktmode) { tty->ctrl.packet = false; return 0; } if (tty->ctrl.packet) return 0; tty->link->ctrl.pktstatus = 0; smp_mb(); tty->ctrl.packet = true; return 0; } /* Get the packet mode of a pty */ static int pty_get_pktmode(struct tty_struct *tty, int __user *arg) { int pktmode = tty->ctrl.packet; return put_user(pktmode, arg); } /* Send a signal to the slave */ static int pty_signal(struct tty_struct *tty, int sig) { struct pid *pgrp; if (sig != SIGINT && sig != SIGQUIT && sig != SIGTSTP) return -EINVAL; if (tty->link) { pgrp = tty_get_pgrp(tty->link); if (pgrp) kill_pgrp(pgrp, sig, 1); put_pid(pgrp); } return 0; } static void pty_flush_buffer(struct tty_struct *tty) { struct tty_struct *to = tty->link; if (!to) return; tty_buffer_flush(to, NULL); if (to->ctrl.packet) { guard(spinlock_irq)(&tty->ctrl.lock); tty->ctrl.pktstatus |= TIOCPKT_FLUSHWRITE; wake_up_interruptible(&to->read_wait); } } static int pty_open(struct tty_struct *tty, struct file *filp) { if (!tty || !tty->link) return -ENODEV; if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) goto out; if (test_bit(TTY_PTY_LOCK, &tty->link->flags)) goto out; if (tty->driver->subtype == PTY_TYPE_SLAVE && tty->link->count != 1) goto out; clear_bit(TTY_IO_ERROR, 
&tty->flags); clear_bit(TTY_OTHER_CLOSED, &tty->link->flags); set_bit(TTY_THROTTLED, &tty->flags); return 0; out: set_bit(TTY_IO_ERROR, &tty->flags); return -EIO; } static void pty_set_termios(struct tty_struct *tty, const struct ktermios *old_termios) { /* See if packet mode change of state. */ if (tty->link && tty->link->ctrl.packet) { int extproc = (old_termios->c_lflag & EXTPROC) | L_EXTPROC(tty); int old_flow = ((old_termios->c_iflag & IXON) && (old_termios->c_cc[VSTOP] == '\023') && (old_termios->c_cc[VSTART] == '\021')); int new_flow = (I_IXON(tty) && STOP_CHAR(tty) == '\023' && START_CHAR(tty) == '\021'); if ((old_flow != new_flow) || extproc) { scoped_guard(spinlock_irq, &tty->ctrl.lock) { if (old_flow != new_flow) { tty->ctrl.pktstatus &= ~(TIOCPKT_DOSTOP | TIOCPKT_NOSTOP); if (new_flow) tty->ctrl.pktstatus |= TIOCPKT_DOSTOP; else tty->ctrl.pktstatus |= TIOCPKT_NOSTOP; } if (extproc) tty->ctrl.pktstatus |= TIOCPKT_IOCTL; } wake_up_interruptible(&tty->link->read_wait); } } tty->termios.c_cflag &= ~(CSIZE | PARENB); tty->termios.c_cflag |= (CS8 | CREAD); } /** * pty_resize - resize event * @tty: tty being resized * @ws: window size being set. 
* * Update the window size stored on both ends of the pty pair and send * SIGWINCH to their foreground process groups so that the terminal * resize is performed correctly. Nothing is done when @ws equals the * current size. * * Return: always 0 */ static int pty_resize(struct tty_struct *tty, struct winsize *ws) { struct pid *pgrp, *rpgrp; struct tty_struct *pty = tty->link; /* For a PTY we need to lock the tty side */ guard(mutex)(&tty->winsize_mutex); if (!memcmp(ws, &tty->winsize, sizeof(*ws))) return 0; /* Signal the foreground process group of both ptys */ pgrp = tty_get_pgrp(tty); rpgrp = tty_get_pgrp(pty); if (pgrp) kill_pgrp(pgrp, SIGWINCH, 1); /* skip the second signal when both ends share the same process group */ if (rpgrp != pgrp && rpgrp) kill_pgrp(rpgrp, SIGWINCH, 1); put_pid(pgrp); put_pid(rpgrp); tty->winsize = *ws; pty->winsize = *ws; /* Never used so will go away soon */ return 0; } /** * pty_start - start() handler * pty_stop - stop() handler * @tty: tty being flow-controlled * * Propagates the TIOCPKT status to the master pty. * * NB: only the master pty can be in packet mode so only the slave * needs start()/stop() handlers */ static void pty_start(struct tty_struct *tty) { /* nothing to report unless the other end exists and is in packet mode */ if (!tty->link || !tty->link->ctrl.packet) return; scoped_guard(spinlock_irqsave, &tty->ctrl.lock) { tty->ctrl.pktstatus &= ~TIOCPKT_STOP; tty->ctrl.pktstatus |= TIOCPKT_START; } wake_up_interruptible_poll(&tty->link->read_wait, EPOLLIN); } /* Counterpart of pty_start(): records TIOCPKT_STOP instead of TIOCPKT_START, then wakes readers on the other end. */ static void pty_stop(struct tty_struct *tty) { if (!tty->link || !tty->link->ctrl.packet) return; scoped_guard(spinlock_irqsave, &tty->ctrl.lock) { tty->ctrl.pktstatus &= ~TIOCPKT_START; tty->ctrl.pktstatus |= TIOCPKT_STOP; } wake_up_interruptible_poll(&tty->link->read_wait, EPOLLIN); } /** * pty_common_install - set up the pty pair * @driver: the pty driver * @tty: the tty being instantiated * @legacy: true if this is BSD style * * Perform the initial set up for the tty/pty pair. Called from the * tty layer when the port is first opened. 
* * Locking: the caller must hold the tty_mutex */ static int pty_common_install(struct tty_driver *driver, struct tty_struct *tty, bool legacy) { struct tty_struct *o_tty; struct tty_port *ports[2]; int idx = tty->index; int retval = -ENOMEM; /* Opening the slave first has always returned -EIO */ if (driver->subtype != PTY_TYPE_MASTER) return -EIO; ports[0] = kmalloc_obj(**ports); ports[1] = kmalloc_obj(**ports); if (!ports[0] || !ports[1]) goto err; if (!try_module_get(driver->other->owner)) { /* This cannot in fact currently happen */ goto err; } o_tty = alloc_tty_struct(driver->other, idx); if (!o_tty) goto err_put_module; tty_set_lock_subclass(o_tty); lockdep_set_subclass(&o_tty->termios_rwsem, TTY_LOCK_SLAVE); if (legacy) { /* We always use new tty termios data so we can do this the easy way .. */ tty_init_termios(tty); tty_init_termios(o_tty); driver->other->ttys[idx] = o_tty; driver->ttys[idx] = tty; } else { memset(&tty->termios_locked, 0, sizeof(tty->termios_locked)); tty->termios = driver->init_termios; memset(&o_tty->termios_locked, 0, sizeof(tty->termios_locked)); o_tty->termios = driver->other->init_termios; } /* * Everything allocated ... set up the o_tty structure. 
*/ tty_driver_kref_get(driver->other); /* Establish the links in both directions */ tty->link = o_tty; o_tty->link = tty; tty_port_init(ports[0]); tty_port_init(ports[1]); tty_buffer_set_limit(ports[0], 8192); tty_buffer_set_limit(ports[1], 8192); o_tty->port = ports[0]; tty->port = ports[1]; o_tty->port->itty = o_tty; tty_buffer_set_lock_subclass(o_tty->port); tty_driver_kref_get(driver); tty->count++; o_tty->count++; return 0; err_put_module: module_put(driver->other->owner); err: kfree(ports[0]); kfree(ports[1]); return retval; } static void pty_cleanup(struct tty_struct *tty) { tty_port_put(tty->port); } /* Traditional BSD devices */ #ifdef CONFIG_LEGACY_PTYS static int pty_install(struct tty_driver *driver, struct tty_struct *tty) { return pty_common_install(driver, tty, true); } static void pty_remove(struct tty_driver *driver, struct tty_struct *tty) { struct tty_struct *pair = tty->link; driver->ttys[tty->index] = NULL; if (pair) pair->driver->ttys[pair->index] = NULL; } static int pty_bsd_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { switch (cmd) { case TIOCSPTLCK: /* Set PT Lock (disallow slave open) */ return pty_set_lock(tty, (int __user *) arg); case TIOCGPTLCK: /* Get PT Lock status */ return pty_get_lock(tty, (int __user *)arg); case TIOCPKT: /* Set PT packet mode */ return pty_set_pktmode(tty, (int __user *)arg); case TIOCGPKT: /* Get PT packet mode */ return pty_get_pktmode(tty, (int __user *)arg); case TIOCSIG: /* Send signal to other side of pty */ return pty_signal(tty, (int) arg); case TIOCGPTN: /* TTY returns ENOTTY, but glibc expects EINVAL here */ return -EINVAL; } return -ENOIOCTLCMD; } #ifdef CONFIG_COMPAT static long pty_bsd_compat_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { /* * PTY ioctls don't require any special translation between 32-bit and * 64-bit userspace, they are already compatible. 
*/ return pty_bsd_ioctl(tty, cmd, (unsigned long)compat_ptr(arg)); } #else #define pty_bsd_compat_ioctl NULL #endif static int legacy_count = CONFIG_LEGACY_PTY_COUNT; /* * not really modular, but the easiest way to keep compat with existing * bootargs behaviour is to continue using module_param here. */ module_param(legacy_count, int, 0); /* * The master side of a pty can do TIOCSPTLCK and thus * has pty_bsd_ioctl. */ static const struct tty_operations master_pty_ops_bsd = { .install = pty_install, .open = pty_open, .close = pty_close, .write = pty_write, .write_room = pty_write_room, .flush_buffer = pty_flush_buffer, .unthrottle = pty_unthrottle, .ioctl = pty_bsd_ioctl, .compat_ioctl = pty_bsd_compat_ioctl, .cleanup = pty_cleanup, .resize = pty_resize, .remove = pty_remove }; static const struct tty_operations slave_pty_ops_bsd = { .install = pty_install, .open = pty_open, .close = pty_close, .write = pty_write, .write_room = pty_write_room, .flush_buffer = pty_flush_buffer, .unthrottle = pty_unthrottle, .set_termios = pty_set_termios, .cleanup = pty_cleanup, .resize = pty_resize, .start = pty_start, .stop = pty_stop, .remove = pty_remove }; static void __init legacy_pty_init(void) { struct tty_driver *pty_driver, *pty_slave_driver; if (legacy_count <= 0) return; pty_driver = tty_alloc_driver(legacy_count, TTY_DRIVER_RESET_TERMIOS | TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_ALLOC); if (IS_ERR(pty_driver)) panic("Couldn't allocate pty driver"); pty_slave_driver = tty_alloc_driver(legacy_count, TTY_DRIVER_RESET_TERMIOS | TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_ALLOC); if (IS_ERR(pty_slave_driver)) panic("Couldn't allocate pty slave driver"); pty_driver->driver_name = "pty_master"; pty_driver->name = "pty"; pty_driver->major = PTY_MASTER_MAJOR; pty_driver->minor_start = 0; pty_driver->type = TTY_DRIVER_TYPE_PTY; pty_driver->subtype = PTY_TYPE_MASTER; pty_driver->init_termios = tty_std_termios; pty_driver->init_termios.c_iflag = 0; pty_driver->init_termios.c_oflag = 
0; pty_driver->init_termios.c_cflag = B38400 | CS8 | CREAD; pty_driver->init_termios.c_lflag = 0; pty_driver->init_termios.c_ispeed = 38400; pty_driver->init_termios.c_ospeed = 38400; pty_driver->other = pty_slave_driver; tty_set_operations(pty_driver, &master_pty_ops_bsd); pty_slave_driver->driver_name = "pty_slave"; pty_slave_driver->name = "ttyp"; pty_slave_driver->major = PTY_SLAVE_MAJOR; pty_slave_driver->minor_start = 0; pty_slave_driver->type = TTY_DRIVER_TYPE_PTY; pty_slave_driver->subtype = PTY_TYPE_SLAVE; pty_slave_driver->init_termios = tty_std_termios; pty_slave_driver->init_termios.c_cflag = B38400 | CS8 | CREAD; pty_slave_driver->init_termios.c_ispeed = 38400; pty_slave_driver->init_termios.c_ospeed = 38400; pty_slave_driver->other = pty_driver; tty_set_operations(pty_slave_driver, &slave_pty_ops_bsd); if (tty_register_driver(pty_driver)) panic("Couldn't register pty driver"); if (tty_register_driver(pty_slave_driver)) panic("Couldn't register pty slave driver"); } #else static inline void legacy_pty_init(void) { } #endif /* Unix98 devices */ #ifdef CONFIG_UNIX98_PTYS static struct cdev ptmx_cdev; static struct file *ptm_open_peer_file(struct file *master, struct tty_struct *tty, int flags) { struct path path; struct file *file; /* Compute the slave's path */ path.mnt = devpts_mntget(master, tty->driver_data); if (IS_ERR(path.mnt)) return ERR_CAST(path.mnt); path.dentry = tty->link->driver_data; file = dentry_open(&path, flags, current_cred()); mntput(path.mnt); return file; } /** * ptm_open_peer - open the peer of a pty * @master: the open struct file of the ptmx device node * @tty: the master of the pty being opened * @flags: the flags for open * * Provide a race free way for userspace to open the slave end of a pty * (where they have the master fd and cannot access or trust the mount * namespace /dev/pts was mounted inside). 
*/
int ptm_open_peer(struct file *master, struct tty_struct *tty, int flags)
{
	/* Only a Unix98 master can hand out its peer */
	if (tty->driver != ptm_driver)
		return -EIO;
	return FD_ADD(flags, ptm_open_peer_file(master, tty, flags));
}

/*
 * ioctl() handler for the Unix98 master side.
 *
 * Returns the helper's result for the pty-specific commands and
 * -ENOIOCTLCMD for anything else.
 */
static int pty_unix98_ioctl(struct tty_struct *tty,
			    unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case TIOCSPTLCK: /* Set PT Lock (disallow slave open) */
		return pty_set_lock(tty, (int __user *)arg);
	case TIOCGPTLCK: /* Get PT Lock status */
		return pty_get_lock(tty, (int __user *)arg);
	case TIOCPKT: /* Set PT packet mode */
		return pty_set_pktmode(tty, (int __user *)arg);
	case TIOCGPKT: /* Get PT packet mode */
		return pty_get_pktmode(tty, (int __user *)arg);
	case TIOCGPTN: /* Get PT Number */
		return put_user(tty->index, (unsigned int __user *)arg);
	case TIOCSIG:    /* Send signal to other side of pty */
		return pty_signal(tty, (int) arg);
	}

	return -ENOIOCTLCMD;
}

#ifdef CONFIG_COMPAT
static long pty_unix98_compat_ioctl(struct tty_struct *tty,
				 unsigned int cmd, unsigned long arg)
{
	/*
	 * PTY ioctls don't require any special translation between 32-bit and
	 * 64-bit userspace, they are already compatible.
	 *
	 * TIOCSIG carries an integer signal number rather than a user
	 * pointer (see pty_unix98_ioctl()), so it bypasses compat_ptr().
	 */
	return pty_unix98_ioctl(tty, cmd,
		cmd == TIOCSIG ? arg : (unsigned long)compat_ptr(arg));
}
#else
#define pty_unix98_compat_ioctl NULL
#endif

/**
 * ptm_unix98_lookup - find a pty master
 * @driver: ptm driver
 * @file: unused
 * @idx: tty index
 *
 * Look up a pty master device. Called under the tty_mutex for now.
 * This provides our locking.
 *
 * Always fails with -EIO.
 */
static struct tty_struct *ptm_unix98_lookup(struct tty_driver *driver,
		struct file *file, int idx)
{
	/* Master must be open via /dev/ptmx */
	return ERR_PTR(-EIO);
}

/**
 * pts_unix98_lookup - find a pty slave
 * @driver: pts driver
 * @file: file pointer to tty
 * @idx: tty index
 *
 * Look up a pty slave device. Called under the tty_mutex for now.
 * This provides our locking for the tty pointer.
*/
static struct tty_struct *pts_unix98_lookup(struct tty_driver *driver,
		struct file *file, int idx)
{
	guard(mutex)(&devpts_mutex);
	/* Master must be open before slave */
	/* No private data on the dentry is reported as -EIO */
	return devpts_get_priv(file->f_path.dentry) ? : ERR_PTR(-EIO);
}

/* install() handler for Unix98 ptys: common install in non-legacy mode */
static int pty_unix98_install(struct tty_driver *driver, struct tty_struct *tty)
{
	return pty_common_install(driver, tty, false);
}

/* this is called once with whichever end is closed last */
static void pty_unix98_remove(struct tty_driver *driver, struct tty_struct *tty)
{
	struct pts_fs_info *fsi;

	/* Take driver_data from the master end, whichever end we were given */
	if (tty->driver->subtype == PTY_TYPE_MASTER)
		fsi = tty->driver_data;
	else
		fsi = tty->link->driver_data;

	if (fsi) {
		devpts_kill_index(fsi, tty->index);
		devpts_release(fsi);
	}
}

/* show_fdinfo() handler: print the pty index */
static void pty_show_fdinfo(struct tty_struct *tty, struct seq_file *m)
{
	seq_printf(m, "tty-index:\t%d\n", tty->index);
}

/* Operations of the Unix98 master side */
static const struct tty_operations ptm_unix98_ops = {
	.lookup = ptm_unix98_lookup,
	.install = pty_unix98_install,
	.remove = pty_unix98_remove,
	.open = pty_open,
	.close = pty_close,
	.write = pty_write,
	.write_room = pty_write_room,
	.flush_buffer = pty_flush_buffer,
	.unthrottle = pty_unthrottle,
	.ioctl = pty_unix98_ioctl,
	.compat_ioctl = pty_unix98_compat_ioctl,
	.resize = pty_resize,
	.cleanup = pty_cleanup,
	.show_fdinfo = pty_show_fdinfo,
};

/* Operations of the Unix98 slave side */
static const struct tty_operations pty_unix98_ops = {
	.lookup = pts_unix98_lookup,
	.install = pty_unix98_install,
	.remove = pty_unix98_remove,
	.open = pty_open,
	.close = pty_close,
	.write = pty_write,
	.write_room = pty_write_room,
	.flush_buffer = pty_flush_buffer,
	.unthrottle = pty_unthrottle,
	.set_termios = pty_set_termios,
	.start = pty_start,
	.stop = pty_stop,
	.cleanup = pty_cleanup,
};

/**
 * ptmx_open - open a unix 98 pty master
 * @inode: inode of device file
 * @filp: file pointer to tty
 *
 * Allocate a unix98 pty master device from the ptmx driver.
 *
 * Locking: tty_mutex protects the init_dev work. tty->count should
 * protect the rest.
* allocated_ptys_lock handles the list of free pty numbers
 *
 * NOTE(review): the index is allocated under devpts_mutex below; no
 * allocated_ptys_lock is visible in this function - confirm the line
 * above is still accurate.
 */
static int ptmx_open(struct inode *inode, struct file *filp)
{
	struct pts_fs_info *fsi;
	struct tty_struct *tty;
	struct dentry *dentry;
	int retval;
	int index;

	nonseekable_open(inode, filp);

	/* We refuse fsnotify events on ptmx, since it's a shared resource */
	file_set_fsnotify_mode(filp, FMODE_NONOTIFY);

	retval = tty_alloc_file(filp);
	if (retval)
		return retval;

	fsi = devpts_acquire(filp);
	if (IS_ERR(fsi)) {
		retval = PTR_ERR(fsi);
		goto out_free_file;
	}

	/* find a device that is not in use. */
	scoped_guard(mutex, &devpts_mutex)
		index = devpts_new_index(fsi);

	/* A negative index is the error code to return */
	retval = index;
	if (index < 0)
		goto out_put_fsi;

	/* The tty returned here is locked so we can safely drop the mutex */
	scoped_guard(mutex, &tty_mutex)
		tty = tty_init_dev(ptm_driver, index);

	retval = PTR_ERR(tty);
	if (IS_ERR(tty))
		goto out;

	/*
	 * From here on out, the tty is "live", and the index and
	 * fsi will be killed/put by the tty_release()
	 */
	set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
	tty->driver_data = fsi;

	tty_add_file(tty, filp);

	dentry = devpts_pty_new(fsi, index, tty->link);
	if (IS_ERR(dentry)) {
		retval = PTR_ERR(dentry);
		goto err_release;
	}
	/* ptm_open_peer_file() later opens the slave through this dentry */
	tty->link->driver_data = dentry;

	retval = ptm_driver->ops->open(tty, filp);
	if (retval)
		goto err_release;

	tty_debug_hangup(tty, "opening (count=%d)\n", tty->count);

	tty_unlock(tty);
	return 0;
err_release:
	tty_unlock(tty);
	// This will also put-ref the fsi
	tty_release(inode, filp);
	return retval;
out:
	/* Unwind in reverse order of acquisition: index, fsi, file */
	devpts_kill_index(fsi, index);
out_put_fsi:
	devpts_release(fsi);
out_free_file:
	tty_free_file(filp);
	return retval;
}

/* Filled in by unix98_pty_init(): default tty fops with open = ptmx_open */
static struct file_operations ptmx_fops __ro_after_init;

/*
 * Allocate, configure and register the Unix98 master/slave driver pair,
 * then create the /dev/ptmx character device. Panics on any failure.
 */
static void __init unix98_pty_init(void)
{
	ptm_driver = tty_alloc_driver(NR_UNIX98_PTY_MAX,
			TTY_DRIVER_RESET_TERMIOS |
			TTY_DRIVER_REAL_RAW |
			TTY_DRIVER_DYNAMIC_DEV |
			TTY_DRIVER_DEVPTS_MEM |
			TTY_DRIVER_DYNAMIC_ALLOC);
	if (IS_ERR(ptm_driver))
		panic("Couldn't allocate Unix98 ptm driver");
	pts_driver = tty_alloc_driver(NR_UNIX98_PTY_MAX,
			TTY_DRIVER_RESET_TERMIOS |
			TTY_DRIVER_REAL_RAW |
			TTY_DRIVER_DYNAMIC_DEV |
			TTY_DRIVER_DEVPTS_MEM |
			TTY_DRIVER_DYNAMIC_ALLOC);
	if (IS_ERR(pts_driver))
		panic("Couldn't allocate Unix98 pts driver");

	ptm_driver->driver_name = "pty_master";
	ptm_driver->name = "ptm";
	ptm_driver->major = UNIX98_PTY_MASTER_MAJOR;
	ptm_driver->minor_start = 0;
	ptm_driver->type = TTY_DRIVER_TYPE_PTY;
	ptm_driver->subtype = PTY_TYPE_MASTER;
	ptm_driver->init_termios = tty_std_termios;
	ptm_driver->init_termios.c_iflag = 0;
	ptm_driver->init_termios.c_oflag = 0;
	ptm_driver->init_termios.c_cflag = B38400 | CS8 | CREAD;
	ptm_driver->init_termios.c_lflag = 0;
	ptm_driver->init_termios.c_ispeed = 38400;
	ptm_driver->init_termios.c_ospeed = 38400;
	ptm_driver->other = pts_driver;
	tty_set_operations(ptm_driver, &ptm_unix98_ops);

	pts_driver->driver_name = "pty_slave";
	pts_driver->name = "pts";
	pts_driver->major = UNIX98_PTY_SLAVE_MAJOR;
	pts_driver->minor_start = 0;
	pts_driver->type = TTY_DRIVER_TYPE_PTY;
	pts_driver->subtype = PTY_TYPE_SLAVE;
	pts_driver->init_termios = tty_std_termios;
	pts_driver->init_termios.c_cflag = B38400 | CS8 | CREAD;
	pts_driver->init_termios.c_ispeed = 38400;
	pts_driver->init_termios.c_ospeed = 38400;
	pts_driver->other = ptm_driver;
	tty_set_operations(pts_driver, &pty_unix98_ops);

	if (tty_register_driver(ptm_driver))
		panic("Couldn't register Unix98 ptm driver");
	if (tty_register_driver(pts_driver))
		panic("Couldn't register Unix98 pts driver");

	/* Now create the /dev/ptmx special device */
	tty_default_fops(&ptmx_fops);
	ptmx_fops.open = ptmx_open;

	cdev_init(&ptmx_cdev, &ptmx_fops);
	if (cdev_add(&ptmx_cdev, MKDEV(TTYAUX_MAJOR, 2), 1) ||
	    register_chrdev_region(MKDEV(TTYAUX_MAJOR, 2), 1, "/dev/ptmx") < 0)
		panic("Couldn't register /dev/ptmx driver");
	device_create(&tty_class, NULL, MKDEV(TTYAUX_MAJOR, 2), NULL, "ptmx");
}

#else
static inline void unix98_pty_init(void) { }
#endif

/* Initcall: set up the legacy BSD ptys first, then the Unix98 ptys */
static int __init pty_init(void)
{
	legacy_pty_init();
	unix98_pty_init();
	return 0;
}
device_initcall(pty_init);
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 
518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Etoms Et61x151 GPL Linux driver by Michel Xhaard (09/09/2004)
 *
 * V4L2 by Jean-Francois Moine <http://moinejf.free.fr>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#define MODULE_NAME "etoms"

#include "gspca.h"

MODULE_AUTHOR("Michel Xhaard <mxhaard@users.sourceforge.net>");
MODULE_DESCRIPTION("Etoms USB Camera Driver");
MODULE_LICENSE("GPL");

/* specific webcam descriptor */
struct sd {
	struct gspca_dev gspca_dev;	/* !! must be the first item */

	unsigned char autogain;		/* non-zero enables the autogain countdown */

	char sensor;			/* one of the SENSOR_* values below */
#define SENSOR_PAS106 0
#define SENSOR_TAS5130CXX 1
	signed char ag_cnt;		/* autogain countdown; negative = disabled */
#define AG_CNT_START 13
};

/* Modes used for every sensor other than the PAS106 (see sd_config()) */
static const struct v4l2_pix_format vga_mode[] = {
	{320, 240, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE,
		.bytesperline = 320,
		.sizeimage = 320 * 240,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = 1},
/*	{640, 480, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE,
		.bytesperline = 640,
		.sizeimage = 640 * 480,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = 0}, */
};

/* Modes used for the PAS106 sensor (see sd_config()) */
static const struct v4l2_pix_format sif_mode[] = {
	{176, 144, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE,
		.bytesperline = 176,
		.sizeimage = 176 * 144,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = 1},
	{352, 288, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE,
		.bytesperline = 352,
		.sizeimage = 352 * 288,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = 0},
};

#define ETOMS_ALT_SIZE_1000 12

#define ET_GPIO_DIR_CTRL 0x04	/* Control IO bit[0..5] (0 in 1 out) */
#define ET_GPIO_OUT 0x05	/* Only IO data */
#define ET_GPIO_IN 0x06		/* Read Only IO data */
#define ET_RESET_ALL 0x03
#define ET_ClCK 0x01
#define ET_CTRL 0x02		/* enable i2c OutClck Powerdown mode */

#define ET_COMP 0x12		/* Compression register */
#define ET_MAXQt 0x13
#define ET_MINQt 0x14
#define ET_COMP_VAL0 0x02
#define ET_COMP_VAL1 0x03

#define ET_REG1d 0x1d
#define ET_REG1e 0x1e
#define ET_REG1f 0x1f
#define ET_REG20 0x20
#define ET_REG21 0x21
#define ET_REG22 0x22
#define ET_REG23 0x23
#define ET_REG24 0x24
#define ET_REG25 0x25
/* base registers for luma calculation */
#define ET_LUMA_CENTER 0x39

#define ET_G_RED 0x4d
#define ET_G_GREEN1 0x4e
#define ET_G_BLUE 0x4f
#define ET_G_GREEN2 0x50
#define ET_G_GR_H 0x51
#define ET_G_GB_H 0x52

#define ET_O_RED 0x34
#define ET_O_GREEN1 0x35
#define ET_O_BLUE 0x36
#define ET_O_GREEN2 0x37

#define ET_SYNCHRO 0x68
#define ET_STARTX 0x69
#define ET_STARTY 0x6a
#define ET_WIDTH_LOW 0x6b
#define ET_HEIGTH_LOW 0x6c
#define ET_W_H_HEIGTH 0x6d

#define ET_REG6e 0x6e		/* OBW */
#define ET_REG6f 0x6f		/*
OBW */ #define ET_REG70 0x70 /* OBW_AWB */ #define ET_REG71 0x71 /* OBW_AWB */ #define ET_REG72 0x72 /* OBW_AWB */ #define ET_REG73 0x73 /* Clkdelay ns */ #define ET_REG74 0x74 /* test pattern */ #define ET_REG75 0x75 /* test pattern */ #define ET_I2C_CLK 0x8c #define ET_PXL_CLK 0x60 #define ET_I2C_BASE 0x89 #define ET_I2C_COUNT 0x8a #define ET_I2C_PREFETCH 0x8b #define ET_I2C_REG 0x88 #define ET_I2C_DATA7 0x87 #define ET_I2C_DATA6 0x86 #define ET_I2C_DATA5 0x85 #define ET_I2C_DATA4 0x84 #define ET_I2C_DATA3 0x83 #define ET_I2C_DATA2 0x82 #define ET_I2C_DATA1 0x81 #define ET_I2C_DATA0 0x80 #define PAS106_REG2 0x02 /* pxlClk = systemClk/(reg2) */ #define PAS106_REG3 0x03 /* line/frame H [11..4] */ #define PAS106_REG4 0x04 /* line/frame L [3..0] */ #define PAS106_REG5 0x05 /* exposure time line offset(default 5) */ #define PAS106_REG6 0x06 /* exposure time pixel offset(default 6) */ #define PAS106_REG7 0x07 /* signbit Dac (default 0) */ #define PAS106_REG9 0x09 #define PAS106_REG0e 0x0e /* global gain [4..0](default 0x0e) */ #define PAS106_REG13 0x13 /* end i2c write */ static const __u8 GainRGBG[] = { 0x80, 0x80, 0x80, 0x80, 0x00, 0x00 }; static const __u8 I2c2[] = { 0x08, 0x08, 0x08, 0x08, 0x0d }; static const __u8 I2c3[] = { 0x12, 0x05 }; static const __u8 I2c4[] = { 0x41, 0x08 }; /* read 'len' bytes to gspca_dev->usb_buf */ static void reg_r(struct gspca_dev *gspca_dev, __u16 index, __u16 len) { struct usb_device *dev = gspca_dev->dev; if (len > USB_BUF_SZ) { gspca_err(gspca_dev, "reg_r: buffer overflow\n"); return; } usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), 0, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, 0, index, gspca_dev->usb_buf, len, 500); gspca_dbg(gspca_dev, D_USBI, "reg read [%02x] -> %02x ..\n", index, gspca_dev->usb_buf[0]); } static void reg_w_val(struct gspca_dev *gspca_dev, __u16 index, __u8 val) { struct usb_device *dev = gspca_dev->dev; gspca_dev->usb_buf[0] = val; usb_control_msg(dev, usb_sndctrlpipe(dev, 0), 0, USB_DIR_OUT | 
USB_TYPE_VENDOR | USB_RECIP_INTERFACE, 0, index, gspca_dev->usb_buf, 1, 500); } static void reg_w(struct gspca_dev *gspca_dev, __u16 index, const __u8 *buffer, __u16 len) { struct usb_device *dev = gspca_dev->dev; if (len > USB_BUF_SZ) { pr_err("reg_w: buffer overflow\n"); return; } gspca_dbg(gspca_dev, D_USBO, "reg write [%02x] = %02x..\n", index, *buffer); memcpy(gspca_dev->usb_buf, buffer, len); usb_control_msg(dev, usb_sndctrlpipe(dev, 0), 0, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, 0, index, gspca_dev->usb_buf, len, 500); } static int i2c_w(struct gspca_dev *gspca_dev, __u8 reg, const __u8 *buffer, int len, __u8 mode) { /* buffer should be [D0..D7] */ __u8 ptchcount; /* set the base address */ reg_w_val(gspca_dev, ET_I2C_BASE, 0x40); /* sensor base for the pas106 */ /* set count and prefetch */ ptchcount = ((len & 0x07) << 4) | (mode & 0x03); reg_w_val(gspca_dev, ET_I2C_COUNT, ptchcount); /* set the register base */ reg_w_val(gspca_dev, ET_I2C_REG, reg); while (--len >= 0) reg_w_val(gspca_dev, ET_I2C_DATA0 + len, buffer[len]); return 0; } static int i2c_r(struct gspca_dev *gspca_dev, __u8 reg) { /* set the base address */ reg_w_val(gspca_dev, ET_I2C_BASE, 0x40); /* sensor base for the pas106 */ /* set count and prefetch (cnd: 4 bits - mode: 4 bits) */ reg_w_val(gspca_dev, ET_I2C_COUNT, 0x11); reg_w_val(gspca_dev, ET_I2C_REG, reg); /* set the register base */ reg_w_val(gspca_dev, ET_I2C_PREFETCH, 0x02); /* prefetch */ reg_w_val(gspca_dev, ET_I2C_PREFETCH, 0x00); reg_r(gspca_dev, ET_I2C_DATA0, 1); /* read one byte */ return 0; } static int Et_WaitStatus(struct gspca_dev *gspca_dev) { int retry = 10; while (retry--) { reg_r(gspca_dev, ET_ClCK, 1); if (gspca_dev->usb_buf[0] != 0) return 1; } return 0; } static int et_video(struct gspca_dev *gspca_dev, int on) { int ret; reg_w_val(gspca_dev, ET_GPIO_OUT, on ? 
0x10 /* startvideo - set Bit5 */ : 0); /* stopvideo */ ret = Et_WaitStatus(gspca_dev); if (ret != 0) gspca_err(gspca_dev, "timeout video on/off\n"); return ret; } static void Et_init2(struct gspca_dev *gspca_dev) { __u8 value; static const __u8 FormLine[] = { 0x84, 0x03, 0x14, 0xf4, 0x01, 0x05 }; gspca_dbg(gspca_dev, D_STREAM, "Open Init2 ET\n"); reg_w_val(gspca_dev, ET_GPIO_DIR_CTRL, 0x2f); reg_w_val(gspca_dev, ET_GPIO_OUT, 0x10); reg_r(gspca_dev, ET_GPIO_IN, 1); reg_w_val(gspca_dev, ET_ClCK, 0x14); /* 0x14 // 0x16 enabled pattern */ reg_w_val(gspca_dev, ET_CTRL, 0x1b); /* compression et subsampling */ if (gspca_dev->cam.cam_mode[(int) gspca_dev->curr_mode].priv) value = ET_COMP_VAL1; /* 320 */ else value = ET_COMP_VAL0; /* 640 */ reg_w_val(gspca_dev, ET_COMP, value); reg_w_val(gspca_dev, ET_MAXQt, 0x1f); reg_w_val(gspca_dev, ET_MINQt, 0x04); /* undocumented registers */ reg_w_val(gspca_dev, ET_REG1d, 0xff); reg_w_val(gspca_dev, ET_REG1e, 0xff); reg_w_val(gspca_dev, ET_REG1f, 0xff); reg_w_val(gspca_dev, ET_REG20, 0x35); reg_w_val(gspca_dev, ET_REG21, 0x01); reg_w_val(gspca_dev, ET_REG22, 0x00); reg_w_val(gspca_dev, ET_REG23, 0xff); reg_w_val(gspca_dev, ET_REG24, 0xff); reg_w_val(gspca_dev, ET_REG25, 0x0f); /* colors setting */ reg_w_val(gspca_dev, 0x30, 0x11); /* 0x30 */ reg_w_val(gspca_dev, 0x31, 0x40); reg_w_val(gspca_dev, 0x32, 0x00); reg_w_val(gspca_dev, ET_O_RED, 0x00); /* 0x34 */ reg_w_val(gspca_dev, ET_O_GREEN1, 0x00); reg_w_val(gspca_dev, ET_O_BLUE, 0x00); reg_w_val(gspca_dev, ET_O_GREEN2, 0x00); /*************/ reg_w_val(gspca_dev, ET_G_RED, 0x80); /* 0x4d */ reg_w_val(gspca_dev, ET_G_GREEN1, 0x80); reg_w_val(gspca_dev, ET_G_BLUE, 0x80); reg_w_val(gspca_dev, ET_G_GREEN2, 0x80); reg_w_val(gspca_dev, ET_G_GR_H, 0x00); reg_w_val(gspca_dev, ET_G_GB_H, 0x00); /* 0x52 */ /* Window control registers */ reg_w_val(gspca_dev, 0x61, 0x80); /* use cmc_out */ reg_w_val(gspca_dev, 0x62, 0x02); reg_w_val(gspca_dev, 0x63, 0x03); reg_w_val(gspca_dev, 0x64, 0x14); 
reg_w_val(gspca_dev, 0x65, 0x0e); reg_w_val(gspca_dev, 0x66, 0x02); reg_w_val(gspca_dev, 0x67, 0x02); /**************************************/ reg_w_val(gspca_dev, ET_SYNCHRO, 0x8f); /* 0x68 */ reg_w_val(gspca_dev, ET_STARTX, 0x69); /* 0x6a //0x69 */ reg_w_val(gspca_dev, ET_STARTY, 0x0d); /* 0x0d //0x0c */ reg_w_val(gspca_dev, ET_WIDTH_LOW, 0x80); reg_w_val(gspca_dev, ET_HEIGTH_LOW, 0xe0); reg_w_val(gspca_dev, ET_W_H_HEIGTH, 0x60); /* 6d */ reg_w_val(gspca_dev, ET_REG6e, 0x86); reg_w_val(gspca_dev, ET_REG6f, 0x01); reg_w_val(gspca_dev, ET_REG70, 0x26); reg_w_val(gspca_dev, ET_REG71, 0x7a); reg_w_val(gspca_dev, ET_REG72, 0x01); /* Clock Pattern registers ***************** */ reg_w_val(gspca_dev, ET_REG73, 0x00); reg_w_val(gspca_dev, ET_REG74, 0x18); /* 0x28 */ reg_w_val(gspca_dev, ET_REG75, 0x0f); /* 0x01 */ /**********************************************/ reg_w_val(gspca_dev, 0x8a, 0x20); reg_w_val(gspca_dev, 0x8d, 0x0f); reg_w_val(gspca_dev, 0x8e, 0x08); /**************************************/ reg_w_val(gspca_dev, 0x03, 0x08); reg_w_val(gspca_dev, ET_PXL_CLK, 0x03); reg_w_val(gspca_dev, 0x81, 0xff); reg_w_val(gspca_dev, 0x80, 0x00); reg_w_val(gspca_dev, 0x81, 0xff); reg_w_val(gspca_dev, 0x80, 0x20); reg_w_val(gspca_dev, 0x03, 0x01); reg_w_val(gspca_dev, 0x03, 0x00); reg_w_val(gspca_dev, 0x03, 0x08); /********************************************/ /* reg_r(gspca_dev, ET_I2C_BASE, 1); always 0x40 as the pas106 ??? 
*/ /* set the sensor */ if (gspca_dev->cam.cam_mode[(int) gspca_dev->curr_mode].priv) value = 0x04; /* 320 */ else /* 640 */ value = 0x1e; /* 0x17 * setting PixelClock * 0x03 mean 24/(3+1) = 6 Mhz * 0x05 -> 24/(5+1) = 4 Mhz * 0x0b -> 24/(11+1) = 2 Mhz * 0x17 -> 24/(23+1) = 1 Mhz */ reg_w_val(gspca_dev, ET_PXL_CLK, value); /* now set by fifo the FormatLine setting */ reg_w(gspca_dev, 0x62, FormLine, 6); /* set exposure times [ 0..0x78] 0->longvalue 0x78->shortvalue */ reg_w_val(gspca_dev, 0x81, 0x47); /* 0x47; */ reg_w_val(gspca_dev, 0x80, 0x40); /* 0x40; */ /* Pedro change */ /* Brightness change Brith+ decrease value */ /* Brigth- increase value */ /* original value = 0x70; */ reg_w_val(gspca_dev, 0x81, 0x30); /* 0x20; - set brightness */ reg_w_val(gspca_dev, 0x80, 0x20); /* 0x20; */ } static void setbrightness(struct gspca_dev *gspca_dev, s32 val) { int i; for (i = 0; i < 4; i++) reg_w_val(gspca_dev, ET_O_RED + i, val); } static void setcontrast(struct gspca_dev *gspca_dev, s32 val) { __u8 RGBG[] = { 0x80, 0x80, 0x80, 0x80, 0x00, 0x00 }; memset(RGBG, val, sizeof(RGBG) - 2); reg_w(gspca_dev, ET_G_RED, RGBG, 6); } static void setcolors(struct gspca_dev *gspca_dev, s32 val) { struct sd *sd = (struct sd *) gspca_dev; __u8 I2cc[] = { 0x05, 0x02, 0x02, 0x05, 0x0d }; __u8 i2cflags = 0x01; /* __u8 green = 0; */ I2cc[3] = val; /* red */ I2cc[0] = 15 - val; /* blue */ /* green = 15 - ((((7*I2cc[0]) >> 2 ) + I2cc[3]) >> 1); */ /* I2cc[1] = I2cc[2] = green; */ if (sd->sensor == SENSOR_PAS106) { i2c_w(gspca_dev, PAS106_REG13, &i2cflags, 1, 3); i2c_w(gspca_dev, PAS106_REG9, I2cc, sizeof I2cc, 1); } } static s32 getcolors(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; if (sd->sensor == SENSOR_PAS106) { /* i2c_r(gspca_dev, PAS106_REG9); * blue */ i2c_r(gspca_dev, PAS106_REG9 + 3); /* red */ return gspca_dev->usb_buf[0] & 0x0f; } return 0; } static void setautogain(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; if (sd->autogain) 
sd->ag_cnt = AG_CNT_START; else sd->ag_cnt = -1; } static void Et_init1(struct gspca_dev *gspca_dev) { __u8 value; /* __u8 I2c0 [] = {0x0a, 0x12, 0x05, 0x22, 0xac, 0x00, 0x01, 0x00}; */ __u8 I2c0[] = { 0x0a, 0x12, 0x05, 0x6d, 0xcd, 0x00, 0x01, 0x00 }; /* try 1/120 0x6d 0xcd 0x40 */ /* __u8 I2c0 [] = {0x0a, 0x12, 0x05, 0xfe, 0xfe, 0xc0, 0x01, 0x00}; * 1/60000 hmm ?? */ gspca_dbg(gspca_dev, D_STREAM, "Open Init1 ET\n\n"); reg_w_val(gspca_dev, ET_GPIO_DIR_CTRL, 7); reg_r(gspca_dev, ET_GPIO_IN, 1); reg_w_val(gspca_dev, ET_RESET_ALL, 1); reg_w_val(gspca_dev, ET_RESET_ALL, 0); reg_w_val(gspca_dev, ET_ClCK, 0x10); reg_w_val(gspca_dev, ET_CTRL, 0x19); /* compression et subsampling */ if (gspca_dev->cam.cam_mode[(int) gspca_dev->curr_mode].priv) value = ET_COMP_VAL1; else value = ET_COMP_VAL0; gspca_dbg(gspca_dev, D_STREAM, "Open mode %d Compression %d\n", gspca_dev->cam.cam_mode[(int) gspca_dev->curr_mode].priv, value); reg_w_val(gspca_dev, ET_COMP, value); reg_w_val(gspca_dev, ET_MAXQt, 0x1d); reg_w_val(gspca_dev, ET_MINQt, 0x02); /* undocumented registers */ reg_w_val(gspca_dev, ET_REG1d, 0xff); reg_w_val(gspca_dev, ET_REG1e, 0xff); reg_w_val(gspca_dev, ET_REG1f, 0xff); reg_w_val(gspca_dev, ET_REG20, 0x35); reg_w_val(gspca_dev, ET_REG21, 0x01); reg_w_val(gspca_dev, ET_REG22, 0x00); reg_w_val(gspca_dev, ET_REG23, 0xf7); reg_w_val(gspca_dev, ET_REG24, 0xff); reg_w_val(gspca_dev, ET_REG25, 0x07); /* colors setting */ reg_w_val(gspca_dev, ET_G_RED, 0x80); reg_w_val(gspca_dev, ET_G_GREEN1, 0x80); reg_w_val(gspca_dev, ET_G_BLUE, 0x80); reg_w_val(gspca_dev, ET_G_GREEN2, 0x80); reg_w_val(gspca_dev, ET_G_GR_H, 0x00); reg_w_val(gspca_dev, ET_G_GB_H, 0x00); /* Window control registers */ reg_w_val(gspca_dev, ET_SYNCHRO, 0xf0); reg_w_val(gspca_dev, ET_STARTX, 0x56); /* 0x56 */ reg_w_val(gspca_dev, ET_STARTY, 0x05); /* 0x04 */ reg_w_val(gspca_dev, ET_WIDTH_LOW, 0x60); reg_w_val(gspca_dev, ET_HEIGTH_LOW, 0x20); reg_w_val(gspca_dev, ET_W_H_HEIGTH, 0x50); reg_w_val(gspca_dev, ET_REG6e, 
0x86); reg_w_val(gspca_dev, ET_REG6f, 0x01); reg_w_val(gspca_dev, ET_REG70, 0x86); reg_w_val(gspca_dev, ET_REG71, 0x14); reg_w_val(gspca_dev, ET_REG72, 0x00); /* Clock Pattern registers */ reg_w_val(gspca_dev, ET_REG73, 0x00); reg_w_val(gspca_dev, ET_REG74, 0x00); reg_w_val(gspca_dev, ET_REG75, 0x0a); reg_w_val(gspca_dev, ET_I2C_CLK, 0x04); reg_w_val(gspca_dev, ET_PXL_CLK, 0x01); /* set the sensor */ if (gspca_dev->cam.cam_mode[(int) gspca_dev->curr_mode].priv) { I2c0[0] = 0x06; i2c_w(gspca_dev, PAS106_REG2, I2c0, sizeof I2c0, 1); i2c_w(gspca_dev, PAS106_REG9, I2c2, sizeof I2c2, 1); value = 0x06; i2c_w(gspca_dev, PAS106_REG2, &value, 1, 1); i2c_w(gspca_dev, PAS106_REG3, I2c3, sizeof I2c3, 1); /* value = 0x1f; */ value = 0x04; i2c_w(gspca_dev, PAS106_REG0e, &value, 1, 1); } else { I2c0[0] = 0x0a; i2c_w(gspca_dev, PAS106_REG2, I2c0, sizeof I2c0, 1); i2c_w(gspca_dev, PAS106_REG9, I2c2, sizeof I2c2, 1); value = 0x0a; i2c_w(gspca_dev, PAS106_REG2, &value, 1, 1); i2c_w(gspca_dev, PAS106_REG3, I2c3, sizeof I2c3, 1); value = 0x04; /* value = 0x10; */ i2c_w(gspca_dev, PAS106_REG0e, &value, 1, 1); /* bit 2 enable bit 1:2 select 0 1 2 3 value = 0x07; * curve 0 * i2c_w(gspca_dev, PAS106_REG0f, &value, 1, 1); */ } /* value = 0x01; */ /* value = 0x22; */ /* i2c_w(gspca_dev, PAS106_REG5, &value, 1, 1); */ /* magnetude and sign bit for DAC */ i2c_w(gspca_dev, PAS106_REG7, I2c4, sizeof I2c4, 1); /* now set by fifo the whole colors setting */ reg_w(gspca_dev, ET_G_RED, GainRGBG, 6); setcolors(gspca_dev, getcolors(gspca_dev)); } /* this function is called at probe time */ static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct sd *sd = (struct sd *) gspca_dev; struct cam *cam; cam = &gspca_dev->cam; sd->sensor = id->driver_info; if (sd->sensor == SENSOR_PAS106) { cam->cam_mode = sif_mode; cam->nmodes = ARRAY_SIZE(sif_mode); } else { cam->cam_mode = vga_mode; cam->nmodes = ARRAY_SIZE(vga_mode); } sd->ag_cnt = -1; return 0; } /* this function is 
called at probe and resume time */ static int sd_init(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; if (sd->sensor == SENSOR_PAS106) Et_init1(gspca_dev); else Et_init2(gspca_dev); reg_w_val(gspca_dev, ET_RESET_ALL, 0x08); et_video(gspca_dev, 0); /* video off */ return 0; } /* -- start the camera -- */ static int sd_start(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; if (sd->sensor == SENSOR_PAS106) Et_init1(gspca_dev); else Et_init2(gspca_dev); setautogain(gspca_dev); reg_w_val(gspca_dev, ET_RESET_ALL, 0x08); et_video(gspca_dev, 1); /* video on */ return 0; } static void sd_stopN(struct gspca_dev *gspca_dev) { et_video(gspca_dev, 0); /* video off */ } static __u8 Et_getgainG(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; if (sd->sensor == SENSOR_PAS106) { i2c_r(gspca_dev, PAS106_REG0e); gspca_dbg(gspca_dev, D_CONF, "Etoms gain G %d\n", gspca_dev->usb_buf[0]); return gspca_dev->usb_buf[0]; } return 0x1f; } static void Et_setgainG(struct gspca_dev *gspca_dev, __u8 gain) { struct sd *sd = (struct sd *) gspca_dev; if (sd->sensor == SENSOR_PAS106) { __u8 i2cflags = 0x01; i2c_w(gspca_dev, PAS106_REG13, &i2cflags, 1, 3); i2c_w(gspca_dev, PAS106_REG0e, &gain, 1, 1); } } #define BLIMIT(bright) \ (u8)((bright > 0x1f) ? 0x1f : ((bright < 4) ? 3 : bright)) #define LIMIT(color) \ (u8)((color > 0xff) ? 0xff : ((color < 0) ? 
0 : color)) static void do_autogain(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; __u8 luma; __u8 luma_mean = 128; __u8 luma_delta = 20; __u8 spring = 4; int Gbright; __u8 r, g, b; if (sd->ag_cnt < 0) return; if (--sd->ag_cnt >= 0) return; sd->ag_cnt = AG_CNT_START; Gbright = Et_getgainG(gspca_dev); reg_r(gspca_dev, ET_LUMA_CENTER, 4); g = (gspca_dev->usb_buf[0] + gspca_dev->usb_buf[3]) >> 1; r = gspca_dev->usb_buf[1]; b = gspca_dev->usb_buf[2]; r = ((r << 8) - (r << 4) - (r << 3)) >> 10; b = ((b << 7) >> 10); g = ((g << 9) + (g << 7) + (g << 5)) >> 10; luma = LIMIT(r + g + b); gspca_dbg(gspca_dev, D_FRAM, "Etoms luma G %d\n", luma); if (luma < luma_mean - luma_delta || luma > luma_mean + luma_delta) { Gbright += (luma_mean - luma) >> spring; Gbright = BLIMIT(Gbright); gspca_dbg(gspca_dev, D_FRAM, "Etoms Gbright %d\n", Gbright); Et_setgainG(gspca_dev, (__u8) Gbright); } } #undef BLIMIT #undef LIMIT static void sd_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, /* isoc packet */ int len) /* iso packet length */ { int seqframe; seqframe = data[0] & 0x3f; len = (int) (((data[0] & 0xc0) << 2) | data[1]); if (seqframe == 0x3f) { gspca_dbg(gspca_dev, D_FRAM, "header packet found datalength %d !!\n", len); gspca_dbg(gspca_dev, D_FRAM, "G %d R %d G %d B %d", data[2], data[3], data[4], data[5]); data += 30; /* don't change datalength as the chips provided it */ gspca_frame_add(gspca_dev, LAST_PACKET, NULL, 0); gspca_frame_add(gspca_dev, FIRST_PACKET, data, len); return; } if (len) { data += 8; gspca_frame_add(gspca_dev, INTER_PACKET, data, len); } else { /* Drop Packet */ gspca_dev->last_packet_type = DISCARD_PACKET; } } static int sd_s_ctrl(struct v4l2_ctrl *ctrl) { struct gspca_dev *gspca_dev = container_of(ctrl->handler, struct gspca_dev, ctrl_handler); struct sd *sd = (struct sd *)gspca_dev; gspca_dev->usb_err = 0; if (!gspca_dev->streaming) return 0; switch (ctrl->id) { case V4L2_CID_BRIGHTNESS: setbrightness(gspca_dev, ctrl->val); break; case 
V4L2_CID_CONTRAST: setcontrast(gspca_dev, ctrl->val); break; case V4L2_CID_SATURATION: setcolors(gspca_dev, ctrl->val); break; case V4L2_CID_AUTOGAIN: sd->autogain = ctrl->val; setautogain(gspca_dev); break; } return gspca_dev->usb_err; } static const struct v4l2_ctrl_ops sd_ctrl_ops = { .s_ctrl = sd_s_ctrl, }; static int sd_init_controls(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *)gspca_dev; struct v4l2_ctrl_handler *hdl = &gspca_dev->ctrl_handler; gspca_dev->vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 4); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_BRIGHTNESS, 1, 127, 1, 63); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_CONTRAST, 0, 255, 1, 127); if (sd->sensor == SENSOR_PAS106) v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_SATURATION, 0, 15, 1, 7); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_AUTOGAIN, 0, 1, 1, 1); if (hdl->error) { pr_err("Could not initialize controls\n"); return hdl->error; } return 0; } /* sub-driver description */ static const struct sd_desc sd_desc = { .name = MODULE_NAME, .config = sd_config, .init = sd_init, .init_controls = sd_init_controls, .start = sd_start, .stopN = sd_stopN, .pkt_scan = sd_pkt_scan, .dq_callback = do_autogain, }; /* -- module initialisation -- */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x102c, 0x6151), .driver_info = SENSOR_PAS106}, {USB_DEVICE(0x102c, 0x6251), .driver_info = SENSOR_TAS5130CXX}, {} }; MODULE_DEVICE_TABLE(usb, device_table); /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver);
158 97 205 204 97 97 776 774 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 // SPDX-License-Identifier: GPL-2.0-only /* * Aug 8, 2011 Bob Pearson with help from Joakim Tjernlund and George Spelvin * cleaned up code to current version of sparse and added the slicing-by-8 * algorithm to the closely similar existing slicing-by-4 algorithm. * * Oct 15, 2000 Matt Domsch <Matt_Domsch@dell.com> * Nicer crc32 functions/docs submitted by linux@horizon.com. Thanks! * Code was from the public domain, copyright abandoned. Code was * subsequently included in the kernel, thus was re-licensed under the * GNU GPL v2. * * Oct 12, 2000 Matt Domsch <Matt_Domsch@dell.com> * Same crc32 function was used in 5 other places in the kernel. * I made one version, and deleted the others. * There are various incantations of crc32(). Some use a seed of 0 or ~0. * Some xor at the end with ~0. The generic crc32() function takes * seed as an argument, and doesn't xor at the end. Then individual * users can do whatever they need. * drivers/net/smc9194.c uses seed ~0, doesn't xor with ~0. * fs/jffs2 uses seed 0, doesn't xor with ~0. * fs/partitions/efi.c uses seed ~0, xor's with ~0. 
*/ /* see: Documentation/staging/crc32.rst for a description of algorithms */ #include <linux/crc32.h> #include <linux/export.h> #include <linux/module.h> #include <linux/types.h> #include "crc32table.h" static inline u32 __maybe_unused crc32_le_base(u32 crc, const u8 *p, size_t len) { while (len--) crc = (crc >> 8) ^ crc32table_le[(crc & 255) ^ *p++]; return crc; } static inline u32 __maybe_unused crc32_be_base(u32 crc, const u8 *p, size_t len) { while (len--) crc = (crc << 8) ^ crc32table_be[(crc >> 24) ^ *p++]; return crc; } static inline u32 __maybe_unused crc32c_base(u32 crc, const u8 *p, size_t len) { while (len--) crc = (crc >> 8) ^ crc32ctable_le[(crc & 255) ^ *p++]; return crc; } #ifdef CONFIG_CRC32_ARCH #include "crc32.h" /* $(SRCARCH)/crc32.h */ u32 crc32_optimizations(void) { return crc32_optimizations_arch(); } EXPORT_SYMBOL(crc32_optimizations); #else #define crc32_le_arch crc32_le_base #define crc32_be_arch crc32_be_base #define crc32c_arch crc32c_base #endif u32 crc32_le(u32 crc, const void *p, size_t len) { return crc32_le_arch(crc, p, len); } EXPORT_SYMBOL(crc32_le); u32 crc32_be(u32 crc, const void *p, size_t len) { return crc32_be_arch(crc, p, len); } EXPORT_SYMBOL(crc32_be); u32 crc32c(u32 crc, const void *p, size_t len) { return crc32c_arch(crc, p, len); } EXPORT_SYMBOL(crc32c); #ifdef crc32_mod_init_arch static int __init crc32_mod_init(void) { crc32_mod_init_arch(); return 0; } subsys_initcall(crc32_mod_init); static void __exit crc32_mod_exit(void) { } module_exit(crc32_mod_exit); #endif MODULE_DESCRIPTION("CRC32 library functions"); MODULE_LICENSE("GPL");
37 2 37 2 8 8 7 19 4 4 4 6 10 6 6 4 4 30 30 31 9 9 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
 *
 * Development of this code funded by Astaro AG (http://www.astaro.com/)
 */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/u64_stats_sync.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables_offload.h>

/* One per-CPU slot of a counter. */
struct nft_counter {
	u64_stats_t	bytes;
	u64_stats_t	packets;
};

/* Sum over all CPUs, as produced by nft_counter_fetch(). */
struct nft_counter_tot {
	s64		bytes;
	s64		packets;
};

/* Private data shared by the counter expression and the counter object. */
struct nft_counter_percpu_priv {
	struct nft_counter __percpu *counter;
};

/* A single per-CPU sync object is shared by every counter instance. */
static DEFINE_PER_CPU(struct u64_stats_sync, nft_counter_sync);

/* control plane only: sync fetch+reset */
static DEFINE_SPINLOCK(nft_counter_lock);

/*
 * Account one packet: with bottom halves disabled, add pkt->skb->len to
 * the bytes and 1 to the packets of this CPU's slot, inside an update
 * section on this CPU's nft_counter_sync.  @regs is not used.
 */
static inline void nft_counter_do_eval(struct nft_counter_percpu_priv *priv,
				       struct nft_regs *regs,
				       const struct nft_pktinfo *pkt)
{
	struct u64_stats_sync *nft_sync;
	struct nft_counter *this_cpu;

	local_bh_disable();
	this_cpu = this_cpu_ptr(priv->counter);
	nft_sync = this_cpu_ptr(&nft_counter_sync);

	u64_stats_update_begin(nft_sync);
	u64_stats_add(&this_cpu->bytes, pkt->skb->len);
	u64_stats_inc(&this_cpu->packets);
	u64_stats_update_end(nft_sync);

	local_bh_enable();
}

/* Object flavour of eval: look up the private data and account the packet. */
static inline void nft_counter_obj_eval(struct nft_object *obj,
					struct nft_regs *regs,
					const struct nft_pktinfo *pkt)
{
	struct nft_counter_percpu_priv *priv = nft_obj_data(obj);

	nft_counter_do_eval(priv, regs, pkt);
}

/*
 * Allocate the per-CPU slots for a new counter and store them in @priv.
 *
 * Optional NFTA_COUNTER_PACKETS / NFTA_COUNTER_BYTES attributes give the
 * initial values; they are written into the single slot returned by
 * raw_cpu_ptr().
 *
 * Returns 0, or -ENOMEM if the allocation fails.
 */
static int nft_counter_do_init(const struct nlattr * const tb[],
			       struct nft_counter_percpu_priv *priv)
{
	struct nft_counter __percpu *cpu_stats;
	struct nft_counter *this_cpu;

	cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_KERNEL_ACCOUNT);
	if (cpu_stats == NULL)
		return -ENOMEM;

	this_cpu = raw_cpu_ptr(cpu_stats);
	if (tb[NFTA_COUNTER_PACKETS]) {
		u64_stats_set(&this_cpu->packets,
			      be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])));
	}
	if (tb[NFTA_COUNTER_BYTES]) {
		u64_stats_set(&this_cpu->bytes,
			      be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])));
	}

	priv->counter = cpu_stats;
	return 0;
}

/* Object flavour of init; @ctx is not used. */
static int nft_counter_obj_init(const struct nft_ctx *ctx,
				const struct nlattr * const tb[],
				struct nft_object *obj)
{
	struct nft_counter_percpu_priv *priv = nft_obj_data(obj);

	return nft_counter_do_init(tb, priv);
}

/* Release the per-CPU slots. */
static void nft_counter_do_destroy(struct nft_counter_percpu_priv *priv)
{
	free_percpu(priv->counter);
}

/* Object flavour of destroy; @ctx is not used. */
static void nft_counter_obj_destroy(const struct nft_ctx *ctx,
				    struct nft_object *obj)
{
	struct nft_counter_percpu_priv *priv = nft_obj_data(obj);

	nft_counter_do_destroy(priv);
}

/*
 * Subtract @total from this CPU's slot.  Used after a fetch so that the
 * sum over all CPUs drops by exactly what was just reported.
 */
static void nft_counter_reset(struct nft_counter_percpu_priv *priv,
			      struct nft_counter_tot *total)
{
	struct u64_stats_sync *nft_sync;
	struct nft_counter *this_cpu;

	local_bh_disable();
	this_cpu = this_cpu_ptr(priv->counter);
	nft_sync = this_cpu_ptr(&nft_counter_sync);

	u64_stats_update_begin(nft_sync);
	u64_stats_sub(&this_cpu->packets, total->packets);
	u64_stats_sub(&this_cpu->bytes, total->bytes);
	u64_stats_update_end(nft_sync);

	local_bh_enable();
}

/*
 * Zero @total, then add up bytes and packets over every possible CPU.
 * Each slot is read inside a fetch_begin/fetch_retry loop on that CPU's
 * nft_counter_sync, so a pair is re-read if an update raced with it.
 */
static void nft_counter_fetch(struct nft_counter_percpu_priv *priv,
			      struct nft_counter_tot *total)
{
	struct nft_counter *this_cpu;
	u64 bytes, packets;
	unsigned int seq;
	int cpu;

	memset(total, 0, sizeof(*total));
	for_each_possible_cpu(cpu) {
		struct u64_stats_sync *nft_sync = per_cpu_ptr(&nft_counter_sync, cpu);

		this_cpu = per_cpu_ptr(priv->counter, cpu);
		do {
			seq	= u64_stats_fetch_begin(nft_sync);
			bytes	= u64_stats_read(&this_cpu->bytes);
			packets	= u64_stats_read(&this_cpu->packets);
		} while (u64_stats_fetch_retry(nft_sync, seq));

		total->bytes	+= bytes;
		total->packets	+= packets;
	}
}

/* Fetch the totals and subtract them again, both under nft_counter_lock. */
static void nft_counter_fetch_and_reset(struct nft_counter_percpu_priv *priv,
					struct nft_counter_tot *total)
{
	spin_lock(&nft_counter_lock);
	nft_counter_fetch(priv, total);
	nft_counter_reset(priv, total);
	spin_unlock(&nft_counter_lock);
}

/*
 * Put the totals into @skb as big-endian 64-bit NFTA_COUNTER_BYTES and
 * NFTA_COUNTER_PACKETS attributes; with @reset the counter is reset as
 * part of the fetch.
 *
 * Returns 0, or -1 if either attribute could not be added.
 */
static int nft_counter_do_dump(struct sk_buff *skb,
			       struct nft_counter_percpu_priv *priv,
			       bool reset)
{
	struct nft_counter_tot total;

	if (unlikely(reset))
		nft_counter_fetch_and_reset(priv, &total);
	else
		nft_counter_fetch(priv, &total);

	if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes),
			 NFTA_COUNTER_PAD) ||
	    nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets),
			 NFTA_COUNTER_PAD))
		goto nla_put_failure;
	return 0;

nla_put_failure:
	return -1;
}

/* Object flavour of dump. */
static int nft_counter_obj_dump(struct sk_buff *skb,
				struct nft_object *obj, bool reset)
{
	struct nft_counter_percpu_priv *priv = nft_obj_data(obj);

	return nft_counter_do_dump(skb, priv, reset);
}

/* Both attributes are 64-bit values. */
static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
	[NFTA_COUNTER_PACKETS]	= { .type = NLA_U64 },
	[NFTA_COUNTER_BYTES]	= { .type = NLA_U64 },
};

/* Tentative definition: the ops below need its address before it is filled in. */
struct nft_object_type nft_counter_obj_type;
static const struct nft_object_ops nft_counter_obj_ops = {
	.type		= &nft_counter_obj_type,
	.size		= sizeof(struct nft_counter_percpu_priv),
	.eval		= nft_counter_obj_eval,
	.init		= nft_counter_obj_init,
	.destroy	= nft_counter_obj_destroy,
	.dump		= nft_counter_obj_dump,
};

struct nft_object_type nft_counter_obj_type __read_mostly = {
	.type		= NFT_OBJECT_COUNTER,
	.ops		= &nft_counter_obj_ops,
	.maxattr	= NFTA_COUNTER_MAX,
	.policy		= nft_counter_policy,
	.owner		= THIS_MODULE,
};

/* Expression flavour of eval (not static: visible outside this file). */
void nft_counter_eval(const struct nft_expr *expr, struct nft_regs *regs,
		      const struct nft_pktinfo *pkt)
{
	struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);

	nft_counter_do_eval(priv, regs, pkt);
}

/* Expression flavour of dump. */
static int nft_counter_dump(struct sk_buff *skb,
			    const struct nft_expr *expr, bool reset)
{
	struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);

	return nft_counter_do_dump(skb, priv, reset);
}

/* Expression flavour of init; @ctx is not used. */
static int nft_counter_init(const struct nft_ctx *ctx,
			    const struct nft_expr *expr,
			    const struct nlattr * const tb[])
{
	struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);

	return nft_counter_do_init(tb, priv);
}

/* Expression flavour of destroy; @ctx is not used. */
static void nft_counter_destroy(const struct nft_ctx *ctx,
				const struct nft_expr *expr)
{
	struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);

	nft_counter_do_destroy(priv);
}

/*
 * Give @dst its own per-CPU slots (allocated with @gfp) holding the
 * current totals of @src; the totals are stored in the single slot
 * returned by raw_cpu_ptr().
 *
 * Returns 0, or -ENOMEM if the allocation fails.
 */
static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp)
{
	struct nft_counter_percpu_priv *priv = nft_expr_priv(src);
	struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst);
	struct nft_counter __percpu *cpu_stats;
	struct nft_counter *this_cpu;
	struct nft_counter_tot total;

	nft_counter_fetch(priv, &total);

	cpu_stats = alloc_percpu_gfp(struct nft_counter, gfp);
	if (cpu_stats == NULL)
		return -ENOMEM;

	this_cpu = raw_cpu_ptr(cpu_stats);
	u64_stats_set(&this_cpu->packets, total.packets);
	u64_stats_set(&this_cpu->bytes, total.bytes);

	priv_clone->counter = cpu_stats;
	return 0;
}

static int nft_counter_offload(struct nft_offload_ctx *ctx,
			       struct nft_flow_rule *flow,
			       const struct nft_expr *expr)
{
	/* No specific offload action is needed, but report success. */
	return 0;
}

/*
 * Add the packet and byte counts in @stats to this CPU's slot, using the
 * same BH-disabled update section as nft_counter_do_eval().
 */
static void nft_counter_offload_stats(struct nft_expr *expr,
				      const struct flow_stats *stats)
{
	struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
	struct u64_stats_sync *nft_sync;
	struct nft_counter *this_cpu;

	local_bh_disable();
	this_cpu = this_cpu_ptr(priv->counter);
	nft_sync = this_cpu_ptr(&nft_counter_sync);

	u64_stats_update_begin(nft_sync);
	u64_stats_add(&this_cpu->packets, stats->pkts);
	u64_stats_add(&this_cpu->bytes, stats->bytes);
	u64_stats_update_end(nft_sync);
	local_bh_enable();
}

/* Initialise nft_counter_sync for every possible CPU. */
void nft_counter_init_seqcount(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		u64_stats_init(per_cpu_ptr(&nft_counter_sync, cpu));
}

/* Tentative definition: the ops below need its address before it is filled in. */
struct nft_expr_type nft_counter_type;
static const struct nft_expr_ops nft_counter_ops = {
	.type		= &nft_counter_type,
	.size		= NFT_EXPR_SIZE(sizeof(struct nft_counter_percpu_priv)),
	.eval		= nft_counter_eval,
	.init		= nft_counter_init,
	.destroy	= nft_counter_destroy,
	.destroy_clone	= nft_counter_destroy,
	.dump		= nft_counter_dump,
	.clone		= nft_counter_clone,
	.reduce		= NFT_REDUCE_READONLY,
	.offload	= nft_counter_offload,
	.offload_stats	= nft_counter_offload_stats,
};

struct nft_expr_type nft_counter_type __read_mostly = {
	.name		= "counter",
	.ops		= &nft_counter_ops,
	.policy		= nft_counter_policy,
	.maxattr	= NFTA_COUNTER_MAX,
	.flags		= NFT_EXPR_STATEFUL,
	.owner		= THIS_MODULE,
};
16 2213 2214 2 13 15 13 2 15 15 15 3 12 16 16 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 
516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849
// SPDX-License-Identifier: GPL-2.0
/*
 * usb port device code
 *
 * Copyright (C) 2012 Intel Corp
 *
 * Author: Lan Tianyu <tianyu.lan@intel.com>
 */

#include <linux/kstrtox.h>
#include <linux/slab.h>
#include <linux/string_choices.h>
#include <linux/sysfs.h>
#include <linux/pm_qos.h>
#include <linux/component.h>
#include <linux/usb/of.h>

#include "hub.h"

/* Set once peering fails; makes usb_port_runtime_suspend() return -EBUSY. */
static int usb_port_block_power_off;

static const struct attribute_group *port_dev_group[];

/* sysfs "early_stop" read: the flag formatted by str_yes_no(). */
static ssize_t early_stop_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct usb_port *port_dev = to_usb_port(dev);

	return sysfs_emit(buf, "%s\n", str_yes_no(port_dev->early_stop));
}

/*
 * sysfs "early_stop" write: accepts anything kstrtobool() understands.
 * Returns @count, or -EINVAL if the text cannot be parsed.
 */
static ssize_t early_stop_store(struct device *dev, struct device_attribute *attr,
				const char *buf, size_t count)
{
	struct usb_port *port_dev = to_usb_port(dev);
	bool value;

	if (kstrtobool(buf, &value))
		return -EINVAL;

	if (value)
		port_dev->early_stop = 1;
	else
		port_dev->early_stop = 0;

	return count;
}
static DEVICE_ATTR_RW(early_stop);

/*
 * sysfs "disable" read: "1" when the port is not powered, "0" otherwise.
 *
 * The hub is pinned and resumed, sysfs active protection is broken and
 * the hub device is locked before the port status is queried; everything
 * is undone in reverse order through the out_* labels.
 *
 * Returns -ENODEV when there is no hub, the attribute is already being
 * removed, or the hub has been disconnected, and the error from
 * usb_autopm_get_interface() if the hub cannot be resumed.
 */
static ssize_t disable_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct usb_port *port_dev = to_usb_port(dev);
	struct usb_device *hdev = to_usb_device(dev->parent->parent);
	struct usb_hub *hub = usb_hub_to_struct_hub(hdev);
	struct usb_interface *intf = to_usb_interface(dev->parent);
	int port1 = port_dev->portnum;
	u16 portstatus, unused;
	bool disabled;
	int rc;
	struct kernfs_node *kn;

	if (!hub)
		return -ENODEV;
	hub_get(hub);
	rc = usb_autopm_get_interface(intf);
	if (rc < 0)
		goto out_hub_get;

	/*
	 * Prevent deadlock if another process is concurrently
	 * trying to unregister hdev.
	 */
	kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
	if (!kn) {
		rc = -ENODEV;
		goto out_autopm;
	}
	usb_lock_device(hdev);
	if (hub->disconnected) {
		rc = -ENODEV;
		goto out_hdev_lock;
	}

	/* NOTE(review): the return value of usb_hub_port_status() is not checked */
	usb_hub_port_status(hub, port1, &portstatus, &unused);
	disabled = !usb_port_is_power_on(hub, portstatus);

 out_hdev_lock:
	usb_unlock_device(hdev);
	sysfs_unbreak_active_protection(kn);
 out_autopm:
	usb_autopm_put_interface(intf);
 out_hub_get:
	hub_put(hub);

	if (rc)
		return rc;

	return sysfs_emit(buf, "%s\n", disabled ? "1" : "0");
}

/*
 * sysfs "disable" write: a true value disconnects any child device and
 * powers the port off, a false value powers it on.
 *
 * Uses the same pin / resume / break-protection / lock sequence as
 * disable_show().  When disabling, the connection change feature (and,
 * for non-SuperSpeed ports, the enable change feature) is cleared after
 * the power switch.
 *
 * Returns @count on success, the kstrtobool() error for unparsable input,
 * -ENODEV as in disable_show(), or the error from
 * usb_hub_set_port_power().
 */
static ssize_t disable_store(struct device *dev, struct device_attribute *attr,
			    const char *buf, size_t count)
{
	struct usb_port *port_dev = to_usb_port(dev);
	struct usb_device *hdev = to_usb_device(dev->parent->parent);
	struct usb_hub *hub = usb_hub_to_struct_hub(hdev);
	struct usb_interface *intf = to_usb_interface(dev->parent);
	int port1 = port_dev->portnum;
	bool disabled;
	int rc;
	struct kernfs_node *kn;

	if (!hub)
		return -ENODEV;
	rc = kstrtobool(buf, &disabled);
	if (rc)
		return rc;

	hub_get(hub);
	rc = usb_autopm_get_interface(intf);
	if (rc < 0)
		goto out_hub_get;

	/*
	 * Prevent deadlock if another process is concurrently
	 * trying to unregister hdev.
	 */
	kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
	if (!kn) {
		rc = -ENODEV;
		goto out_autopm;
	}
	usb_lock_device(hdev);
	if (hub->disconnected) {
		rc = -ENODEV;
		goto out_hdev_lock;
	}

	if (disabled && port_dev->child)
		usb_disconnect(&port_dev->child);

	rc = usb_hub_set_port_power(hdev, hub, port1, !disabled);

	if (disabled) {
		usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_CONNECTION);
		if (!port_dev->is_superspeed)
			usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_ENABLE);
	}

	if (!rc)
		rc = count;

 out_hdev_lock:
	usb_unlock_device(hdev);
	sysfs_unbreak_active_protection(kn);
 out_autopm:
	usb_autopm_put_interface(intf);
 out_hub_get:
	hub_put(hub);

	return rc;
}
static DEVICE_ATTR_RW(disable);

/* sysfs "location" read: the location value as 0x%08x. */
static ssize_t location_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	struct usb_port *port_dev = to_usb_port(dev);

	return sysfs_emit(buf, "0x%08x\n", port_dev->location);
}
static DEVICE_ATTR_RO(location);

/* sysfs "connect_type" read: a fixed string per connect type. */
static ssize_t connect_type_show(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	struct usb_port *port_dev = to_usb_port(dev);
	char *result;

	switch (port_dev->connect_type) {
	case USB_PORT_CONNECT_TYPE_HOT_PLUG:
		result = "hotplug";
		break;
	case USB_PORT_CONNECT_TYPE_HARD_WIRED:
		result = "hardwired";
		break;
	case USB_PORT_NOT_USED:
		result = "not used";
		break;
	default:
		result = "unknown";
		break;
	}

	return sysfs_emit(buf, "%s\n", result);
}
static DEVICE_ATTR_RO(connect_type);

/* sysfs "state" read: port state (read once) via usb_state_string(). */
static ssize_t state_show(struct device *dev,
			  struct device_attribute *attr, char *buf)
{
	struct usb_port *port_dev = to_usb_port(dev);
	enum usb_device_state state = READ_ONCE(port_dev->state);

	return sysfs_emit(buf, "%s\n", usb_state_string(state));
}
static DEVICE_ATTR_RO(state);

/* sysfs "over_current_count" read. */
static ssize_t over_current_count_show(struct device *dev,
				       struct device_attribute *attr, char *buf)
{
	struct usb_port *port_dev = to_usb_port(dev);

	return sysfs_emit(buf, "%u\n", port_dev->over_current_count);
}
static DEVICE_ATTR_RO(over_current_count);

/* sysfs "quirks" read: the quirk bits as 8 hex digits. */
static ssize_t quirks_show(struct device *dev,
			   struct device_attribute *attr, char *buf)
{
	struct usb_port *port_dev = to_usb_port(dev);

	return sysfs_emit(buf, "%08x\n", port_dev->quirks);
}

/*
 * sysfs "quirks" write: the input is parsed as a hexadecimal u32.
 * Returns @count, or -EINVAL if parsing fails.
 */
static ssize_t quirks_store(struct device *dev, struct device_attribute *attr,
			    const char *buf, size_t count)
{
	struct usb_port *port_dev = to_usb_port(dev);
	u32 value;

	if (kstrtou32(buf, 16, &value))
		return -EINVAL;

	port_dev->quirks = value;
	return count;
}
static DEVICE_ATTR_RW(quirks);

/* sysfs "usb3_lpm_permit" read: "u1_u2", "u1", "u2" or "0". */
static ssize_t usb3_lpm_permit_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct usb_port *port_dev = to_usb_port(dev);
	const char *p;

	if (port_dev->usb3_lpm_u1_permit) {
		if (port_dev->usb3_lpm_u2_permit)
			p = "u1_u2";
		else
			p = "u1";
	} else {
		if (port_dev->usb3_lpm_u2_permit)
			p = "u2";
		else
			p = "0";
	}

	return sysfs_emit(buf, "%s\n", p);
}

/*
 * sysfs "usb3_lpm_permit" write.
 *
 * Matching is by prefix (strncmp), and "u1_u2" is tested before "u1" so
 * the longer keyword wins.  Returns @count, or -EINVAL for an unknown
 * keyword or when the child's bus has no hcd.
 */
static ssize_t usb3_lpm_permit_store(struct device *dev,
			       struct device_attribute *attr,
			       const char *buf, size_t count)
{
	struct usb_port *port_dev = to_usb_port(dev);
	struct usb_device *udev = port_dev->child;
	struct usb_hcd *hcd;

	if (!strncmp(buf, "u1_u2", 5)) {
		port_dev->usb3_lpm_u1_permit = 1;
		port_dev->usb3_lpm_u2_permit = 1;

	} else if (!strncmp(buf, "u1", 2)) {
		port_dev->usb3_lpm_u1_permit = 1;
		port_dev->usb3_lpm_u2_permit = 0;

	} else if (!strncmp(buf, "u2", 2)) {
		port_dev->usb3_lpm_u1_permit = 0;
		port_dev->usb3_lpm_u2_permit = 1;

	} else if (!strncmp(buf, "0", 1)) {
		port_dev->usb3_lpm_u1_permit = 0;
		port_dev->usb3_lpm_u2_permit = 0;
	} else
		return -EINVAL;

	/* If device is connected to the port, disable or enable lpm
	 * to make new u1 u2 setting take effect immediately.
	 */
	if (udev) {
		hcd = bus_to_hcd(udev->bus);
		if (!hcd)
			return -EINVAL;
		usb_lock_device(udev);
		mutex_lock(hcd->bandwidth_mutex);
		if (!usb_disable_lpm(udev))
			usb_enable_lpm(udev);
		mutex_unlock(hcd->bandwidth_mutex);
		usb_unlock_device(udev);
	}

	return count;
}
static DEVICE_ATTR_RW(usb3_lpm_permit);

/* Attributes present on every port. */
static struct attribute *port_dev_attrs[] = {
	&dev_attr_connect_type.attr,
	&dev_attr_state.attr,
	&dev_attr_location.attr,
	&dev_attr_quirks.attr,
	&dev_attr_over_current_count.attr,
	&dev_attr_disable.attr,
	&dev_attr_early_stop.attr,
	NULL,
};

static const struct attribute_group port_dev_attr_grp = {
	.attrs = port_dev_attrs,
};

static const struct attribute_group *port_dev_group[] = {
	&port_dev_attr_grp,
	NULL,
};

/* Extra attribute carried by the usb3 group list below. */
static struct attribute *port_dev_usb3_attrs[] = {
	&dev_attr_usb3_lpm_permit.attr,
	NULL,
};

static const struct attribute_group port_dev_usb3_attr_grp = {
	.attrs = port_dev_usb3_attrs,
};

/* Common attributes plus the usb3-only one. */
static const struct attribute_group *port_dev_usb3_group[] = {
	&port_dev_attr_grp,
	&port_dev_usb3_attr_grp,
	NULL,
};

/* Device release callback: frees port_dev->req and the port itself. */
static void usb_port_device_release(struct device *dev)
{
	struct usb_port *port_dev = to_usb_port(dev);

	kfree(port_dev->req);
	kfree(port_dev);
}

#ifdef CONFIG_PM
/*
 * Runtime resume: power the port back on.
 *
 * While the hub is in reset only the port's bit in hub->power_bits is
 * set and 0 is returned.  Returns -EINVAL without a hub, the error from
 * usb_autopm_get_interface(), or the result of usb_hub_set_port_power().
 */
static int usb_port_runtime_resume(struct device *dev)
{
	struct usb_port *port_dev = to_usb_port(dev);
	struct usb_device *hdev = to_usb_device(dev->parent->parent);
	struct usb_interface *intf = to_usb_interface(dev->parent);
	struct usb_hub *hub = usb_hub_to_struct_hub(hdev);
	struct usb_device *udev = port_dev->child;
	struct usb_port *peer = port_dev->peer;
	int port1 = port_dev->portnum;
	int retval;

	if (!hub)
		return -EINVAL;
	if (hub->in_reset) {
		set_bit(port1, hub->power_bits);
		return 0;
	}

	/*
	 * Power on our usb3 peer before this usb2 port to prevent a usb3
	 * device from degrading to its usb2 connection
	 */
	if (!port_dev->is_superspeed && peer)
		pm_runtime_get_sync(&peer->dev);

	retval = usb_autopm_get_interface(intf);
	if (retval < 0)
		return retval;

	retval = usb_hub_set_port_power(hdev, hub, port1, true);
	msleep(hub_power_on_good_delay(hub));
	if (udev && !retval) {
		/*
		 * Our preference is to simply wait for the port to reconnect,
		 * as that is the lowest latency method to restart the port.
		 * However, there are cases where toggling port power results in
		 * the host port and the device port getting out of sync causing
		 * a link training live lock.  Upon timeout, flag the port as
		 * needing warm reset recovery (to be performed later by
		 * usb_port_resume() as requested via usb_wakeup_notification())
		 */
		if (hub_port_debounce_be_connected(hub, port1) < 0) {
			dev_dbg(&port_dev->dev, "reconnect timeout\n");
			if (hub_is_superspeed(hdev))
				set_bit(port1, hub->warm_reset_bits);
		}

		/* Force the child awake to revalidate after the power loss. */
		if (!test_and_set_bit(port1, hub->child_usage_bits)) {
			pm_runtime_get_noresume(&port_dev->dev);
			pm_request_resume(&udev->dev);
		}
	}

	usb_autopm_put_interface(intf);

	return retval;
}

/*
 * Runtime suspend: power the port off.
 *
 * Refused with -EBUSY while the hub is in reset or once
 * usb_port_block_power_off is set, and with -EAGAIN when the
 * PM_QOS_FLAG_NO_POWER_OFF flag is set for the port.  Returns -EINVAL
 * without a hub, the error from usb_autopm_get_interface(), or the
 * result of usb_hub_set_port_power().
 */
static int usb_port_runtime_suspend(struct device *dev)
{
	struct usb_port *port_dev = to_usb_port(dev);
	struct usb_device *hdev = to_usb_device(dev->parent->parent);
	struct usb_interface *intf = to_usb_interface(dev->parent);
	struct usb_hub *hub = usb_hub_to_struct_hub(hdev);
	struct usb_port *peer = port_dev->peer;
	int port1 = port_dev->portnum;
	int retval;

	if (!hub)
		return -EINVAL;
	if (hub->in_reset)
		return -EBUSY;

	if (dev_pm_qos_flags(&port_dev->dev, PM_QOS_FLAG_NO_POWER_OFF)
			== PM_QOS_FLAGS_ALL)
		return -EAGAIN;

	if (usb_port_block_power_off)
		return -EBUSY;

	retval = usb_autopm_get_interface(intf);
	if (retval < 0)
		return retval;

	retval = usb_hub_set_port_power(hdev, hub, port1, false);
	usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_CONNECTION);
	if (!port_dev->is_superspeed)
		usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_ENABLE);
	usb_autopm_put_interface(intf);

	/*
	 * Our peer usb3 port may now be able to suspend, so
	 * asynchronously queue a suspend request to observe that this
	 * usb2 port is now off.
	 */
	if (!port_dev->is_superspeed && peer)
		pm_runtime_put(&peer->dev);

	return retval;
}
#endif

/*
 * Shutdown callback: for a child that is present and not suspended,
 * disable USB2 hardware LPM and LPM.
 */
static void usb_port_shutdown(struct device *dev)
{
	struct usb_port *port_dev = to_usb_port(dev);
	struct usb_device *udev = port_dev->child;

	if (udev && !udev->port_is_suspended) {
		usb_disable_usb2_hardware_lpm(udev);
		usb_unlocked_disable_lpm(udev);
	}
}

static const struct dev_pm_ops usb_port_pm_ops = {
#ifdef CONFIG_PM
	.runtime_suspend =	usb_port_runtime_suspend,
	.runtime_resume =	usb_port_runtime_resume,
#endif
};

const struct device_type usb_port_device_type = {
	.name =		"usb_port",
	.release =	usb_port_device_release,
	.pm =		&usb_port_pm_ops,
};

static struct device_driver usb_port_driver = {
	.name = "usb",
	.owner = THIS_MODULE,
	.shutdown = usb_port_shutdown,
};

/*
 * Make @left and @right peers of each other.
 *
 * Returns 0 if they already are (or now are) peers, -EBUSY if either one
 * already has a different peer, or the error from sysfs_create_link().
 * Each port gets a "peer" symlink to the other; the first link is removed
 * again if the second cannot be created.
 */
static int link_peers(struct usb_port *left, struct usb_port *right)
{
	struct usb_port *ss_port, *hs_port;
	int rc;

	if (left->peer == right && right->peer == left)
		return 0;

	if (left->peer || right->peer) {
		struct usb_port *lpeer = left->peer;
		struct usb_port *rpeer = right->peer;
		char *method;

		if (left->location && left->location == right->location)
			method = "location";
		else
			method = "default";

		pr_debug("usb: failed to peer %s and %s by %s (%s:%s) (%s:%s)\n",
			dev_name(&left->dev), dev_name(&right->dev), method,
			dev_name(&left->dev),
			lpeer ? dev_name(&lpeer->dev) : "none",
			dev_name(&right->dev),
			rpeer ? dev_name(&rpeer->dev) : "none");
		return -EBUSY;
	}

	rc = sysfs_create_link(&left->dev.kobj, &right->dev.kobj, "peer");
	if (rc)
		return rc;
	rc = sysfs_create_link(&right->dev.kobj, &left->dev.kobj, "peer");
	if (rc) {
		sysfs_remove_link(&left->dev.kobj, "peer");
		return rc;
	}

	/*
	 * We need to wake the HiSpeed port to make sure we don't race
	 * setting ->peer with usb_port_runtime_suspend().  Otherwise we
	 * may miss a suspend event for the SuperSpeed port.
	 */
	if (left->is_superspeed) {
		ss_port = left;
		WARN_ON(right->is_superspeed);
		hs_port = right;
	} else {
		ss_port = right;
		WARN_ON(!right->is_superspeed);
		hs_port = left;
	}
	pm_runtime_get_sync(&hs_port->dev);

	left->peer = right;
	right->peer = left;

	/*
	 * The SuperSpeed reference is dropped when the HiSpeed port in
	 * this relationship suspends, i.e. when it is safe to allow a
	 * SuperSpeed connection to drop since there is no risk of a
	 * device degrading to its powered-off HiSpeed connection.
	 *
	 * Also, drop the HiSpeed ref taken above.
	 */
	pm_runtime_get_sync(&ss_port->dev);
	pm_runtime_put(&hs_port->dev);

	return 0;
}

/*
 * link_peers() plus reporting: on failure a one-time warning is printed
 * and usb_port_block_power_off is set, which stops ports from being
 * powered off at runtime suspend.
 */
static void link_peers_report(struct usb_port *left, struct usb_port *right)
{
	int rc;

	rc = link_peers(left, right);
	if (rc == 0) {
		dev_dbg(&left->dev, "peered to %s\n", dev_name(&right->dev));
	} else {
		dev_dbg(&left->dev, "failed to peer to %s (%d)\n",
				dev_name(&right->dev), rc);
		pr_warn_once("usb: port power management may be unreliable\n");
		usb_port_block_power_off = 1;
	}
}

/*
 * Undo link_peers(): remove both "peer" symlinks, clear both ->peer
 * pointers and drop the runtime PM references noted below.  Warns if the
 * two ports are not actually peers.
 */
static void unlink_peers(struct usb_port *left, struct usb_port *right)
{
	struct usb_port *ss_port, *hs_port;

	WARN(right->peer != left || left->peer != right,
			"%s and %s are not peers?\n",
			dev_name(&left->dev), dev_name(&right->dev));

	/*
	 * We wake the HiSpeed port to make sure we don't race its
	 * usb_port_runtime_resume() event which takes a SuperSpeed ref
	 * when ->peer is !NULL.
	 */
	if (left->is_superspeed) {
		ss_port = left;
		hs_port = right;
	} else {
		ss_port = right;
		hs_port = left;
	}

	pm_runtime_get_sync(&hs_port->dev);

	sysfs_remove_link(&left->dev.kobj, "peer");
	right->peer = NULL;
	sysfs_remove_link(&right->dev.kobj, "peer");
	left->peer = NULL;

	/* Drop the SuperSpeed ref held on behalf of the active HiSpeed port */
	pm_runtime_put(&ss_port->dev);

	/* Drop the ref taken above */
	pm_runtime_put(&hs_port->dev);
}

/*
 * For each usb hub device in the system check to see if it is in the
 * peer domain of the given port_dev, and if it is check to see if it
 * has a port that matches the given port by location
 *
 * @p is the struct usb_port being matched.  Returns 1 after attempting
 * to peer with the first port of @peer_hdev that is in use and has the
 * same location, 0 if this hub offers no candidate.
 */
static int match_location(struct usb_device *peer_hdev, void *p)
{
	int port1;
	struct usb_hcd *hcd, *peer_hcd;
	struct usb_port *port_dev = p, *peer;
	struct usb_hub *peer_hub = usb_hub_to_struct_hub(peer_hdev);
	struct usb_device *hdev = to_usb_device(port_dev->dev.parent->parent);

	if (!peer_hub || port_dev->connect_type == USB_PORT_NOT_USED)
		return 0;

	hcd = bus_to_hcd(hdev->bus);
	peer_hcd = bus_to_hcd(peer_hdev->bus);
	/* peer_hcd is provisional until we verify it against the known peer */
	if (peer_hcd != hcd->shared_hcd)
		return 0;

	for (port1 = 1; port1 <= peer_hdev->maxchild; port1++) {
		peer = peer_hub->ports[port1 - 1];
		if (peer && peer->connect_type != USB_PORT_NOT_USED &&
		    peer->location == port_dev->location) {
			link_peers_report(port_dev, peer);
			return 1; /* done */
		}
	}

	return 0;
}

/*
 * Find the peer port either via explicit platform firmware "location"
 * data, the peer hcd for root hubs, or the upstream peer relationship
 * for all other hubs.
*/
static void find_and_link_peer(struct usb_hub *hub, int port1)
{
	struct usb_port *port_dev = hub->ports[port1 - 1], *peer;
	struct usb_device *hdev = hub->hdev;
	struct usb_device *peer_hdev;
	struct usb_hub *peer_hub;

	/*
	 * If location data is available then we can only peer this port
	 * by a location match, not the default peer (lest we create a
	 * situation where we need to go back and undo a default peering
	 * when the port is later peered by location data)
	 */
	if (port_dev->location) {
		/* we link the peer in match_location() if found */
		usb_for_each_dev(port_dev, match_location);
		return;
	} else if (!hdev->parent) {
		/* Root hub: the candidate peer hub is the shared hcd's root hub. */
		struct usb_hcd *hcd = bus_to_hcd(hdev->bus);
		struct usb_hcd *peer_hcd = hcd->shared_hcd;

		if (!peer_hcd)
			return;

		peer_hdev = peer_hcd->self.root_hub;
	} else {
		/* Other hubs: follow the peer of the port this hub hangs off. */
		struct usb_port *upstream;
		struct usb_device *parent = hdev->parent;
		struct usb_hub *parent_hub = usb_hub_to_struct_hub(parent);

		if (!parent_hub)
			return;

		upstream = parent_hub->ports[hdev->portnum - 1];
		if (!upstream || !upstream->peer)
			return;

		peer_hdev = upstream->peer->child;
	}

	peer_hub = usb_hub_to_struct_hub(peer_hdev);
	if (!peer_hub || port1 > peer_hdev->maxchild)
		return;

	/*
	 * we found a valid default peer, last check is to make sure it
	 * does not have location data
	 */
	peer = peer_hub->ports[port1 - 1];
	if (peer && peer->location == 0)
		link_peers_report(port_dev, peer);
}

/*
 * Component bind callback: create the "connector" link on the port and a
 * link named after the port on the connector, store @data as the port's
 * connector, and call typec_attach() if a child device is already present.
 *
 * Returns 0 on success or the sysfs_create_link() error.
 */
static int connector_bind(struct device *dev, struct device *connector, void *data)
{
	struct usb_port *port_dev = to_usb_port(dev);
	int ret;

	ret = sysfs_create_link(&dev->kobj, &connector->kobj, "connector");
	if (ret)
		return ret;

	ret = sysfs_create_link(&connector->kobj, &dev->kobj, dev_name(dev));
	if (ret) {
		sysfs_remove_link(&dev->kobj, "connector");
		return ret;
	}

	port_dev->connector = data;

	/*
	 * If there is already USB device connected to the port, letting the
	 * Type-C connector know about it immediately.
	 */
	if (port_dev->child)
		typec_attach(port_dev->connector, &port_dev->child->dev);

	return 0;
}

/* Component unbind callback: remove both sysfs links and clear ->connector. */
static void connector_unbind(struct device *dev, struct device *connector, void *data)
{
	struct usb_port *port_dev = to_usb_port(dev);

	sysfs_remove_link(&connector->kobj, dev_name(dev));
	sysfs_remove_link(&dev->kobj, "connector");
	port_dev->connector = NULL;
}

static const struct component_ops connector_ops = {
	.bind = connector_bind,
	.unbind = connector_unbind,
};

/*
 * Allocate, initialise and register the port device for port @port1
 * (1-based) of @hub, and store it in hub->ports[port1 - 1].
 *
 * Returns 0 on success, -ENOMEM on allocation failure, -ENODEV when the
 * "state" sysfs entry cannot be looked up, or the error returned by
 * device_register(), dev_pm_qos_add_request() or component_add().
 */
int usb_hub_create_port_device(struct usb_hub *hub, int port1)
{
	struct usb_port *port_dev;
	struct usb_device *hdev = hub->hdev;
	int retval;

	port_dev = kzalloc_obj(*port_dev);
	if (!port_dev)
		return -ENOMEM;

	port_dev->req = kzalloc_obj(*(port_dev->req));
	if (!port_dev->req) {
		kfree(port_dev);
		return -ENOMEM;
	}

	port_dev->connect_type = usb_of_get_connect_type(hdev, port1);
	hub->ports[port1 - 1] = port_dev;
	port_dev->portnum = port1;
	set_bit(port1, hub->power_bits);
	port_dev->dev.parent = hub->intfdev;
	if (hub_is_superspeed(hdev)) {
		port_dev->is_superspeed = 1;
		port_dev->usb3_lpm_u1_permit = 1;
		port_dev->usb3_lpm_u2_permit = 1;
		port_dev->dev.groups = port_dev_usb3_group;
	} else
		port_dev->dev.groups = port_dev_group;
	port_dev->dev.type = &usb_port_device_type;
	port_dev->dev.driver = &usb_port_driver;
	dev_set_name(&port_dev->dev, "%s-port%d", dev_name(&hub->hdev->dev),
			port1);
	mutex_init(&port_dev->status_lock);
	retval = device_register(&port_dev->dev);
	if (retval) {
		/* After device_register() the device is dropped with put_device(). */
		put_device(&port_dev->dev);
		return retval;
	}

	port_dev->state_kn = sysfs_get_dirent(port_dev->dev.kobj.sd, "state");
	if (!port_dev->state_kn) {
		dev_err(&port_dev->dev, "failed to sysfs_get_dirent 'state'\n");
		retval = -ENODEV;
		goto err_unregister;
	}

	/* Set default policy of port-poweroff disabled. */
	retval = dev_pm_qos_add_request(&port_dev->dev, port_dev->req,
			DEV_PM_QOS_FLAGS, PM_QOS_FLAG_NO_POWER_OFF);
	if (retval < 0) {
		goto err_put_kn;
	}

	retval = component_add(&port_dev->dev, &connector_ops);
	if (retval) {
		dev_warn(&port_dev->dev, "failed to add component\n");
		goto err_put_kn;
	}

	find_and_link_peer(hub, port1);

	/*
	 * Enable runtime pm and hold a reference that hub_configure()
	 * will drop once the PM_QOS_NO_POWER_OFF flag state has been set
	 * and the hub has been fully registered (hdev->maxchild set).
	 */
	pm_runtime_set_active(&port_dev->dev);
	pm_runtime_get_noresume(&port_dev->dev);
	pm_runtime_enable(&port_dev->dev);
	device_enable_async_suspend(&port_dev->dev);

	/*
	 * Keep hidden the ability to enable port-poweroff if the hub
	 * does not support power switching.
	 */
	if (!hub_is_port_power_switchable(hub))
		return 0;

	/* Attempt to let userspace take over the policy. */
	retval = dev_pm_qos_expose_flags(&port_dev->dev,
			PM_QOS_FLAG_NO_POWER_OFF);
	if (retval < 0) {
		/* Not fatal: the kernel request added above stays in place. */
		dev_warn(&port_dev->dev, "failed to expose pm_qos_no_poweroff\n");
		return 0;
	}

	/* Userspace owns the policy, drop the kernel 'no_poweroff' request. */
	retval = dev_pm_qos_remove_request(port_dev->req);
	if (retval >= 0) {
		kfree(port_dev->req);
		port_dev->req = NULL;
	}
	return 0;

err_put_kn:
	sysfs_put(port_dev->state_kn);
err_unregister:
	device_unregister(&port_dev->dev);

	return retval;
}

/*
 * Tear down the port device for port @port1 (1-based) of @hub: unlink it
 * from its peer if it has one, remove the connector component, drop the
 * "state" sysfs reference and unregister the device.
 */
void usb_hub_remove_port_device(struct usb_hub *hub, int port1)
{
	struct usb_port *port_dev = hub->ports[port1 - 1];
	struct usb_port *peer;

	peer = port_dev->peer;
	if (peer)
		unlink_peers(port_dev, peer);
	component_del(&port_dev->dev, &connector_ops);
	sysfs_put(port_dev->state_kn);
	device_unregister(&port_dev->dev);
}
471 1 469 470 471 470 11 459 75 75 75 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 // SPDX-License-Identifier: GPL-2.0 /* * linux/drivers/char/misc.c * * Generic misc open routine by Johan Myreen * * Based on code from Linus * * Teemu Rantanen's Microsoft Busmouse support and Derrick Cole's * changes incorporated into 0.97pl4 * by Peter Cervasio (pete%q106fm.uucp@wupost.wustl.edu) (08SEP92) * See busmouse.c for particulars. * * Made things a lot mode modular - easy to compile in just one or two * of the misc drivers, as they are now completely independent. Linus. * * Support for loadable modules. 8-Sep-95 Philip Blundell <pjb27@cam.ac.uk> * * Fixed a failing symbol register to free the device registration * Alan Cox <alan@lxorguk.ukuu.org.uk> 21-Jan-96 * * Dynamic minors and /proc/mice by Alessandro Rubini. 
26-Mar-96 * * Renamed to misc and miscdevice to be more accurate. Alan Cox 26-Mar-96 * * Handling of mouse minor numbers for kerneld: * Idea by Jacques Gelinas <jack@solucorp.qc.ca>, * adapted by Bjorn Ekwall <bj0rn@blox.se> * corrected by Alan Cox <alan@lxorguk.ukuu.org.uk> * * Changes for kmod (from kerneld): * Cyrus Durgin <cider@speakeasy.org> * * Added devfs support. Richard Gooch <rgooch@atnf.csiro.au> 10-Jan-1998 */ #include <linux/module.h> #include <linux/fs.h> #include <linux/errno.h> #include <linux/miscdevice.h> #include <linux/kernel.h> #include <linux/major.h> #include <linux/mutex.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/stat.h> #include <linux/init.h> #include <linux/device.h> #include <linux/tty.h> #include <linux/kmod.h> #include <linux/gfp.h> /* * Head entry for the doubly linked miscdevice list */ static LIST_HEAD(misc_list); static DEFINE_MUTEX(misc_mtx); /* * Assigned numbers. */ static DEFINE_IDA(misc_minors_ida); static int misc_minor_alloc(int minor) { int ret = 0; if (minor == MISC_DYNAMIC_MINOR) { /* allocate free id */ ret = ida_alloc_range(&misc_minors_ida, MISC_DYNAMIC_MINOR + 1, MINORMASK, GFP_KERNEL); } else { ret = ida_alloc_range(&misc_minors_ida, minor, minor, GFP_KERNEL); } return ret; } static void misc_minor_free(int minor) { ida_free(&misc_minors_ida, minor); } #ifdef CONFIG_PROC_FS static void *misc_seq_start(struct seq_file *seq, loff_t *pos) { mutex_lock(&misc_mtx); return seq_list_start(&misc_list, *pos); } static void *misc_seq_next(struct seq_file *seq, void *v, loff_t *pos) { return seq_list_next(v, &misc_list, pos); } static void misc_seq_stop(struct seq_file *seq, void *v) { mutex_unlock(&misc_mtx); } static int misc_seq_show(struct seq_file *seq, void *v) { const struct miscdevice *p = list_entry(v, struct miscdevice, list); seq_printf(seq, "%3i %s\n", p->minor, p->name ? 
p->name : ""); return 0; } static const struct seq_operations misc_seq_ops = { .start = misc_seq_start, .next = misc_seq_next, .stop = misc_seq_stop, .show = misc_seq_show, }; #endif static int misc_open(struct inode *inode, struct file *file) { int minor = iminor(inode); struct miscdevice *c = NULL, *iter; int err = -ENODEV; const struct file_operations *new_fops = NULL; mutex_lock(&misc_mtx); list_for_each_entry(iter, &misc_list, list) { if (iter->minor != minor) continue; c = iter; new_fops = fops_get(iter->fops); break; } /* Only request module for fixed minor code */ if (!new_fops && minor < MISC_DYNAMIC_MINOR) { mutex_unlock(&misc_mtx); request_module("char-major-%d-%d", MISC_MAJOR, minor); mutex_lock(&misc_mtx); list_for_each_entry(iter, &misc_list, list) { if (iter->minor != minor) continue; c = iter; new_fops = fops_get(iter->fops); break; } } if (!new_fops) goto fail; /* * Place the miscdevice in the file's * private_data so it can be used by the * file operations, including f_op->open below */ file->private_data = c; err = 0; replace_fops(file, new_fops); if (file->f_op->open) err = file->f_op->open(inode, file); fail: mutex_unlock(&misc_mtx); return err; } static char *misc_devnode(const struct device *dev, umode_t *mode) { const struct miscdevice *c = dev_get_drvdata(dev); if (mode && c->mode) *mode = c->mode; if (c->nodename) return kstrdup(c->nodename, GFP_KERNEL); return NULL; } static const struct class misc_class = { .name = "misc", .devnode = misc_devnode, }; static const struct file_operations misc_fops = { .owner = THIS_MODULE, .open = misc_open, .llseek = noop_llseek, }; /** * misc_register - register a miscellaneous device * @misc: device structure * * Register a miscellaneous device with the kernel. If the minor * number is set to %MISC_DYNAMIC_MINOR a minor number is assigned * and placed in the minor field of the structure. For other cases * the minor number requested is used. 
* * The structure passed is linked into the kernel and may not be * destroyed until it has been unregistered. By default, an open() * syscall to the device sets file->private_data to point to the * structure. Drivers don't need open in fops for this. * * A zero is returned on success and a negative errno code for * failure. */ int misc_register(struct miscdevice *misc) { dev_t dev; int err = 0; bool is_dynamic = (misc->minor == MISC_DYNAMIC_MINOR); if (misc->minor > MISC_DYNAMIC_MINOR) { pr_err("Invalid fixed minor %d for miscdevice '%s'\n", misc->minor, misc->name); return -EINVAL; } INIT_LIST_HEAD(&misc->list); mutex_lock(&misc_mtx); if (is_dynamic) { int i = misc_minor_alloc(misc->minor); if (i < 0) { err = -EBUSY; goto out; } misc->minor = i; } else { struct miscdevice *c; int i; list_for_each_entry(c, &misc_list, list) { if (c->minor == misc->minor) { err = -EBUSY; goto out; } } i = misc_minor_alloc(misc->minor); if (i < 0) { err = -EBUSY; goto out; } } dev = MKDEV(MISC_MAJOR, misc->minor); misc->this_device = device_create_with_groups(&misc_class, misc->parent, dev, misc, misc->groups, "%s", misc->name); if (IS_ERR(misc->this_device)) { misc_minor_free(misc->minor); if (is_dynamic) { misc->minor = MISC_DYNAMIC_MINOR; } err = PTR_ERR(misc->this_device); goto out; } /* * Add it to the front, so that later devices can "override" * earlier defaults */ list_add(&misc->list, &misc_list); out: mutex_unlock(&misc_mtx); return err; } EXPORT_SYMBOL(misc_register); /** * misc_deregister - unregister a miscellaneous device * @misc: device to unregister * * Unregister a miscellaneous device that was previously * successfully registered with misc_register(). 
*/
void misc_deregister(struct miscdevice *misc)
{
	dev_t devt;

	mutex_lock(&misc_mtx);

	/* Take it off misc_list, which is what misc_open() searches. */
	list_del_init(&misc->list);

	devt = MKDEV(MISC_MAJOR, misc->minor);
	device_destroy(&misc_class, devt);
	misc_minor_free(misc->minor);

	/* Minors above MISC_DYNAMIC_MINOR go back to the dynamic marker. */
	if (misc->minor > MISC_DYNAMIC_MINOR)
		misc->minor = MISC_DYNAMIC_MINOR;

	mutex_unlock(&misc_mtx);
}
EXPORT_SYMBOL(misc_deregister);

/*
 * Subsystem init: create the "misc" proc entry, register the misc class
 * and claim the misc character major.  Returns 0 on success; on failure,
 * whatever was set up before the failing step is undone and the error is
 * returned.
 */
static int __init misc_init(void)
{
	struct proc_dir_entry *proc_entry;
	int err;

	proc_entry = proc_create_seq("misc", 0, NULL, &misc_seq_ops);

	err = class_register(&misc_class);
	if (!err) {
		err = __register_chrdev(MISC_MAJOR, 0, MINORMASK + 1, "misc",
					&misc_fops);
		if (err >= 0)
			return 0;

		pr_err("unable to get major %d for misc devices\n", MISC_MAJOR);
		class_unregister(&misc_class);
	}

	/* Reached only on failure; drop the proc entry if it was created. */
	if (proc_entry)
		remove_proc_entry("misc", NULL);
	return err;
}
subsys_initcall(misc_init);
47 47 100 100 149 149 36 36 36 179 179 1 8 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 
516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 
1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 // SPDX-License-Identifier: GPL-2.0 /* * dax: direct host memory access * Copyright (C) 2020 Red Hat, Inc. 
*/ #include "fuse_i.h" #include <linux/delay.h> #include <linux/dax.h> #include <linux/uio.h> #include <linux/pagemap.h> #include <linux/iomap.h> #include <linux/interval_tree.h> /* * Default memory range size. A power of 2 so it agrees with common FUSE_INIT * map_alignment values 4KB and 64KB. */ #define FUSE_DAX_SHIFT 21 #define FUSE_DAX_SZ (1 << FUSE_DAX_SHIFT) #define FUSE_DAX_PAGES (FUSE_DAX_SZ / PAGE_SIZE) /* Number of ranges reclaimer will try to free in one invocation */ #define FUSE_DAX_RECLAIM_CHUNK (10) /* * Dax memory reclaim threshold in percetage of total ranges. When free * number of free ranges drops below this threshold, reclaim can trigger * Default is 20% */ #define FUSE_DAX_RECLAIM_THRESHOLD (20) /** Translation information for file offsets to DAX window offsets */ struct fuse_dax_mapping { /* Pointer to inode where this memory range is mapped */ struct inode *inode; /* Will connect in fcd->free_ranges to keep track of free memory */ struct list_head list; /* For interval tree in file/inode */ struct interval_tree_node itn; /* Will connect in fc->busy_ranges to keep track busy memory */ struct list_head busy_list; /** Position in DAX window */ u64 window_offset; /** Length of mapping, in bytes */ loff_t length; /* Is this mapping read-only or read-write */ bool writable; /* reference count when the mapping is used by dax iomap. 
*/ refcount_t refcnt; }; /* Per-inode dax map */ struct fuse_inode_dax { /* Semaphore to protect modifications to the dmap tree */ struct rw_semaphore sem; /* Sorted rb tree of struct fuse_dax_mapping elements */ struct rb_root_cached tree; unsigned long nr; }; struct fuse_conn_dax { /* DAX device */ struct dax_device *dev; /* Lock protecting accessess to members of this structure */ spinlock_t lock; /* List of memory ranges which are busy */ unsigned long nr_busy_ranges; struct list_head busy_ranges; /* Worker to free up memory ranges */ struct delayed_work free_work; /* Wait queue for a dax range to become free */ wait_queue_head_t range_waitq; /* DAX Window Free Ranges */ long nr_free_ranges; struct list_head free_ranges; unsigned long nr_ranges; }; static inline struct fuse_dax_mapping * node_to_dmap(struct interval_tree_node *node) { if (!node) return NULL; return container_of(node, struct fuse_dax_mapping, itn); } static struct fuse_dax_mapping * alloc_dax_mapping_reclaim(struct fuse_conn_dax *fcd, struct inode *inode); static void __kick_dmap_free_worker(struct fuse_conn_dax *fcd, unsigned long delay_ms) { unsigned long free_threshold; /* If number of free ranges are below threshold, start reclaim */ free_threshold = max_t(unsigned long, fcd->nr_ranges * FUSE_DAX_RECLAIM_THRESHOLD / 100, 1); if (fcd->nr_free_ranges < free_threshold) queue_delayed_work(system_long_wq, &fcd->free_work, msecs_to_jiffies(delay_ms)); } static void kick_dmap_free_worker(struct fuse_conn_dax *fcd, unsigned long delay_ms) { spin_lock(&fcd->lock); __kick_dmap_free_worker(fcd, delay_ms); spin_unlock(&fcd->lock); } static struct fuse_dax_mapping *alloc_dax_mapping(struct fuse_conn_dax *fcd) { struct fuse_dax_mapping *dmap; spin_lock(&fcd->lock); dmap = list_first_entry_or_null(&fcd->free_ranges, struct fuse_dax_mapping, list); if (dmap) { list_del_init(&dmap->list); WARN_ON(fcd->nr_free_ranges <= 0); fcd->nr_free_ranges--; } __kick_dmap_free_worker(fcd, 0); spin_unlock(&fcd->lock); 
return dmap; } /* This assumes fcd->lock is held */ static void __dmap_remove_busy_list(struct fuse_conn_dax *fcd, struct fuse_dax_mapping *dmap) { list_del_init(&dmap->busy_list); WARN_ON(fcd->nr_busy_ranges == 0); fcd->nr_busy_ranges--; } static void dmap_remove_busy_list(struct fuse_conn_dax *fcd, struct fuse_dax_mapping *dmap) { spin_lock(&fcd->lock); __dmap_remove_busy_list(fcd, dmap); spin_unlock(&fcd->lock); } /* This assumes fcd->lock is held */ static void __dmap_add_to_free_pool(struct fuse_conn_dax *fcd, struct fuse_dax_mapping *dmap) { list_add_tail(&dmap->list, &fcd->free_ranges); fcd->nr_free_ranges++; wake_up(&fcd->range_waitq); } static void dmap_add_to_free_pool(struct fuse_conn_dax *fcd, struct fuse_dax_mapping *dmap) { /* Return fuse_dax_mapping to free list */ spin_lock(&fcd->lock); __dmap_add_to_free_pool(fcd, dmap); spin_unlock(&fcd->lock); } static int fuse_setup_one_mapping(struct inode *inode, unsigned long start_idx, struct fuse_dax_mapping *dmap, bool writable, bool upgrade) { struct fuse_mount *fm = get_fuse_mount(inode); struct fuse_conn_dax *fcd = fm->fc->dax; struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_setupmapping_in inarg; loff_t offset = start_idx << FUSE_DAX_SHIFT; FUSE_ARGS(args); ssize_t err; WARN_ON(fcd->nr_free_ranges < 0); /* Ask fuse daemon to setup mapping */ memset(&inarg, 0, sizeof(inarg)); inarg.foffset = offset; inarg.fh = -1; inarg.moffset = dmap->window_offset; inarg.len = FUSE_DAX_SZ; inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ; if (writable) inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE; args.opcode = FUSE_SETUPMAPPING; args.nodeid = fi->nodeid; args.in_numargs = 1; args.in_args[0].size = sizeof(inarg); args.in_args[0].value = &inarg; err = fuse_simple_request(fm, &args); if (err < 0) return err; dmap->writable = writable; if (!upgrade) { /* * We don't take a reference on inode. inode is valid right now * and when inode is going away, cleanup logic should first * cleanup dmap entries. 
*/ dmap->inode = inode; dmap->itn.start = dmap->itn.last = start_idx; /* Protected by fi->dax->sem */ interval_tree_insert(&dmap->itn, &fi->dax->tree); fi->dax->nr++; spin_lock(&fcd->lock); list_add_tail(&dmap->busy_list, &fcd->busy_ranges); fcd->nr_busy_ranges++; spin_unlock(&fcd->lock); } return 0; } static int fuse_send_removemapping(struct inode *inode, struct fuse_removemapping_in *inargp, struct fuse_removemapping_one *remove_one) { struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_mount *fm = get_fuse_mount(inode); FUSE_ARGS(args); args.opcode = FUSE_REMOVEMAPPING; args.nodeid = fi->nodeid; args.in_numargs = 3; fuse_set_zero_arg0(&args); args.in_args[1].size = sizeof(*inargp); args.in_args[1].value = inargp; args.in_args[2].size = inargp->count * sizeof(*remove_one); args.in_args[2].value = remove_one; return fuse_simple_request(fm, &args); } static int dmap_removemapping_list(struct inode *inode, unsigned int num, struct list_head *to_remove) { struct fuse_removemapping_one *remove_one, *ptr; struct fuse_removemapping_in inarg; struct fuse_dax_mapping *dmap; int ret, i = 0, nr_alloc; nr_alloc = min_t(unsigned int, num, FUSE_REMOVEMAPPING_MAX_ENTRY); remove_one = kmalloc_objs(*remove_one, nr_alloc, GFP_NOFS); if (!remove_one) return -ENOMEM; ptr = remove_one; list_for_each_entry(dmap, to_remove, list) { ptr->moffset = dmap->window_offset; ptr->len = dmap->length; ptr++; i++; num--; if (i >= nr_alloc || num == 0) { memset(&inarg, 0, sizeof(inarg)); inarg.count = i; ret = fuse_send_removemapping(inode, &inarg, remove_one); if (ret) goto out; ptr = remove_one; i = 0; } } out: kfree(remove_one); return ret; } /* * Cleanup dmap entry and add back to free list. This should be called with * fcd->lock held. 
*/
static void dmap_reinit_add_to_free_pool(struct fuse_conn_dax *fcd,
					    struct fuse_dax_mapping *dmap)
{
	pr_debug("fuse: freeing memory range start_idx=0x%lx end_idx=0x%lx window_offset=0x%llx length=0x%llx\n",
		 dmap->itn.start, dmap->itn.last, dmap->window_offset,
		 dmap->length);
	__dmap_remove_busy_list(fcd, dmap);
	/* Reset the per-inode fields before the range goes back to the pool. */
	dmap->inode = NULL;
	dmap->itn.start = dmap->itn.last = 0;
	__dmap_add_to_free_pool(fcd, dmap);
}

/*
 * Free inode dmap entries whose range falls inside [start, end].
 * Does not take any locks. At this point of time it should only be
 * called from evict_inode() path where we know all dmap entries can be
 * reclaimed.
 */
static void inode_reclaim_dmap_range(struct fuse_conn_dax *fcd,
				     struct inode *inode,
				     loff_t start, loff_t end)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_dax_mapping *dmap, *n;
	int err, num = 0;
	LIST_HEAD(to_remove);
	unsigned long start_idx = start >> FUSE_DAX_SHIFT;
	unsigned long end_idx = end >> FUSE_DAX_SHIFT;
	struct interval_tree_node *node;

	/* Pull every overlapping mapping out of the tree onto to_remove. */
	while (1) {
		node = interval_tree_iter_first(&fi->dax->tree, start_idx,
						end_idx);
		if (!node)
			break;
		dmap = node_to_dmap(node);
		/* inode is going away. There should not be any users of dmap */
		WARN_ON(refcount_read(&dmap->refcnt) > 1);
		interval_tree_remove(&dmap->itn, &fi->dax->tree);
		num++;
		list_add(&dmap->list, &to_remove);
	}

	/* Nothing to remove */
	if (list_empty(&to_remove))
		return;

	WARN_ON(fi->dax->nr < num);
	fi->dax->nr -= num;
	err = dmap_removemapping_list(inode, num, &to_remove);
	/* -ENOTCONN is not reported; any other failure is only logged. */
	if (err && err != -ENOTCONN) {
		pr_warn("Failed to removemappings. start=0x%llx end=0x%llx\n",
			start, end);
	}
	/* The ranges go back to the free pool even if the request failed. */
	spin_lock(&fcd->lock);
	list_for_each_entry_safe(dmap, n, &to_remove, list) {
		list_del_init(&dmap->list);
		dmap_reinit_add_to_free_pool(fcd, dmap);
	}
	spin_unlock(&fcd->lock);
}

/*
 * Send a single-entry FUSE_REMOVEMAPPING request for @dmap.  Returns the
 * result of fuse_send_removemapping().
 */
static int dmap_removemapping_one(struct inode *inode,
				  struct fuse_dax_mapping *dmap)
{
	struct fuse_removemapping_one forget_one;
	struct fuse_removemapping_in inarg;

	memset(&inarg, 0, sizeof(inarg));
	inarg.count = 1;
	memset(&forget_one, 0, sizeof(forget_one));
	forget_one.moffset = dmap->window_offset;
	forget_one.len = dmap->length;

	return fuse_send_removemapping(inode, &inarg, &forget_one);
}

/*
 * It is called from evict_inode() and by that time inode is going away. So
 * this function does not take any locks like fi->dax->sem for traversing
 * that fuse inode interval tree. If that lock is taken then lock validator
 * complains of deadlock situation w.r.t fs_reclaim lock.
 */
void fuse_dax_inode_cleanup(struct inode *inode)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);

	/*
	 * fuse_evict_inode() has already called truncate_inode_pages_final()
	 * before we arrive here. So we should not have to worry about any
	 * pages/exception entries still associated with inode.
	 */
	inode_reclaim_dmap_range(fc->dax, inode, 0, -1);
	WARN_ON(fi->dax->nr);
}

/* Describe @length bytes as an unmapped hole in @iomap. */
static void fuse_fill_iomap_hole(struct iomap *iomap, loff_t length)
{
	iomap->addr = IOMAP_NULL_ADDR;
	iomap->length = length;
	iomap->type = IOMAP_HOLE;
}

/*
 * Fill @iomap for the request [pos, pos + length) from @dmap.  The length
 * is limited to the end of the mapping and to i_size; when nothing is left
 * the range is reported as a hole.  For a mapped result a reference is
 * taken on @dmap and the mapping is stored in iomap->private.
 */
static void fuse_fill_iomap(struct inode *inode, loff_t pos, loff_t length,
			    struct iomap *iomap, struct fuse_dax_mapping *dmap,
			    unsigned int flags)
{
	loff_t offset, len;
	loff_t i_size = i_size_read(inode);

	offset = pos - (dmap->itn.start << FUSE_DAX_SHIFT);
	len = min(length, dmap->length - offset);

	/* If length is beyond end of file, truncate further */
	if (pos + len > i_size)
		len = i_size - pos;

	if (len > 0) {
		iomap->addr = dmap->window_offset + offset;
		iomap->length = len;
		if (flags & IOMAP_FAULT)
			iomap->length = ALIGN(len, PAGE_SIZE);
		iomap->type = IOMAP_MAPPED;
		/*
		 * increase refcnt so that reclaim code knows this dmap is in
		 * use. This assumes fi->dax->sem mutex is held either
		 * shared/exclusive.
		 */
		refcount_inc(&dmap->refcnt);

		/* iomap->private should be NULL */
		WARN_ON_ONCE(iomap->private);
		iomap->private = dmap;
	} else {
		/* Mapping beyond end of file is hole */
		fuse_fill_iomap_hole(iomap, length);
	}
}

/*
 * Allocate a free range and set up a new mapping covering @pos, then fill
 * @iomap from it.  If a mapping for that index already exists once
 * fi->dax->sem is held, the allocated range is returned to the free pool
 * and the existing mapping is used.
 *
 * Returns 0 on success, -EAGAIN in the fault path when no free range is
 * available, -EIO when no range was obtained, or a negative error from
 * alloc_dax_mapping_reclaim() or fuse_setup_one_mapping().
 */
static int fuse_setup_new_dax_mapping(struct inode *inode, loff_t pos,
				      loff_t length, unsigned int flags,
				      struct iomap *iomap)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_conn_dax *fcd = fc->dax;
	struct fuse_dax_mapping *dmap, *alloc_dmap = NULL;
	int ret;
	bool writable = flags & IOMAP_WRITE;
	unsigned long start_idx = pos >> FUSE_DAX_SHIFT;
	struct interval_tree_node *node;

	/*
	 * Can't do inline reclaim in fault path. We call
	 * dax_layout_busy_page() before we free a range. And
	 * fuse_wait_dax_page() drops mapping->invalidate_lock and requires it.
	 * In fault path we enter with mapping->invalidate_lock held and can't
	 * drop it. Also in fault path we hold mapping->invalidate_lock shared
	 * and not exclusive, so that creates further issues with
	 * fuse_wait_dax_page().  Hence return -EAGAIN and fuse_dax_fault()
	 * will wait for a memory range to become free and retry.
	 */
	if (flags & IOMAP_FAULT) {
		alloc_dmap = alloc_dax_mapping(fcd);
		if (!alloc_dmap)
			return -EAGAIN;
	} else {
		alloc_dmap = alloc_dax_mapping_reclaim(fcd, inode);
		if (IS_ERR(alloc_dmap))
			return PTR_ERR(alloc_dmap);
	}

	/* If we are here, we should have memory allocated */
	if (WARN_ON(!alloc_dmap))
		return -EIO;

	/*
	 * Take write lock so that only one caller can try to setup mapping
	 * and other waits.
	 */
	down_write(&fi->dax->sem);
	/*
	 * We dropped lock. Check again if somebody else setup
	 * mapping already.
	 */
	node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx);
	if (node) {
		dmap = node_to_dmap(node);
		fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
		dmap_add_to_free_pool(fcd, alloc_dmap);
		up_write(&fi->dax->sem);
		return 0;
	}

	/* Setup one mapping */
	ret = fuse_setup_one_mapping(inode, pos >> FUSE_DAX_SHIFT, alloc_dmap,
				     writable, false);
	if (ret < 0) {
		dmap_add_to_free_pool(fcd, alloc_dmap);
		up_write(&fi->dax->sem);
		return ret;
	}
	fuse_fill_iomap(inode, pos, length, iomap, alloc_dmap, flags);
	up_write(&fi->dax->sem);
	return 0;
}

static int fuse_upgrade_dax_mapping(struct inode *inode, loff_t pos,
				    loff_t length, unsigned int flags,
				    struct iomap *iomap)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_dax_mapping *dmap;
	int ret;
	unsigned long idx = pos >> FUSE_DAX_SHIFT;
	struct interval_tree_node *node;

	/*
	 * Take exclusive lock so that only one caller can try to setup
	 * mapping and others wait.
	 */
	down_write(&fi->dax->sem);
	node = interval_tree_iter_first(&fi->dax->tree, idx, idx);

	/* We are holding either inode lock or invalidate_lock, and that should
	 * ensure that dmap can't be truncated. We are holding a reference
	 * on dmap and that should make sure it can't be reclaimed. So dmap
	 * should still be there in tree despite the fact we dropped and
	 * re-acquired the fi->dax->sem lock.
*/ ret = -EIO; if (WARN_ON(!node)) goto out_err; dmap = node_to_dmap(node); /* We took an extra reference on dmap to make sure its not reclaimd. * Now we hold fi->dax->sem lock and that reference is not needed * anymore. Drop it. */ if (refcount_dec_and_test(&dmap->refcnt)) { /* refcount should not hit 0. This object only goes * away when fuse connection goes away */ WARN_ON_ONCE(1); } /* Maybe another thread already upgraded mapping while we were not * holding lock. */ if (dmap->writable) { ret = 0; goto out_fill_iomap; } ret = fuse_setup_one_mapping(inode, pos >> FUSE_DAX_SHIFT, dmap, true, true); if (ret < 0) goto out_err; out_fill_iomap: fuse_fill_iomap(inode, pos, length, iomap, dmap, flags); out_err: up_write(&fi->dax->sem); return ret; } /* This is just for DAX and the mapping is ephemeral, do not use it for other * purposes since there is no block device with a permanent mapping. */ static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length, unsigned int flags, struct iomap *iomap, struct iomap *srcmap) { struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_dax_mapping *dmap; bool writable = flags & IOMAP_WRITE; unsigned long start_idx = pos >> FUSE_DAX_SHIFT; struct interval_tree_node *node; /* We don't support FIEMAP */ if (WARN_ON(flags & IOMAP_REPORT)) return -EIO; iomap->offset = pos; iomap->flags = 0; iomap->bdev = NULL; iomap->dax_dev = fc->dax->dev; /* * Both read/write and mmap path can race here. So we need something * to make sure if we are setting up mapping, then other path waits * * For now, use a semaphore for this. It probably needs to be * optimized later. */ down_read(&fi->dax->sem); node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); if (node) { dmap = node_to_dmap(node); if (writable && !dmap->writable) { /* Upgrade read-only mapping to read-write. 
This will * require exclusive fi->dax->sem lock as we don't want * two threads to be trying to this simultaneously * for same dmap. So drop shared lock and acquire * exclusive lock. * * Before dropping fi->dax->sem lock, take reference * on dmap so that its not freed by range reclaim. */ refcount_inc(&dmap->refcnt); up_read(&fi->dax->sem); pr_debug("%s: Upgrading mapping at offset 0x%llx length 0x%llx\n", __func__, pos, length); return fuse_upgrade_dax_mapping(inode, pos, length, flags, iomap); } else { fuse_fill_iomap(inode, pos, length, iomap, dmap, flags); up_read(&fi->dax->sem); return 0; } } else { up_read(&fi->dax->sem); pr_debug("%s: no mapping at offset 0x%llx length 0x%llx\n", __func__, pos, length); if (pos >= i_size_read(inode)) goto iomap_hole; return fuse_setup_new_dax_mapping(inode, pos, length, flags, iomap); } /* * If read beyond end of file happens, fs code seems to return * it as hole */ iomap_hole: fuse_fill_iomap_hole(iomap, length); pr_debug("%s returning hole mapping. pos=0x%llx length_asked=0x%llx length_returned=0x%llx\n", __func__, pos, length, iomap->length); return 0; } static int fuse_iomap_end(struct inode *inode, loff_t pos, loff_t length, ssize_t written, unsigned int flags, struct iomap *iomap) { struct fuse_dax_mapping *dmap = iomap->private; if (dmap) { if (refcount_dec_and_test(&dmap->refcnt)) { /* refcount should not hit 0. This object only goes * away when fuse connection goes away */ WARN_ON_ONCE(1); } } /* DAX writes beyond end-of-file aren't handled using iomap, so the * file size is unchanged and there is nothing to do here. */ return 0; } static const struct iomap_ops fuse_iomap_ops = { .iomap_begin = fuse_iomap_begin, .iomap_end = fuse_iomap_end, }; static void fuse_wait_dax_page(struct inode *inode) { filemap_invalidate_unlock(inode->i_mapping); schedule(); filemap_invalidate_lock(inode->i_mapping); } /* Should be called with mapping->invalidate_lock held exclusively. 
*/
int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start,
				  u64 dmap_end)
{
	/* fuse_wait_dax_page() is the callback used while waiting. */
	return dax_break_layout(inode, dmap_start, dmap_end,
				fuse_wait_dax_page);
}

/*
 * DAX read path: take the inode lock shared (only a trylock when
 * IOCB_NOWAIT is set, returning -EAGAIN on contention) and let
 * dax_iomap_rw() do the transfer through fuse_iomap_ops.
 */
ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	ssize_t ret;

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!inode_trylock_shared(inode))
			return -EAGAIN;
	} else {
		inode_lock_shared(inode);
	}

	ret = dax_iomap_rw(iocb, to, &fuse_iomap_ops);
	inode_unlock_shared(inode);

	/* TODO file_accessed(iocb->f_filp) */
	return ret;
}

/*
 * Return true when @from is a write that starts at or beyond i_size, or
 * that ends beyond i_size.
 */
static bool file_extending_write(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);

	return (iov_iter_rw(from) == WRITE &&
		((iocb->ki_pos) >= i_size_read(inode) ||
		  (iocb->ki_pos + iov_iter_count(from) > i_size_read(inode))));
}

/*
 * Perform the write with fuse_direct_io() instead of DAX, then pass the
 * new position and the result to fuse_write_update_attr().
 */
static ssize_t fuse_dax_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
	ssize_t ret;

	ret = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE);

	fuse_write_update_attr(inode, iocb->ki_pos, ret);
	return ret;
}

/*
 * DAX write path. Runs under the exclusive inode lock (only a trylock
 * when IOCB_NOWAIT is set, returning -EAGAIN on contention). Writes that
 * extend the file go through fuse_dax_direct_write(); all other writes go
 * through dax_iomap_rw(). A positive result is passed through
 * generic_write_sync() after the lock is dropped.
 */
ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	ssize_t ret;

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!inode_trylock(inode))
			return -EAGAIN;
	} else {
		inode_lock(inode);
	}

	ret = generic_write_checks(iocb, from);
	if (ret <= 0)
		goto out;

	ret = file_remove_privs(iocb->ki_filp);
	if (ret)
		goto out;
	/* TODO file_update_time() but we don't want metadata I/O */

	/* Do not use dax for file extending writes as write and on
	 * disk i_size increase are not atomic otherwise.
	 */
	if (file_extending_write(iocb, from))
		ret = fuse_dax_direct_write(iocb, from);
	else
		ret = dax_iomap_rw(iocb, from, &fuse_iomap_ops);

out:
	inode_unlock(inode);

	if (ret > 0)
		ret = generic_write_sync(iocb, ret);
	return ret;
}

/*
 * Common fault handler behind all fuse_dax_vm_ops entry points.
 *
 * Runs dax_iomap_fault() under the shared invalidate lock. When the fault
 * fails with -EAGAIN, the lock is dropped and the fault is retried; on a
 * retry the handler first waits on fcd->range_waitq unless
 * fcd->nr_free_ranges is already positive. Write faults are bracketed by
 * sb_start_pagefault()/sb_end_pagefault().
 */
static vm_fault_t __fuse_dax_fault(struct vm_fault *vmf, unsigned int order,
				   bool write)
{
	vm_fault_t ret;
	struct inode *inode = file_inode(vmf->vma->vm_file);
	struct super_block *sb = inode->i_sb;
	unsigned long pfn;
	int error = 0;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_conn_dax *fcd = fc->dax;
	bool retry = false;

	if (write)
		sb_start_pagefault(sb);
retry:
	if (retry && !(fcd->nr_free_ranges > 0))
		wait_event(fcd->range_waitq, (fcd->nr_free_ranges > 0));

	/*
	 * We need to serialize against not only truncate but also against
	 * fuse dax memory range reclaim. While a range is being reclaimed,
	 * we do not want any read/write/mmap to make progress and try
	 * to populate page cache or access memory we are trying to free.
	 */
	filemap_invalidate_lock_shared(inode->i_mapping);
	ret = dax_iomap_fault(vmf, order, &pfn, &error, &fuse_iomap_ops);
	if ((ret & VM_FAULT_ERROR) && error == -EAGAIN) {
		/* Reset state and drop the lock before waiting for a range. */
		error = 0;
		retry = true;
		filemap_invalidate_unlock_shared(inode->i_mapping);
		goto retry;
	}

	if (ret & VM_FAULT_NEEDDSYNC)
		ret = dax_finish_sync_fault(vmf, order, pfn);
	filemap_invalidate_unlock_shared(inode->i_mapping);

	if (write)
		sb_end_pagefault(sb);

	return ret;
}

/* Order-0 fault; it is a write fault when FAULT_FLAG_WRITE is set. */
static vm_fault_t fuse_dax_fault(struct vm_fault *vmf)
{
	return __fuse_dax_fault(vmf, 0, vmf->flags & FAULT_FLAG_WRITE);
}

/* Fault at the given @order; write-ness again comes from FAULT_FLAG_WRITE. */
static vm_fault_t fuse_dax_huge_fault(struct vm_fault *vmf, unsigned int order)
{
	return __fuse_dax_fault(vmf, order, vmf->flags & FAULT_FLAG_WRITE);
}

/* page_mkwrite is always handled as an order-0 write fault. */
static vm_fault_t fuse_dax_page_mkwrite(struct vm_fault *vmf)
{
	return __fuse_dax_fault(vmf, 0, true);
}

/* pfn_mkwrite is always handled as an order-0 write fault. */
static vm_fault_t fuse_dax_pfn_mkwrite(struct vm_fault *vmf)
{
	return __fuse_dax_fault(vmf, 0, true);
}

/* VM operations installed on DAX mappings by fuse_dax_mmap(). */
static const struct vm_operations_struct fuse_dax_vm_ops = {
	.fault		= fuse_dax_fault,
	.huge_fault	= fuse_dax_huge_fault,
	.page_mkwrite	= fuse_dax_page_mkwrite,
	.pfn_mkwrite	= fuse_dax_pfn_mkwrite,
};

/*
 * mmap handler for DAX files: calls file_accessed(), installs
 * fuse_dax_vm_ops and sets VM_MIXEDMAP | VM_HUGEPAGE on the vma.
 * Always returns 0.
 */
int fuse_dax_mmap(struct file *file, struct vm_area_struct *vma)
{
	file_accessed(file);
	vma->vm_ops = &fuse_dax_vm_ops;
	vm_flags_set(vma, VM_MIXEDMAP | VM_HUGEPAGE);
	return 0;
}

/*
 * Write back and then invalidate the page-cache range covered by @dmap,
 * i.e. FUSE_DAX_SZ bytes starting at the dmap's start index.
 * Returns 0 on success or the first error encountered.
 */
static int dmap_writeback_invalidate(struct inode *inode,
				     struct fuse_dax_mapping *dmap)
{
	int ret;
	loff_t start_pos = dmap->itn.start << FUSE_DAX_SHIFT;
	loff_t end_pos = (start_pos + FUSE_DAX_SZ - 1);

	ret = filemap_fdatawrite_range(inode->i_mapping, start_pos, end_pos);
	if (ret) {
		pr_debug("fuse: filemap_fdatawrite_range() failed. err=%d start_pos=0x%llx, end_pos=0x%llx\n",
			 ret, start_pos, end_pos);
		return ret;
	}

	ret = invalidate_inode_pages2_range(inode->i_mapping,
					    start_pos >> PAGE_SHIFT,
					    end_pos >> PAGE_SHIFT);
	if (ret)
		pr_debug("fuse: invalidate_inode_pages2_range() failed err=%d\n",
			 ret);

	return ret;
}

/*
 * Reclaim one range from @inode: write back and invalidate its pages,
 * remove it from the inode's interval tree and send a remove-mapping
 * request for it.
 *
 * Returns the writeback/invalidate error if that step fails; in that case
 * the tree is left untouched. A failing remove-mapping request is only
 * logged and the function still returns 0.
 */
static int reclaim_one_dmap_locked(struct inode *inode,
				   struct fuse_dax_mapping *dmap)
{
	int ret;
	struct fuse_inode *fi = get_fuse_inode(inode);

	/*
	 * igrab() was done to make sure inode won't go under us, and this
	 * further avoids the race with evict().
	 */
	ret = dmap_writeback_invalidate(inode, dmap);
	if (ret)
		return ret;

	/* Remove dax mapping from inode interval tree now */
	interval_tree_remove(&dmap->itn, &fi->dax->tree);
	fi->dax->nr--;

	/* It is possible that umount/shutdown has killed the fuse connection
	 * and worker thread is trying to reclaim memory in parallel.  Don't
	 * warn in that case.
	 */
	ret = dmap_removemapping_one(inode, dmap);
	if (ret && ret != -ENOTCONN) {
		pr_warn("Failed to remove mapping. offset=0x%llx len=0x%llx ret=%d\n",
			dmap->window_offset, dmap->length, ret);
	}
	return 0;
}

/* Find first mapped dmap for an inode and return file offset. Caller needs
 * to hold fi->dax->sem lock either shared or exclusive.
*/ static struct fuse_dax_mapping *inode_lookup_first_dmap(struct inode *inode) { struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_dax_mapping *dmap; struct interval_tree_node *node; for (node = interval_tree_iter_first(&fi->dax->tree, 0, -1); node; node = interval_tree_iter_next(node, 0, -1)) { dmap = node_to_dmap(node); /* still in use. */ if (refcount_read(&dmap->refcnt) > 1) continue; return dmap; } return NULL; } /* * Find first mapping in the tree and free it and return it. Do not add * it back to free pool. */ static struct fuse_dax_mapping * inode_inline_reclaim_one_dmap(struct fuse_conn_dax *fcd, struct inode *inode, bool *retry) { struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_dax_mapping *dmap; u64 dmap_start, dmap_end; unsigned long start_idx; int ret; struct interval_tree_node *node; filemap_invalidate_lock(inode->i_mapping); /* Lookup a dmap and corresponding file offset to reclaim. */ down_read(&fi->dax->sem); dmap = inode_lookup_first_dmap(inode); if (dmap) { start_idx = dmap->itn.start; dmap_start = start_idx << FUSE_DAX_SHIFT; dmap_end = dmap_start + FUSE_DAX_SZ - 1; } up_read(&fi->dax->sem); if (!dmap) goto out_mmap_sem; /* * Make sure there are no references to inode pages using * get_user_pages() */ ret = fuse_dax_break_layouts(inode, dmap_start, dmap_end); if (ret) { pr_debug("fuse: fuse_dax_break_layouts() failed. err=%d\n", ret); dmap = ERR_PTR(ret); goto out_mmap_sem; } down_write(&fi->dax->sem); node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); /* Range already got reclaimed by somebody else */ if (!node) { if (retry) *retry = true; goto out_write_dmap_sem; } dmap = node_to_dmap(node); /* still in use. */ if (refcount_read(&dmap->refcnt) > 1) { dmap = NULL; if (retry) *retry = true; goto out_write_dmap_sem; } ret = reclaim_one_dmap_locked(inode, dmap); if (ret < 0) { dmap = ERR_PTR(ret); goto out_write_dmap_sem; } /* Clean up dmap. 
Do not add back to free list */ dmap_remove_busy_list(fcd, dmap); dmap->inode = NULL; dmap->itn.start = dmap->itn.last = 0; pr_debug("fuse: %s: inline reclaimed memory range. inode=%p, window_offset=0x%llx, length=0x%llx\n", __func__, inode, dmap->window_offset, dmap->length); out_write_dmap_sem: up_write(&fi->dax->sem); out_mmap_sem: filemap_invalidate_unlock(inode->i_mapping); return dmap; } static struct fuse_dax_mapping * alloc_dax_mapping_reclaim(struct fuse_conn_dax *fcd, struct inode *inode) { struct fuse_dax_mapping *dmap; struct fuse_inode *fi = get_fuse_inode(inode); while (1) { bool retry = false; dmap = alloc_dax_mapping(fcd); if (dmap) return dmap; dmap = inode_inline_reclaim_one_dmap(fcd, inode, &retry); /* * Either we got a mapping or it is an error, return in both * the cases. */ if (dmap) return dmap; /* If we could not reclaim a mapping because it * had a reference or some other temporary failure, * Try again. We want to give up inline reclaim only * if there is no range assigned to this node. Otherwise * if a deadlock is possible if we sleep with * mapping->invalidate_lock held and worker to free memory * can't make progress due to unavailability of * mapping->invalidate_lock. So sleep only if fi->dax->nr=0 */ if (retry) continue; /* * There are no mappings which can be reclaimed. Wait for one. * We are not holding fi->dax->sem. So it is possible * that range gets added now. But as we are not holding * mapping->invalidate_lock, worker should still be able to * free up a range and wake us up. */ if (!fi->dax->nr && !(fcd->nr_free_ranges > 0)) { if (wait_event_killable_exclusive(fcd->range_waitq, (fcd->nr_free_ranges > 0))) { return ERR_PTR(-EINTR); } } } } static int lookup_and_reclaim_dmap_locked(struct fuse_conn_dax *fcd, struct inode *inode, unsigned long start_idx) { int ret; struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_dax_mapping *dmap; struct interval_tree_node *node; /* Find fuse dax mapping at file offset inode. 
*/ node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); /* Range already got cleaned up by somebody else */ if (!node) return 0; dmap = node_to_dmap(node); /* still in use. */ if (refcount_read(&dmap->refcnt) > 1) return 0; ret = reclaim_one_dmap_locked(inode, dmap); if (ret < 0) return ret; /* Cleanup dmap entry and add back to free list */ spin_lock(&fcd->lock); dmap_reinit_add_to_free_pool(fcd, dmap); spin_unlock(&fcd->lock); return ret; } /* * Free a range of memory. * Locking: * 1. Take mapping->invalidate_lock to block dax faults. * 2. Take fi->dax->sem to protect interval tree and also to make sure * read/write can not reuse a dmap which we might be freeing. */ static int lookup_and_reclaim_dmap(struct fuse_conn_dax *fcd, struct inode *inode, unsigned long start_idx, unsigned long end_idx) { int ret; struct fuse_inode *fi = get_fuse_inode(inode); loff_t dmap_start = start_idx << FUSE_DAX_SHIFT; loff_t dmap_end = (dmap_start + FUSE_DAX_SZ) - 1; filemap_invalidate_lock(inode->i_mapping); ret = fuse_dax_break_layouts(inode, dmap_start, dmap_end); if (ret) { pr_debug("virtio_fs: fuse_dax_break_layouts() failed. err=%d\n", ret); goto out_mmap_sem; } down_write(&fi->dax->sem); ret = lookup_and_reclaim_dmap_locked(fcd, inode, start_idx); up_write(&fi->dax->sem); out_mmap_sem: filemap_invalidate_unlock(inode->i_mapping); return ret; } static int try_to_free_dmap_chunks(struct fuse_conn_dax *fcd, unsigned long nr_to_free) { struct fuse_dax_mapping *dmap, *pos, *temp; int ret, nr_freed = 0; unsigned long start_idx = 0, end_idx = 0; struct inode *inode = NULL; /* Pick first busy range and free it for now*/ while (1) { if (nr_freed >= nr_to_free) break; dmap = NULL; spin_lock(&fcd->lock); if (!fcd->nr_busy_ranges) { spin_unlock(&fcd->lock); return 0; } list_for_each_entry_safe(pos, temp, &fcd->busy_ranges, busy_list) { /* skip this range if it's in use. 
*/ if (refcount_read(&pos->refcnt) > 1) continue; inode = igrab(pos->inode); /* * This inode is going away. That will free * up all the ranges anyway, continue to * next range. */ if (!inode) continue; /* * Take this element off list and add it tail. If * this element can't be freed, it will help with * selecting new element in next iteration of loop. */ dmap = pos; list_move_tail(&dmap->busy_list, &fcd->busy_ranges); start_idx = end_idx = dmap->itn.start; break; } spin_unlock(&fcd->lock); if (!dmap) return 0; ret = lookup_and_reclaim_dmap(fcd, inode, start_idx, end_idx); iput(inode); if (ret) return ret; nr_freed++; } return 0; } static void fuse_dax_free_mem_worker(struct work_struct *work) { int ret; struct fuse_conn_dax *fcd = container_of(work, struct fuse_conn_dax, free_work.work); ret = try_to_free_dmap_chunks(fcd, FUSE_DAX_RECLAIM_CHUNK); if (ret) { pr_debug("fuse: try_to_free_dmap_chunks() failed with err=%d\n", ret); } /* If number of free ranges are still below threshold, requeue */ kick_dmap_free_worker(fcd, 1); } static void fuse_free_dax_mem_ranges(struct list_head *mem_list) { struct fuse_dax_mapping *range, *temp; /* Free All allocated elements */ list_for_each_entry_safe(range, temp, mem_list, list) { list_del(&range->list); if (!list_empty(&range->busy_list)) list_del(&range->busy_list); kfree(range); } } void fuse_dax_conn_free(struct fuse_conn *fc) { if (fc->dax) { fuse_free_dax_mem_ranges(&fc->dax->free_ranges); kfree(fc->dax); fc->dax = NULL; } } static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd) { long nr_pages, nr_ranges; struct fuse_dax_mapping *range; int ret, id; size_t dax_size = -1; unsigned long i; init_waitqueue_head(&fcd->range_waitq); INIT_LIST_HEAD(&fcd->free_ranges); INIT_LIST_HEAD(&fcd->busy_ranges); INIT_DELAYED_WORK(&fcd->free_work, fuse_dax_free_mem_worker); id = dax_read_lock(); nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), DAX_ACCESS, NULL, NULL); dax_read_unlock(id); if (nr_pages < 0) { 
pr_debug("dax_direct_access() returned %ld\n", nr_pages); return nr_pages; } nr_ranges = nr_pages/FUSE_DAX_PAGES; pr_debug("%s: dax mapped %ld pages. nr_ranges=%ld\n", __func__, nr_pages, nr_ranges); for (i = 0; i < nr_ranges; i++) { range = kzalloc_obj(struct fuse_dax_mapping); ret = -ENOMEM; if (!range) goto out_err; /* TODO: This offset only works if virtio-fs driver is not * having some memory hidden at the beginning. This needs * better handling */ range->window_offset = i * FUSE_DAX_SZ; range->length = FUSE_DAX_SZ; INIT_LIST_HEAD(&range->busy_list); refcount_set(&range->refcnt, 1); list_add_tail(&range->list, &fcd->free_ranges); } fcd->nr_free_ranges = nr_ranges; fcd->nr_ranges = nr_ranges; return 0; out_err: /* Free All allocated elements */ fuse_free_dax_mem_ranges(&fcd->free_ranges); return ret; } int fuse_dax_conn_alloc(struct fuse_conn *fc, enum fuse_dax_mode dax_mode, struct dax_device *dax_dev) { struct fuse_conn_dax *fcd; int err; fc->dax_mode = dax_mode; if (!dax_dev) return 0; fcd = kzalloc_obj(*fcd); if (!fcd) return -ENOMEM; spin_lock_init(&fcd->lock); fcd->dev = dax_dev; err = fuse_dax_mem_range_init(fcd); if (err) { kfree(fcd); return err; } fc->dax = fcd; return 0; } bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi) { struct fuse_conn *fc = get_fuse_conn_super(sb); fi->dax = NULL; if (fc->dax) { fi->dax = kzalloc_obj(*fi->dax, GFP_KERNEL_ACCOUNT); if (!fi->dax) return false; init_rwsem(&fi->dax->sem); fi->dax->tree = RB_ROOT_CACHED; } return true; } static const struct address_space_operations fuse_dax_file_aops = { .direct_IO = noop_direct_IO, .dirty_folio = noop_dirty_folio, }; static bool fuse_should_enable_dax(struct inode *inode, unsigned int flags) { struct fuse_conn *fc = get_fuse_conn(inode); enum fuse_dax_mode dax_mode = fc->dax_mode; if (dax_mode == FUSE_DAX_NEVER) return false; /* * fc->dax may be NULL in 'inode' mode when filesystem device doesn't * support DAX, in which case it will silently fallback to 
'never' mode. */ if (!fc->dax) return false; if (dax_mode == FUSE_DAX_ALWAYS) return true; /* dax_mode is FUSE_DAX_INODE* */ return fc->inode_dax && (flags & FUSE_ATTR_DAX); } void fuse_dax_inode_init(struct inode *inode, unsigned int flags) { if (!fuse_should_enable_dax(inode, flags)) return; inode->i_flags |= S_DAX; inode->i_data.a_ops = &fuse_dax_file_aops; } void fuse_dax_dontcache(struct inode *inode, unsigned int flags) { struct fuse_conn *fc = get_fuse_conn(inode); if (fuse_is_inode_dax_mode(fc->dax_mode) && ((bool) IS_DAX(inode) != (bool) (flags & FUSE_ATTR_DAX))) d_mark_dontcache(inode); } bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment) { if (fc->dax && (map_alignment > FUSE_DAX_SHIFT)) { pr_warn("FUSE: map_alignment %u incompatible with dax mem range size %u\n", map_alignment, FUSE_DAX_SZ); return false; } return true; } void fuse_dax_cancel_work(struct fuse_conn *fc) { struct fuse_conn_dax *fcd = fc->dax; if (fcd) cancel_delayed_work_sync(&fcd->free_work); } EXPORT_SYMBOL_GPL(fuse_dax_cancel_work);
140 20 125 126 142 57 67 17 1 66 92 97 16 2 98 41 142 141 59 60 195 104 20 19 202 102 31 195 100 134 193 100 133 43 103 194 68 138 39 39 39 39 1 191 174 19 98 98 132 131 181 16 185 8 166 22 11 30 77 93 47 133 8 186 41 41 37 19 19 19 6 19 19 2 19 24 24 24 23 23 19 7 4 4 9 8 1 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 
458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 // SPDX-License-Identifier: LGPL-2.0+ /* * PCM Plug-In shared (kernel/library) code * Copyright (c) 1999 by Jaroslav Kysela <perex@perex.cz> * Copyright (c) 2000 by Abramo Bagnara <abramo@alsa-project.org> */ #if 0 #define PLUGIN_DEBUG #endif #include <linux/slab.h> #include <linux/time.h> #include <linux/vmalloc.h> #include <sound/core.h> #include <sound/pcm.h> #include <sound/pcm_params.h> #include "pcm_plugin.h" #define snd_pcm_plug_first(plug) ((plug)->runtime->oss.plugin_first) #define snd_pcm_plug_last(plug) ((plug)->runtime->oss.plugin_last) /* * because some cards might have rates "very close", we ignore * all "resampling" requests within +-5% */ static int rate_match(unsigned int src_rate, unsigned int dst_rate) { unsigned int low = (src_rate * 
95) / 100; unsigned int high = (src_rate * 105) / 100; return dst_rate >= low && dst_rate <= high; } static int snd_pcm_plugin_alloc(struct snd_pcm_plugin *plugin, snd_pcm_uframes_t frames) { struct snd_pcm_plugin_format *format; ssize_t width; size_t size; unsigned int channel; struct snd_pcm_plugin_channel *c; if (plugin->stream == SNDRV_PCM_STREAM_PLAYBACK) { format = &plugin->src_format; } else { format = &plugin->dst_format; } width = snd_pcm_format_physical_width(format->format); if (width < 0) return width; size = array3_size(frames, format->channels, width); /* check for too large period size once again */ if (size > 1024 * 1024) return -ENOMEM; if (snd_BUG_ON(size % 8)) return -ENXIO; size /= 8; if (plugin->buf_frames < frames) { kvfree(plugin->buf); plugin->buf = kvzalloc(size, GFP_KERNEL); plugin->buf_frames = frames; } if (!plugin->buf) { plugin->buf_frames = 0; return -ENOMEM; } c = plugin->buf_channels; if (plugin->access == SNDRV_PCM_ACCESS_RW_INTERLEAVED) { for (channel = 0; channel < format->channels; channel++, c++) { c->frames = frames; c->enabled = 1; c->wanted = 0; c->area.addr = plugin->buf; c->area.first = channel * width; c->area.step = format->channels * width; } } else if (plugin->access == SNDRV_PCM_ACCESS_RW_NONINTERLEAVED) { if (snd_BUG_ON(size % format->channels)) return -EINVAL; size /= format->channels; for (channel = 0; channel < format->channels; channel++, c++) { c->frames = frames; c->enabled = 1; c->wanted = 0; c->area.addr = plugin->buf + (channel * size); c->area.first = 0; c->area.step = width; } } else return -EINVAL; return 0; } int snd_pcm_plug_alloc(struct snd_pcm_substream *plug, snd_pcm_uframes_t frames) { int err; if (snd_BUG_ON(!snd_pcm_plug_first(plug))) return -ENXIO; if (snd_pcm_plug_stream(plug) == SNDRV_PCM_STREAM_PLAYBACK) { struct snd_pcm_plugin *plugin = snd_pcm_plug_first(plug); while (plugin->next) { if (plugin->dst_frames) frames = plugin->dst_frames(plugin, frames); if ((snd_pcm_sframes_t)frames <= 0) 
return -ENXIO; plugin = plugin->next; err = snd_pcm_plugin_alloc(plugin, frames); if (err < 0) return err; } } else { struct snd_pcm_plugin *plugin = snd_pcm_plug_last(plug); while (plugin->prev) { if (plugin->src_frames) frames = plugin->src_frames(plugin, frames); if ((snd_pcm_sframes_t)frames <= 0) return -ENXIO; plugin = plugin->prev; err = snd_pcm_plugin_alloc(plugin, frames); if (err < 0) return err; } } return 0; } snd_pcm_sframes_t snd_pcm_plugin_client_channels(struct snd_pcm_plugin *plugin, snd_pcm_uframes_t frames, struct snd_pcm_plugin_channel **channels) { *channels = plugin->buf_channels; return frames; } int snd_pcm_plugin_build(struct snd_pcm_substream *plug, const char *name, struct snd_pcm_plugin_format *src_format, struct snd_pcm_plugin_format *dst_format, size_t extra, struct snd_pcm_plugin **ret) { struct snd_pcm_plugin *plugin; unsigned int channels; if (snd_BUG_ON(!plug)) return -ENXIO; if (snd_BUG_ON(!src_format || !dst_format)) return -ENXIO; plugin = kzalloc(sizeof(*plugin) + extra, GFP_KERNEL); if (plugin == NULL) return -ENOMEM; plugin->name = name; plugin->plug = plug; plugin->stream = snd_pcm_plug_stream(plug); plugin->access = SNDRV_PCM_ACCESS_RW_INTERLEAVED; plugin->src_format = *src_format; plugin->src_width = snd_pcm_format_physical_width(src_format->format); snd_BUG_ON(plugin->src_width <= 0); plugin->dst_format = *dst_format; plugin->dst_width = snd_pcm_format_physical_width(dst_format->format); snd_BUG_ON(plugin->dst_width <= 0); if (plugin->stream == SNDRV_PCM_STREAM_PLAYBACK) channels = src_format->channels; else channels = dst_format->channels; plugin->buf_channels = kzalloc_objs(*plugin->buf_channels, channels); if (plugin->buf_channels == NULL) { snd_pcm_plugin_free(plugin); return -ENOMEM; } plugin->client_channels = snd_pcm_plugin_client_channels; *ret = plugin; return 0; } int snd_pcm_plugin_free(struct snd_pcm_plugin *plugin) { if (! 
plugin) return 0; if (plugin->private_free) plugin->private_free(plugin); kfree(plugin->buf_channels); kvfree(plugin->buf); kfree(plugin); return 0; } static snd_pcm_sframes_t calc_dst_frames(struct snd_pcm_substream *plug, snd_pcm_sframes_t frames, bool check_size) { struct snd_pcm_plugin *plugin, *plugin_next; plugin = snd_pcm_plug_first(plug); while (plugin && frames > 0) { plugin_next = plugin->next; if (check_size && plugin->buf_frames && frames > plugin->buf_frames) frames = plugin->buf_frames; if (plugin->dst_frames) { frames = plugin->dst_frames(plugin, frames); if (frames < 0) return frames; } plugin = plugin_next; } return frames; } static snd_pcm_sframes_t calc_src_frames(struct snd_pcm_substream *plug, snd_pcm_sframes_t frames, bool check_size) { struct snd_pcm_plugin *plugin, *plugin_prev; plugin = snd_pcm_plug_last(plug); while (plugin && frames > 0) { plugin_prev = plugin->prev; if (plugin->src_frames) { frames = plugin->src_frames(plugin, frames); if (frames < 0) return frames; } if (check_size && plugin->buf_frames && frames > plugin->buf_frames) frames = plugin->buf_frames; plugin = plugin_prev; } return frames; } snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *plug, snd_pcm_uframes_t drv_frames) { if (snd_BUG_ON(!plug)) return -ENXIO; switch (snd_pcm_plug_stream(plug)) { case SNDRV_PCM_STREAM_PLAYBACK: return calc_src_frames(plug, drv_frames, false); case SNDRV_PCM_STREAM_CAPTURE: return calc_dst_frames(plug, drv_frames, false); default: snd_BUG(); return -EINVAL; } } snd_pcm_sframes_t snd_pcm_plug_slave_size(struct snd_pcm_substream *plug, snd_pcm_uframes_t clt_frames) { if (snd_BUG_ON(!plug)) return -ENXIO; switch (snd_pcm_plug_stream(plug)) { case SNDRV_PCM_STREAM_PLAYBACK: return calc_dst_frames(plug, clt_frames, false); case SNDRV_PCM_STREAM_CAPTURE: return calc_src_frames(plug, clt_frames, false); default: snd_BUG(); return -EINVAL; } } static int snd_pcm_plug_formats(const struct snd_mask *mask, snd_pcm_format_t 
/*
 * Linear sample formats considered when a slave format has to be picked,
 * listed in order of preference.
 *
 * The order is significant: snd_pcm_plug_slave_format() scans this table
 * front to back, keeps the first entry with the lowest "badness" (a later
 * entry only wins with a strictly smaller score), and for mu-law input it
 * returns the first entry the slave supports.
 */
static const snd_pcm_format_t preferred_formats[] = {
	SNDRV_PCM_FORMAT_S16_LE,
	SNDRV_PCM_FORMAT_S16_BE,
	SNDRV_PCM_FORMAT_U16_LE,
	SNDRV_PCM_FORMAT_U16_BE,
	SNDRV_PCM_FORMAT_S24_3LE,
	SNDRV_PCM_FORMAT_S24_3BE,
	SNDRV_PCM_FORMAT_U24_3LE,
	SNDRV_PCM_FORMAT_U24_3BE,
	SNDRV_PCM_FORMAT_S24_LE,
	SNDRV_PCM_FORMAT_S24_BE,
	SNDRV_PCM_FORMAT_U24_LE,
	SNDRV_PCM_FORMAT_U24_BE,
	SNDRV_PCM_FORMAT_S32_LE,
	SNDRV_PCM_FORMAT_S32_BE,
	SNDRV_PCM_FORMAT_U32_LE,
	SNDRV_PCM_FORMAT_U32_BE,
	SNDRV_PCM_FORMAT_S8,
	SNDRV_PCM_FORMAT_U8
};
/*
 * Build and append the conversion plugins needed to get from the source
 * side to the destination side of @plug.
 *
 * @plug:         substream the plugins are appended to
 * @params:       hardware parameters on the client side
 * @slave_params: hardware parameters on the slave side
 *
 * For playback the client parameters are the source and the slave
 * parameters the destination; for capture it is the other way round.
 * The client side is always treated as interleaved; the slave side counts
 * as interleaved when it is mono or uses RW_INTERLEAVED access.
 *
 * Stages are considered in a fixed order, each one only when needed:
 * mu-law linearization, channel reduction, rate conversion (preceded by a
 * conversion to S16), format change, channel extension and finally a plain
 * copy when only the access type differs.  After every appended stage
 * srcformat is advanced to the stage's output format.
 *
 * Returns 0 on success or a negative error code.  A plugin that was built
 * but could not be appended is freed here.
 * NOTE(review): plugins appended by earlier stages are not removed on a
 * later failure - presumably the caller tears the chain down; confirm.
 */
int snd_pcm_plug_format_plugins(struct snd_pcm_substream *plug,
				struct snd_pcm_hw_params *params,
				struct snd_pcm_hw_params *slave_params)
{
	struct snd_pcm_plugin_format tmpformat;
	struct snd_pcm_plugin_format dstformat;
	struct snd_pcm_plugin_format srcformat;
	snd_pcm_access_t src_access, dst_access;
	struct snd_pcm_plugin *plugin = NULL;
	int err;
	int stream = snd_pcm_plug_stream(plug);
	int slave_interleaved = (params_channels(slave_params) == 1 ||
				 params_access(slave_params) == SNDRV_PCM_ACCESS_RW_INTERLEAVED);

	/* decide which parameter set is the source and which the destination */
	switch (stream) {
	case SNDRV_PCM_STREAM_PLAYBACK:
		dstformat.format = params_format(slave_params);
		dstformat.rate = params_rate(slave_params);
		dstformat.channels = params_channels(slave_params);
		srcformat.format = params_format(params);
		srcformat.rate = params_rate(params);
		srcformat.channels = params_channels(params);
		src_access = SNDRV_PCM_ACCESS_RW_INTERLEAVED;
		dst_access = (slave_interleaved ? SNDRV_PCM_ACCESS_RW_INTERLEAVED :
						  SNDRV_PCM_ACCESS_RW_NONINTERLEAVED);
		break;
	case SNDRV_PCM_STREAM_CAPTURE:
		dstformat.format = params_format(params);
		dstformat.rate = params_rate(params);
		dstformat.channels = params_channels(params);
		srcformat.format = params_format(slave_params);
		srcformat.rate = params_rate(slave_params);
		srcformat.channels = params_channels(slave_params);
		src_access = (slave_interleaved ? SNDRV_PCM_ACCESS_RW_INTERLEAVED :
						  SNDRV_PCM_ACCESS_RW_NONINTERLEAVED);
		dst_access = SNDRV_PCM_ACCESS_RW_INTERLEAVED;
		break;
	default:
		snd_BUG();
		return -EINVAL;
	}
	/* tmpformat always describes the output of the stage being built */
	tmpformat = srcformat;

	pdprintf("srcformat: format=%i, rate=%i, channels=%i\n",
		 srcformat.format,
		 srcformat.rate,
		 srcformat.channels);
	pdprintf("dstformat: format=%i, rate=%i, channels=%i\n",
		 dstformat.format,
		 dstformat.rate,
		 dstformat.channels);

	/* Format change (linearization) */
	/*
	 * Only needed when a rate conversion will follow and the source is
	 * not linear; mu-law is the only non-linear source accepted here.
	 */
	if (! rate_match(srcformat.rate, dstformat.rate) &&
	    ! snd_pcm_format_linear(srcformat.format)) {
		if (srcformat.format != SNDRV_PCM_FORMAT_MU_LAW)
			return -EINVAL;
		tmpformat.format = SNDRV_PCM_FORMAT_S16;
		err = snd_pcm_plugin_build_mulaw(plug,
						 &srcformat, &tmpformat,
						 &plugin);
		if (err < 0)
			return err;
		err = snd_pcm_plugin_append(plugin);
		if (err < 0) {
			snd_pcm_plugin_free(plugin);
			return err;
		}
		srcformat = tmpformat;
		/*
		 * NOTE(review): once any stage has been appended src_access is
		 * set equal to dst_access, so the final copy stage below is
		 * skipped - presumably the appended plugin handles the access
		 * change itself; confirm.
		 */
		src_access = dst_access;
	}

	/* channels reduction */
	if (srcformat.channels > dstformat.channels) {
		tmpformat.channels = dstformat.channels;
		err = snd_pcm_plugin_build_route(plug, &srcformat, &tmpformat, &plugin);
		pdprintf("channels reduction: src=%i, dst=%i returns %i\n", srcformat.channels, tmpformat.channels, err);
		if (err < 0)
			return err;
		err = snd_pcm_plugin_append(plugin);
		if (err < 0) {
			snd_pcm_plugin_free(plugin);
			return err;
		}
		srcformat = tmpformat;
		src_access = dst_access;
	}

	/* rate resampling */
	if (!rate_match(srcformat.rate, dstformat.rate)) {
		if (srcformat.format != SNDRV_PCM_FORMAT_S16) {
			/* convert to S16 for resampling */
			tmpformat.format = SNDRV_PCM_FORMAT_S16;
			err = snd_pcm_plugin_build_linear(plug,
							  &srcformat, &tmpformat,
							  &plugin);
			if (err < 0)
				return err;
			err = snd_pcm_plugin_append(plugin);
			if (err < 0) {
				snd_pcm_plugin_free(plugin);
				return err;
			}
			srcformat = tmpformat;
			src_access = dst_access;
		}
		tmpformat.rate = dstformat.rate;
		err = snd_pcm_plugin_build_rate(plug,
						&srcformat, &tmpformat,
						&plugin);
		pdprintf("rate down resampling: src=%i, dst=%i returns %i\n", srcformat.rate, tmpformat.rate, err);
		if (err < 0)
			return err;
		err = snd_pcm_plugin_append(plugin);
		if (err < 0) {
			snd_pcm_plugin_free(plugin);
			return err;
		}
		srcformat = tmpformat;
		src_access = dst_access;
	}

	/* format change */
	/* mu-law on either side uses the mulaw plugin, linear<->linear the linear one */
	if (srcformat.format != dstformat.format) {
		tmpformat.format = dstformat.format;
		if (srcformat.format == SNDRV_PCM_FORMAT_MU_LAW ||
		    tmpformat.format == SNDRV_PCM_FORMAT_MU_LAW) {
			err = snd_pcm_plugin_build_mulaw(plug,
							 &srcformat, &tmpformat,
							 &plugin);
		}
		else if (snd_pcm_format_linear(srcformat.format) &&
			 snd_pcm_format_linear(tmpformat.format)) {
			err = snd_pcm_plugin_build_linear(plug,
							  &srcformat, &tmpformat,
							  &plugin);
		}
		else
			return -EINVAL;
		pdprintf("format change: src=%i, dst=%i returns %i\n", srcformat.format, tmpformat.format, err);
		if (err < 0)
			return err;
		err = snd_pcm_plugin_append(plugin);
		if (err < 0) {
			snd_pcm_plugin_free(plugin);
			return err;
		}
		srcformat = tmpformat;
		src_access = dst_access;
	}

	/* channels extension */
	if (srcformat.channels < dstformat.channels) {
		tmpformat.channels = dstformat.channels;
		err = snd_pcm_plugin_build_route(plug, &srcformat, &tmpformat, &plugin);
		pdprintf("channels extension: src=%i, dst=%i returns %i\n", srcformat.channels, tmpformat.channels, err);
		if (err < 0)
			return err;
		err = snd_pcm_plugin_append(plugin);
		if (err < 0) {
			snd_pcm_plugin_free(plugin);
			return err;
		}
		srcformat = tmpformat;
		src_access = dst_access;
	}

	/* de-interleave */
	/* reached with differing access types only when no stage above was appended */
	if (src_access != dst_access) {
		err = snd_pcm_plugin_build_copy(plug,
						&srcformat,
						&tmpformat,
						&plugin);
		pdprintf("interleave change (copy: returns %i)\n", err);
		if (err < 0)
			return err;
		err = snd_pcm_plugin_append(plugin);
		if (err < 0) {
			snd_pcm_plugin_free(plugin);
			return err;
		}
	}

	return 0;
}
plugin: %s, %li\n", plugin->name, frames); frames = plugin->transfer(plugin, src_channels, dst_channels, frames); if (frames < 0) return frames; src_channels = dst_channels; plugin = next; } return calc_src_frames(plug, frames, true); } snd_pcm_sframes_t snd_pcm_plug_read_transfer(struct snd_pcm_substream *plug, struct snd_pcm_plugin_channel *dst_channels_final, snd_pcm_uframes_t size) { struct snd_pcm_plugin *plugin, *next; struct snd_pcm_plugin_channel *src_channels, *dst_channels; snd_pcm_sframes_t frames = size; int err; frames = calc_src_frames(plug, frames, true); if (frames < 0) return frames; src_channels = NULL; plugin = snd_pcm_plug_first(plug); while (plugin && frames > 0) { next = plugin->next; if (next) { err = plugin->client_channels(plugin, frames, &dst_channels); if (err < 0) return err; frames = err; } else { dst_channels = dst_channels_final; } pdprintf("read plugin: %s, %li\n", plugin->name, frames); frames = plugin->transfer(plugin, src_channels, dst_channels, frames); if (frames < 0) return frames; plugin = next; src_channels = dst_channels; } return frames; } int snd_pcm_area_silence(const struct snd_pcm_channel_area *dst_area, size_t dst_offset, size_t samples, snd_pcm_format_t format) { /* FIXME: sub byte resolution and odd dst_offset */ unsigned char *dst; unsigned int dst_step; int width; const unsigned char *silence; if (!dst_area->addr) return 0; dst = dst_area->addr + (dst_area->first + dst_area->step * dst_offset) / 8; width = snd_pcm_format_physical_width(format); if (width <= 0) return -EINVAL; if (dst_area->step == (unsigned int) width && width >= 8) return snd_pcm_format_set_silence(format, dst, samples); silence = snd_pcm_format_silence_64(format); if (! 
silence) return -EINVAL; dst_step = dst_area->step / 8; if (width == 4) { /* Ima ADPCM */ int dstbit = dst_area->first % 8; int dstbit_step = dst_area->step % 8; while (samples-- > 0) { if (dstbit) *dst &= 0xf0; else *dst &= 0x0f; dst += dst_step; dstbit += dstbit_step; if (dstbit == 8) { dst++; dstbit = 0; } } } else { width /= 8; while (samples-- > 0) { memcpy(dst, silence, width); dst += dst_step; } } return 0; } int snd_pcm_area_copy(const struct snd_pcm_channel_area *src_area, size_t src_offset, const struct snd_pcm_channel_area *dst_area, size_t dst_offset, size_t samples, snd_pcm_format_t format) { /* FIXME: sub byte resolution and odd dst_offset */ char *src, *dst; int width; int src_step, dst_step; src = src_area->addr + (src_area->first + src_area->step * src_offset) / 8; if (!src_area->addr) return snd_pcm_area_silence(dst_area, dst_offset, samples, format); dst = dst_area->addr + (dst_area->first + dst_area->step * dst_offset) / 8; if (!dst_area->addr) return 0; width = snd_pcm_format_physical_width(format); if (width <= 0) return -EINVAL; if (src_area->step == (unsigned int) width && dst_area->step == (unsigned int) width && width >= 8) { size_t bytes = samples * width / 8; memcpy(dst, src, bytes); return 0; } src_step = src_area->step / 8; dst_step = dst_area->step / 8; if (width == 4) { /* Ima ADPCM */ int srcbit = src_area->first % 8; int srcbit_step = src_area->step % 8; int dstbit = dst_area->first % 8; int dstbit_step = dst_area->step % 8; while (samples-- > 0) { unsigned char srcval; if (srcbit) srcval = *src & 0x0f; else srcval = (*src & 0xf0) >> 4; if (dstbit) *dst = (*dst & 0xf0) | srcval; else *dst = (*dst & 0x0f) | (srcval << 4); src += src_step; srcbit += srcbit_step; if (srcbit == 8) { src++; srcbit = 0; } dst += dst_step; dstbit += dstbit_step; if (dstbit == 8) { dst++; dstbit = 0; } } } else { width /= 8; while (samples-- > 0) { memcpy(dst, src, width); src += src_step; dst += dst_step; } } return 0; }
1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 // SPDX-License-Identifier: GPL-2.0 /* * thermal_hwmon.c - Generic Thermal Management hwmon support. * * Code based on Intel thermal_core.c. 
Copyrights of the original code: * Copyright (C) 2008 Intel Corp * Copyright (C) 2008 Zhang Rui <rui.zhang@intel.com> * Copyright (C) 2008 Sujith Thomas <sujith.thomas@intel.com> * * Copyright (C) 2013 Texas Instruments * Copyright (C) 2013 Eduardo Valentin <eduardo.valentin@ti.com> */ #include <linux/err.h> #include <linux/export.h> #include <linux/hwmon.h> #include <linux/slab.h> #include <linux/thermal.h> #include "thermal_hwmon.h" #include "thermal_core.h" /* hwmon sys I/F */ /* thermal zone devices with the same type share one hwmon device */ struct thermal_hwmon_device { char type[THERMAL_NAME_LENGTH]; struct device *device; int count; struct list_head tz_list; struct list_head node; }; struct thermal_hwmon_attr { struct device_attribute attr; char name[16]; }; /* one temperature input for each thermal zone */ struct thermal_hwmon_temp { struct list_head hwmon_node; struct thermal_zone_device *tz; struct thermal_hwmon_attr temp_input; /* hwmon sys attr */ struct thermal_hwmon_attr temp_crit; /* hwmon sys attr */ }; static LIST_HEAD(thermal_hwmon_list); static DEFINE_MUTEX(thermal_hwmon_list_lock); static ssize_t temp_input_show(struct device *dev, struct device_attribute *attr, char *buf) { int temperature; int ret; struct thermal_hwmon_attr *hwmon_attr = container_of(attr, struct thermal_hwmon_attr, attr); struct thermal_hwmon_temp *temp = container_of(hwmon_attr, struct thermal_hwmon_temp, temp_input); struct thermal_zone_device *tz = temp->tz; ret = thermal_zone_get_temp(tz, &temperature); if (ret) return ret; return sysfs_emit(buf, "%d\n", temperature); } static ssize_t temp_crit_show(struct device *dev, struct device_attribute *attr, char *buf) { struct thermal_hwmon_attr *hwmon_attr = container_of(attr, struct thermal_hwmon_attr, attr); struct thermal_hwmon_temp *temp = container_of(hwmon_attr, struct thermal_hwmon_temp, temp_crit); struct thermal_zone_device *tz = temp->tz; int temperature; int ret; guard(thermal_zone)(tz); ret = 
tz->ops.get_crit_temp(tz, &temperature); if (ret) return ret; return sysfs_emit(buf, "%d\n", temperature); } static struct thermal_hwmon_device * thermal_hwmon_lookup_by_type(const struct thermal_zone_device *tz) { struct thermal_hwmon_device *hwmon; char type[THERMAL_NAME_LENGTH]; mutex_lock(&thermal_hwmon_list_lock); list_for_each_entry(hwmon, &thermal_hwmon_list, node) { strscpy(type, tz->type); strreplace(type, '-', '_'); if (!strcmp(hwmon->type, type)) { mutex_unlock(&thermal_hwmon_list_lock); return hwmon; } } mutex_unlock(&thermal_hwmon_list_lock); return NULL; } /* Find the temperature input matching a given thermal zone */ static struct thermal_hwmon_temp * thermal_hwmon_lookup_temp(const struct thermal_hwmon_device *hwmon, const struct thermal_zone_device *tz) { struct thermal_hwmon_temp *temp; mutex_lock(&thermal_hwmon_list_lock); list_for_each_entry(temp, &hwmon->tz_list, hwmon_node) if (temp->tz == tz) { mutex_unlock(&thermal_hwmon_list_lock); return temp; } mutex_unlock(&thermal_hwmon_list_lock); return NULL; } static bool thermal_zone_crit_temp_valid(struct thermal_zone_device *tz) { int temp; return tz->ops.get_crit_temp && !tz->ops.get_crit_temp(tz, &temp); } int thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) { struct thermal_hwmon_device *hwmon; struct thermal_hwmon_temp *temp; int new_hwmon_device = 1; int result; hwmon = thermal_hwmon_lookup_by_type(tz); if (hwmon) { new_hwmon_device = 0; goto register_sys_interface; } hwmon = kzalloc_obj(*hwmon); if (!hwmon) return -ENOMEM; INIT_LIST_HEAD(&hwmon->tz_list); strscpy(hwmon->type, tz->type, THERMAL_NAME_LENGTH); strreplace(hwmon->type, '-', '_'); hwmon->device = hwmon_device_register_for_thermal(&tz->device, hwmon->type, hwmon); if (IS_ERR(hwmon->device)) { result = PTR_ERR(hwmon->device); goto free_mem; } register_sys_interface: temp = kzalloc_obj(*temp); if (!temp) { result = -ENOMEM; goto unregister_name; } temp->tz = tz; hwmon->count++; snprintf(temp->temp_input.name, 
sizeof(temp->temp_input.name), "temp%d_input", hwmon->count); temp->temp_input.attr.attr.name = temp->temp_input.name; temp->temp_input.attr.attr.mode = 0444; temp->temp_input.attr.show = temp_input_show; sysfs_attr_init(&temp->temp_input.attr.attr); result = device_create_file(hwmon->device, &temp->temp_input.attr); if (result) goto free_temp_mem; if (thermal_zone_crit_temp_valid(tz)) { snprintf(temp->temp_crit.name, sizeof(temp->temp_crit.name), "temp%d_crit", hwmon->count); temp->temp_crit.attr.attr.name = temp->temp_crit.name; temp->temp_crit.attr.attr.mode = 0444; temp->temp_crit.attr.show = temp_crit_show; sysfs_attr_init(&temp->temp_crit.attr.attr); result = device_create_file(hwmon->device, &temp->temp_crit.attr); if (result) goto unregister_input; } mutex_lock(&thermal_hwmon_list_lock); if (new_hwmon_device) list_add_tail(&hwmon->node, &thermal_hwmon_list); list_add_tail(&temp->hwmon_node, &hwmon->tz_list); mutex_unlock(&thermal_hwmon_list_lock); return 0; unregister_input: device_remove_file(hwmon->device, &temp->temp_input.attr); free_temp_mem: kfree(temp); unregister_name: if (new_hwmon_device) hwmon_device_unregister(hwmon->device); free_mem: kfree(hwmon); return result; } EXPORT_SYMBOL_GPL(thermal_add_hwmon_sysfs); void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) { struct thermal_hwmon_device *hwmon; struct thermal_hwmon_temp *temp; hwmon = thermal_hwmon_lookup_by_type(tz); if (unlikely(!hwmon)) { /* Should never happen... */ dev_dbg(&tz->device, "hwmon device lookup failed!\n"); return; } temp = thermal_hwmon_lookup_temp(hwmon, tz); if (unlikely(!temp)) { /* Should never happen... 
*/ dev_dbg(&tz->device, "temperature input lookup failed!\n"); return; } device_remove_file(hwmon->device, &temp->temp_input.attr); if (thermal_zone_crit_temp_valid(tz)) device_remove_file(hwmon->device, &temp->temp_crit.attr); mutex_lock(&thermal_hwmon_list_lock); list_del(&temp->hwmon_node); kfree(temp); if (!list_empty(&hwmon->tz_list)) { mutex_unlock(&thermal_hwmon_list_lock); return; } list_del(&hwmon->node); mutex_unlock(&thermal_hwmon_list_lock); hwmon_device_unregister(hwmon->device); kfree(hwmon); } EXPORT_SYMBOL_GPL(thermal_remove_hwmon_sysfs); static void devm_thermal_hwmon_release(struct device *dev, void *res) { thermal_remove_hwmon_sysfs(*(struct thermal_zone_device **)res); } int devm_thermal_add_hwmon_sysfs(struct device *dev, struct thermal_zone_device *tz) { struct thermal_zone_device **ptr; int ret; ptr = devres_alloc(devm_thermal_hwmon_release, sizeof(*ptr), GFP_KERNEL); if (!ptr) { dev_warn(dev, "Failed to allocate device resource data\n"); return -ENOMEM; } ret = thermal_add_hwmon_sysfs(tz); if (ret) { dev_warn(dev, "Failed to add hwmon sysfs attributes\n"); devres_free(ptr); return ret; } *ptr = tz; devres_add(dev, ptr); return ret; } EXPORT_SYMBOL_GPL(devm_thermal_add_hwmon_sysfs); MODULE_IMPORT_NS("HWMON_THERMAL");
/*
 * Initialise the round-robin state of @svc.
 *
 * sched_data is used as the scheduler's position in the destination list;
 * it starts out pointing at the list head itself (&svc->destinations).
 * Always returns 0.
 */
static int ip_vs_rr_init_svc(struct ip_vs_service *svc)
{
	svc->sched_data = &svc->destinations;
	return 0;
}
/*
 * Round-Robin Scheduling
 *
 * Pick the next usable destination of @svc after the position remembered
 * in svc->sched_data.  A destination is usable when it is not flagged
 * IP_VS_DEST_F_OVERLOAD and its weight is greater than zero.
 *
 * @skb and @iph are not used by this scheduler.
 *
 * On a hit the remembered position is moved to the chosen destination and
 * the destination is returned.  When no usable destination is found,
 * ip_vs_scheduler_err() is called and NULL is returned.  The search and
 * the position update run under svc->sched_lock.
 */
static struct ip_vs_dest *
ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
		  struct ip_vs_iphdr *iph)
{
	struct list_head *p;
	struct ip_vs_dest *dest, *last;
	int pass = 0;

	IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);

	spin_lock_bh(&svc->sched_lock);
	/* p is either the list head or the n_list of the last chosen dest */
	p = (struct list_head *) svc->sched_data;
	last = dest = list_entry(p, struct ip_vs_dest, n_list);

	do {
		/* resume iteration with the entry following 'dest' */
		list_for_each_entry_continue_rcu(dest,
						 &svc->destinations,
						 n_list) {
			if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
			    atomic_read(&dest->weight) > 0)
				/* HIT */
				goto out;
			/* back at the starting entry: nothing usable was found */
			if (dest == last)
				goto stop;
		}
		pass++;
		/* Previous dest could be unlinked, do not loop forever.
		 * If we stay at head there is no need for 2nd pass.
		 */
	} while (pass < 2 && p != &svc->destinations);

stop:
	spin_unlock_bh(&svc->sched_lock);
	ip_vs_scheduler_err(svc, "no destination available");
	return NULL;

out:
	/* remember the pick so the next call continues behind it */
	svc->sched_data = &dest->n_list;
	spin_unlock_bh(&svc->sched_lock);
	IP_VS_DBG_BUF(6, "RR: server %s:%u "
		      "activeconns %d refcnt %d weight %d\n",
		      IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
		      atomic_read(&dest->activeconns),
		      refcount_read(&dest->refcnt), atomic_read(&dest->weight));

	return dest;
}
9 9 9 20 20 53 9 17 20 12 1 1 28 27 1 1 6 5 1 2 2 10 10 2 2 5 7 7 2 2 7 3 4 42 1 28 13 6 2 2 2 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 
502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 
1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
 * Copyright (c) 2014 Intel Corporation
 * Author: Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>
 *
 * Development of this code funded by Astaro AG (http://www.astaro.com/)
 */

#include <linux/kernel.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/random.h>
#include <linux/smp.h>
#include <linux/static_key.h>
#include <net/dst.h>
#include <net/ip.h>
#include <net/sock.h>
#include <net/tcp_states.h> /* for TCP_TIME_WAIT */
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nft_meta.h>
#include <net/netfilter/nf_tables_offload.h>

#include <uapi/linux/netfilter_bridge.h> /* NF_BR_PRE_ROUTING */

/* Time conversion constants used by the NFT_META_TIME_* keys. */
#define NFT_META_SECS_PER_MINUTE	60
#define NFT_META_SECS_PER_HOUR		3600
#define NFT_META_SECS_PER_DAY		86400
#define NFT_META_DAYS_PER_WEEK		7

/*
 * Return the current day of the week (0..6) in the system timezone.
 *
 * The wall-clock time is shifted by sys_tz.tz_minuteswest and converted to
 * whole days since the epoch.
 */
static u8 nft_meta_weekday(void)
{
	time64_t secs = ktime_get_real_seconds();
	unsigned int dse;
	u8 wday;

	secs -= NFT_META_SECS_PER_MINUTE * sys_tz.tz_minuteswest;
	dse = div_u64(secs, NFT_META_SECS_PER_DAY);
	/*
	 * NOTE(review): the offset of 4 presumably reflects that the epoch
	 * day (1970-01-01) was a Thursday with 0 == Sunday - confirm.
	 */
	wday = (4 + dse) % NFT_META_DAYS_PER_WEEK;

	return wday;
}

/*
 * Convert @secs to the number of seconds elapsed since midnight of that day,
 * as broken down by time64_to_tm() with a zero offset.
 */
static u32 nft_meta_hour(time64_t secs)
{
	struct tm tm;

	time64_to_tm(secs, 0, &tm);

	return tm.tm_hour * NFT_META_SECS_PER_HOUR
		+ tm.tm_min * NFT_META_SECS_PER_MINUTE
		+ tm.tm_sec;
}

/*
 * Store the value of one of the NFT_META_TIME_* keys into @dest:
 * nanoseconds of wall-clock time (64 bit), weekday (8 bit) or seconds since
 * midnight (32 bit). Any other key leaves @dest untouched.
 */
static noinline_for_stack void
nft_meta_get_eval_time(enum nft_meta_keys key, u32 *dest)
{
	switch (key) {
	case NFT_META_TIME_NS:
		nft_reg_store64((u64 *)dest, ktime_get_real_ns());
		break;
	case NFT_META_TIME_DAY:
		nft_reg_store8(dest, nft_meta_weekday());
		break;
	case NFT_META_TIME_HOUR:
		*dest = nft_meta_hour(ktime_get_real_seconds());
		break;
	default:
		break;
	}
}

/*
 * Derive a packet type from the destination address instead of
 * skb->pkt_type; nft_meta_get_eval() calls this only when skb->pkt_type is
 * PACKET_LOOPBACK.
 *
 * IPv4 packets are reported as PACKET_MULTICAST when the destination is a
 * multicast address and PACKET_BROADCAST otherwise; IPv6 packets are always
 * reported as PACKET_MULTICAST. For the netdev family the decision is made
 * on skb->protocol.
 *
 * Returns false when the IPv4 header cannot be obtained or the
 * family/protocol is not handled (the latter also triggers a one-time
 * warning); true once @dest has been written.
 */
static noinline bool
nft_meta_get_eval_pkttype_lo(const struct nft_pktinfo *pkt,
			     u32 *dest)
{
	const struct sk_buff *skb = pkt->skb;

	switch (nft_pf(pkt)) {
	case NFPROTO_IPV4:
		if (ipv4_is_multicast(ip_hdr(skb)->daddr))
			nft_reg_store8(dest, PACKET_MULTICAST);
		else
			nft_reg_store8(dest, PACKET_BROADCAST);
		break;
	case NFPROTO_IPV6:
		nft_reg_store8(dest, PACKET_MULTICAST);
		break;
	case NFPROTO_NETDEV:
		switch (skb->protocol) {
		case htons(ETH_P_IP): {
			int noff = skb_network_offset(skb);
			struct iphdr *iph, _iph;

			/* fetch the header via skb_header_pointer(), using _iph as backing storage */
			iph = skb_header_pointer(skb, noff,
						 sizeof(_iph), &_iph);
			if (!iph)
				return false;

			if (ipv4_is_multicast(iph->daddr))
				nft_reg_store8(dest, PACKET_MULTICAST);
			else
				nft_reg_store8(dest, PACKET_BROADCAST);

			break;
		}
		case htons(ETH_P_IPV6):
			nft_reg_store8(dest, PACKET_MULTICAST);
			break;
		default:
			WARN_ON_ONCE(1);
			return false;
		}
		break;
	default:
		WARN_ON_ONCE(1);
		return false;
	}

	return true;
}

/*
 * Store the fsuid (NFT_META_SKUID) or fsgid (NFT_META_SKGID) of the file
 * backing the packet's socket, mapped into the user namespace of the
 * socket's network namespace.
 *
 * Returns false when there is no full socket, the socket belongs to a
 * different network namespace than the packet, or the socket has no
 * associated file.
 */
static noinline bool
nft_meta_get_eval_skugid(enum nft_meta_keys key,
			 u32 *dest,
			 const struct nft_pktinfo *pkt)
{
	struct sock *sk = skb_to_full_sk(pkt->skb);
	struct socket *sock;

	if (!sk || !sk_fullsock(sk) || !net_eq(nft_net(pkt), sock_net(sk)))
		return false;

	/* sk_socket and its file are only dereferenced under sk_callback_lock */
	read_lock_bh(&sk->sk_callback_lock);
	sock = sk->sk_socket;
	if (!sock || !sock->file) {
		read_unlock_bh(&sk->sk_callback_lock);
		return false;
	}

	switch (key) {
	case NFT_META_SKUID:
		*dest = from_kuid_munged(sock_net(sk)->user_ns,
					 sock->file->f_cred->fsuid);
		break;
	case NFT_META_SKGID:
		*dest = from_kgid_munged(sock_net(sk)->user_ns,
					 sock->file->f_cred->fsgid);
		break;
	default:
		break;
	}

	read_unlock_bh(&sk->sk_callback_lock);
	return true;
}

#ifdef CONFIG_CGROUP_NET_CLASSID
/*
 * Store the cgroup classid of the packet's socket in @dest.
 *
 * Returns false when there is no full socket or it lives in a different
 * network namespace than the packet.
 */
static noinline bool
nft_meta_get_eval_cgroup(u32 *dest, const struct nft_pktinfo *pkt)
{
	struct sock *sk = skb_to_full_sk(pkt->skb);

	if (!sk || !sk_fullsock(sk) || !net_eq(nft_net(pkt), sock_net(sk)))
		return false;

	*dest = sock_cgroup_classid(&sk->sk_cgrp_data);
	return true;
}
#endif

/*
 * Copy the rtnl link "kind" string of the input (NFT_META_IIFKIND) or output
 * (NFT_META_OIFKIND) device into @dest, padded to IFNAMSIZ bytes.
 *
 * Returns false when the device is missing, has no rtnl_link_ops, or @key is
 * neither of the two kinds.
 */
static noinline bool nft_meta_get_eval_kind(enum nft_meta_keys key,
					    u32 *dest,
					    const struct nft_pktinfo *pkt)
{
	const struct net_device *in = nft_in(pkt), *out = nft_out(pkt);

	switch (key) {
	case NFT_META_IIFKIND:
		if (!in || !in->rtnl_link_ops)
			return false;
		strscpy_pad((char *)dest, in->rtnl_link_ops->kind, IFNAMSIZ);
		break;
	case NFT_META_OIFKIND:
		if (!out || !out->rtnl_link_ops)
			return false;
		strscpy_pad((char *)dest, out->rtnl_link_ops->kind, IFNAMSIZ);
		break;
	default:
		return false;
	}

	return true;
}

/* Store @dev's ifindex in @dest, or 0 when there is no device. */
static void nft_meta_store_ifindex(u32 *dest, const struct net_device *dev)
{
	*dest = dev ? dev->ifindex : 0;
}

/*
 * Store @dev's name in @dest padded to IFNAMSIZ bytes; a missing device
 * yields an empty (all-zero) name.
 */
static void nft_meta_store_ifname(u32 *dest, const struct net_device *dev)
{
	strscpy_pad((char *)dest, dev ? dev->name : "", IFNAMSIZ);
}

/* Store @dev->type as a 16-bit value; returns false when @dev is NULL. */
static bool nft_meta_store_iftype(u32 *dest, const struct net_device *dev)
{
	if (!dev)
		return false;

	nft_reg_store16(dest, dev->type);
	return true;
}

/* Store @dev->group in @dest; returns false when @dev is NULL. */
static bool nft_meta_store_ifgroup(u32 *dest, const struct net_device *dev)
{
	if (!dev)
		return false;

	*dest = dev->group;
	return true;
}

/*
 * Evaluate the interface related keys (index, name, type, group) for the
 * input/output device of @pkt.
 *
 * Index and name lookups always succeed (a missing device yields 0 / an
 * empty name); type and group lookups fail when the device is missing.
 * Returns false on such a failure or for a key not handled here.
 */
static bool nft_meta_get_eval_ifname(enum nft_meta_keys key, u32 *dest,
				     const struct nft_pktinfo *pkt)
{
	switch (key) {
	case NFT_META_IIFNAME:
		nft_meta_store_ifname(dest, nft_in(pkt));
		break;
	case NFT_META_OIFNAME:
		nft_meta_store_ifname(dest, nft_out(pkt));
		break;
	case NFT_META_IIF:
		nft_meta_store_ifindex(dest, nft_in(pkt));
		break;
	case NFT_META_OIF:
		nft_meta_store_ifindex(dest, nft_out(pkt));
		break;
	case NFT_META_IFTYPE:
		/* type of the device currently attached to the skb */
		if (!nft_meta_store_iftype(dest, pkt->skb->dev))
			return false;
		break;
	case __NFT_META_IIFTYPE:
		/* type of the hook's input device */
		if (!nft_meta_store_iftype(dest, nft_in(pkt)))
			return false;
		break;
	case NFT_META_OIFTYPE:
		if (!nft_meta_store_iftype(dest, nft_out(pkt)))
			return false;
		break;
	case NFT_META_IIFGROUP:
		if (!nft_meta_store_ifgroup(dest, nft_in(pkt)))
			return false;
		break;
	case NFT_META_OIFGROUP:
		if (!nft_meta_store_ifgroup(dest, nft_out(pkt)))
			return false;
		break;
	default:
		return false;
	}

	return true;
}

#ifdef CONFIG_IP_ROUTE_CLASSID
/*
 * Store the tclassid of the skb's dst entry in @dest; returns false when the
 * skb has no dst attached.
 */
static noinline bool
nft_meta_get_eval_rtclassid(const struct sk_buff *skb, u32 *dest)
{
	const struct dst_entry *dst = skb_dst(skb);

	if (!dst)
		return false;

	*dest = dst->tclassid;
	return true;
}
#endif
/*
 * Return the slave device index for IPv4/IPv6 packets as reported by
 * inet_sdif()/inet6_sdif(), or 0 for any other family.
 */
static noinline u32 nft_meta_get_eval_sdif(const struct nft_pktinfo *pkt)
{
	switch (nft_pf(pkt)) {
	case NFPROTO_IPV4:
		return inet_sdif(pkt->skb);
	case NFPROTO_IPV6:
		return inet6_sdif(pkt->skb);
	}

	return 0;
}

/*
 * Store the name of the device identified by the packet's sdif in @dest.
 * A zero sdif (and a failed lookup, which yields NULL) stores an empty name.
 */
static noinline void
nft_meta_get_eval_sdifname(u32 *dest, const struct nft_pktinfo *pkt)
{
	u32 sdif = nft_meta_get_eval_sdif(pkt);
	const struct net_device *dev;

	dev = sdif ? dev_get_by_index_rcu(nft_net(pkt), sdif) : NULL;
	nft_meta_store_ifname(dest, dev);
}

/*
 * Evaluate a "meta get" expression: load the packet metadata selected by
 * priv->key into the destination register priv->dreg.
 *
 * When the value cannot be obtained (helper reports failure, transport
 * protocol unknown, unhandled key) the verdict is set to NFT_BREAK.
 */
void nft_meta_get_eval(const struct nft_expr *expr,
		       struct nft_regs *regs,
		       const struct nft_pktinfo *pkt)
{
	const struct nft_meta *priv = nft_expr_priv(expr);
	const struct sk_buff *skb = pkt->skb;
	u32 *dest = &regs->data[priv->dreg];

	switch (priv->key) {
	case NFT_META_LEN:
		*dest = skb->len;
		break;
	case NFT_META_PROTOCOL:
		nft_reg_store16(dest, (__force u16)skb->protocol);
		break;
	case NFT_META_NFPROTO:
		nft_reg_store8(dest, nft_pf(pkt));
		break;
	case NFT_META_L4PROTO:
		/* pkt->tprot is only used when the L4PROTO flag is set */
		if (!(pkt->flags & NFT_PKTINFO_L4PROTO))
			goto err;
		nft_reg_store8(dest, pkt->tprot);
		break;
	case NFT_META_PRIORITY:
		*dest = skb->priority;
		break;
	case NFT_META_MARK:
		*dest = skb->mark;
		break;
	case NFT_META_IIF:
	case NFT_META_OIF:
	case NFT_META_IIFNAME:
	case NFT_META_OIFNAME:
	case NFT_META_IIFTYPE:
	case NFT_META_OIFTYPE:
	case NFT_META_IIFGROUP:
	case NFT_META_OIFGROUP:
		if (!nft_meta_get_eval_ifname(priv->key, dest, pkt))
			goto err;
		break;
	case NFT_META_SKUID:
	case NFT_META_SKGID:
		if (!nft_meta_get_eval_skugid(priv->key, dest, pkt))
			goto err;
		break;
#ifdef CONFIG_IP_ROUTE_CLASSID
	case NFT_META_RTCLASSID:
		if (!nft_meta_get_eval_rtclassid(skb, dest))
			goto err;
		break;
#endif
#ifdef CONFIG_NETWORK_SECMARK
	case NFT_META_SECMARK:
		*dest = skb->secmark;
		break;
#endif
	case NFT_META_PKTTYPE:
		if (skb->pkt_type != PACKET_LOOPBACK) {
			nft_reg_store8(dest, skb->pkt_type);
			break;
		}

		/* loopback: derive the type from the destination address */
		if (!nft_meta_get_eval_pkttype_lo(pkt, dest))
			goto err;
		break;
	case NFT_META_CPU:
		*dest = raw_smp_processor_id();
		break;
#ifdef CONFIG_CGROUP_NET_CLASSID
	case NFT_META_CGROUP:
		if (!nft_meta_get_eval_cgroup(dest, pkt))
			goto err;
		break;
#endif
	case NFT_META_PRANDOM:
		*dest = get_random_u32();
		break;
#ifdef CONFIG_XFRM
	case NFT_META_SECPATH:
		nft_reg_store8(dest, secpath_exists(skb));
		break;
#endif
	case NFT_META_IIFKIND:
	case NFT_META_OIFKIND:
		if (!nft_meta_get_eval_kind(priv->key, dest, pkt))
			goto err;
		break;
	case NFT_META_TIME_NS:
	case NFT_META_TIME_DAY:
	case NFT_META_TIME_HOUR:
		nft_meta_get_eval_time(priv->key, dest);
		break;
	case NFT_META_SDIF:
		*dest = nft_meta_get_eval_sdif(pkt);
		break;
	case NFT_META_SDIFNAME:
		nft_meta_get_eval_sdifname(dest, pkt);
		break;
	default:
		WARN_ON(1);
		goto err;
	}
	return;

err:
	regs->verdict.code = NFT_BREAK;
}
EXPORT_SYMBOL_GPL(nft_meta_get_eval);

/*
 * Evaluate a "meta set" expression: write the value held in the source
 * register meta->sreg into the skb field selected by meta->key.
 *
 * An unhandled key triggers a warning and leaves the skb untouched.
 */
void nft_meta_set_eval(const struct nft_expr *expr,
		       struct nft_regs *regs,
		       const struct nft_pktinfo *pkt)
{
	const struct nft_meta *meta = nft_expr_priv(expr);
	struct sk_buff *skb = pkt->skb;
	u32 *sreg = &regs->data[meta->sreg];
	u32 value = *sreg;
	u8 value8;

	switch (meta->key) {
	case NFT_META_MARK:
		skb->mark = value;
		break;
	case NFT_META_PRIORITY:
		skb->priority = value;
		break;
	case NFT_META_PKTTYPE:
		value8 = nft_reg_load8(sreg);

		/* only rewrite when both old and new type pass skb_pkt_type_ok() */
		if (skb->pkt_type != value8 &&
		    skb_pkt_type_ok(value8) &&
		    skb_pkt_type_ok(skb->pkt_type))
			skb->pkt_type = value8;
		break;
	case NFT_META_NFTRACE:
		value8 = nft_reg_load8(sreg);

		/* any non-zero value enables tracing for this skb */
		skb->nf_trace = !!value8;
		break;
#ifdef CONFIG_NETWORK_SECMARK
	case NFT_META_SECMARK:
		skb->secmark = value;
		break;
#endif
	default:
		WARN_ON(1);
	}
}
EXPORT_SYMBOL_GPL(nft_meta_set_eval);

/* Netlink attribute policy for the meta expression; the key is capped at 255. */
const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
	[NFTA_META_DREG]	= { .type = NLA_U32 },
	[NFTA_META_KEY]		= NLA_POLICY_MAX(NLA_BE32, 255),
	[NFTA_META_SREG]	= { .type = NLA_U32 },
};
EXPORT_SYMBOL_GPL(nft_meta_policy);

/*
 * Initialise a "meta get" expression from netlink attributes: record the
 * key, derive the width of the value it produces and register the
 * destination register with that length.
 *
 * Returns -EOPNOTSUPP for a key not listed below (including keys whose
 * config option is disabled), otherwise the result of
 * nft_parse_register_store().
 */
int nft_meta_get_init(const struct nft_ctx *ctx,
		      const struct nft_expr *expr,
		      const struct nlattr * const tb[])
{
	struct nft_meta *priv = nft_expr_priv(expr);
	unsigned int len;

	priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
	switch (priv->key) {
	case NFT_META_PROTOCOL:
	case NFT_META_IIFTYPE:
	case NFT_META_OIFTYPE:
		len = sizeof(u16);
		break;
	case NFT_META_NFPROTO:
	case NFT_META_L4PROTO:
	case NFT_META_LEN:
	case NFT_META_PRIORITY:
	case NFT_META_MARK:
	case NFT_META_IIF:
	case NFT_META_OIF:
	case NFT_META_SDIF:
	case NFT_META_SKUID:
	case NFT_META_SKGID:
#ifdef CONFIG_IP_ROUTE_CLASSID
	case NFT_META_RTCLASSID:
#endif
#ifdef CONFIG_NETWORK_SECMARK
	case NFT_META_SECMARK:
#endif
	case NFT_META_PKTTYPE:
	case NFT_META_CPU:
	case NFT_META_IIFGROUP:
	case NFT_META_OIFGROUP:
#ifdef CONFIG_CGROUP_NET_CLASSID
	case NFT_META_CGROUP:
#endif
		len = sizeof(u32);
		break;
	case NFT_META_IIFNAME:
	case NFT_META_OIFNAME:
	case NFT_META_IIFKIND:
	case NFT_META_OIFKIND:
	case NFT_META_SDIFNAME:
		len = IFNAMSIZ;
		break;
	case NFT_META_PRANDOM:
		len = sizeof(u32);
		break;
#ifdef CONFIG_XFRM
	case NFT_META_SECPATH:
		len = sizeof(u8);
		break;
#endif
	case NFT_META_TIME_NS:
		len = sizeof(u64);
		break;
	case NFT_META_TIME_DAY:
		len = sizeof(u8);
		break;
	case NFT_META_TIME_HOUR:
		len = sizeof(u32);
		break;
	default:
		return -EOPNOTSUPP;
	}

	priv->len = len;
	return nft_parse_register_store(ctx, tb[NFTA_META_DREG], &priv->dreg,
					NULL, NFT_DATA_VALUE, len);
}
EXPORT_SYMBOL_GPL(nft_meta_get_init);

/*
 * Restrict the sdif keys to the LOCAL_IN and FORWARD hooks of the
 * ip/ip6/inet families; any other family is rejected with -EOPNOTSUPP.
 */
static int nft_meta_get_validate_sdif(const struct nft_ctx *ctx)
{
	unsigned int hooks;

	switch (ctx->family) {
	case NFPROTO_IPV4:
	case NFPROTO_IPV6:
	case NFPROTO_INET:
		hooks = (1 << NF_INET_LOCAL_IN) |
			(1 << NF_INET_FORWARD);
		break;
	default:
		return -EOPNOTSUPP;
	}

	return nft_chain_validate_hooks(ctx->chain, hooks);
}

/*
 * Restrict the secpath key to netdev ingress, or to the PRE_ROUTING,
 * LOCAL_IN and FORWARD hooks of the ip/ip6/inet families. Without
 * CONFIG_XFRM there is nothing to validate and 0 is returned.
 */
static int nft_meta_get_validate_xfrm(const struct nft_ctx *ctx)
{
#ifdef CONFIG_XFRM
	unsigned int hooks;

	switch (ctx->family) {
	case NFPROTO_NETDEV:
		hooks = 1 << NF_NETDEV_INGRESS;
		break;
	case NFPROTO_IPV4:
	case NFPROTO_IPV6:
	case NFPROTO_INET:
		hooks = (1 << NF_INET_PRE_ROUTING) |
			(1 << NF_INET_LOCAL_IN) |
			(1 << NF_INET_FORWARD);
		break;
	default:
		return -EOPNOTSUPP;
	}

	return nft_chain_validate_hooks(ctx->chain, hooks);
#else
	return 0;
#endif
}

/*
 * .validate callback for "meta get": only the secpath and sdif keys carry
 * hook restrictions, every other key is accepted.
 */
static int nft_meta_get_validate(const struct nft_ctx *ctx,
				 const struct nft_expr *expr)
{
	const struct nft_meta *priv = nft_expr_priv(expr);

	switch (priv->key) {
	case NFT_META_SECPATH:
		return nft_meta_get_validate_xfrm(ctx);
	case NFT_META_SDIF:
	case NFT_META_SDIFNAME:
		return nft_meta_get_validate_sdif(ctx);
	default:
		break;
	}

	return 0;
}

/*
 * .validate callback for "meta set": setting the packet type is only
 * allowed from the prerouting hook (bridge, ip/ip6/inet) or netdev ingress;
 * every other key is accepted unconditionally.
 */
int nft_meta_set_validate(const struct nft_ctx *ctx,
			  const struct nft_expr *expr)
{
	struct nft_meta *priv = nft_expr_priv(expr);
	unsigned int hooks;

	if (priv->key != NFT_META_PKTTYPE)
		return 0;

	switch (ctx->family) {
	case NFPROTO_BRIDGE:
		hooks = 1 << NF_BR_PRE_ROUTING;
		break;
	case NFPROTO_NETDEV:
		hooks = 1 << NF_NETDEV_INGRESS;
		break;
	case NFPROTO_IPV4:
	case NFPROTO_IPV6:
	case NFPROTO_INET:
		hooks = 1 << NF_INET_PRE_ROUTING;
		break;
	default:
		return -EOPNOTSUPP;
	}

	return nft_chain_validate_hooks(ctx->chain, hooks);
}
EXPORT_SYMBOL_GPL(nft_meta_set_validate);

/*
 * Initialise a "meta set" expression: record the key, derive the width of
 * the value to be loaded and register the source register.
 *
 * An nftrace expression additionally increments the nft_trace_enabled
 * static key; nft_meta_set_destroy() performs the matching decrement.
 */
int nft_meta_set_init(const struct nft_ctx *ctx,
		      const struct nft_expr *expr,
		      const struct nlattr * const tb[])
{
	struct nft_meta *priv = nft_expr_priv(expr);
	unsigned int len;
	int err;

	priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
	switch (priv->key) {
	case NFT_META_MARK:
	case NFT_META_PRIORITY:
#ifdef CONFIG_NETWORK_SECMARK
	case NFT_META_SECMARK:
#endif
		len = sizeof(u32);
		break;
	case NFT_META_NFTRACE:
		len = sizeof(u8);
		break;
	case NFT_META_PKTTYPE:
		len = sizeof(u8);
		break;
	default:
		return -EOPNOTSUPP;
	}

	priv->len = len;
	err = nft_parse_register_load(ctx, tb[NFTA_META_SREG], &priv->sreg, len);
	if (err < 0)
		return err;

	if (priv->key == NFT_META_NFTRACE)
		static_branch_inc(&nft_trace_enabled);

	return 0;
}
EXPORT_SYMBOL_GPL(nft_meta_set_init);

/*
 * Dump a "meta get" expression: emit the key and the destination register.
 * Returns 0 on success and -1 when an attribute could not be added to @skb.
 * @reset is not used here.
 */
int nft_meta_get_dump(struct sk_buff *skb,
		      const struct nft_expr *expr, bool reset)
{
	const struct nft_meta *priv = nft_expr_priv(expr);

	if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key)))
		goto nla_put_failure;
	if (nft_dump_register(skb, NFTA_META_DREG, priv->dreg))
		goto nla_put_failure;
	return 0;

nla_put_failure:
	return -1;
}
EXPORT_SYMBOL_GPL(nft_meta_get_dump);

/*
 * Dump a "meta set" expression: emit the key and the source register.
 * Returns 0 on success and -1 when an attribute could not be added to @skb.
 * @reset is not used here.
 */
int nft_meta_set_dump(struct sk_buff *skb,
		      const struct nft_expr *expr, bool reset)
{
	const struct nft_meta *priv = nft_expr_priv(expr);

	if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key)))
		goto nla_put_failure;
	if (nft_dump_register(skb, NFTA_META_SREG, priv->sreg))
		goto nla_put_failure;

	return 0;

nla_put_failure:
	return -1;
}
EXPORT_SYMBOL_GPL(nft_meta_set_dump);

/*
 * Tear down a "meta set" expression: drop the nft_trace_enabled reference
 * taken by nft_meta_set_init() for nftrace expressions.
 */
void nft_meta_set_destroy(const struct nft_ctx *ctx,
			  const struct nft_expr *expr)
{
	const struct nft_meta *priv = nft_expr_priv(expr);

	if (priv->key == NFT_META_NFTRACE)
		static_branch_dec(&nft_trace_enabled);
}
EXPORT_SYMBOL_GPL(nft_meta_set_destroy);

/*
 * Translate a "meta get" expression into a flow dissector match for
 * hardware offload. Only protocol, l4proto, iif and iiftype are supported;
 * everything else returns -EOPNOTSUPP.
 */
static int nft_meta_get_offload(struct nft_offload_ctx *ctx,
				struct nft_flow_rule *flow,
				const struct nft_expr *expr)
{
	const struct nft_meta *priv = nft_expr_priv(expr);
	struct nft_offload_reg *reg = &ctx->regs[priv->dreg];

	switch (priv->key) {
	case NFT_META_PROTOCOL:
		NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_BASIC, basic, n_proto,
					sizeof(__u16), reg);
		nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK);
		break;
	case NFT_META_L4PROTO:
		NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto,
					sizeof(__u8), reg);
		nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_TRANSPORT);
		break;
	case NFT_META_IIF:
		NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_META, meta,
					ingress_ifindex, sizeof(__u32), reg);
		break;
	case NFT_META_IIFTYPE:
		NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_META, meta,
					ingress_iftype, sizeof(__u16), reg);
		break;
	default:
		return -EOPNOTSUPP;
	}

	return 0;
}

/*
 * Register tracking for "meta get": report the expression as redundant
 * (return true) when the destination register is already tracked as holding
 * the result of a meta expression with the same key and register and no
 * bitwise operation is recorded for it. Otherwise the tracking state is
 * updated to this expression and false is returned, or the decision is
 * delegated to nft_expr_reduce_bitwise().
 */
bool nft_meta_get_reduce(struct nft_regs_track *track,
			 const struct nft_expr *expr)
{
	const struct nft_meta *priv = nft_expr_priv(expr);
	const struct nft_meta *meta;

	if (!nft_reg_track_cmp(track, expr, priv->dreg)) {
		nft_reg_track_update(track, expr, priv->dreg, priv->len);
		return false;
	}

	meta = nft_expr_priv(track->regs[priv->dreg].selector);
	if (priv->key != meta->key ||
	    priv->dreg != meta->dreg) {
		nft_reg_track_update(track, expr, priv->dreg, priv->len);
		return false;
	}

	if (!track->regs[priv->dreg].bitwise)
		return true;

	return nft_expr_reduce_bitwise(track, expr);
}
EXPORT_SYMBOL_GPL(nft_meta_get_reduce);

/* Expression operations for the "meta get" flavour (destination register). */
static const struct nft_expr_ops nft_meta_get_ops = {
	.type		= &nft_meta_type,
	.size		= NFT_EXPR_SIZE(sizeof(struct nft_meta)),
	.eval		= nft_meta_get_eval,
	.init		= nft_meta_get_init,
	.dump		= nft_meta_get_dump,
	.reduce		= nft_meta_get_reduce,
	.validate	= nft_meta_get_validate,
	.offload	= nft_meta_get_offload,
};

/*
 * Register tracking for "meta set": cancel tracking of every register whose
 * selector is a "meta get" expression. Always returns false.
 */
static bool nft_meta_set_reduce(struct nft_regs_track *track,
				const struct nft_expr *expr)
{
	int i;

	for (i = 0; i < NFT_REG32_NUM; i++) {
		if (!track->regs[i].selector)
			continue;

		if (track->regs[i].selector->ops != &nft_meta_get_ops)
			continue;

		__nft_reg_track_cancel(track, i);
	}

	return false;
}

/* Expression operations for the "meta set" flavour (source register). */
static const struct nft_expr_ops nft_meta_set_ops = {
	.type		= &nft_meta_type,
	.size		= NFT_EXPR_SIZE(sizeof(struct nft_meta)),
	.eval		= nft_meta_set_eval,
	.init		= nft_meta_set_init,
	.destroy	= nft_meta_set_destroy,
	.dump		= nft_meta_set_dump,
	.reduce		= nft_meta_set_reduce,
	.validate	= nft_meta_set_validate,
};

/*
 * Pick the get or set operations depending on which register attribute is
 * present. The key is mandatory and exactly one of DREG/SREG must be given,
 * otherwise -EINVAL is returned. In the bridge family, when the bridge meta
 * support is built as a module, -EAGAIN is returned instead.
 */
static const struct nft_expr_ops *
nft_meta_select_ops(const struct nft_ctx *ctx,
		    const struct nlattr * const tb[])
{
	if (tb[NFTA_META_KEY] == NULL)
		return ERR_PTR(-EINVAL);

	if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG])
		return ERR_PTR(-EINVAL);

#if IS_ENABLED(CONFIG_NF_TABLES_BRIDGE) && IS_MODULE(CONFIG_NFT_BRIDGE_META)
	if (ctx->family == NFPROTO_BRIDGE)
		return ERR_PTR(-EAGAIN);
#endif
	if (tb[NFTA_META_DREG])
		return &nft_meta_get_ops;

	if (tb[NFTA_META_SREG])
		return &nft_meta_set_ops;

	return ERR_PTR(-EINVAL);
}

/*
 * Initialise a meta expression used on inner (tunnelled) headers. Only the
 * protocol and l4proto keys are supported and both key and destination
 * register attributes are required.
 */
static int nft_meta_inner_init(const struct nft_ctx *ctx,
			       const struct nft_expr *expr,
			       const struct nlattr * const tb[])
{
	struct nft_meta *priv = nft_expr_priv(expr);
	unsigned int len;

	if (!tb[NFTA_META_KEY] || !tb[NFTA_META_DREG])
		return -EINVAL;

	priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
	switch (priv->key) {
	case NFT_META_PROTOCOL:
		len = sizeof(u16);
		break;
	case NFT_META_L4PROTO:
		len = sizeof(u32);
		break;
	default:
		return -EOPNOTSUPP;
	}
	priv->len = len;

	return nft_parse_register_store(ctx, tb[NFTA_META_DREG], &priv->dreg,
					NULL, NFT_DATA_VALUE, len);
}

/*
 * Evaluate a meta expression against the inner tunnel context @tun_ctx:
 * store the inner link-layer protocol or the inner transport protocol in
 * the destination register. The verdict is set to NFT_BREAK when the inner
 * transport header flag is not set or the key is not supported.
 */
void nft_meta_inner_eval(const struct nft_expr *expr,
			 struct nft_regs *regs,
			 const struct nft_pktinfo *pkt,
			 struct nft_inner_tun_ctx *tun_ctx)
{
	const struct nft_meta *priv = nft_expr_priv(expr);
	u32 *dest = &regs->data[priv->dreg];

	switch (priv->key) {
	case NFT_META_PROTOCOL:
		nft_reg_store16(dest, (__force u16)tun_ctx->llproto);
		break;
	case NFT_META_L4PROTO:
		if (!(tun_ctx->flags & NFT_PAYLOAD_CTX_INNER_TH))
			goto err;

		nft_reg_store8(dest, tun_ctx->l4proto);
		break;
	default:
		WARN_ON_ONCE(1);
		goto err;
	}
	return;

err:
	regs->verdict.code = NFT_BREAK;
}
EXPORT_SYMBOL_GPL(nft_meta_inner_eval);

/* Operations for the inner-header flavour; it has no .eval callback. */
static const struct nft_expr_ops nft_meta_inner_ops = {
	.type		= &nft_meta_type,
	.size		= NFT_EXPR_SIZE(sizeof(struct nft_meta)),
	.init		= nft_meta_inner_init,
	.dump		= nft_meta_get_dump,
	/* direct call to nft_meta_inner_eval(). */
};

/* The "meta" expression type. */
struct nft_expr_type nft_meta_type __read_mostly = {
	.name		= "meta",
	.select_ops	= nft_meta_select_ops,
	.inner_ops	= &nft_meta_inner_ops,
	.policy		= nft_meta_policy,
	.maxattr	= NFTA_META_MAX,
	.owner		= THIS_MODULE,
};

#ifdef CONFIG_NETWORK_SECMARK
/* Private data of a secmark object. */
struct nft_secmark {
	u32 secid;	/* id resolved from @ctx, written to skb->secmark */
	char *ctx;	/* security context string, allocated by nla_strdup() */
};

static const struct nla_policy nft_secmark_policy[NFTA_SECMARK_MAX + 1] = {
	[NFTA_SECMARK_CTX]	= { .type = NLA_STRING, .len = NFT_SECMARK_CTX_MAXLEN },
};

/*
 * Resolve priv->ctx into a security id and store it in priv->secid.
 *
 * Returns -ENOENT when the context resolves to id 0, or the error reported
 * by security_secctx_to_secid() / security_secmark_relabel_packet();
 * priv->secid is only updated on success.
 */
static int nft_secmark_compute_secid(struct nft_secmark *priv)
{
	u32 tmp_secid = 0;
	int err;

	err = security_secctx_to_secid(priv->ctx, strlen(priv->ctx), &tmp_secid);
	if (err)
		return err;

	if (!tmp_secid)
		return -ENOENT;

	err = security_secmark_relabel_packet(tmp_secid);
	if (err)
		return err;

	priv->secid = tmp_secid;
	return 0;
}

/* Object evaluation: stamp the packet with the precomputed security id. */
static void nft_secmark_obj_eval(struct nft_object *obj, struct nft_regs *regs,
				 const struct nft_pktinfo *pkt)
{
	const struct nft_secmark *priv = nft_obj_data(obj);
	struct sk_buff *skb = pkt->skb;

	skb->secmark = priv->secid;
}

/*
 * Create a secmark object: duplicate the mandatory context string, resolve
 * it to a security id and take a secmark reference. The duplicated string
 * is freed again when the id cannot be resolved.
 */
static int nft_secmark_obj_init(const struct nft_ctx *ctx,
				const struct nlattr * const tb[],
				struct nft_object *obj)
{
	struct nft_secmark *priv = nft_obj_data(obj);
	int err;

	if (tb[NFTA_SECMARK_CTX] == NULL)
		return -EINVAL;

	priv->ctx = nla_strdup(tb[NFTA_SECMARK_CTX], GFP_KERNEL_ACCOUNT);
	if (!priv->ctx)
		return -ENOMEM;

	err = nft_secmark_compute_secid(priv);
	if (err) {
		kfree(priv->ctx);
		return err;
	}

	security_secmark_refcount_inc();

	return 0;
}

/*
 * Dump the object's context string. When @reset is set the security id is
 * recomputed from the stored context and any error from that is returned.
 */
static int nft_secmark_obj_dump(struct sk_buff *skb, struct nft_object *obj,
				bool reset)
{
	struct nft_secmark *priv = nft_obj_data(obj);
	int err;

	if (nla_put_string(skb, NFTA_SECMARK_CTX, priv->ctx))
		return -1;

	if (reset) {
		err = nft_secmark_compute_secid(priv);
		if (err)
			return err;
	}

	return 0;
}

/* Release the secmark reference taken at init time and free the context. */
static void nft_secmark_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj)
{
	struct nft_secmark *priv = nft_obj_data(obj);

	security_secmark_refcount_dec();

	kfree(priv->ctx);
}

static const struct nft_object_ops nft_secmark_obj_ops = {
	.type		= &nft_secmark_obj_type,
	.size		= sizeof(struct nft_secmark),
	.init		= nft_secmark_obj_init,
	.eval		= nft_secmark_obj_eval,
	.dump		= nft_secmark_obj_dump,
	.destroy	= nft_secmark_obj_destroy,
};

struct nft_object_type nft_secmark_obj_type __read_mostly = {
	.type		= NFT_OBJECT_SECMARK,
	.ops		= &nft_secmark_obj_ops,
	.maxattr	= NFTA_SECMARK_MAX,
	.policy		= nft_secmark_policy,
	.owner		= THIS_MODULE,
};
#endif /* CONFIG_NETWORK_SECMARK */
1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 // SPDX-License-Identifier: GPL-2.0 /* -*- linux-c -*- * Cypress USB Thermometer driver * * Copyright (c) 2004 Erik Rigtorp <erkki@linux.nu> <erik@rigtorp.com> * * This driver works with Elektor magazine USB Interface as published in * issue #291. It should also work with the original starter kit/demo board * from Cypress. 
*/ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/usb.h> #define DRIVER_AUTHOR "Erik Rigtorp" #define DRIVER_DESC "Cypress USB Thermometer driver" #define USB_SKEL_VENDOR_ID 0x04b4 #define USB_SKEL_PRODUCT_ID 0x0002 static const struct usb_device_id id_table[] = { { USB_DEVICE(USB_SKEL_VENDOR_ID, USB_SKEL_PRODUCT_ID) }, { } }; MODULE_DEVICE_TABLE (usb, id_table); /* Structure to hold all of our device specific stuff */ struct usb_cytherm { struct usb_device *udev; /* save off the usb device pointer */ struct usb_interface *interface; /* the interface for this device */ int brightness; }; /* Vendor requests */ /* They all operate on one byte at a time */ #define PING 0x00 #define READ_ROM 0x01 /* Reads form ROM, value = address */ #define READ_RAM 0x02 /* Reads form RAM, value = address */ #define WRITE_RAM 0x03 /* Write to RAM, value = address, index = data */ #define READ_PORT 0x04 /* Reads from port, value = address */ #define WRITE_PORT 0x05 /* Write to port, value = address, index = data */ /* Send a vendor command to device */ static int vendor_command(struct usb_device *dev, unsigned char request, unsigned char value, unsigned char index, void *buf, int size) { return usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), request, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_OTHER, value, index, buf, size, USB_CTRL_GET_TIMEOUT); } #define BRIGHTNESS 0x2c /* RAM location for brightness value */ #define BRIGHTNESS_SEM 0x2b /* RAM location for brightness semaphore */ static ssize_t brightness_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(dev); struct usb_cytherm *cytherm = usb_get_intfdata(intf); return sprintf(buf, "%i", cytherm->brightness); } static ssize_t brightness_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_interface *intf = to_usb_interface(dev); struct usb_cytherm *cytherm 
= usb_get_intfdata(intf); unsigned char *buffer; int retval; buffer = kmalloc(8, GFP_KERNEL); if (!buffer) return 0; cytherm->brightness = simple_strtoul(buf, NULL, 10); if (cytherm->brightness > 0xFF) cytherm->brightness = 0xFF; else if (cytherm->brightness < 0) cytherm->brightness = 0; /* Set brightness */ retval = vendor_command(cytherm->udev, WRITE_RAM, BRIGHTNESS, cytherm->brightness, buffer, 8); if (retval) dev_dbg(&cytherm->udev->dev, "retval = %d\n", retval); /* Inform µC that we have changed the brightness setting */ retval = vendor_command(cytherm->udev, WRITE_RAM, BRIGHTNESS_SEM, 0x01, buffer, 8); if (retval) dev_dbg(&cytherm->udev->dev, "retval = %d\n", retval); kfree(buffer); return count; } static DEVICE_ATTR_RW(brightness); #define TEMP 0x33 /* RAM location for temperature */ #define SIGN 0x34 /* RAM location for temperature sign */ static ssize_t temp_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(dev); struct usb_cytherm *cytherm = usb_get_intfdata(intf); int retval; unsigned char *buffer; int temp, sign; buffer = kmalloc(8, GFP_KERNEL); if (!buffer) return 0; /* read temperature */ retval = vendor_command(cytherm->udev, READ_RAM, TEMP, 0, buffer, 8); if (retval) dev_dbg(&cytherm->udev->dev, "retval = %d\n", retval); temp = buffer[1]; /* read sign */ retval = vendor_command(cytherm->udev, READ_RAM, SIGN, 0, buffer, 8); if (retval) dev_dbg(&cytherm->udev->dev, "retval = %d\n", retval); sign = buffer[1]; kfree(buffer); return sprintf(buf, "%c%i.%i", sign ? 
'-' : '+', temp >> 1, 5*(temp - ((temp >> 1) << 1))); } static DEVICE_ATTR_RO(temp); #define BUTTON 0x7a static ssize_t button_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(dev); struct usb_cytherm *cytherm = usb_get_intfdata(intf); int retval; unsigned char *buffer; buffer = kmalloc(8, GFP_KERNEL); if (!buffer) return 0; /* check button */ retval = vendor_command(cytherm->udev, READ_RAM, BUTTON, 0, buffer, 8); if (retval) dev_dbg(&cytherm->udev->dev, "retval = %d\n", retval); retval = buffer[1]; kfree(buffer); if (retval) return sprintf(buf, "1"); else return sprintf(buf, "0"); } static DEVICE_ATTR_RO(button); static ssize_t port0_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(dev); struct usb_cytherm *cytherm = usb_get_intfdata(intf); int retval; unsigned char *buffer; buffer = kmalloc(8, GFP_KERNEL); if (!buffer) return 0; retval = vendor_command(cytherm->udev, READ_PORT, 0, 0, buffer, 8); if (retval) dev_dbg(&cytherm->udev->dev, "retval = %d\n", retval); retval = buffer[1]; kfree(buffer); return sprintf(buf, "%d", retval); } static ssize_t port0_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_interface *intf = to_usb_interface(dev); struct usb_cytherm *cytherm = usb_get_intfdata(intf); unsigned char *buffer; int retval; int tmp; buffer = kmalloc(8, GFP_KERNEL); if (!buffer) return 0; tmp = simple_strtoul(buf, NULL, 10); if (tmp > 0xFF) tmp = 0xFF; else if (tmp < 0) tmp = 0; retval = vendor_command(cytherm->udev, WRITE_PORT, 0, tmp, buffer, 8); if (retval) dev_dbg(&cytherm->udev->dev, "retval = %d\n", retval); kfree(buffer); return count; } static DEVICE_ATTR_RW(port0); static ssize_t port1_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(dev); struct usb_cytherm *cytherm = usb_get_intfdata(intf); int retval; 
unsigned char *buffer; buffer = kmalloc(8, GFP_KERNEL); if (!buffer) return 0; retval = vendor_command(cytherm->udev, READ_PORT, 1, 0, buffer, 8); if (retval) dev_dbg(&cytherm->udev->dev, "retval = %d\n", retval); retval = buffer[1]; kfree(buffer); return sprintf(buf, "%d", retval); } static ssize_t port1_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_interface *intf = to_usb_interface(dev); struct usb_cytherm *cytherm = usb_get_intfdata(intf); unsigned char *buffer; int retval; int tmp; buffer = kmalloc(8, GFP_KERNEL); if (!buffer) return 0; tmp = simple_strtoul(buf, NULL, 10); if (tmp > 0xFF) tmp = 0xFF; else if (tmp < 0) tmp = 0; retval = vendor_command(cytherm->udev, WRITE_PORT, 1, tmp, buffer, 8); if (retval) dev_dbg(&cytherm->udev->dev, "retval = %d\n", retval); kfree(buffer); return count; } static DEVICE_ATTR_RW(port1); static struct attribute *cytherm_attrs[] = { &dev_attr_brightness.attr, &dev_attr_temp.attr, &dev_attr_button.attr, &dev_attr_port0.attr, &dev_attr_port1.attr, NULL, }; ATTRIBUTE_GROUPS(cytherm); static int cytherm_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(interface); struct usb_cytherm *dev; int retval = -ENOMEM; dev = kzalloc_obj(struct usb_cytherm); if (!dev) goto error_mem; dev->udev = usb_get_dev(udev); usb_set_intfdata(interface, dev); dev->brightness = 0xFF; dev_info(&interface->dev, "Cypress thermometer device now attached\n"); return 0; error_mem: return retval; } static void cytherm_disconnect(struct usb_interface *interface) { struct usb_cytherm *dev; dev = usb_get_intfdata(interface); /* first remove the files, then NULL the pointer */ usb_set_intfdata(interface, NULL); usb_put_dev(dev->udev); kfree(dev); dev_info(&interface->dev, "Cypress thermometer now disconnected\n"); } /* usb specific object needed to register this driver with the usb subsystem */ static struct usb_driver cytherm_driver = { 
.name = "cytherm", .probe = cytherm_probe, .disconnect = cytherm_disconnect, .id_table = id_table, .dev_groups = cytherm_groups, }; module_usb_driver(cytherm_driver); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL");
4 6 1 6 6 6 1 1 1 1 1 1 1 5 3 3 1 4 4 4 2 2 1 1 1 1 1 1 1 4 1 3 4 4 4 3 3 3 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364
// SPDX-License-Identifier: GPL-2.0-only

#include "netlink.h"
#include "common.h"
#include "bitset.h"

/* Request state for FEC requests; carries nothing beyond the common part. */
struct fec_req_info {
	struct ethnl_req_info		base;
};

/* Reply state gathered by fec_prepare_data(). */
struct fec_reply_data {
	struct ethnl_reply_data		base;
	/* FEC modes from ethtool_fecparam.fec, expressed as link mode bits */
	__ETHTOOL_DECLARE_LINK_MODE_MASK(fec_link_modes);
	/* index of the first link mode bit derived from the active FEC mode */
	u32 active_fec;
	/* non-zero when ETHTOOL_FEC_AUTO is set in the driver's fec mask */
	u8 fec_auto;
	/*
	 * One group of FEC counters as filled in by fec_stats_recalc():
	 * stats[0] is the total, stats[1..] the per-lane values, and cnt the
	 * number of valid entries in stats[] (0 when nothing was reported).
	 */
	struct fec_stat_grp {
		u64 stats[1 + ETHTOOL_MAX_LANES];
		u8 cnt;
	} corr, uncorr, corr_bits;
	/* histogram handed to the driver's get_fec_stats() callback */
	struct ethtool_fec_hist		fec_stat_hist;
};

/* Map the embedded ethnl_reply_data back to its enclosing fec_reply_data. */
#define FEC_REPDATA(__reply_base) \
	container_of(__reply_base, struct fec_reply_data, base)

/* All bits from bit 0 up to and including the ETHTOOL_FEC_LLRS bit. */
#define ETHTOOL_FEC_MASK	((ETHTOOL_FEC_LLRS << 1) - 1)
const struct nla_policy ethnl_fec_get_policy[ETHTOOL_A_FEC_HEADER + 1] = { [ETHTOOL_A_FEC_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_stats), }; static void ethtool_fec_to_link_modes(u32 fec, unsigned long *link_modes, u8 *fec_auto) { if (fec_auto) *fec_auto = !!(fec & ETHTOOL_FEC_AUTO); if (fec & ETHTOOL_FEC_OFF) __set_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, link_modes); if (fec & ETHTOOL_FEC_RS) __set_bit(ETHTOOL_LINK_MODE_FEC_RS_BIT, link_modes); if (fec & ETHTOOL_FEC_BASER) __set_bit(ETHTOOL_LINK_MODE_FEC_BASER_BIT, link_modes); if (fec & ETHTOOL_FEC_LLRS) __set_bit(ETHTOOL_LINK_MODE_FEC_LLRS_BIT, link_modes); } static int ethtool_link_modes_to_fecparam(struct ethtool_fecparam *fec, unsigned long *link_modes, u8 fec_auto) { memset(fec, 0, sizeof(*fec)); if (fec_auto) fec->fec |= ETHTOOL_FEC_AUTO; if (__test_and_clear_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, link_modes)) fec->fec |= ETHTOOL_FEC_OFF; if (__test_and_clear_bit(ETHTOOL_LINK_MODE_FEC_RS_BIT, link_modes)) fec->fec |= ETHTOOL_FEC_RS; if (__test_and_clear_bit(ETHTOOL_LINK_MODE_FEC_BASER_BIT, link_modes)) fec->fec |= ETHTOOL_FEC_BASER; if (__test_and_clear_bit(ETHTOOL_LINK_MODE_FEC_LLRS_BIT, link_modes)) fec->fec |= ETHTOOL_FEC_LLRS; if (!bitmap_empty(link_modes, __ETHTOOL_LINK_MODE_MASK_NBITS)) return -EINVAL; return 0; } static void fec_stats_recalc(struct fec_stat_grp *grp, struct ethtool_fec_stat *stats) { int i; if (stats->lanes[0] == ETHTOOL_STAT_NOT_SET) { grp->stats[0] = stats->total; grp->cnt = stats->total != ETHTOOL_STAT_NOT_SET; return; } grp->cnt = 1; grp->stats[0] = 0; for (i = 0; i < ETHTOOL_MAX_LANES; i++) { if (stats->lanes[i] == ETHTOOL_STAT_NOT_SET) break; grp->stats[0] += stats->lanes[i]; grp->stats[grp->cnt++] = stats->lanes[i]; } } static int fec_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { __ETHTOOL_DECLARE_LINK_MODE_MASK(active_fec_modes) = {}; struct fec_reply_data *data = FEC_REPDATA(reply_base); struct 
net_device *dev = reply_base->dev; struct ethtool_fecparam fec = {}; int ret; if (!dev->ethtool_ops->get_fecparam) return -EOPNOTSUPP; ret = ethnl_ops_begin(dev); if (ret < 0) return ret; ret = dev->ethtool_ops->get_fecparam(dev, &fec); if (ret) goto out_complete; if (req_base->flags & ETHTOOL_FLAG_STATS && dev->ethtool_ops->get_fec_stats) { struct ethtool_fec_stats stats; ethtool_stats_init((u64 *)&stats, sizeof(stats) / 8); ethtool_stats_init((u64 *)data->fec_stat_hist.values, sizeof(data->fec_stat_hist.values) / 8); dev->ethtool_ops->get_fec_stats(dev, &stats, &data->fec_stat_hist); fec_stats_recalc(&data->corr, &stats.corrected_blocks); fec_stats_recalc(&data->uncorr, &stats.uncorrectable_blocks); fec_stats_recalc(&data->corr_bits, &stats.corrected_bits); } WARN_ON_ONCE(fec.reserved); ethtool_fec_to_link_modes(fec.fec, data->fec_link_modes, &data->fec_auto); ethtool_fec_to_link_modes(fec.active_fec, active_fec_modes, NULL); data->active_fec = find_first_bit(active_fec_modes, __ETHTOOL_LINK_MODE_MASK_NBITS); /* Don't report attr if no FEC mode set. Note that * ethtool_fecparam_to_link_modes() ignores NONE and AUTO. 
*/ if (data->active_fec == __ETHTOOL_LINK_MODE_MASK_NBITS) data->active_fec = 0; out_complete: ethnl_ops_complete(dev); return ret; } static int fec_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; const struct fec_reply_data *data = FEC_REPDATA(reply_base); int len = 0; int ret; ret = ethnl_bitset_size(data->fec_link_modes, NULL, __ETHTOOL_LINK_MODE_MASK_NBITS, link_mode_names, compact); if (ret < 0) return ret; len += ret; len += nla_total_size(sizeof(u8)) + /* _FEC_AUTO */ nla_total_size(sizeof(u32)); /* _FEC_ACTIVE */ if (req_base->flags & ETHTOOL_FLAG_STATS) { len += 3 * nla_total_size_64bit(sizeof(u64) * (1 + ETHTOOL_MAX_LANES)); /* add FEC bins information */ len += (nla_total_size(0) + /* _A_FEC_HIST */ nla_total_size(4) + /* _A_FEC_HIST_BIN_LOW */ nla_total_size(4) + /* _A_FEC_HIST_BIN_HI */ /* _A_FEC_HIST_BIN_VAL + per-lane values */ nla_total_size_64bit(sizeof(u64)) + nla_total_size_64bit(sizeof(u64) * ETHTOOL_MAX_LANES)) * ETHTOOL_FEC_HIST_MAX; } return len; } static int fec_put_hist(struct sk_buff *skb, const struct ethtool_fec_hist *hist) { const struct ethtool_fec_hist_range *ranges = hist->ranges; const struct ethtool_fec_hist_value *values = hist->values; struct nlattr *nest; int i, j; u64 sum; if (!ranges) return 0; for (i = 0; i < ETHTOOL_FEC_HIST_MAX; i++) { if (i && !ranges[i].low && !ranges[i].high) break; if (WARN_ON_ONCE(values[i].sum == ETHTOOL_STAT_NOT_SET && values[i].per_lane[0] == ETHTOOL_STAT_NOT_SET)) break; nest = nla_nest_start(skb, ETHTOOL_A_FEC_STAT_HIST); if (!nest) return -EMSGSIZE; if (nla_put_u32(skb, ETHTOOL_A_FEC_HIST_BIN_LOW, ranges[i].low) || nla_put_u32(skb, ETHTOOL_A_FEC_HIST_BIN_HIGH, ranges[i].high)) goto err_cancel_hist; sum = 0; for (j = 0; j < ETHTOOL_MAX_LANES; j++) { if (values[i].per_lane[j] == ETHTOOL_STAT_NOT_SET) break; sum += values[i].per_lane[j]; } if (nla_put_uint(skb, ETHTOOL_A_FEC_HIST_BIN_VAL, 
values[i].sum == ETHTOOL_STAT_NOT_SET ? sum : values[i].sum)) goto err_cancel_hist; if (j && nla_put_64bit(skb, ETHTOOL_A_FEC_HIST_BIN_VAL_PER_LANE, sizeof(u64) * j, values[i].per_lane, ETHTOOL_A_FEC_HIST_PAD)) goto err_cancel_hist; nla_nest_end(skb, nest); } return 0; err_cancel_hist: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static int fec_put_stats(struct sk_buff *skb, const struct fec_reply_data *data) { struct nlattr *nest; nest = nla_nest_start(skb, ETHTOOL_A_FEC_STATS); if (!nest) return -EMSGSIZE; if (nla_put_64bit(skb, ETHTOOL_A_FEC_STAT_CORRECTED, sizeof(u64) * data->corr.cnt, data->corr.stats, ETHTOOL_A_FEC_STAT_PAD) || nla_put_64bit(skb, ETHTOOL_A_FEC_STAT_UNCORR, sizeof(u64) * data->uncorr.cnt, data->uncorr.stats, ETHTOOL_A_FEC_STAT_PAD) || nla_put_64bit(skb, ETHTOOL_A_FEC_STAT_CORR_BITS, sizeof(u64) * data->corr_bits.cnt, data->corr_bits.stats, ETHTOOL_A_FEC_STAT_PAD)) goto err_cancel; if (fec_put_hist(skb, &data->fec_stat_hist)) goto err_cancel; nla_nest_end(skb, nest); return 0; err_cancel: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static int fec_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; const struct fec_reply_data *data = FEC_REPDATA(reply_base); int ret; ret = ethnl_put_bitset(skb, ETHTOOL_A_FEC_MODES, data->fec_link_modes, NULL, __ETHTOOL_LINK_MODE_MASK_NBITS, link_mode_names, compact); if (ret < 0) return ret; if (nla_put_u8(skb, ETHTOOL_A_FEC_AUTO, data->fec_auto) || (data->active_fec && nla_put_u32(skb, ETHTOOL_A_FEC_ACTIVE, data->active_fec))) return -EMSGSIZE; if (req_base->flags & ETHTOOL_FLAG_STATS && fec_put_stats(skb, data)) return -EMSGSIZE; return 0; } /* FEC_SET */ const struct nla_policy ethnl_fec_set_policy[ETHTOOL_A_FEC_AUTO + 1] = { [ETHTOOL_A_FEC_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), [ETHTOOL_A_FEC_MODES] = { .type = NLA_NESTED }, [ETHTOOL_A_FEC_AUTO] = 
NLA_POLICY_MAX(NLA_U8, 1), }; static int ethnl_set_fec_validate(struct ethnl_req_info *req_info, struct genl_info *info) { const struct ethtool_ops *ops = req_info->dev->ethtool_ops; return ops->get_fecparam && ops->set_fecparam ? 1 : -EOPNOTSUPP; } static int ethnl_set_fec(struct ethnl_req_info *req_info, struct genl_info *info) { __ETHTOOL_DECLARE_LINK_MODE_MASK(fec_link_modes) = {}; struct net_device *dev = req_info->dev; struct nlattr **tb = info->attrs; struct ethtool_fecparam fec = {}; bool mod = false; u8 fec_auto; int ret; ret = dev->ethtool_ops->get_fecparam(dev, &fec); if (ret < 0) return ret; ethtool_fec_to_link_modes(fec.fec, fec_link_modes, &fec_auto); ret = ethnl_update_bitset(fec_link_modes, __ETHTOOL_LINK_MODE_MASK_NBITS, tb[ETHTOOL_A_FEC_MODES], link_mode_names, info->extack, &mod); if (ret < 0) return ret; ethnl_update_u8(&fec_auto, tb[ETHTOOL_A_FEC_AUTO], &mod); if (!mod) return 0; ret = ethtool_link_modes_to_fecparam(&fec, fec_link_modes, fec_auto); if (ret) { NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_FEC_MODES], "invalid FEC modes requested"); return ret; } if (!fec.fec) { NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_FEC_MODES], "no FEC modes set"); return -EINVAL; } ret = dev->ethtool_ops->set_fecparam(dev, &fec); return ret < 0 ? ret : 1; } const struct ethnl_request_ops ethnl_fec_request_ops = { .request_cmd = ETHTOOL_MSG_FEC_GET, .reply_cmd = ETHTOOL_MSG_FEC_GET_REPLY, .hdr_attr = ETHTOOL_A_FEC_HEADER, .req_info_size = sizeof(struct fec_req_info), .reply_data_size = sizeof(struct fec_reply_data), .prepare_data = fec_prepare_data, .reply_size = fec_reply_size, .fill_reply = fec_fill_reply, .set_validate = ethnl_set_fec_validate, .set = ethnl_set_fec, .set_ntf_cmd = ETHTOOL_MSG_FEC_NTF, };
1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 // SPDX-License-Identifier: GPL-2.0+ /* Copyright (c) 2022 Amarula Solutions, Dario Binacchi <dario.binacchi@amarulasolutions.com> * */ #include <linux/can/dev.h> #include <linux/ethtool.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/platform_device.h> #include "slcan.h" static const char slcan_priv_flags_strings[][ETH_GSTRING_LEN] = { #define SLCAN_PRIV_FLAGS_ERR_RST_ON_OPEN BIT(0) "err-rst-on-open", }; static void slcan_get_strings(struct net_device *ndev, u32 stringset, u8 *data) { switch (stringset) { case ETH_SS_PRIV_FLAGS: memcpy(data, slcan_priv_flags_strings, sizeof(slcan_priv_flags_strings)); } } static u32 slcan_get_priv_flags(struct net_device *ndev) { u32 flags = 0; if (slcan_err_rst_on_open(ndev)) flags |= SLCAN_PRIV_FLAGS_ERR_RST_ON_OPEN; return flags; } static int slcan_set_priv_flags(struct net_device *ndev, u32 flags) { bool err_rst_op_open = !!(flags & SLCAN_PRIV_FLAGS_ERR_RST_ON_OPEN); return slcan_enable_err_rst_on_open(ndev, err_rst_op_open); } static int slcan_get_sset_count(struct net_device *netdev, int sset) { switch (sset) { case ETH_SS_PRIV_FLAGS: return ARRAY_SIZE(slcan_priv_flags_strings); default: return -EOPNOTSUPP; } } const struct ethtool_ops slcan_ethtool_ops = { .get_strings = slcan_get_strings, .get_priv_flags = slcan_get_priv_flags, .set_priv_flags = slcan_set_priv_flags, .get_sset_count = slcan_get_sset_count, .get_ts_info = ethtool_op_get_ts_info, };
5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_PART_STAT_H #define _LINUX_PART_STAT_H #include <linux/blkdev.h> #include <asm/local.h> struct disk_stats { u64 nsecs[NR_STAT_GROUPS]; unsigned long sectors[NR_STAT_GROUPS]; unsigned long ios[NR_STAT_GROUPS]; unsigned long merges[NR_STAT_GROUPS]; unsigned long io_ticks; local_t in_flight[2]; }; /* * Macros to operate on percpu disk statistics: * * part_stat_{add|sub|inc|dec}() modify the stat counters and should * be called between part_stat_lock() and part_stat_unlock(). * * part_stat_read() can be called at any time. */ #define part_stat_lock() preempt_disable() #define part_stat_unlock() preempt_enable() #define part_stat_get_cpu(part, field, cpu) \ (per_cpu_ptr((part)->bd_stats, (cpu))->field) #define part_stat_get(part, field) \ part_stat_get_cpu(part, field, smp_processor_id()) #define part_stat_read(part, field) \ ({ \ TYPEOF_UNQUAL((part)->bd_stats->field) res = 0; \ unsigned int _cpu; \ for_each_possible_cpu(_cpu) \ res += per_cpu_ptr((part)->bd_stats, _cpu)->field; \ res; \ }) static inline void part_stat_set_all(struct block_device *part, int value) { int i; for_each_possible_cpu(i) memset(per_cpu_ptr(part->bd_stats, i), value, sizeof(struct disk_stats)); } #define part_stat_read_accum(part, field) \ (part_stat_read(part, field[STAT_READ]) + \ part_stat_read(part, field[STAT_WRITE]) + \ part_stat_read(part, field[STAT_DISCARD])) #define __part_stat_add(part, field, addnd) \ __this_cpu_add((part)->bd_stats->field, addnd) #define part_stat_add(part, field, addnd) do { \ __part_stat_add((part), field, addnd); \ if (bdev_is_partition(part)) \ __part_stat_add(bdev_whole(part), field, addnd); \ } while (0) #define part_stat_dec(part, field) \ part_stat_add(part, 
field, -1) #define part_stat_inc(part, field) \ part_stat_add(part, field, 1) #define part_stat_sub(part, field, subnd) \ part_stat_add(part, field, -subnd) #define part_stat_local_dec(part, field) \ local_dec(&(part_stat_get(part, field))) #define part_stat_local_inc(part, field) \ local_inc(&(part_stat_get(part, field))) #define part_stat_local_read(part, field) \ local_read(&(part_stat_get(part, field))) #define part_stat_local_read_cpu(part, field, cpu) \ local_read(&(part_stat_get_cpu(part, field, cpu))) unsigned int bdev_count_inflight(struct block_device *part); #endif /* _LINUX_PART_STAT_H */
/*
 * linux/fs/nls/nls_cp874.c
 *
 * Charset cp874 translation tables.
 * Generated automatically from the Unicode and charset
 * tables from the Unicode Organization (www.unicode.org).
 * The Unicode to charset table has only exact mappings.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/nls.h>
#include <linux/errno.h>

/*
 * cp874 byte -> Unicode code point, indexed by the raw byte.  An entry of
 * 0x0000 is treated as "no mapping" by char2uni().
 */
static const wchar_t charset2uni[256] = {
	/* 0x00*/
	0x0000, 0x0001, 0x0002, 0x0003,
	0x0004, 0x0005, 0x0006, 0x0007,
	0x0008, 0x0009, 0x000a, 0x000b,
	0x000c, 0x000d, 0x000e, 0x000f,
	/* 0x10*/
	0x0010, 0x0011, 0x0012, 0x0013,
	0x0014, 0x0015, 0x0016, 0x0017,
	0x0018, 0x0019, 0x001a, 0x001b,
	0x001c, 0x001d, 0x001e, 0x001f,
	/* 0x20*/
	0x0020, 0x0021, 0x0022, 0x0023,
	0x0024, 0x0025, 0x0026, 0x0027,
	0x0028, 0x0029, 0x002a, 0x002b,
	0x002c, 0x002d, 0x002e, 0x002f,
	/* 0x30*/
	0x0030, 0x0031, 0x0032, 0x0033,
	0x0034, 0x0035, 0x0036, 0x0037,
	0x0038, 0x0039, 0x003a, 0x003b,
	0x003c, 0x003d, 0x003e, 0x003f,
	/* 0x40*/
	0x0040, 0x0041, 0x0042, 0x0043,
	0x0044, 0x0045, 0x0046, 0x0047,
	0x0048, 0x0049, 0x004a, 0x004b,
	0x004c, 0x004d, 0x004e, 0x004f,
	/* 0x50*/
	0x0050, 0x0051, 0x0052, 0x0053,
	0x0054, 0x0055, 0x0056, 0x0057,
	0x0058, 0x0059, 0x005a, 0x005b,
	0x005c, 0x005d, 0x005e, 0x005f,
	/* 0x60*/
	0x0060, 0x0061, 0x0062, 0x0063,
	0x0064, 0x0065, 0x0066, 0x0067,
	0x0068, 0x0069, 0x006a, 0x006b,
	0x006c, 0x006d, 0x006e, 0x006f,
	/* 0x70*/
	0x0070, 0x0071, 0x0072, 0x0073,
	0x0074, 0x0075, 0x0076, 0x0077,
	0x0078, 0x0079, 0x007a, 0x007b,
	0x007c, 0x007d, 0x007e, 0x007f,
	/* 0x80*/
	0x0000, 0x0000, 0x0000, 0x0000,
	0x0000, 0x2026, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000,
	/* 0x90*/
	0x0000, 0x2018, 0x2019, 0x201c,
	0x201d, 0x2022, 0x2013, 0x2014,
	0x0000, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000,
	/* 0xa0*/
	0x00a0, 0x0e01, 0x0e02, 0x0e03,
	0x0e04, 0x0e05, 0x0e06, 0x0e07,
	0x0e08, 0x0e09, 0x0e0a, 0x0e0b,
	0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
	/* 0xb0*/
	0x0e10, 0x0e11, 0x0e12, 0x0e13,
	0x0e14, 0x0e15, 0x0e16, 0x0e17,
	0x0e18, 0x0e19, 0x0e1a, 0x0e1b,
	0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
	/* 0xc0*/
	0x0e20, 0x0e21, 0x0e22, 0x0e23,
	0x0e24, 0x0e25, 0x0e26, 0x0e27,
	0x0e28, 0x0e29, 0x0e2a, 0x0e2b,
	0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
	/* 0xd0*/
	0x0e30, 0x0e31, 0x0e32, 0x0e33,
	0x0e34, 0x0e35, 0x0e36, 0x0e37,
	0x0e38, 0x0e39, 0x0e3a, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0e3f,
	/* 0xe0*/
	0x0e40, 0x0e41, 0x0e42, 0x0e43,
	0x0e44, 0x0e45, 0x0e46, 0x0e47,
	0x0e48, 0x0e49, 0x0e4a, 0x0e4b,
	0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
	/* 0xf0*/
	0x0e50, 0x0e51, 0x0e52, 0x0e53,
	0x0e54, 0x0e55, 0x0e56, 0x0e57,
	0x0e58, 0x0e59, 0x0e5a, 0x0e5b,
	0x0000, 0x0000, 0x0000, 0x0000,
};

/*
 * Unicode page 0x00 -> cp874 byte, indexed by the low byte of the code
 * point.  Entries past the last initializer are implicitly zero; zero means
 * "no mapping" in uni2char().
 */
static const unsigned char page00[256] = {
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */
	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */
	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */
	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */
	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */
	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */
	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */
	0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */
	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */
	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */
	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */
	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */
	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */

	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */
	0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */
};

/* Unicode page 0x0e -> cp874 byte, same layout as page00. */
static const unsigned char page0e[256] = {
	0x00, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0x00-0x07 */
	0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0x08-0x0f */
	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0x10-0x17 */
	0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0x18-0x1f */
	0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0x20-0x27 */
	0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0x28-0x2f */
	0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0x30-0x37 */
	0xd8, 0xd9, 0xda, 0x00, 0x00, 0x00, 0x00, 0xdf, /* 0x38-0x3f */
	0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0x40-0x47 */
	0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0x48-0x4f */
	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0x50-0x57 */
	0xf8, 0xf9, 0xfa, 0xfb, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */
};

/* Unicode page 0x20 -> cp874 byte, same layout as page00. */
static const unsigned char page20[256] = {
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */
	0x00, 0x00, 0x00, 0x96, 0x97, 0x00, 0x00, 0x00, /* 0x10-0x17 */
	0x91, 0x92, 0x00, 0x00, 0x93, 0x94, 0x00, 0x00, /* 0x18-0x1f */
	0x00, 0x00, 0x95, 0x00, 0x00, 0x00, 0x85, 0x00, /* 0x20-0x27 */
};

/*
 * Page directory indexed by the high byte of the Unicode code point;
 * NULL (including the implicitly zeroed tail) means the whole page has no
 * cp874 mapping.
 */
static const unsigned char *const page_uni2charset[256] = {
	page00, NULL,   NULL,   NULL,   NULL,   NULL,   NULL,   NULL,
	NULL,   NULL,   NULL,   NULL,   NULL,   NULL,   page0e, NULL,
	NULL,   NULL,   NULL,   NULL,   NULL,   NULL,   NULL,   NULL,
	NULL,   NULL,   NULL,   NULL,   NULL,   NULL,   NULL,   NULL,
	page20, NULL,   NULL,   NULL,   NULL,   NULL,   NULL,   NULL,
};

/* cp874 byte -> lower-case cp874 byte (only 0x41-0x5a are changed). */
static const unsigned char charset2lower[256] = {
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */
	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */
	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */
	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */
	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */
	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */
	0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x40-0x47 */
	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x48-0x4f */
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x50-0x57 */
	0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */
	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */
	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */
	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */

	0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0x00, 0x00, /* 0x80-0x87 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */
	0x00, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */
	0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */
	0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */
	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */
	0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */
	0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */
	0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */
	0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */
	0xd8, 0xd9, 0xda, 0x00, 0x00, 0x00, 0x00, 0xdf, /* 0xd8-0xdf */
	0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */
	0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */
	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */
	0xf8, 0xf9, 0xfa, 0xfb, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */
};

/* cp874 byte -> upper-case cp874 byte (only 0x61-0x7a are changed). */
static const unsigned char charset2upper[256] = {
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */
	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */
	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */
	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */
	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */
	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */
	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */
	0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */
	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */
	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */
	0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x60-0x67 */
	0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x68-0x6f */
	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x70-0x77 */
	0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */

	0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0x00, 0x00, /* 0x80-0x87 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */
	0x00, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */
	0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */
	0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */
	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */
	0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */
	0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */
	0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */
	0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */
	0xd8, 0xd9, 0xda, 0x00, 0x00, 0x00, 0x00, 0xdf, /* 0xd8-0xdf */
	0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */
	0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */
	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */
	0xf8, 0xf9, 0xfa, 0xfb, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */
};

/*
 * Convert one Unicode code point to its cp874 byte.
 *
 * Returns 1 (bytes written to @out) on success, -ENAMETOOLONG when
 * @boundlen leaves no room, or -EINVAL when @uni has no cp874 mapping
 * (no page for its high byte, or a zero table entry for its low byte).
 */
static int uni2char(wchar_t uni,
			unsigned char *out, int boundlen)
{
	const unsigned char *uni2charset;
	unsigned char cl = uni & 0x00ff;		/* index within the page */
	unsigned char ch = (uni & 0xff00) >> 8;	/* page number */

	if (boundlen <= 0)
		return -ENAMETOOLONG;

	uni2charset = page_uni2charset[ch];
	if (uni2charset && uni2charset[cl])
		out[0] = uni2charset[cl];
	else
		return -EINVAL;
	return 1;
}

/*
 * Convert the first byte of @rawstring to Unicode.
 *
 * Returns 1 (bytes consumed) on success or -EINVAL when the byte maps to
 * 0x0000 in charset2uni; note that *uni has already been written in that
 * case.  @boundlen is not examined here.
 */
static int char2uni(const unsigned char *rawstring, int boundlen,
			wchar_t *uni)
{
	*uni = charset2uni[*rawstring];
	if (*uni == 0x0000)
		return -EINVAL;
	return 1;
}

/* NLS table registered for "cp874", with "tis-620" as an alias. */
static struct nls_table table = {
	.charset	= "cp874",
	.alias		= "tis-620",
	.uni2char	= uni2char,
	.char2uni	= char2uni,
	.charset2lower	= charset2lower,
	.charset2upper	= charset2upper,
};

/* Module init: register the table with the NLS core. */
static int __init init_nls_cp874(void)
{
	return register_nls(&table);
}

/* Module exit: unregister the table. */
static void __exit exit_nls_cp874(void)
{
	unregister_nls(&table);
}

module_init(init_nls_cp874)
module_exit(exit_nls_cp874)

MODULE_DESCRIPTION("NLS Thai charset (CP874, TIS-620)");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS_NLS(tis-620);
20 20 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 /* SPDX-License-Identifier: GPL-2.0-only */ /* * fence-chain: chain fences together in a timeline * * Copyright (C) 2018 Advanced Micro Devices, Inc. * Authors: * Christian König <christian.koenig@amd.com> */ #ifndef __LINUX_DMA_FENCE_CHAIN_H #define __LINUX_DMA_FENCE_CHAIN_H #include <linux/dma-fence.h> #include <linux/irq_work.h> #include <linux/slab.h> /** * struct dma_fence_chain - fence to represent an node of a fence chain * @base: fence base class * @prev: previous fence of the chain * @prev_seqno: original previous seqno before garbage collection * @fence: encapsulated fence * @lock: spinlock for fence handling */ struct dma_fence_chain { struct dma_fence base; struct dma_fence __rcu *prev; u64 prev_seqno; struct dma_fence *fence; union { /** * @cb: callback for signaling * * This is used to add the callback for signaling the * complection of the fence chain. Never used at the same time * as the irq work. */ struct dma_fence_cb cb; /** * @work: irq work item for signaling * * Irq work structure to allow us to add the callback without * running into lock inversion. Never used at the same time as * the callback. */ struct irq_work work; }; spinlock_t lock; }; /** * to_dma_fence_chain - cast a fence to a dma_fence_chain * @fence: fence to cast to a dma_fence_array * * Returns NULL if the fence is not a dma_fence_chain, * or the dma_fence_chain otherwise. 
*/ static inline struct dma_fence_chain * to_dma_fence_chain(struct dma_fence *fence) { if (!fence || !dma_fence_is_chain(fence)) return NULL; return container_of(fence, struct dma_fence_chain, base); } /** * dma_fence_chain_contained - return the contained fence * @fence: the fence to test * * If the fence is a dma_fence_chain the function returns the fence contained * inside the chain object, otherwise it returns the fence itself. */ static inline struct dma_fence * dma_fence_chain_contained(struct dma_fence *fence) { struct dma_fence_chain *chain = to_dma_fence_chain(fence); return chain ? chain->fence : fence; } /** * dma_fence_chain_alloc * * Returns a new struct dma_fence_chain object or NULL on failure. * * This specialized allocator has to be a macro for its allocations to be * accounted separately (to have a separate alloc_tag). The typecast is * intentional to enforce typesafety. */ #define dma_fence_chain_alloc() \ kmalloc_obj(struct dma_fence_chain) /** * dma_fence_chain_free * @chain: chain node to free * * Frees up an allocated but not used struct dma_fence_chain object. This * doesn't need an RCU grace period since the fence was never initialized nor * published. After dma_fence_chain_init() has been called the fence must be * released by calling dma_fence_put(), and not through this function. */ static inline void dma_fence_chain_free(struct dma_fence_chain *chain) { kfree(chain); }; /** * dma_fence_chain_for_each - iterate over all fences in chain * @iter: current fence * @head: starting point * * Iterate over all fences in the chain. We keep a reference to the current * fence while inside the loop which must be dropped when breaking out. * * For a deep dive iterator see dma_fence_unwrap_for_each(). 
*/ #define dma_fence_chain_for_each(iter, head) \ for (iter = dma_fence_get(head); iter; \ iter = dma_fence_chain_walk(iter)) struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence); int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t seqno); void dma_fence_chain_init(struct dma_fence_chain *chain, struct dma_fence *prev, struct dma_fence *fence, uint64_t seqno); #endif /* __LINUX_DMA_FENCE_CHAIN_H */
11 12 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 // SPDX-License-Identifier: GPL-2.0-only /* * drivers/acpi/device_sysfs.c - ACPI device sysfs attributes and modalias. * * Copyright (C) 2015, Intel Corp. * Author: Mika Westerberg <mika.westerberg@linux.intel.com> * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #include <linux/acpi.h> #include <linux/device.h> #include <linux/export.h> #include <linux/nls.h> #include "internal.h" static ssize_t acpi_object_path(acpi_handle handle, char *buf) { struct acpi_buffer path = {ACPI_ALLOCATE_BUFFER, NULL}; int result; result = acpi_get_name(handle, ACPI_FULL_PATHNAME, &path); if (result) return result; result = sysfs_emit(buf, "%s\n", (char *)path.pointer); kfree(path.pointer); return result; } struct acpi_data_node_attr { struct attribute attr; ssize_t (*show)(struct acpi_data_node *, char *); ssize_t (*store)(struct acpi_data_node *, const char *, size_t count); }; #define DATA_NODE_ATTR(_name) \ static struct acpi_data_node_attr data_node_##_name = \ __ATTR(_name, 0444, data_node_show_##_name, NULL) static ssize_t data_node_show_path(struct acpi_data_node *dn, char *buf) { return dn->handle ? 
acpi_object_path(dn->handle, buf) : 0; } DATA_NODE_ATTR(path); static struct attribute *acpi_data_node_default_attrs[] = { &data_node_path.attr, NULL }; ATTRIBUTE_GROUPS(acpi_data_node_default); #define to_data_node(k) container_of(k, struct acpi_data_node, kobj) #define to_attr(a) container_of(a, struct acpi_data_node_attr, attr) static ssize_t acpi_data_node_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct acpi_data_node *dn = to_data_node(kobj); struct acpi_data_node_attr *dn_attr = to_attr(attr); return dn_attr->show ? dn_attr->show(dn, buf) : -ENXIO; } static const struct sysfs_ops acpi_data_node_sysfs_ops = { .show = acpi_data_node_attr_show, }; static void acpi_data_node_release(struct kobject *kobj) { struct acpi_data_node *dn = to_data_node(kobj); complete(&dn->kobj_done); } static const struct kobj_type acpi_data_node_ktype = { .sysfs_ops = &acpi_data_node_sysfs_ops, .default_groups = acpi_data_node_default_groups, .release = acpi_data_node_release, }; static void acpi_expose_nondev_subnodes(struct kobject *kobj, struct acpi_device_data *data) { struct list_head *list = &data->subnodes; struct acpi_data_node *dn; if (list_empty(list)) return; list_for_each_entry(dn, list, sibling) { int ret; init_completion(&dn->kobj_done); ret = kobject_init_and_add(&dn->kobj, &acpi_data_node_ktype, kobj, "%s", dn->name); if (!ret) acpi_expose_nondev_subnodes(&dn->kobj, &dn->data); else if (dn->handle) acpi_handle_err(dn->handle, "Failed to expose (%d)\n", ret); } } static void acpi_hide_nondev_subnodes(struct acpi_device_data *data) { struct list_head *list = &data->subnodes; struct acpi_data_node *dn; if (list_empty(list)) return; list_for_each_entry_reverse(dn, list, sibling) { acpi_hide_nondev_subnodes(&dn->data); kobject_put(&dn->kobj); } } /** * create_pnp_modalias - Create hid/cid(s) string for modalias and uevent * @acpi_dev: ACPI device object. * @modalias: Buffer to print into. * @size: Size of the buffer. 
* * Creates hid/cid(s) string needed for modalias and uevent * e.g. on a device with hid:IBM0001 and cid:ACPI0001 you get: * char *modalias: "acpi:IBM0001:ACPI0001" * Return: 0: no _HID and no _CID * -EINVAL: output error * -ENOMEM: output is truncated */ static int create_pnp_modalias(const struct acpi_device *acpi_dev, char *modalias, int size) { int len; int count; struct acpi_hardware_id *id; /* Avoid unnecessarily loading modules for non present devices. */ if (!acpi_device_is_present(acpi_dev)) return 0; /* * Since we skip ACPI_DT_NAMESPACE_HID from the modalias below, 0 should * be returned if ACPI_DT_NAMESPACE_HID is the only ACPI/PNP ID in the * device's list. */ count = 0; list_for_each_entry(id, &acpi_dev->pnp.ids, list) if (strcmp(id->id, ACPI_DT_NAMESPACE_HID)) count++; if (!count) return 0; len = snprintf(modalias, size, "acpi:"); if (len >= size) return -ENOMEM; size -= len; list_for_each_entry(id, &acpi_dev->pnp.ids, list) { if (!strcmp(id->id, ACPI_DT_NAMESPACE_HID)) continue; count = snprintf(&modalias[len], size, "%s:", id->id); if (count >= size) return -ENOMEM; len += count; size -= count; } return len; } /** * create_of_modalias - Creates DT compatible string for modalias and uevent * @acpi_dev: ACPI device object. * @modalias: Buffer to print into. * @size: Size of the buffer. * * Expose DT compatible modalias as of:NnameTCcompatible. This function should * only be called for devices having ACPI_DT_NAMESPACE_HID in their list of * ACPI/PNP IDs. 
*/ static int create_of_modalias(const struct acpi_device *acpi_dev, char *modalias, int size) { struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER }; const union acpi_object *of_compatible, *obj; acpi_status status; int len, count; int i, nval; char *c; status = acpi_get_name(acpi_dev->handle, ACPI_SINGLE_NAME, &buf); if (ACPI_FAILURE(status)) return -ENODEV; /* DT strings are all in lower case */ for (c = buf.pointer; *c != '\0'; c++) *c = tolower(*c); len = snprintf(modalias, size, "of:N%sT", (char *)buf.pointer); ACPI_FREE(buf.pointer); if (len >= size) return -ENOMEM; size -= len; of_compatible = acpi_dev->data.of_compatible; if (of_compatible->type == ACPI_TYPE_PACKAGE) { nval = of_compatible->package.count; obj = of_compatible->package.elements; } else { /* Must be ACPI_TYPE_STRING. */ nval = 1; obj = of_compatible; } for (i = 0; i < nval; i++, obj++) { count = snprintf(&modalias[len], size, "C%s", obj->string.pointer); if (count >= size) return -ENOMEM; len += count; size -= count; } return len; } int __acpi_device_uevent_modalias(const struct acpi_device *adev, struct kobj_uevent_env *env) { int len; if (!adev) return -ENODEV; if (list_empty(&adev->pnp.ids)) return 0; if (add_uevent_var(env, "MODALIAS=")) return -ENOMEM; if (adev->data.of_compatible) len = create_of_modalias(adev, &env->buf[env->buflen - 1], sizeof(env->buf) - env->buflen); else len = create_pnp_modalias(adev, &env->buf[env->buflen - 1], sizeof(env->buf) - env->buflen); if (len < 0) return len; env->buflen += len; return 0; } /** * acpi_device_uevent_modalias - uevent modalias for ACPI-enumerated devices. * @dev: Struct device to get ACPI device node. * @env: Environment variables of the kobject uevent. * * Create the uevent modalias field for ACPI-enumerated devices. * * Because other buses do not support ACPI HIDs & CIDs, e.g. for a device with * hid:IBM0001 and cid:ACPI0001 you get: "acpi:IBM0001:ACPI0001". 
*/ int acpi_device_uevent_modalias(const struct device *dev, struct kobj_uevent_env *env) { return __acpi_device_uevent_modalias(acpi_companion_match(dev), env); } EXPORT_SYMBOL_GPL(acpi_device_uevent_modalias); static int __acpi_device_modalias(const struct acpi_device *adev, char *buf, int size) { int len, count; if (!adev) return -ENODEV; if (list_empty(&adev->pnp.ids)) return 0; len = create_pnp_modalias(adev, buf, size - 1); if (len < 0) { return len; } else if (len > 0) { buf[len++] = '\n'; size -= len; } if (!adev->data.of_compatible) return len; count = create_of_modalias(adev, buf + len, size - 1); if (count < 0) { return count; } else if (count > 0) { len += count; buf[len++] = '\n'; } return len; } /** * acpi_device_modalias - modalias sysfs attribute for ACPI-enumerated devices. * @dev: Struct device to get ACPI device node. * @buf: The buffer to save pnp_modalias and of_modalias. * @size: Size of buffer. * * Create the modalias sysfs attribute for ACPI-enumerated devices. * * Because other buses do not support ACPI HIDs & CIDs, e.g. for a device with * hid:IBM0001 and cid:ACPI0001 you get: "acpi:IBM0001:ACPI0001". 
*/ int acpi_device_modalias(struct device *dev, char *buf, int size) { return __acpi_device_modalias(acpi_companion_match(dev), buf, size); } EXPORT_SYMBOL_GPL(acpi_device_modalias); static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf) { return __acpi_device_modalias(to_acpi_device(dev), buf, 1024); } static DEVICE_ATTR_RO(modalias); static ssize_t real_power_state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *adev = to_acpi_device(dev); int state; int ret; ret = acpi_device_get_power(adev, &state); if (ret) return ret; return sysfs_emit(buf, "%s\n", acpi_power_state_string(state)); } static DEVICE_ATTR_RO(real_power_state); static ssize_t power_state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *adev = to_acpi_device(dev); return sysfs_emit(buf, "%s\n", acpi_power_state_string(adev->power.state)); } static DEVICE_ATTR_RO(power_state); static ssize_t eject_store(struct device *d, struct device_attribute *attr, const char *buf, size_t count) { struct acpi_device *acpi_device = to_acpi_device(d); acpi_object_type not_used; acpi_status status; if (!count || buf[0] != '1') return -EINVAL; if ((!acpi_device->handler || !acpi_device->handler->hotplug.enabled) && !d->driver) return -ENODEV; status = acpi_get_type(acpi_device->handle, &not_used); if (ACPI_FAILURE(status) || !acpi_device->flags.ejectable) return -ENODEV; acpi_dev_get(acpi_device); status = acpi_hotplug_schedule(acpi_device, ACPI_OST_EC_OSPM_EJECT); if (ACPI_SUCCESS(status)) return count; acpi_dev_put(acpi_device); acpi_evaluate_ost(acpi_device->handle, ACPI_OST_EC_OSPM_EJECT, ACPI_OST_SC_NON_SPECIFIC_FAILURE, NULL); return status == AE_NO_MEMORY ? 
-ENOMEM : -EAGAIN; } static DEVICE_ATTR_WO(eject); static ssize_t hid_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); return sysfs_emit(buf, "%s\n", acpi_device_hid(acpi_dev)); } static DEVICE_ATTR_RO(hid); static ssize_t cid_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); struct acpi_device_info *info = NULL; ssize_t len = 0; acpi_get_object_info(acpi_dev->handle, &info); if (!info) return 0; if (info->valid & ACPI_VALID_CID) { struct acpi_pnp_device_id_list *cid_list = &info->compatible_id_list; int i; for (i = 0; i < cid_list->count - 1; i++) len += sysfs_emit_at(buf, len, "%s,", cid_list->ids[i].string); len += sysfs_emit_at(buf, len, "%s\n", cid_list->ids[i].string); } kfree(info); return len; } static DEVICE_ATTR_RO(cid); static ssize_t uid_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); return sysfs_emit(buf, "%s\n", acpi_device_uid(acpi_dev)); } static DEVICE_ATTR_RO(uid); static ssize_t adr_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); if (acpi_dev->pnp.bus_address > U32_MAX) return sysfs_emit(buf, "0x%016llx\n", acpi_dev->pnp.bus_address); else return sysfs_emit(buf, "0x%08llx\n", acpi_dev->pnp.bus_address); } static DEVICE_ATTR_RO(adr); static ssize_t path_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); return acpi_object_path(acpi_dev->handle, buf); } static DEVICE_ATTR_RO(path); /* sysfs file that shows description text from the ACPI _STR method */ static ssize_t description_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; union acpi_object *str_obj; acpi_status 
status; int result; status = acpi_evaluate_object_typed(acpi_dev->handle, "_STR", NULL, &buffer, ACPI_TYPE_BUFFER); if (ACPI_FAILURE(status)) return -EIO; str_obj = buffer.pointer; /* * The _STR object contains a Unicode identifier for a device. * We need to convert to utf-8 so it can be displayed. */ result = utf16s_to_utf8s( (wchar_t *)str_obj->buffer.pointer, str_obj->buffer.length, UTF16_LITTLE_ENDIAN, buf, PAGE_SIZE - 1); buf[result++] = '\n'; ACPI_FREE(str_obj); return result; } static DEVICE_ATTR_RO(description); static ssize_t sun_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); acpi_status status; unsigned long long sun; status = acpi_evaluate_integer(acpi_dev->handle, "_SUN", NULL, &sun); if (ACPI_FAILURE(status)) return -EIO; return sysfs_emit(buf, "%llu\n", sun); } static DEVICE_ATTR_RO(sun); static ssize_t hrv_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); acpi_status status; unsigned long long hrv; status = acpi_evaluate_integer(acpi_dev->handle, "_HRV", NULL, &hrv); if (ACPI_FAILURE(status)) return -EIO; return sysfs_emit(buf, "%llu\n", hrv); } static DEVICE_ATTR_RO(hrv); static ssize_t status_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); acpi_status status; unsigned long long sta; status = acpi_evaluate_integer(acpi_dev->handle, "_STA", NULL, &sta); if (ACPI_FAILURE(status)) return -EIO; return sysfs_emit(buf, "%llu\n", sta); } static DEVICE_ATTR_RO(status); static struct attribute *acpi_attrs[] = { &dev_attr_path.attr, &dev_attr_hid.attr, &dev_attr_cid.attr, &dev_attr_modalias.attr, &dev_attr_description.attr, &dev_attr_adr.attr, &dev_attr_uid.attr, &dev_attr_sun.attr, &dev_attr_hrv.attr, &dev_attr_status.attr, &dev_attr_eject.attr, &dev_attr_power_state.attr, &dev_attr_real_power_state.attr, NULL }; static bool 
acpi_show_attr(struct acpi_device *dev, const struct device_attribute *attr) { /* * Devices gotten from FADT don't have a "path" attribute */ if (attr == &dev_attr_path) return dev->handle; if (attr == &dev_attr_hid || attr == &dev_attr_modalias) return !list_empty(&dev->pnp.ids); if (attr == &dev_attr_description) return acpi_has_method(dev->handle, "_STR"); if (attr == &dev_attr_adr) return dev->pnp.type.bus_address; if (attr == &dev_attr_uid) return acpi_device_uid(dev); if (attr == &dev_attr_sun) return acpi_has_method(dev->handle, "_SUN"); if (attr == &dev_attr_hrv) return acpi_has_method(dev->handle, "_HRV"); if (attr == &dev_attr_status) return acpi_has_method(dev->handle, "_STA"); if (attr == &dev_attr_cid) return acpi_has_method(dev->handle, "_CID"); /* * If device has _EJ0, 'eject' file is created that is used to trigger * hot-removal function from userland. */ if (attr == &dev_attr_eject) return acpi_has_method(dev->handle, "_EJ0"); if (attr == &dev_attr_power_state) return dev->flags.power_manageable; if (attr == &dev_attr_real_power_state) return dev->flags.power_manageable && dev->power.flags.power_resources; dev_warn_once(&dev->dev, "Unexpected attribute: %s\n", attr->attr.name); return false; } static umode_t acpi_attr_is_visible(struct kobject *kobj, struct attribute *attr, int attrno) { struct acpi_device *dev = to_acpi_device(kobj_to_dev(kobj)); if (acpi_show_attr(dev, container_of(attr, struct device_attribute, attr))) return attr->mode; else return 0; } static const struct attribute_group acpi_group = { .attrs = acpi_attrs, .is_visible = acpi_attr_is_visible, }; const struct attribute_group *acpi_groups[] = { &acpi_group, NULL }; /** * acpi_device_setup_files - Create sysfs attributes of an ACPI device. * @dev: ACPI device object. */ void acpi_device_setup_files(struct acpi_device *dev) { acpi_expose_nondev_subnodes(&dev->dev.kobj, &dev->data); } /** * acpi_device_remove_files - Remove sysfs attributes of an ACPI device. 
* @dev: ACPI device object. */ void acpi_device_remove_files(struct acpi_device *dev) { acpi_hide_nondev_subnodes(&dev->data); }
55 16 13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * AEAD: Authenticated Encryption with Associated Data * * Copyright (c) 2007-2015 Herbert Xu <herbert@gondor.apana.org.au> */ #ifndef _CRYPTO_AEAD_H #define _CRYPTO_AEAD_H #include <linux/atomic.h> #include <linux/container_of.h> #include <linux/crypto.h> #include <linux/slab.h> #include <linux/types.h> /** * DOC: Authenticated Encryption With Associated Data (AEAD) Cipher API * * The AEAD cipher API is used with the ciphers of type CRYPTO_ALG_TYPE_AEAD * (listed as type "aead" in /proc/crypto) * * The most prominent examples for this type of encryption is GCM and CCM. * However, the kernel supports other types of AEAD ciphers which are defined * with the following cipher string: * * authenc(keyed message digest, block cipher) * * For example: authenc(hmac(sha256), cbc(aes)) * * The example code provided for the symmetric key cipher operation applies * here as well. Naturally all *skcipher* symbols must be exchanged the *aead* * pendants discussed in the following. In addition, for the AEAD operation, * the aead_request_set_ad function must be used to set the pointer to the * associated data memory location before performing the encryption or * decryption operation. Another deviation from the asynchronous block cipher * operation is that the caller should explicitly check for -EBADMSG of the * crypto_aead_decrypt. That error indicates an authentication error, i.e. * a breach in the integrity of the message. 
In essence, that -EBADMSG error * code is the key bonus an AEAD cipher has over "standard" block chaining * modes. * * Memory Structure: * * The source scatterlist must contain the concatenation of * associated data || plaintext or ciphertext. * * The destination scatterlist has the same layout, except that the plaintext * (resp. ciphertext) will grow (resp. shrink) by the authentication tag size * during encryption (resp. decryption). The authentication tag is generated * during the encryption operation and appended to the ciphertext. During * decryption, the authentication tag is consumed along with the ciphertext and * used to verify the integrity of the plaintext and the associated data. * * In-place encryption/decryption is enabled by using the same scatterlist * pointer for both the source and destination. * * Even in the out-of-place case, space must be reserved in the destination for * the associated data, even though it won't be written to. This makes the * in-place and out-of-place cases more consistent. It is permissible for the * "destination" associated data to alias the "source" associated data. * * As with the other scatterlist crypto APIs, zero-length scatterlist elements * are not allowed in the used part of the scatterlist. Thus, if there is no * associated data, the first element must point to the plaintext/ciphertext. * * To meet the needs of IPsec, a special quirk applies to rfc4106, rfc4309, * rfc4543, and rfc7539esp ciphers. For these ciphers, the final 'ivsize' bytes * of the associated data buffer must contain a second copy of the IV. This is * in addition to the copy passed to aead_request_set_crypt(). These two IV * copies must not differ; different implementations of the same algorithm may * behave differently in that case. Note that the algorithm might not actually * treat the IV as associated data; nevertheless the length passed to * aead_request_set_ad() must include it. 
*/ struct crypto_aead; struct scatterlist; /** * struct aead_request - AEAD request * @base: Common attributes for async crypto requests * @assoclen: Length in bytes of associated data for authentication * @cryptlen: Length of data to be encrypted or decrypted * @iv: Initialisation vector * @src: Source data * @dst: Destination data * @__ctx: Start of private context data */ struct aead_request { struct crypto_async_request base; unsigned int assoclen; unsigned int cryptlen; u8 *iv; struct scatterlist *src; struct scatterlist *dst; void *__ctx[] CRYPTO_MINALIGN_ATTR; }; /** * struct aead_alg - AEAD cipher definition * @maxauthsize: Set the maximum authentication tag size supported by the * transformation. A transformation may support smaller tag sizes. * As the authentication tag is a message digest to ensure the * integrity of the encrypted data, a consumer typically wants the * largest authentication tag possible as defined by this * variable. * @setauthsize: Set authentication size for the AEAD transformation. This * function is used to specify the consumer requested size of the * authentication tag to be either generated by the transformation * during encryption or the size of the authentication tag to be * supplied during the decryption operation. This function is also * responsible for checking the authentication tag size for * validity. * @setkey: see struct skcipher_alg * @encrypt: see struct skcipher_alg * @decrypt: see struct skcipher_alg * @ivsize: see struct skcipher_alg * @chunksize: see struct skcipher_alg * @init: Initialize the cryptographic transformation object. This function * is used to initialize the cryptographic transformation object. * This function is called only once at the instantiation time, right * after the transformation context was allocated. 
In case the * cryptographic hardware has some special requirements which need to * be handled by software, this function shall check for the precise * requirement of the transformation and put any software fallbacks * in place. * @exit: Deinitialize the cryptographic transformation object. This is a * counterpart to @init, used to remove various changes set in * @init. * @base: Definition of a generic crypto cipher algorithm. * * All fields except @ivsize are mandatory and must be filled. */ struct aead_alg { int (*setkey)(struct crypto_aead *tfm, const u8 *key, unsigned int keylen); int (*setauthsize)(struct crypto_aead *tfm, unsigned int authsize); int (*encrypt)(struct aead_request *req); int (*decrypt)(struct aead_request *req); int (*init)(struct crypto_aead *tfm); void (*exit)(struct crypto_aead *tfm); unsigned int ivsize; unsigned int maxauthsize; unsigned int chunksize; struct crypto_alg base; }; struct crypto_aead { unsigned int authsize; unsigned int reqsize; struct crypto_tfm base; }; struct crypto_sync_aead { struct crypto_aead base; }; #define MAX_SYNC_AEAD_REQSIZE 384 #define SYNC_AEAD_REQUEST_ON_STACK(name, _tfm) \ char __##name##_desc[sizeof(struct aead_request) + \ MAX_SYNC_AEAD_REQSIZE \ ] CRYPTO_MINALIGN_ATTR; \ struct aead_request *name = \ (((struct aead_request *)__##name##_desc)->base.tfm = \ crypto_sync_aead_tfm((_tfm)), \ (void *)__##name##_desc) static inline struct crypto_aead *__crypto_aead_cast(struct crypto_tfm *tfm) { return container_of(tfm, struct crypto_aead, base); } /** * crypto_alloc_aead() - allocate AEAD cipher handle * @alg_name: is the cra_name / name or cra_driver_name / driver name of the * AEAD cipher * @type: specifies the type of the cipher * @mask: specifies the mask for the cipher * * Allocate a cipher handle for an AEAD. The returned struct * crypto_aead is the cipher handle that is required for any subsequent * API invocation for that AEAD. 
* * Return: allocated cipher handle in case of success; IS_ERR() is true in case * of an error, PTR_ERR() returns the error code. */ struct crypto_aead *crypto_alloc_aead(const char *alg_name, u32 type, u32 mask); struct crypto_sync_aead *crypto_alloc_sync_aead(const char *alg_name, u32 type, u32 mask); static inline struct crypto_tfm *crypto_aead_tfm(struct crypto_aead *tfm) { return &tfm->base; } static inline struct crypto_tfm *crypto_sync_aead_tfm(struct crypto_sync_aead *tfm) { return crypto_aead_tfm(&tfm->base); } /** * crypto_free_aead() - zeroize and free aead handle * @tfm: cipher handle to be freed * * If @tfm is a NULL or error pointer, this function does nothing. */ static inline void crypto_free_aead(struct crypto_aead *tfm) { crypto_destroy_tfm(tfm, crypto_aead_tfm(tfm)); } static inline void crypto_free_sync_aead(struct crypto_sync_aead *tfm) { crypto_free_aead(&tfm->base); } /** * crypto_has_aead() - Search for the availability of an aead. * @alg_name: is the cra_name / name or cra_driver_name / driver name of the * aead * @type: specifies the type of the aead * @mask: specifies the mask for the aead * * Return: true when the aead is known to the kernel crypto API; false * otherwise */ int crypto_has_aead(const char *alg_name, u32 type, u32 mask); static inline const char *crypto_aead_driver_name(struct crypto_aead *tfm) { return crypto_tfm_alg_driver_name(crypto_aead_tfm(tfm)); } static inline struct aead_alg *crypto_aead_alg(struct crypto_aead *tfm) { return container_of(crypto_aead_tfm(tfm)->__crt_alg, struct aead_alg, base); } static inline unsigned int crypto_aead_alg_ivsize(struct aead_alg *alg) { return alg->ivsize; } /** * crypto_aead_ivsize() - obtain IV size * @tfm: cipher handle * * The size of the IV for the aead referenced by the cipher handle is * returned. This IV size may be zero if the cipher does not need an IV. 
* * Return: IV size in bytes */ static inline unsigned int crypto_aead_ivsize(struct crypto_aead *tfm) { return crypto_aead_alg_ivsize(crypto_aead_alg(tfm)); } static inline unsigned int crypto_sync_aead_ivsize(struct crypto_sync_aead *tfm) { return crypto_aead_ivsize(&tfm->base); } /** * crypto_aead_authsize() - obtain maximum authentication data size * @tfm: cipher handle * * The maximum size of the authentication data for the AEAD cipher referenced * by the AEAD cipher handle is returned. The authentication data size may be * zero if the cipher implements a hard-coded maximum. * * The authentication data may also be known as "tag value". * * Return: authentication data size / tag size in bytes */ static inline unsigned int crypto_aead_authsize(struct crypto_aead *tfm) { return tfm->authsize; } static inline unsigned int crypto_sync_aead_authsize(struct crypto_sync_aead *tfm) { return crypto_aead_authsize(&tfm->base); } static inline unsigned int crypto_aead_alg_maxauthsize(struct aead_alg *alg) { return alg->maxauthsize; } static inline unsigned int crypto_aead_maxauthsize(struct crypto_aead *aead) { return crypto_aead_alg_maxauthsize(crypto_aead_alg(aead)); } static inline unsigned int crypto_sync_aead_maxauthsize(struct crypto_sync_aead *tfm) { return crypto_aead_maxauthsize(&tfm->base); } /** * crypto_aead_blocksize() - obtain block size of cipher * @tfm: cipher handle * * The block size for the AEAD referenced with the cipher handle is returned. 
* The caller may use that information to allocate appropriate memory for the * data returned by the encryption or decryption operation * * Return: block size of cipher */ static inline unsigned int crypto_aead_blocksize(struct crypto_aead *tfm) { return crypto_tfm_alg_blocksize(crypto_aead_tfm(tfm)); } static inline unsigned int crypto_sync_aead_blocksize(struct crypto_sync_aead *tfm) { return crypto_aead_blocksize(&tfm->base); } static inline unsigned int crypto_aead_alignmask(struct crypto_aead *tfm) { return crypto_tfm_alg_alignmask(crypto_aead_tfm(tfm)); } static inline u32 crypto_aead_get_flags(struct crypto_aead *tfm) { return crypto_tfm_get_flags(crypto_aead_tfm(tfm)); } static inline void crypto_aead_set_flags(struct crypto_aead *tfm, u32 flags) { crypto_tfm_set_flags(crypto_aead_tfm(tfm), flags); } static inline void crypto_aead_clear_flags(struct crypto_aead *tfm, u32 flags) { crypto_tfm_clear_flags(crypto_aead_tfm(tfm), flags); } static inline u32 crypto_sync_aead_get_flags(struct crypto_sync_aead *tfm) { return crypto_aead_get_flags(&tfm->base); } static inline void crypto_sync_aead_set_flags(struct crypto_sync_aead *tfm, u32 flags) { crypto_aead_set_flags(&tfm->base, flags); } static inline void crypto_sync_aead_clear_flags(struct crypto_sync_aead *tfm, u32 flags) { crypto_aead_clear_flags(&tfm->base, flags); } /** * crypto_aead_setkey() - set key for cipher * @tfm: cipher handle * @key: buffer holding the key * @keylen: length of the key in bytes * * The caller provided key is set for the AEAD referenced by the cipher * handle. * * Note, the key length determines the cipher type. Many block ciphers implement * different cipher modes depending on the key size, such as AES-128 vs AES-192 * vs. AES-256. When providing a 16 byte key for an AES cipher handle, AES-128 * is performed. 
* * Return: 0 if the setting of the key was successful; < 0 if an error occurred */ int crypto_aead_setkey(struct crypto_aead *tfm, const u8 *key, unsigned int keylen); static inline int crypto_sync_aead_setkey(struct crypto_sync_aead *tfm, const u8 *key, unsigned int keylen) { return crypto_aead_setkey(&tfm->base, key, keylen); } /** * crypto_aead_setauthsize() - set authentication data size * @tfm: cipher handle * @authsize: size of the authentication data / tag in bytes * * Set the authentication data size / tag size. AEAD requires an authentication * tag (or MAC) in addition to the associated data. * * Return: 0 if the setting of the authentication size was successful; < 0 if an error occurred */ int crypto_aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize); static inline int crypto_sync_aead_setauthsize(struct crypto_sync_aead *tfm, unsigned int authsize) { return crypto_aead_setauthsize(&tfm->base, authsize); } static inline struct crypto_aead *crypto_aead_reqtfm(struct aead_request *req) { return __crypto_aead_cast(req->base.tfm); } static inline struct crypto_sync_aead *crypto_sync_aead_reqtfm(struct aead_request *req) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); return container_of(tfm, struct crypto_sync_aead, base); } /** * crypto_aead_encrypt() - encrypt plaintext * @req: reference to the aead_request handle that holds all information * needed to perform the cipher operation * * Encrypt plaintext data using the aead_request handle. That data structure * and how it is filled with data is discussed with the aead_request_* * functions. * * IMPORTANT NOTE The encryption operation creates the authentication data / * tag. That data is concatenated with the created ciphertext. * The ciphertext memory size is therefore the given number of * block cipher blocks + the size defined by the * crypto_aead_setauthsize invocation. The caller must ensure * that sufficient memory is available for the ciphertext and * the authentication tag. 
* * Return: 0 if the cipher operation was successful; < 0 if an error occurred */ int crypto_aead_encrypt(struct aead_request *req); /** * crypto_aead_decrypt() - decrypt ciphertext * @req: reference to the aead_request handle that holds all information * needed to perform the cipher operation * * Decrypt ciphertext data using the aead_request handle. That data structure * and how it is filled with data is discussed with the aead_request_* * functions. * * IMPORTANT NOTE The caller must concatenate the ciphertext followed by the * authentication data / tag. That authentication data / tag * must have the size defined by the crypto_aead_setauthsize * invocation. * * * Return: 0 if the cipher operation was successful; -EBADMSG: The AEAD * cipher operation performs the authentication of the data during the * decryption operation. Therefore, the function returns this error if * the authentication of the ciphertext was unsuccessful (i.e. the * integrity of the ciphertext or the associated data was violated); * < 0 if an error occurred. */ int crypto_aead_decrypt(struct aead_request *req); /** * DOC: Asynchronous AEAD Request Handle * * The aead_request data structure contains all pointers to data required for * the AEAD cipher operation. This includes the cipher handle (which can be * used by multiple aead_request instances), pointer to plaintext and * ciphertext, asynchronous callback function, etc. It acts as a handle to the * aead_request_* API calls in a similar way as AEAD handle to the * crypto_aead_* API calls. 
*/ /** * crypto_aead_reqsize() - obtain size of the request data structure * @tfm: cipher handle * * Return: number of bytes */ static inline unsigned int crypto_aead_reqsize(struct crypto_aead *tfm) { return tfm->reqsize; } /** * aead_request_set_tfm() - update cipher handle reference in request * @req: request handle to be modified * @tfm: cipher handle that shall be added to the request handle * * Allow the caller to replace the existing aead handle in the request * data structure with a different one. */ static inline void aead_request_set_tfm(struct aead_request *req, struct crypto_aead *tfm) { req->base.tfm = crypto_aead_tfm(tfm); } static inline void aead_request_set_sync_tfm(struct aead_request *req, struct crypto_sync_aead *tfm) { aead_request_set_tfm(req, &tfm->base); } /** * aead_request_alloc() - allocate request data structure * @tfm: cipher handle to be registered with the request * @gfp: memory allocation flag that is handed to kmalloc by the API call. * * Allocate the request data structure that must be used with the AEAD * encrypt and decrypt API calls. During the allocation, the provided aead * handle is registered in the request data structure. 
*
 * Return: allocated request handle in case of success, or NULL if out of memory
 */
static inline struct aead_request *aead_request_alloc(struct crypto_aead *tfm,
						      gfp_t gfp)
{
	struct aead_request *req;
	/* Request header plus the size reported by crypto_aead_reqsize(). */
	size_t alloc_len = sizeof(*req) + crypto_aead_reqsize(tfm);

	req = kmalloc(alloc_len, gfp);
	if (likely(req)) {
		/* Register the handle only when the allocation succeeded. */
		aead_request_set_tfm(req, tfm);
	}

	return req;
}

/**
 * aead_request_free() - zeroize and free request data structure
 * @req: request data structure to be freed
 *
 * Hands @req to kfree_sensitive().
 */
static inline void aead_request_free(struct aead_request *req)
{
	kfree_sensitive(req);
}

/**
 * aead_request_set_callback() - set asynchronous callback function
 * @req: request handle
 * @flags: specify zero or an ORing of the flags
 *	   CRYPTO_TFM_REQ_MAY_BACKLOG the request queue may back log and
 *	   increase the wait queue beyond the initial maximum size;
 *	   CRYPTO_TFM_REQ_MAY_SLEEP the request processing may sleep
 * @compl: callback function pointer to be registered with the request handle
 * @data: The data pointer refers to memory that is not used by the kernel
 *	  crypto API, but provided to the callback function for it to use. Here,
 *	  the caller can provide a reference to memory the callback function can
 *	  operate on. As the callback function is invoked asynchronously to the
 *	  related functionality, it may need to access data structures of the
 *	  related functionality which can be referenced using this pointer. The
 *	  callback function can access the memory via the "data" field in the
 *	  crypto_async_request data structure provided to the callback function.
*
 * Setting the callback function that is triggered once the cipher operation
 * completes.
 *
 * The callback function is registered with the aead_request handle and
 * must comply with the following template::
 *
 *      void callback_function(struct crypto_async_request *req, int error)
 */
static inline void aead_request_set_callback(struct aead_request *req,
					     u32 flags,
					     crypto_completion_t compl,
					     void *data)
{
	/* Three independent stores into the embedded base request. */
	req->base.flags = flags;
	req->base.data = data;
	req->base.complete = compl;
}

/**
 * aead_request_set_crypt - set data buffers
 * @req: request handle
 * @src: source scatter / gather list
 * @dst: destination scatter / gather list
 * @cryptlen: number of bytes to process from @src
 * @iv: IV for the cipher operation which must comply with the IV size defined
 *      by crypto_aead_ivsize()
 *
 * Setting the source data and destination data scatter / gather lists which
 * hold the associated data concatenated with the plaintext or ciphertext. See
 * below for the authentication tag.
 *
 * For encryption, the source is treated as the plaintext and the
 * destination is the ciphertext. For a decryption operation, the use is
 * reversed - the source is the ciphertext and the destination is the plaintext.
 *
 * The memory structure for cipher operation has the following structure:
 *
 * - AEAD encryption input:  assoc data || plaintext
 * - AEAD encryption output: assoc data || ciphertext || auth tag
 * - AEAD decryption input:  assoc data || ciphertext || auth tag
 * - AEAD decryption output: assoc data || plaintext
 *
 * Although the kernel requires the presence of the AAD buffer, it does not
 * fill the AAD buffer in the output case. If the caller wants to have that
 * data buffer filled, the caller must use an in-place cipher operation
 * (i.e. the same memory location for input and output).
*/ static inline void aead_request_set_crypt(struct aead_request *req, struct scatterlist *src, struct scatterlist *dst, unsigned int cryptlen, u8 *iv) { req->src = src; req->dst = dst; req->cryptlen = cryptlen; req->iv = iv; } /** * aead_request_set_ad - set associated data information * @req: request handle * @assoclen: number of bytes in associated data * * Setting the AD information. This function sets the length of * the associated data. */ static inline void aead_request_set_ad(struct aead_request *req, unsigned int assoclen) { req->assoclen = assoclen; } #endif /* _CRYPTO_AEAD_H */
62 352 305 15 188 180 322 247 303 343 290 333 30 22 35 396 363 6 31 16 27 13 5 31 215 171 23 23 14 24 12 28 28 28 28 12 28 33 33 32 33 33 20 33 29 19 5 6 27 10 30 30 18 10 28 28 14 26 221 298 297 368 366 33 28 33 30 394 392 388 266 344 154 8 337 28 329 330 324 67 2 322 284 45 413 413 170 243 362 414 363 363 207 363 363 207 33 327 329 330 330 329 412 412 169 242 413 413 414 99 410 312 98 412 414 1 83 262 66 412 427 322 124 373 373 374 305 68 68 373 373 373 302 303 224 162 8 161 181 45 180 306 99 305 110 29 15 101 102 91 74 110 94 192 11 182 182 181 10 94 128 27 171 106 127 142 86 112 110 297 26 294 294 160 159 294 303 168 211 303 303 296 288 303 94 288 288 92 90 5 90 5 303 298 262 89 214 158 184 184 144 92 76 184 211 89 92 158 143 144 144 144 369 351 15 11 34 1 2 13 27 34 22 11 11 22 293 293 140 230 293 293 8 5 5 1 8 22 266 27 8 8 293 321 25 1 234 22 11 22 149 112 368 366 62 58 367 64 337 181 228 102 68 72 321 366 61 367 367 341 26 33 222 131 368 219 149 363 329 366 327 35 368 368 368 368 221 10 297 143 224 297 297 366 13 232 45 197 308 116 216 368 309 297 144 366 366 15 365 555 555 557 555 545 8 557 219 1 335 555 554 1 1 1 556 553 21 34 34 34 34 555 556 555 557 556 518 39 557 557 1 1 33 1 33 33 33 33 33 33 33 33 33 24 345 8 343 3 346 343 344 345 301 162 8 343 204 120 64 346 345 301 8 296 149 20 20 115 20 17 6 18 18 6 4 18 17 6 20 365 222 149 86 38 15 29 33 16 17 2 20 10 14 73 51 117 117 114 116 1 77 51 48 368 367 80 20 12 2 2 4 2 84 84 52 52 1 46 371 222 220 14 45 18 46 46 117 151 151 1 52 149 52 328 328 52 62 300 162 31 31 31 29 7 80 81 12 46 14 60 34 81 81 80 116 117 117 116 80 51 51 51 18 51 51 49 2 51 51 51 51 117 62 56 55 56 11 56 56 56 56 51 20 56 51 20 56 650 247 516 515 39 38 515 39 38 39 39 39 368 69 283 281 222 189 63 151 372 372 371 372 119 2 2 119 119 280 84 7 119 119 162 149 298 148 298 291 150 370 118 118 118 118 118 62 62 376 376 320 63 21 40 62 2 298 353 80 368 12 223 214 8 116 296 370 316 73 21 349 367 1 1 1 1 1 1 1 1 1 1 1 1 212 32 201 203 41 161 
3 162 1 1 141 19 18 1 159 161 160 160 160 228 200 46 227 12 227 196 46 210 5 25 208 28 2 7 133 110 109 212 5 5 5 7 10 8 1 7 3 1 3 5 5 6 6 6 4 4 8 8 8 8 14 14 2 2 3 8 9 10 3 9 1 8 8 22 1 21 4 16 10 5 4 10 21 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 
476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 
976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 
1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 
1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 
2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 
2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 
2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 
3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 
3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 
4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 
4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 
4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 
5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 
5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 
6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 
6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 
6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 7281 7282 7283 7284 7285 7286 7287 7288 7289 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com * Written by Alex Tomas <alex@clusterfs.com> */ /* * mballoc.c contains the multiblocks allocation routines */ #include "ext4_jbd2.h" #include "mballoc.h" #include <linux/log2.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/nospec.h> #include <linux/backing-dev.h> #include <linux/freezer.h> #include 
<trace/events/ext4.h> #include <kunit/static_stub.h> /* * MUSTDO: * - test ext4_ext_search_left() and ext4_ext_search_right() * - search for metadata in few groups * * TODO v4: * - normalization should take into account whether file is still open * - discard preallocations if no free space left (policy?) * - don't normalize tails * - quota * - reservation for superuser * * TODO v3: * - bitmap read-ahead (proposed by Oleg Drokin aka green) * - track min/max extents in each group for better group selection * - mb_mark_used() may allocate chunk right after splitting buddy * - tree of groups sorted by number of free blocks * - error handling */ /* * The allocation request involve request for multiple number of blocks * near to the goal(block) value specified. * * During initialization phase of the allocator we decide to use the * group preallocation or inode preallocation depending on the size of * the file. The size of the file could be the resulting file size we * would have after allocation, or the current file size, which ever * is larger. If the size is less than sbi->s_mb_stream_request we * select to use the group preallocation. The default value of * s_mb_stream_request is 16 blocks. This can also be tuned via * /sys/fs/ext4/<partition>/mb_stream_req. The value is represented in * terms of number of blocks. * * The main motivation for having small file use group preallocation is to * ensure that we have small files closer together on the disk. * * First stage the allocator looks at the inode prealloc list, * ext4_inode_info->i_prealloc_list, which contains list of prealloc * spaces for this particular inode. 
The inode prealloc space is * represented as: * * pa_lstart -> the logical start block for this prealloc space * pa_pstart -> the physical start block for this prealloc space * pa_len -> length for this prealloc space (in clusters) * pa_free -> free space available in this prealloc space (in clusters) * * The inode preallocation space is used looking at the _logical_ start * block. If only the logical file block falls within the range of prealloc * space we will consume the particular prealloc space. This makes sure that * we have contiguous physical blocks representing the file blocks * * The important thing to be noted in case of inode prealloc space is that * we don't modify the values associated to inode prealloc space except * pa_free. * * If we are not able to find blocks in the inode prealloc space and if we * have the group allocation flag set then we look at the locality group * prealloc space. These are per CPU prealloc list represented as * * ext4_sb_info.s_locality_groups[smp_processor_id()] * * The reason for having a per cpu locality group is to reduce the contention * between CPUs. It is possible to get scheduled at this point. * * The locality group prealloc space is used looking at whether we have * enough free space (pa_free) within the prealloc space. * * If we can't allocate blocks via inode prealloc or/and locality group * prealloc then we look at the buddy cache. The buddy cache is represented * by ext4_sb_info.s_buddy_cache (struct inode) whose file offset gets * mapped to the buddy and bitmap information regarding different * groups. The buddy information is attached to buddy cache inode so that * we can access them through the page cache. The information regarding * each group is loaded via ext4_mb_load_buddy. The information involve * block bitmap and buddy information. The information are stored in the * inode as: * * { folio } * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]... 
* * * one block each for bitmap and buddy information. So for each group we * take up 2 blocks. A folio can contain blocks_per_folio (folio_size / * blocksize) blocks. So it can have information regarding groups_per_folio * which is blocks_per_folio/2 * * The buddy cache inode is not stored on disk. The inode is thrown * away when the filesystem is unmounted. * * We look for count number of blocks in the buddy cache. If we were able * to locate that many free blocks we return with additional information * regarding rest of the contiguous physical block available * * Before allocating blocks via buddy cache we normalize the request * blocks. This ensures we ask for more blocks than we needed. The extra * blocks that we get after allocation are added to the respective prealloc * list. In case of inode preallocation we follow a list of heuristics * based on file size. This can be found in ext4_mb_normalize_request. If * we are doing a group prealloc we try to normalize the request to * sbi->s_mb_group_prealloc. The default value of s_mb_group_prealloc is * dependent on the cluster size; for non-bigalloc file systems, it is * 512 blocks. This can be tuned via * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in * terms of number of blocks. If we have mounted the file system with -o * stripe=<value> option the group prealloc request is normalized to the * smallest multiple of the stripe value (sbi->s_stripe) which is * greater than the default mb_group_prealloc. * * If "mb_optimize_scan" mount option is set, we maintain in memory group info * structures in two data structures: * * 1) Array of largest free order xarrays (sbi->s_mb_largest_free_orders) * * Locking: Writers use xa_lock, readers use rcu_read_lock. * * This is an array of xarrays where the index in the array represents the * largest free order in the buddy bitmap of the participating group infos of * that xarray. 
So, there are exactly MB_NUM_ORDERS(sb) (which means total * number of buddy bitmap orders possible) number of xarrays. Group-infos are * placed in appropriate xarrays. * * 2) Average fragment size xarrays (sbi->s_mb_avg_fragment_size) * * Locking: Writers use xa_lock, readers use rcu_read_lock. * * This is an array of xarrays where in the i-th xarray there are groups with * average fragment size >= 2^i and < 2^(i+1). The average fragment size * is computed as ext4_group_info->bb_free / ext4_group_info->bb_fragments. * Note that we don't bother with a special xarray for completely empty * groups so we only have MB_NUM_ORDERS(sb) xarrays. Group-infos are placed * in appropriate xarrays. * * In xarray, the index is the block group number, the value is the block group * information, and a non-empty value indicates the block group is present in * the current xarray. * * When "mb_optimize_scan" mount option is set, mballoc consults the above data * structures to decide the order in which groups are to be traversed for * fulfilling an allocation request. * * At CR_POWER2_ALIGNED , we look for groups which have the largest_free_order * >= the order of the request. We directly look at the largest free order list * in the data structure (1) above where largest_free_order = order of the * request. If that list is empty, we look at remaining list in the increasing * order of largest_free_order. This allows us to perform CR_POWER2_ALIGNED * lookup in O(1) time. * * At CR_GOAL_LEN_FAST, we only consider groups where * average fragment size > request size. So, we lookup a group which has average * fragment size just above or equal to request size using our average fragment * size group lists (data structure 2) in O(1) time. * * At CR_BEST_AVAIL_LEN, we aim to optimize allocations which can't be satisfied * in CR_GOAL_LEN_FAST. The fact that we couldn't find a group in * CR_GOAL_LEN_FAST suggests that there is no BG that has avg * fragment size > goal length. 
So before falling to the slower * CR_GOAL_LEN_SLOW, in CR_BEST_AVAIL_LEN we proactively trim goal length and * then use the same fragment lists as CR_GOAL_LEN_FAST to find a BG with a big * enough average fragment size. This increases the chances of finding a * suitable block group in O(1) time and results in faster allocation at the * cost of reduced size of allocation. * * If "mb_optimize_scan" mount option is not set, mballoc traverses groups in * linear order which requires O(N) search time for each CR_POWER2_ALIGNED and * CR_GOAL_LEN_FAST phase. * * The regular allocator (using the buddy cache) supports a few tunables. * * /sys/fs/ext4/<partition>/mb_min_to_scan * /sys/fs/ext4/<partition>/mb_max_to_scan * /sys/fs/ext4/<partition>/mb_order2_req * /sys/fs/ext4/<partition>/mb_max_linear_groups * * The regular allocator uses buddy scan only if the request len is power of * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The * value of s_mb_order2_reqs can be tuned via * /sys/fs/ext4/<partition>/mb_order2_req. If the request len is equal to * stripe size (sbi->s_stripe), we try to search for contiguous block in * stripe size. This should result in better allocation on RAID setups. If * not, we search in the specific group using bitmap for best extents. The * tunable min_to_scan and max_to_scan control the behaviour here. * min_to_scan indicate how long the mballoc __must__ look for a best * extent and max_to_scan indicates how long the mballoc __can__ look for a * best extent in the found extents. Searching for the blocks starts with * the group specified as the goal value in allocation context via * ac_g_ex. Each group is first checked based on the criteria whether it * can be used for allocation. ext4_mb_good_group explains how the groups are * checked. * * When "mb_optimize_scan" is turned on, as mentioned above, the groups may not * get traversed linearly. That may result in subsequent allocations being not * close to each other. 
And so, the underlying device may get filled up in a * non-linear fashion. While that may not matter on non-rotational devices, for * rotational devices that may result in higher seek times. "mb_max_linear_groups" * tells mballoc how many groups mballoc should search linearly before * consulting the above data structures for more efficient lookups. For * non-rotational devices, this value defaults to 0 and for rotational devices * this is set to MB_DEFAULT_LINEAR_LIMIT. * * Both prealloc spaces are populated as above. So for the first * request we will hit the buddy cache which will result in this prealloc * space getting filled. The prealloc space is then later used for the * subsequent request. */ /* * mballoc operates on the following data: * - on-disk bitmap * - in-core buddy (actually includes buddy and bitmap) * - preallocation descriptors (PAs) * * there are two types of preallocations: * - inode * assigned to specific inode and can be used for this inode only. * it describes part of inode's space preallocated to specific * physical blocks. any block from that preallocation can be used * independently. the descriptor just tracks number of blocks left * unused. so, before taking some block from descriptor, one must * make sure the corresponding logical block isn't allocated yet. this * also means that freeing any block within descriptor's range * must discard all preallocated blocks. * - locality group * assigned to specific locality group which does not translate to * permanent set of inodes: inode can join and leave group. space * from this type of preallocation can be used for any inode. thus * it's consumed from the beginning to the end. 
* * relation between them can be expressed as: * in-core buddy = on-disk bitmap + preallocation descriptors * * this means the blocks mballoc considers used are: * - allocated blocks (persistent) * - preallocated blocks (non-persistent) * * consistency in mballoc world means that at any time a block is either * free or used in ALL structures. notice: "any time" should not be read * literally -- time is discrete and delimited by locks. * * to keep it simple, we don't use block numbers, instead we count number of * blocks: how many blocks marked used/free in on-disk bitmap, buddy and PA. * * all operations can be expressed as: * - init buddy: buddy = on-disk + PAs * - new PA: buddy += N; PA = N * - use inode PA: on-disk += N; PA -= N * - discard inode PA: buddy -= on-disk - PA; PA = 0 * - use locality group PA: on-disk += N; PA -= N * - discard locality group PA: buddy -= PA; PA = 0 * note: 'buddy -= on-disk - PA' is used to show that on-disk bitmap * is used in real operation because we can't know actual used * bits from PA, only from on-disk bitmap * * if we follow this strict logic, then all operations above should be atomic. * given some of them can block, we'd have to use something like semaphores * killing performance on high-end SMP hardware. let's try to relax it using * the following knowledge: * 1) if buddy is referenced, it's already initialized * 2) while block is used in buddy and the buddy is referenced, * nobody can re-allocate that block * 3) we work on bitmaps and '+' actually means 'set bits'. if on-disk has * bit set and PA claims same block, it's OK. IOW, one can set bit in * on-disk bitmap if buddy has same bit set or/and PA covers the corresponding * block * * so, now we're building a concurrency table: * - init buddy vs. 
* - new PA * blocks for PA are allocated in the buddy, buddy must be referenced * until PA is linked to allocation group to avoid concurrent buddy init * - use inode PA * we need to make sure that either on-disk bitmap or PA has up-to-date data * given (3) we care that PA-=N operation doesn't interfere with init * - discard inode PA * the simplest way would be to have buddy initialized by the discard * - use locality group PA * again PA-=N must be serialized with init * - discard locality group PA * the simplest way would be to have buddy initialized by the discard * - new PA vs. * - use inode PA * i_data_sem serializes them * - discard inode PA * discard process must wait until PA isn't used by another process * - use locality group PA * some mutex should serialize them * - discard locality group PA * discard process must wait until PA isn't used by another process * - use inode PA vs. * - use inode PA * i_data_sem or another mutex should serialize them * - discard inode PA * discard process must wait until PA isn't used by another process * - use locality group PA * nothing wrong here -- they're different PAs covering different blocks * - discard locality group PA * discard process must wait until PA isn't used by another process * * now we're ready to draw a few conclusions: * - PA is referenced and while it is no discard is possible * - PA is referenced until block isn't marked in on-disk bitmap * - PA changes only after on-disk bitmap * - discard must not compete with init. either init is done before * any discard or they're serialized somehow * - buddy init as sum of on-disk bitmap and PAs is done atomically * * a special case when we've used PA to emptiness. 
no need to modify buddy * in this case, but we should care about concurrent init * */ /* * Logic in few words: * * - allocation: * load group * find blocks * mark bits in on-disk bitmap * release group * * - use preallocation: * find proper PA (per-inode or group) * load group * mark bits in on-disk bitmap * release group * release PA * * - free: * load group * mark bits in on-disk bitmap * release group * * - discard preallocations in group: * mark PAs deleted * move them onto local list * load on-disk bitmap * load group * remove PA from object (inode or locality group) * mark free blocks in-core * * - discard inode's preallocations: */ /* * Locking rules * * Locks: * - bitlock on a group (group) * - object (inode/locality) (object) * - per-pa lock (pa) * - cr_power2_aligned lists lock (cr_power2_aligned) * - cr_goal_len_fast lists lock (cr_goal_len_fast) * * Paths: * - new pa * object * group * * - find and use pa: * pa * * - release consumed pa: * pa * group * object * * - generate in-core bitmap: * group * pa * * - discard all for given object (inode, locality group): * object * pa * group * * - discard all for given group: * group * pa * group * object * * - allocation path (ext4_mb_regular_allocator) * group * cr_power2_aligned/cr_goal_len_fast */ static struct kmem_cache *ext4_pspace_cachep; static struct kmem_cache *ext4_ac_cachep; static struct kmem_cache *ext4_free_data_cachep; /* We create slab caches for groupinfo data structures based on the * superblock block size. 
There will be one per mounted filesystem for
 * each unique s_blocksize_bits */
/* Number of entries in the groupinfo cache table and in its name table. */
#define NR_GRPINFO_CACHES	8
static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];

/*
 * Slab names for the groupinfo caches: NR_GRPINFO_CACHES entries, labelled
 * by size from 1k up to 128k.
 * NOTE(review): presumably indexed in step with ext4_groupinfo_caches[];
 * the indexing code is not visible here -- confirm against the users.
 */
static const char * const ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
	"ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
	"ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
	"ext4_groupinfo_64k", "ext4_groupinfo_128k"
};

/*
 * Prototypes for file-local (static) helpers, declared here ahead of use.
 * Their definitions are not part of this section of the file.
 */
static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
					ext4_group_t group);
static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac);
static int ext4_mb_scan_group(struct ext4_allocation_context *ac,
			      ext4_group_t group);

static int ext4_try_to_trim_range(struct super_block *sb,
		struct ext4_buddy *e4b, ext4_grpblk_t start,
		ext4_grpblk_t max, ext4_grpblk_t minblocks);

/*
 * The algorithm using this percpu seq counter goes below:
 * 1. We sample the percpu discard_pa_seq counter before trying for block
 *    allocation in ext4_mb_new_blocks().
 * 2. We increment this percpu discard_pa_seq counter when we either allocate
 *    or free these blocks i.e. while marking those blocks as used/free in
 *    mb_mark_used()/mb_free_blocks().
 * 3. We also increment this percpu seq counter when we successfully identify
 *    that the bb_prealloc_list is not empty and hence proceed for discarding
 *    of those PAs inside ext4_mb_discard_group_preallocations().
 *
 * Now to make sure that the regular fast path of block allocation is not
 * affected, as a small optimization we only sample the percpu seq counter
 * on that cpu. Only when the block allocation fails and when freed blocks
 * found were 0, that is when we sample percpu seq counter for all cpus using
 * below function ext4_get_discard_pa_seq_sum(). This happens after making
 * sure that all the PAs on grp->bb_prealloc_list got freed or if it's empty.
*/ static DEFINE_PER_CPU(u64, discard_pa_seq); static inline u64 ext4_get_discard_pa_seq_sum(void) { int __cpu; u64 __seq = 0; for_each_possible_cpu(__cpu) __seq += per_cpu(discard_pa_seq, __cpu); return __seq; } static inline void *mb_correct_addr_and_bit(int *bit, void *addr) { #if BITS_PER_LONG == 64 *bit += ((unsigned long) addr & 7UL) << 3; addr = (void *) ((unsigned long) addr & ~7UL); #elif BITS_PER_LONG == 32 *bit += ((unsigned long) addr & 3UL) << 3; addr = (void *) ((unsigned long) addr & ~3UL); #else #error "how many bits you are?!" #endif return addr; } static inline int mb_test_bit(int bit, void *addr) { /* * ext4_test_bit on architecture like powerpc * needs unsigned long aligned address */ addr = mb_correct_addr_and_bit(&bit, addr); return ext4_test_bit(bit, addr); } static inline void mb_set_bit(int bit, void *addr) { addr = mb_correct_addr_and_bit(&bit, addr); ext4_set_bit(bit, addr); } static inline void mb_clear_bit(int bit, void *addr) { addr = mb_correct_addr_and_bit(&bit, addr); ext4_clear_bit(bit, addr); } static inline int mb_test_and_clear_bit(int bit, void *addr) { addr = mb_correct_addr_and_bit(&bit, addr); return ext4_test_and_clear_bit(bit, addr); } static inline int mb_find_next_zero_bit(void *addr, int max, int start) { int fix = 0, ret, tmpmax; addr = mb_correct_addr_and_bit(&fix, addr); tmpmax = max + fix; start += fix; ret = ext4_find_next_zero_bit(addr, tmpmax, start) - fix; if (ret > max) return max; return ret; } static inline int mb_find_next_bit(void *addr, int max, int start) { int fix = 0, ret, tmpmax; addr = mb_correct_addr_and_bit(&fix, addr); tmpmax = max + fix; start += fix; ret = ext4_find_next_bit(addr, tmpmax, start) - fix; if (ret > max) return max; return ret; } static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max) { char *bb; BUG_ON(e4b->bd_bitmap == e4b->bd_buddy); BUG_ON(max == NULL); if (order > e4b->bd_blkbits + 1) { *max = 0; return NULL; } /* at order 0 we see each particular block */ if 
(order == 0) { *max = 1 << (e4b->bd_blkbits + 3); return e4b->bd_bitmap; } bb = e4b->bd_buddy + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order]; *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order]; return bb; } #ifdef DOUBLE_CHECK static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b, int first, int count) { int i; struct super_block *sb = e4b->bd_sb; if (unlikely(e4b->bd_info->bb_bitmap == NULL)) return; assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group)); for (i = 0; i < count; i++) { if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) { ext4_fsblk_t blocknr; blocknr = ext4_group_first_block_no(sb, e4b->bd_group); blocknr += EXT4_C2B(EXT4_SB(sb), first + i); ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, EXT4_GROUP_INFO_BBITMAP_CORRUPT); ext4_grp_locked_error(sb, e4b->bd_group, inode ? inode->i_ino : 0, blocknr, "freeing block already freed " "(bit %u)", first + i); } mb_clear_bit(first + i, e4b->bd_info->bb_bitmap); } } static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count) { int i; if (unlikely(e4b->bd_info->bb_bitmap == NULL)) return; assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); for (i = 0; i < count; i++) { BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap)); mb_set_bit(first + i, e4b->bd_info->bb_bitmap); } } static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap) { if (unlikely(e4b->bd_info->bb_bitmap == NULL)) return; if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) { unsigned char *b1, *b2; int i; b1 = (unsigned char *) e4b->bd_info->bb_bitmap; b2 = (unsigned char *) bitmap; for (i = 0; i < e4b->bd_sb->s_blocksize; i++) { if (b1[i] != b2[i]) { ext4_msg(e4b->bd_sb, KERN_ERR, "corruption in group %u " "at byte %u(%u): %x in copy != %x " "on disk/prealloc", e4b->bd_group, i, i * 8, b1[i], b2[i]); BUG(); } } } } static void mb_group_bb_bitmap_alloc(struct super_block *sb, struct ext4_group_info *grp, ext4_group_t group) { struct buffer_head *bh; 
grp->bb_bitmap = kmalloc(sb->s_blocksize, GFP_NOFS); if (!grp->bb_bitmap) return; bh = ext4_read_block_bitmap(sb, group); if (IS_ERR_OR_NULL(bh)) { kfree(grp->bb_bitmap); grp->bb_bitmap = NULL; return; } memcpy(grp->bb_bitmap, bh->b_data, sb->s_blocksize); put_bh(bh); } static void mb_group_bb_bitmap_free(struct ext4_group_info *grp) { kfree(grp->bb_bitmap); } #else static inline void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b, int first, int count) { return; } static inline void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count) { return; } static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap) { return; } static inline void mb_group_bb_bitmap_alloc(struct super_block *sb, struct ext4_group_info *grp, ext4_group_t group) { return; } static inline void mb_group_bb_bitmap_free(struct ext4_group_info *grp) { return; } #endif #ifdef AGGRESSIVE_CHECK #define MB_CHECK_ASSERT(assert) \ do { \ if (!(assert)) { \ printk(KERN_EMERG \ "Assertion failure in %s() at %s:%d: \"%s\"\n", \ function, file, line, # assert); \ BUG(); \ } \ } while (0) /* * Perform buddy integrity check with the following steps: * * 1. Top-down validation (from highest order down to order 1, excluding order-0 bitmap): * For each pair of adjacent orders, if a higher-order bit is set (indicating a free block), * at most one of the two corresponding lower-order bits may be clear (free). * * 2. Order-0 (bitmap) validation, performed on bit pairs: * - If either bit in a pair is set (1, allocated), then all corresponding higher-order bits * must not be free (0). * - If both bits in a pair are clear (0, free), then exactly one of the corresponding * higher-order bits must be free (0). * * 3. Preallocation (pa) list validation: * For each preallocated block (pa) in the group: * - Verify that pa_pstart falls within the bounds of this block group. * - Ensure the corresponding bit(s) in the order-0 bitmap are marked as allocated (1). 
*/
/*
 * @e4b:      buddy descriptor of the group to verify
 * @file:     call-site file name, printed by MB_CHECK_ASSERT() on failure
 * @function: call-site function name, printed by MB_CHECK_ASSERT() on failure
 * @line:     call-site line number, printed by MB_CHECK_ASSERT() on failure
 *
 * The walk is skipped unless bb_check_counter is a multiple of 10, so only
 * one call in ten per group actually verifies anything.
 */
static void __mb_check_buddy(struct ext4_buddy *e4b, char *file,
				const char *function, int line)
{
	struct super_block *sb = e4b->bd_sb;
	int order = e4b->bd_blkbits + 1;
	int max;
	int max2;
	int i;
	int j;
	int k;
	int count;
	struct ext4_group_info *grp;
	int fragments = 0;
	int fstart;
	struct list_head *cur;
	void *buddy;
	void *buddy2;

	/* rate limit: post-increment, so the first call on a group checks */
	if (e4b->bd_info->bb_check_counter++ % 10)
		return;

	/* step 1: compare every order > 1 against the order right below it */
	while (order > 1) {
		buddy = mb_find_buddy(e4b, order, &max);
		MB_CHECK_ASSERT(buddy);
		buddy2 = mb_find_buddy(e4b, order - 1, &max2);
		MB_CHECK_ASSERT(buddy2);
		MB_CHECK_ASSERT(buddy != buddy2);
		MB_CHECK_ASSERT(max * 2 == max2);

		/* count = number of clear (free) bits found at this order */
		count = 0;
		for (i = 0; i < max; i++) {

			if (mb_test_bit(i, buddy)) {
				/* only single bit in buddy2 may be 0 */
				if (!mb_test_bit(i << 1, buddy2)) {
					MB_CHECK_ASSERT(
						mb_test_bit((i<<1)+1, buddy2));
				}
				continue;
			}

			count++;
		}
		/* the cached per-order counter must match what the bitmap says */
		MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count);
		order--;
	}

	/*
	 * step 2: walk the order-0 bitmap. fstart is the start of the free
	 * run currently being walked, or -1 while on allocated bits; each
	 * transition into a free run counts as one fragment.
	 */
	fstart = -1;
	buddy = mb_find_buddy(e4b, 0, &max);
	for (i = 0; i < max; i++) {
		if (!mb_test_bit(i, buddy)) {
			/* no free bit may sit below the cached first-free hint */
			MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free);
			if (fstart == -1) {
				fragments++;
				fstart = i;
			}
		} else {
			fstart = -1;
		}
		/* pair check, done once per pair on the even bit */
		if (!(i & 1)) {
			int in_use, zero_bit_count = 0;

			in_use = mb_test_bit(i, buddy) ||
				mb_test_bit(i + 1, buddy);
			/* count the higher orders whose covering bit is free */
			for (j = 1; j < e4b->bd_blkbits + 2; j++) {
				buddy2 = mb_find_buddy(e4b, j, &max2);
				k = i >> j;
				MB_CHECK_ASSERT(k < max2);
				if (!mb_test_bit(k, buddy2))
					zero_bit_count++;
			}
			/* none if the pair is in use, exactly one otherwise */
			MB_CHECK_ASSERT(zero_bit_count == !in_use);
		}
	}
	MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info));
	MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);

	/* step 3: every preallocation must be marked used in the bitmap */
	grp = ext4_get_group_info(sb, e4b->bd_group);
	if (!grp)
		return;
	list_for_each(cur, &grp->bb_prealloc_list) {
		ext4_group_t groupnr;
		struct ext4_prealloc_space *pa;

		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
		/* zero-length preallocations have nothing to verify */
		if (!pa->pa_len)
			continue;
		/* k receives the offset of pa_pstart within its group */
		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
		MB_CHECK_ASSERT(groupnr == e4b->bd_group);
		for (i = 0; i < pa->pa_len; i++)
			MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
	}
}
#undef MB_CHECK_ASSERT
/* record the call site so a failed assertion names the caller */
#define mb_check_buddy(e4b) __mb_check_buddy(e4b,	\
					__FILE__, __func__, __LINE__)
#else
/* expands to nothing in this branch of the conditional */
#define mb_check_buddy(e4b)
#endif

/*
 * Divide the free range starting at @first with length @len into
 * power-of-two sized, naturally aligned chunks.
 * For each chunk, bump bb_counters[] of the matching order and, for
 * orders above 0, clear the chunk's bit in that order's part of @buddy.
 *
 * @sb:    super block, gives block size and the per-order buddy offsets
 * @buddy: start of the group's buddy data
 * @first: first free bit of the range
 * @len:   number of free bits; BUG if above EXT4_CLUSTERS_PER_GROUP(sb)
 * @grp:   group info whose bb_counters[] are updated
 */
static void ext4_mb_mark_free_simple(struct super_block *sb,
				void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
					struct ext4_group_info *grp)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_grpblk_t min;
	ext4_grpblk_t max;
	ext4_grpblk_t chunk;
	unsigned int border;

	BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));

	/*
	 * OR-ing this bit into @first caps the alignment order computed
	 * below at s_blocksize_bits + 1, and gives ffs() a bit to find
	 * when @first is 0.
	 */
	border = 2 << sb->s_blocksize_bits;

	while (len > 0) {
		/* largest order this position is aligned to */
		max = ffs(first | border) - 1;

		/* largest order that still fits into the remaining length */
		min = fls(len) - 1;

		if (max < min)
			min = max;
		chunk = 1 << min;

		/*
		 * Every chunk is counted, but only multiblock chunks
		 * (order > 0) have a bit to clear in the buddy data.
		 */
		grp->bb_counters[min]++;
		if (min > 0)
			mb_clear_bit(first >> min,
				     buddy + sbi->s_mb_offsets[min]);

		len -= chunk;
		first += chunk;
	}
}

/*
 * Map an average fragment length to an index for s_mb_avg_fragment_size[].
 * Returns fls(@len) - 2 clamped to [0, MB_NUM_ORDERS(sb) - 1]; a value above
 * MB_NUM_ORDERS(sb) additionally triggers a one-time warning.
 */
static int mb_avg_fragment_size_order(struct super_block *sb, ext4_grpblk_t len)
{
	int order;

	/*
	 * We don't bother with separate entries for groups that only have
	 * single-block free extents or for completely empty groups.
	 */
	order = fls(len) - 2;
	if (order < 0)
		return 0;
	if (order == MB_NUM_ORDERS(sb))
		order--;
	if (WARN_ON_ONCE(order > MB_NUM_ORDERS(sb)))
		order = MB_NUM_ORDERS(sb) - 1;
	return order;
}

/*
 * Move the group to the s_mb_avg_fragment_size[] xarray that matches its
 * current average free fragment size (bb_free / bb_fragments). A group with
 * no fragments gets order -1 and is kept out of every xarray. Does nothing
 * unless the MB_OPTIMIZE_SCAN option is set.
 */
static void
mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int new, old;

	if (!test_opt2(sb, MB_OPTIMIZE_SCAN))
		return;

	old = grp->bb_avg_fragment_size_order;
	/* the ternary also guards the division against bb_fragments == 0 */
	new = grp->bb_fragments == 0 ? -1 :
	      mb_avg_fragment_size_order(sb, grp->bb_free / grp->bb_fragments);
	if (new == old)
		return;

	if (old >= 0)
		xa_erase(&sbi->s_mb_avg_fragment_size[old], grp->bb_group);

	grp->bb_avg_fragment_size_order = new;
	if (new >= 0) {
		/*
		 * Cannot use __GFP_NOFAIL because we hold the group lock.
		 * Although the allocation for the insertion may fail, that
		 * is not fatal as we have linear traversal to fall back on.
		 */
		int err = xa_insert(&sbi->s_mb_avg_fragment_size[new],
				    grp->bb_group, grp, GFP_ATOMIC);
		if (err)
			mb_debug(sb, "insert group: %u to s_mb_avg_fragment_size[%d] failed, err %d",
				 grp->bb_group, new, err);
	}
}

/*
 * Number of groups this allocation may use: the full group count, or
 * s_blockfile_groups when the inode does not have the EXTENTS flag.
 */
static ext4_group_t ext4_get_allocation_groups_count(
				struct ext4_allocation_context *ac)
{
	ext4_group_t ngroups = ext4_get_groups_count(ac->ac_sb);

	/* non-extent files are limited to low blocks/groups */
	if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
		ngroups = EXT4_SB(ac->ac_sb)->s_blockfile_groups;

	/* Pairs with smp_wmb() in ext4_update_super() */
	smp_rmb();

	return ngroups;
}

/*
 * Call ext4_mb_scan_group() on every group stored in @xa whose index lies
 * in [@start, @end).
 *
 * Returns 0 after a one-time warning when the range is empty, inverted or
 * reaches past the allocation group count. Otherwise stops at the first
 * group for which the scan fails or leaves ac_status different from
 * AC_STATUS_CONTINUE and returns that scan's result; returns 0 when the
 * whole range was walked.
 */
static int ext4_mb_scan_groups_xa_range(struct ext4_allocation_context *ac,
					struct xarray *xa,
					ext4_group_t start, ext4_group_t end)
{
	struct super_block *sb = ac->ac_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	enum criteria cr = ac->ac_criteria;
	ext4_group_t ngroups = ext4_get_allocation_groups_count(ac);
	unsigned long group = start;
	struct ext4_group_info *grp;

	if (WARN_ON_ONCE(end > ngroups || start >= end))
		return 0;

	/* the iterator's last index is inclusive, hence end - 1 */
	xa_for_each_range(xa, group, grp, start, end - 1) {
		int err;

		if (sbi->s_mb_stats)
			atomic64_inc(&sbi->s_bal_cX_groups_considered[cr]);

		err = ext4_mb_scan_group(ac, grp->bb_group);
		if (err || ac->ac_status != AC_STATUS_CONTINUE)
			return err;

		cond_resched();
	}

	return 0;
}

/*
 * Find a suitable group of given order from the largest free orders xarray.
*/ static inline int ext4_mb_scan_groups_largest_free_order_range(struct ext4_allocation_context *ac, int order, ext4_group_t start, ext4_group_t end) { struct xarray *xa = &EXT4_SB(ac->ac_sb)->s_mb_largest_free_orders[order]; if (xa_empty(xa)) return 0; return ext4_mb_scan_groups_xa_range(ac, xa, start, end); } /* * Choose next group by traversing largest_free_order lists. Updates *new_cr if * cr level needs an update. */ static int ext4_mb_scan_groups_p2_aligned(struct ext4_allocation_context *ac, ext4_group_t group) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); int i; int ret = 0; ext4_group_t start, end; start = group; end = ext4_get_allocation_groups_count(ac); wrap_around: for (i = ac->ac_2order; i < MB_NUM_ORDERS(ac->ac_sb); i++) { ret = ext4_mb_scan_groups_largest_free_order_range(ac, i, start, end); if (ret || ac->ac_status != AC_STATUS_CONTINUE) return ret; } if (start) { end = start; start = 0; goto wrap_around; } if (sbi->s_mb_stats) atomic64_inc(&sbi->s_bal_cX_failed[ac->ac_criteria]); /* Increment cr and search again if no group is found */ ac->ac_criteria = CR_GOAL_LEN_FAST; return ret; } /* * Find a suitable group of given order from the average fragments xarray. */ static int ext4_mb_scan_groups_avg_frag_order_range(struct ext4_allocation_context *ac, int order, ext4_group_t start, ext4_group_t end) { struct xarray *xa = &EXT4_SB(ac->ac_sb)->s_mb_avg_fragment_size[order]; if (xa_empty(xa)) return 0; return ext4_mb_scan_groups_xa_range(ac, xa, start, end); } /* * Choose next group by traversing average fragment size list of suitable * order. Updates *new_cr if cr level needs an update. 
*/ static int ext4_mb_scan_groups_goal_fast(struct ext4_allocation_context *ac, ext4_group_t group) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); int i, ret = 0; ext4_group_t start, end; start = group; end = ext4_get_allocation_groups_count(ac); wrap_around: i = mb_avg_fragment_size_order(ac->ac_sb, ac->ac_g_ex.fe_len); for (; i < MB_NUM_ORDERS(ac->ac_sb); i++) { ret = ext4_mb_scan_groups_avg_frag_order_range(ac, i, start, end); if (ret || ac->ac_status != AC_STATUS_CONTINUE) return ret; } if (start) { end = start; start = 0; goto wrap_around; } if (sbi->s_mb_stats) atomic64_inc(&sbi->s_bal_cX_failed[ac->ac_criteria]); /* * CR_BEST_AVAIL_LEN works based on the concept that we have * a larger normalized goal len request which can be trimmed to * a smaller goal len such that it can still satisfy original * request len. However, allocation request for non-regular * files never gets normalized. * See function ext4_mb_normalize_request() (EXT4_MB_HINT_DATA). */ if (ac->ac_flags & EXT4_MB_HINT_DATA) ac->ac_criteria = CR_BEST_AVAIL_LEN; else ac->ac_criteria = CR_GOAL_LEN_SLOW; return ret; } /* * We couldn't find a group in CR_GOAL_LEN_FAST so try to find the highest free fragment * order we have and proactively trim the goal request length to that order to * find a suitable group faster. * * This optimizes allocation speed at the cost of slightly reduced * preallocations. However, we make sure that we don't trim the request too * much and fall to CR_GOAL_LEN_SLOW in that case. */ static int ext4_mb_scan_groups_best_avail(struct ext4_allocation_context *ac, ext4_group_t group) { int ret = 0; struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); int i, order, min_order; unsigned long num_stripe_clusters = 0; ext4_group_t start, end; /* * mb_avg_fragment_size_order() returns order in a way that makes * retrieving back the length using (1 << order) inaccurate. Hence, use * fls() instead since we need to know the actual length while modifying * goal length. 
*/ order = fls(ac->ac_g_ex.fe_len) - 1; if (WARN_ON_ONCE(order - 1 > MB_NUM_ORDERS(ac->ac_sb))) order = MB_NUM_ORDERS(ac->ac_sb); min_order = order - sbi->s_mb_best_avail_max_trim_order; if (min_order < 0) min_order = 0; if (sbi->s_stripe > 0) { /* * We are assuming that stripe size is always a multiple of * cluster ratio otherwise __ext4_fill_super exists early. */ num_stripe_clusters = EXT4_NUM_B2C(sbi, sbi->s_stripe); if (1 << min_order < num_stripe_clusters) /* * We consider 1 order less because later we round * up the goal len to num_stripe_clusters */ min_order = fls(num_stripe_clusters) - 1; } if (1 << min_order < ac->ac_o_ex.fe_len) min_order = fls(ac->ac_o_ex.fe_len); start = group; end = ext4_get_allocation_groups_count(ac); wrap_around: for (i = order; i >= min_order; i--) { int frag_order; /* * Scale down goal len to make sure we find something * in the free fragments list. Basically, reduce * preallocations. */ ac->ac_g_ex.fe_len = 1 << i; if (num_stripe_clusters > 0) { /* * Try to round up the adjusted goal length to * stripe size (in cluster units) multiple for * efficiency. */ ac->ac_g_ex.fe_len = roundup(ac->ac_g_ex.fe_len, num_stripe_clusters); } frag_order = mb_avg_fragment_size_order(ac->ac_sb, ac->ac_g_ex.fe_len); ret = ext4_mb_scan_groups_avg_frag_order_range(ac, frag_order, start, end); if (ret || ac->ac_status != AC_STATUS_CONTINUE) return ret; } if (start) { end = start; start = 0; goto wrap_around; } /* Reset goal length to original goal length before falling into CR_GOAL_LEN_SLOW */ ac->ac_g_ex.fe_len = ac->ac_orig_goal_len; if (sbi->s_mb_stats) atomic64_inc(&sbi->s_bal_cX_failed[ac->ac_criteria]); ac->ac_criteria = CR_GOAL_LEN_SLOW; return ret; } static inline int should_optimize_scan(struct ext4_allocation_context *ac) { if (unlikely(!test_opt2(ac->ac_sb, MB_OPTIMIZE_SCAN))) return 0; if (ac->ac_criteria >= CR_GOAL_LEN_SLOW) return 0; return 1; } /* * next linear group for allocation. 
*/ static void next_linear_group(ext4_group_t *group, ext4_group_t ngroups) { /* * Artificially restricted ngroups for non-extent * files makes group > ngroups possible on first loop. */ *group = *group + 1 >= ngroups ? 0 : *group + 1; } static int ext4_mb_scan_groups_linear(struct ext4_allocation_context *ac, ext4_group_t ngroups, ext4_group_t *start, ext4_group_t count) { int ret, i; enum criteria cr = ac->ac_criteria; struct super_block *sb = ac->ac_sb; struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_group_t group = *start; for (i = 0; i < count; i++, next_linear_group(&group, ngroups)) { ret = ext4_mb_scan_group(ac, group); if (ret || ac->ac_status != AC_STATUS_CONTINUE) return ret; cond_resched(); } *start = group; if (count == ngroups) ac->ac_criteria++; /* Processed all groups and haven't found blocks */ if (sbi->s_mb_stats && i == ngroups) atomic64_inc(&sbi->s_bal_cX_failed[cr]); return 0; } static int ext4_mb_scan_groups(struct ext4_allocation_context *ac) { int ret = 0; ext4_group_t start; struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); ext4_group_t ngroups = ext4_get_allocation_groups_count(ac); /* searching for the right group start from the goal value specified */ start = ac->ac_g_ex.fe_group; if (start >= ngroups) start = 0; ac->ac_prefetch_grp = start; ac->ac_prefetch_nr = 0; if (!should_optimize_scan(ac)) return ext4_mb_scan_groups_linear(ac, ngroups, &start, ngroups); /* * Optimized scanning can return non adjacent groups which can cause * seek overhead for rotational disks. So try few linear groups before * trying optimized scan. 
*/ if (sbi->s_mb_max_linear_groups) ret = ext4_mb_scan_groups_linear(ac, ngroups, &start, sbi->s_mb_max_linear_groups); if (ret || ac->ac_status != AC_STATUS_CONTINUE) return ret; switch (ac->ac_criteria) { case CR_POWER2_ALIGNED: return ext4_mb_scan_groups_p2_aligned(ac, start); case CR_GOAL_LEN_FAST: return ext4_mb_scan_groups_goal_fast(ac, start); case CR_BEST_AVAIL_LEN: return ext4_mb_scan_groups_best_avail(ac, start); default: /* * TODO: For CR_GOAL_LEN_SLOW, we can arrange groups in an * rb tree sorted by bb_free. But until that happens, we should * never come here. */ WARN_ON(1); } return 0; } /* * Cache the order of the largest free extent we have available in this block * group. */ static void mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp) { struct ext4_sb_info *sbi = EXT4_SB(sb); int new, old = grp->bb_largest_free_order; for (new = MB_NUM_ORDERS(sb) - 1; new >= 0; new--) if (grp->bb_counters[new] > 0) break; /* No need to move between order lists? */ if (new == old) return; if (old >= 0) { struct xarray *xa = &sbi->s_mb_largest_free_orders[old]; if (!xa_empty(xa) && xa_load(xa, grp->bb_group)) xa_erase(xa, grp->bb_group); } grp->bb_largest_free_order = new; if (test_opt2(sb, MB_OPTIMIZE_SCAN) && new >= 0 && grp->bb_free) { /* * Cannot use __GFP_NOFAIL because we hold the group lock. * Although allocation for insertion may fails, it's not fatal * as we have linear traversal to fall back on. 
*/ int err = xa_insert(&sbi->s_mb_largest_free_orders[new], grp->bb_group, grp, GFP_ATOMIC); if (err) mb_debug(sb, "insert group: %u to s_mb_largest_free_orders[%d] failed, err %d", grp->bb_group, new, err); } } static noinline_for_stack void ext4_mb_generate_buddy(struct super_block *sb, void *buddy, void *bitmap, ext4_group_t group, struct ext4_group_info *grp) { struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb); ext4_grpblk_t i = 0; ext4_grpblk_t first; ext4_grpblk_t len; unsigned free = 0; unsigned fragments = 0; unsigned long long period = get_cycles(); /* initialize buddy from bitmap which is aggregation * of on-disk bitmap and preallocations */ i = mb_find_next_zero_bit(bitmap, max, 0); grp->bb_first_free = i; while (i < max) { fragments++; first = i; i = mb_find_next_bit(bitmap, max, i); len = i - first; free += len; if (len > 1) ext4_mb_mark_free_simple(sb, buddy, first, len, grp); else grp->bb_counters[0]++; if (i < max) i = mb_find_next_zero_bit(bitmap, max, i); } grp->bb_fragments = fragments; if (free != grp->bb_free) { ext4_grp_locked_error(sb, group, 0, 0, "block bitmap and bg descriptor " "inconsistent: %u vs %u free clusters", free, grp->bb_free); /* * If we intend to continue, we consider group descriptor * corrupt and update bb_free using bitmap value */ grp->bb_free = free; ext4_mark_group_bitmap_corrupted(sb, group, EXT4_GROUP_INFO_BBITMAP_CORRUPT); } mb_set_largest_free_order(sb, grp); mb_update_avg_fragment_size(sb, grp); clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); period = get_cycles() - period; atomic_inc(&sbi->s_mb_buddies_generated); atomic64_add(period, &sbi->s_mb_generation_time); } static void mb_regenerate_buddy(struct ext4_buddy *e4b) { int count; int order = 1; void *buddy; while ((buddy = mb_find_buddy(e4b, order++, &count))) mb_set_bits(buddy, 0, count); e4b->bd_info->bb_fragments = 0; memset(e4b->bd_info->bb_counters, 0, sizeof(*e4b->bd_info->bb_counters) * 
(e4b->bd_sb->s_blocksize_bits + 2)); ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy, e4b->bd_bitmap, e4b->bd_group, e4b->bd_info); } /* The buddy information is attached the buddy cache inode * for convenience. The information regarding each group * is loaded via ext4_mb_load_buddy. The information involve * block bitmap and buddy information. The information are * stored in the inode as * * { folio } * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]... * * * one block each for bitmap and buddy information. * So for each group we take up 2 blocks. A folio can * contain blocks_per_folio (folio_size / blocksize) blocks. * So it can have information regarding groups_per_folio which * is blocks_per_folio/2 * * Locking note: This routine takes the block group lock of all groups * for this folio; do not hold this lock when calling this routine! */ static int ext4_mb_init_cache(struct folio *folio, char *incore, gfp_t gfp) { ext4_group_t ngroups; unsigned int blocksize; int blocks_per_folio; int groups_per_folio; int err = 0; int i; ext4_group_t first_group, group; int first_block; struct super_block *sb; struct buffer_head *bhs; struct buffer_head **bh = NULL; struct inode *inode; char *data; char *bitmap; struct ext4_group_info *grinfo; inode = folio->mapping->host; sb = inode->i_sb; ngroups = ext4_get_groups_count(sb); blocksize = i_blocksize(inode); blocks_per_folio = folio_size(folio) / blocksize; WARN_ON_ONCE(!blocks_per_folio); groups_per_folio = DIV_ROUND_UP(blocks_per_folio, 2); mb_debug(sb, "init folio %lu\n", folio->index); /* allocate buffer_heads to read bitmaps */ if (groups_per_folio > 1) { i = sizeof(struct buffer_head *) * groups_per_folio; bh = kzalloc(i, gfp); if (bh == NULL) return -ENOMEM; } else bh = &bhs; /* read all groups the folio covers into the cache */ first_group = EXT4_PG_TO_LBLK(inode, folio->index) / 2; for (i = 0, group = first_group; i < groups_per_folio; i++, group++) { if (group >= ngroups) break; grinfo = 
ext4_get_group_info(sb, group); if (!grinfo) continue; /* * If folio is uptodate then we came here after online resize * which added some new uninitialized group info structs, so * we must skip all initialized uptodate buddies on the folio, * which may be currently in use by an allocating task. */ if (folio_test_uptodate(folio) && !EXT4_MB_GRP_NEED_INIT(grinfo)) { bh[i] = NULL; continue; } bh[i] = ext4_read_block_bitmap_nowait(sb, group, false); if (IS_ERR(bh[i])) { err = PTR_ERR(bh[i]); bh[i] = NULL; goto out; } mb_debug(sb, "read bitmap for group %u\n", group); } /* wait for I/O completion */ for (i = 0, group = first_group; i < groups_per_folio; i++, group++) { int err2; if (!bh[i]) continue; err2 = ext4_wait_block_bitmap(sb, group, bh[i]); if (!err) err = err2; } first_block = EXT4_PG_TO_LBLK(inode, folio->index); for (i = 0; i < blocks_per_folio; i++) { group = (first_block + i) >> 1; if (group >= ngroups) break; if (!bh[group - first_group]) /* skip initialized uptodate buddy */ continue; if (!buffer_verified(bh[group - first_group])) /* Skip faulty bitmaps */ continue; err = 0; /* * data carry information regarding this * particular group in the format specified * above * */ data = folio_address(folio) + (i * blocksize); bitmap = bh[group - first_group]->b_data; /* * We place the buddy block and bitmap block * close together */ grinfo = ext4_get_group_info(sb, group); if (!grinfo) { err = -EFSCORRUPTED; goto out; } if ((first_block + i) & 1) { /* this is block of buddy */ BUG_ON(incore == NULL); mb_debug(sb, "put buddy for group %u in folio %lu/%x\n", group, folio->index, i * blocksize); trace_ext4_mb_buddy_bitmap_load(sb, group); grinfo->bb_fragments = 0; memset(grinfo->bb_counters, 0, sizeof(*grinfo->bb_counters) * (MB_NUM_ORDERS(sb))); /* * incore got set to the group block bitmap below */ ext4_lock_group(sb, group); /* init the buddy */ memset(data, 0xff, blocksize); ext4_mb_generate_buddy(sb, data, incore, group, grinfo); ext4_unlock_group(sb, group); 
incore = NULL; } else { /* this is block of bitmap */ BUG_ON(incore != NULL); mb_debug(sb, "put bitmap for group %u in folio %lu/%x\n", group, folio->index, i * blocksize); trace_ext4_mb_bitmap_load(sb, group); /* see comments in ext4_mb_put_pa() */ ext4_lock_group(sb, group); memcpy(data, bitmap, blocksize); /* mark all preallocated blks used in in-core bitmap */ ext4_mb_generate_from_pa(sb, data, group); WARN_ON_ONCE(!RB_EMPTY_ROOT(&grinfo->bb_free_root)); ext4_unlock_group(sb, group); /* set incore so that the buddy information can be * generated using this */ incore = data; } } folio_mark_uptodate(folio); out: if (bh) { for (i = 0; i < groups_per_folio; i++) brelse(bh[i]); if (bh != &bhs) kfree(bh); } return err; } /* * Lock the buddy and bitmap folios. This makes sure other parallel init_group * on the same buddy folio doesn't happen while holding the buddy folio lock. * Return locked buddy and bitmap folios on e4b struct. If buddy and bitmap * are on the same folio e4b->bd_buddy_folio is NULL and return value is 0. */ static int ext4_mb_get_buddy_folio_lock(struct super_block *sb, ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp) { struct inode *inode = EXT4_SB(sb)->s_buddy_cache; int block, pnum; struct folio *folio; e4b->bd_buddy_folio = NULL; e4b->bd_bitmap_folio = NULL; /* * the buddy cache inode stores the block bitmap * and buddy information in consecutive blocks. * So for each group we need two blocks. 
*/ block = group * 2; pnum = EXT4_LBLK_TO_PG(inode, block); folio = __filemap_get_folio(inode->i_mapping, pnum, FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp); if (IS_ERR(folio)) return PTR_ERR(folio); BUG_ON(folio->mapping != inode->i_mapping); WARN_ON_ONCE(folio_size(folio) < sb->s_blocksize); e4b->bd_bitmap_folio = folio; e4b->bd_bitmap = folio_address(folio) + offset_in_folio(folio, EXT4_LBLK_TO_B(inode, block)); block++; pnum = EXT4_LBLK_TO_PG(inode, block); if (folio_contains(folio, pnum)) { /* buddy and bitmap are on the same folio */ return 0; } /* we need another folio for the buddy */ folio = __filemap_get_folio(inode->i_mapping, pnum, FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp); if (IS_ERR(folio)) return PTR_ERR(folio); BUG_ON(folio->mapping != inode->i_mapping); WARN_ON_ONCE(folio_size(folio) < sb->s_blocksize); e4b->bd_buddy_folio = folio; return 0; } static void ext4_mb_put_buddy_folio_lock(struct ext4_buddy *e4b) { if (e4b->bd_bitmap_folio) { folio_unlock(e4b->bd_bitmap_folio); folio_put(e4b->bd_bitmap_folio); } if (e4b->bd_buddy_folio) { folio_unlock(e4b->bd_buddy_folio); folio_put(e4b->bd_buddy_folio); } } /* * Locking note: This routine calls ext4_mb_init_cache(), which takes the * block group lock of all groups for this folio; do not hold the BG lock when * calling this routine! */ static noinline_for_stack int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp) { struct ext4_group_info *this_grp; struct ext4_buddy e4b; struct folio *folio; int ret = 0; might_sleep(); mb_debug(sb, "init group %u\n", group); this_grp = ext4_get_group_info(sb, group); if (!this_grp) return -EFSCORRUPTED; /* * This ensures that we don't reinit the buddy cache * folio which map to the group from which we are already * allocating. If we are looking at the buddy cache we would * have taken a reference using ext4_mb_load_buddy and that * would have pinned buddy folio to page cache. * The call to ext4_mb_get_buddy_folio_lock will mark the * folio accessed. 
*/ ret = ext4_mb_get_buddy_folio_lock(sb, group, &e4b, gfp); if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) { /* * somebody initialized the group * return without doing anything */ goto err; } folio = e4b.bd_bitmap_folio; ret = ext4_mb_init_cache(folio, NULL, gfp); if (ret) goto err; if (!folio_test_uptodate(folio)) { ret = -EIO; goto err; } if (e4b.bd_buddy_folio == NULL) { /* * If both the bitmap and buddy are in * the same folio we don't need to force * init the buddy */ ret = 0; goto err; } /* init buddy cache */ folio = e4b.bd_buddy_folio; ret = ext4_mb_init_cache(folio, e4b.bd_bitmap, gfp); if (ret) goto err; if (!folio_test_uptodate(folio)) { ret = -EIO; goto err; } err: ext4_mb_put_buddy_folio_lock(&e4b); return ret; } /* * Locking note: This routine calls ext4_mb_init_cache(), which takes the * block group lock of all groups for this folio; do not hold the BG lock when * calling this routine! */ static noinline_for_stack int ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp) { int block; int pnum; struct folio *folio; int ret; struct ext4_group_info *grp; struct ext4_sb_info *sbi = EXT4_SB(sb); struct inode *inode = sbi->s_buddy_cache; might_sleep(); mb_debug(sb, "load group %u\n", group); grp = ext4_get_group_info(sb, group); if (!grp) return -EFSCORRUPTED; e4b->bd_blkbits = sb->s_blocksize_bits; e4b->bd_info = grp; e4b->bd_sb = sb; e4b->bd_group = group; e4b->bd_buddy_folio = NULL; e4b->bd_bitmap_folio = NULL; if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { /* * we need full data about the group * to make a good selection */ ret = ext4_mb_init_group(sb, group, gfp); if (ret) return ret; } /* * the buddy cache inode stores the block bitmap * and buddy information in consecutive blocks. * So for each group we need two blocks. */ block = group * 2; pnum = EXT4_LBLK_TO_PG(inode, block); /* Avoid locking the folio in the fast path ... 
*/ folio = __filemap_get_folio(inode->i_mapping, pnum, FGP_ACCESSED, 0); if (IS_ERR(folio) || !folio_test_uptodate(folio) || folio_test_locked(folio)) { /* * folio_test_locked is employed to detect ongoing folio * migrations, since concurrent migrations can lead to * bitmap inconsistency. And if we are not uptodate that * implies somebody just created the folio but is yet to * initialize it. We can drop the folio reference and * try to get the folio with lock in both cases to avoid * concurrency. */ if (!IS_ERR(folio)) folio_put(folio); folio = __filemap_get_folio(inode->i_mapping, pnum, FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp); if (!IS_ERR(folio)) { if (WARN_RATELIMIT(folio->mapping != inode->i_mapping, "ext4: bitmap's mapping != inode->i_mapping\n")) { /* should never happen */ folio_unlock(folio); ret = -EINVAL; goto err; } if (!folio_test_uptodate(folio)) { ret = ext4_mb_init_cache(folio, NULL, gfp); if (ret) { folio_unlock(folio); goto err; } mb_cmp_bitmaps(e4b, folio_address(folio) + offset_in_folio(folio, EXT4_LBLK_TO_B(inode, block))); } folio_unlock(folio); } } if (IS_ERR(folio)) { ret = PTR_ERR(folio); goto err; } if (!folio_test_uptodate(folio)) { ret = -EIO; goto err; } /* Folios marked accessed already */ e4b->bd_bitmap_folio = folio; e4b->bd_bitmap = folio_address(folio) + offset_in_folio(folio, EXT4_LBLK_TO_B(inode, block)); block++; pnum = EXT4_LBLK_TO_PG(inode, block); /* buddy and bitmap are on the same folio? 
*/ if (folio_contains(folio, pnum)) { folio_get(folio); goto update_buddy; } /* we need another folio for the buddy */ folio = __filemap_get_folio(inode->i_mapping, pnum, FGP_ACCESSED, 0); if (IS_ERR(folio) || !folio_test_uptodate(folio) || folio_test_locked(folio)) { if (!IS_ERR(folio)) folio_put(folio); folio = __filemap_get_folio(inode->i_mapping, pnum, FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp); if (!IS_ERR(folio)) { if (WARN_RATELIMIT(folio->mapping != inode->i_mapping, "ext4: buddy bitmap's mapping != inode->i_mapping\n")) { /* should never happen */ folio_unlock(folio); ret = -EINVAL; goto err; } if (!folio_test_uptodate(folio)) { ret = ext4_mb_init_cache(folio, e4b->bd_bitmap, gfp); if (ret) { folio_unlock(folio); goto err; } } folio_unlock(folio); } } if (IS_ERR(folio)) { ret = PTR_ERR(folio); goto err; } if (!folio_test_uptodate(folio)) { ret = -EIO; goto err; } update_buddy: /* Folios marked accessed already */ e4b->bd_buddy_folio = folio; e4b->bd_buddy = folio_address(folio) + offset_in_folio(folio, EXT4_LBLK_TO_B(inode, block)); return 0; err: if (!IS_ERR_OR_NULL(folio)) folio_put(folio); if (e4b->bd_bitmap_folio) folio_put(e4b->bd_bitmap_folio); e4b->bd_buddy = NULL; e4b->bd_bitmap = NULL; return ret; } static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, struct ext4_buddy *e4b) { return ext4_mb_load_buddy_gfp(sb, group, e4b, GFP_NOFS); } static void ext4_mb_unload_buddy(struct ext4_buddy *e4b) { if (e4b->bd_bitmap_folio) folio_put(e4b->bd_bitmap_folio); if (e4b->bd_buddy_folio) folio_put(e4b->bd_buddy_folio); } static int mb_find_order_for_block(struct ext4_buddy *e4b, int block) { int order = 1, max; void *bb; BUG_ON(e4b->bd_bitmap == e4b->bd_buddy); BUG_ON(block >= (1 << (e4b->bd_blkbits + 3))); while (order <= e4b->bd_blkbits + 1) { bb = mb_find_buddy(e4b, order, &max); if (!mb_test_bit(block >> order, bb)) { /* this block is part of buddy of order 'order' */ return order; } order++; } return 0; } static void mb_clear_bits(void 
*bm, int cur, int len) { __u32 *addr; len = cur + len; while (cur < len) { if ((cur & 31) == 0 && (len - cur) >= 32) { /* fast path: clear whole word at once */ addr = bm + (cur >> 3); *addr = 0; cur += 32; continue; } mb_clear_bit(cur, bm); cur++; } } /* clear bits in given range * will return first found zero bit if any, -1 otherwise */ static int mb_test_and_clear_bits(void *bm, int cur, int len) { __u32 *addr; int zero_bit = -1; len = cur + len; while (cur < len) { if ((cur & 31) == 0 && (len - cur) >= 32) { /* fast path: clear whole word at once */ addr = bm + (cur >> 3); if (*addr != (__u32)(-1) && zero_bit == -1) zero_bit = cur + mb_find_next_zero_bit(addr, 32, 0); *addr = 0; cur += 32; continue; } if (!mb_test_and_clear_bit(cur, bm) && zero_bit == -1) zero_bit = cur; cur++; } return zero_bit; } void mb_set_bits(void *bm, int cur, int len) { __u32 *addr; len = cur + len; while (cur < len) { if ((cur & 31) == 0 && (len - cur) >= 32) { /* fast path: set whole word at once */ addr = bm + (cur >> 3); *addr = 0xffffffff; cur += 32; continue; } mb_set_bit(cur, bm); cur++; } } static inline int mb_buddy_adjust_border(int* bit, void* bitmap, int side) { if (mb_test_bit(*bit + side, bitmap)) { mb_clear_bit(*bit, bitmap); (*bit) -= side; return 1; } else { (*bit) += side; mb_set_bit(*bit, bitmap); return -1; } } static void mb_buddy_mark_free(struct ext4_buddy *e4b, int first, int last) { int max; int order = 1; void *buddy = mb_find_buddy(e4b, order, &max); while (buddy) { void *buddy2; /* Bits in range [first; last] are known to be set since * corresponding blocks were allocated. Bits in range * (first; last) will stay set because they form buddies on * upper layer. We just deal with borders if they don't * align with upper layer and then go up. * Releasing entire group is all about clearing * single bit of highest order buddy. 
*/ /* Example: * --------------------------------- * | 1 | 1 | 1 | 1 | * --------------------------------- * | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | * --------------------------------- * 0 1 2 3 4 5 6 7 * \_____________________/ * * Neither [1] nor [6] is aligned to above layer. * Left neighbour [0] is free, so mark it busy, * decrease bb_counters and extend range to * [0; 6] * Right neighbour [7] is busy. It can't be coaleasced with [6], so * mark [6] free, increase bb_counters and shrink range to * [0; 5]. * Then shift range to [0; 2], go up and do the same. */ if (first & 1) e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&first, buddy, -1); if (!(last & 1)) e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&last, buddy, 1); if (first > last) break; order++; buddy2 = mb_find_buddy(e4b, order, &max); if (!buddy2) { mb_clear_bits(buddy, first, last - first + 1); e4b->bd_info->bb_counters[order - 1] += last - first + 1; break; } first >>= 1; last >>= 1; buddy = buddy2; } } static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, int first, int count) { int left_is_free = 0; int right_is_free = 0; int block; int last = first + count - 1; struct super_block *sb = e4b->bd_sb; if (WARN_ON(count == 0)) return; BUG_ON(last >= (sb->s_blocksize << 3)); assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group)); /* Don't bother if the block group is corrupt. 
*/ if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) return; mb_check_buddy(e4b); mb_free_blocks_double(inode, e4b, first, count); /* access memory sequentially: check left neighbour, * clear range and then check right neighbour */ if (first != 0) left_is_free = !mb_test_bit(first - 1, e4b->bd_bitmap); block = mb_test_and_clear_bits(e4b->bd_bitmap, first, count); if (last + 1 < EXT4_SB(sb)->s_mb_maxs[0]) right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap); if (unlikely(block != -1)) { struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t blocknr; /* * Fastcommit replay can free already freed blocks which * corrupts allocation info. Regenerate it. */ if (sbi->s_mount_state & EXT4_FC_REPLAY) { mb_regenerate_buddy(e4b); goto check; } blocknr = ext4_group_first_block_no(sb, e4b->bd_group); blocknr += EXT4_C2B(sbi, block); ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, EXT4_GROUP_INFO_BBITMAP_CORRUPT); ext4_grp_locked_error(sb, e4b->bd_group, inode ? inode->i_ino : 0, blocknr, "freeing already freed block (bit %u); block bitmap corrupt.", block); return; } this_cpu_inc(discard_pa_seq); e4b->bd_info->bb_free += count; if (first < e4b->bd_info->bb_first_free) e4b->bd_info->bb_first_free = first; /* let's maintain fragments counter */ if (left_is_free && right_is_free) e4b->bd_info->bb_fragments--; else if (!left_is_free && !right_is_free) e4b->bd_info->bb_fragments++; /* buddy[0] == bd_bitmap is a special case, so handle * it right away and let mb_buddy_mark_free stay free of * zero order checks. * Check if neighbours are to be coaleasced, * adjust bitmap bb_counters and borders appropriately. */ if (first & 1) { first += !left_is_free; e4b->bd_info->bb_counters[0] += left_is_free ? -1 : 1; } if (!(last & 1)) { last -= !right_is_free; e4b->bd_info->bb_counters[0] += right_is_free ? 
-1 : 1; } if (first <= last) mb_buddy_mark_free(e4b, first >> 1, last >> 1); mb_set_largest_free_order(sb, e4b->bd_info); mb_update_avg_fragment_size(sb, e4b->bd_info); check: mb_check_buddy(e4b); } static int mb_find_extent(struct ext4_buddy *e4b, int block, int needed, struct ext4_free_extent *ex) { int max, order, next; void *buddy; assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); BUG_ON(ex == NULL); buddy = mb_find_buddy(e4b, 0, &max); BUG_ON(buddy == NULL); BUG_ON(block >= max); if (mb_test_bit(block, buddy)) { ex->fe_len = 0; ex->fe_start = 0; ex->fe_group = 0; return 0; } /* find actual order */ order = mb_find_order_for_block(e4b, block); ex->fe_len = (1 << order) - (block & ((1 << order) - 1)); ex->fe_start = block; ex->fe_group = e4b->bd_group; block = block >> order; while (needed > ex->fe_len && mb_find_buddy(e4b, order, &max)) { if (block + 1 >= max) break; next = (block + 1) * (1 << order); if (mb_test_bit(next, e4b->bd_bitmap)) break; order = mb_find_order_for_block(e4b, next); block = next >> order; ex->fe_len += 1 << order; } if (ex->fe_start + ex->fe_len > EXT4_CLUSTERS_PER_GROUP(e4b->bd_sb)) { /* Should never happen! (but apparently sometimes does?!?) 
*/ WARN_ON(1); ext4_grp_locked_error(e4b->bd_sb, e4b->bd_group, 0, 0, "corruption or bug in mb_find_extent " "block=%d, order=%d needed=%d ex=%u/%d/%d@%u", block, order, needed, ex->fe_group, ex->fe_start, ex->fe_len, ex->fe_logical); ex->fe_len = 0; ex->fe_start = 0; ex->fe_group = 0; } return ex->fe_len; } static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex) { int ord; int mlen = 0; int max = 0; int start = ex->fe_start; int len = ex->fe_len; unsigned ret = 0; int len0 = len; void *buddy; int ord_start, ord_end; BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3)); BUG_ON(e4b->bd_group != ex->fe_group); assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); mb_check_buddy(e4b); mb_mark_used_double(e4b, start, len); this_cpu_inc(discard_pa_seq); e4b->bd_info->bb_free -= len; if (e4b->bd_info->bb_first_free == start) e4b->bd_info->bb_first_free += len; /* let's maintain fragments counter */ if (start != 0) mlen = !mb_test_bit(start - 1, e4b->bd_bitmap); if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0]) max = !mb_test_bit(start + len, e4b->bd_bitmap); if (mlen && max) e4b->bd_info->bb_fragments++; else if (!mlen && !max) e4b->bd_info->bb_fragments--; /* let's maintain buddy itself */ while (len) { ord = mb_find_order_for_block(e4b, start); if (((start >> ord) << ord) == start && len >= (1 << ord)) { /* the whole chunk may be allocated at once! 
*/ mlen = 1 << ord; buddy = mb_find_buddy(e4b, ord, &max); BUG_ON((start >> ord) >= max); mb_set_bit(start >> ord, buddy); e4b->bd_info->bb_counters[ord]--; start += mlen; len -= mlen; BUG_ON(len < 0); continue; } /* store for history */ if (ret == 0) ret = len | (ord << 16); BUG_ON(ord <= 0); buddy = mb_find_buddy(e4b, ord, &max); mb_set_bit(start >> ord, buddy); e4b->bd_info->bb_counters[ord]--; ord_start = (start >> ord) << ord; ord_end = ord_start + (1 << ord); /* first chunk */ if (start > ord_start) ext4_mb_mark_free_simple(e4b->bd_sb, e4b->bd_buddy, ord_start, start - ord_start, e4b->bd_info); /* last chunk */ if (start + len < ord_end) { ext4_mb_mark_free_simple(e4b->bd_sb, e4b->bd_buddy, start + len, ord_end - (start + len), e4b->bd_info); break; } len = start + len - ord_end; start = ord_end; } mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info); mb_update_avg_fragment_size(e4b->bd_sb, e4b->bd_info); mb_set_bits(e4b->bd_bitmap, ex->fe_start, len0); mb_check_buddy(e4b); return ret; } /* * Must be called under group lock! */ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac, struct ext4_buddy *e4b) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); int ret; BUG_ON(ac->ac_b_ex.fe_group != e4b->bd_group); BUG_ON(ac->ac_status == AC_STATUS_FOUND); ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len); ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical; ret = mb_mark_used(e4b, &ac->ac_b_ex); /* preallocation can change ac_b_ex, thus we store actually * allocated blocks for history */ ac->ac_f_ex = ac->ac_b_ex; ac->ac_status = AC_STATUS_FOUND; ac->ac_tail = ret & 0xffff; ac->ac_buddy = ret >> 16; /* * take the folio reference. We want the folio to be pinned * so that we don't get a ext4_mb_init_cache_call for this * group until we update the bitmap. That would mean we * double allocate blocks. 
The reference is dropped * in ext4_mb_release_context */ ac->ac_bitmap_folio = e4b->bd_bitmap_folio; folio_get(ac->ac_bitmap_folio); ac->ac_buddy_folio = e4b->bd_buddy_folio; folio_get(ac->ac_buddy_folio); /* store last allocated for subsequent stream allocation */ if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { int hash = ac->ac_inode->i_ino % sbi->s_mb_nr_global_goals; WRITE_ONCE(sbi->s_mb_last_groups[hash], ac->ac_f_ex.fe_group); } /* * As we've just preallocated more space than * user requested originally, we store allocated * space in a special descriptor. */ if (ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len) ext4_mb_new_preallocation(ac); } static void ext4_mb_check_limits(struct ext4_allocation_context *ac, struct ext4_buddy *e4b, int finish_group) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_free_extent *bex = &ac->ac_b_ex; struct ext4_free_extent *gex = &ac->ac_g_ex; if (ac->ac_status == AC_STATUS_FOUND) return; /* * We don't want to scan for a whole year */ if (ac->ac_found > sbi->s_mb_max_to_scan && !(ac->ac_flags & EXT4_MB_HINT_FIRST)) { ac->ac_status = AC_STATUS_BREAK; return; } /* * Haven't found good chunk so far, let's continue */ if (bex->fe_len < gex->fe_len) return; if (finish_group || ac->ac_found > sbi->s_mb_min_to_scan) ext4_mb_use_best_found(ac, e4b); } /* * The routine checks whether found extent is good enough. If it is, * then the extent gets marked used and flag is set to the context * to stop scanning. Otherwise, the extent is compared with the * previous found extent and if new one is better, then it's stored * in the context. Later, the best found extent will be used, if * mballoc can't find good enough extent. * * The algorithm used is roughly as follows: * * * If free extent found is exactly as big as goal, then * stop the scan and use it immediately * * * If free extent found is smaller than goal, then keep retrying * upto a max of sbi->s_mb_max_to_scan times (default 200). After * that stop scanning and use whatever we have. 
* * * If free extent found is bigger than goal, then keep retrying * upto a max of sbi->s_mb_min_to_scan times (default 10) before * stopping the scan and using the extent. * * * FIXME: real allocation policy is to be designed yet! */ static void ext4_mb_measure_extent(struct ext4_allocation_context *ac, struct ext4_free_extent *ex, struct ext4_buddy *e4b) { struct ext4_free_extent *bex = &ac->ac_b_ex; struct ext4_free_extent *gex = &ac->ac_g_ex; BUG_ON(ex->fe_len <= 0); BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb)); BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb)); BUG_ON(ac->ac_status != AC_STATUS_CONTINUE); ac->ac_found++; ac->ac_cX_found[ac->ac_criteria]++; /* * The special case - take what you catch first */ if (unlikely(ac->ac_flags & EXT4_MB_HINT_FIRST)) { *bex = *ex; ext4_mb_use_best_found(ac, e4b); return; } /* * Let's check whether the chuck is good enough */ if (ex->fe_len == gex->fe_len) { *bex = *ex; ext4_mb_use_best_found(ac, e4b); return; } /* * If this is first found extent, just store it in the context */ if (bex->fe_len == 0) { *bex = *ex; return; } /* * If new found extent is better, store it in the context */ if (bex->fe_len < gex->fe_len) { /* if the request isn't satisfied, any found extent * larger than previous best one is better */ if (ex->fe_len > bex->fe_len) *bex = *ex; } else if (ex->fe_len > gex->fe_len) { /* if the request is satisfied, then we try to find * an extent that still satisfy the request, but is * smaller than previous one */ if (ex->fe_len < bex->fe_len) *bex = *ex; } ext4_mb_check_limits(ac, e4b, 0); } static noinline_for_stack void ext4_mb_try_best_found(struct ext4_allocation_context *ac, struct ext4_buddy *e4b) { struct ext4_free_extent ex = ac->ac_b_ex; ext4_group_t group = ex.fe_group; int max; int err; BUG_ON(ex.fe_len <= 0); err = ext4_mb_load_buddy(ac->ac_sb, group, e4b); if (err) return; ext4_lock_group(ac->ac_sb, group); if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) goto out; max = 
mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex); if (max > 0) { ac->ac_b_ex = ex; ext4_mb_use_best_found(ac, e4b); } out: ext4_unlock_group(ac->ac_sb, group); ext4_mb_unload_buddy(e4b); } static noinline_for_stack int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, struct ext4_buddy *e4b) { ext4_group_t group = ac->ac_g_ex.fe_group; int max; int err; struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); struct ext4_free_extent ex; if (!grp) return -EFSCORRUPTED; if (!(ac->ac_flags & (EXT4_MB_HINT_TRY_GOAL | EXT4_MB_HINT_GOAL_ONLY))) return 0; if (grp->bb_free == 0) return 0; err = ext4_mb_load_buddy(ac->ac_sb, group, e4b); if (err) { if (EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info) && !(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) return 0; return err; } ext4_lock_group(ac->ac_sb, group); if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) goto out; max = mb_find_extent(e4b, ac->ac_g_ex.fe_start, ac->ac_g_ex.fe_len, &ex); ex.fe_logical = 0xDEADFA11; /* debug value */ if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == EXT4_NUM_B2C(sbi, sbi->s_stripe)) { ext4_fsblk_t start; start = ext4_grp_offs_to_block(ac->ac_sb, &ex); /* use do_div to get remainder (would be 64-bit modulo) */ if (do_div(start, sbi->s_stripe) == 0) { ac->ac_found++; ac->ac_b_ex = ex; ext4_mb_use_best_found(ac, e4b); } } else if (max >= ac->ac_g_ex.fe_len) { BUG_ON(ex.fe_len <= 0); BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group); BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start); ac->ac_found++; ac->ac_b_ex = ex; ext4_mb_use_best_found(ac, e4b); } else if (max > 0 && (ac->ac_flags & EXT4_MB_HINT_MERGE)) { /* Sometimes, caller may want to merge even small * number of blocks to an existing extent */ BUG_ON(ex.fe_len <= 0); BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group); BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start); ac->ac_found++; ac->ac_b_ex = ex; ext4_mb_use_best_found(ac, e4b); } out: ext4_unlock_group(ac->ac_sb, group); 
ext4_mb_unload_buddy(e4b); return 0; } /* * The routine scans buddy structures (not bitmap!) from given order * to max order and tries to find big enough chunk to satisfy the req */ static noinline_for_stack void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac, struct ext4_buddy *e4b) { struct super_block *sb = ac->ac_sb; struct ext4_group_info *grp = e4b->bd_info; void *buddy; int i; int k; int max; BUG_ON(ac->ac_2order <= 0); for (i = ac->ac_2order; i < MB_NUM_ORDERS(sb); i++) { if (grp->bb_counters[i] == 0) continue; buddy = mb_find_buddy(e4b, i, &max); if (WARN_RATELIMIT(buddy == NULL, "ext4: mb_simple_scan_group: mb_find_buddy failed, (%d)\n", i)) continue; k = mb_find_next_zero_bit(buddy, max, 0); if (k >= max) { ext4_mark_group_bitmap_corrupted(ac->ac_sb, e4b->bd_group, EXT4_GROUP_INFO_BBITMAP_CORRUPT); ext4_grp_locked_error(ac->ac_sb, e4b->bd_group, 0, 0, "%d free clusters of order %d. But found 0", grp->bb_counters[i], i); break; } ac->ac_found++; ac->ac_cX_found[ac->ac_criteria]++; ac->ac_b_ex.fe_len = 1 << i; ac->ac_b_ex.fe_start = k << i; ac->ac_b_ex.fe_group = e4b->bd_group; ext4_mb_use_best_found(ac, e4b); BUG_ON(ac->ac_f_ex.fe_len != ac->ac_g_ex.fe_len); if (EXT4_SB(sb)->s_mb_stats) atomic_inc(&EXT4_SB(sb)->s_bal_2orders); break; } } /* * The routine scans the group and measures all found extents. * In order to optimize scanning, caller must pass number of * free blocks in the group, so the routine can know upper limit. 
*/ static noinline_for_stack void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, struct ext4_buddy *e4b) { struct super_block *sb = ac->ac_sb; void *bitmap = e4b->bd_bitmap; struct ext4_free_extent ex; int i, j, freelen; int free; free = e4b->bd_info->bb_free; if (WARN_ON(free <= 0)) return; i = e4b->bd_info->bb_first_free; while (free && ac->ac_status == AC_STATUS_CONTINUE) { i = mb_find_next_zero_bit(bitmap, EXT4_CLUSTERS_PER_GROUP(sb), i); if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) { /* * IF we have corrupt bitmap, we won't find any * free blocks even though group info says we * have free blocks */ ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, EXT4_GROUP_INFO_BBITMAP_CORRUPT); ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, "%d free clusters as per " "group info. But bitmap says 0", free); break; } if (!ext4_mb_cr_expensive(ac->ac_criteria)) { /* * In CR_GOAL_LEN_FAST and CR_BEST_AVAIL_LEN, we are * sure that this group will have a large enough * continuous free extent, so skip over the smaller free * extents */ j = mb_find_next_bit(bitmap, EXT4_CLUSTERS_PER_GROUP(sb), i); freelen = j - i; if (freelen < ac->ac_g_ex.fe_len) { i = j; free -= freelen; continue; } } mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex); if (WARN_ON(ex.fe_len <= 0)) break; if (free < ex.fe_len) { ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, EXT4_GROUP_INFO_BBITMAP_CORRUPT); ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, "%d free clusters as per " "group info. But got %d blocks", free, ex.fe_len); /* * The number of free blocks differs. This mostly * indicate that the bitmap is corrupt. So exit * without claiming the space. 
*/ break; } ex.fe_logical = 0xDEADC0DE; /* debug value */ ext4_mb_measure_extent(ac, &ex, e4b); i += ex.fe_len; free -= ex.fe_len; } ext4_mb_check_limits(ac, e4b, 1); } /* * This is a special case for storages like raid5 * we try to find stripe-aligned chunks for stripe-size-multiple requests */ static noinline_for_stack void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, struct ext4_buddy *e4b) { struct super_block *sb = ac->ac_sb; struct ext4_sb_info *sbi = EXT4_SB(sb); void *bitmap = e4b->bd_bitmap; struct ext4_free_extent ex; ext4_fsblk_t first_group_block; ext4_fsblk_t a; ext4_grpblk_t i, stripe; int max; BUG_ON(sbi->s_stripe == 0); /* find first stripe-aligned block in group */ first_group_block = ext4_group_first_block_no(sb, e4b->bd_group); a = first_group_block + sbi->s_stripe - 1; do_div(a, sbi->s_stripe); i = (a * sbi->s_stripe) - first_group_block; stripe = EXT4_NUM_B2C(sbi, sbi->s_stripe); i = EXT4_B2C(sbi, i); while (i < EXT4_CLUSTERS_PER_GROUP(sb)) { if (!mb_test_bit(i, bitmap)) { max = mb_find_extent(e4b, i, stripe, &ex); if (max >= stripe) { ac->ac_found++; ac->ac_cX_found[ac->ac_criteria]++; ex.fe_logical = 0xDEADF00D; /* debug value */ ac->ac_b_ex = ex; ext4_mb_use_best_found(ac, e4b); break; } } i += stripe; } } static void __ext4_mb_scan_group(struct ext4_allocation_context *ac) { bool is_stripe_aligned; struct ext4_sb_info *sbi; enum criteria cr = ac->ac_criteria; ac->ac_groups_scanned++; if (cr == CR_POWER2_ALIGNED) return ext4_mb_simple_scan_group(ac, ac->ac_e4b); sbi = EXT4_SB(ac->ac_sb); is_stripe_aligned = false; if ((sbi->s_stripe >= sbi->s_cluster_ratio) && !(ac->ac_g_ex.fe_len % EXT4_NUM_B2C(sbi, sbi->s_stripe))) is_stripe_aligned = true; if ((cr == CR_GOAL_LEN_FAST || cr == CR_BEST_AVAIL_LEN) && is_stripe_aligned) ext4_mb_scan_aligned(ac, ac->ac_e4b); if (ac->ac_status == AC_STATUS_CONTINUE) ext4_mb_complex_scan_group(ac, ac->ac_e4b); } /* * This is also called BEFORE we load the buddy bitmap. 
* Returns either 1 or 0 indicating that the group is either suitable * for the allocation or not. */ static bool ext4_mb_good_group(struct ext4_allocation_context *ac, ext4_group_t group, enum criteria cr) { ext4_grpblk_t free, fragments; int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb)); struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); BUG_ON(cr < CR_POWER2_ALIGNED || cr >= EXT4_MB_NUM_CRS); if (unlikely(!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) return false; free = grp->bb_free; if (free == 0) return false; fragments = grp->bb_fragments; if (fragments == 0) return false; switch (cr) { case CR_POWER2_ALIGNED: BUG_ON(ac->ac_2order == 0); /* Avoid using the first bg of a flexgroup for data files */ if ((ac->ac_flags & EXT4_MB_HINT_DATA) && (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) && ((group % flex_size) == 0)) return false; if (free < ac->ac_g_ex.fe_len) return false; if (ac->ac_2order >= MB_NUM_ORDERS(ac->ac_sb)) return true; if (grp->bb_largest_free_order < ac->ac_2order) return false; return true; case CR_GOAL_LEN_FAST: case CR_BEST_AVAIL_LEN: if ((free / fragments) >= ac->ac_g_ex.fe_len) return true; break; case CR_GOAL_LEN_SLOW: if (free >= ac->ac_g_ex.fe_len) return true; break; case CR_ANY_FREE: return true; default: BUG(); } return false; } /* * This could return negative error code if something goes wrong * during ext4_mb_init_group(). This should not be called with * ext4_lock_group() held. * * Note: because we are conditionally operating with the group lock in * the EXT4_MB_STRICT_CHECK case, we need to fake out sparse in this * function using __acquire and __release. This means we need to be * super careful before messing with the error path handling via "goto * out"! 
*/ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac, ext4_group_t group, enum criteria cr) { struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); struct super_block *sb = ac->ac_sb; struct ext4_sb_info *sbi = EXT4_SB(sb); bool should_lock = ac->ac_flags & EXT4_MB_STRICT_CHECK; ext4_grpblk_t free; int ret = 0; if (!grp) return -EFSCORRUPTED; if (sbi->s_mb_stats) atomic64_inc(&sbi->s_bal_cX_groups_considered[ac->ac_criteria]); if (should_lock) { ext4_lock_group(sb, group); __release(ext4_group_lock_ptr(sb, group)); } free = grp->bb_free; if (free == 0) goto out; /* * In all criterias except CR_ANY_FREE we try to avoid groups that * can't possibly satisfy the full goal request due to insufficient * free blocks. */ if (cr < CR_ANY_FREE && free < ac->ac_g_ex.fe_len) goto out; if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) goto out; if (should_lock) { __acquire(ext4_group_lock_ptr(sb, group)); ext4_unlock_group(sb, group); } /* We only do this if the grp has never been initialized */ if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); int ret; /* * CR_POWER2_ALIGNED/CR_GOAL_LEN_FAST is a very optimistic * search to find large good chunks almost for free. If buddy * data is not ready, then this optimization makes no sense. But * we never skip the first block group in a flex_bg, since this * gets used for metadata block allocation, and we want to make * sure we locate metadata blocks in the first block group in * the flex_bg if possible. 
*/ if (!ext4_mb_cr_expensive(cr) && (!sbi->s_log_groups_per_flex || ((group & ((1 << sbi->s_log_groups_per_flex) - 1)) != 0)) && !(ext4_has_group_desc_csum(sb) && (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) return 0; ret = ext4_mb_init_group(sb, group, GFP_NOFS); if (ret) return ret; } if (should_lock) { ext4_lock_group(sb, group); __release(ext4_group_lock_ptr(sb, group)); } ret = ext4_mb_good_group(ac, group, cr); out: if (should_lock) { __acquire(ext4_group_lock_ptr(sb, group)); ext4_unlock_group(sb, group); } return ret; } /* * Start prefetching @nr block bitmaps starting at @group. * Return the next group which needs to be prefetched. */ ext4_group_t ext4_mb_prefetch(struct super_block *sb, ext4_group_t group, unsigned int nr, int *cnt) { ext4_group_t ngroups = ext4_get_groups_count(sb); struct buffer_head *bh; struct blk_plug plug; blk_start_plug(&plug); while (nr-- > 0) { struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); struct ext4_group_info *grp = ext4_get_group_info(sb, group); /* * Prefetch block groups with free blocks; but don't * bother if it is marked uninitialized on disk, since * it won't require I/O to read. Also only try to * prefetch once, so we avoid getblk() call, which can * be expensive. 
*/ if (gdp && grp && !EXT4_MB_GRP_TEST_AND_SET_READ(grp) && EXT4_MB_GRP_NEED_INIT(grp) && ext4_free_group_clusters(sb, gdp) > 0 ) { bh = ext4_read_block_bitmap_nowait(sb, group, true); if (bh && !IS_ERR(bh)) { if (!buffer_uptodate(bh) && cnt) (*cnt)++; brelse(bh); } } if (++group >= ngroups) group = 0; } blk_finish_plug(&plug); return group; } /* * Batch reads of the block allocation bitmaps to get * multiple READs in flight; limit prefetching at inexpensive * CR, otherwise mballoc can spend a lot of time loading * imperfect groups */ static void ext4_mb_might_prefetch(struct ext4_allocation_context *ac, ext4_group_t group) { struct ext4_sb_info *sbi; if (ac->ac_prefetch_grp != group) return; sbi = EXT4_SB(ac->ac_sb); if (ext4_mb_cr_expensive(ac->ac_criteria) || ac->ac_prefetch_ios < sbi->s_mb_prefetch_limit) { unsigned int nr = sbi->s_mb_prefetch; if (ext4_has_feature_flex_bg(ac->ac_sb)) { nr = 1 << sbi->s_log_groups_per_flex; nr -= group & (nr - 1); nr = umin(nr, sbi->s_mb_prefetch); } ac->ac_prefetch_nr = nr; ac->ac_prefetch_grp = ext4_mb_prefetch(ac->ac_sb, group, nr, &ac->ac_prefetch_ios); } } /* * Prefetching reads the block bitmap into the buffer cache; but we * need to make sure that the buddy bitmap in the page cache has been * initialized. Note that ext4_mb_init_group() will block if the I/O * is not yet completed, or indeed if it was not initiated by * ext4_mb_prefetch did not start the I/O. * * TODO: We should actually kick off the buddy bitmap setup in a work * queue when the buffer I/O is completed, so that we don't block * waiting for the block allocation bitmap read to finish when * ext4_mb_prefetch_fini is called from ext4_mb_regular_allocator(). 
*/ void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group, unsigned int nr) { struct ext4_group_desc *gdp; struct ext4_group_info *grp; while (nr-- > 0) { if (!group) group = ext4_get_groups_count(sb); group--; gdp = ext4_get_group_desc(sb, group, NULL); grp = ext4_get_group_info(sb, group); if (grp && gdp && EXT4_MB_GRP_NEED_INIT(grp) && ext4_free_group_clusters(sb, gdp) > 0) { if (ext4_mb_init_group(sb, group, GFP_NOFS)) break; } } } static int ext4_mb_scan_group(struct ext4_allocation_context *ac, ext4_group_t group) { int ret; struct super_block *sb = ac->ac_sb; enum criteria cr = ac->ac_criteria; ext4_mb_might_prefetch(ac, group); /* prevent unnecessary buddy loading. */ if (cr < CR_ANY_FREE && spin_is_locked(ext4_group_lock_ptr(sb, group))) return 0; /* This now checks without needing the buddy folio */ ret = ext4_mb_good_group_nolock(ac, group, cr); if (ret <= 0) { if (!ac->ac_first_err) ac->ac_first_err = ret; return 0; } ret = ext4_mb_load_buddy(sb, group, ac->ac_e4b); if (ret) return ret; /* skip busy group */ if (cr >= CR_ANY_FREE) ext4_lock_group(sb, group); else if (!ext4_try_lock_group(sb, group)) goto out_unload; /* We need to check again after locking the block group. 
*/ if (unlikely(!ext4_mb_good_group(ac, group, cr))) goto out_unlock; __ext4_mb_scan_group(ac); out_unlock: ext4_unlock_group(sb, group); out_unload: ext4_mb_unload_buddy(ac->ac_e4b); return ret; } static noinline_for_stack int ext4_mb_regular_allocator(struct ext4_allocation_context *ac) { ext4_group_t i; int err = 0; struct super_block *sb = ac->ac_sb; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_buddy e4b; BUG_ON(ac->ac_status == AC_STATUS_FOUND); /* first, try the goal */ err = ext4_mb_find_by_goal(ac, &e4b); if (err || ac->ac_status == AC_STATUS_FOUND) goto out; if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) goto out; /* * ac->ac_2order is set only if the fe_len is a power of 2 * if ac->ac_2order is set we also set criteria to CR_POWER2_ALIGNED * so that we try exact allocation using buddy. */ i = fls(ac->ac_g_ex.fe_len); ac->ac_2order = 0; /* * We search using buddy data only if the order of the request * is greater than equal to the sbi_s_mb_order2_reqs * You can tune it via /sys/fs/ext4/<partition>/mb_order2_req * We also support searching for power-of-two requests only for * requests upto maximum buddy size we have constructed. */ if (i >= sbi->s_mb_order2_reqs && i <= MB_NUM_ORDERS(sb)) { if (is_power_of_2(ac->ac_g_ex.fe_len)) ac->ac_2order = array_index_nospec(i - 1, MB_NUM_ORDERS(sb)); } /* if stream allocation is enabled, use global goal */ if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { int hash = ac->ac_inode->i_ino % sbi->s_mb_nr_global_goals; ac->ac_g_ex.fe_group = READ_ONCE(sbi->s_mb_last_groups[hash]); ac->ac_g_ex.fe_start = -1; ac->ac_flags &= ~EXT4_MB_HINT_TRY_GOAL; } /* * Let's just scan groups to find more-less suitable blocks We * start with CR_GOAL_LEN_FAST, unless it is power of 2 * aligned, in which case let's do that faster approach first. 
*/ ac->ac_criteria = CR_GOAL_LEN_FAST; if (ac->ac_2order) ac->ac_criteria = CR_POWER2_ALIGNED; ac->ac_e4b = &e4b; ac->ac_prefetch_ios = 0; ac->ac_first_err = 0; repeat: while (ac->ac_criteria < EXT4_MB_NUM_CRS) { err = ext4_mb_scan_groups(ac); if (err) goto out; if (ac->ac_status != AC_STATUS_CONTINUE) break; } if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND && !(ac->ac_flags & EXT4_MB_HINT_FIRST)) { /* * We've been searching too long. Let's try to allocate * the best chunk we've found so far */ ext4_mb_try_best_found(ac, &e4b); if (ac->ac_status != AC_STATUS_FOUND) { int lost; /* * Someone more lucky has already allocated it. * The only thing we can do is just take first * found block(s) */ lost = atomic_inc_return(&sbi->s_mb_lost_chunks); mb_debug(sb, "lost chunk, group: %u, start: %d, len: %d, lost: %d\n", ac->ac_b_ex.fe_group, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len, lost); ac->ac_b_ex.fe_group = 0; ac->ac_b_ex.fe_start = 0; ac->ac_b_ex.fe_len = 0; ac->ac_status = AC_STATUS_CONTINUE; ac->ac_flags |= EXT4_MB_HINT_FIRST; ac->ac_criteria = CR_ANY_FREE; goto repeat; } } if (sbi->s_mb_stats && ac->ac_status == AC_STATUS_FOUND) { atomic64_inc(&sbi->s_bal_cX_hits[ac->ac_criteria]); if (ac->ac_flags & EXT4_MB_STREAM_ALLOC && ac->ac_b_ex.fe_group == ac->ac_g_ex.fe_group) atomic_inc(&sbi->s_bal_stream_goals); } out: if (!err && ac->ac_status != AC_STATUS_FOUND && ac->ac_first_err) err = ac->ac_first_err; mb_debug(sb, "Best len %d, origin len %d, ac_status %u, ac_flags 0x%x, cr %d ret %d\n", ac->ac_b_ex.fe_len, ac->ac_o_ex.fe_len, ac->ac_status, ac->ac_flags, ac->ac_criteria, err); if (ac->ac_prefetch_nr) ext4_mb_prefetch_fini(sb, ac->ac_prefetch_grp, ac->ac_prefetch_nr); return err; } static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) { struct super_block *sb = pde_data(file_inode(seq->file)); ext4_group_t group; if (*pos < 0 || *pos >= ext4_get_groups_count(sb)) return NULL; group = *pos + 1; return (void *) ((unsigned long) group); 
} static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) { struct super_block *sb = pde_data(file_inode(seq->file)); ext4_group_t group; ++*pos; if (*pos < 0 || *pos >= ext4_get_groups_count(sb)) return NULL; group = *pos + 1; return (void *) ((unsigned long) group); } static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) { struct super_block *sb = pde_data(file_inode(seq->file)); ext4_group_t group = (ext4_group_t) ((unsigned long) v); int i, err; char nbuf[16]; struct ext4_buddy e4b; struct ext4_group_info *grinfo; unsigned char blocksize_bits = min_t(unsigned char, sb->s_blocksize_bits, EXT4_MAX_BLOCK_LOG_SIZE); DEFINE_RAW_FLEX(struct ext4_group_info, sg, bb_counters, EXT4_MAX_BLOCK_LOG_SIZE + 2); group--; if (group == 0) seq_puts(seq, "#group: free frags first [" " 2^0 2^1 2^2 2^3 2^4 2^5 2^6 " " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]\n"); i = (blocksize_bits + 2) * sizeof(sg->bb_counters[0]) + sizeof(struct ext4_group_info); grinfo = ext4_get_group_info(sb, group); if (!grinfo) return 0; /* Load the group info in memory only if not already loaded. */ if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) { err = ext4_mb_load_buddy(sb, group, &e4b); if (err) { seq_printf(seq, "#%-5u: %s\n", group, ext4_decode_error(NULL, err, nbuf)); return 0; } ext4_mb_unload_buddy(&e4b); } /* * We care only about free space counters in the group info and * these are safe to access even after the buddy has been unloaded */ memcpy(sg, grinfo, i); seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg->bb_free, sg->bb_fragments, sg->bb_first_free); for (i = 0; i <= 13; i++) seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ? 
sg->bb_counters[i] : 0); seq_puts(seq, " ]"); if (EXT4_MB_GRP_BBITMAP_CORRUPT(sg)) seq_puts(seq, " Block bitmap corrupted!"); seq_putc(seq, '\n'); return 0; } static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v) { } const struct seq_operations ext4_mb_seq_groups_ops = { .start = ext4_mb_seq_groups_start, .next = ext4_mb_seq_groups_next, .stop = ext4_mb_seq_groups_stop, .show = ext4_mb_seq_groups_show, }; int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset) { struct super_block *sb = seq->private; struct ext4_sb_info *sbi = EXT4_SB(sb); seq_puts(seq, "mballoc:\n"); if (!sbi->s_mb_stats) { seq_puts(seq, "\tmb stats collection turned off.\n"); seq_puts( seq, "\tTo enable, please write \"1\" to sysfs file mb_stats.\n"); return 0; } seq_printf(seq, "\treqs: %u\n", atomic_read(&sbi->s_bal_reqs)); seq_printf(seq, "\tsuccess: %u\n", atomic_read(&sbi->s_bal_success)); seq_printf(seq, "\tgroups_scanned: %u\n", atomic_read(&sbi->s_bal_groups_scanned)); /* CR_POWER2_ALIGNED stats */ seq_puts(seq, "\tcr_p2_aligned_stats:\n"); seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[CR_POWER2_ALIGNED])); seq_printf( seq, "\t\tgroups_considered: %llu\n", atomic64_read( &sbi->s_bal_cX_groups_considered[CR_POWER2_ALIGNED])); seq_printf(seq, "\t\textents_scanned: %u\n", atomic_read(&sbi->s_bal_cX_ex_scanned[CR_POWER2_ALIGNED])); seq_printf(seq, "\t\tuseless_loops: %llu\n", atomic64_read(&sbi->s_bal_cX_failed[CR_POWER2_ALIGNED])); /* CR_GOAL_LEN_FAST stats */ seq_puts(seq, "\tcr_goal_fast_stats:\n"); seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[CR_GOAL_LEN_FAST])); seq_printf(seq, "\t\tgroups_considered: %llu\n", atomic64_read( &sbi->s_bal_cX_groups_considered[CR_GOAL_LEN_FAST])); seq_printf(seq, "\t\textents_scanned: %u\n", atomic_read(&sbi->s_bal_cX_ex_scanned[CR_GOAL_LEN_FAST])); seq_printf(seq, "\t\tuseless_loops: %llu\n", atomic64_read(&sbi->s_bal_cX_failed[CR_GOAL_LEN_FAST])); /* CR_BEST_AVAIL_LEN stats */ 
seq_puts(seq, "\tcr_best_avail_stats:\n"); seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[CR_BEST_AVAIL_LEN])); seq_printf( seq, "\t\tgroups_considered: %llu\n", atomic64_read( &sbi->s_bal_cX_groups_considered[CR_BEST_AVAIL_LEN])); seq_printf(seq, "\t\textents_scanned: %u\n", atomic_read(&sbi->s_bal_cX_ex_scanned[CR_BEST_AVAIL_LEN])); seq_printf(seq, "\t\tuseless_loops: %llu\n", atomic64_read(&sbi->s_bal_cX_failed[CR_BEST_AVAIL_LEN])); /* CR_GOAL_LEN_SLOW stats */ seq_puts(seq, "\tcr_goal_slow_stats:\n"); seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[CR_GOAL_LEN_SLOW])); seq_printf(seq, "\t\tgroups_considered: %llu\n", atomic64_read( &sbi->s_bal_cX_groups_considered[CR_GOAL_LEN_SLOW])); seq_printf(seq, "\t\textents_scanned: %u\n", atomic_read(&sbi->s_bal_cX_ex_scanned[CR_GOAL_LEN_SLOW])); seq_printf(seq, "\t\tuseless_loops: %llu\n", atomic64_read(&sbi->s_bal_cX_failed[CR_GOAL_LEN_SLOW])); /* CR_ANY_FREE stats */ seq_puts(seq, "\tcr_any_free_stats:\n"); seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[CR_ANY_FREE])); seq_printf( seq, "\t\tgroups_considered: %llu\n", atomic64_read(&sbi->s_bal_cX_groups_considered[CR_ANY_FREE])); seq_printf(seq, "\t\textents_scanned: %u\n", atomic_read(&sbi->s_bal_cX_ex_scanned[CR_ANY_FREE])); seq_printf(seq, "\t\tuseless_loops: %llu\n", atomic64_read(&sbi->s_bal_cX_failed[CR_ANY_FREE])); /* Aggregates */ seq_printf(seq, "\textents_scanned: %u\n", atomic_read(&sbi->s_bal_ex_scanned)); seq_printf(seq, "\t\tgoal_hits: %u\n", atomic_read(&sbi->s_bal_goals)); seq_printf(seq, "\t\tstream_goal_hits: %u\n", atomic_read(&sbi->s_bal_stream_goals)); seq_printf(seq, "\t\tlen_goal_hits: %u\n", atomic_read(&sbi->s_bal_len_goals)); seq_printf(seq, "\t\t2^n_hits: %u\n", atomic_read(&sbi->s_bal_2orders)); seq_printf(seq, "\t\tbreaks: %u\n", atomic_read(&sbi->s_bal_breaks)); seq_printf(seq, "\t\tlost: %u\n", atomic_read(&sbi->s_mb_lost_chunks)); seq_printf(seq, "\tbuddies_generated: 
%u/%u\n", atomic_read(&sbi->s_mb_buddies_generated), ext4_get_groups_count(sb)); seq_printf(seq, "\tbuddies_time_used: %llu\n", atomic64_read(&sbi->s_mb_generation_time)); seq_printf(seq, "\tpreallocated: %u\n", atomic_read(&sbi->s_mb_preallocated)); seq_printf(seq, "\tdiscarded: %u\n", atomic_read(&sbi->s_mb_discarded)); return 0; } static void *ext4_mb_seq_structs_summary_start(struct seq_file *seq, loff_t *pos) { struct super_block *sb = pde_data(file_inode(seq->file)); unsigned long position; if (*pos < 0 || *pos >= 2*MB_NUM_ORDERS(sb)) return NULL; position = *pos + 1; return (void *) ((unsigned long) position); } static void *ext4_mb_seq_structs_summary_next(struct seq_file *seq, void *v, loff_t *pos) { struct super_block *sb = pde_data(file_inode(seq->file)); unsigned long position; ++*pos; if (*pos < 0 || *pos >= 2*MB_NUM_ORDERS(sb)) return NULL; position = *pos + 1; return (void *) ((unsigned long) position); } static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v) { struct super_block *sb = pde_data(file_inode(seq->file)); struct ext4_sb_info *sbi = EXT4_SB(sb); unsigned long position = ((unsigned long) v); struct ext4_group_info *grp; unsigned int count; unsigned long idx; position--; if (position >= MB_NUM_ORDERS(sb)) { position -= MB_NUM_ORDERS(sb); if (position == 0) seq_puts(seq, "avg_fragment_size_lists:\n"); count = 0; xa_for_each(&sbi->s_mb_avg_fragment_size[position], idx, grp) count++; seq_printf(seq, "\tlist_order_%u_groups: %u\n", (unsigned int)position, count); return 0; } if (position == 0) { seq_printf(seq, "optimize_scan: %d\n", test_opt2(sb, MB_OPTIMIZE_SCAN) ? 
1 : 0); seq_puts(seq, "max_free_order_lists:\n"); } count = 0; xa_for_each(&sbi->s_mb_largest_free_orders[position], idx, grp) count++; seq_printf(seq, "\tlist_order_%u_groups: %u\n", (unsigned int)position, count); return 0; } static void ext4_mb_seq_structs_summary_stop(struct seq_file *seq, void *v) { } const struct seq_operations ext4_mb_seq_structs_summary_ops = { .start = ext4_mb_seq_structs_summary_start, .next = ext4_mb_seq_structs_summary_next, .stop = ext4_mb_seq_structs_summary_stop, .show = ext4_mb_seq_structs_summary_show, }; static struct kmem_cache *get_groupinfo_cache(int blocksize_bits) { int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index]; BUG_ON(!cachep); return cachep; } /* * Allocate the top-level s_group_info array for the specified number * of groups */ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups) { struct ext4_sb_info *sbi = EXT4_SB(sb); unsigned size; struct ext4_group_info ***old_groupinfo, ***new_groupinfo; size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); if (size <= sbi->s_group_info_size) return 0; size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size); new_groupinfo = kvzalloc(size, GFP_KERNEL); if (!new_groupinfo) { ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group"); return -ENOMEM; } rcu_read_lock(); old_groupinfo = rcu_dereference(sbi->s_group_info); if (old_groupinfo) memcpy(new_groupinfo, old_groupinfo, sbi->s_group_info_size * sizeof(*sbi->s_group_info)); rcu_read_unlock(); rcu_assign_pointer(sbi->s_group_info, new_groupinfo); sbi->s_group_info_size = size / sizeof(*sbi->s_group_info); if (old_groupinfo) ext4_kvfree_array_rcu(old_groupinfo); ext4_debug("allocated s_groupinfo array for %d meta_bg's\n", sbi->s_group_info_size); return 0; } /* Create and initialize ext4_group_info data for the given group. 
*/ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, struct ext4_group_desc *desc) { int i; int metalen = 0; int idx = group >> EXT4_DESC_PER_BLOCK_BITS(sb); struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_group_info **meta_group_info; struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); /* * First check if this group is the first of a reserved block. * If it's true, we have to allocate a new table of pointers * to ext4_group_info structures */ if (group % EXT4_DESC_PER_BLOCK(sb) == 0) { metalen = sizeof(*meta_group_info) << EXT4_DESC_PER_BLOCK_BITS(sb); meta_group_info = kmalloc(metalen, GFP_NOFS); if (meta_group_info == NULL) { ext4_msg(sb, KERN_ERR, "can't allocate mem " "for a buddy group"); return -ENOMEM; } rcu_read_lock(); rcu_dereference(sbi->s_group_info)[idx] = meta_group_info; rcu_read_unlock(); } meta_group_info = sbi_array_rcu_deref(sbi, s_group_info, idx); i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS); if (meta_group_info[i] == NULL) { ext4_msg(sb, KERN_ERR, "can't allocate buddy mem"); goto exit_group_info; } set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(meta_group_info[i]->bb_state)); /* * initialize bb_free to be able to skip * empty groups without initialization */ if (ext4_has_group_desc_csum(sb) && (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { meta_group_info[i]->bb_free = ext4_free_clusters_after_init(sb, group, desc); } else { meta_group_info[i]->bb_free = ext4_free_group_clusters(sb, desc); } INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); init_rwsem(&meta_group_info[i]->alloc_sem); meta_group_info[i]->bb_free_root = RB_ROOT; meta_group_info[i]->bb_largest_free_order = -1; /* uninit */ meta_group_info[i]->bb_avg_fragment_size_order = -1; /* uninit */ meta_group_info[i]->bb_group = group; mb_group_bb_bitmap_alloc(sb, meta_group_info[i], group); return 0; exit_group_info: /* If a meta_group_info table has been allocated, release it now */ 
if (group % EXT4_DESC_PER_BLOCK(sb) == 0) { struct ext4_group_info ***group_info; rcu_read_lock(); group_info = rcu_dereference(sbi->s_group_info); kfree(group_info[idx]); group_info[idx] = NULL; rcu_read_unlock(); } return -ENOMEM; } /* ext4_mb_add_groupinfo */ static int ext4_mb_init_backend(struct super_block *sb) { ext4_group_t ngroups = ext4_get_groups_count(sb); ext4_group_t i; struct ext4_sb_info *sbi = EXT4_SB(sb); int err; struct ext4_group_desc *desc; struct ext4_group_info ***group_info; struct kmem_cache *cachep; err = ext4_mb_alloc_groupinfo(sb, ngroups); if (err) return err; sbi->s_buddy_cache = new_inode(sb); if (sbi->s_buddy_cache == NULL) { ext4_msg(sb, KERN_ERR, "can't get new inode"); goto err_freesgi; } /* To avoid potentially colliding with an valid on-disk inode number, * use EXT4_BAD_INO for the buddy cache inode number. This inode is * not in the inode hash, so it should never be found by iget(), but * this will avoid confusion if it ever shows up during debugging. */ sbi->s_buddy_cache->i_ino = EXT4_BAD_INO; EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; ext4_set_inode_mapping_order(sbi->s_buddy_cache); for (i = 0; i < ngroups; i++) { cond_resched(); desc = ext4_get_group_desc(sb, i, NULL); if (desc == NULL) { ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i); goto err_freebuddy; } if (ext4_mb_add_groupinfo(sb, i, desc) != 0) goto err_freebuddy; } if (ext4_has_feature_flex_bg(sb)) { /* a single flex group is supposed to be read by a single IO. * 2 ^ s_log_groups_per_flex != UINT_MAX as s_mb_prefetch is * unsigned integer, so the maximum shift is 32. 
*/ if (sbi->s_es->s_log_groups_per_flex >= 32) { ext4_msg(sb, KERN_ERR, "too many log groups per flexible block group"); goto err_freebuddy; } sbi->s_mb_prefetch = min_t(uint, 1 << sbi->s_es->s_log_groups_per_flex, BLK_MAX_SEGMENT_SIZE >> (sb->s_blocksize_bits - 9)); sbi->s_mb_prefetch *= 8; /* 8 prefetch IOs in flight at most */ } else { sbi->s_mb_prefetch = 32; } if (sbi->s_mb_prefetch > ext4_get_groups_count(sb)) sbi->s_mb_prefetch = ext4_get_groups_count(sb); /* * now many real IOs to prefetch within a single allocation at * CR_POWER2_ALIGNED. Given CR_POWER2_ALIGNED is an CPU-related * optimization we shouldn't try to load too many groups, at some point * we should start to use what we've got in memory. * with an average random access time 5ms, it'd take a second to get * 200 groups (* N with flex_bg), so let's make this limit 4 */ sbi->s_mb_prefetch_limit = sbi->s_mb_prefetch * 4; if (sbi->s_mb_prefetch_limit > ext4_get_groups_count(sb)) sbi->s_mb_prefetch_limit = ext4_get_groups_count(sb); return 0; err_freebuddy: cachep = get_groupinfo_cache(sb->s_blocksize_bits); while (i-- > 0) { struct ext4_group_info *grp = ext4_get_group_info(sb, i); if (grp) kmem_cache_free(cachep, grp); } i = sbi->s_group_info_size; rcu_read_lock(); group_info = rcu_dereference(sbi->s_group_info); while (i-- > 0) kfree(group_info[i]); rcu_read_unlock(); iput(sbi->s_buddy_cache); err_freesgi: kvfree(rcu_access_pointer(sbi->s_group_info)); return -ENOMEM; } static void ext4_groupinfo_destroy_slabs(void) { int i; for (i = 0; i < NR_GRPINFO_CACHES; i++) { kmem_cache_destroy(ext4_groupinfo_caches[i]); ext4_groupinfo_caches[i] = NULL; } } static int ext4_groupinfo_create_slab(size_t size) { static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex); int slab_size; int blocksize_bits = order_base_2(size); int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; struct kmem_cache *cachep; if (cache_index >= NR_GRPINFO_CACHES) return -EINVAL; if (unlikely(cache_index < 0)) cache_index = 0; 
mutex_lock(&ext4_grpinfo_slab_create_mutex); if (ext4_groupinfo_caches[cache_index]) { mutex_unlock(&ext4_grpinfo_slab_create_mutex); return 0; /* Already created */ } slab_size = offsetof(struct ext4_group_info, bb_counters[blocksize_bits + 2]); cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index], slab_size, 0, SLAB_RECLAIM_ACCOUNT, NULL); ext4_groupinfo_caches[cache_index] = cachep; mutex_unlock(&ext4_grpinfo_slab_create_mutex); if (!cachep) { printk(KERN_EMERG "EXT4-fs: no memory for groupinfo slab cache\n"); return -ENOMEM; } return 0; } static void ext4_discard_work(struct work_struct *work) { struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info, s_discard_work); struct super_block *sb = sbi->s_sb; struct ext4_free_data *fd, *nfd; struct ext4_buddy e4b; LIST_HEAD(discard_list); ext4_group_t grp, load_grp; int err = 0; spin_lock(&sbi->s_md_lock); list_splice_init(&sbi->s_discard_list, &discard_list); spin_unlock(&sbi->s_md_lock); load_grp = UINT_MAX; list_for_each_entry_safe(fd, nfd, &discard_list, efd_list) { /* * If filesystem is umounting or no memory or suffering * from no space, give up the discard */ if ((sb->s_flags & SB_ACTIVE) && !err && !atomic_read(&sbi->s_retry_alloc_pending)) { grp = fd->efd_group; if (grp != load_grp) { if (load_grp != UINT_MAX) ext4_mb_unload_buddy(&e4b); err = ext4_mb_load_buddy(sb, grp, &e4b); if (err) { kmem_cache_free(ext4_free_data_cachep, fd); load_grp = UINT_MAX; continue; } else { load_grp = grp; } } ext4_lock_group(sb, grp); ext4_try_to_trim_range(sb, &e4b, fd->efd_start_cluster, fd->efd_start_cluster + fd->efd_count - 1, 1); ext4_unlock_group(sb, grp); } kmem_cache_free(ext4_free_data_cachep, fd); } if (load_grp != UINT_MAX) ext4_mb_unload_buddy(&e4b); } static inline void ext4_mb_avg_fragment_size_destroy(struct ext4_sb_info *sbi) { if (!sbi->s_mb_avg_fragment_size) return; for (int i = 0; i < MB_NUM_ORDERS(sbi->s_sb); i++) xa_destroy(&sbi->s_mb_avg_fragment_size[i]); 
kfree(sbi->s_mb_avg_fragment_size); sbi->s_mb_avg_fragment_size = NULL; } static inline void ext4_mb_largest_free_orders_destroy(struct ext4_sb_info *sbi) { if (!sbi->s_mb_largest_free_orders) return; for (int i = 0; i < MB_NUM_ORDERS(sbi->s_sb); i++) xa_destroy(&sbi->s_mb_largest_free_orders[i]); kfree(sbi->s_mb_largest_free_orders); sbi->s_mb_largest_free_orders = NULL; } int ext4_mb_init(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); unsigned i, j; unsigned offset, offset_incr; unsigned max; int ret; i = MB_NUM_ORDERS(sb) * sizeof(*sbi->s_mb_offsets); sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); if (sbi->s_mb_offsets == NULL) { ret = -ENOMEM; goto out; } i = MB_NUM_ORDERS(sb) * sizeof(*sbi->s_mb_maxs); sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); if (sbi->s_mb_maxs == NULL) { ret = -ENOMEM; goto out; } ret = ext4_groupinfo_create_slab(sb->s_blocksize); if (ret < 0) goto out; /* order 0 is regular bitmap */ sbi->s_mb_maxs[0] = sb->s_blocksize << 3; sbi->s_mb_offsets[0] = 0; i = 1; offset = 0; offset_incr = 1 << (sb->s_blocksize_bits - 1); max = sb->s_blocksize << 2; do { sbi->s_mb_offsets[i] = offset; sbi->s_mb_maxs[i] = max; offset += offset_incr; offset_incr = offset_incr >> 1; max = max >> 1; i++; } while (i < MB_NUM_ORDERS(sb)); sbi->s_mb_avg_fragment_size = kmalloc_objs(struct xarray, MB_NUM_ORDERS(sb)); if (!sbi->s_mb_avg_fragment_size) { ret = -ENOMEM; goto out; } for (i = 0; i < MB_NUM_ORDERS(sb); i++) xa_init(&sbi->s_mb_avg_fragment_size[i]); sbi->s_mb_largest_free_orders = kmalloc_objs(struct xarray, MB_NUM_ORDERS(sb)); if (!sbi->s_mb_largest_free_orders) { ret = -ENOMEM; goto out; } for (i = 0; i < MB_NUM_ORDERS(sb); i++) xa_init(&sbi->s_mb_largest_free_orders[i]); spin_lock_init(&sbi->s_md_lock); atomic_set(&sbi->s_mb_free_pending, 0); INIT_LIST_HEAD(&sbi->s_freed_data_list[0]); INIT_LIST_HEAD(&sbi->s_freed_data_list[1]); INIT_LIST_HEAD(&sbi->s_discard_list); INIT_WORK(&sbi->s_discard_work, ext4_discard_work); 
atomic_set(&sbi->s_retry_alloc_pending, 0); sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN; sbi->s_mb_stats = MB_DEFAULT_STATS; sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; sbi->s_mb_best_avail_max_trim_order = MB_DEFAULT_BEST_AVAIL_TRIM_ORDER; /* * The default group preallocation is 512, which for 4k block * sizes translates to 2 megabytes. However for bigalloc file * systems, this is probably too big (i.e, if the cluster size * is 1 megabyte, then group preallocation size becomes half a * gigabyte!). As a default, we will keep a two megabyte * group pralloc size for cluster sizes up to 64k, and after * that, we will force a minimum group preallocation size of * 32 clusters. This translates to 8 megs when the cluster * size is 256k, and 32 megs when the cluster size is 1 meg, * which seems reasonable as a default. */ sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >> sbi->s_cluster_bits, 32); /* * If there is a s_stripe > 1, then we set the s_mb_group_prealloc * to the lowest multiple of s_stripe which is bigger than * the s_mb_group_prealloc as determined above. We want * the preallocation size to be an exact multiple of the * RAID stripe size so that preallocations don't fragment * the stripes. 
*/ if (sbi->s_stripe > 1) { sbi->s_mb_group_prealloc = roundup( sbi->s_mb_group_prealloc, EXT4_NUM_B2C(sbi, sbi->s_stripe)); } sbi->s_mb_nr_global_goals = umin(num_possible_cpus(), DIV_ROUND_UP(sbi->s_groups_count, 4)); sbi->s_mb_last_groups = kzalloc_objs(ext4_group_t, sbi->s_mb_nr_global_goals); if (sbi->s_mb_last_groups == NULL) { ret = -ENOMEM; goto out; } sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); if (sbi->s_locality_groups == NULL) { ret = -ENOMEM; goto out_free_last_groups; } for_each_possible_cpu(i) { struct ext4_locality_group *lg; lg = per_cpu_ptr(sbi->s_locality_groups, i); mutex_init(&lg->lg_mutex); for (j = 0; j < PREALLOC_TB_SIZE; j++) INIT_LIST_HEAD(&lg->lg_prealloc_list[j]); spin_lock_init(&lg->lg_prealloc_lock); } if (bdev_nonrot(sb->s_bdev)) sbi->s_mb_max_linear_groups = 0; else sbi->s_mb_max_linear_groups = MB_DEFAULT_LINEAR_LIMIT; /* init file for buddy data */ ret = ext4_mb_init_backend(sb); if (ret != 0) goto out_free_locality_groups; return 0; out_free_locality_groups: free_percpu(sbi->s_locality_groups); sbi->s_locality_groups = NULL; out_free_last_groups: kfree(sbi->s_mb_last_groups); sbi->s_mb_last_groups = NULL; out: ext4_mb_avg_fragment_size_destroy(sbi); ext4_mb_largest_free_orders_destroy(sbi); kfree(sbi->s_mb_offsets); sbi->s_mb_offsets = NULL; kfree(sbi->s_mb_maxs); sbi->s_mb_maxs = NULL; return ret; } /* need to called with the ext4 group lock held */ static int ext4_mb_cleanup_pa(struct ext4_group_info *grp) { struct ext4_prealloc_space *pa; struct list_head *cur, *tmp; int count = 0; list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) { pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list); list_del(&pa->pa_group_list); count++; kmem_cache_free(ext4_pspace_cachep, pa); } return count; } void ext4_mb_release(struct super_block *sb) { ext4_group_t ngroups = ext4_get_groups_count(sb); ext4_group_t i; int num_meta_group_infos; struct ext4_group_info *grinfo, ***group_info; struct ext4_sb_info *sbi = 
EXT4_SB(sb); struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); int count; /* * wait the discard work to drain all of ext4_free_data */ flush_work(&sbi->s_discard_work); WARN_ON_ONCE(!list_empty(&sbi->s_discard_list)); group_info = rcu_access_pointer(sbi->s_group_info); if (group_info) { for (i = 0; i < ngroups; i++) { cond_resched(); grinfo = ext4_get_group_info(sb, i); if (!grinfo) continue; mb_group_bb_bitmap_free(grinfo); ext4_lock_group(sb, i); count = ext4_mb_cleanup_pa(grinfo); if (count) mb_debug(sb, "mballoc: %d PAs left\n", count); ext4_unlock_group(sb, i); kmem_cache_free(cachep, grinfo); } num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); for (i = 0; i < num_meta_group_infos; i++) kfree(group_info[i]); kvfree(group_info); } ext4_mb_avg_fragment_size_destroy(sbi); ext4_mb_largest_free_orders_destroy(sbi); kfree(sbi->s_mb_offsets); kfree(sbi->s_mb_maxs); iput(sbi->s_buddy_cache); if (sbi->s_mb_stats) { ext4_msg(sb, KERN_INFO, "mballoc: %u blocks %u reqs (%u success)", atomic_read(&sbi->s_bal_allocated), atomic_read(&sbi->s_bal_reqs), atomic_read(&sbi->s_bal_success)); ext4_msg(sb, KERN_INFO, "mballoc: %u extents scanned, %u groups scanned, %u goal hits, " "%u 2^N hits, %u breaks, %u lost", atomic_read(&sbi->s_bal_ex_scanned), atomic_read(&sbi->s_bal_groups_scanned), atomic_read(&sbi->s_bal_goals), atomic_read(&sbi->s_bal_2orders), atomic_read(&sbi->s_bal_breaks), atomic_read(&sbi->s_mb_lost_chunks)); ext4_msg(sb, KERN_INFO, "mballoc: %u generated and it took %llu", atomic_read(&sbi->s_mb_buddies_generated), atomic64_read(&sbi->s_mb_generation_time)); ext4_msg(sb, KERN_INFO, "mballoc: %u preallocated, %u discarded", atomic_read(&sbi->s_mb_preallocated), atomic_read(&sbi->s_mb_discarded)); } free_percpu(sbi->s_locality_groups); kfree(sbi->s_mb_last_groups); } static inline int ext4_issue_discard(struct super_block *sb, ext4_group_t block_group, ext4_grpblk_t cluster, int count) { ext4_fsblk_t 
discard_block; discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) + ext4_group_first_block_no(sb, block_group)); count = EXT4_C2B(EXT4_SB(sb), count); trace_ext4_discard_blocks(sb, (unsigned long long) discard_block, count); return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); } static void ext4_free_data_in_buddy(struct super_block *sb, struct ext4_free_data *entry) { struct ext4_buddy e4b; struct ext4_group_info *db; int err, count = 0; mb_debug(sb, "gonna free %u blocks in group %u (0x%p):", entry->efd_count, entry->efd_group, entry); err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b); /* we expect to find existing buddy because it's pinned */ BUG_ON(err != 0); atomic_sub(entry->efd_count, &EXT4_SB(sb)->s_mb_free_pending); db = e4b.bd_info; /* there are blocks to put in buddy to make them really free */ count += entry->efd_count; ext4_lock_group(sb, entry->efd_group); /* Take it out of per group rb tree */ rb_erase(&entry->efd_node, &(db->bb_free_root)); mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count); /* * Clear the trimmed flag for the group so that the next * ext4_trim_fs can trim it. */ EXT4_MB_GRP_CLEAR_TRIMMED(db); if (!db->bb_free_root.rb_node) { /* No more items in the per group rb tree * balance refcounts from ext4_mb_free_metadata() */ folio_put(e4b.bd_buddy_folio); folio_put(e4b.bd_bitmap_folio); } ext4_unlock_group(sb, entry->efd_group); ext4_mb_unload_buddy(&e4b); mb_debug(sb, "freed %d blocks in 1 structures\n", count); } /* * This function is called by the jbd2 layer once the commit has finished, * so we know we can free the blocks that were released with that commit. 
*/ void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_free_data *entry, *tmp; LIST_HEAD(freed_data_list); struct list_head *s_freed_head = &sbi->s_freed_data_list[commit_tid & 1]; bool wake; list_replace_init(s_freed_head, &freed_data_list); list_for_each_entry(entry, &freed_data_list, efd_list) ext4_free_data_in_buddy(sb, entry); if (test_opt(sb, DISCARD)) { spin_lock(&sbi->s_md_lock); wake = list_empty(&sbi->s_discard_list); list_splice_tail(&freed_data_list, &sbi->s_discard_list); spin_unlock(&sbi->s_md_lock); if (wake) queue_work(system_dfl_wq, &sbi->s_discard_work); } else { list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list) kmem_cache_free(ext4_free_data_cachep, entry); } } int __init ext4_init_mballoc(void) { ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space, SLAB_RECLAIM_ACCOUNT); if (ext4_pspace_cachep == NULL) goto out; ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context, SLAB_RECLAIM_ACCOUNT); if (ext4_ac_cachep == NULL) goto out_pa_free; ext4_free_data_cachep = KMEM_CACHE(ext4_free_data, SLAB_RECLAIM_ACCOUNT); if (ext4_free_data_cachep == NULL) goto out_ac_free; return 0; out_ac_free: kmem_cache_destroy(ext4_ac_cachep); out_pa_free: kmem_cache_destroy(ext4_pspace_cachep); out: return -ENOMEM; } void ext4_exit_mballoc(void) { /* * Wait for completion of call_rcu()'s on ext4_pspace_cachep * before destroying the slab cache. 
*/ rcu_barrier(); kmem_cache_destroy(ext4_pspace_cachep); kmem_cache_destroy(ext4_ac_cachep); kmem_cache_destroy(ext4_free_data_cachep); ext4_groupinfo_destroy_slabs(); } #define EXT4_MB_BITMAP_MARKED_CHECK 0x0001 #define EXT4_MB_SYNC_UPDATE 0x0002 int ext4_mb_mark_context(handle_t *handle, struct super_block *sb, bool state, ext4_group_t group, ext4_grpblk_t blkoff, ext4_grpblk_t len, int flags, ext4_grpblk_t *ret_changed) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct buffer_head *bitmap_bh = NULL; struct ext4_group_desc *gdp; struct buffer_head *gdp_bh; int err; unsigned int i, already, changed = len; KUNIT_STATIC_STUB_REDIRECT(ext4_mb_mark_context, handle, sb, state, group, blkoff, len, flags, ret_changed); if (ret_changed) *ret_changed = 0; bitmap_bh = ext4_read_block_bitmap(sb, group); if (IS_ERR(bitmap_bh)) return PTR_ERR(bitmap_bh); if (handle) { BUFFER_TRACE(bitmap_bh, "getting write access"); err = ext4_journal_get_write_access(handle, sb, bitmap_bh, EXT4_JTR_NONE); if (err) goto out_err; } err = -EIO; gdp = ext4_get_group_desc(sb, group, &gdp_bh); if (!gdp) goto out_err; if (handle) { BUFFER_TRACE(gdp_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, sb, gdp_bh, EXT4_JTR_NONE); if (err) goto out_err; } ext4_lock_group(sb, group); if (ext4_has_group_desc_csum(sb) && (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); ext4_free_group_clusters_set(sb, gdp, ext4_free_clusters_after_init(sb, group, gdp)); } if (flags & EXT4_MB_BITMAP_MARKED_CHECK) { already = 0; for (i = 0; i < len; i++) if (mb_test_bit(blkoff + i, bitmap_bh->b_data) == state) already++; changed = len - already; } if (state) { mb_set_bits(bitmap_bh->b_data, blkoff, len); ext4_free_group_clusters_set(sb, gdp, ext4_free_group_clusters(sb, gdp) - changed); } else { mb_clear_bits(bitmap_bh->b_data, blkoff, len); ext4_free_group_clusters_set(sb, gdp, ext4_free_group_clusters(sb, gdp) + changed); } 
ext4_block_bitmap_csum_set(sb, gdp, bitmap_bh); ext4_group_desc_csum_set(sb, group, gdp); ext4_unlock_group(sb, group); if (ret_changed) *ret_changed = changed; if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, group); struct flex_groups *fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group); if (state) atomic64_sub(changed, &fg->free_clusters); else atomic64_add(changed, &fg->free_clusters); } err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); if (err) goto out_err; err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh); if (err) goto out_err; if (flags & EXT4_MB_SYNC_UPDATE) { sync_dirty_buffer(bitmap_bh); sync_dirty_buffer(gdp_bh); } out_err: brelse(bitmap_bh); return err; } /* * Check quota and mark chosen space (ac->ac_b_ex) non-free in bitmaps * Returns 0 if success or error code */ static noinline_for_stack int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, handle_t *handle) { struct ext4_group_desc *gdp; struct ext4_sb_info *sbi; struct super_block *sb; ext4_fsblk_t block; int err, len; int flags = 0; ext4_grpblk_t changed; BUG_ON(ac->ac_status != AC_STATUS_FOUND); BUG_ON(ac->ac_b_ex.fe_len <= 0); sb = ac->ac_sb; sbi = EXT4_SB(sb); gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, NULL); if (!gdp) return -EIO; ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group, ext4_free_group_clusters(sb, gdp)); block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len); if (!ext4_inode_block_valid(ac->ac_inode, block, len)) { ext4_error(sb, "Allocating blocks %llu-%llu which overlap " "fs metadata", block, block+len); /* File system mounted not to panic on error * Fix the bitmap and return EFSCORRUPTED * We leak some of the blocks here. 
*/ err = ext4_mb_mark_context(handle, sb, true, ac->ac_b_ex.fe_group, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len, 0, NULL); if (!err) err = -EFSCORRUPTED; return err; } #ifdef AGGRESSIVE_CHECK flags |= EXT4_MB_BITMAP_MARKED_CHECK; #endif err = ext4_mb_mark_context(handle, sb, true, ac->ac_b_ex.fe_group, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len, flags, &changed); if (err && changed == 0) return err; #ifdef AGGRESSIVE_CHECK BUG_ON(changed != ac->ac_b_ex.fe_len); #endif percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len); return err; } /* * Idempotent helper for Ext4 fast commit replay path to set the state of * blocks in bitmaps and update counters. */ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block, int len, bool state) { struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_group_t group; ext4_grpblk_t blkoff; int err = 0; unsigned int clen, thisgrp_len; while (len > 0) { ext4_get_group_no_and_offset(sb, block, &group, &blkoff); /* * Check to see if we are freeing blocks across a group * boundary. * In case of flex_bg, this can happen that (block, len) may * span across more than one group. In that case we need to * get the corresponding group metadata to work with. * For this we have goto again loop. */ thisgrp_len = min(len, EXT4_BLOCKS_PER_GROUP(sb) - EXT4_C2B(sbi, blkoff)); clen = EXT4_NUM_B2C(sbi, thisgrp_len); if (!ext4_sb_block_valid(sb, NULL, block, thisgrp_len)) { ext4_error(sb, "Marking blocks in system zone - " "Block = %llu, len = %u", block, thisgrp_len); break; } err = ext4_mb_mark_context(NULL, sb, state, group, blkoff, clen, EXT4_MB_BITMAP_MARKED_CHECK | EXT4_MB_SYNC_UPDATE, NULL); if (err) break; block += thisgrp_len; len -= thisgrp_len; BUG_ON(len < 0); } } /* * here we normalize request for locality group * Group request are normalized to s_mb_group_prealloc, which goes to * s_strip if we set the same via mount option. 
* s_mb_group_prealloc can be configured via * /sys/fs/ext4/<partition>/mb_group_prealloc * * XXX: should we try to preallocate more than the group has now? */ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac) { struct super_block *sb = ac->ac_sb; struct ext4_locality_group *lg = ac->ac_lg; BUG_ON(lg == NULL); ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; mb_debug(sb, "goal %u blocks for locality group\n", ac->ac_g_ex.fe_len); } /* * This function returns the next element to look at during inode * PA rbtree walk. We assume that we have held the inode PA rbtree lock * (ei->i_prealloc_lock) * * new_start The start of the range we want to compare * cur_start The existing start that we are comparing against * node The node of the rb_tree */ static inline struct rb_node* ext4_mb_pa_rb_next_iter(ext4_lblk_t new_start, ext4_lblk_t cur_start, struct rb_node *node) { if (new_start < cur_start) return node->rb_left; else return node->rb_right; } static inline void ext4_mb_pa_assert_overlap(struct ext4_allocation_context *ac, ext4_lblk_t start, loff_t end) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); struct ext4_prealloc_space *tmp_pa; ext4_lblk_t tmp_pa_start; loff_t tmp_pa_end; struct rb_node *iter; read_lock(&ei->i_prealloc_lock); for (iter = ei->i_prealloc_node.rb_node; iter; iter = ext4_mb_pa_rb_next_iter(start, tmp_pa_start, iter)) { tmp_pa = rb_entry(iter, struct ext4_prealloc_space, pa_node.inode_node); tmp_pa_start = tmp_pa->pa_lstart; tmp_pa_end = pa_logical_end(sbi, tmp_pa); spin_lock(&tmp_pa->pa_lock); if (tmp_pa->pa_deleted == 0) BUG_ON(!(start >= tmp_pa_end || end <= tmp_pa_start)); spin_unlock(&tmp_pa->pa_lock); } read_unlock(&ei->i_prealloc_lock); } /* * Given an allocation context "ac" and a range "start", "end", check * and adjust boundaries if the range overlaps with any of the existing * preallocatoins stored in the corresponding inode of the allocation context. 
*
 * Parameters:
 *	ac			allocation context
 *	start			start of the new range
 *	end			end of the new range
 */
static inline void
ext4_mb_pa_adjust_overlap(struct ext4_allocation_context *ac,
			  ext4_lblk_t *start, loff_t *end)
{
	struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	struct ext4_prealloc_space *tmp_pa = NULL, *left_pa = NULL, *right_pa = NULL;
	struct rb_node *iter;
	/*
	 * right_pa_start / left_pa_end start out as -1 placeholders; they are
	 * only read below after being assigned under the matching
	 * "if (right_pa)" / "if (left_pa)" check.
	 */
	ext4_lblk_t new_start, tmp_pa_start, right_pa_start = -1;
	loff_t new_end, tmp_pa_end, left_pa_end = -1;

	new_start = *start;
	new_end = *end;

	/*
	 * Adjust the normalized range so that it doesn't overlap with any
	 * existing preallocated blocks(PAs). Make sure to hold the rbtree lock
	 * so it doesn't change underneath us.
	 */
	read_lock(&ei->i_prealloc_lock);

	/* Step 1: find any one immediate neighboring PA of the normalized range */
	for (iter = ei->i_prealloc_node.rb_node; iter;
	     iter = ext4_mb_pa_rb_next_iter(ac->ac_o_ex.fe_logical,
					    tmp_pa_start, iter)) {
		tmp_pa = rb_entry(iter, struct ext4_prealloc_space,
				  pa_node.inode_node);
		tmp_pa_start = tmp_pa->pa_lstart;
		tmp_pa_end = pa_logical_end(sbi, tmp_pa);

		/* PA must not overlap original request */
		spin_lock(&tmp_pa->pa_lock);
		if (tmp_pa->pa_deleted == 0)
			BUG_ON(!(ac->ac_o_ex.fe_logical >= tmp_pa_end ||
				 ac->ac_o_ex.fe_logical < tmp_pa_start));
		spin_unlock(&tmp_pa->pa_lock);
	}

	/*
	 * Step 2: check if the found PA is left or right neighbor and
	 * get the other neighbor
	 */
	if (tmp_pa) {
		if (tmp_pa->pa_lstart < ac->ac_o_ex.fe_logical) {
			struct rb_node *tmp;

			left_pa = tmp_pa;
			tmp = rb_next(&left_pa->pa_node.inode_node);
			if (tmp) {
				right_pa = rb_entry(tmp,
						    struct ext4_prealloc_space,
						    pa_node.inode_node);
			}
		} else {
			struct rb_node *tmp;

			right_pa = tmp_pa;
			tmp = rb_prev(&right_pa->pa_node.inode_node);
			if (tmp) {
				left_pa = rb_entry(tmp,
						   struct ext4_prealloc_space,
						   pa_node.inode_node);
			}
		}
	}

	/* Step 3: get the non deleted neighbors */
	if (left_pa) {
		/* walk left until a live PA is found or the tree is exhausted */
		for (iter = &left_pa->pa_node.inode_node;;
		     iter = rb_prev(iter)) {
			if (!iter) {
				left_pa = NULL;
				break;
			}

			tmp_pa = rb_entry(iter, struct ext4_prealloc_space,
					  pa_node.inode_node);
			left_pa = tmp_pa;
			spin_lock(&tmp_pa->pa_lock);
			if (tmp_pa->pa_deleted == 0) {
				spin_unlock(&tmp_pa->pa_lock);
				break;
			}
			spin_unlock(&tmp_pa->pa_lock);
		}
	}

	if (right_pa) {
		/* walk right until a live PA is found or the tree is exhausted */
		for (iter = &right_pa->pa_node.inode_node;;
		     iter = rb_next(iter)) {
			if (!iter) {
				right_pa = NULL;
				break;
			}

			tmp_pa = rb_entry(iter, struct ext4_prealloc_space,
					  pa_node.inode_node);
			right_pa = tmp_pa;
			spin_lock(&tmp_pa->pa_lock);
			if (tmp_pa->pa_deleted == 0) {
				spin_unlock(&tmp_pa->pa_lock);
				break;
			}
			spin_unlock(&tmp_pa->pa_lock);
		}
	}

	if (left_pa) {
		left_pa_end = pa_logical_end(sbi, left_pa);
		BUG_ON(left_pa_end > ac->ac_o_ex.fe_logical);
	}

	if (right_pa) {
		right_pa_start = right_pa->pa_lstart;
		BUG_ON(right_pa_start <= ac->ac_o_ex.fe_logical);
	}

	/* Step 4: trim our normalized range to not overlap with the neighbors */
	if (left_pa) {
		if (left_pa_end > new_start)
			new_start = left_pa_end;
	}

	if (right_pa) {
		if (right_pa_start < new_end)
			new_end = right_pa_start;
	}
	read_unlock(&ei->i_prealloc_lock);

	/* XXX: extra loop to check we really don't overlap preallocations */
	ext4_mb_pa_assert_overlap(ac, new_start, new_end);

	/* hand the (possibly trimmed) range back to the caller */
	*start = new_start;
	*end = new_end;
}

/*
 * Normalization means making request better in terms of
 * size and alignment
 *
 * @ac: allocation context; on success ac_g_ex (goal extent),
 *      ac_orig_goal_len and possibly ac_flags are updated
 * @ar: allocation request; its pleft/lleft/pright/lright fields are read
 *      to avoid covering neighbouring blocks and to pick a goal that can
 *      merge with them
 *
 * Returns without touching the goal when the request is not a data
 * request or carries EXT4_MB_HINT_GOAL_ONLY / EXT4_MB_HINT_NOPREALLOC.
 * Group allocations are delegated to ext4_mb_normalize_group_request().
 */
static noinline_for_stack void
ext4_mb_normalize_request(struct ext4_allocation_context *ac,
				struct ext4_allocation_request *ar)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	struct ext4_super_block *es = sbi->s_es;
	int bsbits, max;
	loff_t size, start_off, end;
	loff_t orig_size __maybe_unused;
	ext4_lblk_t start;

	/* do normalize only data requests, metadata requests
	   do not need preallocation */
	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
		return;

	/* sometime caller may want exact blocks */
	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
		return;

	/* caller may indicate that preallocation isn't
	 * required (it's a tail, for example) */
	if (ac->ac_flags & EXT4_MB_HINT_NOPREALLOC)
		return;

	if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) {
		ext4_mb_normalize_group_request(ac);
		return ;
	}

	bsbits = ac->ac_sb->s_blocksize_bits;

	/* first, let's learn actual file size
	 * given current request is allocated */
	size = extent_logical_end(sbi, &ac->ac_o_ex);
	size = size << bsbits;
	if (size < i_size_read(ac->ac_inode))
		size = i_size_read(ac->ac_inode);
	orig_size = size;

	/* max size of free chunks */
	max = 2 << bsbits;

#define NRL_CHECK_SIZE(req, size, max, chunk_size)	\
		(req <= (size) || max <= (chunk_size))

	/* first, try to predict filesize */
	/* XXX: should this table be tunable? */
	start_off = 0;
	if (size <= 16 * 1024) {
		size = 16 * 1024;
	} else if (size <= 32 * 1024) {
		size = 32 * 1024;
	} else if (size <= 64 * 1024) {
		size = 64 * 1024;
	} else if (size <= 128 * 1024) {
		size = 128 * 1024;
	} else if (size <= 256 * 1024) {
		size = 256 * 1024;
	} else if (size <= 512 * 1024) {
		size = 512 * 1024;
	} else if (size <= 1024 * 1024) {
		size = 1024 * 1024;
	} else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
		/* align the start offset down to a 2MB (1 << 21) boundary */
		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
						(21 - bsbits)) << 21;
		size = 2 * 1024 * 1024;
	} else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
		/* align the start offset down to a 4MB (1 << 22) boundary */
		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
							(22 - bsbits)) << 22;
		size = 4 * 1024 * 1024;
	} else if (NRL_CHECK_SIZE(EXT4_C2B(sbi, ac->ac_o_ex.fe_len),
					(8<<20)>>bsbits, max, 8 * 1024)) {
		/* align the start offset down to an 8MB (1 << 23) boundary */
		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
							(23 - bsbits)) << 23;
		size = 8 * 1024 * 1024;
	} else {
		/* no table entry matched: use the original request as is */
		start_off = (loff_t) ac->ac_o_ex.fe_logical << bsbits;
		size	  = (loff_t) EXT4_C2B(sbi,
					      ac->ac_o_ex.fe_len) << bsbits;
	}
	/* from here on "size" and "start" are in blocks, not bytes */
	size = size >> bsbits;
	start = start_off >> bsbits;

	/*
	 * For tiny groups (smaller than 8MB) the chosen allocation
	 * alignment may be larger than group size. Make sure the
	 * alignment does not move allocation to a different group which
	 * makes mballoc fail assertions later.
	 */
	start = max(start, rounddown(ac->ac_o_ex.fe_logical,
			(ext4_lblk_t)EXT4_BLOCKS_PER_GROUP(ac->ac_sb)));

	/* avoid unnecessary preallocation that may trigger assertions */
	if (start + size > EXT_MAX_BLOCKS)
		size = EXT_MAX_BLOCKS - start;

	/* don't cover already allocated blocks in selected range */
	if (ar->pleft && start <= ar->lleft) {
		size -= ar->lleft + 1 - start;
		start = ar->lleft + 1;
	}
	if (ar->pright && start + size - 1 >= ar->lright)
		size -= start + size - ar->lright;

	/*
	 * Trim allocation request for filesystems with artificially small
	 * groups.
	 */
	if (size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb))
		size = EXT4_BLOCKS_PER_GROUP(ac->ac_sb);

	end = start + size;

	ext4_mb_pa_adjust_overlap(ac, &start, &end);

	size = end - start;

	/*
	 * In this function "start" and "size" are normalized for better
	 * alignment and length such that we could preallocate more blocks.
	 * This normalization is done such that original request of
	 * ac->ac_o_ex.fe_logical & fe_len should always lie within "start" and
	 * "size" boundaries.
	 * (Note fe_len can be relaxed since FS block allocation API does not
	 * provide guarantee on number of contiguous blocks allocation since that
	 * depends upon free space left, etc).
	 * In case of inode pa, later we use the allocated blocks
	 * [pa_pstart + fe_logical - pa_lstart, fe_len/size] from the preallocated
	 * range of goal/best blocks [start, size] to put it at the
	 * ac_o_ex.fe_logical extent of this inode.
	 * (See ext4_mb_use_inode_pa() for more details)
	 */
	if (start + size <= ac->ac_o_ex.fe_logical ||
			start > ac->ac_o_ex.fe_logical) {
		ext4_msg(ac->ac_sb, KERN_ERR,
			 "start %lu, size %lu, fe_logical %lu",
			 (unsigned long) start, (unsigned long) size,
			 (unsigned long) ac->ac_o_ex.fe_logical);
		BUG();
	}
	BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));

	/* now prepare goal request */

	/* XXX: is it better to align blocks WRT to logical
	 * placement or satisfy big request as is */
	ac->ac_g_ex.fe_logical = start;
	ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
	ac->ac_orig_goal_len = ac->ac_g_ex.fe_len;

	/* define goal start in order to merge */
	if (ar->pright && (ar->lright == (start + size)) &&
	    ar->pright >= size &&
	    ar->pright - size >= le32_to_cpu(es->s_first_data_block)) {
		/* merge to the right */
		ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
						&ac->ac_g_ex.fe_group,
						&ac->ac_g_ex.fe_start);
		ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
	}
	/*
	 * Checked after the right-merge case, so when both conditions hold
	 * the left-merge goal is the one that ends up in ac_g_ex.
	 */
	if (ar->pleft && (ar->lleft + 1 == start) &&
	    ar->pleft + 1 < ext4_blocks_count(es)) {
		/* merge to the left */
		ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
						&ac->ac_g_ex.fe_group,
						&ac->ac_g_ex.fe_start);
		ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
	}

	mb_debug(ac->ac_sb, "goal: %lld(was %lld) blocks at %u\n", size,
		 orig_size, start);
}

/*
 * Fold the outcome of one allocation attempt into the per-superblock
 * s_bal_* counters (only when s_mb_stats is set and the goal length is
 * at least 1) and emit the matching alloc/prealloc tracepoint.
 */
static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);

	if (sbi->s_mb_stats && ac->ac_g_ex.fe_len >= 1) {
		atomic_inc(&sbi->s_bal_reqs);
		atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
		if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len)
			atomic_inc(&sbi->s_bal_success);

		atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
		for (int i=0; i<EXT4_MB_NUM_CRS; i++) {
			atomic_add(ac->ac_cX_found[i], &sbi->s_bal_cX_ex_scanned[i]);
		}

		atomic_add(ac->ac_groups_scanned, &sbi->s_bal_groups_scanned);
		if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
				ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
			atomic_inc(&sbi->s_bal_goals);
		/* did we allocate as much as normalizer originally wanted? */
		if (ac->ac_f_ex.fe_len == ac->ac_orig_goal_len)
			atomic_inc(&sbi->s_bal_len_goals);

		if (ac->ac_found > sbi->s_mb_max_to_scan)
			atomic_inc(&sbi->s_bal_breaks);
	}

	if (ac->ac_op == EXT4_MB_HISTORY_ALLOC)
		trace_ext4_mballoc_alloc(ac);
	else
		trace_ext4_mballoc_prealloc(ac);
}

/*
 * Called on failure; free up any blocks from the inode PA for this
 * context.  We don't need this for MB_GROUP_PA because we only change
 * pa_free in ext4_mb_release_context(), but on failure, we've already
 * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed.
 */
static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
{
	struct ext4_prealloc_space *pa = ac->ac_pa;
	struct ext4_buddy e4b;
	int err;

	if (pa == NULL) {
		/* no PA involved: give ac_f_ex back to the buddy, if any */
		if (ac->ac_f_ex.fe_len == 0)
			return;
		err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
		if (WARN_RATELIMIT(err,
				   "ext4: mb_load_buddy failed (%d)", err))
			/*
			 * This should never happen since we pin the
			 * folios in the ext4_allocation_context so
			 * ext4_mb_load_buddy() should never fail.
			 */
			return;
		ext4_lock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
		mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
			       ac->ac_f_ex.fe_len);
		ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
		ext4_mb_unload_buddy(&e4b);
		return;
	}
	if (pa->pa_type == MB_INODE_PA) {
		/* return the blocks taken from the inode PA to its free count */
		spin_lock(&pa->pa_lock);
		pa->pa_free += ac->ac_b_ex.fe_len;
		spin_unlock(&pa->pa_lock);
	}
}

/*
 * use blocks preallocated to inode
 *
 * Fills ac->ac_b_ex with the part of @pa that backs the original logical
 * start, marks the context AC_STATUS_FOUND, records @pa in ac->ac_pa and
 * subtracts the used length from pa->pa_free. No locking is done here.
 */
static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
				struct ext4_prealloc_space *pa)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	ext4_fsblk_t start;
	ext4_fsblk_t end;
	int len;

	/* found preallocated blocks, use them */
	start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
	/* never run past the physical end of the PA */
	end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
		  start + EXT4_C2B(sbi, ac->ac_o_ex.fe_len));
	len = EXT4_NUM_B2C(sbi, end - start);
	ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
					&ac->ac_b_ex.fe_start);
	ac->ac_b_ex.fe_len = len;
	ac->ac_status = AC_STATUS_FOUND;
	ac->ac_pa = pa;

	BUG_ON(start < pa->pa_pstart);
	BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
	BUG_ON(pa->pa_free < len);
	BUG_ON(ac->ac_b_ex.fe_len <= 0);
	pa->pa_free -= len;

	mb_debug(ac->ac_sb, "use %llu/%d from inode pa %p\n", start, len, pa);
}

/*
 * use blocks preallocated to locality group
 *
 * Points ac->ac_b_ex at the physical start of @pa with the original
 * request length, marks the context AC_STATUS_FOUND and records @pa in
 * ac->ac_pa. The PA itself is not modified here.
 */
static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
				struct ext4_prealloc_space *pa)
{
	unsigned int len = ac->ac_o_ex.fe_len;

	ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
					&ac->ac_b_ex.fe_group,
					&ac->ac_b_ex.fe_start);
	ac->ac_b_ex.fe_len = len;
	ac->ac_status = AC_STATUS_FOUND;
	ac->ac_pa = pa;

	/* we don't correct pa_pstart or pa_len here to avoid
	 * possible race when the group is being loaded concurrently
	 * instead we correct pa later, after blocks are marked
	 * in on-disk bitmap -- see ext4_mb_release_context()
	 * Other CPUs are prevented from allocating from this pa by lg_mutex
	 */
	mb_debug(ac->ac_sb, "use %u/%u from group pa %p\n",
		 pa->pa_lstart, len, pa);
}

/*
 * Return
the prealloc space that have minimal distance
 * from the goal block. @cpa is the prealloc
 * space that is having currently known minimal distance
 * from the goal block.
 */
static struct ext4_prealloc_space *
ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
			struct ext4_prealloc_space *pa,
			struct ext4_prealloc_space *cpa)
{
	ext4_fsblk_t cur_distance, new_distance;

	if (cpa == NULL) {
		/* first candidate: take a reference and keep it */
		atomic_inc(&pa->pa_count);
		return pa;
	}
	cur_distance = abs(goal_block - cpa->pa_pstart);
	new_distance = abs(goal_block - pa->pa_pstart);

	/* on a tie the current candidate is kept */
	if (cur_distance <= new_distance)
		return cpa;

	/* drop the previous reference */
	atomic_dec(&cpa->pa_count);
	atomic_inc(&pa->pa_count);
	return pa;
}

/*
 * check if found pa meets EXT4_MB_HINT_GOAL_ONLY
 *
 * Returns true when the hint is not set, or when the goal extent maps to
 * exactly the physical block @pa provides for the goal's logical start and
 * the goal length fits in what remains of @pa from that point.
 */
static bool
ext4_mb_pa_goal_check(struct ext4_allocation_context *ac,
		      struct ext4_prealloc_space *pa)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	ext4_fsblk_t start;

	if (likely(!(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)))
		return true;

	/*
	 * If EXT4_MB_HINT_GOAL_ONLY is set, ac_g_ex will not be adjusted
	 * in ext4_mb_normalize_request and will keep same with ac_o_ex
	 * from ext4_mb_initialize_context. Choose ac_g_ex here to keep
	 * consistent with ext4_mb_find_by_goal.
	 */
	start = pa->pa_pstart +
		(ac->ac_g_ex.fe_logical - pa->pa_lstart);
	if (ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex) != start)
		return false;

	if (ac->ac_g_ex.fe_len > pa->pa_len -
	    EXT4_B2C(sbi, ac->ac_g_ex.fe_logical - pa->pa_lstart))
		return false;

	return true;
}

/*
 * search goal blocks in preallocated space
 *
 * Tries the inode's PA rbtree first and, if that fails and
 * EXT4_MB_HINT_GROUP_ALLOC is set, the locality group's PA lists.
 * Returns true when a PA was selected (ac->ac_pa and ac->ac_b_ex are then
 * filled in by ext4_mb_use_inode_pa()/ext4_mb_use_group_pa()), false
 * otherwise.
 */
static noinline_for_stack bool
ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	int order, i;
	struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
	struct ext4_locality_group *lg;
	struct ext4_prealloc_space *tmp_pa = NULL, *cpa = NULL;
	struct rb_node *iter;
	ext4_fsblk_t goal_block;

	/* only data can be preallocated */
	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
		return false;

	/*
	 * first, try per-file preallocation by searching the inode pa rbtree.
	 *
	 * Here, we can't do a direct traversal of the tree because
	 * ext4_mb_discard_group_preallocation() can mark the pa deleted in
	 * parallel and that can cause direct traversal to skip some entries.
	 */
	read_lock(&ei->i_prealloc_lock);

	if (RB_EMPTY_ROOT(&ei->i_prealloc_node)) {
		goto try_group_pa;
	}

	/*
	 * Step 1: Find a pa with logical start immediately adjacent to the
	 * original logical start. This could be on the left or right.
	 *
	 * (tmp_pa->pa_lstart never changes so we can skip locking for it).
	 */
	for (iter = ei->i_prealloc_node.rb_node; iter;
	     iter = ext4_mb_pa_rb_next_iter(ac->ac_o_ex.fe_logical,
					    tmp_pa->pa_lstart, iter)) {
		tmp_pa = rb_entry(iter, struct ext4_prealloc_space,
				  pa_node.inode_node);
	}

	/*
	 * Step 2: The adjacent pa might be to the right of logical start, find
	 * the left adjacent pa. After this step we'd have a valid tmp_pa whose
	 * logical start is towards the left of original request's logical start
	 */
	if (tmp_pa->pa_lstart > ac->ac_o_ex.fe_logical) {
		struct rb_node *tmp;
		tmp = rb_prev(&tmp_pa->pa_node.inode_node);

		if (tmp) {
			tmp_pa = rb_entry(tmp, struct ext4_prealloc_space,
					    pa_node.inode_node);
		} else {
			/*
			 * If there is no adjacent pa to the left then finding
			 * an overlapping pa is not possible hence stop searching
			 * inode pa tree
			 */
			goto try_group_pa;
		}
	}

	BUG_ON(!(tmp_pa && tmp_pa->pa_lstart <= ac->ac_o_ex.fe_logical));

	/*
	 * Step 3: If the left adjacent pa is deleted, keep moving left to find
	 * the first non deleted adjacent pa. After this step we should have a
	 * valid tmp_pa which is guaranteed to be non deleted.
	 */
	for (iter = &tmp_pa->pa_node.inode_node;; iter = rb_prev(iter)) {
		if (!iter) {
			/*
			 * no non deleted left adjacent pa, so stop searching
			 * inode pa tree
			 */
			goto try_group_pa;
		}
		tmp_pa = rb_entry(iter, struct ext4_prealloc_space,
				  pa_node.inode_node);
		spin_lock(&tmp_pa->pa_lock);
		if (tmp_pa->pa_deleted == 0) {
			/*
			 * We will keep holding the pa_lock from
			 * this point on because we don't want group discard
			 * to delete this pa underneath us. Since group
			 * discard is anyways an ENOSPC operation it
			 * should be okay for it to wait a few more cycles.
			 */
			break;
		} else {
			spin_unlock(&tmp_pa->pa_lock);
		}
	}

	/* from here until try_group_pa, tmp_pa->pa_lock is held */
	BUG_ON(!(tmp_pa && tmp_pa->pa_lstart <= ac->ac_o_ex.fe_logical));
	BUG_ON(tmp_pa->pa_deleted == 1);

	/*
	 * Step 4: We now have the non deleted left adjacent pa. Only this
	 * pa can possibly satisfy the request hence check if it overlaps
	 * original logical start and stop searching if it doesn't.
	 */
	if (ac->ac_o_ex.fe_logical >= pa_logical_end(sbi, tmp_pa)) {
		spin_unlock(&tmp_pa->pa_lock);
		goto try_group_pa;
	}

	/* non-extent files can't have physical blocks past 2^32 */
	if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
	    (tmp_pa->pa_pstart + EXT4_C2B(sbi, tmp_pa->pa_len) >
	     EXT4_MAX_BLOCK_FILE_PHYS)) {
		/*
		 * Since PAs don't overlap, we won't find any other PA to
		 * satisfy this.
		 */
		spin_unlock(&tmp_pa->pa_lock);
		goto try_group_pa;
	}

	if (tmp_pa->pa_free && likely(ext4_mb_pa_goal_check(ac, tmp_pa))) {
		/* take a reference before handing the pa to the context */
		atomic_inc(&tmp_pa->pa_count);
		ext4_mb_use_inode_pa(ac, tmp_pa);
		spin_unlock(&tmp_pa->pa_lock);
		read_unlock(&ei->i_prealloc_lock);
		return true;
	} else {
		/*
		 * We found a valid overlapping pa but couldn't use it because
		 * it had no free blocks. This should ideally never happen
		 * because:
		 *
		 * 1. When a new inode pa is added to rbtree it must have
		 *    pa_free > 0 since otherwise we won't actually need
		 *    preallocation.
		 *
		 * 2. An inode pa that is in the rbtree can only have its
		 *    pa_free become zero when another thread calls:
		 *      ext4_mb_new_blocks
		 *       ext4_mb_use_preallocated
		 *        ext4_mb_use_inode_pa
		 *
		 * 3. Further, after the above calls make pa_free == 0, we will
		 *    immediately remove it from the rbtree in:
		 *      ext4_mb_new_blocks
		 *       ext4_mb_release_context
		 *        ext4_mb_put_pa
		 *
		 * 4. Since the pa_free becoming 0 and pa_free getting removed
		 * from tree both happen in ext4_mb_new_blocks, which is always
		 * called with i_data_sem held for data allocations, we can be
		 * sure that another process will never see a pa in rbtree with
		 * pa_free == 0.
		 */
		WARN_ON_ONCE(tmp_pa->pa_free == 0);
	}
	spin_unlock(&tmp_pa->pa_lock);
try_group_pa:
	read_unlock(&ei->i_prealloc_lock);

	/* can we use group allocation? */
	if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
		return false;

	/* inode may have no locality group for some reason */
	lg = ac->ac_lg;
	if (lg == NULL)
		return false;
	order  = fls(ac->ac_o_ex.fe_len) - 1;
	if (order > PREALLOC_TB_SIZE - 1)
		/* The max size of hash table is PREALLOC_TB_SIZE */
		order = PREALLOC_TB_SIZE - 1;

	goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
	/*
	 * search for the prealloc space that is having
	 * minimal distance from the goal block.
	 */
	for (i = order; i < PREALLOC_TB_SIZE; i++) {
		rcu_read_lock();
		list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[i],
					pa_node.lg_list) {
			spin_lock(&tmp_pa->pa_lock);
			if (tmp_pa->pa_deleted == 0 &&
					tmp_pa->pa_free >= ac->ac_o_ex.fe_len) {

				cpa = ext4_mb_check_group_pa(goal_block,
								tmp_pa, cpa);
			}
			spin_unlock(&tmp_pa->pa_lock);
		}
		rcu_read_unlock();
	}
	if (cpa) {
		ext4_mb_use_group_pa(ac, cpa);
		return true;
	}
	return false;
}

/*
 * the function goes through all preallocation in this group and marks them
 * used in in-core bitmap. buddy must be generated from this bitmap
 * Need to be called with ext4 group lock held
 */
static noinline_for_stack
void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
					ext4_group_t group)
{
	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
	struct ext4_prealloc_space *pa;
	struct list_head *cur;
	ext4_group_t groupnr;
	ext4_grpblk_t start;
	int preallocated = 0;
	int len;

	if (!grp)
		return;

	/* all form of preallocation discards first load group,
	 * so the only competing code is preallocation use.
	 * we don't need any locking here
	 * notice we do NOT ignore preallocations with pa_deleted
	 * otherwise we could leave used blocks available for
	 * allocation in buddy when concurrent ext4_mb_put_pa()
	 * is dropping preallocation
	 */
	list_for_each(cur, &grp->bb_prealloc_list) {
		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
		/* snapshot start/len under the pa lock, then work unlocked */
		spin_lock(&pa->pa_lock);
		ext4_get_group_no_and_offset(sb, pa->pa_pstart,
					     &groupnr, &start);
		len = pa->pa_len;
		spin_unlock(&pa->pa_lock);
		if (unlikely(len == 0))
			continue;
		BUG_ON(groupnr != group);
		mb_set_bits(bitmap, start, len);
		preallocated += len;
	}
	mb_debug(sb, "preallocated %d for group %u\n", preallocated, group);
}

/*
 * Flag @pa as deleted. If it is already flagged, only a warning is issued.
 * For inode PAs the owning inode's i_prealloc_active counter is decremented.
 * The callers visible in this file invoke this with pa->pa_lock held.
 */
static void ext4_mb_mark_pa_deleted(struct super_block *sb,
				    struct ext4_prealloc_space *pa)
{
	struct ext4_inode_info *ei;

	if (pa->pa_deleted) {
		ext4_warning(sb, "deleted pa, type:%d, pblk:%llu, lblk:%u, len:%d\n",
			     pa->pa_type, pa->pa_pstart, pa->pa_lstart,
			     pa->pa_len);
		return;
	}

	pa->pa_deleted = 1;

	if (pa->pa_type == MB_INODE_PA) {
		ei = EXT4_I(pa->pa_inode);
		atomic_dec(&ei->i_prealloc_active);
	}
}

/*
 * Return @pa to ext4_pspace_cachep. The PA must be non-NULL, have a zero
 * reference count and already be marked deleted; anything else is a BUG.
 */
static inline void ext4_mb_pa_free(struct ext4_prealloc_space *pa)
{
	BUG_ON(!pa);
	BUG_ON(atomic_read(&pa->pa_count));
	BUG_ON(pa->pa_deleted == 0);
	kmem_cache_free(ext4_pspace_cachep, pa);
}

/* RCU callback: recover the PA from its embedded rcu_head and free it */
static void ext4_mb_pa_callback(struct rcu_head *head)
{
	struct ext4_prealloc_space *pa;

	pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
	ext4_mb_pa_free(pa);
}

/*
 * drops a reference to preallocated space descriptor
 * if this was the last reference and the space is consumed
 */
static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
			struct super_block *sb, struct ext4_prealloc_space *pa)
{
	ext4_group_t grp;
	ext4_fsblk_t grp_blk;
	struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);

	/* in this short window concurrent discard can set pa_deleted */
	spin_lock(&pa->pa_lock);
	/* nothing more to do unless the last ref went away and pa is used up */
	if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) {
		spin_unlock(&pa->pa_lock);
		return;
	}

	if (pa->pa_deleted == 1) {
		spin_unlock(&pa->pa_lock);
		return;
	}

	ext4_mb_mark_pa_deleted(sb, pa);
	spin_unlock(&pa->pa_lock);

	grp_blk = pa->pa_pstart;
	/*
	 * If doing group-based preallocation, pa_pstart may be in the
	 * next group when pa is used up
	 */
	if (pa->pa_type == MB_GROUP_PA)
		grp_blk--;

	grp = ext4_get_group_number(sb, grp_blk);

	/*
	 * possible race:
	 *
	 *  P1 (buddy init)			P2 (regular allocation)
	 *					find block B in PA
	 *  copy on-disk bitmap to buddy
	 *					mark B in on-disk bitmap
	 *					drop PA from group
	 *  mark all PAs in buddy
	 *
	 * thus, P1 initializes buddy with B available. to prevent this
	 * we make "copy" and "mark all PAs" atomic and serialize "drop PA"
	 * against that pair
	 */
	ext4_lock_group(sb, grp);
	list_del(&pa->pa_group_list);
	ext4_unlock_group(sb, grp);

	if (pa->pa_type == MB_INODE_PA) {
		/* inode PAs are unlinked from the rbtree and freed directly */
		write_lock(pa->pa_node_lock.inode_lock);
		rb_erase(&pa->pa_node.inode_node, &ei->i_prealloc_node);
		write_unlock(pa->pa_node_lock.inode_lock);
		ext4_mb_pa_free(pa);
	} else {
		/* group PAs sit on an RCU-traversed list: free via call_rcu */
		spin_lock(pa->pa_node_lock.lg_lock);
		list_del_rcu(&pa->pa_node.lg_list);
		spin_unlock(pa->pa_node_lock.lg_lock);
		call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
	}
}

/*
 * Insert the PA embedding @new into the inode PA rbtree @root, keyed by
 * pa_lstart. A new start that is not smaller than an existing one goes
 * to the right. No locking is done here; the caller visible in this file
 * holds the inode PA write lock.
 */
static void ext4_mb_pa_rb_insert(struct rb_root *root, struct rb_node *new)
{
	struct rb_node **iter = &root->rb_node, *parent = NULL;
	struct ext4_prealloc_space *iter_pa, *new_pa;
	ext4_lblk_t iter_start, new_start;

	while (*iter) {
		iter_pa = rb_entry(*iter, struct ext4_prealloc_space,
				   pa_node.inode_node);
		new_pa = rb_entry(new, struct ext4_prealloc_space,
				   pa_node.inode_node);
		iter_start = iter_pa->pa_lstart;
		new_start = new_pa->pa_lstart;

		parent = *iter;
		if (new_start < iter_start)
			iter = &((*iter)->rb_left);
		else
			iter = &((*iter)->rb_right);
	}

	rb_link_node(new, parent, iter);
	rb_insert_color(new, root);
}

/*
 * creates new preallocated space for given inode
 */
static noinline_for_stack void
ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
{
	struct super_block *sb = ac->ac_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_prealloc_space *pa;
	struct ext4_group_info *grp;
	struct ext4_inode_info *ei;

	/* preallocate only when found space is larger than requested */
	BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
	BUG_ON(ac->ac_status != AC_STATUS_FOUND);
	BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
	BUG_ON(ac->ac_pa == NULL);

	pa = ac->ac_pa;

	if (ac->ac_b_ex.fe_len < ac->ac_orig_goal_len) {
		struct ext4_free_extent ex = {
			.fe_logical = ac->ac_g_ex.fe_logical,
			.fe_len = ac->ac_orig_goal_len,
		};
		loff_t orig_goal_end = extent_logical_end(sbi, &ex);
		loff_t o_ex_end = extent_logical_end(sbi, &ac->ac_o_ex);

		/*
		 * We can't allocate as much as normalizer wants, so we try
		 * to get proper lstart to cover the original request, except
		 * when the goal doesn't cover the original request as below:
		 *
		 * orig_ex:2045/2055(10), isize:8417280 -> normalized:0/2048
		 * best_ex:0/200(200) -> adjusted: 1848/2048(200)
		 */
		BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
		BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);

		/*
		 * Use the below logic for adjusting best extent as it keeps
		 * fragmentation in check while ensuring logical range of best
		 * extent doesn't overflow out of goal extent:
		 *
		 * 1. Check if best ex can be kept at end of goal (before
		 *    cr_best_avail trimmed it) and still cover original start
		 * 2. Else, check if best ex can be kept at start of goal and
		 *    still cover original end
		 * 3. Else, keep the best ex at start of original request.
		 */
		ex.fe_len = ac->ac_b_ex.fe_len;

		ex.fe_logical = orig_goal_end - EXT4_C2B(sbi, ex.fe_len);
		if (ac->ac_o_ex.fe_logical >= ex.fe_logical)
			goto adjust_bex;

		ex.fe_logical = ac->ac_g_ex.fe_logical;
		if (o_ex_end <= extent_logical_end(sbi, &ex))
			goto adjust_bex;

		ex.fe_logical = ac->ac_o_ex.fe_logical;
adjust_bex:
		ac->ac_b_ex.fe_logical = ex.fe_logical;

		BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
		BUG_ON(extent_logical_end(sbi, &ex) > orig_goal_end);
	}

	pa->pa_lstart = ac->ac_b_ex.fe_logical;
	pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
	pa->pa_len = ac->ac_b_ex.fe_len;
	pa->pa_free = pa->pa_len;
	spin_lock_init(&pa->pa_lock);
	INIT_LIST_HEAD(&pa->pa_group_list);
	pa->pa_deleted = 0;
	pa->pa_type = MB_INODE_PA;

	mb_debug(sb, "new inode pa %p: %llu/%d for %u\n", pa, pa->pa_pstart,
		 pa->pa_len, pa->pa_lstart);
	trace_ext4_mb_new_inode_pa(ac, pa);

	atomic_add(pa->pa_free, &sbi->s_mb_preallocated);
	/* carve the original request out of the freshly set up pa */
	ext4_mb_use_inode_pa(ac, pa);

	ei = EXT4_I(ac->ac_inode);
	grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
	if (!grp)
		return;

	pa->pa_node_lock.inode_lock = &ei->i_prealloc_lock;
	pa->pa_inode = ac->ac_inode;

	list_add(&pa->pa_group_list, &grp->bb_prealloc_list);

	write_lock(pa->pa_node_lock.inode_lock);
	ext4_mb_pa_rb_insert(&ei->i_prealloc_node, &pa->pa_node.inode_node);
	write_unlock(pa->pa_node_lock.inode_lock);
	atomic_inc(&ei->i_prealloc_active);
}

/*
 * creates new preallocated space for locality group inodes belongs to
 */
static noinline_for_stack void
ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
{
	struct super_block *sb = ac->ac_sb;
	struct ext4_locality_group *lg;
	struct ext4_prealloc_space *pa;
	struct ext4_group_info *grp;

	/* preallocate only when found space is larger than requested */
	BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
	BUG_ON(ac->ac_status != AC_STATUS_FOUND);
	BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
	BUG_ON(ac->ac_pa == NULL);

	pa = ac->ac_pa;

	pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
	/* for a group pa the logical start is set to the physical start */
	pa->pa_lstart = pa->pa_pstart;
	pa->pa_len = ac->ac_b_ex.fe_len;
	pa->pa_free = pa->pa_len;
	spin_lock_init(&pa->pa_lock);
	INIT_LIST_HEAD(&pa->pa_node.lg_list);
	INIT_LIST_HEAD(&pa->pa_group_list);
	pa->pa_deleted = 0;
	pa->pa_type = MB_GROUP_PA;

	mb_debug(sb, "new group pa %p: %llu/%d for %u\n", pa, pa->pa_pstart,
		 pa->pa_len, pa->pa_lstart);
	trace_ext4_mb_new_group_pa(ac, pa);

	ext4_mb_use_group_pa(ac, pa);
	atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);

	grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
	if (!grp)
		return;
	lg = ac->ac_lg;
	BUG_ON(lg == NULL);

	pa->pa_node_lock.lg_lock = &lg->lg_prealloc_lock;
	pa->pa_inode = NULL;

	list_add(&pa->pa_group_list, &grp->bb_prealloc_list);

	/*
	 * We will later add the new pa to the right bucket
	 * after updating the pa_free in ext4_mb_release_context
	 */
}

/*
 * Create a new preallocation for @ac: a locality-group PA when
 * EXT4_MB_HINT_GROUP_ALLOC is set, an inode PA otherwise.
 */
static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
{
	if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
		ext4_mb_new_group_pa(ac);
	else
		ext4_mb_new_inode_pa(ac);
}

/*
 * finds all unused blocks in on-disk bitmap, frees them in
 * in-core bitmap and buddy.
 * @pa must be unlinked from inode and group lists, so that
 * nobody else can find/use it.
 * the caller MUST hold group/inode locks.
* TODO: optimize the case when there are no in-core structures yet
 */
static noinline_for_stack void
ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
			struct ext4_prealloc_space *pa)
{
	struct super_block *sb = e4b->bd_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	unsigned int end;
	unsigned int next;
	ext4_group_t group;
	ext4_grpblk_t bit;
	unsigned long long grp_blk_start;
	int free = 0;

	BUG_ON(pa->pa_deleted == 0);
	ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
	/* physical block number of bit 0 of this group */
	grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit);
	BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
	end = bit + pa->pa_len;

	/* free every run of zero bits found in [bit, end) of the bitmap */
	while (bit < end) {
		bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
		if (bit >= end)
			break;
		next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
		mb_debug(sb, "free preallocated %u/%u in group %u\n",
			 (unsigned) ext4_group_first_block_no(sb, group) + bit,
			 (unsigned) next - bit, (unsigned) group);
		free += next - bit;

		trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
		trace_ext4_mb_release_inode_pa(pa, (grp_blk_start +
						    EXT4_C2B(sbi, bit)),
					       next - bit);
		mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
		bit = next + 1;
	}
	if (free != pa->pa_free) {
		ext4_msg(e4b->bd_sb, KERN_CRIT,
			 "pa %p: logic %lu, phys. %lu, len %d",
			 pa, (unsigned long) pa->pa_lstart,
			 (unsigned long) pa->pa_pstart,
			 pa->pa_len);
		ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
					free, pa->pa_free);
		/*
		 * pa is already deleted so we use the value obtained
		 * from the bitmap and continue.
		 */
	}
	atomic_add(free, &sbi->s_mb_discarded);
}

/*
 * Release a deleted locality-group PA: free its whole pa_len range in the
 * buddy @e4b and account it in s_mb_discarded. If the PA's group does not
 * match @e4b (and the PA is non-empty) only a warning is issued.
 */
static noinline_for_stack void
ext4_mb_release_group_pa(struct ext4_buddy *e4b,
				struct ext4_prealloc_space *pa)
{
	struct super_block *sb = e4b->bd_sb;
	ext4_group_t group;
	ext4_grpblk_t bit;

	trace_ext4_mb_release_group_pa(sb, pa);
	BUG_ON(pa->pa_deleted == 0);
	ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
	if (unlikely(group != e4b->bd_group && pa->pa_len != 0)) {
		ext4_warning(sb, "bad group: expected %u, group %u, pa_start %llu",
			     e4b->bd_group, group, pa->pa_pstart);
		return;
	}
	mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
	atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
	trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
}

/*
 * releases all preallocations in given group
 *
 * first, we need to decide discard policy:
 * - when do we discard
 *   1) ENOSPC
 * - how many do we discard
 *   1) how many requested
 *
 * Returns the sum of pa_free over the PAs that were discarded; *busy is
 * set to 1 if at least one PA was skipped because it is still referenced.
 */
static noinline_for_stack int
ext4_mb_discard_group_preallocations(struct super_block *sb,
				     ext4_group_t group, int *busy)
{
	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
	struct buffer_head *bitmap_bh = NULL;
	struct ext4_prealloc_space *pa, *tmp;
	LIST_HEAD(list);
	struct ext4_buddy e4b;
	struct ext4_inode_info *ei;
	int err;
	int free = 0;

	if (!grp)
		return 0;
	mb_debug(sb, "discard preallocation for group %u\n", group);
	if (list_empty(&grp->bb_prealloc_list))
		goto out_dbg;

	bitmap_bh = ext4_read_block_bitmap(sb, group);
	if (IS_ERR(bitmap_bh)) {
		err = PTR_ERR(bitmap_bh);
		ext4_error_err(sb, -err,
			       "Error %d reading block bitmap for %u",
			       err, group);
		goto out_dbg;
	}

	err = ext4_mb_load_buddy(sb, group, &e4b);
	if (err) {
		ext4_warning(sb, "Error %d loading buddy information for %u",
			     err, group);
		put_bh(bitmap_bh);
		goto out_dbg;
	}

	ext4_lock_group(sb, group);
	/* pass 1: move every unreferenced, live PA onto the private list */
	list_for_each_entry_safe(pa, tmp,
				&grp->bb_prealloc_list, pa_group_list) {
		spin_lock(&pa->pa_lock);
		if (atomic_read(&pa->pa_count)) {
			spin_unlock(&pa->pa_lock);
			*busy = 1;
			continue;
		}
		if (pa->pa_deleted) {
			spin_unlock(&pa->pa_lock);
			continue;
		}

		/* seems this one can be freed ... */
		ext4_mb_mark_pa_deleted(sb, pa);

		/* bump the sequence counter once, on the first PA selected */
		if (!free)
			this_cpu_inc(discard_pa_seq);

		/* we can trust pa_free ... */
		free += pa->pa_free;

		spin_unlock(&pa->pa_lock);

		list_del(&pa->pa_group_list);
		list_add(&pa->u.pa_tmp_list, &list);
	}

	/* now free all selected PAs */
	list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {

		/* remove from object (inode or locality group) */
		if (pa->pa_type == MB_GROUP_PA) {
			spin_lock(pa->pa_node_lock.lg_lock);
			list_del_rcu(&pa->pa_node.lg_list);
			spin_unlock(pa->pa_node_lock.lg_lock);
		} else {
			write_lock(pa->pa_node_lock.inode_lock);
			ei = EXT4_I(pa->pa_inode);
			rb_erase(&pa->pa_node.inode_node, &ei->i_prealloc_node);
			write_unlock(pa->pa_node_lock.inode_lock);
		}

		list_del(&pa->u.pa_tmp_list);

		if (pa->pa_type == MB_GROUP_PA) {
			ext4_mb_release_group_pa(&e4b, pa);
			call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
		} else {
			ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
			ext4_mb_pa_free(pa);
		}
	}

	ext4_unlock_group(sb, group);
	ext4_mb_unload_buddy(&e4b);
	put_bh(bitmap_bh);
out_dbg:
	mb_debug(sb, "discarded (%d) blocks preallocated for group %u bb_free (%d)\n",
		 free, group, grp->bb_free);
	return free;
}

/*
 * releases all non-used preallocated blocks for given inode
 *
 * It's important to discard preallocations under i_data_sem
 * We don't want another block to be served from the prealloc
 * space when we are discarding the inode prealloc space.
 *
 * FIXME!!
Make sure it is valid at all the call sites */
void ext4_discard_preallocations(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct super_block *sb = inode->i_sb;
	struct buffer_head *bitmap_bh = NULL;
	struct ext4_prealloc_space *pa, *tmp;
	ext4_group_t group = 0;
	LIST_HEAD(list);
	struct ext4_buddy e4b;
	struct rb_node *iter;
	int err;

	/* only regular files are processed */
	if (!S_ISREG(inode->i_mode))
		return;

	/* nothing is discarded while EXT4_FC_REPLAY is set in the mount state */
	if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)
		return;

	mb_debug(sb, "discard preallocation for inode %lu\n",
		 inode->i_ino);
	trace_ext4_discard_preallocations(inode,
			atomic_read(&ei->i_prealloc_active));

repeat:
	/* first, collect all pa's in the inode */
	write_lock(&ei->i_prealloc_lock);
	for (iter = rb_first(&ei->i_prealloc_node); iter;
	     iter = rb_next(iter)) {
		pa = rb_entry(iter, struct ext4_prealloc_space,
			      pa_node.inode_node);
		BUG_ON(pa->pa_node_lock.inode_lock != &ei->i_prealloc_lock);

		spin_lock(&pa->pa_lock);
		if (atomic_read(&pa->pa_count)) {
			/*
			 * this shouldn't happen often - nobody should
			 * use preallocation while we're discarding it
			 */
			spin_unlock(&pa->pa_lock);
			write_unlock(&ei->i_prealloc_lock);
			ext4_msg(sb, KERN_ERR,
				 "uh-oh! used pa while discarding");
			WARN_ON(1);
			/* sleep, then rescan the tree from the beginning */
			schedule_timeout_uninterruptible(HZ);
			goto repeat;

		}
		if (pa->pa_deleted == 0) {
			/*
			 * Unused and not yet marked deleted: mark it, unlink
			 * it from the inode's tree and queue it on the local
			 * list for release below.
			 */
			ext4_mb_mark_pa_deleted(sb, pa);
			spin_unlock(&pa->pa_lock);
			rb_erase(&pa->pa_node.inode_node, &ei->i_prealloc_node);
			list_add(&pa->u.pa_tmp_list, &list);
			continue;
		}

		/* someone is deleting pa right now */
		spin_unlock(&pa->pa_lock);
		write_unlock(&ei->i_prealloc_lock);

		/*
		 * we have to wait here because pa_deleted
		 * doesn't mean pa is already unlinked from
		 * the list. as we might be called from
		 * ->clear_inode() the inode will get freed
		 * and concurrent thread which is unlinking
		 * pa from inode's list may access already
		 * freed memory, bad-bad-bad
		 */

		/*
		 * XXX: if this happens too often, we can
		 * add a flag to force wait only in case
		 * of ->clear_inode(), but not in case of
		 * regular truncate
		 */
		schedule_timeout_uninterruptible(HZ);
		goto repeat;
	}
	write_unlock(&ei->i_prealloc_lock);

	/*
	 * second, release each collected pa under its group lock and free
	 * the descriptor; a pa whose buddy or bitmap cannot be loaded is
	 * reported and skipped
	 */
	list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
		BUG_ON(pa->pa_type != MB_INODE_PA);
		group = ext4_get_group_number(sb, pa->pa_pstart);

		err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
					     GFP_NOFS|__GFP_NOFAIL);
		if (err) {
			ext4_error_err(sb, -err, "Error %d loading buddy information for %u",
				       err, group);
			continue;
		}

		bitmap_bh = ext4_read_block_bitmap(sb, group);
		if (IS_ERR(bitmap_bh)) {
			err = PTR_ERR(bitmap_bh);
			ext4_error_err(sb, -err, "Error %d reading block bitmap for %u",
				       err, group);
			ext4_mb_unload_buddy(&e4b);
			continue;
		}

		ext4_lock_group(sb, group);
		list_del(&pa->pa_group_list);
		ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
		ext4_unlock_group(sb, group);

		ext4_mb_unload_buddy(&e4b);
		put_bh(bitmap_bh);

		list_del(&pa->u.pa_tmp_list);
		ext4_mb_pa_free(pa);
	}
}

/*
 * Allocate a zeroed preallocation descriptor from ext4_pspace_cachep, set
 * its pa_count to 1 and store it in ac->ac_pa.
 *
 * Returns 0 on success or -ENOMEM if the allocation fails.
 */
static int ext4_mb_pa_alloc(struct ext4_allocation_context *ac)
{
	struct ext4_prealloc_space *pa;

	BUG_ON(ext4_pspace_cachep == NULL);
	pa = kmem_cache_zalloc(ext4_pspace_cachep, GFP_NOFS);
	if (!pa)
		return -ENOMEM;
	atomic_set(&pa->pa_count, 1);
	ac->ac_pa = pa;
	return 0;
}

/*
 * Detach ac->ac_pa from the context, drop one pa_count reference (warning
 * if that does not bring the count to zero), mark the pa deleted and free it.
 */
static void ext4_mb_pa_put_free(struct ext4_allocation_context *ac)
{
	struct ext4_prealloc_space *pa = ac->ac_pa;

	BUG_ON(!pa);
	ac->ac_pa = NULL;
	WARN_ON(!atomic_dec_and_test(&pa->pa_count));
	/*
	 * current function is only called due to an error or due to
	 * len of found blocks < len of requested blocks hence the PA has not
	 * been added to grp->bb_prealloc_list. So we don't need to lock it
	 */
	pa->pa_deleted = 1;
	ext4_mb_pa_free(pa);
}

#ifdef CONFIG_EXT4_DEBUG
/*
 * Debug helper: for every group that has group info, print each entry on
 * its bb_prealloc_list followed by the group's bb_free/bb_fragments.
 * Does nothing when ext4_emergency_state() is true.
 */
static inline void ext4_mb_show_pa(struct super_block *sb)
{
	ext4_group_t i, ngroups;

	if (ext4_emergency_state(sb))
		return;

	ngroups = ext4_get_groups_count(sb);
	mb_debug(sb, "groups: ");
	for (i = 0; i < ngroups; i++) {
		struct ext4_group_info *grp = ext4_get_group_info(sb, i);
		struct ext4_prealloc_space *pa;
		ext4_grpblk_t start;
		struct list_head *cur;

		if (!grp)
			continue;
		ext4_lock_group(sb, i);
		list_for_each(cur, &grp->bb_prealloc_list) {
			pa = list_entry(cur, struct ext4_prealloc_space,
					pa_group_list);
			/* pa_pstart is read under pa_lock; only the offset is kept */
			spin_lock(&pa->pa_lock);
			ext4_get_group_no_and_offset(sb, pa->pa_pstart,
						     NULL, &start);
			spin_unlock(&pa->pa_lock);
			mb_debug(sb, "PA:%u:%d:%d\n", i, start,
				 pa->pa_len);
		}
		ext4_unlock_group(sb, i);
		mb_debug(sb, "%u: %d/%d\n", i, grp->bb_free,
			 grp->bb_fragments);
	}
}

/*
 * Debug helper: print the status, flags, original/goal/best extents and
 * pa usage of @ac, then dump all group preallocations via
 * ext4_mb_show_pa(). Does nothing when ext4_emergency_state() is true.
 */
static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
{
	struct super_block *sb = ac->ac_sb;

	if (ext4_emergency_state(sb))
		return;

	mb_debug(sb, "Can't allocate:"
			" Allocation context details:");
	mb_debug(sb, "status %u flags 0x%x",
			ac->ac_status, ac->ac_flags);
	mb_debug(sb, "orig %lu/%lu/%lu@%lu, "
			"goal %lu/%lu/%lu@%lu, "
			"best %lu/%lu/%lu@%lu cr %d",
			(unsigned long)ac->ac_o_ex.fe_group,
			(unsigned long)ac->ac_o_ex.fe_start,
			(unsigned long)ac->ac_o_ex.fe_len,
			(unsigned long)ac->ac_o_ex.fe_logical,
			(unsigned long)ac->ac_g_ex.fe_group,
			(unsigned long)ac->ac_g_ex.fe_start,
			(unsigned long)ac->ac_g_ex.fe_len,
			(unsigned long)ac->ac_g_ex.fe_logical,
			(unsigned long)ac->ac_b_ex.fe_group,
			(unsigned long)ac->ac_b_ex.fe_start,
			(unsigned long)ac->ac_b_ex.fe_len,
			(unsigned long)ac->ac_b_ex.fe_logical,
			(int)ac->ac_criteria);
	mb_debug(sb, "%u found", ac->ac_found);
	mb_debug(sb, "used pa: %s, ", str_yes_no(ac->ac_pa));
	if (ac->ac_pa)
		mb_debug(sb, "pa_type %s\n", ac->ac_pa->pa_type == MB_GROUP_PA ?
			 "group pa" : "inode pa");
	ext4_mb_show_pa(sb);
}
#else
/* Without CONFIG_EXT4_DEBUG the dump helpers do no printing. */
static inline void ext4_mb_show_pa(struct super_block *sb)
{
}
static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
{
	ext4_mb_show_pa(ac->ac_sb);
}
#endif

/*
 * We use locality group preallocation for small size files. The size of the
 * file is determined by the current size or the resulting size after
 * allocation, whichever is larger.
 *
 * One can tune this size via /sys/fs/ext4/<partition>/mb_stream_req
 *
 * On return either EXT4_MB_STREAM_ALLOC or EXT4_MB_HINT_NOPREALLOC has been
 * added to ac->ac_flags, or EXT4_MB_HINT_GROUP_ALLOC has been added,
 * ac->ac_lg points at this CPU's locality group and its lg_mutex is held.
 * Requests without EXT4_MB_HINT_DATA, or with EXT4_MB_HINT_GOAL_ONLY, are
 * left untouched.
 */
static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	int bsbits = ac->ac_sb->s_blocksize_bits;
	loff_t size, isize;
	bool inode_pa_eligible, group_pa_eligible;

	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
		return;

	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
		return;

	group_pa_eligible = sbi->s_mb_group_prealloc > 0;
	inode_pa_eligible = true;
	/* size: logical end of the request; isize: i_size rounded up to blocks */
	size = extent_logical_end(sbi, &ac->ac_o_ex);
	isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
		>> bsbits;

	/* No point in using inode preallocation for closed files */
	if ((size == isize) && !ext4_fs_is_busy(sbi) &&
	    !inode_is_open_for_write(ac->ac_inode))
		inode_pa_eligible = false;

	size = max(size, isize);
	/* Don't use group allocation for large files */
	if (size > sbi->s_mb_stream_request)
		group_pa_eligible = false;

	if (!group_pa_eligible) {
		if (inode_pa_eligible)
			ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
		else
			ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
		return;
	}

	BUG_ON(ac->ac_lg != NULL);
	/*
	 * locality group prealloc space are per cpu. The reason for having
	 * per cpu locality group is to reduce the contention between block
	 * request from multiple CPUs.
	 */
	ac->ac_lg = raw_cpu_ptr(sbi->s_locality_groups);

	/* we're going to use group allocation */
	ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;

	/* serialize all allocations in the group */
	mutex_lock(&ac->ac_lg->lg_mutex);
}

/*
 * Fill the allocation context @ac from the request @ar: clamp the length
 * to one group, sanitize the goal block, set up the original and goal
 * extents and flags, then let ext4_mb_group_or_file() choose the
 * preallocation policy.
 */
static noinline_for_stack void
ext4_mb_initialize_context(struct ext4_allocation_context *ac,
				struct ext4_allocation_request *ar)
{
	struct super_block *sb = ar->inode->i_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	ext4_group_t group;
	unsigned int len;
	ext4_fsblk_t goal;
	ext4_grpblk_t block;

	/* we can't allocate > group size */
	len = ar->len;

	/* just a dirty hack to filter too big requests */
	if (len >= EXT4_CLUSTERS_PER_GROUP(sb))
		len = EXT4_CLUSTERS_PER_GROUP(sb);

	/* start searching from the goal */
	goal = ar->goal;
	/* an out-of-range goal falls back to the first data block */
	if (goal < le32_to_cpu(es->s_first_data_block) ||
			goal >= ext4_blocks_count(es))
		goal = le32_to_cpu(es->s_first_data_block);
	ext4_get_group_no_and_offset(sb, goal, &group, &block);

	/* set up allocation goals */
	ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical);
	ac->ac_status = AC_STATUS_CONTINUE;
	ac->ac_sb = sb;
	ac->ac_inode = ar->inode;
	ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical;
	ac->ac_o_ex.fe_group = group;
	ac->ac_o_ex.fe_start = block;
	ac->ac_o_ex.fe_len = len;
	/* the goal extent starts out as a copy of the original extent */
	ac->ac_g_ex = ac->ac_o_ex;
	ac->ac_orig_goal_len = ac->ac_g_ex.fe_len;
	ac->ac_flags = ar->flags;

	/*
	 * we have to define context: we'll work with a file or
	 * locality group. this is a policy, actually
	 */
	ext4_mb_group_or_file(ac);

	mb_debug(sb, "init ac: %u blocks @ %u, goal %u, flags 0x%x, 2^%d, "
			"left: %u/%u, right %u/%u to %swritable\n",
			(unsigned) ar->len, (unsigned) ar->logical,
			(unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
			(unsigned) ar->lleft, (unsigned) ar->pleft,
			(unsigned) ar->lright, (unsigned) ar->pright,
			inode_is_open_for_write(ar->inode) ? "" : "non-");
}

/*
 * Trim the lg->lg_prealloc_list[@order] bucket. Unused, not-yet-deleted
 * group pa's are moved to a private list under lg_prealloc_lock until
 * @total_entries has dropped to 5; each moved pa is then released under
 * its group lock and handed to call_rcu() for freeing.
 */
static noinline_for_stack void
ext4_mb_discard_lg_preallocations(struct super_block *sb,
					struct ext4_locality_group *lg,
					int order, int total_entries)
{
	ext4_group_t group = 0;
	struct ext4_buddy e4b;
	LIST_HEAD(discard_list);
	struct ext4_prealloc_space *pa, *tmp;

	mb_debug(sb, "discard locality group preallocation\n");

	spin_lock(&lg->lg_prealloc_lock);
	list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
				pa_node.lg_list,
				lockdep_is_held(&lg->lg_prealloc_lock)) {
		spin_lock(&pa->pa_lock);
		if (atomic_read(&pa->pa_count)) {
			/*
			 * This is the pa that we just used
			 * for block allocation. So don't
			 * free that
			 */
			spin_unlock(&pa->pa_lock);
			continue;
		}
		if (pa->pa_deleted) {
			spin_unlock(&pa->pa_lock);
			continue;
		}
		/* only lg prealloc space */
		BUG_ON(pa->pa_type != MB_GROUP_PA);

		/* seems this one can be freed ... */
		ext4_mb_mark_pa_deleted(sb, pa);
		spin_unlock(&pa->pa_lock);

		list_del_rcu(&pa->pa_node.lg_list);
		list_add(&pa->u.pa_tmp_list, &discard_list);

		total_entries--;
		if (total_entries <= 5) {
			/*
			 * we want to keep only 5 entries
			 * allowing it to grow to 8. This
			 * makes sure we don't call discard
			 * soon for this list.
			 */
			break;
		}
	}
	spin_unlock(&lg->lg_prealloc_lock);

	list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
		int err;

		group = ext4_get_group_number(sb, pa->pa_pstart);
		err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
					     GFP_NOFS|__GFP_NOFAIL);
		if (err) {
			ext4_error_err(sb, -err, "Error %d loading buddy information for %u",
				       err, group);
			continue;
		}
		ext4_lock_group(sb, group);
		list_del(&pa->pa_group_list);
		ext4_mb_release_group_pa(&e4b, pa);
		ext4_unlock_group(sb, group);

		ext4_mb_unload_buddy(&e4b);
		list_del(&pa->u.pa_tmp_list);
		/* freeing goes through call_rcu() with ext4_mb_pa_callback */
		call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
	}
}

/*
 * We have incremented pa_count. So it cannot be freed at this
 * point. Also we hold lg_mutex. So no parallel allocation is
 * possible from this lg. That means pa_free cannot be updated.
 *
 * A parallel ext4_mb_discard_group_preallocations is possible.
* which can cause the lg_prealloc_list to be updated. */ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) { int order, added = 0, lg_prealloc_count = 1; struct super_block *sb = ac->ac_sb; struct ext4_locality_group *lg = ac->ac_lg; struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa; order = fls(pa->pa_free) - 1; if (order > PREALLOC_TB_SIZE - 1) /* The max size of hash table is PREALLOC_TB_SIZE */ order = PREALLOC_TB_SIZE - 1; /* Add the prealloc space to lg */ spin_lock(&lg->lg_prealloc_lock); list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order], pa_node.lg_list, lockdep_is_held(&lg->lg_prealloc_lock)) { spin_lock(&tmp_pa->pa_lock); if (tmp_pa->pa_deleted) { spin_unlock(&tmp_pa->pa_lock); continue; } if (!added && pa->pa_free < tmp_pa->pa_free) { /* Add to the tail of the previous entry */ list_add_tail_rcu(&pa->pa_node.lg_list, &tmp_pa->pa_node.lg_list); added = 1; /* * we want to count the total * number of entries in the list */ } spin_unlock(&tmp_pa->pa_lock); lg_prealloc_count++; } if (!added) list_add_tail_rcu(&pa->pa_node.lg_list, &lg->lg_prealloc_list[order]); spin_unlock(&lg->lg_prealloc_lock); /* Now trim the list to be not more than 8 elements */ if (lg_prealloc_count > 8) ext4_mb_discard_lg_preallocations(sb, lg, order, lg_prealloc_count); } /* * release all resource we used in allocation */ static void ext4_mb_release_context(struct ext4_allocation_context *ac) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_prealloc_space *pa = ac->ac_pa; if (pa) { if (pa->pa_type == MB_GROUP_PA) { /* see comment in ext4_mb_use_group_pa() */ spin_lock(&pa->pa_lock); pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len); pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len); pa->pa_free -= ac->ac_b_ex.fe_len; pa->pa_len -= ac->ac_b_ex.fe_len; spin_unlock(&pa->pa_lock); /* * We want to add the pa to the right bucket. * Remove it from the list and while adding * make sure the list to which we are adding * doesn't grow big. 
*/ if (likely(pa->pa_free)) { spin_lock(pa->pa_node_lock.lg_lock); list_del_rcu(&pa->pa_node.lg_list); spin_unlock(pa->pa_node_lock.lg_lock); ext4_mb_add_n_trim(ac); } } ext4_mb_put_pa(ac, ac->ac_sb, pa); } if (ac->ac_bitmap_folio) folio_put(ac->ac_bitmap_folio); if (ac->ac_buddy_folio) folio_put(ac->ac_buddy_folio); if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) mutex_unlock(&ac->ac_lg->lg_mutex); ext4_mb_collect_stats(ac); } static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) { ext4_group_t i, ngroups = ext4_get_groups_count(sb); int ret; int freed = 0, busy = 0; int retry = 0; trace_ext4_mb_discard_preallocations(sb, needed); if (needed == 0) needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1; repeat: for (i = 0; i < ngroups && needed > 0; i++) { ret = ext4_mb_discard_group_preallocations(sb, i, &busy); freed += ret; needed -= ret; cond_resched(); } if (needed > 0 && busy && ++retry < 3) { busy = 0; goto repeat; } return freed; } static bool ext4_mb_discard_preallocations_should_retry(struct super_block *sb, struct ext4_allocation_context *ac, u64 *seq) { int freed; u64 seq_retry = 0; bool ret = false; freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len); if (freed) { ret = true; goto out_dbg; } seq_retry = ext4_get_discard_pa_seq_sum(); if (!(ac->ac_flags & EXT4_MB_STRICT_CHECK) || seq_retry != *seq) { ac->ac_flags |= EXT4_MB_STRICT_CHECK; *seq = seq_retry; ret = true; } out_dbg: mb_debug(sb, "freed %d, retry ? %s\n", freed, str_yes_no(ret)); return ret; } /* * Simple allocator for Ext4 fast commit replay path. It searches for blocks * linearly starting at the goal block and also excludes the blocks which * are going to be in use after fast commit replay. 
*/
static ext4_fsblk_t
ext4_mb_new_blocks_simple(struct ext4_allocation_request *ar, int *errp)
{
	struct buffer_head *bitmap_bh;
	struct super_block *sb = ar->inode->i_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_group_t group, nr;
	ext4_grpblk_t blkoff;
	ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
	ext4_grpblk_t i = 0;
	ext4_fsblk_t goal, block;
	struct ext4_super_block *es = sbi->s_es;

	goal = ar->goal;
	/* an out-of-range goal falls back to the first data block */
	if (goal < le32_to_cpu(es->s_first_data_block) ||
			goal >= ext4_blocks_count(es))
		goal = le32_to_cpu(es->s_first_data_block);

	ar->len = 0;
	ext4_get_group_no_and_offset(sb, goal, &group, &blkoff);
	/* visit at most every group once, wrapping around after the last */
	for (nr = ext4_get_groups_count(sb); nr > 0; nr--) {
		bitmap_bh = ext4_read_block_bitmap(sb, group);
		if (IS_ERR(bitmap_bh)) {
			*errp = PTR_ERR(bitmap_bh);
			pr_warn("Failed to read block bitmap\n");
			return 0;
		}

		/* find the next zero bit that is not excluded by replay */
		while (1) {
			i = mb_find_next_zero_bit(bitmap_bh->b_data, max,
						blkoff);
			if (i >= max)
				break;
			if (ext4_fc_replay_check_excluded(sb,
				ext4_group_first_block_no(sb, group) +
					EXT4_C2B(sbi, i))) {
				blkoff = i + 1;
			} else
				break;
		}
		brelse(bitmap_bh);
		/* i < max means a usable bit was found in this group */
		if (i < max)
			break;

		if (++group >= ext4_get_groups_count(sb))
			group = 0;

		blkoff = 0;
	}

	if (i >= max) {
		*errp = -ENOSPC;
		return 0;
	}

	/* exactly one block is handed out: ar->len is set to 1 */
	block = ext4_group_first_block_no(sb, group) + EXT4_C2B(sbi, i);
	ext4_mb_mark_bb(sb, block, 1, true);
	ar->len = 1;

	*errp = 0;
	return block;
}

/*
 * Main entry point into mballoc to allocate blocks
 * it tries to use preallocation first, then falls back
 * to usual allocation
 *
 * On return *@errp holds 0 or a negative errno and ar->len holds the
 * length that was actually allocated (0 on failure). The return value is
 * the first allocated block, or 0 on failure.
 */
ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
				struct ext4_allocation_request *ar, int *errp)
{
	struct ext4_allocation_context *ac = NULL;
	struct ext4_sb_info *sbi;
	struct super_block *sb;
	ext4_fsblk_t block = 0;
	unsigned int inquota = 0;
	unsigned int reserv_clstrs = 0;
	int retries = 0;
	u64 seq;

	might_sleep();
	sb = ar->inode->i_sb;
	sbi = EXT4_SB(sb);

	trace_ext4_request_blocks(ar);
	/* with EXT4_FC_REPLAY set the simple allocator above is used instead */
	if (sbi->s_mount_state & EXT4_FC_REPLAY)
		return ext4_mb_new_blocks_simple(ar, errp);

	/* Allow to use superuser reservation for quota file */
	if (ext4_is_quota_file(ar->inode))
		ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;

	if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) {
		/*
		 * Without delayed allocation we need to verify
		 * there is enough free blocks to do block allocation
		 * and verify allocation doesn't exceed the quota limits.
		 */
		while (ar->len &&
			ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {

			/* let others free the space, then retry with half the length */
			cond_resched();
			ar->len = ar->len >> 1;
		}
		if (!ar->len) {
			ext4_mb_show_pa(sb);
			*errp = -ENOSPC;
			return 0;
		}
		/* remembered so the reservation can be dropped at "out" */
		reserv_clstrs = ar->len;
		if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
			dquot_alloc_block_nofail(ar->inode,
						 EXT4_C2B(sbi, ar->len));
		} else {
			/* shrink the request one cluster at a time until quota accepts it */
			while (ar->len &&
				dquot_alloc_block(ar->inode,
						  EXT4_C2B(sbi, ar->len))) {

				ar->flags |= EXT4_MB_HINT_NOPREALLOC;
				ar->len--;
			}
		}
		/* remembered so unused quota can be returned at "out" */
		inquota = ar->len;
		if (ar->len == 0) {
			*errp = -EDQUOT;
			goto out;
		}
	}

	ac = kmem_cache_zalloc(ext4_ac_cachep, GFP_NOFS);
	if (!ac) {
		ar->len = 0;
		*errp = -ENOMEM;
		goto out;
	}

	ext4_mb_initialize_context(ac, ar);

	ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
	/* snapshot of the discard sequence, compared again before retrying */
	seq = this_cpu_read(discard_pa_seq);
	if (!ext4_mb_use_preallocated(ac)) {
		ac->ac_op = EXT4_MB_HISTORY_ALLOC;
		ext4_mb_normalize_request(ac, ar);

		*errp = ext4_mb_pa_alloc(ac);
		if (*errp)
			goto errout;
repeat:
		/* allocate space in core */
		*errp = ext4_mb_regular_allocator(ac);
		/*
		 * pa allocated above is added to grp->bb_prealloc_list only
		 * when we were able to allocate some block i.e. when
		 * ac->ac_status == AC_STATUS_FOUND.
		 * And error from above mean ac->ac_status != AC_STATUS_FOUND
		 * So we have to free this pa here itself.
		 */
		if (*errp) {
			ext4_mb_pa_put_free(ac);
			ext4_discard_allocated_blocks(ac);
			goto errout;
		}
		if (ac->ac_status == AC_STATUS_FOUND &&
			ac->ac_o_ex.fe_len >= ac->ac_f_ex.fe_len)
			ext4_mb_pa_put_free(ac);
	}
	if (likely(ac->ac_status == AC_STATUS_FOUND)) {
		*errp = ext4_mb_mark_diskspace_used(ac, handle);
		if (*errp) {
			ext4_discard_allocated_blocks(ac);
			goto errout;
		} else {
			block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
			ar->len = ac->ac_b_ex.fe_len;
		}
	} else {
		/* at most two retries, and only if the helper says it may help */
		if (++retries < 3 &&
		    ext4_mb_discard_preallocations_should_retry(sb, ac, &seq))
			goto repeat;
		/*
		 * If block allocation fails then the pa allocated above
		 * needs to be freed here itself.
		 */
		ext4_mb_pa_put_free(ac);
		*errp = -ENOSPC;
	}

	if (*errp) {
		/* errout is also reached by goto from the failure paths above */
errout:
		ac->ac_b_ex.fe_len = 0;
		ar->len = 0;
		ext4_mb_show_ac(ac);
	}
	ext4_mb_release_context(ac);
	kmem_cache_free(ext4_ac_cachep, ac);
out:
	/* return the part of the charged quota that was not allocated */
	if (inquota && ar->len < inquota)
		dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
	/* release any reserved blocks */
	if (reserv_clstrs)
		percpu_counter_sub(&sbi->s_dirtyclusters_counter,
						reserv_clstrs);

	trace_ext4_allocate_blocks(ar, (unsigned long long)block);

	return block;
}

/*
 * We can merge two free data extents only if the physical blocks
 * are contiguous, AND the extents were freed by the same transaction,
 * AND the blocks are associated with the same group.
*/
static inline bool
ext4_freed_extents_can_be_merged(struct ext4_free_data *entry1,
				 struct ext4_free_data *entry2)
{
	if (entry1->efd_tid != entry2->efd_tid)
		return false;
	/* entry2 must start exactly where entry1 ends */
	if (entry1->efd_start_cluster + entry1->efd_count !=
	    entry2->efd_start_cluster)
		return false;
	if (WARN_ON_ONCE(entry1->efd_group != entry2->efd_group))
		return false;
	return true;
}

/*
 * Fold @entry2 into @entry1: add its count to @entry1, unlink @entry2
 * from its efd_list (under s_md_lock) and from @root, then free it.
 */
static inline void
ext4_merge_freed_extents(struct ext4_sb_info *sbi, struct rb_root *root,
			 struct ext4_free_data *entry1,
			 struct ext4_free_data *entry2)
{
	entry1->efd_count += entry2->efd_count;
	spin_lock(&sbi->s_md_lock);
	list_del(&entry2->efd_list);
	spin_unlock(&sbi->s_md_lock);
	rb_erase(&entry2->efd_node, root);
	kmem_cache_free(ext4_free_data_cachep, entry2);
}

/*
 * If @entry has a predecessor in @root that it can be merged with, merge
 * @entry into that predecessor (@entry is freed by the merge).
 */
static inline void
ext4_try_merge_freed_extent_prev(struct ext4_sb_info *sbi, struct rb_root *root,
				 struct ext4_free_data *entry)
{
	struct ext4_free_data *prev;
	struct rb_node *node;

	node = rb_prev(&entry->efd_node);
	if (!node)
		return;

	prev = rb_entry(node, struct ext4_free_data, efd_node);
	if (ext4_freed_extents_can_be_merged(prev, entry))
		ext4_merge_freed_extents(sbi, root, prev, entry);
}

/*
 * If @entry has a successor in @root that it can be merged with, merge
 * that successor into @entry (the successor is freed by the merge).
 */
static inline void
ext4_try_merge_freed_extent_next(struct ext4_sb_info *sbi, struct rb_root *root,
				 struct ext4_free_data *entry)
{
	struct ext4_free_data *next;
	struct rb_node *node;

	node = rb_next(&entry->efd_node);
	if (!node)
		return;

	next = rb_entry(node, struct ext4_free_data, efd_node);
	if (ext4_freed_extents_can_be_merged(entry, next))
		ext4_merge_freed_extents(sbi, root, entry, next);
}

/*
 * Record @new_entry in the group's bb_free_root tree of freed extents.
 * The entry is either merged into an adjacent mergeable extent (and
 * freed), or inserted into the tree and appended to
 * sbi->s_freed_data_list[efd_tid & 1]. An overlap with an extent already
 * in the tree is reported as an error and @new_entry is freed.
 */
static noinline_for_stack void
ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
		      struct ext4_free_data *new_entry)
{
	ext4_group_t group = e4b->bd_group;
	ext4_grpblk_t cluster;
	ext4_grpblk_t clusters = new_entry->efd_count;
	struct ext4_free_data *entry = NULL;
	struct ext4_group_info *db = e4b->bd_info;
	struct super_block *sb = e4b->bd_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct rb_root *root = &db->bb_free_root;
	struct rb_node **n = &root->rb_node;
	struct rb_node *parent = NULL, *new_node;

	BUG_ON(!ext4_handle_valid(handle));
	BUG_ON(e4b->bd_bitmap_folio == NULL);
	BUG_ON(e4b->bd_buddy_folio == NULL);

	new_node = &new_entry->efd_node;
	cluster = new_entry->efd_start_cluster;

	if (!*n) {
		/*
		 * first free block extent. We need to protect the buddy
		 * cache from being freed, otherwise we'll refresh it from
		 * on-disk bitmap and lose not-yet-available blocks
		 */
		folio_get(e4b->bd_buddy_folio);
		folio_get(e4b->bd_bitmap_folio);
	}
	/* descend to the insertion point; "entry" ends up as the last node visited */
	while (*n) {
		parent = *n;
		entry = rb_entry(parent, struct ext4_free_data, efd_node);
		if (cluster < entry->efd_start_cluster)
			n = &(*n)->rb_left;
		else if (cluster >= (entry->efd_start_cluster + entry->efd_count))
			n = &(*n)->rb_right;
		else {
			/* the start cluster lies inside an extent already in the tree */
			ext4_grp_locked_error(sb, group, 0,
				ext4_group_first_block_no(sb, group) +
				EXT4_C2B(sbi, cluster),
				"Block already on to-be-freed list");
			kmem_cache_free(ext4_free_data_cachep, new_entry);
			return;
		}
	}

	atomic_add(clusters, &sbi->s_mb_free_pending);
	/* empty tree: nothing to merge with */
	if (!entry)
		goto insert;

	/* Now try to see the extent can be merged to prev and next */
	if (ext4_freed_extents_can_be_merged(new_entry, entry)) {
		/* new_entry directly precedes entry: extend entry downwards */
		entry->efd_start_cluster = cluster;
		entry->efd_count += new_entry->efd_count;
		kmem_cache_free(ext4_free_data_cachep, new_entry);
		ext4_try_merge_freed_extent_prev(sbi, root, entry);
		return;
	}
	if (ext4_freed_extents_can_be_merged(entry, new_entry)) {
		/* new_entry directly follows entry: extend entry upwards */
		entry->efd_count += new_entry->efd_count;
		kmem_cache_free(ext4_free_data_cachep, new_entry);
		ext4_try_merge_freed_extent_next(sbi, root, entry);
		return;
	}
insert:
	rb_link_node(new_node, parent, n);
	rb_insert_color(new_node, root);

	spin_lock(&sbi->s_md_lock);
	list_add_tail(&new_entry->efd_list, &sbi->s_freed_data_list[new_entry->efd_tid & 1]);
	spin_unlock(&sbi->s_md_lock);
}

/*
 * Free path used by ext4_free_blocks() when EXT4_FC_REPLAY is set: locate
 * the group and offset of @block and hand the range to
 * ext4_mb_mark_context() with no handle.
 */
static void ext4_free_blocks_simple(struct inode *inode, ext4_fsblk_t block,
					unsigned long count)
{
	struct super_block *sb = inode->i_sb;
	ext4_group_t group;
	ext4_grpblk_t blkoff;

	ext4_get_group_no_and_offset(sb, block, &group, &blkoff);
	ext4_mb_mark_context(NULL, sb, false, group, blkoff, count,
			     EXT4_MB_BITMAP_MARKED_CHECK |
			     EXT4_MB_SYNC_UPDATE,
			     NULL);
}

/**
 * ext4_mb_clear_bb() -- helper function for freeing blocks.
 *			Used by ext4_free_blocks()
 * @handle:		handle for this transaction
 * @inode:		inode
 * @block:		starting physical block to be freed
 * @count:		number of blocks to be freed
 * @flags:		flags used by ext4_free_blocks
 *
 * A range that crosses a group boundary is processed one group at a time
 * via the do_more loop.
 */
static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode,
			       ext4_fsblk_t block, unsigned long count,
			       int flags)
{
	struct super_block *sb = inode->i_sb;
	struct ext4_group_info *grp;
	unsigned int overflow;
	ext4_grpblk_t bit;
	ext4_group_t block_group;
	struct ext4_sb_info *sbi;
	struct ext4_buddy e4b;
	unsigned int count_clusters;
	int err = 0;
	int mark_flags = 0;
	ext4_grpblk_t changed;

	sbi = EXT4_SB(sb);

	if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
	    !ext4_inode_block_valid(inode, block, count)) {
		ext4_error(sb, "Freeing blocks in system zone - "
			   "Block = %llu, count = %lu", block, count);
		/* err = 0. ext4_std_error should be a no op */
		goto error_out;
	}
	flags |= EXT4_FREE_BLOCKS_VALIDATED;

do_more:
	overflow = 0;
	ext4_get_group_no_and_offset(sb, block, &block_group, &bit);

	grp = ext4_get_group_info(sb, block_group);
	/* nothing is freed in a group with missing info or a corrupt bitmap */
	if (unlikely(!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
		return;

	/*
	 * Check to see if we are freeing blocks across a group
	 * boundary.
	 */
	if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) {
		/* keep the part inside this group; "overflow" is handled next round */
		overflow = EXT4_C2B(sbi, bit) + count -
			EXT4_BLOCKS_PER_GROUP(sb);
		count -= overflow;
		/* The range changed so it's no longer validated */
		flags &= ~EXT4_FREE_BLOCKS_VALIDATED;
	}
	count_clusters = EXT4_NUM_B2C(sbi, count);
	trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);

	/* __GFP_NOFAIL: retry infinitely, ignore TIF_MEMDIE and memcg limit. */
	err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b,
				     GFP_NOFS|__GFP_NOFAIL);
	if (err)
		goto error_out;

	if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
	    !ext4_inode_block_valid(inode, block, count)) {
		ext4_error(sb, "Freeing blocks in system zone - "
			   "Block = %llu, count = %lu", block, count);
		/* err = 0. ext4_std_error should be a no op */
		goto error_clean;
	}

#ifdef AGGRESSIVE_CHECK
	mark_flags |= EXT4_MB_BITMAP_MARKED_CHECK;
#endif
	err = ext4_mb_mark_context(handle, sb, false, block_group, bit,
				   count_clusters, mark_flags, &changed);

	/* an error that changed nothing ends processing here */
	if (err && changed == 0)
		goto error_clean;

#ifdef AGGRESSIVE_CHECK
	BUG_ON(changed != count_clusters);
#endif

	/*
	 * We need to make sure we don't reuse the freed block until after the
	 * transaction is committed. We make an exception if the inode is to be
	 * written in writeback mode since writeback mode has weak data
	 * consistency guarantees.
	 */
	if (ext4_handle_valid(handle) &&
	    ((flags & EXT4_FREE_BLOCKS_METADATA) ||
	     !ext4_should_writeback_data(inode))) {
		struct ext4_free_data *new_entry;
		/*
		 * We use __GFP_NOFAIL because ext4_free_blocks() is not allowed
		 * to fail.
		 */
		new_entry = kmem_cache_alloc(ext4_free_data_cachep,
				GFP_NOFS|__GFP_NOFAIL);
		new_entry->efd_start_cluster = bit;
		new_entry->efd_group = block_group;
		new_entry->efd_count = count_clusters;
		new_entry->efd_tid = handle->h_transaction->t_tid;

		/* the group lock taken here is released after the if/else */
		ext4_lock_group(sb, block_group);
		ext4_mb_free_metadata(handle, &e4b, new_entry);
	} else {
		if (test_opt(sb, DISCARD)) {
			err = ext4_issue_discard(sb, block_group, bit,
						 count_clusters);
			/*
			 * Ignore EOPNOTSUPP error. This is consistent with
			 * what happens when using journal.
			 */
			if (err == -EOPNOTSUPP)
				err = 0;
			if (err)
				ext4_msg(sb, KERN_WARNING, "discard request in"
					 " group:%u block:%d count:%lu failed"
					 " with %d", block_group, bit, count,
					 err);
		}

		EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info);

		/* the group lock taken here is released after the if/else */
		ext4_lock_group(sb, block_group);
		mb_free_blocks(inode, &e4b, bit, count_clusters);
	}

	ext4_unlock_group(sb, block_group);

	/*
	 * on a bigalloc file system, defer the s_freeclusters_counter
	 * update to the caller (ext4_remove_space and friends) so they
	 * can determine if a cluster freed here should be rereserved
	 */
	if (!(flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)) {
		if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
			dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
		percpu_counter_add(&sbi->s_freeclusters_counter,
				   count_clusters);
	}

	if (overflow && !err) {
		/* continue with the part that spilled into the next group */
		block += count;
		count = overflow;
		ext4_mb_unload_buddy(&e4b);
		/* The range changed so it's no longer validated */
		flags &= ~EXT4_FREE_BLOCKS_VALIDATED;
		goto do_more;
	}

error_clean:
	ext4_mb_unload_buddy(&e4b);
error_out:
	ext4_std_error(sb, err);
}

/**
 * ext4_free_blocks() -- Free given blocks and update quota
 * @handle:		handle for this transaction
 * @inode:		inode
 * @bh:			optional buffer of the block to be freed
 * @block:		starting physical block to be freed
 * @count:		number of blocks to be freed
 * @flags:		flags used by ext4_free_blocks
 */
void ext4_free_blocks(handle_t *handle, struct inode *inode,
		      struct buffer_head *bh, ext4_fsblk_t block,
		      unsigned long count, int flags)
{
	struct super_block *sb = inode->i_sb;
	unsigned int overflow;
	struct ext4_sb_info *sbi;

	sbi = EXT4_SB(sb);

	/* when a buffer is given, @block must be 0 or match bh->b_blocknr */
	if (bh) {
		if (block)
			BUG_ON(block != bh->b_blocknr);
		else
			block = bh->b_blocknr;
	}

	if (sbi->s_mount_state & EXT4_FC_REPLAY) {
		ext4_free_blocks_simple(inode, block, EXT4_NUM_B2C(sbi, count));
		return;
	}

	might_sleep();

	if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
	    !ext4_inode_block_valid(inode, block, count)) {
		ext4_error(sb, "Freeing blocks not in datazone - "
			   "block = %llu, count = %lu", block, count);
		return;
	}
	flags |= EXT4_FREE_BLOCKS_VALIDATED;

	ext4_debug("freeing block %llu\n", block);
	trace_ext4_free_blocks(inode, block, count, flags);

	if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
		BUG_ON(count > 1);

		ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
			    inode, bh, block);
	}

	/*
	 * If the extent to be freed does not begin on a cluster
	 * boundary, we need to deal with partial clusters at the
	 * beginning and end of the extent.  Normally we will free
	 * blocks at the beginning or the end unless we are explicitly
	 * requested to avoid doing so.
	 */
	overflow = EXT4_PBLK_COFF(sbi, block);
	if (overflow) {
		if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
			/* skip forward to the start of the next cluster */
			overflow = sbi->s_cluster_ratio - overflow;
			block += overflow;
			if (count > overflow)
				count -= overflow;
			else
				return;
		} else {
			/* widen the range back to the start of the cluster */
			block -= overflow;
			count += overflow;
		}
		/* The range changed so it's no longer validated */
		flags &= ~EXT4_FREE_BLOCKS_VALIDATED;
	}
	overflow = EXT4_LBLK_COFF(sbi, count);
	if (overflow) {
		if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
			/* drop the partial cluster at the end */
			if (count > overflow)
				count -= overflow;
			else
				return;
		} else
			/* widen the range to the end of the last cluster */
			count += sbi->s_cluster_ratio - overflow;
		/* The range changed so it's no longer validated */
		flags &= ~EXT4_FREE_BLOCKS_VALIDATED;
	}

	/* no buffer was supplied: forget every block of the range one by one */
	if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
		int i;
		int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA;

		for (i = 0; i < count; i++) {
			cond_resched();
			if (is_metadata)
				bh = sb_find_get_block_nonatomic(inode->i_sb,
								 block + i);
			ext4_forget(handle, is_metadata, inode, bh, block + i);
		}
	}

	ext4_mb_clear_bb(handle, inode, block, count, flags);
}

/**
 * ext4_group_add_blocks() -- Add given blocks to an existing group
 * @handle:			handle to this transaction
 * @sb:				super block
 * @block:			start physical block to add to the block group
 * @count:			number of blocks to free
 *
 * This marks the blocks as free in the bitmap and buddy.
*/
int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
			 ext4_fsblk_t block, unsigned long count)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	/* number of clusters touched by [block, block + count - 1] */
	unsigned long nr_clusters = EXT4_B2C(sbi, block + count - 1) -
				    EXT4_B2C(sbi, block) + 1;
	struct ext4_buddy e4b;
	ext4_grpblk_t nr_changed;
	ext4_group_t grp_no;
	ext4_grpblk_t first_bit;
	int ret = 0;

	ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);

	if (nr_clusters == 0)
		return 0;

	ext4_get_group_no_and_offset(sb, block, &grp_no, &first_bit);

	/* The whole range has to fit inside a single group. */
	if (first_bit + nr_clusters > EXT4_CLUSTERS_PER_GROUP(sb)) {
		ext4_warning(sb, "too many blocks added to group %u",
			     grp_no);
		ret = -EINVAL;
		goto out_report;
	}

	ret = ext4_mb_load_buddy(sb, grp_no, &e4b);
	if (ret)
		goto out_report;

	if (!ext4_sb_block_valid(sb, NULL, block, count)) {
		ext4_error(sb, "Adding blocks in system zones - "
			   "Block = %llu, count = %lu",
			   block, count);
		ret = -EINVAL;
		goto out_unload;
	}

	ret = ext4_mb_mark_context(handle, sb, false, grp_no, first_bit,
				   nr_clusters, EXT4_MB_BITMAP_MARKED_CHECK,
				   &nr_changed);
	/* Bail out only when the error left the bitmap untouched. */
	if (ret && nr_changed == 0)
		goto out_unload;

	if (nr_changed != nr_clusters)
		ext4_error(sb, "bit already cleared in group %u", grp_no);

	ext4_lock_group(sb, grp_no);
	mb_free_blocks(NULL, &e4b, first_bit, nr_clusters);
	ext4_unlock_group(sb, grp_no);

	percpu_counter_add(&sbi->s_freeclusters_counter, nr_changed);

out_unload:
	ext4_mb_unload_buddy(&e4b);
out_report:
	ext4_std_error(sb, ret);
	return ret;
}

/**
 * ext4_trim_extent -- function to TRIM one single free extent in the group
 * @sb:		super block for the file system
 * @start:	starting block of the free extent in the alloc. group
 * @count:	number of blocks to TRIM
 * @e4b:	ext4 buddy for the group
 *
 * Trim "count" blocks starting at "start" in the "group".
To assure that no * one will allocate those blocks, mark it as used in buddy bitmap. This must * be called with under the group lock. */ static int ext4_trim_extent(struct super_block *sb, int start, int count, struct ext4_buddy *e4b) __releases(bitlock) __acquires(bitlock) { struct ext4_free_extent ex; ext4_group_t group = e4b->bd_group; int ret = 0; trace_ext4_trim_extent(sb, group, start, count); assert_spin_locked(ext4_group_lock_ptr(sb, group)); ex.fe_start = start; ex.fe_group = group; ex.fe_len = count; /* * Mark blocks used, so no one can reuse them while * being trimmed. */ mb_mark_used(e4b, &ex); ext4_unlock_group(sb, group); ret = ext4_issue_discard(sb, group, start, count); ext4_lock_group(sb, group); mb_free_blocks(NULL, e4b, start, ex.fe_len); return ret; } static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb, ext4_group_t grp) { unsigned long nr_clusters_in_group; if (grp < (ext4_get_groups_count(sb) - 1)) nr_clusters_in_group = EXT4_CLUSTERS_PER_GROUP(sb); else nr_clusters_in_group = (ext4_blocks_count(EXT4_SB(sb)->s_es) - ext4_group_first_block_no(sb, grp)) >> EXT4_CLUSTER_BITS(sb); return nr_clusters_in_group - 1; } static bool ext4_trim_interrupted(void) { return fatal_signal_pending(current) || freezing(current); } static int ext4_try_to_trim_range(struct super_block *sb, struct ext4_buddy *e4b, ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks) __acquires(ext4_group_lock_ptr(sb, e4b->bd_group)) __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) { ext4_grpblk_t next, count, free_count, last, origin_start; bool set_trimmed = false; void *bitmap; if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) return 0; last = ext4_last_grp_cluster(sb, e4b->bd_group); bitmap = e4b->bd_bitmap; if (start == 0 && max >= last) set_trimmed = true; origin_start = start; start = max(e4b->bd_info->bb_first_free, start); count = 0; free_count = 0; while (start <= max) { start = mb_find_next_zero_bit(bitmap, max + 1, start); if (start > 
max) break; next = mb_find_next_bit(bitmap, last + 1, start); if (origin_start == 0 && next >= last) set_trimmed = true; if ((next - start) >= minblocks) { int ret = ext4_trim_extent(sb, start, next - start, e4b); if (ret && ret != -EOPNOTSUPP) return count; count += next - start; } free_count += next - start; start = next + 1; if (ext4_trim_interrupted()) return count; if (need_resched()) { ext4_unlock_group(sb, e4b->bd_group); cond_resched(); ext4_lock_group(sb, e4b->bd_group); } if ((e4b->bd_info->bb_free - free_count) < minblocks) break; } if (set_trimmed) EXT4_MB_GRP_SET_TRIMMED(e4b->bd_info); return count; } /** * ext4_trim_all_free -- function to trim all free space in alloc. group * @sb: super block for file system * @group: group to be trimmed * @start: first group block to examine * @max: last group block to examine * @minblocks: minimum extent block count * * ext4_trim_all_free walks through group's block bitmap searching for free * extents. When the free extent is found, mark it as used in group buddy * bitmap. Then issue a TRIM command on this extent and free the extent in * the group buddy bitmap. 
*/
static ext4_grpblk_t
ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
		   ext4_grpblk_t start, ext4_grpblk_t max,
		   ext4_grpblk_t minblocks)
{
	struct ext4_buddy e4b;
	int ret;

	trace_ext4_trim_all_free(sb, group, start, max);

	ret = ext4_mb_load_buddy(sb, group, &e4b);
	if (ret) {
		ext4_warning(sb, "Error %d loading buddy information for %u",
			     ret, group);
		return ret;
	}

	ext4_lock_group(sb, group);
	/*
	 * ret is 0 at this point. The group is walked only if it has not been
	 * trimmed yet, or if this request uses a smaller minimum extent length
	 * than the one recorded in s_last_trim_minblks.
	 */
	if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) ||
	    minblocks < EXT4_SB(sb)->s_last_trim_minblks)
		ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks);
	ext4_unlock_group(sb, group);

	ext4_mb_unload_buddy(&e4b);

	ext4_debug("trimmed %d blocks in the group %d\n", ret, group);

	return ret;
}

/**
 * ext4_trim_fs() -- trim ioctl handle function
 * @sb:			superblock for filesystem
 * @range:		fstrim_range structure
 *
 * start:	First Byte to trim
 * len:		number of Bytes to trim from start
 * minlen:	minimum extent length in Bytes
 * ext4_trim_fs goes through all allocation groups containing Bytes from
 * start to start+len. For each such a group ext4_trim_all_free function
 * is invoked to trim all free space.
*/ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) { unsigned int discard_granularity = bdev_discard_granularity(sb->s_bdev); struct ext4_group_info *grp; ext4_group_t group, first_group, last_group; ext4_grpblk_t cnt = 0, first_cluster, last_cluster; uint64_t start, end, minlen, trimmed = 0; ext4_fsblk_t first_data_blk = le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es); int ret = 0; start = range->start >> sb->s_blocksize_bits; end = start + (range->len >> sb->s_blocksize_bits) - 1; minlen = EXT4_NUM_B2C(EXT4_SB(sb), range->minlen >> sb->s_blocksize_bits); if (minlen > EXT4_CLUSTERS_PER_GROUP(sb) || start >= max_blks || range->len < sb->s_blocksize) return -EINVAL; /* No point to try to trim less than discard granularity */ if (range->minlen < discard_granularity) { minlen = EXT4_NUM_B2C(EXT4_SB(sb), discard_granularity >> sb->s_blocksize_bits); if (minlen > EXT4_CLUSTERS_PER_GROUP(sb)) goto out; } if (end >= max_blks - 1) end = max_blks - 1; if (end <= first_data_blk) goto out; if (start < first_data_blk) start = first_data_blk; /* Determine first and last group to examine based on start and end */ ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start, &first_group, &first_cluster); ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) end, &last_group, &last_cluster); /* end now represents the last cluster to discard in this group */ end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; for (group = first_group; group <= last_group; group++) { if (ext4_trim_interrupted()) break; grp = ext4_get_group_info(sb, group); if (!grp) continue; /* We only do this if the grp has never been initialized */ if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { ret = ext4_mb_init_group(sb, group, GFP_NOFS); if (ret) break; } /* * For all the groups except the last one, last cluster will * always be EXT4_CLUSTERS_PER_GROUP(sb)-1, so we only need to * change it for the last group, note that last_cluster is * already computed 
earlier by ext4_get_group_no_and_offset() */ if (group == last_group) end = last_cluster; if (grp->bb_free >= minlen) { cnt = ext4_trim_all_free(sb, group, first_cluster, end, minlen); if (cnt < 0) { ret = cnt; break; } trimmed += cnt; } /* * For every group except the first one, we are sure * that the first cluster to discard will be cluster #0. */ first_cluster = 0; } if (!ret) EXT4_SB(sb)->s_last_trim_minblks = minlen; out: range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits; return ret; } /* Iterate all the free extents in the group. */ int ext4_mballoc_query_range( struct super_block *sb, ext4_group_t group, ext4_grpblk_t first, ext4_grpblk_t end, ext4_mballoc_query_range_fn meta_formatter, ext4_mballoc_query_range_fn formatter, void *priv) { void *bitmap; ext4_grpblk_t start, next; struct ext4_buddy e4b; int error; error = ext4_mb_load_buddy(sb, group, &e4b); if (error) return error; bitmap = e4b.bd_bitmap; ext4_lock_group(sb, group); start = max(e4b.bd_info->bb_first_free, first); if (end >= EXT4_CLUSTERS_PER_GROUP(sb)) end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; if (meta_formatter && start != first) { if (start > end) start = end; ext4_unlock_group(sb, group); error = meta_formatter(sb, group, first, start - first, priv); if (error) goto out_unload; ext4_lock_group(sb, group); } while (start <= end) { start = mb_find_next_zero_bit(bitmap, end + 1, start); if (start > end) break; next = mb_find_next_bit(bitmap, end + 1, start); ext4_unlock_group(sb, group); error = formatter(sb, group, start, next - start, priv); if (error) goto out_unload; ext4_lock_group(sb, group); start = next + 1; } ext4_unlock_group(sb, group); out_unload: ext4_mb_unload_buddy(&e4b); return error; } #if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS) void mb_clear_bits_test(void *bm, int cur, int len) { mb_clear_bits(bm, cur, len); } EXPORT_SYMBOL_FOR_EXT4_TEST(mb_clear_bits_test); ext4_fsblk_t ext4_mb_new_blocks_simple_test(struct ext4_allocation_request *ar, int *errp) { return 
ext4_mb_new_blocks_simple(ar, errp); } EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_new_blocks_simple_test); int mb_find_next_zero_bit_test(void *addr, int max, int start) { return mb_find_next_zero_bit(addr, max, start); } EXPORT_SYMBOL_FOR_EXT4_TEST(mb_find_next_zero_bit_test); int mb_find_next_bit_test(void *addr, int max, int start) { return mb_find_next_bit(addr, max, start); } EXPORT_SYMBOL_FOR_EXT4_TEST(mb_find_next_bit_test); void mb_clear_bit_test(int bit, void *addr) { mb_clear_bit(bit, addr); } EXPORT_SYMBOL_FOR_EXT4_TEST(mb_clear_bit_test); int mb_test_bit_test(int bit, void *addr) { return mb_test_bit(bit, addr); } EXPORT_SYMBOL_FOR_EXT4_TEST(mb_test_bit_test); int ext4_mb_mark_diskspace_used_test(struct ext4_allocation_context *ac, handle_t *handle) { return ext4_mb_mark_diskspace_used(ac, handle); } EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_mark_diskspace_used_test); int mb_mark_used_test(struct ext4_buddy *e4b, struct ext4_free_extent *ex) { return mb_mark_used(e4b, ex); } EXPORT_SYMBOL_FOR_EXT4_TEST(mb_mark_used_test); void ext4_mb_generate_buddy_test(struct super_block *sb, void *buddy, void *bitmap, ext4_group_t group, struct ext4_group_info *grp) { ext4_mb_generate_buddy(sb, buddy, bitmap, group, grp); } EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_generate_buddy_test); int ext4_mb_load_buddy_test(struct super_block *sb, ext4_group_t group, struct ext4_buddy *e4b) { return ext4_mb_load_buddy(sb, group, e4b); } EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_load_buddy_test); void ext4_mb_unload_buddy_test(struct ext4_buddy *e4b) { ext4_mb_unload_buddy(e4b); } EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_unload_buddy_test); void mb_free_blocks_test(struct inode *inode, struct ext4_buddy *e4b, int first, int count) { mb_free_blocks(inode, e4b, first, count); } EXPORT_SYMBOL_FOR_EXT4_TEST(mb_free_blocks_test); void ext4_free_blocks_simple_test(struct inode *inode, ext4_fsblk_t block, unsigned long count) { return ext4_free_blocks_simple(inode, block, count); } 
EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_free_blocks_simple_test); EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_wait_block_bitmap); EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_init); EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_get_group_desc); EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_count_free_clusters); EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_get_group_info); EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_free_group_clusters_set); EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_release); EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_read_block_bitmap_nowait); EXPORT_SYMBOL_FOR_EXT4_TEST(mb_set_bits); EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_fc_init_inode); EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_mark_context); #endif
6 4 6 8 8 2 1 16 7 9 2 2 2 1 2 2 2 4 4 3 1 4 4 4 3 3 6 3 4 6 18 1 2 2 1 2 8 2 8 2 9 3 3 9 8 3 3 2 1 4 12 3 3 3 8 6 4 6 8 5 1 19 1 16 6 6 6 5 3 3 2 5 2 10 13 13 12 6 4 8 3 14 3 2 9 17 17 16 8 8 20 3 1 1 2 1 3 8 1 2 2 5 1 2 1 1 6 9 1 2 3 4 1 2 4 6 6 3 2 1 6 3 2 1 27 10 2 2 2 1 2 9 5 1 4 2 2 4 3 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 
453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 
953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2015, Sony Mobile Communications Inc. * Copyright (c) 2013, The Linux Foundation. All rights reserved. 
*/ #include <linux/module.h> #include <linux/netlink.h> #include <linux/qrtr.h> #include <linux/termios.h> /* For TIOCINQ/OUTQ */ #include <linux/spinlock.h> #include <linux/wait.h> #include <net/sock.h> #include "qrtr.h" #define QRTR_PROTO_VER_1 1 #define QRTR_PROTO_VER_2 3 /* auto-bind range */ #define QRTR_MIN_EPH_SOCKET 0x4000 #define QRTR_MAX_EPH_SOCKET 0x7fff #define QRTR_EPH_PORT_RANGE \ XA_LIMIT(QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET) #define QRTR_PORT_CTRL_LEGACY 0xffff /** * struct qrtr_hdr_v1 - (I|R)PCrouter packet header version 1 * @version: protocol version * @type: packet type; one of QRTR_TYPE_* * @src_node_id: source node * @src_port_id: source port * @confirm_rx: boolean; whether a resume-tx packet should be send in reply * @size: length of packet, excluding this header * @dst_node_id: destination node * @dst_port_id: destination port */ struct qrtr_hdr_v1 { __le32 version; __le32 type; __le32 src_node_id; __le32 src_port_id; __le32 confirm_rx; __le32 size; __le32 dst_node_id; __le32 dst_port_id; } __packed; /** * struct qrtr_hdr_v2 - (I|R)PCrouter packet header later versions * @version: protocol version * @type: packet type; one of QRTR_TYPE_* * @flags: bitmask of QRTR_FLAGS_* * @optlen: length of optional header data * @size: length of packet, excluding this header and optlen * @src_node_id: source node * @src_port_id: source port * @dst_node_id: destination node * @dst_port_id: destination port */ struct qrtr_hdr_v2 { u8 version; u8 type; u8 flags; u8 optlen; __le32 size; __le16 src_node_id; __le16 src_port_id; __le16 dst_node_id; __le16 dst_port_id; }; #define QRTR_FLAGS_CONFIRM_RX BIT(0) struct qrtr_cb { u32 src_node; u32 src_port; u32 dst_node; u32 dst_port; u8 type; u8 confirm_rx; }; #define QRTR_HDR_MAX_SIZE max_t(size_t, sizeof(struct qrtr_hdr_v1), \ sizeof(struct qrtr_hdr_v2)) struct qrtr_sock { /* WARNING: sk must be the first member */ struct sock sk; struct sockaddr_qrtr us; struct sockaddr_qrtr peer; }; static inline struct 
qrtr_sock *qrtr_sk(struct sock *sk) { BUILD_BUG_ON(offsetof(struct qrtr_sock, sk) != 0); return container_of(sk, struct qrtr_sock, sk); } static unsigned int qrtr_local_nid = 1; /* for node ids */ static RADIX_TREE(qrtr_nodes, GFP_ATOMIC); static DEFINE_SPINLOCK(qrtr_nodes_lock); /* broadcast list */ static LIST_HEAD(qrtr_all_nodes); /* lock for qrtr_all_nodes and node reference */ static DEFINE_MUTEX(qrtr_node_lock); /* local port allocation management */ static DEFINE_XARRAY_ALLOC(qrtr_ports); /** * struct qrtr_node - endpoint node * @ep_lock: lock for endpoint management and callbacks * @ep: endpoint * @ref: reference count for node * @nid: node id * @qrtr_tx_flow: xarray of qrtr_tx_flow, keyed by node << 32 | port * @qrtr_tx_lock: lock for qrtr_tx_flow inserts * @rx_queue: receive queue * @item: list item for broadcast list */ struct qrtr_node { struct mutex ep_lock; struct qrtr_endpoint *ep; struct kref ref; unsigned int nid; struct xarray qrtr_tx_flow; struct mutex qrtr_tx_lock; /* for qrtr_tx_flow */ struct sk_buff_head rx_queue; struct list_head item; }; /** * struct qrtr_tx_flow - tx flow control * @resume_tx: waiters for a resume tx from the remote * @pending: number of waiting senders * @tx_failed: indicates that a message with confirm_rx flag was lost */ struct qrtr_tx_flow { struct wait_queue_head resume_tx; int pending; int tx_failed; }; #define QRTR_TX_FLOW_HIGH 10 #define QRTR_TX_FLOW_LOW 5 static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb, int type, struct sockaddr_qrtr *from, struct sockaddr_qrtr *to); static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb, int type, struct sockaddr_qrtr *from, struct sockaddr_qrtr *to); static struct qrtr_sock *qrtr_port_lookup(int port); static void qrtr_port_put(struct qrtr_sock *ipc); /* Release node resources and free the node. * * Do not call directly, use qrtr_node_release. To be used with * kref_put_mutex. As such, the node mutex is expected to be locked on call. 
*/ static void __qrtr_node_release(struct kref *kref) { struct qrtr_node *node = container_of(kref, struct qrtr_node, ref); struct radix_tree_iter iter; struct qrtr_tx_flow *flow; unsigned long flags; void __rcu **slot; unsigned long index; spin_lock_irqsave(&qrtr_nodes_lock, flags); /* If the node is a bridge for other nodes, there are possibly * multiple entries pointing to our released node, delete them all. */ radix_tree_for_each_slot(slot, &qrtr_nodes, &iter, 0) { if (*slot == node) radix_tree_iter_delete(&qrtr_nodes, &iter, slot); } spin_unlock_irqrestore(&qrtr_nodes_lock, flags); list_del(&node->item); mutex_unlock(&qrtr_node_lock); skb_queue_purge(&node->rx_queue); /* Free tx flow counters */ xa_for_each(&node->qrtr_tx_flow, index, flow) kfree(flow); xa_destroy(&node->qrtr_tx_flow); kfree(node); } /* Increment reference to node. */ static struct qrtr_node *qrtr_node_acquire(struct qrtr_node *node) { if (node) kref_get(&node->ref); return node; } /* Decrement reference to node and release as necessary. 
*/ static void qrtr_node_release(struct qrtr_node *node) { if (!node) return; kref_put_mutex(&node->ref, __qrtr_node_release, &qrtr_node_lock); } /** * qrtr_tx_resume() - reset flow control counter * @node: qrtr_node that the QRTR_TYPE_RESUME_TX packet arrived on * @skb: resume_tx packet */ static void qrtr_tx_resume(struct qrtr_node *node, struct sk_buff *skb) { struct qrtr_ctrl_pkt *pkt = (struct qrtr_ctrl_pkt *)skb->data; u64 remote_node = le32_to_cpu(pkt->client.node); u32 remote_port = le32_to_cpu(pkt->client.port); struct qrtr_tx_flow *flow; unsigned long key; key = remote_node << 32 | remote_port; flow = xa_load(&node->qrtr_tx_flow, key); if (flow) { spin_lock(&flow->resume_tx.lock); flow->pending = 0; spin_unlock(&flow->resume_tx.lock); wake_up_interruptible_all(&flow->resume_tx); } consume_skb(skb); } /** * qrtr_tx_wait() - flow control for outgoing packets * @node: qrtr_node that the packet is to be send to * @dest_node: node id of the destination * @dest_port: port number of the destination * @type: type of message * * The flow control scheme is based around the low and high "watermarks". When * the low watermark is passed the confirm_rx flag is set on the outgoing * message, which will trigger the remote to send a control message of the type * QRTR_TYPE_RESUME_TX to reset the counter. If the high watermark is hit * further transmision should be paused. 
* * Return: 1 if confirm_rx should be set, 0 otherwise or errno failure */ static int qrtr_tx_wait(struct qrtr_node *node, int dest_node, int dest_port, int type) { unsigned long key = (u64)dest_node << 32 | dest_port; struct qrtr_tx_flow *flow; int confirm_rx = 0; int ret; /* Never set confirm_rx on non-data packets */ if (type != QRTR_TYPE_DATA) return 0; mutex_lock(&node->qrtr_tx_lock); flow = xa_load(&node->qrtr_tx_flow, key); if (!flow) { flow = kzalloc_obj(*flow); if (flow) { init_waitqueue_head(&flow->resume_tx); if (xa_err(xa_store(&node->qrtr_tx_flow, key, flow, GFP_KERNEL))) { kfree(flow); flow = NULL; } } } mutex_unlock(&node->qrtr_tx_lock); /* Set confirm_rx if we where unable to find and allocate a flow */ if (!flow) return 1; spin_lock_irq(&flow->resume_tx.lock); ret = wait_event_interruptible_locked_irq(flow->resume_tx, flow->pending < QRTR_TX_FLOW_HIGH || flow->tx_failed || !node->ep); if (ret < 0) { confirm_rx = ret; } else if (!node->ep) { confirm_rx = -EPIPE; } else if (flow->tx_failed) { flow->tx_failed = 0; confirm_rx = 1; } else { flow->pending++; confirm_rx = flow->pending == QRTR_TX_FLOW_LOW; } spin_unlock_irq(&flow->resume_tx.lock); return confirm_rx; } /** * qrtr_tx_flow_failed() - flag that tx of confirm_rx flagged messages failed * @node: qrtr_node that the packet is to be send to * @dest_node: node id of the destination * @dest_port: port number of the destination * * Signal that the transmission of a message with confirm_rx flag failed. The * flow's "pending" counter will keep incrementing towards QRTR_TX_FLOW_HIGH, * at which point transmission would stall forever waiting for the resume TX * message associated with the dropped confirm_rx message. * Work around this by marking the flow as having a failed transmission and * cause the next transmission attempt to be sent with the confirm_rx. 
*/ static void qrtr_tx_flow_failed(struct qrtr_node *node, int dest_node, int dest_port) { unsigned long key = (u64)dest_node << 32 | dest_port; struct qrtr_tx_flow *flow; flow = xa_load(&node->qrtr_tx_flow, key); if (flow) { spin_lock_irq(&flow->resume_tx.lock); flow->tx_failed = 1; spin_unlock_irq(&flow->resume_tx.lock); } } /* Pass an outgoing packet socket buffer to the endpoint driver. */ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb, int type, struct sockaddr_qrtr *from, struct sockaddr_qrtr *to) { struct qrtr_hdr_v1 *hdr; size_t len = skb->len; int rc, confirm_rx; confirm_rx = qrtr_tx_wait(node, to->sq_node, to->sq_port, type); if (confirm_rx < 0) { kfree_skb(skb); return confirm_rx; } hdr = skb_push(skb, sizeof(*hdr)); hdr->version = cpu_to_le32(QRTR_PROTO_VER_1); hdr->type = cpu_to_le32(type); hdr->src_node_id = cpu_to_le32(from->sq_node); hdr->src_port_id = cpu_to_le32(from->sq_port); if (to->sq_port == QRTR_PORT_CTRL) { hdr->dst_node_id = cpu_to_le32(node->nid); hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL); } else { hdr->dst_node_id = cpu_to_le32(to->sq_node); hdr->dst_port_id = cpu_to_le32(to->sq_port); } hdr->size = cpu_to_le32(len); hdr->confirm_rx = !!confirm_rx; rc = skb_put_padto(skb, ALIGN(len, 4) + sizeof(*hdr)); if (!rc) { mutex_lock(&node->ep_lock); rc = -ENODEV; if (node->ep) rc = node->ep->xmit(node->ep, skb); else kfree_skb(skb); mutex_unlock(&node->ep_lock); } /* Need to ensure that a subsequent message carries the otherwise lost * confirm_rx flag if we dropped this one */ if (rc && confirm_rx) qrtr_tx_flow_failed(node, to->sq_node, to->sq_port); return rc; } /* Lookup node by id. 
* * callers must release with qrtr_node_release() */ static struct qrtr_node *qrtr_node_lookup(unsigned int nid) { struct qrtr_node *node; unsigned long flags; mutex_lock(&qrtr_node_lock); spin_lock_irqsave(&qrtr_nodes_lock, flags); node = radix_tree_lookup(&qrtr_nodes, nid); node = qrtr_node_acquire(node); spin_unlock_irqrestore(&qrtr_nodes_lock, flags); mutex_unlock(&qrtr_node_lock); return node; } /* Assign node id to node. * * This is mostly useful for automatic node id assignment, based on * the source id in the incoming packet. */ static void qrtr_node_assign(struct qrtr_node *node, unsigned int nid) { unsigned long flags; if (nid == QRTR_EP_NID_AUTO) return; spin_lock_irqsave(&qrtr_nodes_lock, flags); radix_tree_insert(&qrtr_nodes, nid, node); if (node->nid == QRTR_EP_NID_AUTO) node->nid = nid; spin_unlock_irqrestore(&qrtr_nodes_lock, flags); } /** * qrtr_endpoint_post() - post incoming data * @ep: endpoint handle * @data: data pointer * @len: size of data in bytes * * Return: 0 on success; negative error code on failure */ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) { struct qrtr_node *node = ep->node; const struct qrtr_hdr_v1 *v1; const struct qrtr_hdr_v2 *v2; struct qrtr_sock *ipc; struct sk_buff *skb; struct qrtr_cb *cb; size_t size; unsigned int ver; size_t hdrlen; if (len == 0 || len & 3) return -EINVAL; skb = __netdev_alloc_skb(NULL, len, GFP_ATOMIC | __GFP_NOWARN); if (!skb) return -ENOMEM; cb = (struct qrtr_cb *)skb->cb; /* Version field in v1 is little endian, so this works for both cases */ ver = *(u8*)data; switch (ver) { case QRTR_PROTO_VER_1: if (len < sizeof(*v1)) goto err; v1 = data; hdrlen = sizeof(*v1); cb->type = le32_to_cpu(v1->type); cb->src_node = le32_to_cpu(v1->src_node_id); cb->src_port = le32_to_cpu(v1->src_port_id); cb->confirm_rx = !!v1->confirm_rx; cb->dst_node = le32_to_cpu(v1->dst_node_id); cb->dst_port = le32_to_cpu(v1->dst_port_id); size = le32_to_cpu(v1->size); break; case QRTR_PROTO_VER_2: 
if (len < sizeof(*v2)) goto err; v2 = data; hdrlen = sizeof(*v2) + v2->optlen; cb->type = v2->type; cb->confirm_rx = !!(v2->flags & QRTR_FLAGS_CONFIRM_RX); cb->src_node = le16_to_cpu(v2->src_node_id); cb->src_port = le16_to_cpu(v2->src_port_id); cb->dst_node = le16_to_cpu(v2->dst_node_id); cb->dst_port = le16_to_cpu(v2->dst_port_id); if (cb->src_port == (u16)QRTR_PORT_CTRL) cb->src_port = QRTR_PORT_CTRL; if (cb->dst_port == (u16)QRTR_PORT_CTRL) cb->dst_port = QRTR_PORT_CTRL; size = le32_to_cpu(v2->size); break; default: pr_err("qrtr: Invalid version %d\n", ver); goto err; } if (cb->dst_port == QRTR_PORT_CTRL_LEGACY) cb->dst_port = QRTR_PORT_CTRL; if (!size || len != ALIGN(size, 4) + hdrlen) goto err; if ((cb->type == QRTR_TYPE_NEW_SERVER || cb->type == QRTR_TYPE_RESUME_TX) && size < sizeof(struct qrtr_ctrl_pkt)) goto err; if (cb->dst_port != QRTR_PORT_CTRL && cb->type != QRTR_TYPE_DATA && cb->type != QRTR_TYPE_RESUME_TX) goto err; skb_put_data(skb, data + hdrlen, size); qrtr_node_assign(node, cb->src_node); if (cb->type == QRTR_TYPE_NEW_SERVER) { /* Remote node endpoint can bridge other distant nodes */ const struct qrtr_ctrl_pkt *pkt; pkt = data + hdrlen; qrtr_node_assign(node, le32_to_cpu(pkt->server.node)); } if (cb->type == QRTR_TYPE_RESUME_TX) { qrtr_tx_resume(node, skb); } else { ipc = qrtr_port_lookup(cb->dst_port); if (!ipc) goto err; if (sock_queue_rcv_skb(&ipc->sk, skb)) { qrtr_port_put(ipc); goto err; } qrtr_port_put(ipc); } return 0; err: kfree_skb(skb); return -EINVAL; } EXPORT_SYMBOL_GPL(qrtr_endpoint_post); /** * qrtr_alloc_ctrl_packet() - allocate control packet skb * @pkt: reference to qrtr_ctrl_pkt pointer * @flags: the type of memory to allocate * * Returns newly allocated sk_buff, or NULL on failure * * This function allocates a sk_buff large enough to carry a qrtr_ctrl_pkt and * on success returns a reference to the control packet in @pkt. 
*/ static struct sk_buff *qrtr_alloc_ctrl_packet(struct qrtr_ctrl_pkt **pkt, gfp_t flags) { const int pkt_len = sizeof(struct qrtr_ctrl_pkt); struct sk_buff *skb; skb = alloc_skb(QRTR_HDR_MAX_SIZE + pkt_len, flags); if (!skb) return NULL; skb_reserve(skb, QRTR_HDR_MAX_SIZE); *pkt = skb_put_zero(skb, pkt_len); return skb; } /** * qrtr_endpoint_register() - register a new endpoint * @ep: endpoint to register * @nid: desired node id; may be QRTR_EP_NID_AUTO for auto-assignment * Return: 0 on success; negative error code on failure * * The specified endpoint must have the xmit function pointer set on call. */ int qrtr_endpoint_register(struct qrtr_endpoint *ep, unsigned int nid) { struct qrtr_node *node; if (!ep || !ep->xmit) return -EINVAL; node = kzalloc_obj(*node); if (!node) return -ENOMEM; kref_init(&node->ref); mutex_init(&node->ep_lock); skb_queue_head_init(&node->rx_queue); node->nid = QRTR_EP_NID_AUTO; node->ep = ep; xa_init(&node->qrtr_tx_flow); mutex_init(&node->qrtr_tx_lock); qrtr_node_assign(node, nid); mutex_lock(&qrtr_node_lock); list_add(&node->item, &qrtr_all_nodes); mutex_unlock(&qrtr_node_lock); ep->node = node; return 0; } EXPORT_SYMBOL_GPL(qrtr_endpoint_register); /** * qrtr_endpoint_unregister - unregister endpoint * @ep: endpoint to unregister */ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep) { struct qrtr_node *node = ep->node; struct sockaddr_qrtr src = {AF_QIPCRTR, node->nid, QRTR_PORT_CTRL}; struct sockaddr_qrtr dst = {AF_QIPCRTR, qrtr_local_nid, QRTR_PORT_CTRL}; struct radix_tree_iter iter; struct qrtr_ctrl_pkt *pkt; struct qrtr_tx_flow *flow; struct sk_buff *skb; unsigned long flags; unsigned long index; void __rcu **slot; mutex_lock(&node->ep_lock); node->ep = NULL; mutex_unlock(&node->ep_lock); /* Notify the local controller about the event */ spin_lock_irqsave(&qrtr_nodes_lock, flags); radix_tree_for_each_slot(slot, &qrtr_nodes, &iter, 0) { if (*slot != node) continue; src.sq_node = iter.index; skb = qrtr_alloc_ctrl_packet(&pkt, 
GFP_ATOMIC); if (skb) { pkt->cmd = cpu_to_le32(QRTR_TYPE_BYE); qrtr_local_enqueue(NULL, skb, QRTR_TYPE_BYE, &src, &dst); } } spin_unlock_irqrestore(&qrtr_nodes_lock, flags); /* Wake up any transmitters waiting for resume-tx from the node */ mutex_lock(&node->qrtr_tx_lock); xa_for_each(&node->qrtr_tx_flow, index, flow) wake_up_interruptible_all(&flow->resume_tx); mutex_unlock(&node->qrtr_tx_lock); qrtr_node_release(node); ep->node = NULL; } EXPORT_SYMBOL_GPL(qrtr_endpoint_unregister); /* Lookup socket by port. * * Callers must release with qrtr_port_put() */ static struct qrtr_sock *qrtr_port_lookup(int port) { struct qrtr_sock *ipc; if (port == QRTR_PORT_CTRL) port = 0; rcu_read_lock(); ipc = xa_load(&qrtr_ports, port); if (ipc) sock_hold(&ipc->sk); rcu_read_unlock(); return ipc; } /* Release acquired socket. */ static void qrtr_port_put(struct qrtr_sock *ipc) { sock_put(&ipc->sk); } /* Remove port assignment. */ static void qrtr_port_remove(struct qrtr_sock *ipc) { struct qrtr_ctrl_pkt *pkt; struct sk_buff *skb; int port = ipc->us.sq_port; struct sockaddr_qrtr to; to.sq_family = AF_QIPCRTR; to.sq_node = QRTR_NODE_BCAST; to.sq_port = QRTR_PORT_CTRL; skb = qrtr_alloc_ctrl_packet(&pkt, GFP_KERNEL); if (skb) { pkt->cmd = cpu_to_le32(QRTR_TYPE_DEL_CLIENT); pkt->client.node = cpu_to_le32(ipc->us.sq_node); pkt->client.port = cpu_to_le32(ipc->us.sq_port); skb_set_owner_w(skb, &ipc->sk); qrtr_bcast_enqueue(NULL, skb, QRTR_TYPE_DEL_CLIENT, &ipc->us, &to); } if (port == QRTR_PORT_CTRL) port = 0; __sock_put(&ipc->sk); xa_erase(&qrtr_ports, port); /* Ensure that if qrtr_port_lookup() did enter the RCU read section we * wait for it to up increment the refcount */ synchronize_rcu(); } /* Assign port number to socket. * * Specify port in the integer pointed to by port, and it will be adjusted * on return as necesssary. 
* * Port may be: * 0: Assign ephemeral port in [QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET] * <QRTR_MIN_EPH_SOCKET: Specified; requires CAP_NET_ADMIN * >QRTR_MIN_EPH_SOCKET: Specified; available to all */ static int qrtr_port_assign(struct qrtr_sock *ipc, int *port) { int rc; if (!*port) { rc = xa_alloc(&qrtr_ports, port, ipc, QRTR_EPH_PORT_RANGE, GFP_KERNEL); } else if (*port < QRTR_MIN_EPH_SOCKET && !capable(CAP_NET_ADMIN)) { rc = -EACCES; } else if (*port == QRTR_PORT_CTRL) { rc = xa_insert(&qrtr_ports, 0, ipc, GFP_KERNEL); } else { rc = xa_insert(&qrtr_ports, *port, ipc, GFP_KERNEL); } if (rc == -EBUSY) return -EADDRINUSE; else if (rc < 0) return rc; sock_hold(&ipc->sk); return 0; } /* Reset all non-control ports */ static void qrtr_reset_ports(void) { struct qrtr_sock *ipc; unsigned long index; rcu_read_lock(); xa_for_each_start(&qrtr_ports, index, ipc, 1) { sock_hold(&ipc->sk); ipc->sk.sk_err = ENETRESET; sk_error_report(&ipc->sk); sock_put(&ipc->sk); } rcu_read_unlock(); } /* Bind socket to address. * * Socket should be locked upon call. */ static int __qrtr_bind(struct socket *sock, const struct sockaddr_qrtr *addr, int zapped) { struct qrtr_sock *ipc = qrtr_sk(sock->sk); struct sock *sk = sock->sk; int port; int rc; /* rebinding ok */ if (!zapped && addr->sq_port == ipc->us.sq_port) return 0; port = addr->sq_port; rc = qrtr_port_assign(ipc, &port); if (rc) return rc; /* unbind previous, if any */ if (!zapped) qrtr_port_remove(ipc); ipc->us.sq_port = port; sock_reset_flag(sk, SOCK_ZAPPED); /* Notify all open ports about the new controller */ if (port == QRTR_PORT_CTRL) qrtr_reset_ports(); return 0; } /* Auto bind to an ephemeral port. */ static int qrtr_autobind(struct socket *sock) { struct sock *sk = sock->sk; struct sockaddr_qrtr addr; if (!sock_flag(sk, SOCK_ZAPPED)) return 0; addr.sq_family = AF_QIPCRTR; addr.sq_node = qrtr_local_nid; addr.sq_port = 0; return __qrtr_bind(sock, &addr, 1); } /* Bind socket to specified sockaddr. 
*/ static int qrtr_bind(struct socket *sock, struct sockaddr_unsized *saddr, int len) { DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, saddr); struct qrtr_sock *ipc = qrtr_sk(sock->sk); struct sock *sk = sock->sk; int rc; if (len < sizeof(*addr) || addr->sq_family != AF_QIPCRTR) return -EINVAL; if (addr->sq_node != ipc->us.sq_node) return -EINVAL; lock_sock(sk); rc = __qrtr_bind(sock, addr, sock_flag(sk, SOCK_ZAPPED)); release_sock(sk); return rc; } /* Queue packet to local peer socket. */ static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb, int type, struct sockaddr_qrtr *from, struct sockaddr_qrtr *to) { struct qrtr_sock *ipc; struct qrtr_cb *cb; ipc = qrtr_port_lookup(to->sq_port); if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */ if (ipc) qrtr_port_put(ipc); kfree_skb(skb); return -ENODEV; } cb = (struct qrtr_cb *)skb->cb; cb->src_node = from->sq_node; cb->src_port = from->sq_port; if (sock_queue_rcv_skb(&ipc->sk, skb)) { qrtr_port_put(ipc); kfree_skb(skb); return -ENOSPC; } qrtr_port_put(ipc); return 0; } /* Queue packet for broadcast. 
*/ static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb, int type, struct sockaddr_qrtr *from, struct sockaddr_qrtr *to) { struct sk_buff *skbn; mutex_lock(&qrtr_node_lock); list_for_each_entry(node, &qrtr_all_nodes, item) { skbn = pskb_copy(skb, GFP_KERNEL); if (!skbn) break; skb_set_owner_w(skbn, skb->sk); qrtr_node_enqueue(node, skbn, type, from, to); } mutex_unlock(&qrtr_node_lock); qrtr_local_enqueue(NULL, skb, type, from, to); return 0; } static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name); int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *, int, struct sockaddr_qrtr *, struct sockaddr_qrtr *); __le32 qrtr_type = cpu_to_le32(QRTR_TYPE_DATA); struct qrtr_sock *ipc = qrtr_sk(sock->sk); struct sock *sk = sock->sk; struct qrtr_node *node; struct sk_buff *skb; size_t plen; u32 type; int rc; if (msg->msg_flags & ~(MSG_DONTWAIT)) return -EINVAL; if (len > 65535) return -EMSGSIZE; lock_sock(sk); if (addr) { if (msg->msg_namelen < sizeof(*addr)) { release_sock(sk); return -EINVAL; } if (addr->sq_family != AF_QIPCRTR) { release_sock(sk); return -EINVAL; } rc = qrtr_autobind(sock); if (rc) { release_sock(sk); return rc; } } else if (sk->sk_state == TCP_ESTABLISHED) { addr = &ipc->peer; } else { release_sock(sk); return -ENOTCONN; } node = NULL; if (addr->sq_node == QRTR_NODE_BCAST) { if (addr->sq_port != QRTR_PORT_CTRL && qrtr_local_nid != QRTR_NODE_BCAST) { release_sock(sk); return -ENOTCONN; } enqueue_fn = qrtr_bcast_enqueue; } else if (addr->sq_node == ipc->us.sq_node) { enqueue_fn = qrtr_local_enqueue; } else { node = qrtr_node_lookup(addr->sq_node); if (!node) { release_sock(sk); return -ECONNRESET; } enqueue_fn = qrtr_node_enqueue; } plen = (len + 3) & ~3; skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_MAX_SIZE, msg->msg_flags & MSG_DONTWAIT, &rc); if (!skb) { rc = -ENOMEM; goto out_node; } skb_reserve(skb, QRTR_HDR_MAX_SIZE); rc = 
memcpy_from_msg(skb_put(skb, len), msg, len); if (rc) { kfree_skb(skb); goto out_node; } if (ipc->us.sq_port == QRTR_PORT_CTRL) { if (len < 4) { rc = -EINVAL; kfree_skb(skb); goto out_node; } /* control messages already require the type as 'command' */ skb_copy_bits(skb, 0, &qrtr_type, 4); } type = le32_to_cpu(qrtr_type); rc = enqueue_fn(node, skb, type, &ipc->us, addr); if (rc >= 0) rc = len; out_node: qrtr_node_release(node); release_sock(sk); return rc; } static int qrtr_send_resume_tx(struct qrtr_cb *cb) { struct sockaddr_qrtr remote = { AF_QIPCRTR, cb->src_node, cb->src_port }; struct sockaddr_qrtr local = { AF_QIPCRTR, cb->dst_node, cb->dst_port }; struct qrtr_ctrl_pkt *pkt; struct qrtr_node *node; struct sk_buff *skb; int ret; node = qrtr_node_lookup(remote.sq_node); if (!node) return -EINVAL; skb = qrtr_alloc_ctrl_packet(&pkt, GFP_KERNEL); if (!skb) return -ENOMEM; pkt->cmd = cpu_to_le32(QRTR_TYPE_RESUME_TX); pkt->client.node = cpu_to_le32(cb->dst_node); pkt->client.port = cpu_to_le32(cb->dst_port); ret = qrtr_node_enqueue(node, skb, QRTR_TYPE_RESUME_TX, &local, &remote); qrtr_node_release(node); return ret; } static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name); struct sock *sk = sock->sk; struct sk_buff *skb; struct qrtr_cb *cb; int copied, rc; lock_sock(sk); if (sock_flag(sk, SOCK_ZAPPED)) { release_sock(sk); return -EADDRNOTAVAIL; } skb = skb_recv_datagram(sk, flags, &rc); if (!skb) { release_sock(sk); return rc; } cb = (struct qrtr_cb *)skb->cb; copied = skb->len; if (copied > size) { copied = size; msg->msg_flags |= MSG_TRUNC; } rc = skb_copy_datagram_msg(skb, 0, msg, copied); if (rc < 0) goto out; rc = copied; if (addr) { /* There is an anonymous 2-byte hole after sq_family, * make sure to clear it. 
*/ memset(addr, 0, sizeof(*addr)); addr->sq_family = AF_QIPCRTR; addr->sq_node = cb->src_node; addr->sq_port = cb->src_port; msg->msg_namelen = sizeof(*addr); } out: if (cb->confirm_rx) qrtr_send_resume_tx(cb); skb_free_datagram(sk, skb); release_sock(sk); return rc; } static int qrtr_connect(struct socket *sock, struct sockaddr_unsized *saddr, int len, int flags) { DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, saddr); struct qrtr_sock *ipc = qrtr_sk(sock->sk); struct sock *sk = sock->sk; int rc; if (len < sizeof(*addr) || addr->sq_family != AF_QIPCRTR) return -EINVAL; lock_sock(sk); sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; rc = qrtr_autobind(sock); if (rc) { release_sock(sk); return rc; } ipc->peer = *addr; sock->state = SS_CONNECTED; sk->sk_state = TCP_ESTABLISHED; release_sock(sk); return 0; } static int qrtr_getname(struct socket *sock, struct sockaddr *saddr, int peer) { struct qrtr_sock *ipc = qrtr_sk(sock->sk); struct sockaddr_qrtr qaddr; struct sock *sk = sock->sk; lock_sock(sk); if (peer) { if (sk->sk_state != TCP_ESTABLISHED) { release_sock(sk); return -ENOTCONN; } qaddr = ipc->peer; } else { qaddr = ipc->us; } release_sock(sk); qaddr.sq_family = AF_QIPCRTR; memcpy(saddr, &qaddr, sizeof(qaddr)); return sizeof(qaddr); } static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { void __user *argp = (void __user *)arg; struct qrtr_sock *ipc = qrtr_sk(sock->sk); struct sock *sk = sock->sk; struct sockaddr_qrtr *sq; struct sk_buff *skb; struct ifreq ifr; long len = 0; int rc = 0; lock_sock(sk); switch (cmd) { case TIOCOUTQ: len = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (len < 0) len = 0; rc = put_user(len, (int __user *)argp); break; case TIOCINQ: skb = skb_peek(&sk->sk_receive_queue); if (skb) len = skb->len; rc = put_user(len, (int __user *)argp); break; case SIOCGIFADDR: if (get_user_ifreq(&ifr, NULL, argp)) { rc = -EFAULT; break; } sq = (struct sockaddr_qrtr *)&ifr.ifr_addr; *sq = ipc->us; if (put_user_ifreq(&ifr, 
argp)) { rc = -EFAULT; break; } break; case SIOCADDRT: case SIOCDELRT: case SIOCSIFADDR: case SIOCGIFDSTADDR: case SIOCSIFDSTADDR: case SIOCGIFBRDADDR: case SIOCSIFBRDADDR: case SIOCGIFNETMASK: case SIOCSIFNETMASK: rc = -EINVAL; break; default: rc = -ENOIOCTLCMD; break; } release_sock(sk); return rc; } static int qrtr_release(struct socket *sock) { struct sock *sk = sock->sk; struct qrtr_sock *ipc; if (!sk) return 0; lock_sock(sk); ipc = qrtr_sk(sk); sk->sk_shutdown = SHUTDOWN_MASK; if (!sock_flag(sk, SOCK_DEAD)) sk->sk_state_change(sk); sock_set_flag(sk, SOCK_DEAD); sock_orphan(sk); sock->sk = NULL; if (!sock_flag(sk, SOCK_ZAPPED)) qrtr_port_remove(ipc); skb_queue_purge(&sk->sk_receive_queue); release_sock(sk); sock_put(sk); return 0; } static const struct proto_ops qrtr_proto_ops = { .owner = THIS_MODULE, .family = AF_QIPCRTR, .bind = qrtr_bind, .connect = qrtr_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .listen = sock_no_listen, .sendmsg = qrtr_sendmsg, .recvmsg = qrtr_recvmsg, .getname = qrtr_getname, .ioctl = qrtr_ioctl, .gettstamp = sock_gettstamp, .poll = datagram_poll, .shutdown = sock_no_shutdown, .release = qrtr_release, .mmap = sock_no_mmap, }; static struct proto qrtr_proto = { .name = "QIPCRTR", .owner = THIS_MODULE, .obj_size = sizeof(struct qrtr_sock), }; static int qrtr_create(struct net *net, struct socket *sock, int protocol, int kern) { struct qrtr_sock *ipc; struct sock *sk; if (sock->type != SOCK_DGRAM) return -EPROTOTYPE; sk = sk_alloc(net, AF_QIPCRTR, GFP_KERNEL, &qrtr_proto, kern); if (!sk) return -ENOMEM; sock_set_flag(sk, SOCK_ZAPPED); sock_init_data(sock, sk); sock->ops = &qrtr_proto_ops; ipc = qrtr_sk(sk); ipc->us.sq_family = AF_QIPCRTR; ipc->us.sq_node = qrtr_local_nid; ipc->us.sq_port = 0; return 0; } static const struct net_proto_family qrtr_family = { .owner = THIS_MODULE, .family = AF_QIPCRTR, .create = qrtr_create, }; static int __init qrtr_proto_init(void) { int rc; rc = proto_register(&qrtr_proto, 1); if 
(rc) return rc; rc = sock_register(&qrtr_family); if (rc) goto err_proto; rc = qrtr_ns_init(); if (rc) goto err_sock; return 0; err_sock: sock_unregister(qrtr_family.family); err_proto: proto_unregister(&qrtr_proto); return rc; } postcore_initcall(qrtr_proto_init); static void __exit qrtr_proto_fini(void) { qrtr_ns_remove(); sock_unregister(qrtr_family.family); proto_unregister(&qrtr_proto); } module_exit(qrtr_proto_fini); MODULE_DESCRIPTION("Qualcomm IPC-router driver"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS_NETPROTO(PF_QIPCRTR);
1 1 1 1 1 1 2 1 1 1 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 
521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 // SPDX-License-Identifier: GPL-2.0-or-later /* * taskstats.c - Export per-task statistics to userland * * Copyright (C) Shailabh Nagar, IBM Corp. 2006 * (C) Balbir Singh, IBM Corp. 2006 */ #include <linux/kernel.h> #include <linux/taskstats_kern.h> #include <linux/tsacct_kern.h> #include <linux/acct.h> #include <linux/delayacct.h> #include <linux/cpumask.h> #include <linux/percpu.h> #include <linux/slab.h> #include <linux/cgroupstats.h> #include <linux/cgroup.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/pid_namespace.h> #include <net/genetlink.h> #include <linux/atomic.h> #include <linux/sched/cputime.h> /* * Maximum length of a cpumask that can be specified in * the TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK attribute */ #define TASKSTATS_CPUMASK_MAXLEN (100+6*NR_CPUS) static DEFINE_PER_CPU(__u32, taskstats_seqnum); static int family_registered; struct kmem_cache *taskstats_cache; static struct genl_family family; static const struct nla_policy taskstats_cmd_get_policy[] = { [TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 }, [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 }, [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING }, 
[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },};

/* Attribute policy for CGROUPSTATS_CMD_GET: a single u32 file descriptor */
static const struct nla_policy cgroupstats_cmd_get_policy[] = {
	[CGROUPSTATS_CMD_ATTR_FD] = { .type = NLA_U32 },
};

/* One registered exit-data listener; @valid is cleared when delivery to
 * @pid fails with -ECONNREFUSED so the entry can be reaped later.
 */
struct listener {
	struct list_head list;
	pid_t pid;
	char valid;
};

/* Per-cpu list of listeners, protected by @sem */
struct listener_list {
	struct rw_semaphore sem;
	struct list_head list;
};
static DEFINE_PER_CPU(struct listener_list, listener_array);

enum actions {
	REGISTER,
	DEREGISTER,
	CPU_DONT_CARE
};

/*
 * Allocate a generic netlink message of @size bytes for command @cmd and
 * store it in @skbp. With @info the header is built as a reply to that
 * request; without it a per-cpu sequence number is used.
 *
 * Returns 0, -ENOMEM if the skb cannot be allocated, or -EINVAL if the
 * header cannot be added.
 */
static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
				size_t size)
{
	struct sk_buff *skb;
	void *reply;

	/*
	 * If new attributes are added, please revisit this allocation
	 */
	skb = genlmsg_new(size, GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	if (!info) {
		int seq = this_cpu_inc_return(taskstats_seqnum) - 1;

		reply = genlmsg_put(skb, 0, seq, &family, 0, cmd);
	} else
		reply = genlmsg_put_reply(skb, info, &family, 0, cmd);
	if (reply == NULL) {
		nlmsg_free(skb);
		return -EINVAL;
	}

	*skbp = skb;
	return 0;
}

/*
 * Finalize the message in @skb and send it as a reply to the requester
 * described by @info
 */
static int send_reply(struct sk_buff *skb, struct genl_info *info)
{
	struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb));
	void *reply = genlmsg_data(genlhdr);

	genlmsg_end(skb, reply);

	return genlmsg_reply(skb, info);
}

/*
 * Send taskstats data in @skb to listeners registered for @cpu's exit data
 *
 * Every listener but the last receives the current skb while a clone is
 * prepared for the next one. Listeners whose socket refuses the message are
 * marked invalid under the read lock and removed afterwards under the
 * write lock.
 */
static void send_cpu_listeners(struct sk_buff *skb,
					struct listener_list *listeners)
{
	struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb));
	struct listener *s, *tmp;
	struct sk_buff *skb_next, *skb_cur = skb;
	void *reply = genlmsg_data(genlhdr);
	int delcount = 0;

	genlmsg_end(skb, reply);

	down_read(&listeners->sem);
	list_for_each_entry(s, &listeners->list, list) {
		int rc;

		skb_next = NULL;
		if (!list_is_last(&s->list, &listeners->list)) {
			skb_next = skb_clone(skb_cur, GFP_KERNEL);
			if (!skb_next)
				break;
		}
		rc = genlmsg_unicast(&init_net, skb_cur, s->pid);
		if (rc == -ECONNREFUSED) {
			s->valid = 0;
			delcount++;
		}
		skb_cur = skb_next;
	}
	up_read(&listeners->sem);

	/* Non-NULL only if the loop stopped early (clone failure or empty list) */
	if (skb_cur)
		nlmsg_free(skb_cur);

	if (!delcount)
		return;

	/* Delete invalidated entries */
	down_write(&listeners->sem);
	list_for_each_entry_safe(s, tmp, &listeners->list, list) {
		if (!s->valid) {
			list_del(&s->list);
			kfree(s);
		}
	}
	up_write(&listeners->sem);
}

/* Record device and inode number of @tsk's executable in @stats, or zeroes
 * if the task has no executable file.
 */
static void exe_add_tsk(struct taskstats *stats, struct task_struct *tsk)
{
	/* No idea if I'm allowed to access that here, now. */
	struct file *exe_file = get_task_exe_file(tsk);

	if (exe_file) {
		/* Following cp_new_stat64() in stat.c . */
		stats->ac_exe_dev =
			huge_encode_dev(exe_file->f_inode->i_sb->s_dev);
		stats->ac_exe_inode = exe_file->f_inode->i_ino;
		fput(exe_file);
	} else {
		stats->ac_exe_dev = 0;
		stats->ac_exe_inode = 0;
	}
}

/* Zero @stats and fill it with the per-task statistics of @tsk as seen
 * from @user_ns and @pid_ns.
 */
static void fill_stats(struct user_namespace *user_ns,
		       struct pid_namespace *pid_ns,
		       struct task_struct *tsk, struct taskstats *stats)
{
	memset(stats, 0, sizeof(*stats));
	/*
	 * Each accounting subsystem adds calls to its functions to
	 * fill in relevant parts of struct taskstats as follows
	 *
	 *	per-task-foo(stats, tsk);
	 */

	delayacct_add_tsk(stats, tsk);

	/* fill in basic acct fields */
	stats->version = TASKSTATS_VERSION;
	stats->nvcsw = tsk->nvcsw;
	stats->nivcsw = tsk->nivcsw;
	bacct_add_tsk(user_ns, pid_ns, stats, tsk);

	/* fill in extended acct fields */
	xacct_add_tsk(stats, tsk);

	/* add executable info */
	exe_add_tsk(stats, tsk);
}

/* Fill @stats for the task with pid @pid in the caller's pid namespace.
 * Returns 0, or -ESRCH if no such task exists.
 */
static int fill_stats_for_pid(pid_t pid, struct taskstats *stats)
{
	struct task_struct *tsk;

	tsk = find_get_task_by_vpid(pid);
	if (!tsk)
		return -ESRCH;
	fill_stats(current_user_ns(), task_active_pid_ns(current), tsk, stats);
	put_task_struct(tsk);
	return 0;
}

/* Fill @stats with the aggregate for thread group @tgid: the totals already
 * accumulated in signal->stats plus the live (not exiting) threads.
 * Returns 0, or -ESRCH if the group cannot be found or locked.
 */
static int fill_stats_for_tgid(pid_t tgid, struct taskstats *stats)
{
	struct task_struct *tsk, *first;
	unsigned long flags;
	int rc = -ESRCH;
	u64 delta, utime, stime;
	u64 start_time;

	/*
	 * Add additional stats from live tasks except zombie thread group
	 * leaders who are already counted with the dead tasks
	 */
	rcu_read_lock();
	first = find_task_by_vpid(tgid);

	if (!first || !lock_task_sighand(first, &flags))
		goto out;

	if (first->signal->stats)
		memcpy(stats, first->signal->stats, sizeof(*stats));
	else
		memset(stats, 0, sizeof(*stats));

	start_time = ktime_get_ns();
	for_each_thread(first, tsk) {
		if (tsk->exit_state)
			continue;
		/*
		 * Accounting subsystem can call its functions here to
		 * fill in relevant parts of struct taskstats as follows
		 *
		 *	per-task-foo(stats, tsk);
		 */
		delayacct_add_tsk(stats, tsk);

		/* calculate task elapsed time in nsec */
		delta = start_time - tsk->start_time;
		/* Convert to micro seconds */
		do_div(delta, NSEC_PER_USEC);
		stats->ac_etime += delta;

		task_cputime(tsk, &utime, &stime);
		stats->ac_utime += div_u64(utime, NSEC_PER_USEC);
		stats->ac_stime += div_u64(stime, NSEC_PER_USEC);

		stats->nvcsw += tsk->nvcsw;
		stats->nivcsw += tsk->nivcsw;
	}

	unlock_task_sighand(first, &flags);
	rc = 0;
out:
	rcu_read_unlock();

	/* Note: the version is stamped on the failure path as well */
	stats->version = TASKSTATS_VERSION;
	/*
	 * Accounting subsystems can also add calls here to modify
	 * fields of taskstats.
	 */
	return rc;
}

/* Fold the exiting task's delay accounting into its thread group's stats,
 * if the group has a stats structure.
 */
static void fill_tgid_exit(struct task_struct *tsk)
{
	unsigned long flags;

	spin_lock_irqsave(&tsk->sighand->siglock, flags);
	if (!tsk->signal->stats)
		goto ret;

	/*
	 * Each accounting subsystem calls its functions here to
	 * accumulate its per-task stats for tsk, into the per-tgid structure
	 *
	 *	per-task-foo(tsk->signal->stats, tsk);
	 */
	delayacct_add_tsk(tsk->signal->stats, tsk);
ret:
	spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
	return;
}

/* Register (@isadd == REGISTER) or deregister listener @pid on every cpu
 * in @mask. Only callers in the initial user and pid namespaces are
 * accepted. If an allocation fails while registering, @pid is removed
 * again from all cpus in @mask and -ENOMEM is returned.
 */
static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd)
{
	struct listener_list *listeners;
	struct listener *s, *tmp, *s2;
	unsigned int cpu;
	int ret = 0;

	if (!cpumask_subset(mask, cpu_possible_mask))
		return -EINVAL;

	if (current_user_ns() != &init_user_ns)
		return -EINVAL;

	if (task_active_pid_ns(current) != &init_pid_ns)
		return -EINVAL;

	if (isadd == REGISTER) {
		for_each_cpu(cpu, mask) {
			s = kmalloc_node(sizeof(struct listener),
					GFP_KERNEL, cpu_to_node(cpu));
			if (!s) {
				ret = -ENOMEM;
				goto cleanup;
			}
			s->pid = pid;
			s->valid = 1;

			listeners = &per_cpu(listener_array, cpu);
			down_write(&listeners->sem);
			/* Skip the insert if @pid is already listening here */
			list_for_each_entry(s2, &listeners->list, list) {
				if (s2->pid == pid && s2->valid)
					goto exists;
			}
			list_add(&s->list, &listeners->list);
			s = NULL;
exists:
			up_write(&listeners->sem);
			kfree(s); /* nop if NULL */
		}
		return 0;
	}

	/* Deregister or cleanup */
cleanup:
	for_each_cpu(cpu, mask) {
		listeners = &per_cpu(listener_array, cpu);
		down_write(&listeners->sem);
		list_for_each_entry_safe(s, tmp, &listeners->list, list) {
			if (s->pid == pid) {
				list_del(&s->list);
				kfree(s);
				break;
			}
		}
		up_write(&listeners->sem);
	}
	return ret;
}

/* Parse the cpulist string carried by attribute @na into @mask.
 * Returns 1 if @na is NULL, -E2BIG / -EINVAL for an oversized or empty
 * attribute, -ENOMEM on allocation failure, else the cpulist_parse() result.
 */
static int parse(struct nlattr *na, struct cpumask *mask)
{
	char *data;
	int len;
	int ret;

	if (na == NULL)
		return 1;
	len = nla_len(na);
	if (len > TASKSTATS_CPUMASK_MAXLEN)
		return -E2BIG;
	if (len < 1)
		return -EINVAL;
	data = kmalloc(len, GFP_KERNEL);
	if (!data)
		return -ENOMEM;
	nla_strscpy(data, na, len);
	ret = cpulist_parse(data, mask);
	kfree(data);
	return ret;
}

/* Append a nested AGGR_PID/AGGR_TGID attribute to @skb holding @pid and
 * reserved space for a struct taskstats. Returns a pointer to that space,
 * or NULL if the skb has no room.
 */
static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid)
{
	struct nlattr *na, *ret;
	int aggr;

	aggr = (type == TASKSTATS_TYPE_PID)
			? TASKSTATS_TYPE_AGGR_PID
			: TASKSTATS_TYPE_AGGR_TGID;

	na = nla_nest_start_noflag(skb, aggr);
	if (!na)
		goto err;

	if (nla_put(skb, type, sizeof(pid), &pid) < 0) {
		nla_nest_cancel(skb, na);
		goto err;
	}
	ret = nla_reserve_64bit(skb, TASKSTATS_TYPE_STATS,
				sizeof(struct taskstats), TASKSTATS_TYPE_NULL);
	if (!ret) {
		nla_nest_cancel(skb, na);
		goto err;
	}
	nla_nest_end(skb, na);

	return nla_data(ret);
err:
	return NULL;
}

/* Handle CGROUPSTATS_CMD_GET: build cgroup statistics for the file behind
 * the fd given in CGROUPSTATS_CMD_ATTR_FD and send them as the reply.
 */
static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
{
	int rc = 0;
	struct sk_buff *rep_skb;
	struct cgroupstats *stats;
	struct nlattr *na;
	size_t size;
	u32 fd;

	na = info->attrs[CGROUPSTATS_CMD_ATTR_FD];
	if (!na)
		return -EINVAL;

	fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_FD]);
	CLASS(fd, f)(fd);
	/* NOTE(review): an fd that does not resolve yields 0 and no reply */
	if (fd_empty(f))
		return 0;

	size = nla_total_size(sizeof(struct cgroupstats));

	rc = prepare_reply(info, CGROUPSTATS_CMD_NEW, &rep_skb,
				size);
	if (rc < 0)
		return rc;

	na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS,
				sizeof(struct cgroupstats));
	if (na == NULL) {
		nlmsg_free(rep_skb);
		return -EMSGSIZE;
	}

	stats = nla_data(na);
	memset(stats, 0, sizeof(*stats));

	rc = cgroupstats_build(stats, fd_file(f)->f_path.dentry);
	if (rc < 0) {
		nlmsg_free(rep_skb);
		return rc;
	}

	return send_reply(rep_skb, info);
}

/* Register the sender as exit-data listener on the cpus named in the
 * REGISTER_CPUMASK attribute.
 */
static int cmd_attr_register_cpumask(struct genl_info *info)
{
	cpumask_var_t mask;
	int rc;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;
	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask);
	if (rc < 0)
		goto out;
	rc = add_del_listener(info->snd_portid, mask, REGISTER);
out:
	free_cpumask_var(mask);
	return rc;
}

/* Remove the sender as exit-data listener from the cpus named in the
 * DEREGISTER_CPUMASK attribute.
 */
static int cmd_attr_deregister_cpumask(struct genl_info *info)
{
	cpumask_var_t mask;
	int rc;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;
	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask);
	if (rc < 0)
		goto out;
	rc = add_del_listener(info->snd_portid, mask, DEREGISTER);
out:
	free_cpumask_var(mask);
	return rc;
}

/* Payload size of one reply: a u32 id, a 64-bit aligned struct taskstats
 * and the enclosing nest attribute header.
 */
static size_t taskstats_packet_size(void)
{
	size_t size;

	size = nla_total_size(sizeof(u32)) +
		nla_total_size_64bit(sizeof(struct taskstats)) +
		nla_total_size(0);

	return size;
}

/* Reply with the statistics of the task named by TASKSTATS_CMD_ATTR_PID */
static int cmd_attr_pid(struct genl_info *info)
{
	struct taskstats *stats;
	struct sk_buff *rep_skb;
	size_t size;
	u32 pid;
	int rc;

	size = taskstats_packet_size();

	rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);
	if (rc < 0)
		return rc;

	rc = -EINVAL;
	pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]);
	stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid);
	if (!stats)
		goto err;

	rc = fill_stats_for_pid(pid, stats);
	if (rc < 0)
		goto err;
	return send_reply(rep_skb, info);
err:
	nlmsg_free(rep_skb);
	return rc;
}

/* Reply with the aggregate statistics of the thread group named by
 * TASKSTATS_CMD_ATTR_TGID
 */
static int cmd_attr_tgid(struct genl_info *info)
{
	struct taskstats *stats;
	struct sk_buff *rep_skb;
	size_t size;
	u32 tgid;
	int rc;

	size = taskstats_packet_size();

	rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);
	if (rc < 0)
		return rc;

	rc = -EINVAL;
	tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]);
	stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid);
	if (!stats)
		goto err;

	rc = fill_stats_for_tgid(tgid, stats);
	if (rc < 0)
		goto err;
	return send_reply(rep_skb, info);
err:
	nlmsg_free(rep_skb);
	return rc;
}

/* Dispatch TASKSTATS_CMD_GET on the first attribute present, checked in
 * the order register, deregister, pid, tgid; -EINVAL if none is given.
 */
static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
{
	if (info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK])
		return cmd_attr_register_cpumask(info);
	else if (info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK])
		return cmd_attr_deregister_cpumask(info);
	else if (info->attrs[TASKSTATS_CMD_ATTR_PID])
		return cmd_attr_pid(info);
	else if (info->attrs[TASKSTATS_CMD_ATTR_TGID])
		return cmd_attr_tgid(info);
	else
		return -EINVAL;
}

/* Return the thread group's stats structure, allocating and publishing it
 * on first use. Returns NULL for a single-threaded group or if the
 * allocation fails.
 */
static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk)
{
	struct signal_struct *sig = tsk->signal;
	struct taskstats *stats_new, *stats;

	/* Pairs with smp_store_release() below. */
	stats = smp_load_acquire(&sig->stats);
	if (stats || thread_group_empty(tsk))
		return stats;

	/* No problem if kmem_cache_zalloc() fails */
	stats_new = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL);

	spin_lock_irq(&tsk->sighand->siglock);
	stats = sig->stats;
	if (!stats) {
		/*
		 * Pairs with smp_load_acquire() above and orders the
		 * kmem_cache_zalloc().
		 */
		smp_store_release(&sig->stats, stats_new);
		stats = stats_new;
		stats_new = NULL;
	}
	spin_unlock_irq(&tsk->sighand->siglock);

	/* Someone else published first: discard our copy */
	if (stats_new)
		kmem_cache_free(taskstats_cache, stats_new);

	return stats;
}

/* Send pid data out on exit */
void taskstats_exit(struct task_struct *tsk, int group_dead)
{
	int rc;
	struct listener_list *listeners;
	struct taskstats *stats;
	struct sk_buff *rep_skb;
	size_t size;
	int is_thread_group;

	if (!family_registered)
		return;

	/*
	 * Size includes space for nested attributes
	 */
	size = taskstats_packet_size();

	is_thread_group = !!taskstats_tgid_alloc(tsk);
	if (is_thread_group) {
		/* PID + STATS + TGID + STATS */
		size = 2 * size;
		/* fill the tsk->signal->stats structure */
		fill_tgid_exit(tsk);
	}

	/* Nothing to send if nobody listens on this cpu */
	listeners = raw_cpu_ptr(&listener_array);
	if (list_empty(&listeners->list))
		return;

	rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, size);
	if (rc < 0)
		return;

	stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID,
			 task_pid_nr_ns(tsk, &init_pid_ns));
	if (!stats)
		goto err;

	fill_stats(&init_user_ns, &init_pid_ns, tsk, stats);
	if (group_dead)
		stats->ac_flag |= AGROUP;

	/*
	 * Doesn't matter if tsk is the leader or the last group member leaving
	 */
	if (!is_thread_group || !group_dead)
		goto send;

	stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID,
			 task_tgid_nr_ns(tsk, &init_pid_ns));
	if (!stats)
		goto err;

	memcpy(stats, tsk->signal->stats, sizeof(*stats));

send:
	send_cpu_listeners(rep_skb, listeners);
	return;
err:
	nlmsg_free(rep_skb);
}

/* Generic netlink operations; only TASKSTATS_CMD_GET carries
 * GENL_ADMIN_PERM.
 */
static const struct genl_ops taskstats_ops[] = {
	{
		.cmd		= TASKSTATS_CMD_GET,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit		= taskstats_user_cmd,
		.policy		= taskstats_cmd_get_policy,
		.maxattr	= ARRAY_SIZE(taskstats_cmd_get_policy) - 1,
		.flags		= GENL_ADMIN_PERM,
	},
	{
		.cmd		= CGROUPSTATS_CMD_GET,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit		= cgroupstats_user_cmd,
		.policy		= cgroupstats_cmd_get_policy,
		.maxattr	= ARRAY_SIZE(cgroupstats_cmd_get_policy) - 1,
	},
};

static struct genl_family family __ro_after_init = {
	.name		= TASKSTATS_GENL_NAME,
	.version	= TASKSTATS_GENL_VERSION,
	.module		= THIS_MODULE,
	.ops		= taskstats_ops,
	.n_ops		= ARRAY_SIZE(taskstats_ops),
	.resv_start_op	= CGROUPSTATS_CMD_GET + 1,
	.netnsok	= true,
};

/* Needed early in initialization */
void __init taskstats_init_early(void)
{
	unsigned int i;

	taskstats_cache = KMEM_CACHE(taskstats, SLAB_PANIC);
	for_each_possible_cpu(i) {
		INIT_LIST_HEAD(&(per_cpu(listener_array, i).list));
		init_rwsem(&(per_cpu(listener_array, i).sem));
	}
}

/* Register the genetlink family; taskstats_exit() stays a no-op until
 * family_registered is set here.
 */
static int __init taskstats_init(void)
{
	int rc;

	rc = genl_register_family(&family);
	if (rc)
		return rc;

	family_registered = 1;
	pr_info("registered taskstats version %d\n", TASKSTATS_GENL_VERSION);
	return 0;
}

/*
 * late initcall ensures initialization of statistics collection
 * mechanisms precedes initialization of the taskstats interface
 */
late_initcall(taskstats_init);
99 3071 1 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 /* SPDX-License-Identifier: GPL-2.0 */ /* * sysfs.h - definitions for the device driver filesystem * * Copyright (c) 2001,2002 Patrick Mochel * Copyright (c) 2004 Silicon Graphics, Inc. * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007 Tejun Heo <teheo@suse.de> * * Please see Documentation/filesystems/sysfs.rst for more information. 
*/ #ifndef _SYSFS_H_ #define _SYSFS_H_ #include <linux/kernfs.h> #include <linux/compiler.h> #include <linux/errno.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/kobject_ns.h> #include <linux/stat.h> #include <linux/atomic.h> struct kobject; struct module; struct bin_attribute; enum kobj_ns_type; /* Common sysfs attribute descriptor: the attribute name and its umode_t mode. When CONFIG_DEBUG_LOCK_ALLOC is defined it also carries the ignore_lockdep flag and the lockdep class keys (key pointer plus embedded skey). */ struct attribute { const char *name; umode_t mode; #ifdef CONFIG_DEBUG_LOCK_ALLOC bool ignore_lockdep:1; struct lock_class_key *key; struct lock_class_key skey; #endif }; /** * sysfs_attr_init - initialize a dynamically allocated sysfs attribute * @attr: struct attribute to initialize * * Initialize a dynamically allocated struct attribute so we can * make lockdep happy. This is a new requirement for attributes * and initially this is only needed when lockdep is enabled. * Lockdep gives a nice error when your attribute is added to * sysfs if you don't have this. */ #ifdef CONFIG_DEBUG_LOCK_ALLOC #define sysfs_attr_init(attr) \ do { \ static struct lock_class_key __key; \ \ (attr)->key = &__key; \ } while (0) #else #define sysfs_attr_init(attr) do {} while (0) #endif /* __SYSFS_FUNCTION_ALTERNATIVE(): wraps MEMBERS in a struct when CONFIG_CFI is defined and in a union otherwise. */ #ifdef CONFIG_CFI #define __SYSFS_FUNCTION_ALTERNATIVE(MEMBERS...) struct { MEMBERS } #else #define __SYSFS_FUNCTION_ALTERNATIVE(MEMBERS...) union { MEMBERS } #endif /** * struct attribute_group - data structure used to declare an attribute group. * @name: Optional: Attribute group name * If specified, the attribute group will be created in a * new subdirectory with this name. Additionally when a * group is named, @is_visible and @is_bin_visible may * return SYSFS_GROUP_INVISIBLE to control visibility of * the directory itself. * @is_visible: Optional: Function to return permissions associated with an * attribute of the group. Will be called repeatedly for * each non-binary attribute in the group. Only read/write * permissions as well as SYSFS_PREALLOC are accepted. Must * return 0 if an attribute is not visible. 
The returned * value will replace static permissions defined in struct * attribute. Use SYSFS_GROUP_VISIBLE() when assigning this * callback to specify separate _group_visible() and * _attr_visible() handlers. * @is_bin_visible: * Optional: Function to return permissions associated with a * binary attribute of the group. Will be called repeatedly * for each binary attribute in the group. Only read/write * permissions as well as SYSFS_PREALLOC (and the * visibility flags for named groups) are accepted. Must * return 0 if a binary attribute is not visible. The * returned value will replace static permissions defined * in struct bin_attribute. If @is_visible is not set, use * SYSFS_GROUP_VISIBLE() when assigning this callback to * specify separate _group_visible() and _attr_visible() * handlers. * @bin_size: * Optional: Function to return the size of a binary attribute * of the group. Will be called repeatedly for each binary * attribute in the group. Overwrites the size field embedded * inside the attribute itself. * @attrs: Pointer to NULL terminated list of attributes. * @bin_attrs: Pointer to NULL terminated list of binary attributes. * Either attrs or bin_attrs or both must be provided. 
*/ struct attribute_group { const char *name; __SYSFS_FUNCTION_ALTERNATIVE( umode_t (*is_visible)(struct kobject *, struct attribute *, int); umode_t (*is_visible_const)(struct kobject *, const struct attribute *, int); ); umode_t (*is_bin_visible)(struct kobject *, const struct bin_attribute *, int); size_t (*bin_size)(struct kobject *, const struct bin_attribute *, int); union { struct attribute **attrs; const struct attribute *const *attrs_const; }; const struct bin_attribute *const *bin_attrs; }; /* Octal flag values used in umode_t mode values in addition to the permission bits. */ #define SYSFS_PREALLOC 010000 #define SYSFS_GROUP_INVISIBLE 020000 /* * DEFINE_SYSFS_GROUP_VISIBLE(name): * A helper macro to pair with the assignment of ".is_visible = * SYSFS_GROUP_VISIBLE(name)", that arranges for the directory * associated with a named attribute_group to optionally be hidden. * This allows for static declaration of attribute_groups, and the * simplification of attribute visibility lifetime that implies, * without polluting sysfs with empty attribute directories. * Ex. * * static umode_t example_attr_visible(struct kobject *kobj, * struct attribute *attr, int n) * { * if (example_attr_condition) * return 0; * else if (ro_attr_condition) * return 0444; * return attr->mode; * } * * static bool example_group_visible(struct kobject *kobj) * { * if (example_group_condition) * return false; * return true; * } * * DEFINE_SYSFS_GROUP_VISIBLE(example); * * static struct attribute_group example_group = { * .name = "example", * .is_visible = SYSFS_GROUP_VISIBLE(example), * .attrs = &example_attrs, * }; * * Note that it expects <name>_attr_visible and <name>_group_visible to * be defined. For cases where individual attributes do not need * separate visibility consideration, only entire group visibility at * once, see DEFINE_SIMPLE_SYSFS_GROUP_VISIBLE(). 
 */ #define DEFINE_SYSFS_GROUP_VISIBLE(name) \ static inline umode_t sysfs_group_visible_##name( \ struct kobject *kobj, struct attribute *attr, int n) \ { \ if (n == 0 && !name##_group_visible(kobj)) \ return SYSFS_GROUP_INVISIBLE; \ return name##_attr_visible(kobj, attr, n); \ } /* * DEFINE_SIMPLE_SYSFS_GROUP_VISIBLE(name): * A helper macro to pair with SYSFS_GROUP_VISIBLE() that like * DEFINE_SYSFS_GROUP_VISIBLE() controls group visibility, but does * not require the implementation of a per-attribute visibility * callback. * Ex. * * static bool example_group_visible(struct kobject *kobj) * { * if (example_group_condition) * return false; * return true; * } * * DEFINE_SIMPLE_SYSFS_GROUP_VISIBLE(example); * * static struct attribute_group example_group = { * .name = "example", * .is_visible = SYSFS_GROUP_VISIBLE(example), * .attrs = &example_attrs, * }; */ #define DEFINE_SIMPLE_SYSFS_GROUP_VISIBLE(name) \ static inline umode_t sysfs_group_visible_##name( \ struct kobject *kobj, struct attribute *a, int n) \ { \ if (n == 0 && !name##_group_visible(kobj)) \ return SYSFS_GROUP_INVISIBLE; \ return a->mode; \ } /* * Same as DEFINE_SYSFS_GROUP_VISIBLE, but for groups with only binary * attributes. 
If an attribute_group defines both text and binary * attributes, the group visibility is determined by the function * specified to is_visible() not is_bin_visible() */ #define DEFINE_SYSFS_BIN_GROUP_VISIBLE(name) \ static inline umode_t sysfs_group_visible_##name( \ struct kobject *kobj, const struct bin_attribute *attr, int n) \ { \ if (n == 0 && !name##_group_visible(kobj)) \ return SYSFS_GROUP_INVISIBLE; \ return name##_attr_visible(kobj, attr, n); \ } /* Same as DEFINE_SIMPLE_SYSFS_GROUP_VISIBLE, but the generated helper takes a const struct bin_attribute and returns its mode. */ #define DEFINE_SIMPLE_SYSFS_BIN_GROUP_VISIBLE(name) \ static inline umode_t sysfs_group_visible_##name( \ struct kobject *kobj, const struct bin_attribute *a, int n) \ { \ if (n == 0 && !name##_group_visible(kobj)) \ return SYSFS_GROUP_INVISIBLE; \ return a->mode; \ } /* Expands to sysfs_group_visible_<fn>, the name of the helper generated by the DEFINE_*_GROUP_VISIBLE() macros. */ #define SYSFS_GROUP_VISIBLE(fn) sysfs_group_visible_##fn /* * Use these macros to make defining attributes easier. * See include/linux/device.h for examples. */ #define __ATTR(_name, _mode, _show, _store) { \ .attr = {.name = __stringify(_name), \ .mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \ .show = _show, \ .store = _store, \ } #define __ATTR_PREALLOC(_name, _mode, _show, _store) { \ .attr = {.name = __stringify(_name), \ .mode = SYSFS_PREALLOC | VERIFY_OCTAL_PERMISSIONS(_mode) },\ .show = _show, \ .store = _store, \ } #define __ATTR_RO_MODE(_name, _mode) { \ .attr = { .name = __stringify(_name), \ .mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \ .show = _name##_show, \ } #define __ATTR_RO(_name) \ __ATTR_RO_MODE(_name, 0444) #define __ATTR_RW_MODE(_name, _mode) \ __ATTR(_name, _mode, _name##_show, _name##_store) #define __ATTR_WO(_name) \ __ATTR(_name, 0200, NULL, _name##_store) #define __ATTR_RW(_name) __ATTR(_name, 0644, _name##_show, _name##_store) #define __ATTR_NULL { .attr = { .name = NULL } } #ifdef CONFIG_DEBUG_LOCK_ALLOC #define __ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) { \ .attr = {.name = __stringify(_name), .mode = _mode, \ .ignore_lockdep = true }, \ .show = _show, \ .store = _store, \ } #else #define __ATTR_IGNORE_LOCKDEP __ATTR 
#endif /* __ATTRIBUTE_GROUPS(): defines the NULL-terminated array <_name>_groups whose only entry is &<_name>_group. */ #define __ATTRIBUTE_GROUPS(_name) \ static const struct attribute_group *_name##_groups[] = { \ &_name##_group, \ NULL, \ } /* ATTRIBUTE_GROUPS(): defines <_name>_group with .attrs taken from <_name>_attrs (accepted via _Generic as either struct attribute ** or const struct attribute *const *), followed by <_name>_groups. */ #define ATTRIBUTE_GROUPS(_name) \ static const struct attribute_group _name##_group = { \ .attrs = _Generic(_name##_attrs, \ struct attribute **: \ _name##_attrs, \ const struct attribute *const *: \ (void *)_name##_attrs \ ), \ }; \ __ATTRIBUTE_GROUPS(_name) /* BIN_ATTRIBUTE_GROUPS(): like ATTRIBUTE_GROUPS(), but assigns <_name>_attrs to .bin_attrs. */ #define BIN_ATTRIBUTE_GROUPS(_name) \ static const struct attribute_group _name##_group = { \ .bin_attrs = _name##_attrs, \ }; \ __ATTRIBUTE_GROUPS(_name) struct file; struct vm_area_struct; struct address_space; /* Binary sysfs attribute: embeds struct attribute and adds a size, a private pointer and the f_mapping, read, write, llseek and mmap callbacks. */ struct bin_attribute { struct attribute attr; size_t size; void *private; struct address_space *(*f_mapping)(void); ssize_t (*read)(struct file *, struct kobject *, const struct bin_attribute *, char *, loff_t, size_t); ssize_t (*write)(struct file *, struct kobject *, const struct bin_attribute *, char *, loff_t, size_t); loff_t (*llseek)(struct file *, struct kobject *, const struct bin_attribute *, loff_t, int); int (*mmap)(struct file *, struct kobject *, const struct bin_attribute *attr, struct vm_area_struct *vma); }; /** * sysfs_bin_attr_init - initialize a dynamically allocated bin_attribute * @attr: struct bin_attribute to initialize * * Initialize a dynamically allocated struct bin_attribute so we * can make lockdep happy. This is a new requirement for * attributes and initially this is only needed when lockdep is * enabled. Lockdep gives a nice error when your attribute is * added to sysfs if you don't have this. 
*/ #define sysfs_bin_attr_init(bin_attr) sysfs_attr_init(&(bin_attr)->attr) /* macros to create static binary attributes easier */ #define __BIN_ATTR(_name, _mode, _read, _write, _size) { \ .attr = { .name = __stringify(_name), .mode = _mode }, \ .read = _read, \ .write = _write, \ .size = _size, \ } #define __BIN_ATTR_RO(_name, _size) \ __BIN_ATTR(_name, 0444, _name##_read, NULL, _size) #define __BIN_ATTR_WO(_name, _size) \ __BIN_ATTR(_name, 0200, NULL, _name##_write, _size) #define __BIN_ATTR_RW(_name, _size) \ __BIN_ATTR(_name, 0644, _name##_read, _name##_write, _size) #define __BIN_ATTR_NULL __ATTR_NULL /* BIN_ATTR*(): define a struct bin_attribute variable named bin_attr_<_name>, initialised with the matching __BIN_ATTR*() initializer. */ #define BIN_ATTR(_name, _mode, _read, _write, _size) \ struct bin_attribute bin_attr_##_name = __BIN_ATTR(_name, _mode, _read, \ _write, _size) #define BIN_ATTR_RO(_name, _size) \ struct bin_attribute bin_attr_##_name = __BIN_ATTR_RO(_name, _size) #define BIN_ATTR_WO(_name, _size) \ struct bin_attribute bin_attr_##_name = __BIN_ATTR_WO(_name, _size) #define BIN_ATTR_RW(_name, _size) \ struct bin_attribute bin_attr_##_name = __BIN_ATTR_RW(_name, _size) /* ADMIN variants: same as the RO/RW forms but with modes 0400 and 0600. */ #define __BIN_ATTR_ADMIN_RO(_name, _size) \ __BIN_ATTR(_name, 0400, _name##_read, NULL, _size) #define __BIN_ATTR_ADMIN_RW(_name, _size) \ __BIN_ATTR(_name, 0600, _name##_read, _name##_write, _size) #define BIN_ATTR_ADMIN_RO(_name, _size) \ struct bin_attribute bin_attr_##_name = __BIN_ATTR_ADMIN_RO(_name, _size) #define BIN_ATTR_ADMIN_RW(_name, _size) \ struct bin_attribute bin_attr_##_name = __BIN_ATTR_ADMIN_RW(_name, _size) /* SIMPLE variants: read-only, size 0, with sysfs_bin_attr_simple_read() as the read callback and no write callback. */ #define __BIN_ATTR_SIMPLE_RO(_name, _mode) \ __BIN_ATTR(_name, _mode, sysfs_bin_attr_simple_read, NULL, 0) #define BIN_ATTR_SIMPLE_RO(_name) \ struct bin_attribute bin_attr_##_name = __BIN_ATTR_SIMPLE_RO(_name, 0444) #define BIN_ATTR_SIMPLE_ADMIN_RO(_name) \ struct bin_attribute bin_attr_##_name = __BIN_ATTR_SIMPLE_RO(_name, 0400) /* show/store callback pair; both receive the kobject, the attribute and a character buffer, and store additionally receives a size_t. */ struct sysfs_ops { ssize_t (*show)(struct kobject *, struct attribute *, char *); ssize_t (*store)(struct kobject *, struct attribute *, const char *, size_t); }; 
#ifdef CONFIG_SYSFS /* Prototypes of the sysfs interface, declared only when CONFIG_SYSFS is defined; matching inline stubs follow in the #else branch. */ int __must_check sysfs_create_dir_ns(struct kobject *kobj, const void *ns); void sysfs_remove_dir(struct kobject *kobj); int __must_check sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, const void *new_ns); int __must_check sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, const void *new_ns); int __must_check sysfs_create_mount_point(struct kobject *parent_kobj, const char *name); void sysfs_remove_mount_point(struct kobject *parent_kobj, const char *name); int __must_check sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns); int __must_check sysfs_create_files(struct kobject *kobj, const struct attribute * const *attr); int __must_check sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, umode_t mode); struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj, const struct attribute *attr); void sysfs_unbreak_active_protection(struct kernfs_node *kn); void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns); bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr); void sysfs_remove_files(struct kobject *kobj, const struct attribute * const *attr); int __must_check sysfs_create_bin_file(struct kobject *kobj, const struct bin_attribute *attr); void sysfs_remove_bin_file(struct kobject *kobj, const struct bin_attribute *attr); int __must_check sysfs_create_link(struct kobject *kobj, struct kobject *target, const char *name); int __must_check sysfs_create_link_nowarn(struct kobject *kobj, struct kobject *target, const char *name); void sysfs_remove_link(struct kobject *kobj, const char *name); int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *target, const char *old_name, const char *new_name, const void *new_ns); void sysfs_delete_link(struct kobject *dir, struct kobject *targ, const char *name); int __must_check sysfs_create_group(struct kobject *kobj, const struct 
attribute_group *grp); int __must_check sysfs_create_groups(struct kobject *kobj, const struct attribute_group **groups); int __must_check sysfs_update_groups(struct kobject *kobj, const struct attribute_group **groups); int sysfs_update_group(struct kobject *kobj, const struct attribute_group *grp); void sysfs_remove_group(struct kobject *kobj, const struct attribute_group *grp); void sysfs_remove_groups(struct kobject *kobj, const struct attribute_group **groups); int sysfs_add_file_to_group(struct kobject *kobj, const struct attribute *attr, const char *group); void sysfs_remove_file_from_group(struct kobject *kobj, const struct attribute *attr, const char *group); int sysfs_merge_group(struct kobject *kobj, const struct attribute_group *grp); void sysfs_unmerge_group(struct kobject *kobj, const struct attribute_group *grp); int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name, struct kobject *target, const char *link_name); void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name, const char *link_name); int compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, struct kobject *target_kobj, const char *target_name, const char *symlink_name); void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr); int __must_check sysfs_init(void); /* Forwards to kernfs_enable_ns(). */ static inline void sysfs_enable_ns(struct kernfs_node *kn) { return kernfs_enable_ns(kn); } int sysfs_file_change_owner(struct kobject *kobj, const char *name, kuid_t kuid, kgid_t kgid); int sysfs_change_owner(struct kobject *kobj, kuid_t kuid, kgid_t kgid); int sysfs_link_change_owner(struct kobject *kobj, struct kobject *targ, const char *name, kuid_t kuid, kgid_t kgid); int sysfs_groups_change_owner(struct kobject *kobj, const struct attribute_group **groups, kuid_t kuid, kgid_t kgid); int sysfs_group_change_owner(struct kobject *kobj, const struct attribute_group *groups, kuid_t kuid, kgid_t kgid); __printf(2, 3) int sysfs_emit(char *buf, const char *fmt, ...); 
__printf(3, 4) int sysfs_emit_at(char *buf, int at, const char *fmt, ...); ssize_t sysfs_bin_attr_simple_read(struct file *file, struct kobject *kobj, const struct bin_attribute *attr, char *buf, loff_t off, size_t count); #else /* CONFIG_SYSFS */ /* Without CONFIG_SYSFS every entry point below is an inline stub that does nothing and returns 0, NULL or false. */ static inline int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) { return 0; } static inline void sysfs_remove_dir(struct kobject *kobj) { } static inline int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, const void *new_ns) { return 0; } static inline int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, const void *new_ns) { return 0; } static inline int sysfs_create_mount_point(struct kobject *parent_kobj, const char *name) { return 0; } static inline void sysfs_remove_mount_point(struct kobject *parent_kobj, const char *name) { } static inline int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns) { return 0; } static inline int sysfs_create_files(struct kobject *kobj, const struct attribute * const *attr) { return 0; } static inline int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, umode_t mode) { return 0; } static inline struct kernfs_node * sysfs_break_active_protection(struct kobject *kobj, const struct attribute *attr) { return NULL; } static inline void sysfs_unbreak_active_protection(struct kernfs_node *kn) { } static inline void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns) { } static inline bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr) { return false; } static inline void sysfs_remove_files(struct kobject *kobj, const struct attribute * const *attr) { } static inline int sysfs_create_bin_file(struct kobject *kobj, const struct bin_attribute *attr) { return 0; } static inline void sysfs_remove_bin_file(struct kobject *kobj, const struct bin_attribute *attr) { } static inline int sysfs_create_link(struct kobject *kobj, 
struct kobject *target, const char *name) { return 0; } static inline int sysfs_create_link_nowarn(struct kobject *kobj, struct kobject *target, const char *name) { return 0; } static inline void sysfs_remove_link(struct kobject *kobj, const char *name) { } static inline int sysfs_rename_link_ns(struct kobject *k, struct kobject *t, const char *old_name, const char *new_name, const void *ns) { return 0; } static inline void sysfs_delete_link(struct kobject *k, struct kobject *t, const char *name) { } static inline int sysfs_create_group(struct kobject *kobj, const struct attribute_group *grp) { return 0; } static inline int sysfs_create_groups(struct kobject *kobj, const struct attribute_group **groups) { return 0; } static inline int sysfs_update_groups(struct kobject *kobj, const struct attribute_group **groups) { return 0; } static inline int sysfs_update_group(struct kobject *kobj, const struct attribute_group *grp) { return 0; } static inline void sysfs_remove_group(struct kobject *kobj, const struct attribute_group *grp) { } static inline void sysfs_remove_groups(struct kobject *kobj, const struct attribute_group **groups) { } static inline int sysfs_add_file_to_group(struct kobject *kobj, const struct attribute *attr, const char *group) { return 0; } static inline void sysfs_remove_file_from_group(struct kobject *kobj, const struct attribute *attr, const char *group) { } static inline int sysfs_merge_group(struct kobject *kobj, const struct attribute_group *grp) { return 0; } static inline void sysfs_unmerge_group(struct kobject *kobj, const struct attribute_group *grp) { } static inline int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name, struct kobject *target, const char *link_name) { return 0; } static inline void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name, const char *link_name) { } static inline int compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, struct kobject *target_kobj, const char 
*target_name, const char *symlink_name) { return 0; } static inline void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr) { } static inline int __must_check sysfs_init(void) { return 0; } static inline void sysfs_enable_ns(struct kernfs_node *kn) { } static inline int sysfs_file_change_owner(struct kobject *kobj, const char *name, kuid_t kuid, kgid_t kgid) { return 0; } static inline int sysfs_link_change_owner(struct kobject *kobj, struct kobject *targ, const char *name, kuid_t kuid, kgid_t kgid) { return 0; } static inline int sysfs_change_owner(struct kobject *kobj, kuid_t kuid, kgid_t kgid) { return 0; } static inline int sysfs_groups_change_owner(struct kobject *kobj, const struct attribute_group **groups, kuid_t kuid, kgid_t kgid) { return 0; } static inline int sysfs_group_change_owner(struct kobject *kobj, const struct attribute_group *groups, kuid_t kuid, kgid_t kgid) { return 0; } __printf(2, 3) static inline int sysfs_emit(char *buf, const char *fmt, ...) { return 0; } __printf(3, 4) static inline int sysfs_emit_at(char *buf, int at, const char *fmt, ...) 
{ return 0; } static inline ssize_t sysfs_bin_attr_simple_read(struct file *file, struct kobject *kobj, const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { return 0; } #endif /* CONFIG_SYSFS */ /* Convenience wrappers available in both configurations: the *_ns() entry points called with a NULL namespace argument, and sysfs_* names that forward to the corresponding kernfs_* helpers. */ static inline int __must_check sysfs_create_file(struct kobject *kobj, const struct attribute *attr) { return sysfs_create_file_ns(kobj, attr, NULL); } static inline void sysfs_remove_file(struct kobject *kobj, const struct attribute *attr) { sysfs_remove_file_ns(kobj, attr, NULL); } static inline int sysfs_rename_link(struct kobject *kobj, struct kobject *target, const char *old_name, const char *new_name) { return sysfs_rename_link_ns(kobj, target, old_name, new_name, NULL); } static inline void sysfs_notify_dirent(struct kernfs_node *kn) { kernfs_notify(kn); } static inline struct kernfs_node *sysfs_get_dirent(struct kernfs_node *parent, const char *name) { return kernfs_find_and_get(parent, name); } static inline struct kernfs_node *sysfs_get(struct kernfs_node *kn) { kernfs_get(kn); return kn; } static inline void sysfs_put(struct kernfs_node *kn) { kernfs_put(kn); } /* Permissions on a sysfs file: you didn't miss the 0 prefix did you? */ #define VERIFY_OCTAL_PERMISSIONS(perms) \ (BUILD_BUG_ON_ZERO((perms) < 0) + \ BUILD_BUG_ON_ZERO((perms) > 0777) + \ /* USER_READABLE >= GROUP_READABLE >= OTHER_READABLE */ \ BUILD_BUG_ON_ZERO((((perms) >> 6) & 4) < (((perms) >> 3) & 4)) + \ BUILD_BUG_ON_ZERO((((perms) >> 3) & 4) < ((perms) & 4)) + \ /* USER_WRITABLE >= GROUP_WRITABLE */ \ BUILD_BUG_ON_ZERO((((perms) >> 6) & 2) < (((perms) >> 3) & 2)) + \ /* OTHER_WRITABLE? Generally considered a bad idea. */ \ BUILD_BUG_ON_ZERO((perms) & 2) + \ (perms)) #endif /* _SYSFS_H_ */
5 1 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2016 Masaki Ota <masaki.ota@jp.alps.com> */ #include <linux/kernel.h> #include <linux/hid.h> #include <linux/input.h> #include <linux/input/mt.h> #include <linux/module.h> #include <linux/unaligned.h> #include "hid-ids.h" /* ALPS Device Product ID */ #define HID_PRODUCT_ID_T3_BTNLESS 0xD0C0 #define HID_PRODUCT_ID_COSMO 0x1202 #define HID_PRODUCT_ID_U1_PTP_1 0x1207 #define HID_PRODUCT_ID_U1 0x1209 #define HID_PRODUCT_ID_U1_PTP_2 0x120A #define HID_PRODUCT_ID_U1_DUAL 0x120B #define HID_PRODUCT_ID_T4_BTNLESS 0x120C #define DEV_SINGLEPOINT 0x01 #define DEV_DUALPOINT 0x02 #define U1_MOUSE_REPORT_ID 0x01 /* Mouse data ReportID */ 
#define U1_ABSOLUTE_REPORT_ID 0x03 /* Absolute data ReportID */ #define U1_ABSOLUTE_REPORT_ID_SECD 0x02 /* FW-PTP Absolute data ReportID */ #define U1_FEATURE_REPORT_ID 0x05 /* Feature ReportID */ #define U1_SP_ABSOLUTE_REPORT_ID 0x06 /* Feature ReportID */ #define U1_FEATURE_REPORT_LEN 0x08 /* Feature Report Length */ #define U1_FEATURE_REPORT_LEN_ALL 0x0A #define U1_CMD_REGISTER_READ 0xD1 #define U1_CMD_REGISTER_WRITE 0xD2 #define U1_DEVTYPE_SP_SUPPORT 0x10 /* SP Support */ #define U1_DISABLE_DEV 0x01 #define U1_TP_ABS_MODE 0x02 #define U1_SP_ABS_MODE 0x80 #define ADDRESS_U1_DEV_CTRL_1 0x00800040 #define ADDRESS_U1_DEVICE_TYP 0x00800043 #define ADDRESS_U1_NUM_SENS_X 0x00800047 #define ADDRESS_U1_NUM_SENS_Y 0x00800048 #define ADDRESS_U1_PITCH_SENS_X 0x00800049 #define ADDRESS_U1_PITCH_SENS_Y 0x0080004A #define ADDRESS_U1_RESO_DWN_ABS 0x0080004E #define ADDRESS_U1_PAD_BTN 0x00800052 #define ADDRESS_U1_SP_BTN 0x0080009F #define T4_INPUT_REPORT_LEN sizeof(struct t4_input_report) #define T4_FEATURE_REPORT_LEN T4_INPUT_REPORT_LEN #define T4_FEATURE_REPORT_ID 7 #define T4_CMD_REGISTER_READ 0x08 #define T4_CMD_REGISTER_WRITE 0x07 #define T4_ADDRESS_BASE 0xC2C0 #define PRM_SYS_CONFIG_1 (T4_ADDRESS_BASE + 0x0002) #define T4_PRM_FEED_CONFIG_1 (T4_ADDRESS_BASE + 0x0004) #define T4_PRM_FEED_CONFIG_4 (T4_ADDRESS_BASE + 0x001A) #define T4_PRM_ID_CONFIG_3 (T4_ADDRESS_BASE + 0x00B0) #define T4_FEEDCFG4_ADVANCED_ABS_ENABLE 0x01 #define T4_I2C_ABS 0x78 #define T4_COUNT_PER_ELECTRODE 256 #define MAX_TOUCHES 5 enum dev_num { U1, T4, UNKNOWN, }; /** * struct alps_dev * * @input: pointer to the kernel input device * @input2: pointer to the kernel input2 device * @hdev: pointer to the struct hid_device * * @dev_type: device type * @max_fingers: total number of fingers * @has_sp: boolean of sp existense * @sp_btn_info: button information * @x_active_len_mm: active area length of X (mm) * @y_active_len_mm: active area length of Y (mm) * @x_max: maximum x coordinate value * @y_max: maximum 
y coordinate value * @x_min: minimum x coordinate value * @y_min: minimum y coordinate value * @btn_cnt: number of buttons * @sp_btn_cnt: number of stick buttons */ struct alps_dev { struct input_dev *input; struct input_dev *input2; struct hid_device *hdev; enum dev_num dev_type; u8 max_fingers; u8 has_sp; u8 sp_btn_info; u32 x_active_len_mm; u32 y_active_len_mm; u32 x_max; u32 y_max; u32 x_min; u32 y_min; u32 btn_cnt; u32 sp_btn_cnt; }; struct t4_contact_data { u8 palm; u8 x_lo; u8 x_hi; u8 y_lo; u8 y_hi; }; struct t4_input_report { u8 reportID; u8 numContacts; struct t4_contact_data contact[5]; u8 button; u8 track[5]; u8 zx[5], zy[5]; u8 palmTime[5]; u8 kilroy; u16 timeStamp; }; static u16 t4_calc_check_sum(u8 *buffer, unsigned long offset, unsigned long length) { u16 sum1 = 0xFF, sum2 = 0xFF; unsigned long i = 0; if (offset + length >= 50) return 0; while (length > 0) { u32 tlen = length > 20 ? 20 : length; length -= tlen; do { sum1 += buffer[offset + i]; sum2 += sum1; i++; } while (--tlen > 0); sum1 = (sum1 & 0xFF) + (sum1 >> 8); sum2 = (sum2 & 0xFF) + (sum2 >> 8); } sum1 = (sum1 & 0xFF) + (sum1 >> 8); sum2 = (sum2 & 0xFF) + (sum2 >> 8); return(sum2 << 8 | sum1); } static int t4_read_write_register(struct hid_device *hdev, u32 address, u8 *read_val, u8 write_val, bool read_flag) { int ret; u16 check_sum; u8 *input; u8 *readbuf = NULL; input = kzalloc(T4_FEATURE_REPORT_LEN, GFP_KERNEL); if (!input) return -ENOMEM; input[0] = T4_FEATURE_REPORT_ID; if (read_flag) { input[1] = T4_CMD_REGISTER_READ; input[8] = 0x00; } else { input[1] = T4_CMD_REGISTER_WRITE; input[8] = write_val; } put_unaligned_le32(address, input + 2); input[6] = 1; input[7] = 0; /* Calculate the checksum */ check_sum = t4_calc_check_sum(input, 1, 8); input[9] = (u8)check_sum; input[10] = (u8)(check_sum >> 8); input[11] = 0; ret = hid_hw_raw_request(hdev, T4_FEATURE_REPORT_ID, input, T4_FEATURE_REPORT_LEN, HID_FEATURE_REPORT, HID_REQ_SET_REPORT); if (ret < 0) { dev_err(&hdev->dev, "failed to read 
command (%d)\n", ret); goto exit; } if (read_flag) { readbuf = kzalloc(T4_FEATURE_REPORT_LEN, GFP_KERNEL); if (!readbuf) { ret = -ENOMEM; goto exit; } ret = hid_hw_raw_request(hdev, T4_FEATURE_REPORT_ID, readbuf, T4_FEATURE_REPORT_LEN, HID_FEATURE_REPORT, HID_REQ_GET_REPORT); if (ret < 0) { dev_err(&hdev->dev, "failed read register (%d)\n", ret); goto exit_readbuf; } ret = -EINVAL; if (*(u32 *)&readbuf[6] != address) { dev_err(&hdev->dev, "read register address error (%x,%x)\n", *(u32 *)&readbuf[6], address); goto exit_readbuf; } if (*(u16 *)&readbuf[10] != 1) { dev_err(&hdev->dev, "read register size error (%x)\n", *(u16 *)&readbuf[10]); goto exit_readbuf; } check_sum = t4_calc_check_sum(readbuf, 6, 7); if (*(u16 *)&readbuf[13] != check_sum) { dev_err(&hdev->dev, "read register checksum error (%x,%x)\n", *(u16 *)&readbuf[13], check_sum); goto exit_readbuf; } *read_val = readbuf[12]; } ret = 0; exit_readbuf: kfree(readbuf); exit: kfree(input); return ret; } static int u1_read_write_register(struct hid_device *hdev, u32 address, u8 *read_val, u8 write_val, bool read_flag) { int ret, i; u8 check_sum; u8 *input; u8 *readbuf; input = kzalloc(U1_FEATURE_REPORT_LEN, GFP_KERNEL); if (!input) return -ENOMEM; input[0] = U1_FEATURE_REPORT_ID; if (read_flag) { input[1] = U1_CMD_REGISTER_READ; input[6] = 0x00; } else { input[1] = U1_CMD_REGISTER_WRITE; input[6] = write_val; } put_unaligned_le32(address, input + 2); /* Calculate the checksum */ check_sum = U1_FEATURE_REPORT_LEN_ALL; for (i = 0; i < U1_FEATURE_REPORT_LEN - 1; i++) check_sum += input[i]; input[7] = check_sum; ret = hid_hw_raw_request(hdev, U1_FEATURE_REPORT_ID, input, U1_FEATURE_REPORT_LEN, HID_FEATURE_REPORT, HID_REQ_SET_REPORT); if (ret < 0) { dev_err(&hdev->dev, "failed to read command (%d)\n", ret); goto exit; } if (read_flag) { readbuf = kzalloc(U1_FEATURE_REPORT_LEN, GFP_KERNEL); if (!readbuf) { ret = -ENOMEM; goto exit; } ret = hid_hw_raw_request(hdev, U1_FEATURE_REPORT_ID, readbuf, U1_FEATURE_REPORT_LEN, 
HID_FEATURE_REPORT, HID_REQ_GET_REPORT); if (ret < 0) { dev_err(&hdev->dev, "failed read register (%d)\n", ret); kfree(readbuf); goto exit; } *read_val = readbuf[6]; kfree(readbuf); } ret = 0; exit: kfree(input); return ret; } static int t4_raw_event(struct alps_dev *hdata, u8 *data, int size) { unsigned int x, y, z; int i; struct t4_input_report *p_report = (struct t4_input_report *)data; if (!data) return 0; for (i = 0; i < hdata->max_fingers; i++) { x = p_report->contact[i].x_hi << 8 | p_report->contact[i].x_lo; y = p_report->contact[i].y_hi << 8 | p_report->contact[i].y_lo; y = hdata->y_max - y + hdata->y_min; z = (p_report->contact[i].palm < 0x80 && p_report->contact[i].palm > 0) * 62; if (x == 0xffff) { x = 0; y = 0; z = 0; } input_mt_slot(hdata->input, i); input_mt_report_slot_state(hdata->input, MT_TOOL_FINGER, z != 0); if (!z) continue; input_report_abs(hdata->input, ABS_MT_POSITION_X, x); input_report_abs(hdata->input, ABS_MT_POSITION_Y, y); input_report_abs(hdata->input, ABS_MT_PRESSURE, z); } input_mt_sync_frame(hdata->input); input_report_key(hdata->input, BTN_LEFT, p_report->button); input_sync(hdata->input); return 1; } static int u1_raw_event(struct alps_dev *hdata, u8 *data, int size) { unsigned int x, y, z; int i; short sp_x, sp_y; if (!data) return 0; switch (data[0]) { case U1_MOUSE_REPORT_ID: break; case U1_FEATURE_REPORT_ID: break; case U1_ABSOLUTE_REPORT_ID: case U1_ABSOLUTE_REPORT_ID_SECD: for (i = 0; i < hdata->max_fingers; i++) { u8 *contact = &data[i * 5]; x = get_unaligned_le16(contact + 3); y = get_unaligned_le16(contact + 5); z = contact[7] & 0x7F; input_mt_slot(hdata->input, i); if (z != 0) { input_mt_report_slot_state(hdata->input, MT_TOOL_FINGER, 1); input_report_abs(hdata->input, ABS_MT_POSITION_X, x); input_report_abs(hdata->input, ABS_MT_POSITION_Y, y); input_report_abs(hdata->input, ABS_MT_PRESSURE, z); } else { input_mt_report_slot_inactive(hdata->input); } } input_mt_sync_frame(hdata->input); input_report_key(hdata->input, 
BTN_LEFT, data[1] & 0x1); input_report_key(hdata->input, BTN_RIGHT, (data[1] & 0x2)); input_report_key(hdata->input, BTN_MIDDLE, (data[1] & 0x4)); input_sync(hdata->input); return 1; case U1_SP_ABSOLUTE_REPORT_ID: sp_x = get_unaligned_le16(data+2); sp_y = get_unaligned_le16(data+4); sp_x = sp_x / 8; sp_y = sp_y / 8; input_report_rel(hdata->input2, REL_X, sp_x); input_report_rel(hdata->input2, REL_Y, sp_y); input_report_key(hdata->input2, BTN_LEFT, data[1] & 0x1); input_report_key(hdata->input2, BTN_RIGHT, (data[1] & 0x2)); input_report_key(hdata->input2, BTN_MIDDLE, (data[1] & 0x4)); input_sync(hdata->input2); return 1; } return 0; } static int alps_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { int ret = 0; struct alps_dev *hdata = hid_get_drvdata(hdev); switch (hdev->product) { case HID_PRODUCT_ID_T4_BTNLESS: ret = t4_raw_event(hdata, data, size); break; default: ret = u1_raw_event(hdata, data, size); break; } return ret; } static int __maybe_unused alps_post_reset(struct hid_device *hdev) { int ret = -1; struct alps_dev *data = hid_get_drvdata(hdev); switch (data->dev_type) { case T4: ret = t4_read_write_register(hdev, T4_PRM_FEED_CONFIG_1, NULL, T4_I2C_ABS, false); if (ret < 0) { dev_err(&hdev->dev, "failed T4_PRM_FEED_CONFIG_1 (%d)\n", ret); goto exit; } ret = t4_read_write_register(hdev, T4_PRM_FEED_CONFIG_4, NULL, T4_FEEDCFG4_ADVANCED_ABS_ENABLE, false); if (ret < 0) { dev_err(&hdev->dev, "failed T4_PRM_FEED_CONFIG_4 (%d)\n", ret); goto exit; } break; case U1: ret = u1_read_write_register(hdev, ADDRESS_U1_DEV_CTRL_1, NULL, U1_TP_ABS_MODE | U1_SP_ABS_MODE, false); if (ret < 0) { dev_err(&hdev->dev, "failed to change TP mode (%d)\n", ret); goto exit; } break; default: break; } exit: return ret; } static int __maybe_unused alps_post_resume(struct hid_device *hdev) { return alps_post_reset(hdev); } static int u1_init(struct hid_device *hdev, struct alps_dev *pri_data) { int ret; u8 tmp, dev_ctrl, sen_line_num_x, 
sen_line_num_y; u8 pitch_x, pitch_y, resolution; /* Device initialization */ ret = u1_read_write_register(hdev, ADDRESS_U1_DEV_CTRL_1, &dev_ctrl, 0, true); if (ret < 0) { dev_err(&hdev->dev, "failed U1_DEV_CTRL_1 (%d)\n", ret); goto exit; } dev_ctrl &= ~U1_DISABLE_DEV; dev_ctrl |= U1_TP_ABS_MODE; ret = u1_read_write_register(hdev, ADDRESS_U1_DEV_CTRL_1, NULL, dev_ctrl, false); if (ret < 0) { dev_err(&hdev->dev, "failed to change TP mode (%d)\n", ret); goto exit; } ret = u1_read_write_register(hdev, ADDRESS_U1_NUM_SENS_X, &sen_line_num_x, 0, true); if (ret < 0) { dev_err(&hdev->dev, "failed U1_NUM_SENS_X (%d)\n", ret); goto exit; } ret = u1_read_write_register(hdev, ADDRESS_U1_NUM_SENS_Y, &sen_line_num_y, 0, true); if (ret < 0) { dev_err(&hdev->dev, "failed U1_NUM_SENS_Y (%d)\n", ret); goto exit; } ret = u1_read_write_register(hdev, ADDRESS_U1_PITCH_SENS_X, &pitch_x, 0, true); if (ret < 0) { dev_err(&hdev->dev, "failed U1_PITCH_SENS_X (%d)\n", ret); goto exit; } ret = u1_read_write_register(hdev, ADDRESS_U1_PITCH_SENS_Y, &pitch_y, 0, true); if (ret < 0) { dev_err(&hdev->dev, "failed U1_PITCH_SENS_Y (%d)\n", ret); goto exit; } ret = u1_read_write_register(hdev, ADDRESS_U1_RESO_DWN_ABS, &resolution, 0, true); if (ret < 0) { dev_err(&hdev->dev, "failed U1_RESO_DWN_ABS (%d)\n", ret); goto exit; } pri_data->x_active_len_mm = (pitch_x * (sen_line_num_x - 1)) / 10; pri_data->y_active_len_mm = (pitch_y * (sen_line_num_y - 1)) / 10; pri_data->x_max = (resolution << 2) * (sen_line_num_x - 1); pri_data->x_min = 1; pri_data->y_max = (resolution << 2) * (sen_line_num_y - 1); pri_data->y_min = 1; ret = u1_read_write_register(hdev, ADDRESS_U1_PAD_BTN, &tmp, 0, true); if (ret < 0) { dev_err(&hdev->dev, "failed U1_PAD_BTN (%d)\n", ret); goto exit; } if ((tmp & 0x0F) == (tmp & 0xF0) >> 4) { pri_data->btn_cnt = (tmp & 0x0F); } else { /* Button pad */ pri_data->btn_cnt = 1; } pri_data->has_sp = 0; /* Check StickPointer device */ ret = u1_read_write_register(hdev, ADDRESS_U1_DEVICE_TYP, 
&tmp, 0, true); if (ret < 0) { dev_err(&hdev->dev, "failed U1_DEVICE_TYP (%d)\n", ret); goto exit; } if (tmp & U1_DEVTYPE_SP_SUPPORT) { dev_ctrl |= U1_SP_ABS_MODE; ret = u1_read_write_register(hdev, ADDRESS_U1_DEV_CTRL_1, NULL, dev_ctrl, false); if (ret < 0) { dev_err(&hdev->dev, "failed SP mode (%d)\n", ret); goto exit; } ret = u1_read_write_register(hdev, ADDRESS_U1_SP_BTN, &pri_data->sp_btn_info, 0, true); if (ret < 0) { dev_err(&hdev->dev, "failed U1_SP_BTN (%d)\n", ret); goto exit; } pri_data->has_sp = 1; } pri_data->max_fingers = 5; exit: return ret; } static int T4_init(struct hid_device *hdev, struct alps_dev *pri_data) { int ret; u8 tmp, sen_line_num_x, sen_line_num_y; ret = t4_read_write_register(hdev, T4_PRM_ID_CONFIG_3, &tmp, 0, true); if (ret < 0) { dev_err(&hdev->dev, "failed T4_PRM_ID_CONFIG_3 (%d)\n", ret); goto exit; } sen_line_num_x = 16 + ((tmp & 0x0F) | (tmp & 0x08 ? 0xF0 : 0)); sen_line_num_y = 12 + (((tmp & 0xF0) >> 4) | (tmp & 0x80 ? 0xF0 : 0)); pri_data->x_max = sen_line_num_x * T4_COUNT_PER_ELECTRODE; pri_data->x_min = T4_COUNT_PER_ELECTRODE; pri_data->y_max = sen_line_num_y * T4_COUNT_PER_ELECTRODE; pri_data->y_min = T4_COUNT_PER_ELECTRODE; pri_data->x_active_len_mm = pri_data->y_active_len_mm = 0; pri_data->btn_cnt = 1; ret = t4_read_write_register(hdev, PRM_SYS_CONFIG_1, &tmp, 0, true); if (ret < 0) { dev_err(&hdev->dev, "failed PRM_SYS_CONFIG_1 (%d)\n", ret); goto exit; } tmp |= 0x02; ret = t4_read_write_register(hdev, PRM_SYS_CONFIG_1, NULL, tmp, false); if (ret < 0) { dev_err(&hdev->dev, "failed PRM_SYS_CONFIG_1 (%d)\n", ret); goto exit; } ret = t4_read_write_register(hdev, T4_PRM_FEED_CONFIG_1, NULL, T4_I2C_ABS, false); if (ret < 0) { dev_err(&hdev->dev, "failed T4_PRM_FEED_CONFIG_1 (%d)\n", ret); goto exit; } ret = t4_read_write_register(hdev, T4_PRM_FEED_CONFIG_4, NULL, T4_FEEDCFG4_ADVANCED_ABS_ENABLE, false); if (ret < 0) { dev_err(&hdev->dev, "failed T4_PRM_FEED_CONFIG_4 (%d)\n", ret); goto exit; } pri_data->max_fingers = 5; 
pri_data->has_sp = 0; exit: return ret; } static int alps_sp_open(struct input_dev *dev) { struct hid_device *hid = input_get_drvdata(dev); return hid_hw_open(hid); } static void alps_sp_close(struct input_dev *dev) { struct hid_device *hid = input_get_drvdata(dev); hid_hw_close(hid); } static int alps_input_configured(struct hid_device *hdev, struct hid_input *hi) { struct alps_dev *data = hid_get_drvdata(hdev); struct input_dev *input = hi->input, *input2; int ret; int res_x, res_y, i; data->input = input; hid_dbg(hdev, "Opening low level driver\n"); ret = hid_hw_open(hdev); if (ret) return ret; /* Allow incoming hid reports */ hid_device_io_start(hdev); switch (data->dev_type) { case T4: ret = T4_init(hdev, data); break; case U1: ret = u1_init(hdev, data); break; default: break; } if (ret) goto exit; __set_bit(EV_ABS, input->evbit); input_set_abs_params(input, ABS_MT_POSITION_X, data->x_min, data->x_max, 0, 0); input_set_abs_params(input, ABS_MT_POSITION_Y, data->y_min, data->y_max, 0, 0); if (data->x_active_len_mm && data->y_active_len_mm) { res_x = (data->x_max - 1) / data->x_active_len_mm; res_y = (data->y_max - 1) / data->y_active_len_mm; input_abs_set_res(input, ABS_MT_POSITION_X, res_x); input_abs_set_res(input, ABS_MT_POSITION_Y, res_y); } input_set_abs_params(input, ABS_MT_PRESSURE, 0, 64, 0, 0); input_mt_init_slots(input, data->max_fingers, INPUT_MT_POINTER); __set_bit(EV_KEY, input->evbit); if (data->btn_cnt == 1) __set_bit(INPUT_PROP_BUTTONPAD, input->propbit); for (i = 0; i < data->btn_cnt; i++) __set_bit(BTN_LEFT + i, input->keybit); /* Stick device initialization */ if (data->has_sp) { input2 = input_allocate_device(); if (!input2) { ret = -ENOMEM; goto exit; } data->input2 = input2; input2->phys = input->phys; input2->name = "DualPoint Stick"; input2->id.bustype = BUS_I2C; input2->id.vendor = input->id.vendor; input2->id.product = input->id.product; input2->id.version = input->id.version; input2->dev.parent = input->dev.parent; 
input_set_drvdata(input2, hdev); input2->open = alps_sp_open; input2->close = alps_sp_close; __set_bit(EV_KEY, input2->evbit); data->sp_btn_cnt = (data->sp_btn_info & 0x0F); for (i = 0; i < data->sp_btn_cnt; i++) __set_bit(BTN_LEFT + i, input2->keybit); __set_bit(EV_REL, input2->evbit); __set_bit(REL_X, input2->relbit); __set_bit(REL_Y, input2->relbit); __set_bit(INPUT_PROP_POINTER, input2->propbit); __set_bit(INPUT_PROP_POINTING_STICK, input2->propbit); if (input_register_device(data->input2)) { input_free_device(input2); ret = -ENOENT; goto exit; } } exit: hid_device_io_stop(hdev); hid_hw_close(hdev); return ret; } static int alps_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { return -1; } static int alps_probe(struct hid_device *hdev, const struct hid_device_id *id) { struct alps_dev *data = NULL; int ret; data = devm_kzalloc(&hdev->dev, sizeof(struct alps_dev), GFP_KERNEL); if (!data) return -ENOMEM; data->hdev = hdev; hid_set_drvdata(hdev, data); hdev->quirks |= HID_QUIRK_NO_INIT_REPORTS; ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); return ret; } switch (hdev->product) { case HID_DEVICE_ID_ALPS_T4_BTNLESS: data->dev_type = T4; break; case HID_DEVICE_ID_ALPS_U1_DUAL: case HID_DEVICE_ID_ALPS_U1: case HID_DEVICE_ID_ALPS_U1_UNICORN_LEGACY: data->dev_type = U1; break; default: data->dev_type = UNKNOWN; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (ret) { hid_err(hdev, "hw start failed\n"); return ret; } return 0; } static const struct hid_device_id alps_id[] = { { HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_ALPS_JP, HID_DEVICE_ID_ALPS_U1_DUAL) }, { HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_ALPS_JP, HID_DEVICE_ID_ALPS_U1) }, { HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_ALPS_JP, HID_DEVICE_ID_ALPS_U1_UNICORN_LEGACY) }, { HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_ALPS_JP, HID_DEVICE_ID_ALPS_T4_BTNLESS) 
}, { } }; MODULE_DEVICE_TABLE(hid, alps_id); static struct hid_driver alps_driver = { .name = "hid-alps", .id_table = alps_id, .probe = alps_probe, .raw_event = alps_raw_event, .input_mapping = alps_input_mapping, .input_configured = alps_input_configured, .resume = pm_ptr(alps_post_resume), .reset_resume = pm_ptr(alps_post_reset), }; module_hid_driver(alps_driver); MODULE_AUTHOR("Masaki Ota <masaki.ota@jp.alps.com>"); MODULE_DESCRIPTION("ALPS HID driver"); MODULE_LICENSE("GPL");
10 1294 269 268 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MMAN_H #define _LINUX_MMAN_H #include <linux/fs.h> #include <linux/mm.h> #include <linux/percpu_counter.h> #include <linux/atomic.h> #include <uapi/linux/mman.h> /* * Arrange for legacy / undefined architecture specific flags to be * ignored by mmap handling code. */ #ifndef MAP_32BIT #define MAP_32BIT 0 #endif #ifndef MAP_ABOVE4G #define MAP_ABOVE4G 0 #endif #ifndef MAP_HUGE_2MB #define MAP_HUGE_2MB 0 #endif #ifndef MAP_HUGE_1GB #define MAP_HUGE_1GB 0 #endif #ifndef MAP_UNINITIALIZED #define MAP_UNINITIALIZED 0 #endif #ifndef MAP_SYNC #define MAP_SYNC 0 #endif /* * The historical set of flags that all mmap implementations implicitly * support when a ->mmap_validate() op is not provided in file_operations. * * MAP_EXECUTABLE and MAP_DENYWRITE are completely ignored throughout the * kernel. 
*/ #define LEGACY_MAP_MASK (MAP_SHARED \ | MAP_PRIVATE \ | MAP_FIXED \ | MAP_ANONYMOUS \ | MAP_DENYWRITE \ | MAP_EXECUTABLE \ | MAP_UNINITIALIZED \ | MAP_GROWSDOWN \ | MAP_LOCKED \ | MAP_NORESERVE \ | MAP_POPULATE \ | MAP_NONBLOCK \ | MAP_STACK \ | MAP_HUGETLB \ | MAP_32BIT \ | MAP_ABOVE4G \ | MAP_HUGE_2MB \ | MAP_HUGE_1GB) extern int sysctl_overcommit_memory; extern struct percpu_counter vm_committed_as; #ifdef CONFIG_SMP extern s32 vm_committed_as_batch; extern void mm_compute_batch(int overcommit_policy); #else #define vm_committed_as_batch 0 static inline void mm_compute_batch(int overcommit_policy) { } #endif unsigned long vm_memory_committed(void); static inline void vm_acct_memory(long pages) { percpu_counter_add_batch(&vm_committed_as, pages, vm_committed_as_batch); } static inline void vm_unacct_memory(long pages) { vm_acct_memory(-pages); } /* * Allow architectures to handle additional protection and flag bits. The * overriding macros must be defined in the arch-specific asm/mman.h file. */ #ifndef arch_calc_vm_prot_bits #define arch_calc_vm_prot_bits(prot, pkey) 0 #endif #ifndef arch_calc_vm_flag_bits #define arch_calc_vm_flag_bits(file, flags) 0 #endif #ifndef arch_validate_prot /* * This is called from mprotect(). PROT_GROWSDOWN and PROT_GROWSUP have * already been masked out. * * Returns true if the prot flags are valid */ static inline bool arch_validate_prot(unsigned long prot, unsigned long addr) { return (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM)) == 0; } #define arch_validate_prot arch_validate_prot #endif #ifndef arch_validate_flags /* * This is called from mmap() and mprotect() with the updated vma->vm_flags. * * Returns true if the VM_* flags are valid. */ static inline bool arch_validate_flags(unsigned long flags) { return true; } #define arch_validate_flags arch_validate_flags #endif /* * Optimisation macro. It is equivalent to: * (x & bit1) ? bit2 : 0 * but this version is faster. 
* ("bit1" and "bit2" must be single bits) */ #define _calc_vm_trans(x, bit1, bit2) \ ((!(bit1) || !(bit2)) ? 0 : \ ((bit1) <= (bit2) ? ((x) & (bit1)) * ((bit2) / (bit1)) \ : ((x) & (bit1)) / ((bit1) / (bit2)))) /* * Combine the mmap "prot" argument into "vm_flags" used internally. */ static inline vm_flags_t calc_vm_prot_bits(unsigned long prot, unsigned long pkey) { return _calc_vm_trans(prot, PROT_READ, VM_READ ) | _calc_vm_trans(prot, PROT_WRITE, VM_WRITE) | _calc_vm_trans(prot, PROT_EXEC, VM_EXEC) | arch_calc_vm_prot_bits(prot, pkey); } /* * Combine the mmap "flags" argument into "vm_flags" used internally. */ static inline vm_flags_t calc_vm_flag_bits(struct file *file, unsigned long flags) { return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | #ifdef CONFIG_TRANSPARENT_HUGEPAGE _calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) | #endif arch_calc_vm_flag_bits(file, flags); } unsigned long vm_commit_limit(void); #ifndef arch_memory_deny_write_exec_supported static inline bool arch_memory_deny_write_exec_supported(void) { return true; } #define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported #endif /* * Denies creating a writable executable mapping or gaining executable permissions. * * This denies the following: * * a) mmap(PROT_WRITE | PROT_EXEC) * * b) mmap(PROT_WRITE) * mprotect(PROT_EXEC) * * c) mmap(PROT_WRITE) * mprotect(PROT_READ) * mprotect(PROT_EXEC) * * But allows the following: * * d) mmap(PROT_READ | PROT_EXEC) * mmap(PROT_READ | PROT_EXEC | PROT_BTI) * * This is only applicable if the user has set the Memory-Deny-Write-Execute * (MDWE) protection mask for the current process. * * @old specifies the VMA flags the VMA originally possessed, and @new the ones * we propose to set. * * Return: false if proposed change is OK, true if not ok and should be denied. 
*/ static inline bool map_deny_write_exec(unsigned long old, unsigned long new) { /* If MDWE is disabled, we have nothing to deny. */ if (!mm_flags_test(MMF_HAS_MDWE, current->mm)) return false; /* If the new VMA is not executable, we have nothing to deny. */ if (!(new & VM_EXEC)) return false; /* Under MDWE we do not accept newly writably executable VMAs... */ if (new & VM_WRITE) return true; /* ...nor previously non-executable VMAs becoming executable. */ if (!(old & VM_EXEC)) return true; return false; } #endif /* _LINUX_MMAN_H */
5085 5094 2549 25 331 329 329 329 57 57 2 50 56 2482 2476 2473 2482 3779 3785 2457 2 2459 2457 3787 3786 2457 2733 2727 2228 2223 2225 1 2229 326 25 326 326 326 326 326 25 25 2459 2228 4055 4061 326 326 326 326 63 63 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 
473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 
973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 
1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 // SPDX-License-Identifier: GPL-2.0 /* * bus.c - bus driver management * * Copyright (c) 2002-3 Patrick Mochel * Copyright (c) 2002-3 Open Source Development Labs * Copyright (c) 2007 Greg Kroah-Hartman <gregkh@suse.de> * Copyright (c) 2007 Novell Inc. * Copyright (c) 2023 Greg Kroah-Hartman <gregkh@linuxfoundation.org> */ #include <linux/async.h> #include <linux/device/bus.h> #include <linux/device.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/string.h> #include <linux/mutex.h> #include <linux/sysfs.h> #include "base.h" #include "power/power.h" /* /sys/devices/system */ static struct kset *system_kset; /* /sys/bus */ static struct kset *bus_kset; #define to_bus_attr(_attr) container_of(_attr, struct bus_attribute, attr) /* * sysfs bindings for drivers */ #define to_drv_attr(_attr) container_of(_attr, struct driver_attribute, attr) #define DRIVER_ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) \ struct driver_attribute driver_attr_##_name = \ __ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) static int __must_check bus_rescan_devices_helper(struct device *dev, void *data); /** * bus_to_subsys - Turn a struct bus_type into a struct subsys_private * * @bus: pointer to the struct bus_type to look up * * The driver core internals needs to work on the subsys_private structure, not * the external struct bus_type pointer. 
This function walks the list of * registered busses in the system and finds the matching one and returns the * internal struct subsys_private that relates to that bus. * * Note, the reference count of the return value is INCREMENTED if it is not * NULL. A call to subsys_put() must be done when finished with the pointer in * order for it to be properly freed. */ struct subsys_private *bus_to_subsys(const struct bus_type *bus) { struct subsys_private *sp = NULL; struct kobject *kobj; if (!bus || !bus_kset) return NULL; spin_lock(&bus_kset->list_lock); if (list_empty(&bus_kset->list)) goto done; list_for_each_entry(kobj, &bus_kset->list, entry) { struct kset *kset = container_of(kobj, struct kset, kobj); sp = container_of_const(kset, struct subsys_private, subsys); if (sp->bus == bus) goto done; } sp = NULL; done: sp = subsys_get(sp); spin_unlock(&bus_kset->list_lock); return sp; } static const struct bus_type *bus_get(const struct bus_type *bus) { struct subsys_private *sp = bus_to_subsys(bus); if (sp) return bus; return NULL; } static void bus_put(const struct bus_type *bus) { struct subsys_private *sp = bus_to_subsys(bus); /* two puts are required as the call to bus_to_subsys incremented it again */ subsys_put(sp); subsys_put(sp); } static ssize_t drv_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct driver_attribute *drv_attr = to_drv_attr(attr); struct driver_private *drv_priv = to_driver(kobj); ssize_t ret = -EIO; if (drv_attr->show) ret = drv_attr->show(drv_priv->driver, buf); return ret; } static ssize_t drv_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { struct driver_attribute *drv_attr = to_drv_attr(attr); struct driver_private *drv_priv = to_driver(kobj); ssize_t ret = -EIO; if (drv_attr->store) ret = drv_attr->store(drv_priv->driver, buf, count); return ret; } static const struct sysfs_ops driver_sysfs_ops = { .show = drv_attr_show, .store = drv_attr_store, }; static void 
driver_release(struct kobject *kobj) { struct driver_private *drv_priv = to_driver(kobj); pr_debug("driver: '%s': %s\n", kobject_name(kobj), __func__); kfree(drv_priv); } static const struct kobj_type driver_ktype = { .sysfs_ops = &driver_sysfs_ops, .release = driver_release, }; /* * sysfs bindings for buses */ static ssize_t bus_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct bus_attribute *bus_attr = to_bus_attr(attr); struct subsys_private *subsys_priv = to_subsys_private(kobj); /* return -EIO for reading a bus attribute without show() */ ssize_t ret = -EIO; if (bus_attr->show) ret = bus_attr->show(subsys_priv->bus, buf); return ret; } static ssize_t bus_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { struct bus_attribute *bus_attr = to_bus_attr(attr); struct subsys_private *subsys_priv = to_subsys_private(kobj); /* return -EIO for writing a bus attribute without store() */ ssize_t ret = -EIO; if (bus_attr->store) ret = bus_attr->store(subsys_priv->bus, buf, count); return ret; } static const struct sysfs_ops bus_sysfs_ops = { .show = bus_attr_show, .store = bus_attr_store, }; int bus_create_file(const struct bus_type *bus, struct bus_attribute *attr) { struct subsys_private *sp = bus_to_subsys(bus); int error; if (!sp) return -EINVAL; error = sysfs_create_file(&sp->subsys.kobj, &attr->attr); subsys_put(sp); return error; } EXPORT_SYMBOL_GPL(bus_create_file); void bus_remove_file(const struct bus_type *bus, struct bus_attribute *attr) { struct subsys_private *sp = bus_to_subsys(bus); if (!sp) return; sysfs_remove_file(&sp->subsys.kobj, &attr->attr); subsys_put(sp); } EXPORT_SYMBOL_GPL(bus_remove_file); static void bus_release(struct kobject *kobj) { struct subsys_private *priv = to_subsys_private(kobj); lockdep_unregister_key(&priv->lock_key); kfree(priv); } static const struct kobj_type bus_ktype = { .sysfs_ops = &bus_sysfs_ops, .release = bus_release, }; static int bus_uevent_filter(const 
struct kobject *kobj) { const struct kobj_type *ktype = get_ktype(kobj); if (ktype == &bus_ktype) return 1; return 0; } static const struct kset_uevent_ops bus_uevent_ops = { .filter = bus_uevent_filter, }; /* Manually detach a device from its associated driver. */ static ssize_t unbind_store(struct device_driver *drv, const char *buf, size_t count) { const struct bus_type *bus = bus_get(drv->bus); struct device *dev; int err = -ENODEV; dev = bus_find_device_by_name(bus, NULL, buf); if (dev && dev->driver == drv) { device_driver_detach(dev); err = count; } put_device(dev); bus_put(bus); return err; } static DRIVER_ATTR_IGNORE_LOCKDEP(unbind, 0200, NULL, unbind_store); /* * Manually attach a device to a driver. * Note: the driver must want to bind to the device, * it is not possible to override the driver's id table. */ static ssize_t bind_store(struct device_driver *drv, const char *buf, size_t count) { const struct bus_type *bus = bus_get(drv->bus); struct device *dev; int err = -ENODEV; dev = bus_find_device_by_name(bus, NULL, buf); if (dev && driver_match_device(drv, dev)) { err = device_driver_attach(drv, dev); if (!err) { /* success */ err = count; } } put_device(dev); bus_put(bus); return err; } static DRIVER_ATTR_IGNORE_LOCKDEP(bind, 0200, NULL, bind_store); static ssize_t drivers_autoprobe_show(const struct bus_type *bus, char *buf) { struct subsys_private *sp = bus_to_subsys(bus); int ret; if (!sp) return -EINVAL; ret = sysfs_emit(buf, "%d\n", sp->drivers_autoprobe); subsys_put(sp); return ret; } static ssize_t drivers_autoprobe_store(const struct bus_type *bus, const char *buf, size_t count) { struct subsys_private *sp = bus_to_subsys(bus); if (!sp) return -EINVAL; if (buf[0] == '0') sp->drivers_autoprobe = 0; else sp->drivers_autoprobe = 1; subsys_put(sp); return count; } static ssize_t drivers_probe_store(const struct bus_type *bus, const char *buf, size_t count) { struct device *dev; int err = -EINVAL; dev = bus_find_device_by_name(bus, NULL, buf); if 
(!dev) return -ENODEV; if (bus_rescan_devices_helper(dev, NULL) == 0) err = count; put_device(dev); return err; } static struct device *next_device(struct klist_iter *i) { struct klist_node *n = klist_next(i); struct device *dev = NULL; struct device_private *dev_prv; if (n) { dev_prv = to_device_private_bus(n); dev = dev_prv->device; } return dev; } static struct device *prev_device(struct klist_iter *i) { struct klist_node *n = klist_prev(i); struct device *dev = NULL; struct device_private *dev_prv; if (n) { dev_prv = to_device_private_bus(n); dev = dev_prv->device; } return dev; } /** * bus_for_each_dev - device iterator. * @bus: bus type. * @start: device to start iterating from. * @data: data for the callback. * @fn: function to be called for each device. * * Iterate over @bus's list of devices, and call @fn for each, * passing it @data. If @start is not NULL, we use that device to * begin iterating from. * * We check the return of @fn each time. If it returns anything * other than 0, we break out and return that value. * * NOTE: The device that returns a non-zero value is not retained * in any way, nor is its refcount incremented. If the caller needs * to retain this data, it should do so, and increment the reference * count in the supplied callback. */ int bus_for_each_dev(const struct bus_type *bus, struct device *start, void *data, device_iter_t fn) { struct subsys_private *sp = bus_to_subsys(bus); struct klist_iter i; struct device *dev; int error = 0; if (!sp) return -EINVAL; klist_iter_init_node(&sp->klist_devices, &i, (start ? &start->p->knode_bus : NULL)); while (!error && (dev = next_device(&i))) error = fn(dev, data); klist_iter_exit(&i); subsys_put(sp); return error; } EXPORT_SYMBOL_GPL(bus_for_each_dev); /** * bus_find_device - device iterator for locating a particular device. 
* @bus: bus type * @start: Device to begin with * @data: Data to pass to match function * @match: Callback function to check device * * This is similar to the bus_for_each_dev() function above, but it * returns a reference to a device that is 'found' for later use, as * determined by the @match callback. * * The callback should return 0 if the device doesn't match and non-zero * if it does. If the callback returns non-zero, this function will * return to the caller and not iterate over any more devices. */ struct device *bus_find_device(const struct bus_type *bus, struct device *start, const void *data, device_match_t match) { struct subsys_private *sp = bus_to_subsys(bus); struct klist_iter i; struct device *dev; if (!sp) return NULL; klist_iter_init_node(&sp->klist_devices, &i, (start ? &start->p->knode_bus : NULL)); while ((dev = next_device(&i))) { if (match(dev, data)) { get_device(dev); break; } } klist_iter_exit(&i); subsys_put(sp); return dev; } EXPORT_SYMBOL_GPL(bus_find_device); struct device *bus_find_device_reverse(const struct bus_type *bus, struct device *start, const void *data, device_match_t match) { struct subsys_private *sp = bus_to_subsys(bus); struct klist_iter i; struct device *dev; if (!sp) return NULL; klist_iter_init_node(&sp->klist_devices, &i, (start ? &start->p->knode_bus : NULL)); while ((dev = prev_device(&i))) { if (match(dev, data)) { get_device(dev); break; } } klist_iter_exit(&i); subsys_put(sp); return dev; } EXPORT_SYMBOL_GPL(bus_find_device_reverse); static struct device_driver *next_driver(struct klist_iter *i) { struct klist_node *n = klist_next(i); struct driver_private *drv_priv; if (n) { drv_priv = container_of(n, struct driver_private, knode_bus); return drv_priv->driver; } return NULL; } /** * bus_for_each_drv - driver iterator * @bus: bus we're dealing with. * @start: driver to start iterating on. * @data: data to pass to the callback. * @fn: function to call for each driver. 
* * This is nearly identical to the device iterator above. * We iterate over each driver that belongs to @bus, and call * @fn for each. If @fn returns anything but 0, we break out * and return it. If @start is not NULL, we use it as the head * of the list. * * NOTE: we don't return the driver that returns a non-zero * value, nor do we leave the reference count incremented for that * driver. If the caller needs to know that info, it must set it * in the callback. It must also be sure to increment the refcount * so it doesn't disappear before returning to the caller. */ int bus_for_each_drv(const struct bus_type *bus, struct device_driver *start, void *data, int (*fn)(struct device_driver *, void *)) { struct subsys_private *sp = bus_to_subsys(bus); struct klist_iter i; struct device_driver *drv; int error = 0; if (!sp) return -EINVAL; klist_iter_init_node(&sp->klist_drivers, &i, start ? &start->p->knode_bus : NULL); while ((drv = next_driver(&i)) && !error) error = fn(drv, data); klist_iter_exit(&i); subsys_put(sp); return error; } EXPORT_SYMBOL_GPL(bus_for_each_drv); static ssize_t driver_override_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int ret; ret = __device_set_driver_override(dev, buf, count); if (ret) return ret; return count; } static ssize_t driver_override_show(struct device *dev, struct device_attribute *attr, char *buf) { guard(spinlock)(&dev->driver_override.lock); return sysfs_emit(buf, "%s\n", dev->driver_override.name); } static DEVICE_ATTR_RW(driver_override); static struct attribute *driver_override_dev_attrs[] = { &dev_attr_driver_override.attr, NULL, }; static const struct attribute_group driver_override_dev_group = { .attrs = driver_override_dev_attrs, }; /** * bus_add_device - add device to bus * @dev: device being added * * - Add device's bus attributes. * - Create links to device's bus. * - Add the device to its bus's list of devices. 
*/ int bus_add_device(struct device *dev) { struct subsys_private *sp = bus_to_subsys(dev->bus); int error; if (!sp) { /* * This is a normal operation for many devices that do not * have a bus assigned to them, just say that all went * well. */ return 0; } /* * Reference in sp is now incremented and will be dropped when * the device is removed from the bus */ pr_debug("bus: '%s': add device %s\n", sp->bus->name, dev_name(dev)); error = device_add_groups(dev, sp->bus->dev_groups); if (error) goto out_put; if (dev->bus->driver_override) { error = device_add_group(dev, &driver_override_dev_group); if (error) goto out_groups; } error = sysfs_create_link(&sp->devices_kset->kobj, &dev->kobj, dev_name(dev)); if (error) goto out_override; error = sysfs_create_link(&dev->kobj, &sp->subsys.kobj, "subsystem"); if (error) goto out_subsys; klist_add_tail(&dev->p->knode_bus, &sp->klist_devices); return 0; out_subsys: sysfs_remove_link(&sp->devices_kset->kobj, dev_name(dev)); out_override: if (dev->bus->driver_override) device_remove_group(dev, &driver_override_dev_group); out_groups: device_remove_groups(dev, sp->bus->dev_groups); out_put: subsys_put(sp); return error; } /** * bus_probe_device - probe drivers for a new device * @dev: device to probe * * - Automatically probe for a driver if the bus allows it. */ void bus_probe_device(struct device *dev) { struct subsys_private *sp = bus_to_subsys(dev->bus); struct subsys_interface *sif; if (!sp) return; device_initial_probe(dev); mutex_lock(&sp->mutex); list_for_each_entry(sif, &sp->interfaces, node) if (sif->add_dev) sif->add_dev(dev, sif); mutex_unlock(&sp->mutex); subsys_put(sp); } /** * bus_remove_device - remove device from bus * @dev: device to be removed * * - Remove device from all interfaces. * - Remove symlink from bus' directory. * - Delete device from bus's list. * - Detach from its driver. * - Drop reference taken in bus_add_device(). 
*/ void bus_remove_device(struct device *dev) { struct subsys_private *sp = bus_to_subsys(dev->bus); struct subsys_interface *sif; if (!sp) return; mutex_lock(&sp->mutex); list_for_each_entry(sif, &sp->interfaces, node) if (sif->remove_dev) sif->remove_dev(dev, sif); mutex_unlock(&sp->mutex); sysfs_remove_link(&dev->kobj, "subsystem"); sysfs_remove_link(&sp->devices_kset->kobj, dev_name(dev)); if (dev->bus->driver_override) device_remove_group(dev, &driver_override_dev_group); device_remove_groups(dev, dev->bus->dev_groups); if (klist_node_attached(&dev->p->knode_bus)) klist_del(&dev->p->knode_bus); pr_debug("bus: '%s': remove device %s\n", dev->bus->name, dev_name(dev)); device_release_driver(dev); /* * Decrement the reference count twice, once for the bus_to_subsys() * call in the start of this function, and the second one from the * reference increment in bus_add_device() */ subsys_put(sp); subsys_put(sp); } static int __must_check add_bind_files(struct device_driver *drv) { int ret; ret = driver_create_file(drv, &driver_attr_unbind); if (ret == 0) { ret = driver_create_file(drv, &driver_attr_bind); if (ret) driver_remove_file(drv, &driver_attr_unbind); } return ret; } static void remove_bind_files(struct device_driver *drv) { driver_remove_file(drv, &driver_attr_bind); driver_remove_file(drv, &driver_attr_unbind); } static BUS_ATTR_WO(drivers_probe); static BUS_ATTR_RW(drivers_autoprobe); static int add_probe_files(const struct bus_type *bus) { int retval; retval = bus_create_file(bus, &bus_attr_drivers_probe); if (retval) goto out; retval = bus_create_file(bus, &bus_attr_drivers_autoprobe); if (retval) bus_remove_file(bus, &bus_attr_drivers_probe); out: return retval; } static void remove_probe_files(const struct bus_type *bus) { bus_remove_file(bus, &bus_attr_drivers_autoprobe); bus_remove_file(bus, &bus_attr_drivers_probe); } static ssize_t uevent_store(struct device_driver *drv, const char *buf, size_t count) { int rc; rc = 
kobject_synth_uevent(&drv->p->kobj, buf, count); return rc ? rc : count; } static DRIVER_ATTR_WO(uevent); /** * bus_add_driver - Add a driver to the bus. * @drv: driver. */ int bus_add_driver(struct device_driver *drv) { struct subsys_private *sp = bus_to_subsys(drv->bus); struct driver_private *priv; int error = 0; if (!sp) return -EINVAL; /* * Reference in sp is now incremented and will be dropped when * the driver is removed from the bus */ pr_debug("bus: '%s': add driver %s\n", sp->bus->name, drv->name); priv = kzalloc_obj(*priv); if (!priv) { error = -ENOMEM; goto out_put_bus; } klist_init(&priv->klist_devices, NULL, NULL); priv->driver = drv; drv->p = priv; priv->kobj.kset = sp->drivers_kset; error = kobject_init_and_add(&priv->kobj, &driver_ktype, NULL, "%s", drv->name); if (error) goto out_unregister; klist_add_tail(&priv->knode_bus, &sp->klist_drivers); if (sp->drivers_autoprobe) { error = driver_attach(drv); if (error) goto out_del_list; } error = module_add_driver(drv->owner, drv); if (error) { printk(KERN_ERR "%s: failed to create module links for %s\n", __func__, drv->name); goto out_detach; } error = driver_create_file(drv, &driver_attr_uevent); if (error) { printk(KERN_ERR "%s: uevent attr (%s) failed\n", __func__, drv->name); } error = driver_add_groups(drv, sp->bus->drv_groups); if (error) { /* How the hell do we get out of this pickle? Give up */ printk(KERN_ERR "%s: driver_add_groups(%s) failed\n", __func__, drv->name); } if (!drv->suppress_bind_attrs) { error = add_bind_files(drv); if (error) { /* Ditto */ printk(KERN_ERR "%s: add_bind_files(%s) failed\n", __func__, drv->name); } } return 0; out_detach: driver_detach(drv); out_del_list: klist_del(&priv->knode_bus); out_unregister: kobject_put(&priv->kobj); /* drv->p is freed in driver_release() */ drv->p = NULL; out_put_bus: subsys_put(sp); return error; } /** * bus_remove_driver - delete driver from bus's knowledge. * @drv: driver. 
* * Detach the driver from the devices it controls, and remove * it from its bus's list of drivers. Finally, we drop the reference * to the bus we took in bus_add_driver(). */ void bus_remove_driver(struct device_driver *drv) { struct subsys_private *sp = bus_to_subsys(drv->bus); if (!sp) return; pr_debug("bus: '%s': remove driver %s\n", sp->bus->name, drv->name); if (!drv->suppress_bind_attrs) remove_bind_files(drv); driver_remove_groups(drv, sp->bus->drv_groups); driver_remove_file(drv, &driver_attr_uevent); klist_remove(&drv->p->knode_bus); driver_detach(drv); module_remove_driver(drv); kobject_put(&drv->p->kobj); /* * Decrement the reference count twice, once for the bus_to_subsys() * call in the start of this function, and the second one from the * reference increment in bus_add_driver() */ subsys_put(sp); subsys_put(sp); } /* Helper for bus_rescan_devices's iter */ static int __must_check bus_rescan_devices_helper(struct device *dev, void *data) { int ret = 0; if (!dev->driver) { if (dev->parent && dev->bus->need_parent_lock) device_lock(dev->parent); ret = device_attach(dev); if (dev->parent && dev->bus->need_parent_lock) device_unlock(dev->parent); } return ret < 0 ? ret : 0; } /** * bus_rescan_devices - rescan devices on the bus for possible drivers * @bus: the bus to scan. * * This function will look for devices on the bus with no driver * attached and rescan it against existing drivers to see if it matches * any by calling device_attach() for the unbound devices. */ int bus_rescan_devices(const struct bus_type *bus) { return bus_for_each_dev(bus, NULL, NULL, bus_rescan_devices_helper); } EXPORT_SYMBOL_GPL(bus_rescan_devices); /** * device_reprobe - remove driver for a device and probe for a new driver * @dev: the device to reprobe * * This function detaches the attached driver (if any) for the given * device and restarts the driver probing process. 
It is intended * to use if probing criteria changed during a devices lifetime and * driver attachment should change accordingly. */ int device_reprobe(struct device *dev) { if (dev->driver) device_driver_detach(dev); return bus_rescan_devices_helper(dev, NULL); } EXPORT_SYMBOL_GPL(device_reprobe); static void klist_devices_get(struct klist_node *n) { struct device_private *dev_prv = to_device_private_bus(n); struct device *dev = dev_prv->device; get_device(dev); } static void klist_devices_put(struct klist_node *n) { struct device_private *dev_prv = to_device_private_bus(n); struct device *dev = dev_prv->device; put_device(dev); } static ssize_t bus_uevent_store(const struct bus_type *bus, const char *buf, size_t count) { struct subsys_private *sp = bus_to_subsys(bus); int ret; if (!sp) return -EINVAL; ret = kobject_synth_uevent(&sp->subsys.kobj, buf, count); subsys_put(sp); if (ret) return ret; return count; } /* * "open code" the old BUS_ATTR() macro here. We want to use BUS_ATTR_WO() * here, but can not use it as earlier in the file we have * DEVICE_ATTR_WO(uevent), which would cause a clash with the with the store * function name. */ static struct bus_attribute bus_attr_uevent = __ATTR(uevent, 0200, NULL, bus_uevent_store); /** * bus_register - register a driver-core subsystem * @bus: bus to register * * Once we have that, we register the bus with the kobject * infrastructure, then register the children subsystems it has: * the devices and drivers that belong to the subsystem. 
*/ int bus_register(const struct bus_type *bus) { int retval; struct subsys_private *priv; struct kobject *bus_kobj; struct lock_class_key *key; priv = kzalloc_obj(struct subsys_private); if (!priv) return -ENOMEM; priv->bus = bus; BLOCKING_INIT_NOTIFIER_HEAD(&priv->bus_notifier); bus_kobj = &priv->subsys.kobj; retval = kobject_set_name(bus_kobj, "%s", bus->name); if (retval) goto out; bus_kobj->kset = bus_kset; bus_kobj->ktype = &bus_ktype; priv->drivers_autoprobe = 1; retval = kset_register(&priv->subsys); if (retval) goto out; retval = bus_create_file(bus, &bus_attr_uevent); if (retval) goto bus_uevent_fail; priv->devices_kset = kset_create_and_add("devices", NULL, bus_kobj); if (!priv->devices_kset) { retval = -ENOMEM; goto bus_devices_fail; } priv->drivers_kset = kset_create_and_add("drivers", NULL, bus_kobj); if (!priv->drivers_kset) { retval = -ENOMEM; goto bus_drivers_fail; } INIT_LIST_HEAD(&priv->interfaces); key = &priv->lock_key; lockdep_register_key(key); __mutex_init(&priv->mutex, "subsys mutex", key); klist_init(&priv->klist_devices, klist_devices_get, klist_devices_put); klist_init(&priv->klist_drivers, NULL, NULL); retval = add_probe_files(bus); if (retval) goto bus_probe_files_fail; retval = sysfs_create_groups(bus_kobj, bus->bus_groups); if (retval) goto bus_groups_fail; pr_debug("bus: '%s': registered\n", bus->name); return 0; bus_groups_fail: remove_probe_files(bus); bus_probe_files_fail: kset_unregister(priv->drivers_kset); bus_drivers_fail: kset_unregister(priv->devices_kset); bus_devices_fail: bus_remove_file(bus, &bus_attr_uevent); bus_uevent_fail: kset_unregister(&priv->subsys); /* Above kset_unregister() will kfree @priv */ priv = NULL; out: kfree(priv); return retval; } EXPORT_SYMBOL_GPL(bus_register); /** * bus_unregister - remove a bus from the system * @bus: bus. * * Unregister the child subsystems and the bus itself. 
* Finally, we call bus_put() to release the refcount */ void bus_unregister(const struct bus_type *bus) { struct subsys_private *sp = bus_to_subsys(bus); struct kobject *bus_kobj; if (!sp) return; pr_debug("bus: '%s': unregistering\n", bus->name); if (sp->dev_root) device_unregister(sp->dev_root); bus_kobj = &sp->subsys.kobj; sysfs_remove_groups(bus_kobj, bus->bus_groups); remove_probe_files(bus); bus_remove_file(bus, &bus_attr_uevent); kset_unregister(sp->drivers_kset); kset_unregister(sp->devices_kset); kset_unregister(&sp->subsys); subsys_put(sp); } EXPORT_SYMBOL_GPL(bus_unregister); int bus_register_notifier(const struct bus_type *bus, struct notifier_block *nb) { struct subsys_private *sp = bus_to_subsys(bus); int retval; if (!sp) return -EINVAL; retval = blocking_notifier_chain_register(&sp->bus_notifier, nb); subsys_put(sp); return retval; } EXPORT_SYMBOL_GPL(bus_register_notifier); int bus_unregister_notifier(const struct bus_type *bus, struct notifier_block *nb) { struct subsys_private *sp = bus_to_subsys(bus); int retval; if (!sp) return -EINVAL; retval = blocking_notifier_chain_unregister(&sp->bus_notifier, nb); subsys_put(sp); return retval; } EXPORT_SYMBOL_GPL(bus_unregister_notifier); void bus_notify(struct device *dev, enum bus_notifier_event value) { struct subsys_private *sp = bus_to_subsys(dev->bus); if (!sp) return; blocking_notifier_call_chain(&sp->bus_notifier, value, dev); subsys_put(sp); } struct kset *bus_get_kset(const struct bus_type *bus) { struct subsys_private *sp = bus_to_subsys(bus); struct kset *kset; if (!sp) return NULL; kset = &sp->subsys; subsys_put(sp); return kset; } EXPORT_SYMBOL_GPL(bus_get_kset); /* * Yes, this forcibly breaks the klist abstraction temporarily. It * just wants to sort the klist, not change reference counts and * take/drop locks rapidly in the process. It does all this while * holding the lock for the list, so objects can't otherwise be * added/removed while we're swizzling. 
*/ static void device_insertion_sort_klist(struct device *a, struct list_head *list, int (*compare)(const struct device *a, const struct device *b)) { struct klist_node *n; struct device_private *dev_prv; struct device *b; list_for_each_entry(n, list, n_node) { dev_prv = to_device_private_bus(n); b = dev_prv->device; if (compare(a, b) <= 0) { list_move_tail(&a->p->knode_bus.n_node, &b->p->knode_bus.n_node); return; } } list_move_tail(&a->p->knode_bus.n_node, list); } void bus_sort_breadthfirst(const struct bus_type *bus, int (*compare)(const struct device *a, const struct device *b)) { struct subsys_private *sp = bus_to_subsys(bus); LIST_HEAD(sorted_devices); struct klist_node *n, *tmp; struct device_private *dev_prv; struct device *dev; struct klist *device_klist; if (!sp) return; device_klist = &sp->klist_devices; spin_lock(&device_klist->k_lock); list_for_each_entry_safe(n, tmp, &device_klist->k_list, n_node) { dev_prv = to_device_private_bus(n); dev = dev_prv->device; device_insertion_sort_klist(dev, &sorted_devices, compare); } list_splice(&sorted_devices, &device_klist->k_list); spin_unlock(&device_klist->k_lock); subsys_put(sp); } EXPORT_SYMBOL_GPL(bus_sort_breadthfirst); struct subsys_dev_iter { struct klist_iter ki; const struct device_type *type; }; /** * subsys_dev_iter_init - initialize subsys device iterator * @iter: subsys iterator to initialize * @sp: the subsys private (i.e. bus) we wanna iterate over * @start: the device to start iterating from, if any * @type: device_type of the devices to iterate over, NULL for all * * Initialize subsys iterator @iter such that it iterates over devices * of @subsys. If @start is set, the list iteration will start there, * otherwise if it is NULL, the iteration starts at the beginning of * the list. 
*/ static void subsys_dev_iter_init(struct subsys_dev_iter *iter, struct subsys_private *sp, struct device *start, const struct device_type *type) { struct klist_node *start_knode = NULL; if (start) start_knode = &start->p->knode_bus; klist_iter_init_node(&sp->klist_devices, &iter->ki, start_knode); iter->type = type; } /** * subsys_dev_iter_next - iterate to the next device * @iter: subsys iterator to proceed * * Proceed @iter to the next device and return it. Returns NULL if * iteration is complete. * * The returned device is referenced and won't be released till * iterator is proceed to the next device or exited. The caller is * free to do whatever it wants to do with the device including * calling back into subsys code. */ static struct device *subsys_dev_iter_next(struct subsys_dev_iter *iter) { struct klist_node *knode; struct device *dev; for (;;) { knode = klist_next(&iter->ki); if (!knode) return NULL; dev = to_device_private_bus(knode)->device; if (!iter->type || iter->type == dev->type) return dev; } } /** * subsys_dev_iter_exit - finish iteration * @iter: subsys iterator to finish * * Finish an iteration. Always call this function after iteration is * complete whether the iteration ran till the end or not. 
*/ static void subsys_dev_iter_exit(struct subsys_dev_iter *iter) { klist_iter_exit(&iter->ki); } int subsys_interface_register(struct subsys_interface *sif) { struct subsys_private *sp; struct subsys_dev_iter iter; struct device *dev; if (!sif || !sif->subsys) return -ENODEV; sp = bus_to_subsys(sif->subsys); if (!sp) return -EINVAL; /* * Reference in sp is now incremented and will be dropped when * the interface is removed from the bus */ mutex_lock(&sp->mutex); list_add_tail(&sif->node, &sp->interfaces); if (sif->add_dev) { subsys_dev_iter_init(&iter, sp, NULL, NULL); while ((dev = subsys_dev_iter_next(&iter))) sif->add_dev(dev, sif); subsys_dev_iter_exit(&iter); } mutex_unlock(&sp->mutex); return 0; } EXPORT_SYMBOL_GPL(subsys_interface_register); void subsys_interface_unregister(struct subsys_interface *sif) { struct subsys_private *sp; struct subsys_dev_iter iter; struct device *dev; if (!sif || !sif->subsys) return; sp = bus_to_subsys(sif->subsys); if (!sp) return; mutex_lock(&sp->mutex); list_del_init(&sif->node); if (sif->remove_dev) { subsys_dev_iter_init(&iter, sp, NULL, NULL); while ((dev = subsys_dev_iter_next(&iter))) sif->remove_dev(dev, sif); subsys_dev_iter_exit(&iter); } mutex_unlock(&sp->mutex); /* * Decrement the reference count twice, once for the bus_to_subsys() * call in the start of this function, and the second one from the * reference increment in subsys_interface_register() */ subsys_put(sp); subsys_put(sp); } EXPORT_SYMBOL_GPL(subsys_interface_unregister); static void system_root_device_release(struct device *dev) { kfree(dev); } static int subsys_register(const struct bus_type *subsys, const struct attribute_group **groups, struct kobject *parent_of_root) { struct subsys_private *sp; struct device *dev; int err; err = bus_register(subsys); if (err < 0) return err; sp = bus_to_subsys(subsys); if (!sp) { err = -EINVAL; goto err_sp; } dev = kzalloc_obj(struct device); if (!dev) { err = -ENOMEM; goto err_dev; } err = dev_set_name(dev, "%s", 
subsys->name); if (err < 0) goto err_name; dev->kobj.parent = parent_of_root; dev->groups = groups; dev->release = system_root_device_release; err = device_register(dev); if (err < 0) goto err_dev_reg; sp->dev_root = dev; subsys_put(sp); return 0; err_dev_reg: put_device(dev); dev = NULL; err_name: kfree(dev); err_dev: subsys_put(sp); err_sp: bus_unregister(subsys); return err; } /** * subsys_system_register - register a subsystem at /sys/devices/system/ * @subsys: system subsystem * @groups: default attributes for the root device * * All 'system' subsystems have a /sys/devices/system/<name> root device * with the name of the subsystem. The root device can carry subsystem- * wide attributes. All registered devices are below this single root * device and are named after the subsystem with a simple enumeration * number appended. The registered devices are not explicitly named; * only 'id' in the device needs to be set. * * Do not use this interface for anything new, it exists for compatibility * with bad ideas only. New subsystems should use plain subsystems; and * add the subsystem-wide attributes should be added to the subsystem * directory itself and not some create fake root-device placed in * /sys/devices/system/<name>. */ int subsys_system_register(const struct bus_type *subsys, const struct attribute_group **groups) { return subsys_register(subsys, groups, &system_kset->kobj); } EXPORT_SYMBOL_GPL(subsys_system_register); /** * subsys_virtual_register - register a subsystem at /sys/devices/virtual/ * @subsys: virtual subsystem * @groups: default attributes for the root device * * All 'virtual' subsystems have a /sys/devices/system/<name> root device * with the name of the subsystem. The root device can carry subsystem-wide * attributes. All registered devices are below this single root device. * There's no restriction on device naming. This is for kernel software * constructs which need sysfs interface. 
*/ int subsys_virtual_register(const struct bus_type *subsys, const struct attribute_group **groups) { struct kobject *virtual_dir; virtual_dir = virtual_device_parent(); if (!virtual_dir) return -ENOMEM; return subsys_register(subsys, groups, virtual_dir); } EXPORT_SYMBOL_GPL(subsys_virtual_register); /** * driver_find - locate driver on a bus by its name. * @name: name of the driver. * @bus: bus to scan for the driver. * * Call kset_find_obj() to iterate over list of drivers on * a bus to find driver by name. Return driver if found. * * This routine provides no locking to prevent the driver it returns * from being unregistered or unloaded while the caller is using it. * The caller is responsible for preventing this. */ struct device_driver *driver_find(const char *name, const struct bus_type *bus) { struct subsys_private *sp = bus_to_subsys(bus); struct kobject *k; struct driver_private *priv; if (!sp) return NULL; k = kset_find_obj(sp->drivers_kset, name); subsys_put(sp); if (!k) return NULL; priv = to_driver(k); /* Drop reference added by kset_find_obj() */ kobject_put(k); return priv->driver; } EXPORT_SYMBOL_GPL(driver_find); /* * Warning, the value could go to "removed" instantly after calling this function, so be very * careful when calling it... */ bool bus_is_registered(const struct bus_type *bus) { struct subsys_private *sp = bus_to_subsys(bus); bool is_initialized = false; if (sp) { is_initialized = true; subsys_put(sp); } return is_initialized; } /** * bus_get_dev_root - return a pointer to the "device root" of a bus * @bus: bus to return the device root of. * * If a bus has a "device root" structure, return it, WITH THE REFERENCE * COUNT INCREMENTED. * * Note, when finished with the device, a call to put_device() is required. * * If the device root is not present (or bus is not a valid pointer), NULL * will be returned. 
*/ struct device *bus_get_dev_root(const struct bus_type *bus) { struct subsys_private *sp = bus_to_subsys(bus); struct device *dev_root; if (!sp) return NULL; dev_root = get_device(sp->dev_root); subsys_put(sp); return dev_root; } EXPORT_SYMBOL_GPL(bus_get_dev_root); int __init buses_init(void) { bus_kset = kset_create_and_add("bus", &bus_uevent_ops, NULL); if (!bus_kset) return -ENOMEM; system_kset = kset_create_and_add("system", NULL, &devices_kset->kobj); if (!system_kset) { /* Do error handling here as devices_init() do */ kset_unregister(bus_kset); bus_kset = NULL; pr_err("%s: failed to create and add kset 'bus'\n", __func__); return -ENOMEM; } return 0; }
3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 
527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ #ifndef __XFS_MOUNT_H__ #define __XFS_MOUNT_H__ struct xlog; struct xfs_inode; struct xfs_mru_cache; struct xfs_ail; struct xfs_quotainfo; struct xfs_da_geometry; struct xfs_perag; struct xfs_healthmon; /* dynamic preallocation free space thresholds, 5% down to 1% */ enum { XFS_LOWSP_1_PCNT = 0, XFS_LOWSP_2_PCNT, XFS_LOWSP_3_PCNT, XFS_LOWSP_4_PCNT, XFS_LOWSP_5_PCNT, XFS_LOWSP_MAX, }; /* * Error Configuration * * Error classes define the subsystem the configuration belongs to. * Error numbers define the errors that are configurable. 
*/ enum { XFS_ERR_METADATA, XFS_ERR_CLASS_MAX, }; enum { XFS_ERR_DEFAULT, XFS_ERR_EIO, XFS_ERR_ENOSPC, XFS_ERR_ENODEV, XFS_ERR_ERRNO_MAX, }; #define XFS_ERR_RETRY_FOREVER -1 /* * Although retry_timeout is in jiffies which is normally an unsigned long, * we limit the retry timeout to 86400 seconds, or one day. So even a * signed 32-bit long is sufficient for a HZ value up to 24855. Making it * signed lets us store the special "-1" value, meaning retry forever. */ struct xfs_error_cfg { struct xfs_kobj kobj; int max_retries; long retry_timeout; /* in jiffies, -1 = infinite */ }; /* * Per-cpu deferred inode inactivation GC lists. */ struct xfs_inodegc { struct xfs_mount *mp; struct llist_head list; struct delayed_work work; int error; /* approximate count of inodes in the list */ unsigned int items; unsigned int shrinker_hits; unsigned int cpu; }; /* * Container for each type of groups, used to look up individual groups and * describes the geometry. */ struct xfs_groups { struct xarray xa; /* * Maximum capacity of the group in FSBs. * * Each group is laid out densely in the daddr space. For the * degenerate case of a pre-rtgroups filesystem, the incore rtgroup * pretends to have a zero-block and zero-blklog rtgroup. */ uint32_t blocks; /* * Log(2) of the logical size of each group. * * Compared to the blocks field above this is rounded up to the next * power of two, and thus lays out the xfs_fsblock_t/xfs_rtblock_t * space sparsely with a hole from blocks to (1 << blklog) at the end * of each group. */ uint8_t blklog; /* * Zoned devices can have gaps beyond the usable capacity of a zone and * the end in the LBA/daddr address space. In other words, the hardware * equivalent to the RT groups already takes care of the power of 2 * alignment for us. In this case the sparse FSB/RTB address space maps * 1:1 to the device address space. */ bool has_daddr_gaps; /* * Mask to extract the group-relative block number from a FSB. 
* For a pre-rtgroups filesystem we pretend to have one very large * rtgroup, so this mask must be 64-bit. */ uint64_t blkmask; /* * Start of the first group in the device. This is used to support a * RT device following the data device on the same block device for * SMR hard drives. */ xfs_fsblock_t start_fsb; /* * Maximum length of an atomic write for files stored in this * collection of allocation groups, in fsblocks. */ xfs_extlen_t awu_max; }; struct xfs_freecounter { /* free blocks for general use: */ struct percpu_counter count; /* total reserved blocks: */ uint64_t res_total; /* available reserved blocks: */ uint64_t res_avail; /* reserved blks @ remount,ro: */ uint64_t res_saved; }; /* * The struct xfsmount layout is optimised to separate read-mostly variables * from variables that are frequently modified. We put the read-mostly variables * first, then place all the other variables at the end. * * Typically, read-mostly variables are those that are set at mount time and * never changed again, or only change rarely as a result of things like sysfs * knobs being tweaked. 
*/ typedef struct xfs_mount { struct xfs_sb m_sb; /* copy of fs superblock */ struct super_block *m_super; struct xfs_ail *m_ail; /* fs active log item list */ struct xfs_buf *m_sb_bp; /* buffer for superblock */ struct xfs_buf *m_rtsb_bp; /* realtime superblock */ char *m_rtname; /* realtime device name */ char *m_logname; /* external log device name */ struct xfs_da_geometry *m_dir_geo; /* directory block geometry */ struct xfs_da_geometry *m_attr_geo; /* attribute block geometry */ struct xlog *m_log; /* log specific stuff */ struct xfs_inode *m_rootip; /* pointer to root directory */ struct xfs_inode *m_metadirip; /* ptr to metadata directory */ struct xfs_inode *m_rtdirip; /* ptr to realtime metadir */ struct xfs_quotainfo *m_quotainfo; /* disk quota information */ struct xfs_buftarg *m_ddev_targp; /* data device */ struct xfs_buftarg *m_logdev_targp;/* log device */ struct xfs_buftarg *m_rtdev_targp; /* rt device */ void __percpu *m_inodegc; /* percpu inodegc structures */ struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ struct workqueue_struct *m_buf_workqueue; struct workqueue_struct *m_unwritten_workqueue; struct workqueue_struct *m_reclaim_workqueue; struct workqueue_struct *m_sync_workqueue; struct workqueue_struct *m_blockgc_wq; struct workqueue_struct *m_inodegc_wq; int m_bsize; /* fs logical block size */ uint8_t m_blkbit_log; /* blocklog + NBBY */ uint8_t m_blkbb_log; /* blocklog - BBSHIFT */ uint8_t m_agno_log; /* log #ag's */ uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ int8_t m_rtxblklog; /* log2 of rextsize, if possible */ uint m_blockmask; /* sb_blocksize-1 */ uint m_blockwsize; /* sb_blocksize in words */ /* number of rt extents per rt bitmap block if rtgroups enabled */ unsigned int m_rtx_per_rbmblock; uint m_alloc_mxr[2]; /* max alloc btree records */ uint m_alloc_mnr[2]; /* min alloc btree records */ uint m_bmap_dmxr[2]; /* max bmap btree records */ uint m_bmap_dmnr[2]; /* min bmap btree records */ uint m_rmap_mxr[2]; /* 
max rmap btree records */ uint m_rmap_mnr[2]; /* min rmap btree records */ uint m_rtrmap_mxr[2]; /* max rtrmap btree records */ uint m_rtrmap_mnr[2]; /* min rtrmap btree records */ uint m_refc_mxr[2]; /* max refc btree records */ uint m_refc_mnr[2]; /* min refc btree records */ uint m_rtrefc_mxr[2]; /* max rtrefc btree records */ uint m_rtrefc_mnr[2]; /* min rtrefc btree records */ uint m_alloc_maxlevels; /* max alloc btree levels */ uint m_bm_maxlevels[2]; /* max bmap btree levels */ uint m_rmap_maxlevels; /* max rmap btree levels */ uint m_rtrmap_maxlevels; /* max rtrmap btree level */ uint m_refc_maxlevels; /* max refcount btree level */ uint m_rtrefc_maxlevels; /* max rtrefc btree level */ unsigned int m_agbtree_maxlevels; /* max level of all AG btrees */ unsigned int m_rtbtree_maxlevels; /* max level of all rt btrees */ xfs_extlen_t m_ag_prealloc_blocks; /* reserved ag blocks */ uint m_alloc_set_aside; /* space we can't use */ uint m_ag_max_usable; /* max space per AG */ int m_dalign; /* stripe unit */ int m_swidth; /* stripe width */ xfs_agnumber_t m_maxagi; /* highest inode alloc group */ uint m_allocsize_log;/* min write size log bytes */ uint m_allocsize_blocks; /* min write size blocks */ int m_logbufs; /* number of log buffers */ int m_logbsize; /* size of each log buffer */ unsigned int m_rsumlevels; /* rt summary levels */ xfs_filblks_t m_rsumblocks; /* size of rt summary, FSBs */ int m_fixedfsid[2]; /* unchanged for life of FS */ uint m_qflags; /* quota status flags */ uint64_t m_features; /* active filesystem features */ uint64_t m_low_space[XFS_LOWSP_MAX]; uint64_t m_low_rtexts[XFS_LOWSP_MAX]; uint64_t m_rtxblkmask; /* rt extent block mask */ struct xfs_ino_geometry m_ino_geo; /* inode geometry */ struct xfs_trans_resv m_resv; /* precomputed res values */ /* low free space thresholds */ unsigned long m_opstate; /* dynamic state flags */ bool m_always_cow; bool m_fail_unmount; bool m_finobt_nores; /* no per-AG finobt resv. 
*/ bool m_update_sb; /* sb needs update in mount */ unsigned int m_max_open_zones; unsigned int m_zonegc_low_space; /* max_atomic_write mount option value */ unsigned long long m_awu_max_bytes; /* * Bitsets of per-fs metadata that have been checked and/or are sick. * Callers must hold m_sb_lock to access these two fields. */ uint8_t m_fs_checked; uint8_t m_fs_sick; /* * Bitsets of rt metadata that have been checked and/or are sick. * Callers must hold m_sb_lock to access this field. */ uint8_t m_rt_checked; uint8_t m_rt_sick; /* * End of read-mostly variables. Frequently written variables and locks * should be placed below this comment from now on. The first variable * here is marked as cacheline aligned so they it is separated from * the read-mostly variables. */ spinlock_t ____cacheline_aligned m_sb_lock; /* sb counter lock */ struct percpu_counter m_icount; /* allocated inodes counter */ struct percpu_counter m_ifree; /* free inodes counter */ struct xfs_freecounter m_free[XC_FREE_NR]; /* * Count of data device blocks reserved for delayed allocations, * including indlen blocks. Does not include allocated CoW staging * extents or anything related to the rt device. */ struct percpu_counter m_delalloc_blks; /* * RT version of the above. */ struct percpu_counter m_delalloc_rtextents; /* * Global count of allocation btree blocks in use across all AGs. Only * used when perag reservation is enabled. Helps prevent block * reservation from attempting to reserve allocation btree blocks. 
*/ atomic64_t m_allocbt_blks; struct xfs_groups m_groups[XG_TYPE_MAX]; struct delayed_work m_reclaim_work; /* background inode reclaim */ struct xfs_zone_info *m_zone_info; /* zone allocator information */ struct dentry *m_debugfs; /* debugfs parent */ struct xfs_kobj m_kobj; struct xfs_kobj m_error_kobj; struct xfs_kobj m_error_meta_kobj; struct xfs_error_cfg m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX]; struct xstats m_stats; /* per-fs stats */ #ifdef CONFIG_XFS_ONLINE_SCRUB_STATS struct xchk_stats *m_scrub_stats; #endif struct xfs_kobj m_zoned_kobj; xfs_agnumber_t m_agfrotor; /* last ag where space found */ atomic_t m_agirotor; /* last ag dir inode alloced */ atomic_t m_rtgrotor; /* last rtgroup rtpicked */ struct mutex m_metafile_resv_lock; uint64_t m_metafile_resv_target; uint64_t m_metafile_resv_used; uint64_t m_metafile_resv_avail; /* Memory shrinker to throttle and reprioritize inodegc */ struct shrinker *m_inodegc_shrinker; /* * Workqueue item so that we can coalesce multiple inode flush attempts * into a single flush. */ struct work_struct m_flush_inodes_work; /* * Generation of the filesysyem layout. This is incremented by each * growfs, and used by the pNFS server to ensure the client updates * its view of the block device once it gets a layout that might * reference the newly added blocks. Does not need to be persistent * as long as we only allow file system size increments, but if we * ever support shrinks it would have to be persisted in addition * to various other kinds of pain inflicted on the pNFS server. */ uint32_t m_generation; struct mutex m_growlock; /* growfs mutex */ #ifdef DEBUG /* * Frequency with which errors are injected. Replaces xfs_etest; the * value stored in here is the inverse of the frequency with which the * error triggers. 1 = always, 2 = half the time, etc. 
*/ unsigned int *m_errortag; struct xfs_kobj m_errortag_kobj; #endif /* cpus that have inodes queued for inactivation */ struct cpumask m_inodegc_cpumask; /* Hook to feed dirent updates to an active online repair. */ struct xfs_hooks m_dir_update_hooks; /* Private data referring to a health monitor object. */ struct xfs_healthmon __rcu *m_healthmon; } xfs_mount_t; #define M_IGEO(mp) (&(mp)->m_ino_geo) /* * Flags for m_features. * * These are all the active features in the filesystem, regardless of how * they are configured. */ #define XFS_FEAT_ATTR (1ULL << 0) /* xattrs present in fs */ #define XFS_FEAT_NLINK (1ULL << 1) /* 32 bit link counts */ #define XFS_FEAT_QUOTA (1ULL << 2) /* quota active */ #define XFS_FEAT_ALIGN (1ULL << 3) /* inode alignment */ #define XFS_FEAT_DALIGN (1ULL << 4) /* data alignment */ #define XFS_FEAT_LOGV2 (1ULL << 5) /* version 2 logs */ #define XFS_FEAT_SECTOR (1ULL << 6) /* sector size > 512 bytes */ #define XFS_FEAT_EXTFLG (1ULL << 7) /* unwritten extents */ #define XFS_FEAT_ASCIICI (1ULL << 8) /* ASCII only case-insens. 
*/ #define XFS_FEAT_LAZYSBCOUNT (1ULL << 9) /* Superblk counters */ #define XFS_FEAT_PARENT (1ULL << 11) /* parent pointers */ #define XFS_FEAT_PROJID32 (1ULL << 12) /* 32 bit project id */ #define XFS_FEAT_CRC (1ULL << 13) /* metadata CRCs */ #define XFS_FEAT_V3INODES (1ULL << 14) /* Version 3 inodes */ #define XFS_FEAT_PQUOTINO (1ULL << 15) /* non-shared proj/grp quotas */ #define XFS_FEAT_FTYPE (1ULL << 16) /* inode type in dir */ #define XFS_FEAT_FINOBT (1ULL << 17) /* free inode btree */ #define XFS_FEAT_RMAPBT (1ULL << 18) /* reverse map btree */ #define XFS_FEAT_REFLINK (1ULL << 19) /* reflinked files */ #define XFS_FEAT_SPINODES (1ULL << 20) /* sparse inode chunks */ #define XFS_FEAT_META_UUID (1ULL << 21) /* metadata UUID */ #define XFS_FEAT_REALTIME (1ULL << 22) /* realtime device present */ #define XFS_FEAT_INOBTCNT (1ULL << 23) /* inobt block counts */ #define XFS_FEAT_BIGTIME (1ULL << 24) /* large timestamps */ #define XFS_FEAT_NEEDSREPAIR (1ULL << 25) /* needs xfs_repair */ #define XFS_FEAT_NREXT64 (1ULL << 26) /* large extent counters */ #define XFS_FEAT_EXCHANGE_RANGE (1ULL << 27) /* exchange range */ #define XFS_FEAT_METADIR (1ULL << 28) /* metadata directory tree */ #define XFS_FEAT_ZONED (1ULL << 29) /* zoned RT device */ /* Mount features */ #define XFS_FEAT_NOLIFETIME (1ULL << 47) /* disable lifetime hints */ #define XFS_FEAT_NOALIGN (1ULL << 49) /* ignore alignment */ #define XFS_FEAT_ALLOCSIZE (1ULL << 50) /* user specified allocation size */ #define XFS_FEAT_LARGE_IOSIZE (1ULL << 51) /* report large preferred * I/O size in stat() */ #define XFS_FEAT_WSYNC (1ULL << 52) /* synchronous metadata ops */ #define XFS_FEAT_DIRSYNC (1ULL << 53) /* synchronous directory ops */ #define XFS_FEAT_DISCARD (1ULL << 54) /* discard unused blocks */ #define XFS_FEAT_GRPID (1ULL << 55) /* group-ID assigned from directory */ #define XFS_FEAT_SMALL_INUMS (1ULL << 56) /* user wants 32bit inodes */ #define XFS_FEAT_SWALLOC (1ULL << 58) /* stripe width allocation 
*/ #define XFS_FEAT_FILESTREAMS (1ULL << 59) /* use filestreams allocator */ #define XFS_FEAT_DAX_ALWAYS (1ULL << 60) /* DAX always enabled */ #define XFS_FEAT_DAX_NEVER (1ULL << 61) /* DAX never enabled */ #define XFS_FEAT_NORECOVERY (1ULL << 62) /* no recovery - dirty fs */ #define XFS_FEAT_NOUUID (1ULL << 63) /* ignore uuid during mount */ #define __XFS_HAS_FEAT(name, NAME) \ static inline bool xfs_has_ ## name (const struct xfs_mount *mp) \ { \ return mp->m_features & XFS_FEAT_ ## NAME; \ } /* Some features can be added dynamically so they need a set wrapper, too. */ #define __XFS_ADD_FEAT(name, NAME) \ __XFS_HAS_FEAT(name, NAME); \ static inline void xfs_add_ ## name (struct xfs_mount *mp) \ { \ mp->m_features |= XFS_FEAT_ ## NAME; \ xfs_sb_version_add ## name(&mp->m_sb); \ } /* Superblock features */ __XFS_ADD_FEAT(attr, ATTR) __XFS_HAS_FEAT(nlink, NLINK) __XFS_ADD_FEAT(quota, QUOTA) __XFS_HAS_FEAT(dalign, DALIGN) __XFS_HAS_FEAT(sector, SECTOR) __XFS_HAS_FEAT(asciici, ASCIICI) __XFS_HAS_FEAT(parent, PARENT) __XFS_HAS_FEAT(ftype, FTYPE) __XFS_HAS_FEAT(finobt, FINOBT) __XFS_HAS_FEAT(rmapbt, RMAPBT) __XFS_HAS_FEAT(reflink, REFLINK) __XFS_HAS_FEAT(sparseinodes, SPINODES) __XFS_HAS_FEAT(metauuid, META_UUID) __XFS_HAS_FEAT(realtime, REALTIME) __XFS_HAS_FEAT(inobtcounts, INOBTCNT) __XFS_HAS_FEAT(bigtime, BIGTIME) __XFS_HAS_FEAT(needsrepair, NEEDSREPAIR) __XFS_HAS_FEAT(large_extent_counts, NREXT64) __XFS_HAS_FEAT(exchange_range, EXCHANGE_RANGE) __XFS_HAS_FEAT(metadir, METADIR) __XFS_HAS_FEAT(zoned, ZONED) __XFS_HAS_FEAT(nolifetime, NOLIFETIME) static inline bool xfs_has_rtgroups(const struct xfs_mount *mp) { /* all metadir file systems also allow rtgroups */ return xfs_has_metadir(mp); } static inline bool xfs_has_rtsb(const struct xfs_mount *mp) { /* all rtgroups filesystems with an rt section have an rtsb */ return xfs_has_rtgroups(mp) && xfs_has_realtime(mp) && !xfs_has_zoned(mp); } static inline bool xfs_has_rtrmapbt(const struct xfs_mount *mp) { return 
xfs_has_rtgroups(mp) && xfs_has_realtime(mp) && xfs_has_rmapbt(mp); } static inline bool xfs_has_rtreflink(const struct xfs_mount *mp) { return xfs_has_metadir(mp) && xfs_has_realtime(mp) && xfs_has_reflink(mp); } static inline bool xfs_has_nonzoned(const struct xfs_mount *mp) { return !xfs_has_zoned(mp); } static inline bool xfs_can_sw_atomic_write(struct xfs_mount *mp) { return xfs_has_reflink(mp); } /* * Some features are always on for v5 file systems, allow the compiler to * eliminiate dead code when building without v4 support. */ #define __XFS_HAS_V4_FEAT(name, NAME) \ static inline bool xfs_has_ ## name (struct xfs_mount *mp) \ { \ return !IS_ENABLED(CONFIG_XFS_SUPPORT_V4) || \ (mp->m_features & XFS_FEAT_ ## NAME); \ } #define __XFS_ADD_V4_FEAT(name, NAME) \ __XFS_HAS_V4_FEAT(name, NAME); \ static inline void xfs_add_ ## name (struct xfs_mount *mp) \ { \ if (IS_ENABLED(CONFIG_XFS_SUPPORT_V4)) { \ mp->m_features |= XFS_FEAT_ ## NAME; \ xfs_sb_version_add ## name(&mp->m_sb); \ } \ } __XFS_HAS_V4_FEAT(align, ALIGN) __XFS_HAS_V4_FEAT(logv2, LOGV2) __XFS_HAS_V4_FEAT(extflg, EXTFLG) __XFS_HAS_V4_FEAT(lazysbcount, LAZYSBCOUNT) __XFS_ADD_V4_FEAT(projid32, PROJID32) __XFS_HAS_V4_FEAT(v3inodes, V3INODES) __XFS_HAS_V4_FEAT(crc, CRC) __XFS_HAS_V4_FEAT(pquotino, PQUOTINO) static inline void xfs_add_attr2(struct xfs_mount *mp) { if (IS_ENABLED(CONFIG_XFS_SUPPORT_V4)) xfs_sb_version_addattr2(&mp->m_sb); } /* * Mount features * * These do not change dynamically - features that can come and go, such as 32 * bit inodes and read-only state, are kept as operational state rather than * features. 
*/ __XFS_HAS_FEAT(noalign, NOALIGN) __XFS_HAS_FEAT(allocsize, ALLOCSIZE) __XFS_HAS_FEAT(large_iosize, LARGE_IOSIZE) __XFS_HAS_FEAT(wsync, WSYNC) __XFS_HAS_FEAT(dirsync, DIRSYNC) __XFS_HAS_FEAT(discard, DISCARD) __XFS_HAS_FEAT(grpid, GRPID) __XFS_HAS_FEAT(small_inums, SMALL_INUMS) __XFS_HAS_FEAT(swalloc, SWALLOC) __XFS_HAS_FEAT(filestreams, FILESTREAMS) __XFS_HAS_FEAT(dax_always, DAX_ALWAYS) __XFS_HAS_FEAT(dax_never, DAX_NEVER) __XFS_HAS_FEAT(norecovery, NORECOVERY) __XFS_HAS_FEAT(nouuid, NOUUID) /* * Operational mount state flags * * Use these with atomic bit ops only! */ #define XFS_OPSTATE_UNMOUNTING 0 /* filesystem is unmounting */ #define XFS_OPSTATE_CLEAN 1 /* mount was clean */ #define XFS_OPSTATE_SHUTDOWN 2 /* stop all fs operations */ #define XFS_OPSTATE_INODE32 3 /* inode32 allocator active */ #define XFS_OPSTATE_READONLY 4 /* read-only fs */ /* * If set, inactivation worker threads will be scheduled to process queued * inodegc work. If not, queued inodes remain in memory waiting to be * processed. */ #define XFS_OPSTATE_INODEGC_ENABLED 5 /* * If set, background speculative prealloc gc worker threads will be scheduled * to process queued blockgc work. If not, inodes retain their preallocations * until explicitly deleted. */ #define XFS_OPSTATE_BLOCKGC_ENABLED 6 /* Kernel has logged a warning about shrink being used on this fs. */ #define XFS_OPSTATE_WARNED_SHRINK 9 /* Kernel has logged a warning about logged xattr updates being used. */ #define XFS_OPSTATE_WARNED_LARP 10 /* Mount time quotacheck is running */ #define XFS_OPSTATE_QUOTACHECK_RUNNING 11 /* Do we want to clear log incompat flags? */ #define XFS_OPSTATE_UNSET_LOG_INCOMPAT 12 /* Filesystem can use logged extended attributes */ #define XFS_OPSTATE_USE_LARP 13 /* Kernel has logged a warning about blocksize > pagesize on this fs. */ #define XFS_OPSTATE_WARNED_LBS 14 /* Kernel has logged a warning about metadata dirs being used on this fs. 
*/ #define XFS_OPSTATE_WARNED_METADIR 17 /* Filesystem should use qflags to determine quotaon status */ #define XFS_OPSTATE_RESUMING_QUOTAON 18 /* Kernel has logged a warning about zoned RT device being used on this fs. */ #define XFS_OPSTATE_WARNED_ZONED 19 /* (Zoned) GC is in progress */ #define XFS_OPSTATE_ZONEGC_RUNNING 20 #define __XFS_IS_OPSTATE(name, NAME) \ static inline bool xfs_is_ ## name (struct xfs_mount *mp) \ { \ return test_bit(XFS_OPSTATE_ ## NAME, &mp->m_opstate); \ } \ static inline bool xfs_clear_ ## name (struct xfs_mount *mp) \ { \ return test_and_clear_bit(XFS_OPSTATE_ ## NAME, &mp->m_opstate); \ } \ static inline bool xfs_set_ ## name (struct xfs_mount *mp) \ { \ return test_and_set_bit(XFS_OPSTATE_ ## NAME, &mp->m_opstate); \ } __XFS_IS_OPSTATE(unmounting, UNMOUNTING) __XFS_IS_OPSTATE(clean, CLEAN) __XFS_IS_OPSTATE(shutdown, SHUTDOWN) __XFS_IS_OPSTATE(inode32, INODE32) __XFS_IS_OPSTATE(readonly, READONLY) __XFS_IS_OPSTATE(inodegc_enabled, INODEGC_ENABLED) __XFS_IS_OPSTATE(blockgc_enabled, BLOCKGC_ENABLED) #ifdef CONFIG_XFS_QUOTA __XFS_IS_OPSTATE(quotacheck_running, QUOTACHECK_RUNNING) __XFS_IS_OPSTATE(resuming_quotaon, RESUMING_QUOTAON) #else static inline bool xfs_is_quotacheck_running(struct xfs_mount *mp) { return false; } static inline bool xfs_is_resuming_quotaon(struct xfs_mount *mp) { return false; } static inline void xfs_set_resuming_quotaon(struct xfs_mount *m) { } static inline bool xfs_clear_resuming_quotaon(struct xfs_mount *mp) { return false; } #endif /* CONFIG_XFS_QUOTA */ __XFS_IS_OPSTATE(done_with_log_incompat, UNSET_LOG_INCOMPAT) __XFS_IS_OPSTATE(using_logged_xattrs, USE_LARP) __XFS_IS_OPSTATE(zonegc_running, ZONEGC_RUNNING) static inline bool xfs_should_warn(struct xfs_mount *mp, long nr) { return !test_and_set_bit(nr, &mp->m_opstate); } #define XFS_OPSTATE_STRINGS \ { (1UL << XFS_OPSTATE_UNMOUNTING), "unmounting" }, \ { (1UL << XFS_OPSTATE_CLEAN), "clean" }, \ { (1UL << XFS_OPSTATE_SHUTDOWN), "shutdown" }, \ { (1UL << 
XFS_OPSTATE_INODE32), "inode32" }, \ { (1UL << XFS_OPSTATE_READONLY), "read_only" }, \ { (1UL << XFS_OPSTATE_INODEGC_ENABLED), "inodegc" }, \ { (1UL << XFS_OPSTATE_BLOCKGC_ENABLED), "blockgc" }, \ { (1UL << XFS_OPSTATE_WARNED_SHRINK), "wshrink" }, \ { (1UL << XFS_OPSTATE_WARNED_LARP), "wlarp" }, \ { (1UL << XFS_OPSTATE_QUOTACHECK_RUNNING), "quotacheck" }, \ { (1UL << XFS_OPSTATE_UNSET_LOG_INCOMPAT), "unset_log_incompat" }, \ { (1UL << XFS_OPSTATE_USE_LARP), "logged_xattrs" }

/*
 * Max and min values for mount-option defined I/O
 * preallocation sizes.
 */
#define XFS_MAX_IO_LOG		30	/* 1G */
#define XFS_MIN_IO_LOG		PAGE_SHIFT

void xfs_do_force_shutdown(struct xfs_mount *mp, uint32_t flags, char *fname,
		int lnnum);
/* Shut down @m with reason flags @f, recording the calling file and line. */
#define xfs_force_shutdown(m,f)	\
	xfs_do_force_shutdown(m, f, __FILE__, __LINE__)

#define SHUTDOWN_META_IO_ERROR	(1u << 0) /* write attempt to metadata failed */
#define SHUTDOWN_LOG_IO_ERROR	(1u << 1) /* write attempt to the log failed */
#define SHUTDOWN_FORCE_UMOUNT	(1u << 2) /* shutdown from a forced unmount */
#define SHUTDOWN_CORRUPT_INCORE	(1u << 3) /* corrupt in-memory structures */
#define SHUTDOWN_CORRUPT_ONDISK	(1u << 4) /* corrupt metadata on device */
#define SHUTDOWN_DEVICE_REMOVED	(1u << 5) /* device removed underneath us */

/*
 * Flag-to-name table for the SHUTDOWN_* reasons. Every flag defined above
 * must have an entry here, otherwise consumers of this table cannot name it.
 */
#define XFS_SHUTDOWN_STRINGS \
	{ SHUTDOWN_META_IO_ERROR,	"metadata_io" }, \
	{ SHUTDOWN_LOG_IO_ERROR,	"log_io" }, \
	{ SHUTDOWN_FORCE_UMOUNT,	"force_umount" }, \
	{ SHUTDOWN_CORRUPT_INCORE,	"corruption" }, \
	{ SHUTDOWN_CORRUPT_ONDISK,	"corruption_ondisk" }, \
	{ SHUTDOWN_DEVICE_REMOVED,	"device_removed" }

/*
 * Flags for xfs_mountfs
 */
#define XFS_MFSI_QUIET		0x40	/* Be silent if mount errors found */

/*
 * Convert disk address @d to the number of the allocation group containing
 * it: the address is converted to filesystem blocks and divided by the
 * per-AG block count from the superblock.
 * NOTE(review): relies on do_div() leaving the quotient in its first
 * argument - confirm against the do_div() contract.
 */
static inline xfs_agnumber_t
xfs_daddr_to_agno(struct xfs_mount *mp, xfs_daddr_t d)
{
	xfs_rfsblock_t ld = XFS_BB_TO_FSBT(mp, d);
	do_div(ld, mp->m_sb.sb_agblocks);
	return (xfs_agnumber_t) ld;
}

/*
 * Convert disk address @d to a block number relative to its allocation
 * group, using the value returned by do_div() for the same division as
 * xfs_daddr_to_agno().
 * NOTE(review): presumably do_div() returns the remainder - confirm.
 */
static inline xfs_agblock_t
xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
{
	xfs_rfsblock_t ld = XFS_BB_TO_FSBT(mp, d);
	return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks);
}

extern
void xfs_uuid_table_free(void); uint64_t xfs_default_resblks(struct xfs_mount *mp, enum xfs_free_counter ctr); extern int xfs_mountfs(xfs_mount_t *mp); extern void xfs_unmountfs(xfs_mount_t *); /* * Deltas for the block count can vary from 1 to very large, but lock contention * only occurs on frequent small block count updates such as in the delayed * allocation path for buffered writes (page a time updates). Hence we set * a large batch count (1024) to minimise global counter updates except when * we get near to ENOSPC and we have to be very accurate with our updates. */ #define XFS_FDBLOCKS_BATCH 1024 uint64_t xfs_freecounter_unavailable(struct xfs_mount *mp, enum xfs_free_counter ctr); /* * Sum up the freecount, but never return negative values. */ static inline s64 xfs_sum_freecounter(struct xfs_mount *mp, enum xfs_free_counter ctr) { return percpu_counter_sum_positive(&mp->m_free[ctr].count); } /* * Same as above, but does return negative values. Mostly useful for * special cases like repair and tracing. */ static inline s64 xfs_sum_freecounter_raw(struct xfs_mount *mp, enum xfs_free_counter ctr) { return percpu_counter_sum(&mp->m_free[ctr].count); } /* * This just provides and estimate without the cpu-local updates, use * xfs_sum_freecounter for the exact value. 
*/ static inline s64 xfs_estimate_freecounter(struct xfs_mount *mp, enum xfs_free_counter ctr) { return percpu_counter_read_positive(&mp->m_free[ctr].count); } static inline int xfs_compare_freecounter(struct xfs_mount *mp, enum xfs_free_counter ctr, s64 rhs, s32 batch) { return __percpu_counter_compare(&mp->m_free[ctr].count, rhs, batch); } static inline void xfs_set_freecounter(struct xfs_mount *mp, enum xfs_free_counter ctr, uint64_t val) { percpu_counter_set(&mp->m_free[ctr].count, val); } int xfs_dec_freecounter(struct xfs_mount *mp, enum xfs_free_counter ctr, uint64_t delta, bool rsvd); void xfs_add_freecounter(struct xfs_mount *mp, enum xfs_free_counter ctr, uint64_t delta); static inline int xfs_dec_fdblocks(struct xfs_mount *mp, uint64_t delta, bool reserved) { return xfs_dec_freecounter(mp, XC_FREE_BLOCKS, delta, reserved); } static inline void xfs_add_fdblocks(struct xfs_mount *mp, uint64_t delta) { xfs_add_freecounter(mp, XC_FREE_BLOCKS, delta); } static inline int xfs_dec_frextents(struct xfs_mount *mp, uint64_t delta) { return xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, delta, false); } static inline void xfs_add_frextents(struct xfs_mount *mp, uint64_t delta) { xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, delta); } extern int xfs_readsb(xfs_mount_t *, int); extern void xfs_freesb(xfs_mount_t *); extern bool xfs_fs_writable(struct xfs_mount *mp, int level); extern int xfs_sb_validate_fsb_count(struct xfs_sb *, uint64_t); extern int xfs_dev_is_read_only(struct xfs_mount *, char *); extern void xfs_set_low_space_thresholds(struct xfs_mount *); int xfs_zero_extent(struct xfs_inode *ip, xfs_fsblock_t start_fsb, xfs_off_t count_fsb); struct xfs_error_cfg * xfs_error_get_cfg(struct xfs_mount *mp, int error_class, int error); void xfs_force_summary_recalc(struct xfs_mount *mp); int xfs_add_incompat_log_feature(struct xfs_mount *mp, uint32_t feature); bool xfs_clear_incompat_log_features(struct xfs_mount *mp); void xfs_mod_delalloc(struct xfs_inode *ip, int64_t 
data_delta, int64_t ind_delta); static inline void xfs_mod_sb_delalloc(struct xfs_mount *mp, int64_t delta) { percpu_counter_add(&mp->m_delalloc_blks, delta); } int xfs_set_max_atomic_write_opt(struct xfs_mount *mp, unsigned long long new_max_bytes); static inline struct xfs_buftarg * xfs_group_type_buftarg( struct xfs_mount *mp, enum xfs_group_type type) { switch (type) { case XG_TYPE_AG: return mp->m_ddev_targp; case XG_TYPE_RTG: return mp->m_rtdev_targp; default: ASSERT(0); break; } return NULL; } #endif /* __XFS_MOUNT_H__ */
1 1 11 9 2 2 2 2 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 
520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 // SPDX-License-Identifier: GPL-2.0 /* USB Driver layer for GSM modems Copyright (C) 2005 Matthias Urlichs <smurf@smurf.noris.de> Portions copied from the Keyspan driver by Hugh Blemings <hugh@blemings.org> History: see the git log. Work sponsored by: Sigos GmbH, Germany <info@sigos.de> This driver exists because the "normal" serial driver doesn't work too well with GSM modems. Issues: - data loss -- one single Receive URB is not nearly enough - controlling the baud rate doesn't make sense */ #define DRIVER_AUTHOR "Matthias Urlichs <smurf@smurf.noris.de>" #define DRIVER_DESC "USB Driver for GSM modems" #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/tty_flip.h> #include <linux/module.h> #include <linux/bitops.h> #include <linux/uaccess.h> #include <linux/usb.h> #include <linux/usb/cdc.h> #include <linux/usb/serial.h> #include <linux/serial.h> #include "usb-wwan.h" /* * Generate DTR/RTS signals on the port using the SET_CONTROL_LINE_STATE request * in CDC ACM. 
*/ static int usb_wwan_send_setup(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct usb_wwan_port_private *portdata; int val = 0; int ifnum; int res; portdata = usb_get_serial_port_data(port); if (portdata->dtr_state) val |= USB_CDC_CTRL_DTR; if (portdata->rts_state) val |= USB_CDC_CTRL_RTS; ifnum = serial->interface->cur_altsetting->desc.bInterfaceNumber; res = usb_autopm_get_interface(serial->interface); if (res) return res; res = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), USB_CDC_REQ_SET_CONTROL_LINE_STATE, USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE, val, ifnum, NULL, 0, USB_CTRL_SET_TIMEOUT); usb_autopm_put_interface(port->serial->interface); return res; } void usb_wwan_dtr_rts(struct usb_serial_port *port, int on) { struct usb_wwan_port_private *portdata; struct usb_wwan_intf_private *intfdata; intfdata = usb_get_serial_data(port->serial); if (!intfdata->use_send_setup) return; portdata = usb_get_serial_port_data(port); /* FIXME: locking */ portdata->rts_state = on; portdata->dtr_state = on; usb_wwan_send_setup(port); } EXPORT_SYMBOL(usb_wwan_dtr_rts); int usb_wwan_tiocmget(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; unsigned int value; struct usb_wwan_port_private *portdata; portdata = usb_get_serial_port_data(port); value = ((portdata->rts_state) ? TIOCM_RTS : 0) | ((portdata->dtr_state) ? TIOCM_DTR : 0) | ((portdata->cts_state) ? TIOCM_CTS : 0) | ((portdata->dsr_state) ? TIOCM_DSR : 0) | ((portdata->dcd_state) ? TIOCM_CAR : 0) | ((portdata->ri_state) ? 
TIOCM_RNG : 0); return value; } EXPORT_SYMBOL(usb_wwan_tiocmget); int usb_wwan_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; struct usb_wwan_port_private *portdata; struct usb_wwan_intf_private *intfdata; portdata = usb_get_serial_port_data(port); intfdata = usb_get_serial_data(port->serial); if (!intfdata->use_send_setup) return -EINVAL; /* FIXME: what locks portdata fields ? */ if (set & TIOCM_RTS) portdata->rts_state = 1; if (set & TIOCM_DTR) portdata->dtr_state = 1; if (clear & TIOCM_RTS) portdata->rts_state = 0; if (clear & TIOCM_DTR) portdata->dtr_state = 0; return usb_wwan_send_setup(port); } EXPORT_SYMBOL(usb_wwan_tiocmset); int usb_wwan_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count) { struct usb_wwan_port_private *portdata; struct usb_wwan_intf_private *intfdata; int i; int left, todo; struct urb *this_urb = NULL; /* spurious */ int err; unsigned long flags; portdata = usb_get_serial_port_data(port); intfdata = usb_get_serial_data(port->serial); dev_dbg(&port->dev, "%s: write (%d chars)\n", __func__, count); left = count; for (i = 0; left > 0 && i < N_OUT_URB; i++) { todo = left; if (todo > OUT_BUFLEN) todo = OUT_BUFLEN; this_urb = portdata->out_urbs[i]; if (test_and_set_bit(i, &portdata->out_busy)) { if (time_before(jiffies, portdata->tx_start_time[i] + 10 * HZ)) continue; usb_unlink_urb(this_urb); continue; } dev_dbg(&port->dev, "%s: endpoint %d buf %d\n", __func__, usb_pipeendpoint(this_urb->pipe), i); err = usb_autopm_get_interface_async(port->serial->interface); if (err < 0) { clear_bit(i, &portdata->out_busy); break; } /* send the data */ memcpy(this_urb->transfer_buffer, buf, todo); this_urb->transfer_buffer_length = todo; spin_lock_irqsave(&intfdata->susp_lock, flags); if (intfdata->suspended) { usb_anchor_urb(this_urb, &portdata->delayed); spin_unlock_irqrestore(&intfdata->susp_lock, flags); } else { intfdata->in_flight++; 
spin_unlock_irqrestore(&intfdata->susp_lock, flags); err = usb_submit_urb(this_urb, GFP_ATOMIC); if (err) { dev_err(&port->dev, "%s: submit urb %d failed: %d\n", __func__, i, err); clear_bit(i, &portdata->out_busy); spin_lock_irqsave(&intfdata->susp_lock, flags); intfdata->in_flight--; spin_unlock_irqrestore(&intfdata->susp_lock, flags); usb_autopm_put_interface_async(port->serial->interface); break; } } portdata->tx_start_time[i] = jiffies; buf += todo; left -= todo; } count -= left; dev_dbg(&port->dev, "%s: wrote (did %d)\n", __func__, count); return count; } EXPORT_SYMBOL(usb_wwan_write); static void usb_wwan_indat_callback(struct urb *urb) { int err; int endpoint; struct usb_serial_port *port; struct device *dev; unsigned char *data = urb->transfer_buffer; int status = urb->status; endpoint = usb_pipeendpoint(urb->pipe); port = urb->context; dev = &port->dev; if (status) { dev_dbg(dev, "%s: nonzero status: %d on endpoint %02x.\n", __func__, status, endpoint); /* don't resubmit on fatal errors */ if (status == -ESHUTDOWN || status == -ENOENT) return; } else { if (urb->actual_length) { tty_insert_flip_string(&port->port, data, urb->actual_length); tty_flip_buffer_push(&port->port); } else dev_dbg(dev, "%s: empty read urb received\n", __func__); } /* Resubmit urb so we continue receiving */ err = usb_submit_urb(urb, GFP_ATOMIC); if (err) { if (err != -EPERM && err != -ENODEV) { dev_err(dev, "%s: resubmit read urb failed. 
(%d)\n", __func__, err); /* busy also in error unless we are killed */ usb_mark_last_busy(port->serial->dev); } } else { usb_mark_last_busy(port->serial->dev); } } static void usb_wwan_outdat_callback(struct urb *urb) { struct usb_serial_port *port; struct usb_wwan_port_private *portdata; struct usb_wwan_intf_private *intfdata; unsigned long flags; int i; port = urb->context; intfdata = usb_get_serial_data(port->serial); usb_serial_port_softint(port); usb_autopm_put_interface_async(port->serial->interface); portdata = usb_get_serial_port_data(port); spin_lock_irqsave(&intfdata->susp_lock, flags); intfdata->in_flight--; spin_unlock_irqrestore(&intfdata->susp_lock, flags); for (i = 0; i < N_OUT_URB; ++i) { if (portdata->out_urbs[i] == urb) { smp_mb__before_atomic(); clear_bit(i, &portdata->out_busy); break; } } } unsigned int usb_wwan_write_room(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct usb_wwan_port_private *portdata; int i; unsigned int data_len = 0; struct urb *this_urb; portdata = usb_get_serial_port_data(port); for (i = 0; i < N_OUT_URB; i++) { this_urb = portdata->out_urbs[i]; if (this_urb && !test_bit(i, &portdata->out_busy)) data_len += OUT_BUFLEN; } dev_dbg(&port->dev, "%s: %u\n", __func__, data_len); return data_len; } EXPORT_SYMBOL(usb_wwan_write_room); unsigned int usb_wwan_chars_in_buffer(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct usb_wwan_port_private *portdata; int i; unsigned int data_len = 0; struct urb *this_urb; portdata = usb_get_serial_port_data(port); for (i = 0; i < N_OUT_URB; i++) { this_urb = portdata->out_urbs[i]; /* FIXME: This locking is insufficient as this_urb may go unused during the test */ if (this_urb && test_bit(i, &portdata->out_busy)) data_len += this_urb->transfer_buffer_length; } dev_dbg(&port->dev, "%s: %u\n", __func__, data_len); return data_len; } EXPORT_SYMBOL(usb_wwan_chars_in_buffer); int usb_wwan_open(struct tty_struct *tty, struct 
usb_serial_port *port) { struct usb_wwan_port_private *portdata; struct usb_wwan_intf_private *intfdata; struct usb_serial *serial = port->serial; int i, err; struct urb *urb; portdata = usb_get_serial_port_data(port); intfdata = usb_get_serial_data(serial); if (port->interrupt_in_urb) { err = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL); if (err) { dev_err(&port->dev, "%s: submit int urb failed: %d\n", __func__, err); } } /* Start reading from the IN endpoint */ for (i = 0; i < N_IN_URB; i++) { urb = portdata->in_urbs[i]; if (!urb) continue; err = usb_submit_urb(urb, GFP_KERNEL); if (err) { dev_err(&port->dev, "%s: submit read urb %d failed: %d\n", __func__, i, err); } } spin_lock_irq(&intfdata->susp_lock); if (++intfdata->open_ports == 1) serial->interface->needs_remote_wakeup = 1; spin_unlock_irq(&intfdata->susp_lock); /* this balances a get in the generic USB serial code */ usb_autopm_put_interface(serial->interface); return 0; } EXPORT_SYMBOL(usb_wwan_open); static void unbusy_queued_urb(struct urb *urb, struct usb_wwan_port_private *portdata) { int i; for (i = 0; i < N_OUT_URB; i++) { if (urb == portdata->out_urbs[i]) { clear_bit(i, &portdata->out_busy); break; } } } void usb_wwan_close(struct usb_serial_port *port) { int i; struct usb_serial *serial = port->serial; struct usb_wwan_port_private *portdata; struct usb_wwan_intf_private *intfdata = usb_get_serial_data(serial); struct urb *urb; portdata = usb_get_serial_port_data(port); /* * Need to take susp_lock to make sure port is not already being * resumed, but no need to hold it due to the tty-port initialized * flag. 
*/ spin_lock_irq(&intfdata->susp_lock); if (--intfdata->open_ports == 0) serial->interface->needs_remote_wakeup = 0; spin_unlock_irq(&intfdata->susp_lock); for (;;) { urb = usb_get_from_anchor(&portdata->delayed); if (!urb) break; unbusy_queued_urb(urb, portdata); usb_autopm_put_interface_async(serial->interface); } for (i = 0; i < N_IN_URB; i++) usb_kill_urb(portdata->in_urbs[i]); for (i = 0; i < N_OUT_URB; i++) usb_kill_urb(portdata->out_urbs[i]); usb_kill_urb(port->interrupt_in_urb); usb_autopm_get_interface_no_resume(serial->interface); } EXPORT_SYMBOL(usb_wwan_close); static struct urb *usb_wwan_setup_urb(struct usb_serial_port *port, int endpoint, int dir, void *ctx, char *buf, int len, void (*callback) (struct urb *)) { struct usb_serial *serial = port->serial; struct usb_wwan_intf_private *intfdata = usb_get_serial_data(serial); struct urb *urb; urb = usb_alloc_urb(0, GFP_KERNEL); /* No ISO */ if (!urb) return NULL; usb_fill_bulk_urb(urb, serial->dev, usb_sndbulkpipe(serial->dev, endpoint) | dir, buf, len, callback, ctx); if (intfdata->use_zlp && dir == USB_DIR_OUT) urb->transfer_flags |= URB_ZERO_PACKET; return urb; } int usb_wwan_port_probe(struct usb_serial_port *port) { struct usb_wwan_port_private *portdata; struct urb *urb; u8 *buffer; int i; if (!port->bulk_in_size || !port->bulk_out_size) return -ENODEV; portdata = kzalloc_obj(*portdata); if (!portdata) return -ENOMEM; init_usb_anchor(&portdata->delayed); for (i = 0; i < N_IN_URB; i++) { buffer = (u8 *)__get_free_page(GFP_KERNEL); if (!buffer) goto bail_out_error; portdata->in_buffer[i] = buffer; urb = usb_wwan_setup_urb(port, port->bulk_in_endpointAddress, USB_DIR_IN, port, buffer, IN_BUFLEN, usb_wwan_indat_callback); portdata->in_urbs[i] = urb; } for (i = 0; i < N_OUT_URB; i++) { buffer = kmalloc(OUT_BUFLEN, GFP_KERNEL); if (!buffer) goto bail_out_error2; portdata->out_buffer[i] = buffer; urb = usb_wwan_setup_urb(port, port->bulk_out_endpointAddress, USB_DIR_OUT, port, buffer, OUT_BUFLEN, 
usb_wwan_outdat_callback); portdata->out_urbs[i] = urb; } usb_set_serial_port_data(port, portdata); return 0; bail_out_error2: for (i = 0; i < N_OUT_URB; i++) { usb_free_urb(portdata->out_urbs[i]); kfree(portdata->out_buffer[i]); } bail_out_error: for (i = 0; i < N_IN_URB; i++) { usb_free_urb(portdata->in_urbs[i]); free_page((unsigned long)portdata->in_buffer[i]); } kfree(portdata); return -ENOMEM; } EXPORT_SYMBOL_GPL(usb_wwan_port_probe); void usb_wwan_port_remove(struct usb_serial_port *port) { int i; struct usb_wwan_port_private *portdata; portdata = usb_get_serial_port_data(port); usb_set_serial_port_data(port, NULL); for (i = 0; i < N_IN_URB; i++) { usb_free_urb(portdata->in_urbs[i]); free_page((unsigned long)portdata->in_buffer[i]); } for (i = 0; i < N_OUT_URB; i++) { usb_free_urb(portdata->out_urbs[i]); kfree(portdata->out_buffer[i]); } kfree(portdata); } EXPORT_SYMBOL(usb_wwan_port_remove); #ifdef CONFIG_PM static void stop_urbs(struct usb_serial *serial) { int i, j; struct usb_serial_port *port; struct usb_wwan_port_private *portdata; for (i = 0; i < serial->num_ports; ++i) { port = serial->port[i]; portdata = usb_get_serial_port_data(port); if (!portdata) continue; for (j = 0; j < N_IN_URB; j++) usb_kill_urb(portdata->in_urbs[j]); for (j = 0; j < N_OUT_URB; j++) usb_kill_urb(portdata->out_urbs[j]); usb_kill_urb(port->interrupt_in_urb); } } int usb_wwan_suspend(struct usb_serial *serial, pm_message_t message) { struct usb_wwan_intf_private *intfdata = usb_get_serial_data(serial); spin_lock_irq(&intfdata->susp_lock); if (PMSG_IS_AUTO(message)) { if (intfdata->in_flight) { spin_unlock_irq(&intfdata->susp_lock); return -EBUSY; } } intfdata->suspended = 1; spin_unlock_irq(&intfdata->susp_lock); stop_urbs(serial); return 0; } EXPORT_SYMBOL(usb_wwan_suspend); /* Caller must hold susp_lock. 
*/ static int usb_wwan_submit_delayed_urbs(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct usb_wwan_intf_private *data = usb_get_serial_data(serial); struct usb_wwan_port_private *portdata; struct urb *urb; int err_count = 0; int err; portdata = usb_get_serial_port_data(port); for (;;) { urb = usb_get_from_anchor(&portdata->delayed); if (!urb) break; err = usb_submit_urb(urb, GFP_ATOMIC); if (err) { dev_err(&port->dev, "%s: submit urb failed: %d\n", __func__, err); err_count++; unbusy_queued_urb(urb, portdata); usb_autopm_put_interface_async(serial->interface); continue; } data->in_flight++; } if (err_count) return -EIO; return 0; } int usb_wwan_resume(struct usb_serial *serial) { int i, j; struct usb_serial_port *port; struct usb_wwan_intf_private *intfdata = usb_get_serial_data(serial); struct usb_wwan_port_private *portdata; struct urb *urb; int err; int err_count = 0; spin_lock_irq(&intfdata->susp_lock); for (i = 0; i < serial->num_ports; i++) { port = serial->port[i]; if (!tty_port_initialized(&port->port)) continue; portdata = usb_get_serial_port_data(port); if (port->interrupt_in_urb) { err = usb_submit_urb(port->interrupt_in_urb, GFP_ATOMIC); if (err) { dev_err(&port->dev, "%s: submit int urb failed: %d\n", __func__, err); err_count++; } } err = usb_wwan_submit_delayed_urbs(port); if (err) err_count++; for (j = 0; j < N_IN_URB; j++) { urb = portdata->in_urbs[j]; err = usb_submit_urb(urb, GFP_ATOMIC); if (err < 0) { dev_err(&port->dev, "%s: submit read urb %d failed: %d\n", __func__, i, err); err_count++; } } } intfdata->suspended = 0; spin_unlock_irq(&intfdata->susp_lock); if (err_count) return -EIO; return 0; } EXPORT_SYMBOL(usb_wwan_resume); #endif MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL v2");
204 126 245 270 81 26 200 55 124 143 16 134 182 7 14 199 14 22 354 2 262 12 12999 144 455 445 5 451 1022 76 50 16 1028 191 504 168 509 46 381 9 390 6 119 44 206 2 6 2 4 3 7 7 107 10 14 10 20 10 79 3 2 53 107 86 31 267 244 168 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 
471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 
971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 
1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 
1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 
2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_XFRM_H #define _NET_XFRM_H #include <linux/compiler.h> #include <linux/xfrm.h> #include <linux/spinlock.h> #include <linux/list.h> #include <linux/skbuff.h> #include <linux/socket.h> #include <linux/pfkeyv2.h> #include <linux/ipsec.h> #include <linux/in6.h> #include <linux/mutex.h> #include <linux/audit.h> #include <linux/slab.h> #include <linux/refcount.h> #include <linux/sockptr.h> #include <net/sock.h> #include <net/dst.h> #include <net/inet_dscp.h> #include <net/ip.h> #include <net/route.h> #include <net/ipv6.h> #include <net/ip6_fib.h> #include <net/flow.h> #include <net/gro_cells.h> #include <linux/interrupt.h> #ifdef CONFIG_XFRM_STATISTICS #include <net/snmp.h> #endif #define XFRM_PROTO_ESP 50 #define XFRM_PROTO_AH 51 #define XFRM_PROTO_COMP 108 #define XFRM_PROTO_IPIP 4 #define XFRM_PROTO_IPV6 41 #define XFRM_PROTO_IPTFS IPPROTO_AGGFRAG #define XFRM_PROTO_ROUTING IPPROTO_ROUTING #define XFRM_PROTO_DSTOPTS IPPROTO_DSTOPTS #define XFRM_ALIGN4(len) (((len) + 3) & ~3) #define XFRM_ALIGN8(len) (((len) + 7) & ~7) #define MODULE_ALIAS_XFRM_MODE(family, encap) \ MODULE_ALIAS("xfrm-mode-" __stringify(family) "-" __stringify(encap)) #define MODULE_ALIAS_XFRM_TYPE(family, proto) \ 
MODULE_ALIAS("xfrm-type-" __stringify(family) "-" __stringify(proto)) #define MODULE_ALIAS_XFRM_OFFLOAD_TYPE(family, proto) \ MODULE_ALIAS("xfrm-offload-" __stringify(family) "-" __stringify(proto)) #ifdef CONFIG_XFRM_STATISTICS #define XFRM_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.xfrm_statistics, field) #define XFRM_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.xfrm_statistics, field, val) #else #define XFRM_INC_STATS(net, field) ((void)(net)) #define XFRM_ADD_STATS(net, field, val) ((void)(net)) #endif /* Organization of SPD aka "XFRM rules" ------------------------------------ Basic objects: - policy rule, struct xfrm_policy (=SPD entry) - bundle of transformations, struct dst_entry == struct xfrm_dst (=SA bundle) - instance of a transformer, struct xfrm_state (=SA) - template to clone xfrm_state, struct xfrm_tmpl SPD is organized as hash table (for policies that meet minimum address prefix length setting, net->xfrm.policy_hthresh). Other policies are stored in lists, sorted into rbtree ordered by destination and source address networks. See net/xfrm/xfrm_policy.c for details. (To be compatible with existing pfkeyv2 implementations, many rules with priority of 0x7fffffff are allowed to exist and such rules are ordered in an unpredictable way, thanks to bsd folks.) If "action" is "block", then we prohibit the flow, otherwise: if "xfrms_nr" is zero, the flow passes untransformed. Otherwise, policy entry has list of up to XFRM_MAX_DEPTH transformations, described by templates xfrm_tmpl. Each template is resolved to a complete xfrm_state (see below) and we pack bundle of transformations to a dst_entry returned to requester. dst -. xfrm .-> xfrm_state #1 |---. child .-> dst -. xfrm .-> xfrm_state #2 |---. child .-> dst -. xfrm .-> xfrm_state #3 |---. child .-> NULL Resolution of xfrm_tmpl ----------------------- Template contains: 1. ->mode Mode: transport or tunnel 2. ->id.proto Protocol: AH/ESP/IPCOMP 3. 
->id.daddr Remote tunnel endpoint, ignored for transport mode. Q: allow to resolve security gateway? 4. ->id.spi If not zero, static SPI. 5. ->saddr Local tunnel endpoint, ignored for transport mode. 6. ->algos List of allowed algos. Plain bitmask now. Q: ealgos, aalgos, calgos. What a mess... 7. ->share Sharing mode. Q: how to implement private sharing mode? To add struct sock* to flow id? Having this template we search through SAD searching for entries with appropriate mode/proto/algo, permitted by selector. If no appropriate entry found, it is requested from key manager. PROBLEMS: Q: How to find all the bundles referring to a physical path for PMTU discovery? Seems, dst should contain list of all parents... and enter to infinite locking hierarchy disaster. No! It is easier, we will not search for them, let them find us. We add genid to each dst plus pointer to genid of raw IP route, pmtu disc will update pmtu on raw IP route and increase its genid. dst_check() will see this for top level and trigger resyncing metrics. Plus, it will be made via sk->sk_dst_cache. Solved. */ struct xfrm_state_walk { struct list_head all; u8 state; u8 dying; u8 proto; u32 seq; struct xfrm_address_filter *filter; }; enum { XFRM_DEV_OFFLOAD_IN = 1, XFRM_DEV_OFFLOAD_OUT, XFRM_DEV_OFFLOAD_FWD, }; enum { XFRM_DEV_OFFLOAD_UNSPECIFIED, XFRM_DEV_OFFLOAD_CRYPTO, XFRM_DEV_OFFLOAD_PACKET, }; enum { XFRM_DEV_OFFLOAD_FLAG_ACQ = 1, }; struct xfrm_dev_offload { /* The device for this offload. * Device drivers should not use this directly, as that will prevent * them from working with bonding device. Instead, the device passed * to the add/delete callbacks should be used. */ struct net_device *dev; netdevice_tracker dev_tracker; /* This is a private pointer used by the bonding driver (and eventually * should be moved there). Device drivers should not use it. 
* Protected by xfrm_state.lock AND bond.ipsec_lock in most cases, * except in the .xdo_dev_state_del() flow, where only xfrm_state.lock * is held. */ struct net_device *real_dev; unsigned long offload_handle; u8 dir : 2; u8 type : 2; u8 flags : 2; }; struct xfrm_mode { u8 encap; u8 family; u8 flags; }; /* Flags for xfrm_mode. */ enum { XFRM_MODE_FLAG_TUNNEL = 1, }; enum xfrm_replay_mode { XFRM_REPLAY_MODE_LEGACY, XFRM_REPLAY_MODE_BMP, XFRM_REPLAY_MODE_ESN, }; /* Full description of state of transformer. */ struct xfrm_state { possible_net_t xs_net; union { struct hlist_node gclist; struct hlist_node bydst; }; union { struct hlist_node dev_gclist; struct hlist_node bysrc; }; struct hlist_node byspi; struct hlist_node byseq; struct hlist_node state_cache; struct hlist_node state_cache_input; refcount_t refcnt; spinlock_t lock; u32 pcpu_num; struct xfrm_id id; struct xfrm_selector sel; struct xfrm_mark mark; u32 if_id; u32 tfcpad; u32 genid; /* Key manager bits */ struct xfrm_state_walk km; /* Parameters of this state. 
*/ struct { u32 reqid; u8 mode; u8 replay_window; u8 aalgo, ealgo, calgo; u8 flags; u16 family; xfrm_address_t saddr; int header_len; int enc_hdr_len; int trailer_len; u32 extra_flags; struct xfrm_mark smark; } props; struct xfrm_lifetime_cfg lft; /* Data for transformer */ struct xfrm_algo_auth *aalg; struct xfrm_algo *ealg; struct xfrm_algo *calg; struct xfrm_algo_aead *aead; const char *geniv; /* mapping change rate limiting */ __be16 new_mapping_sport; u32 new_mapping; /* seconds */ u32 mapping_maxage; /* seconds for input SA */ /* Data for encapsulator */ struct xfrm_encap_tmpl *encap; /* NAT keepalive */ u32 nat_keepalive_interval; /* seconds */ time64_t nat_keepalive_expiration; /* Data for care-of address */ xfrm_address_t *coaddr; /* IPComp needs an IPIP tunnel for handling uncompressed packets */ struct xfrm_state *tunnel; /* If a tunnel, number of users + 1 */ atomic_t tunnel_users; /* State for replay detection */ struct xfrm_replay_state replay; struct xfrm_replay_state_esn *replay_esn; /* Replay detection state at the time we sent the last notification */ struct xfrm_replay_state preplay; struct xfrm_replay_state_esn *preplay_esn; /* replay detection mode */ enum xfrm_replay_mode repl_mode; /* internal flag that only holds state for delayed aevent at the * moment */ u32 xflags; /* Replay detection notification settings */ u32 replay_maxage; u32 replay_maxdiff; /* Replay detection notification timer */ struct timer_list rtimer; /* Statistics */ struct xfrm_stats stats; struct xfrm_lifetime_cur curlft; struct hrtimer mtimer; struct xfrm_dev_offload xso; /* used to fix curlft->add_time when changing date */ long saved_tmo; /* Last used time */ time64_t lastused; struct page_frag xfrag; /* Reference to data common to all the instances of this * transformer. 
*/ const struct xfrm_type *type; struct xfrm_mode inner_mode; struct xfrm_mode inner_mode_iaf; struct xfrm_mode outer_mode; const struct xfrm_type_offload *type_offload; /* Security context */ struct xfrm_sec_ctx *security; /* Private data of this transformer, format is opaque, * interpreted by xfrm_type methods. */ void *data; u8 dir; const struct xfrm_mode_cbs *mode_cbs; void *mode_data; }; static inline struct net *xs_net(struct xfrm_state *x) { return read_pnet(&x->xs_net); } /* xflags - make enum if more show up */ #define XFRM_TIME_DEFER 1 #define XFRM_SOFT_EXPIRE 2 enum { XFRM_STATE_VOID, XFRM_STATE_ACQ, XFRM_STATE_VALID, XFRM_STATE_ERROR, XFRM_STATE_EXPIRED, XFRM_STATE_DEAD }; /* callback structure passed from either netlink or pfkey */ struct km_event { union { u32 hard; u32 proto; u32 byid; u32 aevent; u32 type; } data; u32 seq; u32 portid; u32 event; struct net *net; }; struct xfrm_if_decode_session_result { struct net *net; u32 if_id; }; struct xfrm_if_cb { bool (*decode_session)(struct sk_buff *skb, unsigned short family, struct xfrm_if_decode_session_result *res); }; void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb); void xfrm_if_unregister_cb(void); struct xfrm_dst_lookup_params { struct net *net; dscp_t dscp; int oif; xfrm_address_t *saddr; xfrm_address_t *daddr; u32 mark; __u8 ipproto; union flowi_uli uli; }; struct net_device; struct xfrm_type; struct xfrm_dst; struct xfrm_policy_afinfo { struct dst_ops *dst_ops; struct dst_entry *(*dst_lookup)(const struct xfrm_dst_lookup_params *params); int (*get_saddr)(xfrm_address_t *saddr, const struct xfrm_dst_lookup_params *params); int (*fill_dst)(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl); struct dst_entry *(*blackhole_route)(struct net *net, struct dst_entry *orig); }; int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int family); void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo); void km_policy_notify(struct xfrm_policy 
*xp, int dir, const struct km_event *c); void km_state_notify(struct xfrm_state *x, const struct km_event *c); struct xfrm_tmpl; int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol); void km_state_expired(struct xfrm_state *x, int hard, u32 portid); int __xfrm_state_delete(struct xfrm_state *x); struct xfrm_state_afinfo { u8 family; u8 proto; const struct xfrm_type_offload *type_offload_esp; const struct xfrm_type *type_esp; const struct xfrm_type *type_ipip; const struct xfrm_type *type_ipip6; const struct xfrm_type *type_comp; const struct xfrm_type *type_ah; const struct xfrm_type *type_routing; const struct xfrm_type *type_dstopts; int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*transport_finish)(struct sk_buff *skb, int async); void (*local_error)(struct sk_buff *skb, u32 mtu); }; int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo); int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo); struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); struct xfrm_state_afinfo *xfrm_state_afinfo_get_rcu(unsigned int family); struct xfrm_input_afinfo { u8 family; bool is_ipip; int (*callback)(struct sk_buff *skb, u8 protocol, int err); }; int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo); int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo); void xfrm_flush_gc(void); struct xfrm_type { struct module *owner; u8 proto; u8 flags; #define XFRM_TYPE_NON_FRAGMENT 1 #define XFRM_TYPE_REPLAY_PROT 2 #define XFRM_TYPE_LOCAL_COADDR 4 #define XFRM_TYPE_REMOTE_COADDR 8 int (*init_state)(struct xfrm_state *x, struct netlink_ext_ack *extack); void (*destructor)(struct xfrm_state *); int (*input)(struct xfrm_state *, struct sk_buff *skb); int (*output)(struct xfrm_state *, struct sk_buff *pskb); int (*reject)(struct xfrm_state *, struct sk_buff *, const struct flowi *); }; int xfrm_register_type(const struct xfrm_type *type, unsigned short family); void 
xfrm_unregister_type(const struct xfrm_type *type, unsigned short family); struct xfrm_type_offload { struct module *owner; u8 proto; void (*encap)(struct xfrm_state *, struct sk_buff *pskb); int (*input_tail)(struct xfrm_state *x, struct sk_buff *skb); int (*xmit)(struct xfrm_state *, struct sk_buff *pskb, netdev_features_t features); }; int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned short family); void xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family); void xfrm_set_type_offload(struct xfrm_state *x, bool try_load); /* Drop a module reference on x->type_offload->owner (module_put) and clear * x->type_offload; does nothing when no offload type is attached. */ static inline void xfrm_unset_type_offload(struct xfrm_state *x) { if (!x->type_offload) return; module_put(x->type_offload->owner); x->type_offload = NULL; } /** * struct xfrm_mode_cbs - XFRM mode callbacks * @owner: module owner or NULL * @init_state: Add/init mode specific state in `xfrm_state *x` * @clone_state: Copy mode specific values from `orig` to new state `x` * @destroy_state: Cleanup mode specific state from `xfrm_state *x` * @user_init: Process mode specific netlink attributes from user * @copy_to_user: Add netlink attributes to `attrs` based on state in `x` * @sa_len: Return space required to store mode specific netlink attributes * @get_inner_mtu: Return avail payload space after removing encap overhead * @input: Process received packet from SA using mode * @output: Output given packet using mode * @prepare_output: Add mode specific encapsulation to packet in skb. On return * `transport_header` should point at ESP header, `network_header` should * point at outer IP header and `mac_header` should point at the * protocol/nexthdr field of the outer IP. * * One should examine and understand the specific uses of these callbacks in * xfrm for further detail on how and when these functions are called. RTSL. 
*/ struct xfrm_mode_cbs { struct module *owner; int (*init_state)(struct xfrm_state *x); int (*clone_state)(struct xfrm_state *x, struct xfrm_state *orig); void (*destroy_state)(struct xfrm_state *x); int (*user_init)(struct net *net, struct xfrm_state *x, struct nlattr **attrs, struct netlink_ext_ack *extack); int (*copy_to_user)(struct xfrm_state *x, struct sk_buff *skb); unsigned int (*sa_len)(const struct xfrm_state *x); u32 (*get_inner_mtu)(struct xfrm_state *x, int outer_mtu); int (*input)(struct xfrm_state *x, struct sk_buff *skb); int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*prepare_output)(struct xfrm_state *x, struct sk_buff *skb); }; int xfrm_register_mode_cbs(u8 mode, const struct xfrm_mode_cbs *mode_cbs); void xfrm_unregister_mode_cbs(u8 mode); /* Map an address family to an IP protocol number: AF_INET gives IPPROTO_IPIP, * AF_INET6 gives IPPROTO_IPV6, anything else gives 0. */ static inline int xfrm_af2proto(unsigned int family) { switch(family) { case AF_INET: return IPPROTO_IPIP; case AF_INET6: return IPPROTO_IPV6; default: return 0; } } /* Pick the inner mode for a packet: x->inner_mode when the selector family is * set or when ipproto matches x->props.family (IPIP/AF_INET, IPV6/AF_INET6), * otherwise x->inner_mode_iaf. */ static inline const struct xfrm_mode *xfrm_ip2inner_mode(struct xfrm_state *x, int ipproto) { if ((x->sel.family != AF_UNSPEC) || (ipproto == IPPROTO_IPIP && x->props.family == AF_INET) || (ipproto == IPPROTO_IPV6 && x->props.family == AF_INET6)) return &x->inner_mode; else return &x->inner_mode_iaf; } struct xfrm_tmpl { /* id in template is interpreted as: * daddr - destination of tunnel, may be zero for transport mode. * spi - zero to acquire spi. Not zero if spi is static, then * daddr must be fixed too. * proto - AH/ESP/IPCOMP */ struct xfrm_id id; /* Source address of tunnel. Ignored, if it is not a tunnel. */ xfrm_address_t saddr; unsigned short encap_family; u32 reqid; /* Mode: transport, tunnel etc. */ u8 mode; /* Sharing mode: unique, this session only, this user only etc. */ u8 share; /* May skip this transformation if no SA is found */ u8 optional; /* Skip aalgos/ealgos/calgos checks. 
*/ u8 allalgs; /* Bit mask of algos allowed for acquisition */ u32 aalgos; u32 ealgos; u32 calgos; }; #define XFRM_MAX_DEPTH 6 #define XFRM_MAX_OFFLOAD_DEPTH 1 struct xfrm_policy_walk_entry { struct list_head all; u8 dead; }; struct xfrm_policy_walk { struct xfrm_policy_walk_entry walk; u8 type; u32 seq; }; struct xfrm_policy_queue { struct sk_buff_head hold_queue; struct timer_list hold_timer; unsigned long timeout; }; /** * struct xfrm_policy - xfrm policy * @xp_net: network namespace the policy lives in * @bydst: hlist node for SPD hash table or rbtree list * @byidx: hlist node for index hash table * @state_cache_list: hlist head for policy cached xfrm states * @lock: serialize changes to policy structure members * @refcnt: reference count, freed once it reaches 0 * @pos: kernel internal tie-breaker to determine age of policy * @timer: timer * @genid: generation, used to invalidate old policies * @priority: priority, set by userspace * @index: policy index (autogenerated) * @if_id: virtual xfrm interface id * @mark: packet mark * @selector: selector * @lft: lifetime configuration data * @curlft: lifetime state * @walk: list head on pernet policy list * @polq: queue to hold packets while an acquire operation is in progress * @bydst_reinsert: policy tree node needs to be merged * @type: XFRM_POLICY_TYPE_MAIN or _SUB * @action: XFRM_POLICY_ALLOW or _BLOCK * @flags: XFRM_POLICY_LOCALOK, XFRM_POLICY_ICMP * @xfrm_nr: number of used templates in @xfrm_vec * @family: protocol family * @security: SELinux security label * @xfrm_vec: array of templates to resolve state * @rcu: rcu head, used to defer memory release * @xdo: hardware offload state */ struct xfrm_policy { possible_net_t xp_net; struct hlist_node bydst; struct hlist_node byidx; struct hlist_head state_cache_list; /* This lock only affects elements except for entry. 
*/ rwlock_t lock; refcount_t refcnt; u32 pos; struct timer_list timer; atomic_t genid; u32 priority; u32 index; u32 if_id; struct xfrm_mark mark; struct xfrm_selector selector; struct xfrm_lifetime_cfg lft; struct xfrm_lifetime_cur curlft; struct xfrm_policy_walk_entry walk; struct xfrm_policy_queue polq; bool bydst_reinsert; u8 type; u8 action; u8 flags; u8 xfrm_nr; u16 family; struct xfrm_sec_ctx *security; struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH]; struct rcu_head rcu; struct xfrm_dev_offload xdo; }; static inline struct net *xp_net(const struct xfrm_policy *xp) { return read_pnet(&xp->xp_net); } struct xfrm_kmaddress { xfrm_address_t local; xfrm_address_t remote; u32 reserved; u16 family; }; struct xfrm_migrate { xfrm_address_t old_daddr; xfrm_address_t old_saddr; xfrm_address_t new_daddr; xfrm_address_t new_saddr; u8 proto; u8 mode; u16 reserved; u32 reqid; u16 old_family; u16 new_family; }; #define XFRM_KM_TIMEOUT 30 /* what happened */ #define XFRM_REPLAY_UPDATE XFRM_AE_CR #define XFRM_REPLAY_TIMEOUT XFRM_AE_CE /* default aevent timeout in units of 100ms */ #define XFRM_AE_ETIME 10 /* Async Event timer multiplier */ #define XFRM_AE_ETH_M 10 /* default seq threshold size */ #define XFRM_AE_SEQT_SIZE 2 struct xfrm_mgr { struct list_head list; int (*notify)(struct xfrm_state *x, const struct km_event *c); int (*acquire)(struct xfrm_state *x, struct xfrm_tmpl *, struct xfrm_policy *xp); struct xfrm_policy *(*compile_policy)(struct sock *sk, int opt, u8 *data, int len, int *dir); int (*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport); int (*notify_policy)(struct xfrm_policy *x, int dir, const struct km_event *c); int (*report)(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr); int (*migrate)(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_bundles, const struct xfrm_kmaddress *k, const struct xfrm_encap_tmpl *encap); bool (*is_alive)(const struct km_event *c); }; void 
xfrm_register_km(struct xfrm_mgr *km); void xfrm_unregister_km(struct xfrm_mgr *km); struct xfrm_tunnel_skb_cb { union { struct inet_skb_parm h4; struct inet6_skb_parm h6; } header; union { struct ip_tunnel *ip4; struct ip6_tnl *ip6; } tunnel; }; #define XFRM_TUNNEL_SKB_CB(__skb) ((struct xfrm_tunnel_skb_cb *)&((__skb)->cb[0])) /* * This structure is used for the duration where packets are being * transformed by IPsec. As soon as the packet leaves IPsec the * area beyond the generic IP part may be overwritten. */ struct xfrm_skb_cb { struct xfrm_tunnel_skb_cb header; /* Sequence number for replay protection. */ union { struct { __u32 low; __u32 hi; } output; struct { __be32 low; __be32 hi; } input; } seq; }; #define XFRM_SKB_CB(__skb) ((struct xfrm_skb_cb *)&((__skb)->cb[0])) /* * This structure is used by the afinfo prepare_input/prepare_output functions * to transmit header information to the mode input/output functions. */ struct xfrm_mode_skb_cb { struct xfrm_tunnel_skb_cb header; /* Copied from header for IPv4, always set to zero and DF for IPv6. */ __be16 id; __be16 frag_off; /* IP header length (excluding options or extension headers). */ u8 ihl; /* TOS for IPv4, class for IPv6. */ u8 tos; /* TTL for IPv4, hop limit for IPv6. */ u8 ttl; /* Protocol for IPv4, NH for IPv6. */ u8 protocol; /* Option length for IPv4, zero for IPv6. */ u8 optlen; /* Used by IPv6 only, zero for IPv4. */ u8 flow_lbl[3]; }; #define XFRM_MODE_SKB_CB(__skb) ((struct xfrm_mode_skb_cb *)&((__skb)->cb[0])) /* * This structure is used by the input processing to locate the SPI and * related information. 
*/ struct xfrm_spi_skb_cb { struct xfrm_tunnel_skb_cb header; unsigned int daddroff; unsigned int family; __be32 seq; }; #define XFRM_SPI_SKB_CB(__skb) ((struct xfrm_spi_skb_cb *)&((__skb)->cb[0])) #ifdef CONFIG_AUDITSYSCALL static inline struct audit_buffer *xfrm_audit_start(const char *op) { struct audit_buffer *audit_buf = NULL; if (audit_enabled == AUDIT_OFF) return NULL; audit_buf = audit_log_start(audit_context(), GFP_ATOMIC, AUDIT_MAC_IPSEC_EVENT); if (audit_buf == NULL) return NULL; audit_log_format(audit_buf, "op=%s", op); return audit_buf; } static inline void xfrm_audit_helper_usrinfo(bool task_valid, struct audit_buffer *audit_buf) { const unsigned int auid = from_kuid(&init_user_ns, task_valid ? audit_get_loginuid(current) : INVALID_UID); const unsigned int ses = task_valid ? audit_get_sessionid(current) : AUDIT_SID_UNSET; audit_log_format(audit_buf, " auid=%u ses=%u", auid, ses); audit_log_task_context(audit_buf); } void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid); void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, bool task_valid); void xfrm_audit_state_add(struct xfrm_state *x, int result, bool task_valid); void xfrm_audit_state_delete(struct xfrm_state *x, int result, bool task_valid); void xfrm_audit_state_replay_overflow(struct xfrm_state *x, struct sk_buff *skb); void xfrm_audit_state_replay(struct xfrm_state *x, struct sk_buff *skb, __be32 net_seq); void xfrm_audit_state_notfound_simple(struct sk_buff *skb, u16 family); void xfrm_audit_state_notfound(struct sk_buff *skb, u16 family, __be32 net_spi, __be32 net_seq); void xfrm_audit_state_icvfail(struct xfrm_state *x, struct sk_buff *skb, u8 proto); #else static inline void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid) { } static inline void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, bool task_valid) { } static inline void xfrm_audit_state_add(struct xfrm_state *x, int result, bool task_valid) { } static 
inline void xfrm_audit_state_delete(struct xfrm_state *x, int result, bool task_valid) { } static inline void xfrm_audit_state_replay_overflow(struct xfrm_state *x, struct sk_buff *skb) { } static inline void xfrm_audit_state_replay(struct xfrm_state *x, struct sk_buff *skb, __be32 net_seq) { } static inline void xfrm_audit_state_notfound_simple(struct sk_buff *skb, u16 family) { } static inline void xfrm_audit_state_notfound(struct sk_buff *skb, u16 family, __be32 net_spi, __be32 net_seq) { } static inline void xfrm_audit_state_icvfail(struct xfrm_state *x, struct sk_buff *skb, u8 proto) { } #endif /* CONFIG_AUDITSYSCALL */ /* Take a reference on a policy; a NULL policy is silently ignored. */ static inline void xfrm_pol_hold(struct xfrm_policy *policy) { if (likely(policy != NULL)) refcount_inc(&policy->refcnt); } void xfrm_policy_destroy(struct xfrm_policy *policy); /* Drop a reference and destroy the policy when the count reaches zero. * Unlike xfrm_pol_hold(), policy is dereferenced unconditionally. */ static inline void xfrm_pol_put(struct xfrm_policy *policy) { if (refcount_dec_and_test(&policy->refcnt)) xfrm_policy_destroy(policy); } /* Put each of the first npols policies in pols[], from last to first. */ static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols) { int i; for (i = npols - 1; i >= 0; --i) xfrm_pol_put(pols[i]); } void __xfrm_state_destroy(struct xfrm_state *); /* Drop a state reference without the destroy-on-zero step. */ static inline void __xfrm_state_put(struct xfrm_state *x) { refcount_dec(&x->refcnt); } /* Drop a state reference; destroy the state when the count reaches zero. */ static inline void xfrm_state_put(struct xfrm_state *x) { if (refcount_dec_and_test(&x->refcnt)) __xfrm_state_destroy(x); } static inline void xfrm_state_hold(struct xfrm_state *x) { refcount_inc(&x->refcnt); } /* Compare the leading prefixlen bits of two addresses stored as arrays of * big-endian 32-bit words; returns true when they are equal. */ static inline bool addr_match(const void *token1, const void *token2, unsigned int prefixlen) { const __be32 *a1 = token1; const __be32 *a2 = token2; unsigned int pdw; unsigned int pbi; pdw = prefixlen >> 5; /* num of whole u32 in prefix */ pbi = prefixlen & 0x1f; /* num of bits in incomplete u32 in prefix */ if (pdw) if (memcmp(a1, a2, pdw << 2)) return false; if (pbi) { __be32 mask; mask = htonl((0xffffffff) << (32 - pbi)); if ((a1[pdw] ^ a2[pdw]) & mask) return false; } return true; } static inline bool addr4_match(__be32 a1, __be32 a2, u8 prefixlen) { /* 
C99 6.5.7 (3): u32 << 32 is undefined behaviour */ if (sizeof(long) == 4 && prefixlen == 0) return true; return !((a1 ^ a2) & htonl(~0UL << (32 - prefixlen))); } /* Source-side "port" of a flow by protocol: sport for TCP/UDP/UDPLITE/SCTP, * the type for ICMP/ICMPv6 and MH, the upper 16 bits of the GRE key, else 0. */ static __inline__ __be16 xfrm_flowi_sport(const struct flowi *fl, const union flowi_uli *uli) { __be16 port; switch(fl->flowi_proto) { case IPPROTO_TCP: case IPPROTO_UDP: case IPPROTO_UDPLITE: case IPPROTO_SCTP: port = uli->ports.sport; break; case IPPROTO_ICMP: case IPPROTO_ICMPV6: port = htons(uli->icmpt.type); break; case IPPROTO_MH: port = htons(uli->mht.type); break; case IPPROTO_GRE: port = htons(ntohl(uli->gre_key) >> 16); break; default: port = 0; /*XXX*/ } return port; } /* Destination-side "port" of a flow by protocol: dport for TCP/UDP/UDPLITE/SCTP, * the code for ICMP/ICMPv6, the lower 16 bits of the GRE key, else 0. */ static __inline__ __be16 xfrm_flowi_dport(const struct flowi *fl, const union flowi_uli *uli) { __be16 port; switch(fl->flowi_proto) { case IPPROTO_TCP: case IPPROTO_UDP: case IPPROTO_UDPLITE: case IPPROTO_SCTP: port = uli->ports.dport; break; case IPPROTO_ICMP: case IPPROTO_ICMPV6: port = htons(uli->icmpt.code); break; case IPPROTO_GRE: port = htons(ntohl(uli->gre_key) & 0xffff); break; default: port = 0; /*XXX*/ } return port; } bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl, unsigned short family); #ifdef CONFIG_SECURITY_NETWORK_XFRM /* If neither has a context --> match * Otherwise, both must have a context and the sids, doi, alg must match */ static inline bool xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_ctx *s2) { return ((!s1 && !s2) || (s1 && s2 && (s1->ctx_sid == s2->ctx_sid) && (s1->ctx_doi == s2->ctx_doi) && (s1->ctx_alg == s2->ctx_alg))); } #else static inline bool xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_ctx *s2) { return true; } #endif /* A struct encoding bundle of transformations to apply to some set of flow. * * xdst->child points to the next element of bundle. * dst->xfrm points to an instance of transformer. 
* * Due to unfortunate limitations of current routing cache, which we * have no time to fix, it mirrors struct rtable and bound to the same * routing key, including saddr,daddr. However, we can have many of * bundles differing by session id. All the bundles grow from a parent * policy rule. */ struct xfrm_dst { union { struct dst_entry dst; struct rtable rt; struct rt6_info rt6; } u; struct dst_entry *route; struct dst_entry *child; struct dst_entry *path; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int num_pols, num_xfrms; u32 xfrm_genid; u32 policy_genid; u32 route_mtu_cached; u32 child_mtu_cached; u32 route_cookie; u32 path_cookie; }; static inline struct dst_entry *xfrm_dst_path(const struct dst_entry *dst) { #ifdef CONFIG_XFRM if (dst->xfrm || (dst->flags & DST_XFRM_QUEUE)) { const struct xfrm_dst *xdst = (const struct xfrm_dst *) dst; return xdst->path; } #endif return (struct dst_entry *) dst; } static inline struct dst_entry *xfrm_dst_child(const struct dst_entry *dst) { #ifdef CONFIG_XFRM if (dst->xfrm || (dst->flags & DST_XFRM_QUEUE)) { struct xfrm_dst *xdst = (struct xfrm_dst *) dst; return xdst->child; } #endif return NULL; } #ifdef CONFIG_XFRM static inline void xfrm_dst_set_child(struct xfrm_dst *xdst, struct dst_entry *child) { xdst->child = child; } static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) { xfrm_pols_put(xdst->pols, xdst->num_pols); dst_release(xdst->route); if (likely(xdst->u.dst.xfrm)) xfrm_state_put(xdst->u.dst.xfrm); } #endif void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev); struct xfrm_if_parms { int link; /* ifindex of underlying L2 interface */ u32 if_id; /* interface identifier */ bool collect_md; }; struct xfrm_if { struct xfrm_if __rcu *next; /* next interface in list */ struct net_device *dev; /* virtual device associated with interface */ struct net *net; /* netns for packet i/o */ struct xfrm_if_parms p; /* interface parms */ struct gro_cells gro_cells; }; struct xfrm_offload { /* Output 
sequence number for replay protection on offloading. */ struct { __u32 low; __u32 hi; } seq; __u32 flags; #define SA_DELETE_REQ 1 #define CRYPTO_DONE 2 #define CRYPTO_NEXT_DONE 4 #define CRYPTO_FALLBACK 8 #define XFRM_GSO_SEGMENT 16 #define XFRM_GRO 32 /* 64 is free */ #define XFRM_DEV_RESUME 128 #define XFRM_XMIT 256 __u32 status; #define CRYPTO_SUCCESS 1 #define CRYPTO_GENERIC_ERROR 2 #define CRYPTO_TRANSPORT_AH_AUTH_FAILED 4 #define CRYPTO_TRANSPORT_ESP_AUTH_FAILED 8 #define CRYPTO_TUNNEL_AH_AUTH_FAILED 16 #define CRYPTO_TUNNEL_ESP_AUTH_FAILED 32 #define CRYPTO_INVALID_PACKET_SYNTAX 64 #define CRYPTO_INVALID_PROTOCOL 128 /* Used to keep whole l2 header for transport mode GRO */ __u16 orig_mac_len; __u8 proto; __u8 inner_ipproto; }; struct sec_path { struct xfrm_state *xvec[XFRM_MAX_DEPTH]; struct xfrm_offload ovec[XFRM_MAX_OFFLOAD_DEPTH]; u8 len; u8 olen; u8 verified_cnt; }; struct sec_path *secpath_set(struct sk_buff *skb); static inline void secpath_reset(struct sk_buff *skb) { #ifdef CONFIG_XFRM skb_ext_del(skb, SKB_EXT_SEC_PATH); #endif } static inline int xfrm_addr_any(const xfrm_address_t *addr, unsigned short family) { switch (family) { case AF_INET: return addr->a4 == 0; case AF_INET6: return ipv6_addr_any(&addr->in6); } return 0; } static inline int __xfrm4_state_addr_cmp(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x) { return (tmpl->saddr.a4 && tmpl->saddr.a4 != x->props.saddr.a4); } static inline int __xfrm6_state_addr_cmp(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x) { return (!ipv6_addr_any((struct in6_addr*)&tmpl->saddr) && !ipv6_addr_equal((struct in6_addr *)&tmpl->saddr, (struct in6_addr*)&x->props.saddr)); } static inline int xfrm_state_addr_cmp(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x, unsigned short family) { switch (family) { case AF_INET: return __xfrm4_state_addr_cmp(tmpl, x); case AF_INET6: return __xfrm6_state_addr_cmp(tmpl, x); } return !0; } #ifdef CONFIG_XFRM static inline struct xfrm_state 
*xfrm_input_state(struct sk_buff *skb) { struct sec_path *sp = skb_sec_path(skb); return sp->xvec[sp->len - 1]; } #endif /* Return the last offload entry of the skb's secpath, or NULL when there is no * secpath, it has no offload entries, or sp->len differs from sp->olen. Always * NULL without CONFIG_XFRM. */ static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb) { #ifdef CONFIG_XFRM struct sec_path *sp = skb_sec_path(skb); if (!sp || !sp->olen || sp->len != sp->olen) return NULL; return &sp->ovec[sp->olen - 1]; #else return NULL; #endif } #ifdef CONFIG_XFRM int __xfrm_policy_check(struct sock *, int dir, struct sk_buff *skb, unsigned short family); /* True only when no policy is counted for dir, the skb carries no secpath and * the default policy for dir is XFRM_USERPOLICY_ACCEPT. */ static inline bool __xfrm_check_nopolicy(struct net *net, struct sk_buff *skb, int dir) { if (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) return net->xfrm.policy_default[dir] == XFRM_USERPOLICY_ACCEPT; return false; } static inline bool __xfrm_check_dev_nopolicy(struct sk_buff *skb, int dir, unsigned short family) { if (dir != XFRM_POLICY_OUT && family == AF_INET) { /* same dst may be used for traffic originating from * devices with different policy settings. */ return IPCB(skb)->flags & IPSKB_NOPOLICY; } return skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY); } static inline int __xfrm_policy_check2(struct sock *sk, int dir, struct sk_buff *skb, unsigned int family, int reverse) { struct net *net = dev_net(skb->dev); int ndir = dir | (reverse ? XFRM_POLICY_MASK + 1 : 0); struct xfrm_offload *xo = xfrm_offload(skb); struct xfrm_state *x; if (sk && sk->sk_policy[XFRM_POLICY_IN]) return __xfrm_policy_check(sk, ndir, skb, family); if (xo) { x = xfrm_input_state(skb); if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET) { bool check = (xo->flags & CRYPTO_DONE) && (xo->status & CRYPTO_SUCCESS); /* The packets here are plain ones and secpath was * needed to indicate that hardware already handled * them and there is no need to do anything in addition. * * Consume secpath which was set by drivers. 
*/ secpath_reset(skb); return check; } } return __xfrm_check_nopolicy(net, skb, dir) || __xfrm_check_dev_nopolicy(skb, dir, family) || __xfrm_policy_check(sk, ndir, skb, family); } static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family) { return __xfrm_policy_check2(sk, dir, skb, family, 0); } static inline int xfrm4_policy_check(struct sock *sk, int dir, struct sk_buff *skb) { return xfrm_policy_check(sk, dir, skb, AF_INET); } static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *skb) { return xfrm_policy_check(sk, dir, skb, AF_INET6); } static inline int xfrm4_policy_check_reverse(struct sock *sk, int dir, struct sk_buff *skb) { return __xfrm_policy_check2(sk, dir, skb, AF_INET, 1); } static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir, struct sk_buff *skb) { return __xfrm_policy_check2(sk, dir, skb, AF_INET6, 1); } int __xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl, unsigned int family, int reverse); static inline int xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl, unsigned int family) { return __xfrm_decode_session(net, skb, fl, family, 0); } static inline int xfrm_decode_session_reverse(struct net *net, struct sk_buff *skb, struct flowi *fl, unsigned int family) { return __xfrm_decode_session(net, skb, fl, family, 1); } int __xfrm_route_forward(struct sk_buff *skb, unsigned short family); static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family) { struct net *net = dev_net(skb->dev); if (!net->xfrm.policy_count[XFRM_POLICY_OUT] && net->xfrm.policy_default[XFRM_POLICY_OUT] == XFRM_USERPOLICY_ACCEPT) return true; return (skb_dst(skb)->flags & DST_NOXFRM) || __xfrm_route_forward(skb, family); } static inline int xfrm4_route_forward(struct sk_buff *skb) { return xfrm_route_forward(skb, AF_INET); } static inline int xfrm6_route_forward(struct sk_buff *skb) { return xfrm_route_forward(skb, 
AF_INET6); } int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk); static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) { if (!sk_fullsock(osk)) return 0; sk->sk_policy[0] = NULL; sk->sk_policy[1] = NULL; if (unlikely(osk->sk_policy[0] || osk->sk_policy[1])) return __xfrm_sk_clone_policy(sk, osk); return 0; } int xfrm_policy_delete(struct xfrm_policy *pol, int dir); static inline void xfrm_sk_free_policy(struct sock *sk) { struct xfrm_policy *pol; pol = rcu_dereference_protected(sk->sk_policy[0], 1); if (unlikely(pol != NULL)) { xfrm_policy_delete(pol, XFRM_POLICY_MAX); sk->sk_policy[0] = NULL; } pol = rcu_dereference_protected(sk->sk_policy[1], 1); if (unlikely(pol != NULL)) { xfrm_policy_delete(pol, XFRM_POLICY_MAX+1); sk->sk_policy[1] = NULL; } } #else static inline void xfrm_sk_free_policy(struct sock *sk) {} static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) { return 0; } static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; } static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; } static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *skb) { return 1; } static inline int xfrm4_policy_check(struct sock *sk, int dir, struct sk_buff *skb) { return 1; } static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family) { return 1; } static inline int xfrm_decode_session_reverse(struct net *net, struct sk_buff *skb, struct flowi *fl, unsigned int family) { return -ENOSYS; } static inline int xfrm4_policy_check_reverse(struct sock *sk, int dir, struct sk_buff *skb) { return 1; } static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir, struct sk_buff *skb) { return 1; } #endif static __inline__ xfrm_address_t *xfrm_flowi_daddr(const struct flowi *fl, unsigned short family) { switch (family){ case AF_INET: return (xfrm_address_t *)&fl->u.ip4.daddr; case AF_INET6: return (xfrm_address_t 
*)&fl->u.ip6.daddr; } return NULL; } static __inline__ xfrm_address_t *xfrm_flowi_saddr(const struct flowi *fl, unsigned short family) { switch (family){ case AF_INET: return (xfrm_address_t *)&fl->u.ip4.saddr; case AF_INET6: return (xfrm_address_t *)&fl->u.ip6.saddr; } return NULL; } static __inline__ void xfrm_flowi_addr_get(const struct flowi *fl, xfrm_address_t *saddr, xfrm_address_t *daddr, unsigned short family) { switch(family) { case AF_INET: memcpy(&saddr->a4, &fl->u.ip4.saddr, sizeof(saddr->a4)); memcpy(&daddr->a4, &fl->u.ip4.daddr, sizeof(daddr->a4)); break; case AF_INET6: saddr->in6 = fl->u.ip6.saddr; daddr->in6 = fl->u.ip6.daddr; break; } } static __inline__ int __xfrm4_state_addr_check(const struct xfrm_state *x, const xfrm_address_t *daddr, const xfrm_address_t *saddr) { if (daddr->a4 == x->id.daddr.a4 && (saddr->a4 == x->props.saddr.a4 || !saddr->a4 || !x->props.saddr.a4)) return 1; return 0; } static __inline__ int __xfrm6_state_addr_check(const struct xfrm_state *x, const xfrm_address_t *daddr, const xfrm_address_t *saddr) { if (ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)&x->id.daddr) && (ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)&x->props.saddr) || ipv6_addr_any((struct in6_addr *)saddr) || ipv6_addr_any((struct in6_addr *)&x->props.saddr))) return 1; return 0; } static __inline__ int xfrm_state_addr_check(const struct xfrm_state *x, const xfrm_address_t *daddr, const xfrm_address_t *saddr, unsigned short family) { switch (family) { case AF_INET: return __xfrm4_state_addr_check(x, daddr, saddr); case AF_INET6: return __xfrm6_state_addr_check(x, daddr, saddr); } return 0; } static __inline__ int xfrm_state_addr_flow_check(const struct xfrm_state *x, const struct flowi *fl, unsigned short family) { switch (family) { case AF_INET: return __xfrm4_state_addr_check(x, (const xfrm_address_t *)&fl->u.ip4.daddr, (const xfrm_address_t *)&fl->u.ip4.saddr); case AF_INET6: return __xfrm6_state_addr_check(x, (const 
xfrm_address_t *)&fl->u.ip6.daddr,
						(const xfrm_address_t *)&fl->u.ip6.saddr);
	}
	return 0;
}

/* Return the current value of the state's tunnel_users counter. */
static inline int xfrm_state_kern(const struct xfrm_state *x)
{
	return atomic_read(&x->tunnel_users);
}

/*
 * Return true when @proto is one of the protocol numbers accepted here:
 * AH, ESP and COMP always, plus ROUTING and DSTOPTS when CONFIG_IPV6 is
 * enabled.
 */
static inline bool xfrm_id_proto_valid(u8 proto)
{
	switch (proto) {
	case IPPROTO_AH:
	case IPPROTO_ESP:
	case IPPROTO_COMP:
#if IS_ENABLED(CONFIG_IPV6)
	case IPPROTO_ROUTING:
	case IPPROTO_DSTOPTS:
#endif
		return true;
	default:
		return false;
	}
}

/* IPSEC_PROTO_ANY only matches 3 IPsec protocols, 0 could match all. */
static inline int xfrm_id_proto_match(u8 proto, u8 userproto)
{
	return (!userproto || proto == userproto ||
		(userproto == IPSEC_PROTO_ANY && (proto == IPPROTO_AH ||
						  proto == IPPROTO_ESP ||
						  proto == IPPROTO_COMP)));
}

/*
 * xfrm algorithm information
 */
struct xfrm_algo_aead_info {
	char *geniv;
	u16 icv_truncbits;
};

struct xfrm_algo_auth_info {
	u16 icv_truncbits;
	u16 icv_fullbits;
};

struct xfrm_algo_encr_info {
	char *geniv;
	u16 blockbits;
	u16 defkeybits;
};

struct xfrm_algo_comp_info {
	u16 threshold;
};

/*
 * One algorithm table entry: names, two single-bit flags, the
 * kind-specific parameters (one member of @uinfo per info struct above)
 * and a struct sadb_alg descriptor.
 */
struct xfrm_algo_desc {
	char *name;
	char *compat;
	u8 available:1;
	u8 pfkey_supported:1;
	union {
		struct xfrm_algo_aead_info aead;
		struct xfrm_algo_auth_info auth;
		struct xfrm_algo_encr_info encr;
		struct xfrm_algo_comp_info comp;
	} uinfo;
	struct sadb_alg desc;
};

/* XFRM protocol handlers. */
/*
 * Handlers are chained through the RCU-annotated @next pointer and carry
 * a @priority. NOTE(review): ordering semantics of @priority are defined
 * by the registration code, not visible here.
 */
struct xfrm4_protocol {
	int (*handler)(struct sk_buff *skb);
	int (*input_handler)(struct sk_buff *skb, int nexthdr, __be32 spi,
			     int encap_type);
	int (*cb_handler)(struct sk_buff *skb, int err);
	int (*err_handler)(struct sk_buff *skb, u32 info);

	struct xfrm4_protocol __rcu *next;
	int priority;
};

/* IPv6 counterpart; only the err_handler signature differs. */
struct xfrm6_protocol {
	int (*handler)(struct sk_buff *skb);
	int (*input_handler)(struct sk_buff *skb, int nexthdr, __be32 spi,
			     int encap_type);
	int (*cb_handler)(struct sk_buff *skb, int err);
	int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
			   u8 type, u8 code, int offset, __be32 info);

	struct xfrm6_protocol __rcu *next;
	int priority;
};

/* XFRM tunnel handlers.
*/
/*
 * Tunnel handler entry: receive, callback and error hooks, chained via
 * the RCU-annotated @next pointer and carrying a @priority.
 */
struct xfrm_tunnel {
	int (*handler)(struct sk_buff *skb);
	int (*cb_handler)(struct sk_buff *skb, int err);
	int (*err_handler)(struct sk_buff *skb, u32 info);

	struct xfrm_tunnel __rcu *next;
	int priority;
};

/* IPv6 counterpart; only the err_handler signature differs. */
struct xfrm6_tunnel {
	int (*handler)(struct sk_buff *skb);
	int (*cb_handler)(struct sk_buff *skb, int err);
	int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
			   u8 type, u8 code, int offset, __be32 info);
	struct xfrm6_tunnel __rcu *next;
	int priority;
};

/* Subsystem and per-netns initialisation / teardown entry points. */
void xfrm_init(void);
void xfrm4_init(void);
int xfrm_state_init(struct net *net);
void xfrm_state_fini(struct net *net);
void xfrm4_state_init(void);
void xfrm4_protocol_init(void);
#ifdef CONFIG_XFRM
int xfrm6_init(void);
void xfrm6_fini(void);
int xfrm6_state_init(void);
void xfrm6_state_fini(void);
int xfrm6_protocol_init(void);
void xfrm6_protocol_fini(void);
#else
/* Without CONFIG_XFRM: init reports success (0) and fini does nothing. */
static inline int xfrm6_init(void)
{
	return 0;
}
static inline void xfrm6_fini(void)
{
	;
}
#endif

#ifdef CONFIG_XFRM_STATISTICS
int xfrm_proc_init(struct net *net);
void xfrm_proc_fini(struct net *net);
#endif

int xfrm_sysctl_init(struct net *net);
#ifdef CONFIG_SYSCTL
void xfrm_sysctl_fini(struct net *net);
#else
/* Without CONFIG_SYSCTL there is nothing to tear down. */
static inline void xfrm_sysctl_fini(struct net *net)
{
}
#endif

/*
 * State walk API. The callback passed to xfrm_state_walk() receives a
 * state, an int and the opaque pointer given as the last argument.
 */
void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto,
			  struct xfrm_address_filter *filter);
int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
		    int (*func)(struct xfrm_state *, int, void*), void *);
void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net);
struct xfrm_state *xfrm_state_alloc(struct net *net);
void xfrm_state_free(struct xfrm_state *x);
struct xfrm_state *xfrm_state_find(const xfrm_address_t *daddr,
				   const xfrm_address_t *saddr,
				   const struct flowi *fl,
				   struct xfrm_tmpl *tmpl,
				   struct xfrm_policy *pol, int *err,
				   unsigned short family, u32 if_id);
struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
				       xfrm_address_t *daddr,
				       xfrm_address_t *saddr,
				       unsigned short family,
				       u8 mode, u8
proto, u32 reqid);
struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi,
					      unsigned short family);
int xfrm_state_check_expire(struct xfrm_state *x);
void xfrm_state_update_stats(struct net *net);
#ifdef CONFIG_XFRM_OFFLOAD
/*
 * Invoke the offload device's xdo_dev_state_update_stats() callback for
 * @x, if the state has a device, the device has xfrmdev_ops and the
 * callback is set. The device pointer is sampled once with READ_ONCE().
 */
static inline void xfrm_dev_state_update_stats(struct xfrm_state *x)
{
	struct xfrm_dev_offload *xdo = &x->xso;
	struct net_device *dev = READ_ONCE(xdo->dev);

	if (dev && dev->xfrmdev_ops &&
	    dev->xfrmdev_ops->xdo_dev_state_update_stats)
		dev->xfrmdev_ops->xdo_dev_state_update_stats(x);

}
#else
/* Without CONFIG_XFRM_OFFLOAD this is a no-op. */
static inline void xfrm_dev_state_update_stats(struct xfrm_state *x) {}
#endif
void xfrm_state_insert(struct xfrm_state *x);
int xfrm_state_add(struct xfrm_state *x);
int xfrm_state_update(struct xfrm_state *x);
struct xfrm_state *xfrm_state_lookup(struct net *net, u32 mark,
				     const xfrm_address_t *daddr, __be32 spi,
				     u8 proto, unsigned short family);
struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark,
					   const xfrm_address_t *daddr,
					   __be32 spi, u8 proto,
					   unsigned short family);
struct xfrm_state *xfrm_state_lookup_byaddr(struct net *net, u32 mark,
					    const xfrm_address_t *daddr,
					    const xfrm_address_t *saddr,
					    u8 proto,
					    unsigned short family);
#ifdef CONFIG_XFRM_SUB_POLICY
void xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
		    unsigned short family);
void xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
		     unsigned short family);
#else
/*
 * Without CONFIG_XFRM_SUB_POLICY both sort helpers are empty: neither
 * array is read or written.
 */
static inline void xfrm_tmpl_sort(struct xfrm_tmpl **d, struct xfrm_tmpl **s,
				  int n, unsigned short family)
{
}

static inline void xfrm_state_sort(struct xfrm_state **d, struct xfrm_state **s,
				   int n, unsigned short family)
{
}
#endif

struct xfrmk_sadinfo {
	u32 sadhcnt; /* current hash bkts */
	u32 sadhmcnt; /* max allowed hash bkts */
	u32 sadcnt; /* current running count */
};

/*
 * Policy database counters filled in through xfrm_spd_getinfo().
 * NOTE(review): field meanings are not spelled out here; by analogy with
 * xfrmk_sadinfo the *hcnt / *hmcnt pair looks like current / maximum hash
 * bucket counts - confirm against the code that fills the struct.
 */
struct xfrmk_spdinfo {
	u32 incnt;
	u32 outcnt;
	u32 fwdcnt;
	u32 inscnt;
	u32 outscnt;
	u32 fwdscnt;
	u32 spdhcnt;
	u32 spdhmcnt;
};

struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark,
u32 seq, u32 pcpu_num);
int xfrm_state_delete(struct xfrm_state *x);
int xfrm_state_flush(struct net *net, u8 proto, bool task_valid);
int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid);
int xfrm_dev_policy_flush(struct net *net, struct net_device *dev,
			  bool task_valid);
void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si);
void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si);
u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq);
int xfrm_init_replay(struct xfrm_state *x, struct netlink_ext_ack *extack);
u32 xfrm_state_mtu(struct xfrm_state *x, int mtu);
int __xfrm_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack);
int xfrm_init_state(struct xfrm_state *x);
int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type);
int xfrm_input_resume(struct sk_buff *skb, int nexthdr);
int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
			 int (*finish)(struct net *, struct sock *,
				       struct sk_buff *));
int xfrm_trans_queue(struct sk_buff *skb,
		     int (*finish)(struct net *, struct sock *,
				   struct sk_buff *));
int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err);
int xfrm_output(struct sock *sk, struct sk_buff *skb);
int xfrm4_tunnel_check_size(struct sk_buff *skb);
#if IS_ENABLED(CONFIG_IPV6)
int xfrm6_tunnel_check_size(struct sk_buff *skb);
#else
/* Without IPv6 support the size check always fails with -EMSGSIZE. */
static inline int xfrm6_tunnel_check_size(struct sk_buff *skb)
{
	return -EMSGSIZE;
}
#endif

#if IS_ENABLED(CONFIG_NET_PKTGEN)
int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb);
#endif

void xfrm_local_error(struct sk_buff *skb, int mtu);
int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
		    int encap_type);
int xfrm4_transport_finish(struct sk_buff *skb, int async);
int xfrm4_rcv(struct sk_buff *skb);

/*
 * IPv4 receive entry with an explicit SPI: clears the IPv4 tunnel pointer
 * in the skb control block, records AF_INET and the offset of the IPv4
 * destination address there, then hands the packet to xfrm_input() with
 * encap_type 0.
 */
static inline int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
{
	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
	XFRM_SPI_SKB_CB(skb)->family = AF_INET;
	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
	return xfrm_input(skb, nexthdr, spi, 0);
}

/* IPv4 output, protocol handler and tunnel handler registration. */
int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb);
int xfrm4_protocol_register(struct xfrm4_protocol *handler, unsigned char protocol);
int xfrm4_protocol_deregister(struct xfrm4_protocol *handler, unsigned char protocol);
int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family);
int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family);
void xfrm4_local_error(struct sk_buff *skb, u32 mtu);

/* IPv6 receive paths, protocol handler and tunnel handler registration. */
int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
		  struct ip6_tnl *t);
int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
		    int encap_type);
int xfrm6_transport_finish(struct sk_buff *skb, int async);
int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t);
int xfrm6_rcv(struct sk_buff *skb);
int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
		     xfrm_address_t *saddr, u8 proto);
void xfrm6_local_error(struct sk_buff *skb, u32 mtu);
int xfrm6_protocol_register(struct xfrm6_protocol *handler, unsigned char protocol);
int xfrm6_protocol_deregister(struct xfrm6_protocol *handler, unsigned char protocol);
int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family);
int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family);
__be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr);
__be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr);
int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);

#ifdef CONFIG_XFRM
/* UDP encapsulation receive hooks; only declared with CONFIG_XFRM. */
void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu);
int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
					struct sk_buff *skb);
struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
					struct sk_buff
*skb);
int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
		     int optlen);
#else
/* Without CONFIG_XFRM the socket option is rejected with -ENOPROTOOPT. */
static inline int xfrm_user_policy(struct sock *sk, int optname,
				   sockptr_t optval, int optlen)
{
	return -ENOPROTOOPT;
}
#endif

struct dst_entry *__xfrm_dst_lookup(int family, const struct xfrm_dst_lookup_params *params);

struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp);

/*
 * Policy walk API. The callback passed to xfrm_policy_walk() receives a
 * policy, two ints and the opaque pointer given as the last argument.
 */
void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type);
int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
		     int (*func)(struct xfrm_policy *, int, int, void*),
		     void *);
void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net);
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl);
struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net,
					  const struct xfrm_mark *mark,
					  u32 if_id, u8 type, int dir,
					  struct xfrm_selector *sel,
					  struct xfrm_sec_ctx *ctx, int delete,
					  int *err);
struct xfrm_policy *xfrm_policy_byid(struct net *net,
				     const struct xfrm_mark *mark, u32 if_id,
				     u8 type, int dir, u32 id, int delete,
				     int *err);
int xfrm_policy_flush(struct net *net, u8 type, bool task_valid);
void xfrm_policy_hash_rebuild(struct net *net);
u32 xfrm_get_acqseq(void);
int verify_spi_info(u8 proto, u32 min, u32 max, struct netlink_ext_ack *extack);
int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi,
		   struct netlink_ext_ack *extack);
struct xfrm_state *xfrm_find_acq(struct net *net, const struct xfrm_mark *mark,
				 u8 mode, u32 reqid, u32 if_id, u32 pcpu_num, u8 proto,
				 const xfrm_address_t *daddr,
				 const xfrm_address_t *saddr, int create,
				 unsigned short family);
int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol);

#ifdef CONFIG_XFRM_MIGRATE
/* Migration API; this group is only declared with CONFIG_XFRM_MIGRATE. */
int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
	       const struct xfrm_migrate *m, int num_bundles,
	       const struct xfrm_kmaddress *k,
	       const struct xfrm_encap_tmpl *encap);
struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net,
						u32 if_id);
struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
				      struct xfrm_migrate *m,
				      struct xfrm_encap_tmpl *encap,
				      struct net *net,
				      struct xfrm_user_offload *xuo,
				      struct netlink_ext_ack *extack);
int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
		 struct xfrm_migrate *m, int num_bundles,
		 struct xfrm_kmaddress *k, struct net *net,
		 struct xfrm_encap_tmpl *encap, u32 if_id,
		 struct netlink_ext_ack *extack,
		 struct xfrm_user_offload *xuo);
#endif

/* Key manager notifications. */
int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport);
void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid);
int km_report(struct net *net, u8 proto, struct xfrm_selector *sel,
	      xfrm_address_t *addr);

void xfrm_input_init(void);
int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq);

/*
 * Algorithm table lookups, by index, numeric id or name. The prefix names
 * the table: aalg, ealg, calg or aead.
 */
void xfrm_probe_algs(void);
int xfrm_count_pfkey_auth_supported(void);
int xfrm_count_pfkey_enc_supported(void);
struct xfrm_algo_desc *xfrm_aalg_get_byidx(unsigned int idx);
struct xfrm_algo_desc *xfrm_ealg_get_byidx(unsigned int idx);
struct xfrm_algo_desc *xfrm_aalg_get_byid(int alg_id);
struct xfrm_algo_desc *xfrm_ealg_get_byid(int alg_id);
struct xfrm_algo_desc *xfrm_calg_get_byid(int alg_id);
struct xfrm_algo_desc *xfrm_aalg_get_byname(const char *name, int probe);
struct xfrm_algo_desc *xfrm_ealg_get_byname(const char *name, int probe);
struct xfrm_algo_desc *xfrm_calg_get_byname(const char *name, int probe);
struct xfrm_algo_desc *xfrm_aead_get_byname(const char *name, int icv_len,
					    int probe);

/* Compare two addresses as IPv6 addresses. */
static inline bool xfrm6_addr_equal(const xfrm_address_t *a,
				    const xfrm_address_t *b)
{
	return ipv6_addr_equal((const struct in6_addr *)a,
			       (const struct in6_addr *)b);
}

/*
 * Compare two addresses according to @family. AF_INET6 compares the full
 * IPv6 address; AF_INET and, because `default` shares its label, every
 * other family compare only the 32-bit a4 member.
 */
static inline bool xfrm_addr_equal(const xfrm_address_t *a,
				   const xfrm_address_t *b,
				   sa_family_t family)
{
	switch (family) {
	default:
	case AF_INET:
		return ((__force u32)a->a4 ^ (__force u32)b->a4) == 0;
	case AF_INET6:
		return xfrm6_addr_equal(a, b);
	}
}

/* Extract the direction from a policy index: its low three bits. */
static inline int xfrm_policy_id2dir(u32 index)
{
	return index & 7;
}

#ifdef CONFIG_XFRM
void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq);
int xfrm_replay_check(struct xfrm_state *x, struct sk_buff *skb, __be32 net_seq);
void xfrm_replay_notify(struct xfrm_state *x, int event);
int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb);
int xfrm_replay_recheck(struct xfrm_state *x, struct sk_buff *skb, __be32 net_seq);

/*
 * Report whether the netns xfrm netlink socket has listeners on the
 * XFRMNLGRP_AEVENTS group. Returns 0 when the socket pointer is NULL.
 * The socket pointer is read under rcu_read_lock().
 */
static inline int xfrm_aevent_is_on(struct net *net)
{
	struct sock *nlsk;
	int ret = 0;

	rcu_read_lock();
	nlsk = rcu_dereference(net->xfrm.nlsk);
	if (nlsk)
		ret = netlink_has_listeners(nlsk, XFRMNLGRP_AEVENTS);
	rcu_read_unlock();
	return ret;
}

/* Same as xfrm_aevent_is_on(), for the XFRMNLGRP_ACQUIRE group. */
static inline int xfrm_acquire_is_on(struct net *net)
{
	struct sock *nlsk;
	int ret = 0;

	rcu_read_lock();
	nlsk = rcu_dereference(net->xfrm.nlsk);
	if (nlsk)
		ret = netlink_has_listeners(nlsk, XFRMNLGRP_ACQUIRE);
	rcu_read_unlock();

	return ret;
}
#endif

/*
 * Size helpers: fixed header size plus alg_key_len divided by 8, rounded
 * up. NOTE(review): presumably alg_key_len is in bits and the result is
 * in bytes - confirm against the struct definitions.
 */
static inline unsigned int aead_len(struct xfrm_algo_aead *alg)
{
	return sizeof(*alg) + ((alg->alg_key_len + 7) / 8);
}

static inline unsigned int xfrm_alg_len(const struct xfrm_algo *alg)
{
	return sizeof(*alg) + ((alg->alg_key_len + 7) / 8);
}

static inline unsigned int xfrm_alg_auth_len(const struct xfrm_algo_auth *alg)
{
	return sizeof(*alg) + ((alg->alg_key_len + 7) / 8);
}

/* Fixed header size plus bmp_len 32-bit words. */
static inline unsigned int xfrm_replay_state_esn_len(struct xfrm_replay_state_esn *replay_esn)
{
	return sizeof(*replay_esn) + replay_esn->bmp_len * sizeof(__u32);
}

#ifdef CONFIG_XFRM_MIGRATE
/*
 * Duplicate both ESN replay windows of @orig into @x with GFP_KERNEL.
 *
 * Returns 0 on success or -ENOMEM when either kmemdup() fails. Both
 * orig->replay_esn and orig->preplay_esn are dereferenced unconditionally
 * to compute the copy length, so they must be non-NULL.
 * NOTE(review): when the second allocation fails, x->replay_esn stays
 * allocated; presumably the caller releases it with the state - confirm.
 */
static inline int xfrm_replay_clone(struct xfrm_state *x,
				     struct xfrm_state *orig)
{

	x->replay_esn = kmemdup(orig->replay_esn,
				xfrm_replay_state_esn_len(orig->replay_esn),
				GFP_KERNEL);
	if (!x->replay_esn)
		return -ENOMEM;
	x->preplay_esn = kmemdup(orig->preplay_esn,
				 xfrm_replay_state_esn_len(orig->preplay_esn),
				 GFP_KERNEL);
	if (!x->preplay_esn)
		return -ENOMEM;

	return 0;
}

/* Duplicate @orig, key included, with GFP_KERNEL; NULL if kmemdup() fails. */
static inline struct xfrm_algo_aead *xfrm_algo_aead_clone(struct xfrm_algo_aead *orig)
{
	return kmemdup(orig, aead_len(orig), GFP_KERNEL);
}


static inline struct
xfrm_algo *xfrm_algo_clone(struct xfrm_algo *orig) { return kmemdup(orig, xfrm_alg_len(orig), GFP_KERNEL); } static inline struct xfrm_algo_auth *xfrm_algo_auth_clone(struct xfrm_algo_auth *orig) { return kmemdup(orig, xfrm_alg_auth_len(orig), GFP_KERNEL); } static inline void xfrm_states_put(struct xfrm_state **states, int n) { int i; for (i = 0; i < n; i++) xfrm_state_put(*(states + i)); } static inline void xfrm_states_delete(struct xfrm_state **states, int n) { int i; for (i = 0; i < n; i++) xfrm_state_delete(*(states + i)); } #endif void __init xfrm_dev_init(void); #ifdef CONFIG_XFRM_OFFLOAD void xfrm_dev_resume(struct sk_buff *skb); void xfrm_dev_backlog(struct softnet_data *sd); struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again); int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo, struct netlink_ext_ack *extack); int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp, struct xfrm_user_offload *xuo, u8 dir, struct netlink_ext_ack *extack); bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x); void xfrm_dev_state_delete(struct xfrm_state *x); void xfrm_dev_state_free(struct xfrm_state *x); static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x) { struct xfrm_dev_offload *xso = &x->xso; struct net_device *dev = READ_ONCE(xso->dev); if (dev && dev->xfrmdev_ops->xdo_dev_state_advance_esn) dev->xfrmdev_ops->xdo_dev_state_advance_esn(x); } static inline bool xfrm_dst_offload_ok(struct dst_entry *dst) { struct xfrm_state *x = dst->xfrm; struct xfrm_dst *xdst; if (!x || !x->type_offload) return false; xdst = (struct xfrm_dst *) dst; if (!x->xso.offload_handle && !xdst->child->xfrm) return true; if (x->xso.offload_handle && (x->xso.dev == xfrm_dst_path(dst)->dev) && !xdst->child->xfrm) return true; return false; } static inline void xfrm_dev_policy_delete(struct xfrm_policy *x) { struct xfrm_dev_offload *xdo = &x->xdo; struct net_device *dev = 
xdo->dev;

	if (dev && dev->xfrmdev_ops && dev->xfrmdev_ops->xdo_dev_policy_delete)
		dev->xfrmdev_ops->xdo_dev_policy_delete(x);
}

/*
 * Release the offload resources of policy @x.
 *
 * When the policy has a device with xfrmdev_ops: call the optional
 * xdo_dev_policy_free() callback, clear xdo->dev, then drop the tracked
 * device reference with netdev_put(). Nothing happens otherwise.
 */
static inline void xfrm_dev_policy_free(struct xfrm_policy *x)
{
	struct xfrm_dev_offload *xdo = &x->xdo;
	struct net_device *dev = xdo->dev;

	if (dev && dev->xfrmdev_ops) {
		if (dev->xfrmdev_ops->xdo_dev_policy_free)
			dev->xfrmdev_ops->xdo_dev_policy_free(x);
		xdo->dev = NULL;
		netdev_put(dev, &xdo->dev_tracker);
	}
}
#else
/*
 * Without CONFIG_XFRM_OFFLOAD: the add helpers report success (0),
 * validate_xmit_xfrm() returns the skb unchanged, the *_ok() predicates
 * return false and everything else is a no-op.
 */
static inline void xfrm_dev_resume(struct sk_buff *skb)
{
}

static inline void xfrm_dev_backlog(struct softnet_data *sd)
{
}

static inline struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again)
{
	return skb;
}

static inline int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo, struct netlink_ext_ack *extack)
{
	return 0;
}

static inline void xfrm_dev_state_delete(struct xfrm_state *x)
{
}

static inline void xfrm_dev_state_free(struct xfrm_state *x)
{
}

static inline int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp,
				      struct xfrm_user_offload *xuo, u8 dir,
				      struct netlink_ext_ack *extack)
{
	return 0;
}

static inline void xfrm_dev_policy_delete(struct xfrm_policy *x)
{
}

static inline void xfrm_dev_policy_free(struct xfrm_policy *x)
{
}

static inline bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
{
	return false;
}

static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x)
{
}

static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
{
	return false;
}
#endif

/*
 * Load the mark from the XFRMA_MARK attribute into @m, or zero both
 * fields when the attribute is absent. Returns value AND mask.
 */
static inline int xfrm_mark_get(struct nlattr **attrs, struct xfrm_mark *m)
{
	if (attrs[XFRMA_MARK])
		memcpy(m, nla_data(attrs[XFRMA_MARK]), sizeof(struct xfrm_mark));
	else
		m->v = m->m = 0;

	return m->v & m->m;
}

/*
 * Emit @m as an XFRMA_MARK attribute unless both value and mask are zero.
 * Returns 0 when nothing is emitted, otherwise the nla_put() result.
 */
static inline int xfrm_mark_put(struct sk_buff *skb, const struct xfrm_mark *m)
{
	int ret = 0;

	if (m->m | m->v)
		ret = nla_put(skb, XFRMA_MARK, sizeof(struct xfrm_mark), m);
	return ret;
}

/*
 * Combine @mark with the state's smark: bits covered by the smark mask
 * come from the smark value, all other bits are kept from @mark.
 */
static inline __u32 xfrm_smark_get(__u32 mark, struct
xfrm_state *x)
{
	struct xfrm_mark *m = &x->props.smark;

	return (m->v & m->m) | (mark & ~m->m);
}

/*
 * Emit @if_id as an XFRMA_IF_ID attribute unless it is zero.
 * Returns 0 when nothing is emitted, otherwise the nla_put_u32() result.
 */
static inline int xfrm_if_id_put(struct sk_buff *skb, __u32 if_id)
{
	int ret = 0;

	if (if_id)
		ret = nla_put_u32(skb, XFRMA_IF_ID, if_id);
	return ret;
}

/*
 * Reject a packet that has a tunnel pointer recorded in its control block
 * (ip4 or ip6, chosen by @family) when the state's outer mode lacks
 * XFRM_MODE_FLAG_TUNNEL. Returns -EINVAL in that case, 0 otherwise.
 */
static inline int xfrm_tunnel_check(struct sk_buff *skb, struct xfrm_state *x,
				    unsigned int family)
{
	bool tunnel = false;

	switch(family) {
	case AF_INET:
		if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4)
			tunnel = true;
		break;
	case AF_INET6:
		if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6)
			tunnel = true;
		break;
	}
	if (tunnel && !(x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL))
		return -EINVAL;

	return 0;
}

extern const int xfrm_msg_min[XFRM_NR_MSGTYPES];
extern const struct nla_policy xfrma_policy[XFRMA_MAX+1];

/* Callbacks provided by the compat translator, plus its owning module. */
struct xfrm_translator {
	/* Allocate frag_list and put compat translation there */
	int (*alloc_compat)(struct sk_buff *skb, const struct nlmsghdr *src);

	/* Allocate nlmsg with 64-bit translation of received 32-bit message */
	struct nlmsghdr *(*rcv_msg_compat)(const struct nlmsghdr *nlh,
			int maxtype, const struct nla_policy *policy,
			struct netlink_ext_ack *extack);

	/* Translate 32-bit user_policy from sockptr */
	int (*xlate_user_policy_sockptr)(u8 **pdata32, int optlen);

	struct module *owner;
};

#if IS_ENABLED(CONFIG_XFRM_USER_COMPAT)
extern int xfrm_register_translator(struct xfrm_translator *xtr);
extern int xfrm_unregister_translator(struct xfrm_translator *xtr);
extern struct xfrm_translator *xfrm_get_translator(void);
extern void xfrm_put_translator(struct xfrm_translator *xtr);
#else
/* Without CONFIG_XFRM_USER_COMPAT there is never a translator. */
static inline struct xfrm_translator *xfrm_get_translator(void)
{
	return NULL;
}
static inline void xfrm_put_translator(struct xfrm_translator *xtr)
{
}
#endif

#if IS_ENABLED(CONFIG_IPV6)
/*
 * Return the socket's DONTFRAG bit for AF_INET6 sockets whose protocol is
 * UDP or RAW; false for a NULL socket, another family or another
 * protocol.
 */
static inline bool xfrm6_local_dontfrag(const struct sock *sk)
{
	int proto;

	if (!sk || sk->sk_family != AF_INET6)
		return false;

	proto = sk->sk_protocol;
	if (proto == IPPROTO_UDP || proto == IPPROTO_RAW)
		return inet6_test_bit(DONTFRAG, sk);

	return false;
}
#endif

#if (IS_BUILTIN(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
    (IS_MODULE(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))

extern struct metadata_dst __percpu *xfrm_bpf_md_dst;

int register_xfrm_interface_bpf(void);

#else

/* BTF support for the configured build form is missing: report success. */
static inline int register_xfrm_interface_bpf(void)
{
	return 0;
}

#endif

#if IS_ENABLED(CONFIG_DEBUG_INFO_BTF)
int register_xfrm_state_bpf(void);
#else
/* Without CONFIG_DEBUG_INFO_BTF registration is a successful no-op. */
static inline int register_xfrm_state_bpf(void)
{
	return 0;
}
#endif

int xfrm_nat_keepalive_init(unsigned short family);
void xfrm_nat_keepalive_fini(unsigned short family);
int xfrm_nat_keepalive_net_init(struct net *net);
int xfrm_nat_keepalive_net_fini(struct net *net);
void xfrm_nat_keepalive_state_updated(struct xfrm_state *x);

#endif	/* _NET_XFRM_H */
41 2 40 39 3 40 40 40 2 38 40 3 38 3 38 2 37 2 37 3 38 3 38 3 37 3 38 3 38 3 38 2 37 3 38 3 38 3 38 3 38 3 38 3 38 3 38 3 38 3 38 3 40 41 1 40 38 40 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 37 38 38 38 5 33 36 38 30 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 
465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 // SPDX-License-Identifier: GPL-2.0-only #include <linux/dim.h> #include "netlink.h" #include "common.h" struct coalesce_req_info { struct ethnl_req_info base; }; struct coalesce_reply_data { struct ethnl_reply_data base; struct ethtool_coalesce coalesce; struct kernel_ethtool_coalesce kernel_coalesce; u32 supported_params; }; #define COALESCE_REPDATA(__reply_base) \ container_of(__reply_base, struct coalesce_reply_data, base) #define __SUPPORTED_OFFSET ETHTOOL_A_COALESCE_RX_USECS static u32 attr_to_mask(unsigned int attr_type) { return BIT(attr_type - __SUPPORTED_OFFSET); } /* build time check that indices in ethtool_ops::supported_coalesce_params * match corresponding attribute types with an offset */ #define __CHECK_SUPPORTED_OFFSET(x) \ static_assert((ETHTOOL_ ## x) == \ BIT((ETHTOOL_A_ ## x) - __SUPPORTED_OFFSET)) __CHECK_SUPPORTED_OFFSET(COALESCE_RX_USECS); __CHECK_SUPPORTED_OFFSET(COALESCE_RX_MAX_FRAMES); __CHECK_SUPPORTED_OFFSET(COALESCE_RX_USECS_IRQ); __CHECK_SUPPORTED_OFFSET(COALESCE_RX_MAX_FRAMES_IRQ); __CHECK_SUPPORTED_OFFSET(COALESCE_TX_USECS); __CHECK_SUPPORTED_OFFSET(COALESCE_TX_MAX_FRAMES); __CHECK_SUPPORTED_OFFSET(COALESCE_TX_USECS_IRQ); __CHECK_SUPPORTED_OFFSET(COALESCE_TX_MAX_FRAMES_IRQ); 
__CHECK_SUPPORTED_OFFSET(COALESCE_STATS_BLOCK_USECS); __CHECK_SUPPORTED_OFFSET(COALESCE_USE_ADAPTIVE_RX); __CHECK_SUPPORTED_OFFSET(COALESCE_USE_ADAPTIVE_TX); __CHECK_SUPPORTED_OFFSET(COALESCE_PKT_RATE_LOW); __CHECK_SUPPORTED_OFFSET(COALESCE_RX_USECS_LOW); __CHECK_SUPPORTED_OFFSET(COALESCE_RX_MAX_FRAMES_LOW); __CHECK_SUPPORTED_OFFSET(COALESCE_TX_USECS_LOW); __CHECK_SUPPORTED_OFFSET(COALESCE_TX_MAX_FRAMES_LOW); __CHECK_SUPPORTED_OFFSET(COALESCE_PKT_RATE_HIGH); __CHECK_SUPPORTED_OFFSET(COALESCE_RX_USECS_HIGH); __CHECK_SUPPORTED_OFFSET(COALESCE_RX_MAX_FRAMES_HIGH); __CHECK_SUPPORTED_OFFSET(COALESCE_TX_USECS_HIGH); __CHECK_SUPPORTED_OFFSET(COALESCE_TX_MAX_FRAMES_HIGH); __CHECK_SUPPORTED_OFFSET(COALESCE_RATE_SAMPLE_INTERVAL); const struct nla_policy ethnl_coalesce_get_policy[] = { [ETHTOOL_A_COALESCE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), }; static int coalesce_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { struct coalesce_reply_data *data = COALESCE_REPDATA(reply_base); struct net_device *dev = reply_base->dev; int ret; if (!dev->ethtool_ops->get_coalesce) return -EOPNOTSUPP; data->supported_params = dev->ethtool_ops->supported_coalesce_params; ret = ethnl_ops_begin(dev); if (ret < 0) return ret; ret = dev->ethtool_ops->get_coalesce(dev, &data->coalesce, &data->kernel_coalesce, info->extack); ethnl_ops_complete(dev); return ret; } static int coalesce_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { int modersz = nla_total_size(0) + /* _PROFILE_IRQ_MODERATION, nest */ nla_total_size(sizeof(u32)) + /* _IRQ_MODERATION_USEC */ nla_total_size(sizeof(u32)) + /* _IRQ_MODERATION_PKTS */ nla_total_size(sizeof(u32)); /* _IRQ_MODERATION_COMPS */ int total_modersz = nla_total_size(0) + /* _{R,T}X_PROFILE, nest */ modersz * NET_DIM_PARAMS_NUM_PROFILES; return nla_total_size(sizeof(u32)) + /* _RX_USECS */ nla_total_size(sizeof(u32)) + /* _RX_MAX_FRAMES */ 
nla_total_size(sizeof(u32)) + /* _RX_USECS_IRQ */ nla_total_size(sizeof(u32)) + /* _RX_MAX_FRAMES_IRQ */ nla_total_size(sizeof(u32)) + /* _TX_USECS */ nla_total_size(sizeof(u32)) + /* _TX_MAX_FRAMES */ nla_total_size(sizeof(u32)) + /* _TX_USECS_IRQ */ nla_total_size(sizeof(u32)) + /* _TX_MAX_FRAMES_IRQ */ nla_total_size(sizeof(u32)) + /* _STATS_BLOCK_USECS */ nla_total_size(sizeof(u8)) + /* _USE_ADAPTIVE_RX */ nla_total_size(sizeof(u8)) + /* _USE_ADAPTIVE_TX */ nla_total_size(sizeof(u32)) + /* _PKT_RATE_LOW */ nla_total_size(sizeof(u32)) + /* _RX_USECS_LOW */ nla_total_size(sizeof(u32)) + /* _RX_MAX_FRAMES_LOW */ nla_total_size(sizeof(u32)) + /* _TX_USECS_LOW */ nla_total_size(sizeof(u32)) + /* _TX_MAX_FRAMES_LOW */ nla_total_size(sizeof(u32)) + /* _PKT_RATE_HIGH */ nla_total_size(sizeof(u32)) + /* _RX_USECS_HIGH */ nla_total_size(sizeof(u32)) + /* _RX_MAX_FRAMES_HIGH */ nla_total_size(sizeof(u32)) + /* _TX_USECS_HIGH */ nla_total_size(sizeof(u32)) + /* _TX_MAX_FRAMES_HIGH */ nla_total_size(sizeof(u32)) + /* _RATE_SAMPLE_INTERVAL */ nla_total_size(sizeof(u8)) + /* _USE_CQE_MODE_TX */ nla_total_size(sizeof(u8)) + /* _USE_CQE_MODE_RX */ nla_total_size(sizeof(u32)) + /* _TX_AGGR_MAX_BYTES */ nla_total_size(sizeof(u32)) + /* _TX_AGGR_MAX_FRAMES */ nla_total_size(sizeof(u32)) + /* _TX_AGGR_TIME_USECS */ total_modersz * 2; /* _{R,T}X_PROFILE */ } static bool coalesce_put_u32(struct sk_buff *skb, u16 attr_type, u32 val, u32 supported_params) { if (!val && !(supported_params & attr_to_mask(attr_type))) return false; return nla_put_u32(skb, attr_type, val); } static bool coalesce_put_bool(struct sk_buff *skb, u16 attr_type, u32 val, u32 supported_params) { if (!val && !(supported_params & attr_to_mask(attr_type))) return false; return nla_put_u8(skb, attr_type, !!val); } /** * coalesce_put_profile - fill reply with a nla nest with four child nla nests. 
* @skb: socket buffer the message is stored in
 * @attr_type: nest attr type ETHTOOL_A_COALESCE_*X_PROFILE
 * @profile: data passed to userspace
 * @coal_flags: modifiable parameters supported by the driver
 *
 * Put a dim profile nest attribute. Refer to ETHTOOL_A_PROFILE_IRQ_MODERATION.
 * One ETHTOOL_A_PROFILE_IRQ_MODERATION child nest is written per profile
 * entry; inside it only the fields whose DIM_COALESCE_* bit is set in
 * @coal_flags are emitted. Nothing is written when @profile is NULL or
 * @coal_flags is zero. On failure every nest opened so far is cancelled.
 *
 * Return: 0 on success or a negative error code.
 */
static int coalesce_put_profile(struct sk_buff *skb, u16 attr_type,
				const struct dim_cq_moder *profile,
				u8 coal_flags)
{
	struct nlattr *profile_attr, *moder_attr;
	int i, ret;

	/* Nothing to report: not an error. */
	if (!profile || !coal_flags)
		return 0;

	profile_attr = nla_nest_start(skb, attr_type);
	if (!profile_attr)
		return -EMSGSIZE;

	for (i = 0; i < NET_DIM_PARAMS_NUM_PROFILES; i++) {
		moder_attr = nla_nest_start(skb,
					    ETHTOOL_A_PROFILE_IRQ_MODERATION);
		if (!moder_attr) {
			ret = -EMSGSIZE;
			goto cancel_profile;
		}

		if (coal_flags & DIM_COALESCE_USEC) {
			ret = nla_put_u32(skb, ETHTOOL_A_IRQ_MODERATION_USEC,
					  profile[i].usec);
			if (ret)
				goto cancel_moder;
		}

		if (coal_flags & DIM_COALESCE_PKTS) {
			ret = nla_put_u32(skb, ETHTOOL_A_IRQ_MODERATION_PKTS,
					  profile[i].pkts);
			if (ret)
				goto cancel_moder;
		}

		if (coal_flags & DIM_COALESCE_COMPS) {
			ret = nla_put_u32(skb, ETHTOOL_A_IRQ_MODERATION_COMPS,
					  profile[i].comps);
			if (ret)
				goto cancel_moder;
		}

		nla_nest_end(skb, moder_attr);
	}

	nla_nest_end(skb, profile_attr);

	return 0;

	/* Unwind innermost first: the open entry nest, then the outer nest. */
cancel_moder:
	nla_nest_cancel(skb, moder_attr);
cancel_profile:
	nla_nest_cancel(skb, profile_attr);
	return ret;
}

/*
 * Write the coalesce reply attributes into @skb.
 *
 * The scalar attributes are emitted in attribute order through
 * coalesce_put_u32() / coalesce_put_bool(); the || chain stops at the
 * first failing put and the function then returns -EMSGSIZE. After that,
 * if the request's device has an irq_moder structure, the RX and/or TX
 * DIM profiles selected by its profile_flags are appended. The profile
 * pointers are read with rcu_dereference() under rcu_read_lock().
 *
 * Returns 0 on success or a negative error code.
 */
static int coalesce_fill_reply(struct sk_buff *skb,
			       const struct ethnl_req_info *req_base,
			       const struct ethnl_reply_data *reply_base)
{
	const struct coalesce_reply_data *data = COALESCE_REPDATA(reply_base);
	const struct kernel_ethtool_coalesce *kcoal = &data->kernel_coalesce;
	const struct ethtool_coalesce *coal = &data->coalesce;
	u32 supported = data->supported_params;
	struct dim_irq_moder *moder;
	int ret = 0;

	if (coalesce_put_u32(skb, ETHTOOL_A_COALESCE_RX_USECS,
			     coal->rx_coalesce_usecs, supported) ||
	    coalesce_put_u32(skb, ETHTOOL_A_COALESCE_RX_MAX_FRAMES,
			     coal->rx_max_coalesced_frames, supported) ||
	    coalesce_put_u32(skb, ETHTOOL_A_COALESCE_RX_USECS_IRQ,
			     coal->rx_coalesce_usecs_irq, supported) ||
	    coalesce_put_u32(skb, ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ,
			     coal->rx_max_coalesced_frames_irq, supported) ||
	    coalesce_put_u32(skb, ETHTOOL_A_COALESCE_TX_USECS,
			     coal->tx_coalesce_usecs, supported) ||
	    coalesce_put_u32(skb, ETHTOOL_A_COALESCE_TX_MAX_FRAMES,
			     coal->tx_max_coalesced_frames, supported) ||
	    coalesce_put_u32(skb, ETHTOOL_A_COALESCE_TX_USECS_IRQ,
			     coal->tx_coalesce_usecs_irq, supported) ||
	    coalesce_put_u32(skb, ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ,
			     coal->tx_max_coalesced_frames_irq, supported) ||
	    coalesce_put_u32(skb, ETHTOOL_A_COALESCE_STATS_BLOCK_USECS,
			     coal->stats_block_coalesce_usecs, supported) ||
	    coalesce_put_bool(skb, ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX,
			      coal->use_adaptive_rx_coalesce, supported) ||
	    coalesce_put_bool(skb, ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX,
			      coal->use_adaptive_tx_coalesce, supported) ||
	    coalesce_put_u32(skb, ETHTOOL_A_COALESCE_PKT_RATE_LOW,
			     coal->pkt_rate_low, supported) ||
	    coalesce_put_u32(skb, ETHTOOL_A_COALESCE_RX_USECS_LOW,
			     coal->rx_coalesce_usecs_low, supported) ||
	    coalesce_put_u32(skb, ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW,
			     coal->rx_max_coalesced_frames_low, supported) ||
	    coalesce_put_u32(skb, ETHTOOL_A_COALESCE_TX_USECS_LOW,
			     coal->tx_coalesce_usecs_low, supported) ||
	    coalesce_put_u32(skb, ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW,
			     coal->tx_max_coalesced_frames_low, supported) ||
	    coalesce_put_u32(skb, ETHTOOL_A_COALESCE_PKT_RATE_HIGH,
			     coal->pkt_rate_high, supported) ||
	    coalesce_put_u32(skb, ETHTOOL_A_COALESCE_RX_USECS_HIGH,
			     coal->rx_coalesce_usecs_high, supported) ||
	    coalesce_put_u32(skb, ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH,
			     coal->rx_max_coalesced_frames_high, supported) ||
	    coalesce_put_u32(skb, ETHTOOL_A_COALESCE_TX_USECS_HIGH,
			     coal->tx_coalesce_usecs_high, supported) ||
	    coalesce_put_u32(skb, ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH,
			     coal->tx_max_coalesced_frames_high, supported) ||
	    coalesce_put_u32(skb, ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL,
			     coal->rate_sample_interval, supported) ||
	    coalesce_put_bool(skb, ETHTOOL_A_COALESCE_USE_CQE_MODE_TX,
			      kcoal->use_cqe_mode_tx, supported) ||
	    coalesce_put_bool(skb, ETHTOOL_A_COALESCE_USE_CQE_MODE_RX,
			      kcoal->use_cqe_mode_rx, supported) ||
	    coalesce_put_u32(skb, ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES,
			     kcoal->tx_aggr_max_bytes, supported) ||
	    coalesce_put_u32(skb, ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES,
			     kcoal->tx_aggr_max_frames, supported) ||
	    coalesce_put_u32(skb, ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS,
			     kcoal->tx_aggr_time_usecs, supported))
		return -EMSGSIZE;

	/* No device or no moderation data: the scalar part is the reply. */
	if (!req_base->dev || !req_base->dev->irq_moder)
		return 0;

	moder = req_base->dev->irq_moder;
	rcu_read_lock();
	if (moder->profile_flags & DIM_PROFILE_RX) {
		ret = coalesce_put_profile(skb, ETHTOOL_A_COALESCE_RX_PROFILE,
					   rcu_dereference(moder->rx_profile),
					   moder->coal_flags);
		if (ret)
			goto out;
	}

	if (moder->profile_flags & DIM_PROFILE_TX)
		ret = coalesce_put_profile(skb, ETHTOOL_A_COALESCE_TX_PROFILE,
					   rcu_dereference(moder->tx_profile),
					   moder->coal_flags);

out:
	rcu_read_unlock();
	return ret;
}

/* COALESCE_SET */

/* Fields accepted inside one ETHTOOL_A_PROFILE_IRQ_MODERATION nest. */
static const struct nla_policy coalesce_irq_moderation_policy[] = {
	[ETHTOOL_A_IRQ_MODERATION_USEC]	= { .type = NLA_U32 },
	[ETHTOOL_A_IRQ_MODERATION_PKTS]	= { .type = NLA_U32 },
	[ETHTOOL_A_IRQ_MODERATION_COMPS] = { .type = NLA_U32 },
};

/* A profile nest contains ETHTOOL_A_PROFILE_IRQ_MODERATION nests. */
static const struct nla_policy coalesce_profile_policy[] = {
	[ETHTOOL_A_PROFILE_IRQ_MODERATION] =
		NLA_POLICY_NESTED(coalesce_irq_moderation_policy),
};

const struct nla_policy ethnl_coalesce_set_policy[] = {
	[ETHTOOL_A_COALESCE_HEADER]		=
		NLA_POLICY_NESTED(ethnl_header_policy),
	[ETHTOOL_A_COALESCE_RX_USECS]		= { .type = NLA_U32 },
	[ETHTOOL_A_COALESCE_RX_MAX_FRAMES]	= { .type = NLA_U32 },
	[ETHTOOL_A_COALESCE_RX_USECS_IRQ]	= { .type = NLA_U32 },
	[ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ]	= { .type = NLA_U32 },
	[ETHTOOL_A_COALESCE_TX_USECS]		= { .type = NLA_U32 },
	[ETHTOOL_A_COALESCE_TX_MAX_FRAMES]	= { .type = NLA_U32 },
[ETHTOOL_A_COALESCE_TX_USECS_IRQ] = { .type = NLA_U32 }, [ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ] = { .type = NLA_U32 }, [ETHTOOL_A_COALESCE_STATS_BLOCK_USECS] = { .type = NLA_U32 }, [ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX] = { .type = NLA_U8 }, [ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX] = { .type = NLA_U8 }, [ETHTOOL_A_COALESCE_PKT_RATE_LOW] = { .type = NLA_U32 }, [ETHTOOL_A_COALESCE_RX_USECS_LOW] = { .type = NLA_U32 }, [ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW] = { .type = NLA_U32 }, [ETHTOOL_A_COALESCE_TX_USECS_LOW] = { .type = NLA_U32 }, [ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW] = { .type = NLA_U32 }, [ETHTOOL_A_COALESCE_PKT_RATE_HIGH] = { .type = NLA_U32 }, [ETHTOOL_A_COALESCE_RX_USECS_HIGH] = { .type = NLA_U32 }, [ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH] = { .type = NLA_U32 }, [ETHTOOL_A_COALESCE_TX_USECS_HIGH] = { .type = NLA_U32 }, [ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH] = { .type = NLA_U32 }, [ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL] = { .type = NLA_U32 }, [ETHTOOL_A_COALESCE_USE_CQE_MODE_TX] = NLA_POLICY_MAX(NLA_U8, 1), [ETHTOOL_A_COALESCE_USE_CQE_MODE_RX] = NLA_POLICY_MAX(NLA_U8, 1), [ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES] = { .type = NLA_U32 }, [ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES] = { .type = NLA_U32 }, [ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS] = { .type = NLA_U32 }, [ETHTOOL_A_COALESCE_RX_PROFILE] = NLA_POLICY_NESTED(coalesce_profile_policy), [ETHTOOL_A_COALESCE_TX_PROFILE] = NLA_POLICY_NESTED(coalesce_profile_policy), }; static int ethnl_set_coalesce_validate(struct ethnl_req_info *req_info, struct genl_info *info) { const struct ethtool_ops *ops = req_info->dev->ethtool_ops; struct dim_irq_moder *irq_moder = req_info->dev->irq_moder; struct nlattr **tb = info->attrs; u32 supported_params; u16 a; if (!ops->get_coalesce || !ops->set_coalesce) return -EOPNOTSUPP; /* make sure that only supported parameters are present */ supported_params = ops->supported_coalesce_params; if (irq_moder && irq_moder->profile_flags & DIM_PROFILE_RX) supported_params |= 
ETHTOOL_COALESCE_RX_PROFILE; if (irq_moder && irq_moder->profile_flags & DIM_PROFILE_TX) supported_params |= ETHTOOL_COALESCE_TX_PROFILE; for (a = ETHTOOL_A_COALESCE_RX_USECS; a < __ETHTOOL_A_COALESCE_CNT; a++) if (tb[a] && !(supported_params & attr_to_mask(a))) { NL_SET_ERR_MSG_ATTR(info->extack, tb[a], "cannot modify an unsupported parameter"); return -EINVAL; } return 1; } /** * ethnl_update_irq_moder - update a specific field in the given profile * @irq_moder: place that collects dim related information * @irq_field: field in profile to modify * @attr_type: attr type ETHTOOL_A_IRQ_MODERATION_* * @tb: netlink attribute with new values or null * @coal_bit: DIM_COALESCE_* bit from coal_flags * @mod: pointer to bool for modification tracking * @extack: netlink extended ack * * Return: 0 on success or a negative error code. */ static int ethnl_update_irq_moder(struct dim_irq_moder *irq_moder, u16 *irq_field, u16 attr_type, struct nlattr **tb, u8 coal_bit, bool *mod, struct netlink_ext_ack *extack) { int ret = 0; u32 val; if (!tb[attr_type]) return 0; if (irq_moder->coal_flags & coal_bit) { val = nla_get_u32(tb[attr_type]); if (*irq_field == val) return 0; *irq_field = val; *mod = true; } else { NL_SET_BAD_ATTR(extack, tb[attr_type]); ret = -EOPNOTSUPP; } return ret; } /** * ethnl_update_profile - get a profile nest with child nests from userspace. * @dev: netdevice to update the profile * @dst: profile get from the driver and modified by ethnl_update_profile. * @nests: nest attr ETHTOOL_A_COALESCE_*X_PROFILE to set profile. * @mod: pointer to bool for modification tracking * @extack: Netlink extended ack * * Layout of nests: * Nested ETHTOOL_A_COALESCE_*X_PROFILE attr * Nested ETHTOOL_A_PROFILE_IRQ_MODERATION attr * ETHTOOL_A_IRQ_MODERATION_USEC attr * ETHTOOL_A_IRQ_MODERATION_PKTS attr * ETHTOOL_A_IRQ_MODERATION_COMPS attr * ... 
* Nested ETHTOOL_A_PROFILE_IRQ_MODERATION attr * ETHTOOL_A_IRQ_MODERATION_USEC attr * ETHTOOL_A_IRQ_MODERATION_PKTS attr * ETHTOOL_A_IRQ_MODERATION_COMPS attr * * Return: 0 on success or a negative error code. */ static int ethnl_update_profile(struct net_device *dev, struct dim_cq_moder __rcu **dst, const struct nlattr *nests, bool *mod, struct netlink_ext_ack *extack) { int len_irq_moder = ARRAY_SIZE(coalesce_irq_moderation_policy); struct nlattr *tb[ARRAY_SIZE(coalesce_irq_moderation_policy)]; struct dim_irq_moder *irq_moder = dev->irq_moder; struct dim_cq_moder *new_profile, *old_profile; int ret, rem, i = 0, len; struct nlattr *nest; if (!nests) return 0; if (!*dst) return -EOPNOTSUPP; old_profile = rtnl_dereference(*dst); len = NET_DIM_PARAMS_NUM_PROFILES * sizeof(*old_profile); new_profile = kmemdup(old_profile, len, GFP_KERNEL); if (!new_profile) return -ENOMEM; nla_for_each_nested_type(nest, ETHTOOL_A_PROFILE_IRQ_MODERATION, nests, rem) { ret = nla_parse_nested(tb, len_irq_moder - 1, nest, coalesce_irq_moderation_policy, extack); if (ret) goto err_out; ret = ethnl_update_irq_moder(irq_moder, &new_profile[i].usec, ETHTOOL_A_IRQ_MODERATION_USEC, tb, DIM_COALESCE_USEC, mod, extack); if (ret) goto err_out; ret = ethnl_update_irq_moder(irq_moder, &new_profile[i].pkts, ETHTOOL_A_IRQ_MODERATION_PKTS, tb, DIM_COALESCE_PKTS, mod, extack); if (ret) goto err_out; ret = ethnl_update_irq_moder(irq_moder, &new_profile[i].comps, ETHTOOL_A_IRQ_MODERATION_COMPS, tb, DIM_COALESCE_COMPS, mod, extack); if (ret) goto err_out; i++; } /* After the profile is modified, dim itself is a dynamic * mechanism and will quickly fit to the appropriate * coalescing parameters according to the new profile. 
*/ rcu_assign_pointer(*dst, new_profile); kfree_rcu(old_profile, rcu); return 0; err_out: kfree(new_profile); return ret; } static int __ethnl_set_coalesce(struct ethnl_req_info *req_info, struct genl_info *info, bool *dual_change) { struct kernel_ethtool_coalesce kernel_coalesce = {}; struct net_device *dev = req_info->dev; struct ethtool_coalesce coalesce = {}; bool mod_mode = false, mod = false; struct nlattr **tb = info->attrs; int ret; ret = dev->ethtool_ops->get_coalesce(dev, &coalesce, &kernel_coalesce, info->extack); if (ret < 0) return ret; /* Update values */ ethnl_update_u32(&coalesce.rx_coalesce_usecs, tb[ETHTOOL_A_COALESCE_RX_USECS], &mod); ethnl_update_u32(&coalesce.rx_max_coalesced_frames, tb[ETHTOOL_A_COALESCE_RX_MAX_FRAMES], &mod); ethnl_update_u32(&coalesce.rx_coalesce_usecs_irq, tb[ETHTOOL_A_COALESCE_RX_USECS_IRQ], &mod); ethnl_update_u32(&coalesce.rx_max_coalesced_frames_irq, tb[ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ], &mod); ethnl_update_u32(&coalesce.tx_coalesce_usecs, tb[ETHTOOL_A_COALESCE_TX_USECS], &mod); ethnl_update_u32(&coalesce.tx_max_coalesced_frames, tb[ETHTOOL_A_COALESCE_TX_MAX_FRAMES], &mod); ethnl_update_u32(&coalesce.tx_coalesce_usecs_irq, tb[ETHTOOL_A_COALESCE_TX_USECS_IRQ], &mod); ethnl_update_u32(&coalesce.tx_max_coalesced_frames_irq, tb[ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ], &mod); ethnl_update_u32(&coalesce.stats_block_coalesce_usecs, tb[ETHTOOL_A_COALESCE_STATS_BLOCK_USECS], &mod); ethnl_update_u32(&coalesce.pkt_rate_low, tb[ETHTOOL_A_COALESCE_PKT_RATE_LOW], &mod); ethnl_update_u32(&coalesce.rx_coalesce_usecs_low, tb[ETHTOOL_A_COALESCE_RX_USECS_LOW], &mod); ethnl_update_u32(&coalesce.rx_max_coalesced_frames_low, tb[ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW], &mod); ethnl_update_u32(&coalesce.tx_coalesce_usecs_low, tb[ETHTOOL_A_COALESCE_TX_USECS_LOW], &mod); ethnl_update_u32(&coalesce.tx_max_coalesced_frames_low, tb[ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW], &mod); ethnl_update_u32(&coalesce.pkt_rate_high, 
tb[ETHTOOL_A_COALESCE_PKT_RATE_HIGH], &mod); ethnl_update_u32(&coalesce.rx_coalesce_usecs_high, tb[ETHTOOL_A_COALESCE_RX_USECS_HIGH], &mod); ethnl_update_u32(&coalesce.rx_max_coalesced_frames_high, tb[ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH], &mod); ethnl_update_u32(&coalesce.tx_coalesce_usecs_high, tb[ETHTOOL_A_COALESCE_TX_USECS_HIGH], &mod); ethnl_update_u32(&coalesce.tx_max_coalesced_frames_high, tb[ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH], &mod); ethnl_update_u32(&coalesce.rate_sample_interval, tb[ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL], &mod); ethnl_update_u32(&kernel_coalesce.tx_aggr_max_bytes, tb[ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES], &mod); ethnl_update_u32(&kernel_coalesce.tx_aggr_max_frames, tb[ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES], &mod); ethnl_update_u32(&kernel_coalesce.tx_aggr_time_usecs, tb[ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS], &mod); if (dev->irq_moder && dev->irq_moder->profile_flags & DIM_PROFILE_RX) { ret = ethnl_update_profile(dev, &dev->irq_moder->rx_profile, tb[ETHTOOL_A_COALESCE_RX_PROFILE], &mod, info->extack); if (ret < 0) return ret; } if (dev->irq_moder && dev->irq_moder->profile_flags & DIM_PROFILE_TX) { ret = ethnl_update_profile(dev, &dev->irq_moder->tx_profile, tb[ETHTOOL_A_COALESCE_TX_PROFILE], &mod, info->extack); if (ret < 0) return ret; } /* Update operation modes */ ethnl_update_bool32(&coalesce.use_adaptive_rx_coalesce, tb[ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX], &mod_mode); ethnl_update_bool32(&coalesce.use_adaptive_tx_coalesce, tb[ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX], &mod_mode); ethnl_update_u8(&kernel_coalesce.use_cqe_mode_tx, tb[ETHTOOL_A_COALESCE_USE_CQE_MODE_TX], &mod_mode); ethnl_update_u8(&kernel_coalesce.use_cqe_mode_rx, tb[ETHTOOL_A_COALESCE_USE_CQE_MODE_RX], &mod_mode); *dual_change = mod && mod_mode; if (!mod && !mod_mode) return 0; ret = dev->ethtool_ops->set_coalesce(dev, &coalesce, &kernel_coalesce, info->extack); return ret < 0 ? 
ret : 1; } static int ethnl_set_coalesce(struct ethnl_req_info *req_info, struct genl_info *info) { bool dual_change; int err, ret; /* SET_COALESCE may change operation mode and parameters in one call. * Changing operation mode may cause the driver to reset the parameter * values, and therefore ignore user input (driver does not know which * parameters come from user and which are echoed back from ->get). * To not complicate the drivers if user tries to change both the mode * and parameters at once - call the driver twice. */ err = __ethnl_set_coalesce(req_info, info, &dual_change); if (err < 0) return err; ret = err; if (ret && dual_change) { err = __ethnl_set_coalesce(req_info, info, &dual_change); if (err < 0) return err; } return ret; } const struct ethnl_request_ops ethnl_coalesce_request_ops = { .request_cmd = ETHTOOL_MSG_COALESCE_GET, .reply_cmd = ETHTOOL_MSG_COALESCE_GET_REPLY, .hdr_attr = ETHTOOL_A_COALESCE_HEADER, .req_info_size = sizeof(struct coalesce_req_info), .reply_data_size = sizeof(struct coalesce_reply_data), .prepare_data = coalesce_prepare_data, .reply_size = coalesce_reply_size, .fill_reply = coalesce_fill_reply, .set_validate = ethnl_set_coalesce_validate, .set = ethnl_set_coalesce, .set_ntf_cmd = ETHTOOL_MSG_COALESCE_NTF, };
35 19 9 3 1 1 1 9 16 11 1 3 1 2 3 1 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 // SPDX-License-Identifier: GPL-2.0-only /* * System calls implementing the Linux Security Module API. * * Copyright (C) 2022 Casey Schaufler <casey@schaufler-ca.com> * Copyright (C) 2022 Intel Corporation */ #include <asm/current.h> #include <linux/compiler_types.h> #include <linux/err.h> #include <linux/errno.h> #include <linux/security.h> #include <linux/stddef.h> #include <linux/syscalls.h> #include <linux/types.h> #include <linux/lsm_hooks.h> #include <uapi/linux/lsm.h> #include "lsm.h" /** * lsm_name_to_attr - map an LSM attribute name to its ID * @name: name of the attribute * * Returns the LSM attribute value associated with @name, or 0 if * there is no mapping. */ u64 lsm_name_to_attr(const char *name) { if (!strcmp(name, "current")) return LSM_ATTR_CURRENT; if (!strcmp(name, "exec")) return LSM_ATTR_EXEC; if (!strcmp(name, "fscreate")) return LSM_ATTR_FSCREATE; if (!strcmp(name, "keycreate")) return LSM_ATTR_KEYCREATE; if (!strcmp(name, "prev")) return LSM_ATTR_PREV; if (!strcmp(name, "sockcreate")) return LSM_ATTR_SOCKCREATE; return LSM_ATTR_UNDEF; } /** * sys_lsm_set_self_attr - Set current task's security module attribute * @attr: which attribute to set * @ctx: the LSM contexts * @size: size of @ctx * @flags: reserved for future use * * Sets the calling task's LSM context. On success this function * returns 0. If the attribute specified cannot be set a negative * value indicating the reason for the error is returned. 
*/ SYSCALL_DEFINE4(lsm_set_self_attr, unsigned int, attr, struct lsm_ctx __user *, ctx, u32, size, u32, flags) { return security_setselfattr(attr, ctx, size, flags); } /** * sys_lsm_get_self_attr - Return current task's security module attributes * @attr: which attribute to return * @ctx: the user-space destination for the information, or NULL * @size: pointer to the size of space available to receive the data * @flags: special handling options. LSM_FLAG_SINGLE indicates that only * attributes associated with the LSM identified in the passed @ctx be * reported. * * Returns the calling task's LSM contexts. On success this * function returns the number of @ctx array elements. This value * may be zero if there are no LSM contexts assigned. If @size is * insufficient to contain the return data -E2BIG is returned and * @size is set to the minimum required size. In all other cases * a negative value indicating the error is returned. */ SYSCALL_DEFINE4(lsm_get_self_attr, unsigned int, attr, struct lsm_ctx __user *, ctx, u32 __user *, size, u32, flags) { return security_getselfattr(attr, ctx, size, flags); } /** * sys_lsm_list_modules - Return a list of the active security modules * @ids: the LSM module ids * @size: pointer to size of @ids, updated on return * @flags: reserved for future use, must be zero * * Returns a list of the active LSM ids. On success this function * returns the number of @ids array elements. This value may be zero * if there are no LSMs active. If @size is insufficient to contain * the return data -E2BIG is returned and @size is set to the minimum * required size. In all other cases a negative value indicating the * error is returned. 
*/ SYSCALL_DEFINE3(lsm_list_modules, u64 __user *, ids, u32 __user *, size, u32, flags) { u32 total_size = lsm_active_cnt * sizeof(*ids); u32 usize; int i; if (flags) return -EINVAL; if (get_user(usize, size)) return -EFAULT; if (put_user(total_size, size) != 0) return -EFAULT; if (usize < total_size) return -E2BIG; for (i = 0; i < lsm_active_cnt; i++) if (put_user(lsm_idlist[i]->id, ids++)) return -EFAULT; return lsm_active_cnt; }
22 18 11 11 10 12 14 3 10 1 6 6 3 3 1 1 9 9 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 
516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 // SPDX-License-Identifier: GPL-2.0-or-later /* * ASIX AX8817X based USB 2.0 Ethernet Devices * Copyright (C) 2003-2006 David Hollis <dhollis@davehollis.com> * Copyright (C) 2005 Phil Chang <pchang23@sbcglobal.net> * Copyright (C) 2006 James Painter <jamie.painter@iname.com> * Copyright (c) 2002-2003 TiVo Inc. */ #include "asix.h" #define AX_HOST_EN_RETRIES 30 int __must_check asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, u16 size, void *data, int in_pm) { int ret; int (*fn)(struct usbnet *, u8, u8, u16, u16, void *, u16); BUG_ON(!dev); if (!in_pm) fn = usbnet_read_cmd; else fn = usbnet_read_cmd_nopm; ret = fn(dev, cmd, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, data, size); if (unlikely(ret < size)) { ret = ret < 0 ? 
ret : -ENODATA; netdev_warn(dev->net, "Failed to read reg index 0x%04x: %d\n", index, ret); } return ret; } int asix_write_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, u16 size, void *data, int in_pm) { int ret; int (*fn)(struct usbnet *, u8, u8, u16, u16, const void *, u16); BUG_ON(!dev); if (!in_pm) fn = usbnet_write_cmd; else fn = usbnet_write_cmd_nopm; ret = fn(dev, cmd, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, data, size); if (unlikely(ret < 0)) netdev_warn(dev->net, "Failed to write reg index 0x%04x: %d\n", index, ret); return ret; } void asix_write_cmd_async(struct usbnet *dev, u8 cmd, u16 value, u16 index, u16 size, void *data) { usbnet_write_cmd_async(dev, cmd, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, data, size); } static int asix_set_sw_mii(struct usbnet *dev, int in_pm) { int ret; ret = asix_write_cmd(dev, AX_CMD_SET_SW_MII, 0x0000, 0, 0, NULL, in_pm); if (ret < 0) netdev_err(dev->net, "Failed to enable software MII access\n"); return ret; } static int asix_set_hw_mii(struct usbnet *dev, int in_pm) { int ret; ret = asix_write_cmd(dev, AX_CMD_SET_HW_MII, 0x0000, 0, 0, NULL, in_pm); if (ret < 0) netdev_err(dev->net, "Failed to enable hardware MII access\n"); return ret; } static int asix_check_host_enable(struct usbnet *dev, int in_pm) { int i, ret; u8 smsr; for (i = 0; i < AX_HOST_EN_RETRIES; ++i) { ret = asix_set_sw_mii(dev, in_pm); if (ret == -ENODEV || ret == -ETIMEDOUT) break; usleep_range(1000, 1100); ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &smsr, in_pm); if (ret == -ENODEV) break; else if (ret < 0) continue; else if (smsr & AX_HOST_EN) break; } return i >= AX_HOST_EN_RETRIES ? -ETIMEDOUT : ret; } static void reset_asix_rx_fixup_info(struct asix_rx_fixup_info *rx) { /* Reset the variables that have a lifetime outside of * asix_rx_fixup_internal() so that future processing starts from a * known set of initial conditions. 
*/ if (rx->ax_skb) { /* Discard any incomplete Ethernet frame in the netdev buffer */ kfree_skb(rx->ax_skb); rx->ax_skb = NULL; } /* Assume the Data header 32-bit word is at the start of the current * or next URB socket buffer so reset all the state variables. */ rx->remaining = 0; rx->split_head = false; rx->header = 0; } int asix_rx_fixup_internal(struct usbnet *dev, struct sk_buff *skb, struct asix_rx_fixup_info *rx) { int offset = 0; u16 size; /* When an Ethernet frame spans multiple URB socket buffers, * do a sanity test for the Data header synchronisation. * Attempt to detect the situation of the previous socket buffer having * been truncated or a socket buffer was missing. These situations * cause a discontinuity in the data stream and therefore need to avoid * appending bad data to the end of the current netdev socket buffer. * Also avoid unnecessarily discarding a good current netdev socket * buffer. */ if (rx->remaining && (rx->remaining + sizeof(u32) <= skb->len)) { offset = ((rx->remaining + 1) & 0xfffe); rx->header = get_unaligned_le32(skb->data + offset); offset = 0; size = (u16)(rx->header & 0x7ff); if (size != ((~rx->header >> 16) & 0x7ff)) { netdev_err(dev->net, "asix_rx_fixup() Data Header synchronisation was lost, remaining %d\n", rx->remaining); reset_asix_rx_fixup_info(rx); } } while (offset + sizeof(u16) <= skb->len) { u16 copy_length; if (!rx->remaining) { if (skb->len - offset == sizeof(u16)) { rx->header = get_unaligned_le16( skb->data + offset); rx->split_head = true; offset += sizeof(u16); break; } if (rx->split_head == true) { rx->header |= (get_unaligned_le16( skb->data + offset) << 16); rx->split_head = false; offset += sizeof(u16); } else { rx->header = get_unaligned_le32(skb->data + offset); offset += sizeof(u32); } /* take frame length from Data header 32-bit word */ size = (u16)(rx->header & 0x7ff); if (size != ((~rx->header >> 16) & 0x7ff)) { netdev_err(dev->net, "asix_rx_fixup() Bad Header Length 0x%x, offset %d\n", rx->header, 
offset); reset_asix_rx_fixup_info(rx); return 0; } if (size > dev->net->mtu + ETH_HLEN + VLAN_HLEN) { netdev_dbg(dev->net, "asix_rx_fixup() Bad RX Length %d\n", size); reset_asix_rx_fixup_info(rx); return 0; } /* Sometimes may fail to get a netdev socket buffer but * continue to process the URB socket buffer so that * synchronisation of the Ethernet frame Data header * word is maintained. */ rx->ax_skb = netdev_alloc_skb_ip_align(dev->net, size); rx->remaining = size; } if (rx->remaining > skb->len - offset) { copy_length = skb->len - offset; rx->remaining -= copy_length; } else { copy_length = rx->remaining; rx->remaining = 0; } if (rx->ax_skb) { skb_put_data(rx->ax_skb, skb->data + offset, copy_length); if (!rx->remaining) { usbnet_skb_return(dev, rx->ax_skb); rx->ax_skb = NULL; } } offset += (copy_length + 1) & 0xfffe; } if (skb->len != offset) { netdev_err(dev->net, "asix_rx_fixup() Bad SKB Length %d, %d\n", skb->len, offset); reset_asix_rx_fixup_info(rx); return 0; } return 1; } int asix_rx_fixup_common(struct usbnet *dev, struct sk_buff *skb) { struct asix_common_private *dp = dev->driver_priv; struct asix_rx_fixup_info *rx = &dp->rx_fixup_info; return asix_rx_fixup_internal(dev, skb, rx); } void asix_rx_fixup_common_free(struct asix_common_private *dp) { struct asix_rx_fixup_info *rx; if (!dp) return; rx = &dp->rx_fixup_info; if (rx->ax_skb) { kfree_skb(rx->ax_skb); rx->ax_skb = NULL; } } struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) { int padlen; int headroom = skb_headroom(skb); int tailroom = skb_tailroom(skb); u32 packet_len; u32 padbytes = 0xffff0000; void *ptr; padlen = ((skb->len + 4) & (dev->maxpacket - 1)) ? 
0 : 4; /* We need to push 4 bytes in front of frame (packet_len) * and maybe add 4 bytes after the end (if padlen is 4) * * Avoid skb_copy_expand() expensive call, using following rules : * - We are allowed to push 4 bytes in headroom if skb_header_cloned() * is false (and if we have 4 bytes of headroom) * - We are allowed to put 4 bytes at tail if skb_cloned() * is false (and if we have 4 bytes of tailroom) * * TCP packets for example are cloned, but __skb_header_release() * was called in tcp stack, allowing us to use headroom for our needs. */ if (!skb_header_cloned(skb) && !(padlen && skb_cloned(skb)) && headroom + tailroom >= 4 + padlen) { /* following should not happen, but better be safe */ if (headroom < 4 || tailroom < padlen) { skb->data = memmove(skb->head + 4, skb->data, skb->len); skb_set_tail_pointer(skb, skb->len); } } else { struct sk_buff *skb2; skb2 = skb_copy_expand(skb, 4, padlen, flags); dev_kfree_skb_any(skb); skb = skb2; if (!skb) return NULL; } packet_len = ((skb->len ^ 0x0000ffff) << 16) + skb->len; ptr = skb_push(skb, 4); put_unaligned_le32(packet_len, ptr); if (padlen) { put_unaligned_le32(padbytes, skb_tail_pointer(skb)); skb_put(skb, sizeof(padbytes)); } usbnet_set_skb_tx_stats(skb, 1, 0); return skb; } int asix_read_phy_addr(struct usbnet *dev, bool internal) { int ret, offset; u8 buf[2]; ret = asix_read_cmd(dev, AX_CMD_READ_PHY_ID, 0, 0, 2, buf, 0); if (ret < 0) goto error; if (ret < 2) { ret = -EIO; goto error; } offset = (internal ? 1 : 0); ret = buf[offset]; if (ret >= PHY_MAX_ADDR) { netdev_err(dev->net, "invalid PHY address: %d\n", ret); return -ENODEV; } netdev_dbg(dev->net, "%s PHY address 0x%x\n", internal ? 
"internal" : "external", ret); return ret; error: netdev_err(dev->net, "Error reading PHY_ID register: %02x\n", ret); return ret; } int asix_sw_reset(struct usbnet *dev, u8 flags, int in_pm) { int ret; ret = asix_write_cmd(dev, AX_CMD_SW_RESET, flags, 0, 0, NULL, in_pm); if (ret < 0) netdev_err(dev->net, "Failed to send software reset: %02x\n", ret); return ret; } u16 asix_read_rx_ctl(struct usbnet *dev, int in_pm) { __le16 v; int ret = asix_read_cmd(dev, AX_CMD_READ_RX_CTL, 0, 0, 2, &v, in_pm); if (ret < 0) { netdev_err(dev->net, "Error reading RX_CTL register: %02x\n", ret); goto out; } ret = le16_to_cpu(v); out: return ret; } int asix_write_rx_ctl(struct usbnet *dev, u16 mode, int in_pm) { int ret; netdev_dbg(dev->net, "asix_write_rx_ctl() - mode = 0x%04x\n", mode); ret = asix_write_cmd(dev, AX_CMD_WRITE_RX_CTL, mode, 0, 0, NULL, in_pm); if (ret < 0) netdev_err(dev->net, "Failed to write RX_CTL mode to 0x%04x: %02x\n", mode, ret); return ret; } u16 asix_read_medium_status(struct usbnet *dev, int in_pm) { __le16 v; int ret = asix_read_cmd(dev, AX_CMD_READ_MEDIUM_STATUS, 0, 0, 2, &v, in_pm); if (ret < 0) { netdev_err(dev->net, "Error reading Medium Status register: %02x\n", ret); return ret; /* TODO: callers not checking for error ret */ } return le16_to_cpu(v); } int asix_write_medium_mode(struct usbnet *dev, u16 mode, int in_pm) { int ret; netdev_dbg(dev->net, "asix_write_medium_mode() - mode = 0x%04x\n", mode); ret = asix_write_cmd(dev, AX_CMD_WRITE_MEDIUM_MODE, mode, 0, 0, NULL, in_pm); if (ret < 0) netdev_err(dev->net, "Failed to write Medium Mode mode to 0x%04x: %02x\n", mode, ret); return ret; } int asix_write_gpio(struct usbnet *dev, u16 value, int sleep, int in_pm) { int ret; netdev_dbg(dev->net, "asix_write_gpio() - value = 0x%04x\n", value); ret = asix_write_cmd(dev, AX_CMD_WRITE_GPIOS, value, 0, 0, NULL, in_pm); if (ret < 0) netdev_err(dev->net, "Failed to write GPIO value 0x%04x: %02x\n", value, ret); if (sleep) msleep(sleep); return ret; } /* * 
AX88772 & AX88178 have a 16-bit RX_CTL value */ void asix_set_multicast(struct net_device *net) { struct usbnet *dev = netdev_priv(net); struct asix_data *data = (struct asix_data *)&dev->data; u16 rx_ctl = AX_DEFAULT_RX_CTL; if (net->flags & IFF_PROMISC) { rx_ctl |= AX_RX_CTL_PRO; } else if (net->flags & IFF_ALLMULTI || netdev_mc_count(net) > AX_MAX_MCAST) { rx_ctl |= AX_RX_CTL_AMALL; } else if (netdev_mc_empty(net)) { /* just broadcast and directed */ } else { /* We use the 20 byte dev->data * for our 8 byte filter buffer * to avoid allocating memory that * is tricky to free later */ struct netdev_hw_addr *ha; u32 crc_bits; memset(data->multi_filter, 0, AX_MCAST_FILTER_SIZE); /* Build the multicast hash filter. */ netdev_for_each_mc_addr(ha, net) { crc_bits = ether_crc(ETH_ALEN, ha->addr) >> 26; data->multi_filter[crc_bits >> 3] |= 1 << (crc_bits & 7); } asix_write_cmd_async(dev, AX_CMD_WRITE_MULTI_FILTER, 0, 0, AX_MCAST_FILTER_SIZE, data->multi_filter); rx_ctl |= AX_RX_CTL_AM; } asix_write_cmd_async(dev, AX_CMD_WRITE_RX_CTL, rx_ctl, 0, 0, NULL); } static int __asix_mdio_read(struct net_device *netdev, int phy_id, int loc, bool in_pm) { struct usbnet *dev = netdev_priv(netdev); __le16 res; int ret; mutex_lock(&dev->phy_mutex); ret = asix_check_host_enable(dev, in_pm); if (ret == -ENODEV || ret == -ETIMEDOUT) { mutex_unlock(&dev->phy_mutex); return ret; } ret = asix_read_cmd(dev, AX_CMD_READ_MII_REG, phy_id, (__u16)loc, 2, &res, in_pm); if (ret < 0) goto out; ret = asix_set_hw_mii(dev, in_pm); out: mutex_unlock(&dev->phy_mutex); netdev_dbg(dev->net, "asix_mdio_read() phy_id=0x%02x, loc=0x%02x, returns=0x%04x\n", phy_id, loc, le16_to_cpu(res)); return ret < 0 ? 
ret : le16_to_cpu(res); } int asix_mdio_read(struct net_device *netdev, int phy_id, int loc) { return __asix_mdio_read(netdev, phy_id, loc, false); } static int __asix_mdio_write(struct net_device *netdev, int phy_id, int loc, int val, bool in_pm) { struct usbnet *dev = netdev_priv(netdev); __le16 res = cpu_to_le16(val); int ret; netdev_dbg(dev->net, "asix_mdio_write() phy_id=0x%02x, loc=0x%02x, val=0x%04x\n", phy_id, loc, val); mutex_lock(&dev->phy_mutex); ret = asix_check_host_enable(dev, in_pm); if (ret == -ENODEV) goto out; ret = asix_write_cmd(dev, AX_CMD_WRITE_MII_REG, phy_id, (__u16)loc, 2, &res, in_pm); if (ret < 0) goto out; ret = asix_set_hw_mii(dev, in_pm); out: mutex_unlock(&dev->phy_mutex); return ret < 0 ? ret : 0; } void asix_mdio_write(struct net_device *netdev, int phy_id, int loc, int val) { __asix_mdio_write(netdev, phy_id, loc, val, false); } /* MDIO read and write wrappers for phylib */ int asix_mdio_bus_read(struct mii_bus *bus, int phy_id, int regnum) { struct usbnet *priv = bus->priv; return __asix_mdio_read(priv->net, phy_id, regnum, false); } int asix_mdio_bus_write(struct mii_bus *bus, int phy_id, int regnum, u16 val) { struct usbnet *priv = bus->priv; return __asix_mdio_write(priv->net, phy_id, regnum, val, false); } int asix_mdio_read_nopm(struct net_device *netdev, int phy_id, int loc) { return __asix_mdio_read(netdev, phy_id, loc, true); } void asix_mdio_write_nopm(struct net_device *netdev, int phy_id, int loc, int val) { __asix_mdio_write(netdev, phy_id, loc, val, true); } void asix_get_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) { struct usbnet *dev = netdev_priv(net); u8 opt; if (asix_read_cmd(dev, AX_CMD_READ_MONITOR_MODE, 0, 0, 1, &opt, 0) < 0) { wolinfo->supported = 0; wolinfo->wolopts = 0; return; } wolinfo->supported = WAKE_PHY | WAKE_MAGIC; wolinfo->wolopts = 0; if (opt & AX_MONITOR_LINK) wolinfo->wolopts |= WAKE_PHY; if (opt & AX_MONITOR_MAGIC) wolinfo->wolopts |= WAKE_MAGIC; } int asix_set_wol(struct 
net_device *net, struct ethtool_wolinfo *wolinfo) { struct usbnet *dev = netdev_priv(net); u8 opt = 0; if (wolinfo->wolopts & ~(WAKE_PHY | WAKE_MAGIC)) return -EINVAL; if (wolinfo->wolopts & WAKE_PHY) opt |= AX_MONITOR_LINK; if (wolinfo->wolopts & WAKE_MAGIC) opt |= AX_MONITOR_MAGIC; if (asix_write_cmd(dev, AX_CMD_WRITE_MONITOR_MODE, opt, 0, 0, NULL, 0) < 0) return -EINVAL; return 0; } int asix_get_eeprom_len(struct net_device *net) { return AX_EEPROM_LEN; } int asix_get_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom, u8 *data) { struct usbnet *dev = netdev_priv(net); u16 *eeprom_buff; int first_word, last_word; int i; if (eeprom->len == 0) return -EINVAL; eeprom->magic = AX_EEPROM_MAGIC; first_word = eeprom->offset >> 1; last_word = (eeprom->offset + eeprom->len - 1) >> 1; eeprom_buff = kmalloc_array(last_word - first_word + 1, sizeof(u16), GFP_KERNEL); if (!eeprom_buff) return -ENOMEM; /* ax8817x returns 2 bytes from eeprom on read */ for (i = first_word; i <= last_word; i++) { if (asix_read_cmd(dev, AX_CMD_READ_EEPROM, i, 0, 2, &eeprom_buff[i - first_word], 0) < 0) { kfree(eeprom_buff); return -EIO; } } memcpy(data, (u8 *)eeprom_buff + (eeprom->offset & 1), eeprom->len); kfree(eeprom_buff); return 0; } int asix_set_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom, u8 *data) { struct usbnet *dev = netdev_priv(net); u16 *eeprom_buff; int first_word, last_word; int i; int ret; netdev_dbg(net, "write EEPROM len %d, offset %d, magic 0x%x\n", eeprom->len, eeprom->offset, eeprom->magic); if (eeprom->len == 0) return -EINVAL; if (eeprom->magic != AX_EEPROM_MAGIC) return -EINVAL; first_word = eeprom->offset >> 1; last_word = (eeprom->offset + eeprom->len - 1) >> 1; eeprom_buff = kmalloc_array(last_word - first_word + 1, sizeof(u16), GFP_KERNEL); if (!eeprom_buff) return -ENOMEM; /* align data to 16 bit boundaries, read the missing data from the EEPROM */ if (eeprom->offset & 1) { ret = asix_read_cmd(dev, AX_CMD_READ_EEPROM, first_word, 0, 2, 
&eeprom_buff[0], 0); if (ret < 0) { netdev_err(net, "Failed to read EEPROM at offset 0x%02x.\n", first_word); goto free; } } if ((eeprom->offset + eeprom->len) & 1) { ret = asix_read_cmd(dev, AX_CMD_READ_EEPROM, last_word, 0, 2, &eeprom_buff[last_word - first_word], 0); if (ret < 0) { netdev_err(net, "Failed to read EEPROM at offset 0x%02x.\n", last_word); goto free; } } memcpy((u8 *)eeprom_buff + (eeprom->offset & 1), data, eeprom->len); /* write data to EEPROM */ ret = asix_write_cmd(dev, AX_CMD_WRITE_ENABLE, 0x0000, 0, 0, NULL, 0); if (ret < 0) { netdev_err(net, "Failed to enable EEPROM write\n"); goto free; } msleep(20); for (i = first_word; i <= last_word; i++) { netdev_dbg(net, "write to EEPROM at offset 0x%02x, data 0x%04x\n", i, eeprom_buff[i - first_word]); ret = asix_write_cmd(dev, AX_CMD_WRITE_EEPROM, i, eeprom_buff[i - first_word], 0, NULL, 0); if (ret < 0) { netdev_err(net, "Failed to write EEPROM at offset 0x%02x.\n", i); goto free; } msleep(20); } ret = asix_write_cmd(dev, AX_CMD_WRITE_DISABLE, 0x0000, 0, 0, NULL, 0); if (ret < 0) { netdev_err(net, "Failed to disable EEPROM write\n"); goto free; } ret = 0; free: kfree(eeprom_buff); return ret; } int asix_set_mac_address(struct net_device *net, void *p) { struct usbnet *dev = netdev_priv(net); struct asix_data *data = (struct asix_data *)&dev->data; struct sockaddr *addr = p; if (netif_running(net)) return -EBUSY; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; eth_hw_addr_set(net, addr->sa_data); /* We use the 20 byte dev->data * for our 6 byte mac buffer * to avoid allocating memory that * is tricky to free later */ memcpy(data->mac_addr, addr->sa_data, ETH_ALEN); asix_write_cmd_async(dev, AX_CMD_WRITE_NODE_ID, 0, 0, ETH_ALEN, data->mac_addr); return 0; }
65 49 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 // SPDX-License-Identifier: GPL-2.0-or-later /* * Cryptographic API for the 842 software compression algorithm. * * Copyright (C) IBM Corporation, 2011-2015 * * Original Authors: Robert Jennings <rcj@linux.vnet.ibm.com> * Seth Jennings <sjenning@linux.vnet.ibm.com> * * Rewrite: Dan Streetman <ddstreet@ieee.org> * * This is the software implementation of compression and decompression using * the 842 format. This uses the software 842 library at lib/842/ which is * only a reference implementation, and is very, very slow as compared to other * software compressors. You probably do not want to use this software * compression. If you have access to the PowerPC 842 compression hardware, you * want to use the 842 hardware compression interface, which is at: * drivers/crypto/nx/nx-842-crypto.c */ #include <crypto/internal/scompress.h> #include <linux/init.h> #include <linux/module.h> #include <linux/sw842.h> static void *crypto842_alloc_ctx(void) { void *ctx; ctx = kmalloc(SW842_MEM_COMPRESS, GFP_KERNEL); if (!ctx) return ERR_PTR(-ENOMEM); return ctx; } static void crypto842_free_ctx(void *ctx) { kfree(ctx); } static int crypto842_scompress(struct crypto_scomp *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen, void *ctx) { return sw842_compress(src, slen, dst, dlen, ctx); } static int crypto842_sdecompress(struct crypto_scomp *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen, void *ctx) { return sw842_decompress(src, slen, dst, dlen); } static struct scomp_alg scomp = { .streams = { .alloc_ctx = crypto842_alloc_ctx, .free_ctx = crypto842_free_ctx, }, .compress = crypto842_scompress, .decompress = crypto842_sdecompress, .base = { .cra_name = "842", .cra_driver_name = "842-scomp", 
.cra_priority = 100, .cra_module = THIS_MODULE, } }; static int __init crypto842_mod_init(void) { return crypto_register_scomp(&scomp); } module_init(crypto842_mod_init); static void __exit crypto842_mod_exit(void) { crypto_unregister_scomp(&scomp); } module_exit(crypto842_mod_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("842 Software Compression Algorithm"); MODULE_ALIAS_CRYPTO("842"); MODULE_ALIAS_CRYPTO("842-generic"); MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
1 2 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 // SPDX-License-Identifier: GPL-2.0-or-later /* * Force feedback support for Betop based devices * * The devices are distributed under various names and the same USB device ID * can be used in both adapters and actual game controllers. * * 0x11c2:0x2208 "BTP2185 BFM mode Joystick" * - tested with BTP2185 BFM Mode. * * 0x11C0:0x5506 "BTP2185 PC mode Joystick" * - tested with BTP2185 PC Mode. * * 0x8380:0x1850 "BTP2185 V2 PC mode USB Gamepad" * - tested with BTP2185 PC Mode with another version. * * 0x20bc:0x5500 "BTP2185 V2 BFM mode Joystick" * - tested with BTP2171s. 
* Copyright (c) 2014 Huang Bo <huangbobupt@163.com> */ /* */ #include <linux/input.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/hid.h> #include "hid-ids.h" struct betopff_device { struct hid_report *report; }; static int hid_betopff_play(struct input_dev *dev, void *data, struct ff_effect *effect) { struct hid_device *hid = input_get_drvdata(dev); struct betopff_device *betopff = data; __u16 left, right; left = effect->u.rumble.strong_magnitude; right = effect->u.rumble.weak_magnitude; betopff->report->field[2]->value[0] = left / 256; betopff->report->field[3]->value[0] = right / 256; hid_hw_request(hid, betopff->report, HID_REQ_SET_REPORT); return 0; } static int betopff_init(struct hid_device *hid) { struct betopff_device *betopff; struct hid_report *report; struct hid_input *hidinput; struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct input_dev *dev; int error; int i, j; if (list_empty(&hid->inputs)) { hid_err(hid, "no inputs found\n"); return -ENODEV; } hidinput = list_first_entry(&hid->inputs, struct hid_input, list); dev = hidinput->input; if (list_empty(report_list)) { hid_err(hid, "no output reports found\n"); return -ENODEV; } report = list_first_entry(report_list, struct hid_report, list); /* * Actually there are 4 fields for 4 Bytes as below: * ----------------------------------------- * Byte0 Byte1 Byte2 Byte3 * 0x00 0x00 left_motor right_motor * ----------------------------------------- * Do init them with default value. 
*/ if (report->maxfield < 4) { hid_err(hid, "not enough fields in the report: %d\n", report->maxfield); return -ENODEV; } for (i = 0; i < report->maxfield; i++) { if (report->field[i]->report_count < 1) { hid_err(hid, "no values in the field\n"); return -ENODEV; } for (j = 0; j < report->field[i]->report_count; j++) { report->field[i]->value[j] = 0x00; } } betopff = kzalloc_obj(*betopff); if (!betopff) return -ENOMEM; set_bit(FF_RUMBLE, dev->ffbit); error = input_ff_create_memless(dev, betopff, hid_betopff_play); if (error) { kfree(betopff); return error; } betopff->report = report; hid_hw_request(hid, betopff->report, HID_REQ_SET_REPORT); hid_info(hid, "Force feedback for betop devices by huangbo <huangbobupt@163.com>\n"); return 0; } static int betop_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; if (id->driver_data) hdev->quirks |= HID_QUIRK_MULTI_INPUT; ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); goto err; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_FF); if (ret) { hid_err(hdev, "hw start failed\n"); goto err; } betopff_init(hdev); return 0; err: return ret; } static const struct hid_device_id betop_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185BFM, 0x2208) }, { HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185PC, 0x5506) }, { HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185V2PC, 0x1850) }, { HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185V2BFM, 0x5500) }, { } }; MODULE_DEVICE_TABLE(hid, betop_devices); static struct hid_driver betop_driver = { .name = "betop", .id_table = betop_devices, .probe = betop_probe, }; module_hid_driver(betop_driver); MODULE_DESCRIPTION("Force feedback support for Betop based devices"); MODULE_LICENSE("GPL");
33 2 20 15 9 9 3 2 9 27 15 1 4 2 11 22 18 20 20 20 19 10 9 9 9 9 9 9 6 9 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 
509 510 511 512 513 514 515 516 517 518 519 520 521 522 // SPDX-License-Identifier: GPL-2.0-or-later /* Decoder for ASN.1 BER/DER/CER encoded bytestream * * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/export.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/module.h> #include <linux/asn1_decoder.h> #include <linux/asn1_ber_bytecode.h> static const unsigned char asn1_op_lengths[ASN1_OP__NR] = { /* OPC TAG JMP ACT */ [ASN1_OP_MATCH] = 1 + 1, [ASN1_OP_MATCH_OR_SKIP] = 1 + 1, [ASN1_OP_MATCH_ACT] = 1 + 1 + 1, [ASN1_OP_MATCH_ACT_OR_SKIP] = 1 + 1 + 1, [ASN1_OP_MATCH_JUMP] = 1 + 1 + 1, [ASN1_OP_MATCH_JUMP_OR_SKIP] = 1 + 1 + 1, [ASN1_OP_MATCH_ANY] = 1, [ASN1_OP_MATCH_ANY_OR_SKIP] = 1, [ASN1_OP_MATCH_ANY_ACT] = 1 + 1, [ASN1_OP_MATCH_ANY_ACT_OR_SKIP] = 1 + 1, [ASN1_OP_COND_MATCH_OR_SKIP] = 1 + 1, [ASN1_OP_COND_MATCH_ACT_OR_SKIP] = 1 + 1 + 1, [ASN1_OP_COND_MATCH_JUMP_OR_SKIP] = 1 + 1 + 1, [ASN1_OP_COND_MATCH_ANY] = 1, [ASN1_OP_COND_MATCH_ANY_OR_SKIP] = 1, [ASN1_OP_COND_MATCH_ANY_ACT] = 1 + 1, [ASN1_OP_COND_MATCH_ANY_ACT_OR_SKIP] = 1 + 1, [ASN1_OP_COND_FAIL] = 1, [ASN1_OP_COMPLETE] = 1, [ASN1_OP_ACT] = 1 + 1, [ASN1_OP_MAYBE_ACT] = 1 + 1, [ASN1_OP_RETURN] = 1, [ASN1_OP_END_SEQ] = 1, [ASN1_OP_END_SEQ_OF] = 1 + 1, [ASN1_OP_END_SET] = 1, [ASN1_OP_END_SET_OF] = 1 + 1, [ASN1_OP_END_SEQ_ACT] = 1 + 1, [ASN1_OP_END_SEQ_OF_ACT] = 1 + 1 + 1, [ASN1_OP_END_SET_ACT] = 1 + 1, [ASN1_OP_END_SET_OF_ACT] = 1 + 1 + 1, }; /* * Find the length of an indefinite length object * @data: The data buffer * @datalen: The end of the innermost containing element in the buffer * @_dp: The data parse cursor (updated before returning) * @_len: Where to return the size of the element. 
* @_errmsg: Where to return a pointer to an error message on error */ static int asn1_find_indefinite_length(const unsigned char *data, size_t datalen, size_t *_dp, size_t *_len, const char **_errmsg) { unsigned char tag, tmp; size_t dp = *_dp, len, n; int indef_level = 1; next_tag: if (unlikely(datalen - dp < 2)) { if (datalen == dp) goto missing_eoc; goto data_overrun_error; } /* Extract a tag from the data */ tag = data[dp++]; if (tag == ASN1_EOC) { /* It appears to be an EOC. */ if (data[dp++] != 0) goto invalid_eoc; if (--indef_level <= 0) { *_len = dp - *_dp; *_dp = dp; return 0; } goto next_tag; } if (unlikely((tag & 0x1f) == ASN1_LONG_TAG)) { do { if (unlikely(datalen - dp < 2)) goto data_overrun_error; tmp = data[dp++]; } while (tmp & 0x80); } /* Extract the length */ len = data[dp++]; if (len <= 0x7f) goto check_length; if (unlikely(len == ASN1_INDEFINITE_LENGTH)) { /* Indefinite length */ if (unlikely((tag & ASN1_CONS_BIT) == ASN1_PRIM << 5)) goto indefinite_len_primitive; indef_level++; goto next_tag; } n = len - 0x80; if (unlikely(n > sizeof(len) - 1)) goto length_too_long; if (unlikely(n > datalen - dp)) goto data_overrun_error; len = 0; for (; n > 0; n--) { len <<= 8; len |= data[dp++]; } check_length: if (len > datalen - dp) goto data_overrun_error; dp += len; goto next_tag; length_too_long: *_errmsg = "Unsupported length"; goto error; indefinite_len_primitive: *_errmsg = "Indefinite len primitive not permitted"; goto error; invalid_eoc: *_errmsg = "Invalid length EOC"; goto error; data_overrun_error: *_errmsg = "Data overrun error"; goto error; missing_eoc: *_errmsg = "Missing EOC in indefinite len cons"; error: *_dp = dp; return -1; } /** * asn1_ber_decoder - Decoder BER/DER/CER ASN.1 according to pattern * @decoder: The decoder definition (produced by asn1_compiler) * @context: The caller's context (to be passed to the action functions) * @data: The encoded data * @datalen: The size of the encoded data * * Decode BER/DER/CER encoded ASN.1 data 
according to a bytecode pattern * produced by asn1_compiler. Action functions are called on marked tags to * allow the caller to retrieve significant data. * * LIMITATIONS: * * To keep down the amount of stack used by this function, the following limits * have been imposed: * * (1) This won't handle datalen > 65535 without increasing the size of the * cons stack elements and length_too_long checking. * * (2) The stack of constructed types is 10 deep. If the depth of non-leaf * constructed types exceeds this, the decode will fail. * * (3) The SET type (not the SET OF type) isn't really supported as tracking * what members of the set have been seen is a pain. */ int asn1_ber_decoder(const struct asn1_decoder *decoder, void *context, const unsigned char *data, size_t datalen) { const unsigned char *machine = decoder->machine; const asn1_action_t *actions = decoder->actions; size_t machlen = decoder->machlen; enum asn1_opcode op; unsigned char tag = 0, csp = 0, jsp = 0, optag = 0, hdr = 0; const char *errmsg; size_t pc = 0, dp = 0, tdp = 0, len = 0; int ret; unsigned char flags = 0; #define FLAG_INDEFINITE_LENGTH 0x01 #define FLAG_MATCHED 0x02 #define FLAG_LAST_MATCHED 0x04 /* Last tag matched */ #define FLAG_CONS 0x20 /* Corresponds to CONS bit in the opcode tag * - ie. whether or not we are going to parse * a compound type. */ #define NR_CONS_STACK 10 unsigned short cons_dp_stack[NR_CONS_STACK]; unsigned short cons_datalen_stack[NR_CONS_STACK]; unsigned char cons_hdrlen_stack[NR_CONS_STACK]; #define NR_JUMP_STACK 10 unsigned char jump_stack[NR_JUMP_STACK]; if (datalen > 65535) return -EMSGSIZE; next_op: pr_debug("next_op: pc=\e[32m%zu\e[m/%zu dp=\e[33m%zu\e[m/%zu C=%d J=%d\n", pc, machlen, dp, datalen, csp, jsp); if (unlikely(pc >= machlen)) goto machine_overrun_error; op = machine[pc];