5 5 5 1 33 5 5 5 5 5 1770 1771 14 4 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 | // SPDX-License-Identifier: GPL-2.0 /* drivers/net/wireless/virt_wifi.c * * A fake implementation of cfg80211_ops that can be tacked on to an ethernet * net_device to make it appear as a wireless connection. * * Copyright (C) 2018 Google, Inc. * * Author: schuffelen@google.com */ #include <net/cfg80211.h> #include <net/rtnetlink.h> #include <linux/etherdevice.h> #include <linux/math64.h> #include <linux/module.h> static struct wiphy *common_wiphy; struct virt_wifi_wiphy_priv { struct delayed_work scan_result; struct cfg80211_scan_request *scan_request; bool being_deleted; }; static struct ieee80211_channel channel_2ghz = { .band = NL80211_BAND_2GHZ, .center_freq = 2432, .hw_value = 2432, .max_power = 20, }; static struct ieee80211_rate bitrates_2ghz[] = { { .bitrate = 10 }, { .bitrate = 20 }, { .bitrate = 55 }, { .bitrate = 110 }, { .bitrate = 60 }, { .bitrate = 120 }, { .bitrate = 240 }, }; static struct ieee80211_supported_band band_2ghz = { .channels = &channel_2ghz, .bitrates = bitrates_2ghz, .band = NL80211_BAND_2GHZ, .n_channels = 1, .n_bitrates = ARRAY_SIZE(bitrates_2ghz), .ht_cap = { .ht_supported = true, .cap = IEEE80211_HT_CAP_SUP_WIDTH_20_40 | IEEE80211_HT_CAP_GRN_FLD | IEEE80211_HT_CAP_SGI_20 | IEEE80211_HT_CAP_SGI_40 | IEEE80211_HT_CAP_DSSSCCK40, .ampdu_factor = 0x3, .ampdu_density = 0x6, .mcs = { .rx_mask = {0xff, 0xff}, .tx_params = IEEE80211_HT_MCS_TX_DEFINED, }, }, }; static struct ieee80211_channel channel_5ghz = { .band = NL80211_BAND_5GHZ, .center_freq = 5240, .hw_value = 5240, .max_power = 20, }; static struct ieee80211_rate bitrates_5ghz[] = { { .bitrate = 60 }, { .bitrate = 120 }, { .bitrate = 240 }, }; #define RX_MCS_MAP (IEEE80211_VHT_MCS_SUPPORT_0_9 << 0 | \ IEEE80211_VHT_MCS_SUPPORT_0_9 << 2 | \ IEEE80211_VHT_MCS_SUPPORT_0_9 << 4 | \ IEEE80211_VHT_MCS_SUPPORT_0_9 << 6 | \ IEEE80211_VHT_MCS_SUPPORT_0_9 << 8 | \ IEEE80211_VHT_MCS_SUPPORT_0_9 << 10 | \ IEEE80211_VHT_MCS_SUPPORT_0_9 << 12 | \ IEEE80211_VHT_MCS_SUPPORT_0_9 << 14) #define TX_MCS_MAP (IEEE80211_VHT_MCS_SUPPORT_0_9 << 0 | \ IEEE80211_VHT_MCS_SUPPORT_0_9 << 2 | \ IEEE80211_VHT_MCS_SUPPORT_0_9 << 4 | \ IEEE80211_VHT_MCS_SUPPORT_0_9 << 6 | \ IEEE80211_VHT_MCS_SUPPORT_0_9 << 8 | \ IEEE80211_VHT_MCS_SUPPORT_0_9 << 10 | \ IEEE80211_VHT_MCS_SUPPORT_0_9 << 12 | \ IEEE80211_VHT_MCS_SUPPORT_0_9 << 14) static struct ieee80211_supported_band band_5ghz = { .channels = &channel_5ghz, .bitrates = bitrates_5ghz, .band = NL80211_BAND_5GHZ, .n_channels = 1, .n_bitrates = ARRAY_SIZE(bitrates_5ghz), .ht_cap = { .ht_supported = true, .cap = IEEE80211_HT_CAP_SUP_WIDTH_20_40 | IEEE80211_HT_CAP_GRN_FLD | IEEE80211_HT_CAP_SGI_20 | IEEE80211_HT_CAP_SGI_40 | IEEE80211_HT_CAP_DSSSCCK40, .ampdu_factor = 0x3, .ampdu_density = 0x6, .mcs = { .rx_mask = {0xff, 0xff}, .tx_params = IEEE80211_HT_MCS_TX_DEFINED, }, }, .vht_cap = { .vht_supported = true, .cap = IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 | IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ | IEEE80211_VHT_CAP_RXLDPC | IEEE80211_VHT_CAP_SHORT_GI_80 | IEEE80211_VHT_CAP_SHORT_GI_160 | IEEE80211_VHT_CAP_TXSTBC | IEEE80211_VHT_CAP_RXSTBC_1 | IEEE80211_VHT_CAP_RXSTBC_2 | IEEE80211_VHT_CAP_RXSTBC_3 | IEEE80211_VHT_CAP_RXSTBC_4 | IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK, .vht_mcs = { .rx_mcs_map = cpu_to_le16(RX_MCS_MAP), .tx_mcs_map = cpu_to_le16(TX_MCS_MAP), } }, }; /* Assigned at module init. Guaranteed locally-administered and unicast. */ static u8 fake_router_bssid[ETH_ALEN] __ro_after_init = {}; #define VIRT_WIFI_SSID "VirtWifi" #define VIRT_WIFI_SSID_LEN 8 static void virt_wifi_inform_bss(struct wiphy *wiphy) { u64 tsf = div_u64(ktime_get_boottime_ns(), 1000); struct cfg80211_bss *informed_bss; static const struct { u8 tag; u8 len; u8 ssid[8] __nonstring; } __packed ssid = { .tag = WLAN_EID_SSID, .len = VIRT_WIFI_SSID_LEN, .ssid = VIRT_WIFI_SSID, }; informed_bss = cfg80211_inform_bss(wiphy, &channel_5ghz, CFG80211_BSS_FTYPE_PRESP, fake_router_bssid, tsf, WLAN_CAPABILITY_ESS, 0, (void *)&ssid, sizeof(ssid), DBM_TO_MBM(-50), GFP_KERNEL); cfg80211_put_bss(wiphy, informed_bss); } /* Called with the rtnl lock held. */ static int virt_wifi_scan(struct wiphy *wiphy, struct cfg80211_scan_request *request) { struct virt_wifi_wiphy_priv *priv = wiphy_priv(wiphy); wiphy_debug(wiphy, "scan\n"); if (priv->scan_request || priv->being_deleted) return -EBUSY; priv->scan_request = request; schedule_delayed_work(&priv->scan_result, HZ * 2); return 0; } /* Acquires and releases the rdev BSS lock. */ static void virt_wifi_scan_result(struct work_struct *work) { struct virt_wifi_wiphy_priv *priv = container_of(work, struct virt_wifi_wiphy_priv, scan_result.work); struct wiphy *wiphy = priv_to_wiphy(priv); struct cfg80211_scan_info scan_info = { .aborted = false }; virt_wifi_inform_bss(wiphy); /* Schedules work which acquires and releases the rtnl lock. */ cfg80211_scan_done(priv->scan_request, &scan_info); priv->scan_request = NULL; } /* May acquire and release the rdev BSS lock. */ static void virt_wifi_cancel_scan(struct wiphy *wiphy) { struct virt_wifi_wiphy_priv *priv = wiphy_priv(wiphy); cancel_delayed_work_sync(&priv->scan_result); /* Clean up dangling callbacks if necessary. */ if (priv->scan_request) { struct cfg80211_scan_info scan_info = { .aborted = true }; /* Schedules work which acquires and releases the rtnl lock. */ cfg80211_scan_done(priv->scan_request, &scan_info); priv->scan_request = NULL; } } struct virt_wifi_netdev_priv { struct delayed_work connect; struct net_device *lowerdev; struct net_device *upperdev; u32 tx_packets; u32 tx_failed; u32 connect_requested_ssid_len; u8 connect_requested_ssid[IEEE80211_MAX_SSID_LEN]; u8 connect_requested_bss[ETH_ALEN]; bool is_up; bool is_connected; bool being_deleted; }; /* Called with the rtnl lock held. */ static int virt_wifi_connect(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_connect_params *sme) { struct virt_wifi_netdev_priv *priv = netdev_priv(netdev); bool could_schedule; if (priv->being_deleted || !priv->is_up) return -EBUSY; if (!sme->ssid) return -EINVAL; priv->connect_requested_ssid_len = sme->ssid_len; memcpy(priv->connect_requested_ssid, sme->ssid, sme->ssid_len); could_schedule = schedule_delayed_work(&priv->connect, HZ * 2); if (!could_schedule) return -EBUSY; if (sme->bssid) { ether_addr_copy(priv->connect_requested_bss, sme->bssid); } else { virt_wifi_inform_bss(wiphy); eth_zero_addr(priv->connect_requested_bss); } wiphy_debug(wiphy, "connect\n"); return 0; } /* Acquires and releases the rdev event lock. */ static void virt_wifi_connect_complete(struct work_struct *work) { struct virt_wifi_netdev_priv *priv = container_of(work, struct virt_wifi_netdev_priv, connect.work); u8 *requested_bss = priv->connect_requested_bss; bool right_addr = ether_addr_equal(requested_bss, fake_router_bssid); bool right_ssid = priv->connect_requested_ssid_len == VIRT_WIFI_SSID_LEN && !memcmp(priv->connect_requested_ssid, VIRT_WIFI_SSID, priv->connect_requested_ssid_len); u16 status = WLAN_STATUS_SUCCESS; if (is_zero_ether_addr(requested_bss)) requested_bss = NULL; if (!priv->is_up || (requested_bss && !right_addr) || !right_ssid) status = WLAN_STATUS_UNSPECIFIED_FAILURE; else priv->is_connected = true; /* Schedules an event that acquires the rtnl lock. */ cfg80211_connect_result(priv->upperdev, requested_bss, NULL, 0, NULL, 0, status, GFP_KERNEL); netif_carrier_on(priv->upperdev); } /* May acquire and release the rdev event lock. */ static void virt_wifi_cancel_connect(struct net_device *netdev) { struct virt_wifi_netdev_priv *priv = netdev_priv(netdev); /* If there is work pending, clean up dangling callbacks. */ if (cancel_delayed_work_sync(&priv->connect)) { /* Schedules an event that acquires the rtnl lock. */ cfg80211_connect_result(priv->upperdev, priv->connect_requested_bss, NULL, 0, NULL, 0, WLAN_STATUS_UNSPECIFIED_FAILURE, GFP_KERNEL); } } /* Called with the rtnl lock held. Acquires the rdev event lock. */ static int virt_wifi_disconnect(struct wiphy *wiphy, struct net_device *netdev, u16 reason_code) { struct virt_wifi_netdev_priv *priv = netdev_priv(netdev); if (priv->being_deleted) return -EBUSY; wiphy_debug(wiphy, "disconnect\n"); virt_wifi_cancel_connect(netdev); cfg80211_disconnected(netdev, reason_code, NULL, 0, true, GFP_KERNEL); priv->is_connected = false; netif_carrier_off(netdev); return 0; } /* Called with the rtnl lock held. */ static int virt_wifi_get_station(struct wiphy *wiphy, struct net_device *dev, const u8 *mac, struct station_info *sinfo) { struct virt_wifi_netdev_priv *priv = netdev_priv(dev); wiphy_debug(wiphy, "get_station\n"); if (!priv->is_connected || !ether_addr_equal(mac, fake_router_bssid)) return -ENOENT; sinfo->filled = BIT_ULL(NL80211_STA_INFO_TX_PACKETS) | BIT_ULL(NL80211_STA_INFO_TX_FAILED) | BIT_ULL(NL80211_STA_INFO_SIGNAL) | BIT_ULL(NL80211_STA_INFO_TX_BITRATE); sinfo->tx_packets = priv->tx_packets; sinfo->tx_failed = priv->tx_failed; /* For CFG80211_SIGNAL_TYPE_MBM, value is expressed in _dBm_ */ sinfo->signal = -50; sinfo->txrate = (struct rate_info) { .legacy = 10, /* units are 100kbit/s */ }; return 0; } /* Called with the rtnl lock held. */ static int virt_wifi_dump_station(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *mac, struct station_info *sinfo) { struct virt_wifi_netdev_priv *priv = netdev_priv(dev); wiphy_debug(wiphy, "dump_station\n"); if (idx != 0 || !priv->is_connected) return -ENOENT; ether_addr_copy(mac, fake_router_bssid); return virt_wifi_get_station(wiphy, dev, fake_router_bssid, sinfo); } static const struct cfg80211_ops virt_wifi_cfg80211_ops = { .scan = virt_wifi_scan, .connect = virt_wifi_connect, .disconnect = virt_wifi_disconnect, .get_station = virt_wifi_get_station, .dump_station = virt_wifi_dump_station, }; /* Acquires and releases the rtnl lock. */ static struct wiphy *virt_wifi_make_wiphy(void) { struct wiphy *wiphy; struct virt_wifi_wiphy_priv *priv; int err; wiphy = wiphy_new(&virt_wifi_cfg80211_ops, sizeof(*priv)); if (!wiphy) return NULL; wiphy->max_scan_ssids = 4; wiphy->max_scan_ie_len = 1000; wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM; wiphy->bands[NL80211_BAND_2GHZ] = &band_2ghz; wiphy->bands[NL80211_BAND_5GHZ] = &band_5ghz; wiphy->bands[NL80211_BAND_60GHZ] = NULL; wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION); priv = wiphy_priv(wiphy); priv->being_deleted = false; priv->scan_request = NULL; INIT_DELAYED_WORK(&priv->scan_result, virt_wifi_scan_result); err = wiphy_register(wiphy); if (err < 0) { wiphy_free(wiphy); return NULL; } return wiphy; } /* Acquires and releases the rtnl lock. */ static void virt_wifi_destroy_wiphy(struct wiphy *wiphy) { struct virt_wifi_wiphy_priv *priv; WARN(!wiphy, "%s called with null wiphy", __func__); if (!wiphy) return; priv = wiphy_priv(wiphy); priv->being_deleted = true; virt_wifi_cancel_scan(wiphy); if (wiphy->registered) wiphy_unregister(wiphy); wiphy_free(wiphy); } /* Enters and exits a RCU-bh critical section. */ static netdev_tx_t virt_wifi_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct virt_wifi_netdev_priv *priv = netdev_priv(dev); priv->tx_packets++; if (!priv->is_connected) { priv->tx_failed++; return NET_XMIT_DROP; } skb->dev = priv->lowerdev; return dev_queue_xmit(skb); } /* Called with rtnl lock held. */ static int virt_wifi_net_device_open(struct net_device *dev) { struct virt_wifi_netdev_priv *priv = netdev_priv(dev); priv->is_up = true; return 0; } /* Called with rtnl lock held. */ static int virt_wifi_net_device_stop(struct net_device *dev) { struct virt_wifi_netdev_priv *n_priv = netdev_priv(dev); n_priv->is_up = false; if (!dev->ieee80211_ptr) return 0; virt_wifi_cancel_scan(dev->ieee80211_ptr->wiphy); virt_wifi_cancel_connect(dev); netif_carrier_off(dev); return 0; } static int virt_wifi_net_device_get_iflink(const struct net_device *dev) { struct virt_wifi_netdev_priv *priv = netdev_priv(dev); return READ_ONCE(priv->lowerdev->ifindex); } static const struct net_device_ops virt_wifi_ops = { .ndo_start_xmit = virt_wifi_start_xmit, .ndo_open = virt_wifi_net_device_open, .ndo_stop = virt_wifi_net_device_stop, .ndo_get_iflink = virt_wifi_net_device_get_iflink, }; /* Invoked as part of rtnl lock release. */ static void virt_wifi_net_device_destructor(struct net_device *dev) { /* Delayed past dellink to allow nl80211 to react to the device being * deleted. */ kfree(dev->ieee80211_ptr); dev->ieee80211_ptr = NULL; } /* No lock interaction. */ static void virt_wifi_setup(struct net_device *dev) { ether_setup(dev); dev->netdev_ops = &virt_wifi_ops; dev->needs_free_netdev = true; } /* Called in a RCU read critical section from netif_receive_skb */ static rx_handler_result_t virt_wifi_rx_handler(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct virt_wifi_netdev_priv *priv = rcu_dereference(skb->dev->rx_handler_data); if (!priv->is_connected) return RX_HANDLER_PASS; /* GFP_ATOMIC because this is a packet interrupt handler. */ skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) { dev_err(&priv->upperdev->dev, "can't skb_share_check\n"); return RX_HANDLER_CONSUMED; } *pskb = skb; skb->dev = priv->upperdev; skb->pkt_type = PACKET_HOST; return RX_HANDLER_ANOTHER; } /* Called with rtnl lock held. */ static int virt_wifi_newlink(struct net_device *dev, struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { struct virt_wifi_netdev_priv *priv = netdev_priv(dev); struct net *link_net = rtnl_newlink_link_net(params); struct nlattr **tb = params->tb; int err; if (!tb[IFLA_LINK]) return -EINVAL; netif_carrier_off(dev); priv->upperdev = dev; priv->lowerdev = __dev_get_by_index(link_net, nla_get_u32(tb[IFLA_LINK])); if (!priv->lowerdev) return -ENODEV; if (!tb[IFLA_MTU]) dev->mtu = priv->lowerdev->mtu; else if (dev->mtu > priv->lowerdev->mtu) return -EINVAL; err = netdev_rx_handler_register(priv->lowerdev, virt_wifi_rx_handler, priv); if (err) { dev_err(&priv->lowerdev->dev, "can't netdev_rx_handler_register: %d\n", err); return err; } eth_hw_addr_inherit(dev, priv->lowerdev); netif_stacked_transfer_operstate(priv->lowerdev, dev); SET_NETDEV_DEV(dev, &priv->lowerdev->dev); dev->ieee80211_ptr = kzalloc(sizeof(*dev->ieee80211_ptr), GFP_KERNEL); if (!dev->ieee80211_ptr) { err = -ENOMEM; goto remove_handler; } dev->ieee80211_ptr->iftype = NL80211_IFTYPE_STATION; dev->ieee80211_ptr->wiphy = common_wiphy; err = register_netdevice(dev); if (err) { dev_err(&priv->lowerdev->dev, "can't register_netdevice: %d\n", err); goto free_wireless_dev; } err = netdev_upper_dev_link(priv->lowerdev, dev, extack); if (err) { dev_err(&priv->lowerdev->dev, "can't netdev_upper_dev_link: %d\n", err); goto unregister_netdev; } dev->priv_destructor = virt_wifi_net_device_destructor; priv->being_deleted = false; priv->is_connected = false; priv->is_up = false; INIT_DELAYED_WORK(&priv->connect, virt_wifi_connect_complete); __module_get(THIS_MODULE); return 0; unregister_netdev: unregister_netdevice(dev); free_wireless_dev: kfree(dev->ieee80211_ptr); dev->ieee80211_ptr = NULL; remove_handler: netdev_rx_handler_unregister(priv->lowerdev); return err; } /* Called with rtnl lock held. */ static void virt_wifi_dellink(struct net_device *dev, struct list_head *head) { struct virt_wifi_netdev_priv *priv = netdev_priv(dev); if (dev->ieee80211_ptr) virt_wifi_cancel_scan(dev->ieee80211_ptr->wiphy); priv->being_deleted = true; virt_wifi_cancel_connect(dev); netif_carrier_off(dev); netdev_rx_handler_unregister(priv->lowerdev); netdev_upper_dev_unlink(priv->lowerdev, dev); unregister_netdevice_queue(dev, head); module_put(THIS_MODULE); /* Deleting the wiphy is handled in the module destructor. */ } static struct rtnl_link_ops virt_wifi_link_ops = { .kind = "virt_wifi", .setup = virt_wifi_setup, .newlink = virt_wifi_newlink, .dellink = virt_wifi_dellink, .priv_size = sizeof(struct virt_wifi_netdev_priv), }; static bool netif_is_virt_wifi_dev(const struct net_device *dev) { return rcu_access_pointer(dev->rx_handler) == virt_wifi_rx_handler; } static int virt_wifi_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *lower_dev = netdev_notifier_info_to_dev(ptr); struct virt_wifi_netdev_priv *priv; struct net_device *upper_dev; LIST_HEAD(list_kill); if (!netif_is_virt_wifi_dev(lower_dev)) return NOTIFY_DONE; switch (event) { case NETDEV_UNREGISTER: priv = rtnl_dereference(lower_dev->rx_handler_data); if (!priv) return NOTIFY_DONE; upper_dev = priv->upperdev; upper_dev->rtnl_link_ops->dellink(upper_dev, &list_kill); unregister_netdevice_many(&list_kill); break; } return NOTIFY_DONE; } static struct notifier_block virt_wifi_notifier = { .notifier_call = virt_wifi_event, }; /* Acquires and releases the rtnl lock. */ static int __init virt_wifi_init_module(void) { int err; /* Guaranteed to be locally-administered and not multicast. */ eth_random_addr(fake_router_bssid); err = register_netdevice_notifier(&virt_wifi_notifier); if (err) return err; err = -ENOMEM; common_wiphy = virt_wifi_make_wiphy(); if (!common_wiphy) goto notifier; err = rtnl_link_register(&virt_wifi_link_ops); if (err) goto destroy_wiphy; return 0; destroy_wiphy: virt_wifi_destroy_wiphy(common_wiphy); notifier: unregister_netdevice_notifier(&virt_wifi_notifier); return err; } /* Acquires and releases the rtnl lock. */ static void __exit virt_wifi_cleanup_module(void) { /* Will delete any devices that depend on the wiphy. */ rtnl_link_unregister(&virt_wifi_link_ops); virt_wifi_destroy_wiphy(common_wiphy); unregister_netdevice_notifier(&virt_wifi_notifier); } module_init(virt_wifi_init_module); module_exit(virt_wifi_cleanup_module); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Cody Schuffelen <schuffelen@google.com>"); MODULE_DESCRIPTION("Driver for a wireless wrapper of ethernet devices"); MODULE_ALIAS_RTNL_LINK("virt_wifi"); |
8 8 8 8 7 6 6 6 6 5 1 5 8 7 7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 | // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/cls_cgroup.c Control Group Classifier * * Authors: Thomas Graf <tgraf@suug.ch> */ #include <linux/module.h> #include <linux/slab.h> #include <linux/skbuff.h> #include <linux/rcupdate.h> #include <net/rtnetlink.h> #include <net/pkt_cls.h> #include <net/sock.h> #include <net/cls_cgroup.h> #include <net/tc_wrapper.h> struct cls_cgroup_head { u32 handle; struct tcf_exts exts; struct tcf_ematch_tree ematches; struct tcf_proto *tp; struct rcu_work rwork; }; TC_INDIRECT_SCOPE int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { struct cls_cgroup_head *head = rcu_dereference_bh(tp->root); u32 classid = task_get_classid(skb); if (unlikely(!head)) return -1; if (!classid) return -1; if (!tcf_em_tree_match(skb, &head->ematches, NULL)) return -1; res->classid = classid; res->class = 0; return tcf_exts_exec(skb, &head->exts, res); } static void *cls_cgroup_get(struct tcf_proto *tp, u32 handle) { return NULL; } static int cls_cgroup_init(struct tcf_proto *tp) { return 0; } static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = { [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED }, }; static void __cls_cgroup_destroy(struct cls_cgroup_head *head) { tcf_exts_destroy(&head->exts); tcf_em_tree_destroy(&head->ematches); tcf_exts_put_net(&head->exts); kfree(head); } static void cls_cgroup_destroy_work(struct work_struct *work) { struct cls_cgroup_head *head = container_of(to_rcu_work(work), struct cls_cgroup_head, rwork); rtnl_lock(); __cls_cgroup_destroy(head); rtnl_unlock(); } static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, u32 flags, struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_CGROUP_MAX + 1]; struct cls_cgroup_head *head = rtnl_dereference(tp->root); struct cls_cgroup_head *new; int err; if (!tca[TCA_OPTIONS]) return -EINVAL; if (!head && !handle) return -EINVAL; if (head && handle != head->handle) return -ENOENT; new = kzalloc(sizeof(*head), GFP_KERNEL); if (!new) return -ENOBUFS; err = tcf_exts_init(&new->exts, net, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); if (err < 0) goto errout; new->handle = handle; new->tp = tp; err = nla_parse_nested_deprecated(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS], cgroup_policy, NULL); if (err < 0) goto errout; err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, flags, extack); if (err < 0) goto errout; err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &new->ematches); if (err < 0) goto errout; rcu_assign_pointer(tp->root, new); if (head) { tcf_exts_get_net(&head->exts); tcf_queue_work(&head->rwork, cls_cgroup_destroy_work); } return 0; errout: tcf_exts_destroy(&new->exts); kfree(new); return err; } static void cls_cgroup_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); /* Head can still be NULL due to cls_cgroup_init(). */ if (head) { if (tcf_exts_get_net(&head->exts)) tcf_queue_work(&head->rwork, cls_cgroup_destroy_work); else __cls_cgroup_destroy(head); } } static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last, bool rtnl_held, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg, bool rtnl_held) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); if (arg->count < arg->skip) goto skip; if (!head) return; if (arg->fn(tp, head, arg) < 0) { arg->stop = 1; return; } skip: arg->count++; } static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); struct nlattr *nest; t->tcm_handle = head->handle; nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (tcf_exts_dump(skb, &head->exts) < 0 || tcf_em_tree_dump(skb, &head->ematches, TCA_CGROUP_EMATCHES) < 0) goto nla_put_failure; nla_nest_end(skb, nest); if (tcf_exts_dump_stats(skb, &head->exts) < 0) goto nla_put_failure; return skb->len; nla_put_failure: nla_nest_cancel(skb, nest); return -1; } static struct tcf_proto_ops cls_cgroup_ops __read_mostly = { .kind = "cgroup", .init = cls_cgroup_init, .change = cls_cgroup_change, .classify = cls_cgroup_classify, .destroy = cls_cgroup_destroy, .get = cls_cgroup_get, .delete = cls_cgroup_delete, .walk = cls_cgroup_walk, .dump = cls_cgroup_dump, .owner = THIS_MODULE, }; MODULE_ALIAS_NET_CLS("cgroup"); static int __init init_cgroup_cls(void) { return register_tcf_proto_ops(&cls_cgroup_ops); } static void __exit exit_cgroup_cls(void) { unregister_tcf_proto_ops(&cls_cgroup_ops); } module_init(init_cgroup_cls); module_exit(exit_cgroup_cls); MODULE_DESCRIPTION("TC cgroup classifier"); MODULE_LICENSE("GPL"); |
28 28 28 27 26 7 26 26 26 21 21 4 26 25 25 3 24 22 22 28 5 5 5 4 3 4 3 2 3 3 3 5 9 9 9 7 7 7 6 7 306 306 6 2 22 10 10 22 27 22 22 22 22 25 25 25 25 22 22 22 5 5 1 22 22 22 22 22 1 22 9 9 9 2 7 7 2 7 1 9 7 6 7 7 9 9 9 391 391 4 4 391 100 100 99 1 2 1 153 153 100 56 153 152 151 7 7 7 7 7 6 6 6 6 6 6 6 7 7 6 2 5 7 7 6 7 6 111 111 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Anycast support for IPv6 * Linux INET6 implementation * * Authors: * David L Stevens (dlstevens@us.ibm.com) * * based heavily on net/ipv6/mcast.c */ #include <linux/capability.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/random.h> #include <linux/string.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/in6.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/route.h> #include <linux/init.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/snmp.h> #include <net/ipv6.h> #include <net/protocol.h> #include <net/if_inet6.h> #include <net/ndisc.h> #include <net/addrconf.h> #include <net/ip6_route.h> #include <net/checksum.h> #define IN6_ADDR_HSIZE_SHIFT 8 #define IN6_ADDR_HSIZE BIT(IN6_ADDR_HSIZE_SHIFT) /* anycast address hash table */ static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE]; static DEFINE_SPINLOCK(acaddr_hash_lock); static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr); static u32 inet6_acaddr_hash(const struct net *net, const struct in6_addr *addr) { u32 val = __ipv6_addr_jhash(addr, net_hash_mix(net)); return hash_32(val, IN6_ADDR_HSIZE_SHIFT); } /* * socket join an anycast group */ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct ipv6_pinfo *np = inet6_sk(sk); struct net_device *dev = NULL; struct inet6_dev *idev; struct ipv6_ac_socklist *pac; struct net *net = sock_net(sk); int ishost = !net->ipv6.devconf_all->forwarding; int err = 0; ASSERT_RTNL(); if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (ipv6_addr_is_multicast(addr)) return -EINVAL; if (ifindex) dev = __dev_get_by_index(net, ifindex); if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE)) return -EINVAL; pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL); if (!pac) return -ENOMEM; pac->acl_next = NULL; pac->acl_addr = *addr; if (ifindex == 0) { struct rt6_info *rt; rt = rt6_lookup(net, addr, NULL, 0, NULL, 0); if (rt) { dev = rt->dst.dev; ip6_rt_put(rt); } else if (ishost) { err = -EADDRNOTAVAIL; goto error; } else { /* router, no matching interface: just pick one */ dev = __dev_get_by_flags(net, IFF_UP, IFF_UP | IFF_LOOPBACK); } } if (!dev) { err = -ENODEV; goto error; } idev = __in6_dev_get(dev); if (!idev) { if (ifindex) err = -ENODEV; else err = -EADDRNOTAVAIL; goto error; } /* reset ishost, now that we have a specific device */ ishost = !idev->cnf.forwarding; pac->acl_ifindex = dev->ifindex; /* XXX * For hosts, allow link-local or matching prefix anycasts. * This obviates the need for propagating anycast routes while * still allowing some non-router anycast participation. */ if (!ipv6_chk_prefix(addr, dev)) { if (ishost) err = -EADDRNOTAVAIL; if (err) goto error; } err = __ipv6_dev_ac_inc(idev, addr); if (!err) { pac->acl_next = np->ipv6_ac_list; np->ipv6_ac_list = pac; pac = NULL; } error: if (pac) sock_kfree_s(sk, pac, sizeof(*pac)); return err; } /* * socket leave an anycast group */ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct ipv6_pinfo *np = inet6_sk(sk); struct net_device *dev; struct ipv6_ac_socklist *pac, *prev_pac; struct net *net = sock_net(sk); ASSERT_RTNL(); prev_pac = NULL; for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) { if ((ifindex == 0 || pac->acl_ifindex == ifindex) && ipv6_addr_equal(&pac->acl_addr, addr)) break; prev_pac = pac; } if (!pac) return -ENOENT; if (prev_pac) prev_pac->acl_next = pac->acl_next; else np->ipv6_ac_list = pac->acl_next; dev = __dev_get_by_index(net, pac->acl_ifindex); if (dev) ipv6_dev_ac_dec(dev, &pac->acl_addr); sock_kfree_s(sk, pac, sizeof(*pac)); return 0; } void __ipv6_sock_ac_close(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct net_device *dev = NULL; struct ipv6_ac_socklist *pac; struct net *net = sock_net(sk); int prev_index; ASSERT_RTNL(); pac = np->ipv6_ac_list; np->ipv6_ac_list = NULL; prev_index = 0; while (pac) { struct ipv6_ac_socklist *next = pac->acl_next; if (pac->acl_ifindex != prev_index) { dev = __dev_get_by_index(net, pac->acl_ifindex); prev_index = pac->acl_ifindex; } if (dev) ipv6_dev_ac_dec(dev, &pac->acl_addr); sock_kfree_s(sk, pac, sizeof(*pac)); pac = next; } } void ipv6_sock_ac_close(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); if (!np->ipv6_ac_list) return; rtnl_lock(); __ipv6_sock_ac_close(sk); rtnl_unlock(); } static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca) { unsigned int hash = inet6_acaddr_hash(net, &aca->aca_addr); spin_lock(&acaddr_hash_lock); hlist_add_head_rcu(&aca->aca_addr_lst, &inet6_acaddr_lst[hash]); spin_unlock(&acaddr_hash_lock); } static void ipv6_del_acaddr_hash(struct ifacaddr6 *aca) { spin_lock(&acaddr_hash_lock); hlist_del_init_rcu(&aca->aca_addr_lst); spin_unlock(&acaddr_hash_lock); } static void aca_get(struct ifacaddr6 *aca) { refcount_inc(&aca->aca_refcnt); } static void aca_free_rcu(struct rcu_head *h) { struct ifacaddr6 *aca = container_of(h, struct ifacaddr6, rcu); fib6_info_release(aca->aca_rt); kfree(aca); } static void aca_put(struct ifacaddr6 *ac) { if (refcount_dec_and_test(&ac->aca_refcnt)) call_rcu_hurry(&ac->rcu, aca_free_rcu); } static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i, const struct in6_addr *addr) { struct ifacaddr6 *aca; aca = kzalloc(sizeof(*aca), GFP_ATOMIC); if (!aca) return NULL; aca->aca_addr = *addr; fib6_info_hold(f6i); aca->aca_rt = f6i; INIT_HLIST_NODE(&aca->aca_addr_lst); aca->aca_users = 1; /* aca_tstamp should be updated upon changes */ aca->aca_cstamp = aca->aca_tstamp = jiffies; refcount_set(&aca->aca_refcnt, 1); return aca; } static void inet6_ifacaddr_notify(struct net_device *dev, const struct ifacaddr6 *ifaca, int event) { struct inet6_fill_args fillargs = { .event = event, .netnsid = -1, }; struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOMEM; skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + nla_total_size(sizeof(struct in6_addr)) + nla_total_size(sizeof(struct ifa_cacheinfo)), GFP_KERNEL); if (!skb) goto error; err = inet6_fill_ifacaddr(skb, ifaca, &fillargs); if (err < 0) { pr_err("Failed to fill in anycast addresses (err %d)\n", err); nlmsg_free(skb); goto error; } rtnl_notify(skb, net, 0, RTNLGRP_IPV6_ACADDR, NULL, GFP_KERNEL); return; error: rtnl_set_sk_err(net, RTNLGRP_IPV6_ACADDR, err); } /* * device anycast group inc (add if not found) */ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) { struct ifacaddr6 *aca; struct fib6_info *f6i; struct net *net; int err; ASSERT_RTNL(); write_lock_bh(&idev->lock); if (idev->dead) { err = -ENODEV; goto out; } for (aca = rtnl_dereference(idev->ac_list); aca; aca = rtnl_dereference(aca->aca_next)) { if (ipv6_addr_equal(&aca->aca_addr, addr)) { aca->aca_users++; err = 0; goto out; } } net = dev_net(idev->dev); f6i = addrconf_f6i_alloc(net, idev, addr, true, GFP_ATOMIC, NULL); if (IS_ERR(f6i)) { err = PTR_ERR(f6i); goto out; } aca = aca_alloc(f6i, addr); if (!aca) { fib6_info_release(f6i); err = -ENOMEM; goto out; } /* Hold this for addrconf_join_solict() below before we unlock, * it is already exposed via idev->ac_list. */ aca_get(aca); aca->aca_next = idev->ac_list; rcu_assign_pointer(idev->ac_list, aca); write_unlock_bh(&idev->lock); ipv6_add_acaddr_hash(net, aca); ip6_ins_rt(net, f6i); addrconf_join_solict(idev->dev, &aca->aca_addr); inet6_ifacaddr_notify(idev->dev, aca, RTM_NEWANYCAST); aca_put(aca); return 0; out: write_unlock_bh(&idev->lock); return err; } /* * device anycast group decrement */ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) { struct ifacaddr6 *aca, *prev_aca; ASSERT_RTNL(); write_lock_bh(&idev->lock); prev_aca = NULL; for (aca = rtnl_dereference(idev->ac_list); aca; aca = rtnl_dereference(aca->aca_next)) { if (ipv6_addr_equal(&aca->aca_addr, addr)) break; prev_aca = aca; } if (!aca) { write_unlock_bh(&idev->lock); return -ENOENT; } if (--aca->aca_users > 0) { write_unlock_bh(&idev->lock); return 0; } if (prev_aca) rcu_assign_pointer(prev_aca->aca_next, aca->aca_next); else rcu_assign_pointer(idev->ac_list, aca->aca_next); write_unlock_bh(&idev->lock); ipv6_del_acaddr_hash(aca); addrconf_leave_solict(idev, &aca->aca_addr); ip6_del_rt(dev_net(idev->dev), aca->aca_rt, false); inet6_ifacaddr_notify(idev->dev, aca, RTM_DELANYCAST); aca_put(aca); return 0; } /* called with rtnl_lock() */ static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr) { struct inet6_dev *idev = __in6_dev_get(dev); if (!idev) return -ENODEV; return __ipv6_dev_ac_dec(idev, addr); } void ipv6_ac_destroy_dev(struct inet6_dev *idev) { struct ifacaddr6 *aca; write_lock_bh(&idev->lock); while ((aca = rtnl_dereference(idev->ac_list)) != NULL) { rcu_assign_pointer(idev->ac_list, aca->aca_next); write_unlock_bh(&idev->lock); ipv6_del_acaddr_hash(aca); addrconf_leave_solict(idev, &aca->aca_addr); ip6_del_rt(dev_net(idev->dev), aca->aca_rt, false); aca_put(aca); write_lock_bh(&idev->lock); } write_unlock_bh(&idev->lock); } /* * check if the interface has this anycast address * called with rcu_read_lock() */ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *addr) { struct inet6_dev *idev; struct ifacaddr6 *aca; idev = __in6_dev_get(dev); if (idev) { for (aca = rcu_dereference(idev->ac_list); aca; aca = rcu_dereference(aca->aca_next)) if (ipv6_addr_equal(&aca->aca_addr, addr)) break; return aca != NULL; } return false; } /* * check if given interface (or any, if dev==0) has this anycast address */ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr) { struct net_device *nh_dev; struct ifacaddr6 *aca; bool found = false; rcu_read_lock(); if (dev) found = ipv6_chk_acast_dev(dev, addr); else { unsigned int hash = inet6_acaddr_hash(net, addr); hlist_for_each_entry_rcu(aca, &inet6_acaddr_lst[hash], aca_addr_lst) { nh_dev = fib6_info_nh_dev(aca->aca_rt); if (!nh_dev || !net_eq(dev_net(nh_dev), net)) continue; if (ipv6_addr_equal(&aca->aca_addr, addr)) { found = true; break; } } } rcu_read_unlock(); return found; } /* check if this anycast address is link-local on given interface or * is global */ bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev, const struct in6_addr *addr) { return ipv6_chk_acast_addr(net, (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL ? dev : NULL), addr); } #ifdef CONFIG_PROC_FS struct ac6_iter_state { struct seq_net_private p; struct net_device *dev; }; #define ac6_seq_private(seq) ((struct ac6_iter_state *)(seq)->private) static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq) { struct ac6_iter_state *state = ac6_seq_private(seq); struct net *net = seq_file_net(seq); struct ifacaddr6 *im = NULL; for_each_netdev_rcu(net, state->dev) { struct inet6_dev *idev; idev = __in6_dev_get(state->dev); if (!idev) continue; im = rcu_dereference(idev->ac_list); if (im) break; } return im; } static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im) { struct ac6_iter_state *state = ac6_seq_private(seq); struct inet6_dev *idev; im = rcu_dereference(im->aca_next); while (!im) { state->dev = next_net_device_rcu(state->dev); if (!state->dev) break; idev = __in6_dev_get(state->dev); if (!idev) continue; im = rcu_dereference(idev->ac_list); } return im; } static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos) { struct ifacaddr6 *im = ac6_get_first(seq); if (im) while (pos && (im = ac6_get_next(seq, im)) != NULL) --pos; return pos ? NULL : im; } static void *ac6_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { rcu_read_lock(); return ac6_get_idx(seq, *pos); } static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ifacaddr6 *im = ac6_get_next(seq, v); ++*pos; return im; } static void ac6_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { rcu_read_unlock(); } static int ac6_seq_show(struct seq_file *seq, void *v) { struct ifacaddr6 *im = (struct ifacaddr6 *)v; struct ac6_iter_state *state = ac6_seq_private(seq); seq_printf(seq, "%-4d %-15s %pi6 %5d\n", state->dev->ifindex, state->dev->name, &im->aca_addr, im->aca_users); return 0; } static const struct seq_operations ac6_seq_ops = { .start = ac6_seq_start, .next = ac6_seq_next, .stop = ac6_seq_stop, .show = ac6_seq_show, }; int __net_init ac6_proc_init(struct net *net) { if (!proc_create_net("anycast6", 0444, net->proc_net, &ac6_seq_ops, sizeof(struct ac6_iter_state))) return -ENOMEM; return 0; } void ac6_proc_exit(struct net *net) { remove_proc_entry("anycast6", net->proc_net); } #endif /* Init / cleanup code */ int __init ipv6_anycast_init(void) { int i; for (i = 0; i < IN6_ADDR_HSIZE; i++) INIT_HLIST_HEAD(&inet6_acaddr_lst[i]); return 0; } void ipv6_anycast_cleanup(void) { int i; spin_lock(&acaddr_hash_lock); for (i = 0; i < IN6_ADDR_HSIZE; i++) WARN_ON(!hlist_empty(&inet6_acaddr_lst[i])); spin_unlock(&acaddr_hash_lock); } |
3 3 3 3 4 1 3 3 4 1 10 12 4 2 10 8 4 12 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 | // SPDX-License-Identifier: GPL-2.0-or-later /* * IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173 * * Copyright (C)2003 USAGI/WIDE Project * * Author Mitsuru KANDA <mk@linux-ipv6.org> */ /* * [Memo] * * Outbound: * The compression of IP datagram MUST be done before AH/ESP processing, * fragmentation, and the addition of Hop-by-Hop/Routing header. * * Inbound: * The decompression of IP datagram MUST be done after the reassembly, * AH/ESP processing. */ #define pr_fmt(fmt) "IPv6: " fmt #include <linux/module.h> #include <net/ip.h> #include <net/xfrm.h> #include <net/ipcomp.h> #include <linux/crypto.h> #include <linux/err.h> #include <linux/pfkeyv2.h> #include <linux/random.h> #include <linux/percpu.h> #include <linux/smp.h> #include <linux/list.h> #include <linux/vmalloc.h> #include <linux/rtnetlink.h> #include <net/ip6_route.h> #include <net/icmp.h> #include <net/ipv6.h> #include <net/protocol.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/mutex.h> static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct net *net = dev_net(skb->dev); __be32 spi; const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data; struct ip_comp_hdr *ipcomph = (struct ip_comp_hdr *)(skb->data + offset); struct xfrm_state *x; if (type != ICMPV6_PKT_TOOBIG && type != NDISC_REDIRECT) return 0; spi = htonl(ntohs(ipcomph->cpi)); x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6); if (!x) return 0; if (type == NDISC_REDIRECT) ip6_redirect(skb, net, skb->dev->ifindex, 0, sock_net_uid(net, NULL)); else ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; } static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) { struct net *net = xs_net(x); struct xfrm_state *t = NULL; t = xfrm_state_alloc(net); if (!t) goto out; t->id.proto = IPPROTO_IPV6; t->id.spi = xfrm6_tunnel_alloc_spi(net, (xfrm_address_t *)&x->props.saddr); if (!t->id.spi) goto error; memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr)); memcpy(&t->sel, &x->sel, sizeof(t->sel)); t->props.family = AF_INET6; t->props.mode = x->props.mode; memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr)); memcpy(&t->mark, &x->mark, sizeof(t->mark)); t->if_id = x->if_id; if (xfrm_init_state(t)) goto error; atomic_set(&t->tunnel_users, 1); out: return t; error: t->km.state = XFRM_STATE_DEAD; xfrm_state_put(t); t = NULL; goto out; } static int ipcomp6_tunnel_attach(struct xfrm_state *x) { struct net *net = xs_net(x); int err = 0; struct xfrm_state *t = NULL; __be32 spi; u32 mark = x->mark.m & x->mark.v; spi = xfrm6_tunnel_spi_lookup(net, (xfrm_address_t *)&x->props.saddr); if (spi) t = xfrm_state_lookup(net, mark, (xfrm_address_t *)&x->id.daddr, spi, IPPROTO_IPV6, AF_INET6); if (!t) { t = ipcomp6_tunnel_create(x); if (!t) { err = -EINVAL; goto out; } xfrm_state_insert(t); xfrm_state_hold(t); } x->tunnel = t; atomic_inc(&t->tunnel_users); out: return err; } static int ipcomp6_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { int err = -EINVAL; x->props.header_len = 0; switch (x->props.mode) { case XFRM_MODE_TRANSPORT: break; case XFRM_MODE_TUNNEL: x->props.header_len += sizeof(struct ipv6hdr); break; default: NL_SET_ERR_MSG(extack, "Unsupported XFRM mode for IPcomp"); goto out; } err = ipcomp_init_state(x, extack); if (err) goto out; if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp6_tunnel_attach(x); if (err) { NL_SET_ERR_MSG(extack, "Kernel error: failed to initialize the associated state"); goto out; } } err = 0; out: return err; } static int ipcomp6_rcv_cb(struct sk_buff *skb, int err) { return 0; } static const struct xfrm_type ipcomp6_type = { .owner = THIS_MODULE, .proto = IPPROTO_COMP, .init_state = ipcomp6_init_state, .destructor = ipcomp_destroy, .input = ipcomp_input, .output = ipcomp_output, }; static struct xfrm6_protocol ipcomp6_protocol = { .handler = xfrm6_rcv, .input_handler = xfrm_input, .cb_handler = ipcomp6_rcv_cb, .err_handler = ipcomp6_err, .priority = 0, }; static int __init ipcomp6_init(void) { if (xfrm_register_type(&ipcomp6_type, AF_INET6) < 0) { pr_info("%s: can't add xfrm type\n", __func__); return -EAGAIN; } if (xfrm6_protocol_register(&ipcomp6_protocol, IPPROTO_COMP) < 0) { pr_info("%s: can't add protocol\n", __func__); xfrm_unregister_type(&ipcomp6_type, AF_INET6); return -EAGAIN; } return 0; } static void __exit ipcomp6_fini(void) { if (xfrm6_protocol_deregister(&ipcomp6_protocol, IPPROTO_COMP) < 0) pr_info("%s: can't remove protocol\n", __func__); xfrm_unregister_type(&ipcomp6_type, AF_INET6); } module_init(ipcomp6_init); module_exit(ipcomp6_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173"); MODULE_AUTHOR("Mitsuru KANDA <mk@linux-ipv6.org>"); MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_COMP); |
31029 7801 75 75 75 75 66 3 67 57 52 56 23 67 8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 | // SPDX-License-Identifier: GPL-2.0 #include <linux/compiler.h> #include <linux/errno.h> #include <linux/export.h> #include <linux/fault-inject-usercopy.h> #include <linux/instrumented.h> #include <linux/kernel.h> #include <linux/nospec.h> #include <linux/string.h> #include <linux/uaccess.h> #include <linux/wordpart.h> /* out-of-line parts */ #if !defined(INLINE_COPY_FROM_USER) || defined(CONFIG_RUST) unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n) { return _inline_copy_from_user(to, from, n); } EXPORT_SYMBOL(_copy_from_user); #endif #if !defined(INLINE_COPY_TO_USER) || defined(CONFIG_RUST) unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n) { return _inline_copy_to_user(to, from, n); } EXPORT_SYMBOL(_copy_to_user); #endif /** * check_zeroed_user: check if a userspace buffer only contains zero bytes * @from: Source address, in userspace. * @size: Size of buffer. * * This is effectively shorthand for "memchr_inv(from, 0, size) == NULL" for * userspace addresses (and is more efficient because we don't care where the * first non-zero byte is). * * Returns: * * 0: There were non-zero bytes present in the buffer. * * 1: The buffer was full of zero bytes. * * -EFAULT: access to userspace failed. */ int check_zeroed_user(const void __user *from, size_t size) { unsigned long val; uintptr_t align = (uintptr_t) from % sizeof(unsigned long); if (unlikely(size == 0)) return 1; from -= align; size += align; if (!user_read_access_begin(from, size)) return -EFAULT; unsafe_get_user(val, (unsigned long __user *) from, err_fault); if (align) val &= ~aligned_byte_mask(align); while (size > sizeof(unsigned long)) { if (unlikely(val)) goto done; from += sizeof(unsigned long); size -= sizeof(unsigned long); unsafe_get_user(val, (unsigned long __user *) from, err_fault); } if (size < sizeof(unsigned long)) val &= aligned_byte_mask(size); done: user_read_access_end(); return (val == 0); err_fault: user_read_access_end(); return -EFAULT; } EXPORT_SYMBOL(check_zeroed_user); |
1032 143 37 946 1000 1000 1 774 6 258 14 987 945 51 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SIGNAL_H #define _LINUX_SIGNAL_H #include <linux/bug.h> #include <linux/list.h> #include <linux/signal_types.h> #include <linux/string.h> struct task_struct; /* for sysctl */ extern int print_fatal_signals; static inline void copy_siginfo(kernel_siginfo_t *to, const kernel_siginfo_t *from) { memcpy(to, from, sizeof(*to)); } static inline void clear_siginfo(kernel_siginfo_t *info) { memset(info, 0, sizeof(*info)); } #define SI_EXPANSION_SIZE (sizeof(struct siginfo) - sizeof(struct kernel_siginfo)) static inline void copy_siginfo_to_external(siginfo_t *to, const kernel_siginfo_t *from) { memcpy(to, from, sizeof(*from)); memset(((char *)to) + sizeof(struct kernel_siginfo), 0, SI_EXPANSION_SIZE); } int copy_siginfo_to_user(siginfo_t __user *to, const kernel_siginfo_t *from); int copy_siginfo_from_user(kernel_siginfo_t *to, const siginfo_t __user *from); enum siginfo_layout { SIL_KILL, SIL_TIMER, SIL_POLL, SIL_FAULT, SIL_FAULT_TRAPNO, SIL_FAULT_MCEERR, SIL_FAULT_BNDERR, SIL_FAULT_PKUERR, SIL_FAULT_PERF_EVENT, SIL_CHLD, SIL_RT, SIL_SYS, }; enum siginfo_layout siginfo_layout(unsigned sig, int si_code); /* * Define some primitives to manipulate sigset_t. */ #ifndef __HAVE_ARCH_SIG_BITOPS #include <linux/bitops.h> /* We don't use <linux/bitops.h> for these because there is no need to be atomic. */ static inline void sigaddset(sigset_t *set, int _sig) { unsigned long sig = _sig - 1; if (_NSIG_WORDS == 1) set->sig[0] |= 1UL << sig; else set->sig[sig / _NSIG_BPW] |= 1UL << (sig % _NSIG_BPW); } static inline void sigdelset(sigset_t *set, int _sig) { unsigned long sig = _sig - 1; if (_NSIG_WORDS == 1) set->sig[0] &= ~(1UL << sig); else set->sig[sig / _NSIG_BPW] &= ~(1UL << (sig % _NSIG_BPW)); } static inline int sigismember(sigset_t *set, int _sig) { unsigned long sig = _sig - 1; if (_NSIG_WORDS == 1) return 1 & (set->sig[0] >> sig); else return 1 & (set->sig[sig / _NSIG_BPW] >> (sig % _NSIG_BPW)); } #endif /* __HAVE_ARCH_SIG_BITOPS */ static inline int sigisemptyset(sigset_t *set) { switch (_NSIG_WORDS) { case 4: return (set->sig[3] | set->sig[2] | set->sig[1] | set->sig[0]) == 0; case 2: return (set->sig[1] | set->sig[0]) == 0; case 1: return set->sig[0] == 0; default: BUILD_BUG(); return 0; } } static inline int sigequalsets(const sigset_t *set1, const sigset_t *set2) { switch (_NSIG_WORDS) { case 4: return (set1->sig[3] == set2->sig[3]) && (set1->sig[2] == set2->sig[2]) && (set1->sig[1] == set2->sig[1]) && (set1->sig[0] == set2->sig[0]); case 2: return (set1->sig[1] == set2->sig[1]) && (set1->sig[0] == set2->sig[0]); case 1: return set1->sig[0] == set2->sig[0]; } return 0; } #define sigmask(sig) (1UL << ((sig) - 1)) #ifndef __HAVE_ARCH_SIG_SETOPS #define _SIG_SET_BINOP(name, op) \ static inline void name(sigset_t *r, const sigset_t *a, const sigset_t *b) \ { \ unsigned long a0, a1, a2, a3, b0, b1, b2, b3; \ \ switch (_NSIG_WORDS) { \ case 4: \ a3 = a->sig[3]; a2 = a->sig[2]; \ b3 = b->sig[3]; b2 = b->sig[2]; \ r->sig[3] = op(a3, b3); \ r->sig[2] = op(a2, b2); \ fallthrough; \ case 2: \ a1 = a->sig[1]; b1 = b->sig[1]; \ r->sig[1] = op(a1, b1); \ fallthrough; \ case 1: \ a0 = a->sig[0]; b0 = b->sig[0]; \ r->sig[0] = op(a0, b0); \ break; \ default: \ BUILD_BUG(); \ } \ } #define _sig_or(x,y) ((x) | (y)) _SIG_SET_BINOP(sigorsets, _sig_or) #define _sig_and(x,y) ((x) & (y)) _SIG_SET_BINOP(sigandsets, _sig_and) #define _sig_andn(x,y) ((x) & ~(y)) _SIG_SET_BINOP(sigandnsets, _sig_andn) #undef _SIG_SET_BINOP #undef _sig_or #undef _sig_and #undef _sig_andn #define _SIG_SET_OP(name, op) \ static inline void name(sigset_t *set) \ { \ switch (_NSIG_WORDS) { \ case 4: set->sig[3] = op(set->sig[3]); \ set->sig[2] = op(set->sig[2]); \ fallthrough; \ case 2: set->sig[1] = op(set->sig[1]); \ fallthrough; \ case 1: set->sig[0] = op(set->sig[0]); \ break; \ default: \ BUILD_BUG(); \ } \ } #define _sig_not(x) (~(x)) _SIG_SET_OP(signotset, _sig_not) #undef _SIG_SET_OP #undef _sig_not static inline void sigemptyset(sigset_t *set) { switch (_NSIG_WORDS) { default: memset(set, 0, sizeof(sigset_t)); break; case 2: set->sig[1] = 0; fallthrough; case 1: set->sig[0] = 0; break; } } static inline void sigfillset(sigset_t *set) { switch (_NSIG_WORDS) { default: memset(set, -1, sizeof(sigset_t)); break; case 2: set->sig[1] = -1; fallthrough; case 1: set->sig[0] = -1; break; } } /* Some extensions for manipulating the low 32 signals in particular. */ static inline void sigaddsetmask(sigset_t *set, unsigned long mask) { set->sig[0] |= mask; } static inline void sigdelsetmask(sigset_t *set, unsigned long mask) { set->sig[0] &= ~mask; } static inline int sigtestsetmask(sigset_t *set, unsigned long mask) { return (set->sig[0] & mask) != 0; } static inline void siginitset(sigset_t *set, unsigned long mask) { set->sig[0] = mask; switch (_NSIG_WORDS) { default: memset(&set->sig[1], 0, sizeof(long)*(_NSIG_WORDS-1)); break; case 2: set->sig[1] = 0; break; case 1: ; } } static inline void siginitsetinv(sigset_t *set, unsigned long mask) { set->sig[0] = ~mask; switch (_NSIG_WORDS) { default: memset(&set->sig[1], -1, sizeof(long)*(_NSIG_WORDS-1)); break; case 2: set->sig[1] = -1; break; case 1: ; } } #endif /* __HAVE_ARCH_SIG_SETOPS */ static inline void init_sigpending(struct sigpending *sig) { sigemptyset(&sig->signal); INIT_LIST_HEAD(&sig->list); } extern void flush_sigqueue(struct sigpending *queue); /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */ static inline int valid_signal(unsigned long sig) { return sig <= _NSIG ? 1 : 0; } struct timespec; struct pt_regs; enum pid_type; extern int next_signal(struct sigpending *pending, sigset_t *mask); extern int do_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p, enum pid_type type); extern int group_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p, enum pid_type type); extern int send_signal_locked(int sig, struct kernel_siginfo *info, struct task_struct *p, enum pid_type type); extern int sigprocmask(int, sigset_t *, sigset_t *); extern void set_current_blocked(sigset_t *); extern void __set_current_blocked(const sigset_t *); extern int show_unhandled_signals; extern bool get_signal(struct ksignal *ksig); extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping); extern void exit_signals(struct task_struct *tsk); extern void kernel_sigaction(int, __sighandler_t); #define SIG_KTHREAD ((__force __sighandler_t)2) #define SIG_KTHREAD_KERNEL ((__force __sighandler_t)3) static inline void allow_signal(int sig) { /* * Kernel threads handle their own signals. Let the signal code * know it'll be handled, so that they don't get converted to * SIGKILL or just silently dropped. */ kernel_sigaction(sig, SIG_KTHREAD); } static inline void allow_kernel_signal(int sig) { /* * Kernel threads handle their own signals. Let the signal code * know signals sent by the kernel will be handled, so that they * don't get silently dropped. */ kernel_sigaction(sig, SIG_KTHREAD_KERNEL); } static inline void disallow_signal(int sig) { kernel_sigaction(sig, SIG_IGN); } extern struct kmem_cache *sighand_cachep; extern bool unhandled_signal(struct task_struct *tsk, int sig); /* * In POSIX a signal is sent either to a specific thread (Linux task) * or to the process as a whole (Linux thread group). How the signal * is sent determines whether it's to one thread or the whole group, * which determines which signal mask(s) are involved in blocking it * from being delivered until later. When the signal is delivered, * either it's caught or ignored by a user handler or it has a default * effect that applies to the whole thread group (POSIX process). * * The possible effects an unblocked signal set to SIG_DFL can have are: * ignore - Nothing Happens * terminate - kill the process, i.e. all threads in the group, * similar to exit_group. The group leader (only) reports * WIFSIGNALED status to its parent. * coredump - write a core dump file describing all threads using * the same mm and then kill all those threads * stop - stop all the threads in the group, i.e. TASK_STOPPED state * * SIGKILL and SIGSTOP cannot be caught, blocked, or ignored. * Other signals when not blocked and set to SIG_DFL behaves as follows. * The job control signals also have other special effects. * * +--------------------+------------------+ * | POSIX signal | default action | * +--------------------+------------------+ * | SIGHUP | terminate | * | SIGINT | terminate | * | SIGQUIT | coredump | * | SIGILL | coredump | * | SIGTRAP | coredump | * | SIGABRT/SIGIOT | coredump | * | SIGBUS | coredump | * | SIGFPE | coredump | * | SIGKILL | terminate(+) | * | SIGUSR1 | terminate | * | SIGSEGV | coredump | * | SIGUSR2 | terminate | * | SIGPIPE | terminate | * | SIGALRM | terminate | * | SIGTERM | terminate | * | SIGCHLD | ignore | * | SIGCONT | ignore(*) | * | SIGSTOP | stop(*)(+) | * | SIGTSTP | stop(*) | * | SIGTTIN | stop(*) | * | SIGTTOU | stop(*) | * | SIGURG | ignore | * | SIGXCPU | coredump | * | SIGXFSZ | coredump | * | SIGVTALRM | terminate | * | SIGPROF | terminate | * | SIGPOLL/SIGIO | terminate | * | SIGSYS/SIGUNUSED | coredump | * | SIGSTKFLT | terminate | * | SIGWINCH | ignore | * | SIGPWR | terminate | * | SIGRTMIN-SIGRTMAX | terminate | * +--------------------+------------------+ * | non-POSIX signal | default action | * +--------------------+------------------+ * | SIGEMT | coredump | * +--------------------+------------------+ * * (+) For SIGKILL and SIGSTOP the action is "always", not just "default". * (*) Special job control effects: * When SIGCONT is sent, it resumes the process (all threads in the group) * from TASK_STOPPED state and also clears any pending/queued stop signals * (any of those marked with "stop(*)"). This happens regardless of blocking, * catching, or ignoring SIGCONT. When any stop signal is sent, it clears * any pending/queued SIGCONT signals; this happens regardless of blocking, * catching, or ignored the stop signal, though (except for SIGSTOP) the * default action of stopping the process may happen later or never. */ #ifdef SIGEMT #define SIGEMT_MASK rt_sigmask(SIGEMT) #else #define SIGEMT_MASK 0 #endif #if SIGRTMIN > BITS_PER_LONG #define rt_sigmask(sig) (1ULL << ((sig)-1)) #else #define rt_sigmask(sig) sigmask(sig) #endif #define siginmask(sig, mask) \ ((sig) > 0 && (sig) < SIGRTMIN && (rt_sigmask(sig) & (mask))) #define SIG_KERNEL_ONLY_MASK (\ rt_sigmask(SIGKILL) | rt_sigmask(SIGSTOP)) #define SIG_KERNEL_STOP_MASK (\ rt_sigmask(SIGSTOP) | rt_sigmask(SIGTSTP) | \ rt_sigmask(SIGTTIN) | rt_sigmask(SIGTTOU) ) #define SIG_KERNEL_COREDUMP_MASK (\ rt_sigmask(SIGQUIT) | rt_sigmask(SIGILL) | \ rt_sigmask(SIGTRAP) | rt_sigmask(SIGABRT) | \ rt_sigmask(SIGFPE) | rt_sigmask(SIGSEGV) | \ rt_sigmask(SIGBUS) | rt_sigmask(SIGSYS) | \ rt_sigmask(SIGXCPU) | rt_sigmask(SIGXFSZ) | \ SIGEMT_MASK ) #define SIG_KERNEL_IGNORE_MASK (\ rt_sigmask(SIGCONT) | rt_sigmask(SIGCHLD) | \ rt_sigmask(SIGWINCH) | rt_sigmask(SIGURG) ) #define SIG_SPECIFIC_SICODES_MASK (\ rt_sigmask(SIGILL) | rt_sigmask(SIGFPE) | \ rt_sigmask(SIGSEGV) | rt_sigmask(SIGBUS) | \ rt_sigmask(SIGTRAP) | rt_sigmask(SIGCHLD) | \ rt_sigmask(SIGPOLL) | rt_sigmask(SIGSYS) | \ SIGEMT_MASK ) #define sig_kernel_only(sig) siginmask(sig, SIG_KERNEL_ONLY_MASK) #define sig_kernel_coredump(sig) siginmask(sig, SIG_KERNEL_COREDUMP_MASK) #define sig_kernel_ignore(sig) siginmask(sig, SIG_KERNEL_IGNORE_MASK) #define sig_kernel_stop(sig) siginmask(sig, SIG_KERNEL_STOP_MASK) #define sig_specific_sicodes(sig) siginmask(sig, SIG_SPECIFIC_SICODES_MASK) #define sig_fatal(t, signr) \ (!siginmask(signr, SIG_KERNEL_IGNORE_MASK|SIG_KERNEL_STOP_MASK) && \ (t)->sighand->action[(signr)-1].sa.sa_handler == SIG_DFL) void signals_init(void); int restore_altstack(const stack_t __user *); int __save_altstack(stack_t __user *, unsigned long); #define unsafe_save_altstack(uss, sp, label) do { \ stack_t __user *__uss = uss; \ struct task_struct *t = current; \ unsafe_put_user((void __user *)t->sas_ss_sp, &__uss->ss_sp, label); \ unsafe_put_user(t->sas_ss_flags, &__uss->ss_flags, label); \ unsafe_put_user(t->sas_ss_size, &__uss->ss_size, label); \ } while (0); #ifdef CONFIG_DYNAMIC_SIGFRAME bool sigaltstack_size_valid(size_t ss_size); #else static inline bool sigaltstack_size_valid(size_t size) { return true; } #endif /* !CONFIG_DYNAMIC_SIGFRAME */ #ifdef CONFIG_PROC_FS struct seq_file; extern void render_sigset_t(struct seq_file *, const char *, sigset_t *); #endif #ifndef arch_untagged_si_addr /* * Given a fault address and a signal and si_code which correspond to the * _sigfault union member, returns the address that must appear in si_addr if * the signal handler does not have SA_EXPOSE_TAGBITS enabled in sa_flags. */ static inline void __user *arch_untagged_si_addr(void __user *addr, unsigned long sig, unsigned long si_code) { return addr; } #endif #endif /* _LINUX_SIGNAL_H */ |
284 234 73 284 341 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 | // SPDX-License-Identifier: GPL-2.0-only /* * Kernel-based Virtual Machine driver for Linux * * Copyright 2016 Red Hat, Inc. and/or its affiliates. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kvm_host.h> #include <linux/debugfs.h> #include "lapic.h" #include "mmu.h" #include "mmu/mmu_internal.h" static int vcpu_get_timer_advance_ns(void *data, u64 *val) { struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data; *val = vcpu->arch.apic->lapic_timer.timer_advance_ns; return 0; } DEFINE_SIMPLE_ATTRIBUTE(vcpu_timer_advance_ns_fops, vcpu_get_timer_advance_ns, NULL, "%llu\n"); static int vcpu_get_guest_mode(void *data, u64 *val) { struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data; *val = vcpu->stat.guest_mode; return 0; } DEFINE_SIMPLE_ATTRIBUTE(vcpu_guest_mode_fops, vcpu_get_guest_mode, NULL, "%lld\n"); static int vcpu_get_tsc_offset(void *data, u64 *val) { struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data; *val = vcpu->arch.tsc_offset; return 0; } DEFINE_SIMPLE_ATTRIBUTE(vcpu_tsc_offset_fops, vcpu_get_tsc_offset, NULL, "%lld\n"); static int vcpu_get_tsc_scaling_ratio(void *data, u64 *val) { struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data; *val = vcpu->arch.tsc_scaling_ratio; return 0; } DEFINE_SIMPLE_ATTRIBUTE(vcpu_tsc_scaling_fops, vcpu_get_tsc_scaling_ratio, NULL, "%llu\n"); static int vcpu_get_tsc_scaling_frac_bits(void *data, u64 *val) { *val = kvm_caps.tsc_scaling_ratio_frac_bits; return 0; } DEFINE_SIMPLE_ATTRIBUTE(vcpu_tsc_scaling_frac_fops, vcpu_get_tsc_scaling_frac_bits, NULL, "%llu\n"); void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry) { debugfs_create_file("guest_mode", 0444, debugfs_dentry, vcpu, &vcpu_guest_mode_fops); debugfs_create_file("tsc-offset", 0444, debugfs_dentry, vcpu, &vcpu_tsc_offset_fops); if (lapic_in_kernel(vcpu)) debugfs_create_file("lapic_timer_advance_ns", 0444, debugfs_dentry, vcpu, &vcpu_timer_advance_ns_fops); if (kvm_caps.has_tsc_control) { debugfs_create_file("tsc-scaling-ratio", 0444, debugfs_dentry, vcpu, &vcpu_tsc_scaling_fops); debugfs_create_file("tsc-scaling-ratio-frac-bits", 0444, debugfs_dentry, vcpu, &vcpu_tsc_scaling_frac_fops); } } /* * This covers statistics <1024 (11=log(1024)+1), which should be enough to * cover RMAP_RECYCLE_THRESHOLD. */ #define RMAP_LOG_SIZE 11 static const char *kvm_lpage_str[KVM_NR_PAGE_SIZES] = { "4K", "2M", "1G" }; static int kvm_mmu_rmaps_stat_show(struct seq_file *m, void *v) { struct kvm_rmap_head *rmap; struct kvm *kvm = m->private; struct kvm_memory_slot *slot; struct kvm_memslots *slots; unsigned int lpage_size, index; /* Still small enough to be on the stack */ unsigned int *log[KVM_NR_PAGE_SIZES], *cur; int i, j, k, l, ret; if (!kvm_memslots_have_rmaps(kvm)) return 0; ret = -ENOMEM; memset(log, 0, sizeof(log)); for (i = 0; i < KVM_NR_PAGE_SIZES; i++) { log[i] = kcalloc(RMAP_LOG_SIZE, sizeof(unsigned int), GFP_KERNEL); if (!log[i]) goto out; } mutex_lock(&kvm->slots_lock); write_lock(&kvm->mmu_lock); for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) { int bkt; slots = __kvm_memslots(kvm, i); kvm_for_each_memslot(slot, bkt, slots) for (k = 0; k < KVM_NR_PAGE_SIZES; k++) { rmap = slot->arch.rmap[k]; lpage_size = kvm_mmu_slot_lpages(slot, k + 1); cur = log[k]; for (l = 0; l < lpage_size; l++) { index = ffs(pte_list_count(&rmap[l])); if (WARN_ON_ONCE(index >= RMAP_LOG_SIZE)) index = RMAP_LOG_SIZE - 1; cur[index]++; } } } write_unlock(&kvm->mmu_lock); mutex_unlock(&kvm->slots_lock); /* index=0 counts no rmap; index=1 counts 1 rmap */ seq_printf(m, "Rmap_Count:\t0\t1\t"); for (i = 2; i < RMAP_LOG_SIZE; i++) { j = 1 << (i - 1); k = (1 << i) - 1; seq_printf(m, "%d-%d\t", j, k); } seq_printf(m, "\n"); for (i = 0; i < KVM_NR_PAGE_SIZES; i++) { seq_printf(m, "Level=%s:\t", kvm_lpage_str[i]); cur = log[i]; for (j = 0; j < RMAP_LOG_SIZE; j++) seq_printf(m, "%d\t", cur[j]); seq_printf(m, "\n"); } ret = 0; out: for (i = 0; i < KVM_NR_PAGE_SIZES; i++) kfree(log[i]); return ret; } static int kvm_mmu_rmaps_stat_open(struct inode *inode, struct file *file) { struct kvm *kvm = inode->i_private; int r; if (!kvm_get_kvm_safe(kvm)) return -ENOENT; r = single_open(file, kvm_mmu_rmaps_stat_show, kvm); if (r < 0) kvm_put_kvm(kvm); return r; } static int kvm_mmu_rmaps_stat_release(struct inode *inode, struct file *file) { struct kvm *kvm = inode->i_private; kvm_put_kvm(kvm); return single_release(inode, file); } static const struct file_operations mmu_rmaps_stat_fops = { .owner = THIS_MODULE, .open = kvm_mmu_rmaps_stat_open, .read = seq_read, .llseek = seq_lseek, .release = kvm_mmu_rmaps_stat_release, }; void kvm_arch_create_vm_debugfs(struct kvm *kvm) { debugfs_create_file("mmu_rmaps_stat", 0644, kvm->debugfs_dentry, kvm, &mmu_rmaps_stat_fops); } |
8 8 7 1 6 1 5 8 2 6 5 4 3 1 2 6 1 4 3 2 1 1 4 1 5 5 4 1 3 2 1 5 8 8 6 1 5 4 1 8 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 | // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/namei.h> #include <linux/io_uring.h> #include <uapi/linux/io_uring.h> #include "../fs/internal.h" #include "io_uring.h" #include "fs.h" struct io_rename { struct file *file; int old_dfd; int new_dfd; struct filename *oldpath; struct filename *newpath; int flags; }; struct io_unlink { struct file *file; int dfd; int flags; struct filename *filename; }; struct io_mkdir { struct file *file; int dfd; umode_t mode; struct filename *filename; }; struct io_link { struct file *file; int old_dfd; int new_dfd; struct filename *oldpath; struct filename *newpath; int flags; }; int io_renameat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_rename *ren = io_kiocb_to_cmd(req, struct io_rename); const char __user *oldf, *newf; if (sqe->buf_index || sqe->splice_fd_in) return -EINVAL; if (unlikely(req->flags & REQ_F_FIXED_FILE)) return -EBADF; ren->old_dfd = READ_ONCE(sqe->fd); oldf = u64_to_user_ptr(READ_ONCE(sqe->addr)); newf = u64_to_user_ptr(READ_ONCE(sqe->addr2)); ren->new_dfd = READ_ONCE(sqe->len); ren->flags = READ_ONCE(sqe->rename_flags); ren->oldpath = getname(oldf); if (IS_ERR(ren->oldpath)) return PTR_ERR(ren->oldpath); ren->newpath = getname(newf); if (IS_ERR(ren->newpath)) { putname(ren->oldpath); return PTR_ERR(ren->newpath); } req->flags |= REQ_F_NEED_CLEANUP; req->flags |= REQ_F_FORCE_ASYNC; return 0; } int io_renameat(struct io_kiocb *req, unsigned int issue_flags) { struct io_rename *ren = io_kiocb_to_cmd(req, struct io_rename); int ret; WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); ret = do_renameat2(ren->old_dfd, ren->oldpath, ren->new_dfd, ren->newpath, ren->flags); req->flags &= ~REQ_F_NEED_CLEANUP; io_req_set_res(req, ret, 0); return IOU_OK; } void io_renameat_cleanup(struct io_kiocb *req) { struct io_rename *ren = io_kiocb_to_cmd(req, struct io_rename); putname(ren->oldpath); putname(ren->newpath); } int io_unlinkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_unlink *un = io_kiocb_to_cmd(req, struct io_unlink); const char __user *fname; if (sqe->off || sqe->len || sqe->buf_index || sqe->splice_fd_in) return -EINVAL; if (unlikely(req->flags & REQ_F_FIXED_FILE)) return -EBADF; un->dfd = READ_ONCE(sqe->fd); un->flags = READ_ONCE(sqe->unlink_flags); if (un->flags & ~AT_REMOVEDIR) return -EINVAL; fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); un->filename = getname(fname); if (IS_ERR(un->filename)) return PTR_ERR(un->filename); req->flags |= REQ_F_NEED_CLEANUP; req->flags |= REQ_F_FORCE_ASYNC; return 0; } int io_unlinkat(struct io_kiocb *req, unsigned int issue_flags) { struct io_unlink *un = io_kiocb_to_cmd(req, struct io_unlink); int ret; WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); if (un->flags & AT_REMOVEDIR) ret = do_rmdir(un->dfd, un->filename); else ret = do_unlinkat(un->dfd, un->filename); req->flags &= ~REQ_F_NEED_CLEANUP; io_req_set_res(req, ret, 0); return IOU_OK; } void io_unlinkat_cleanup(struct io_kiocb *req) { struct io_unlink *ul = io_kiocb_to_cmd(req, struct io_unlink); putname(ul->filename); } int io_mkdirat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_mkdir *mkd = io_kiocb_to_cmd(req, struct io_mkdir); const char __user *fname; if (sqe->off || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in) return -EINVAL; if (unlikely(req->flags & REQ_F_FIXED_FILE)) return -EBADF; mkd->dfd = READ_ONCE(sqe->fd); mkd->mode = READ_ONCE(sqe->len); fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); mkd->filename = getname(fname); if (IS_ERR(mkd->filename)) return PTR_ERR(mkd->filename); req->flags |= REQ_F_NEED_CLEANUP; req->flags |= REQ_F_FORCE_ASYNC; return 0; } int io_mkdirat(struct io_kiocb *req, unsigned int issue_flags) { struct io_mkdir *mkd = io_kiocb_to_cmd(req, struct io_mkdir); int ret; WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); ret = do_mkdirat(mkd->dfd, mkd->filename, mkd->mode); req->flags &= ~REQ_F_NEED_CLEANUP; io_req_set_res(req, ret, 0); return IOU_OK; } void io_mkdirat_cleanup(struct io_kiocb *req) { struct io_mkdir *md = io_kiocb_to_cmd(req, struct io_mkdir); putname(md->filename); } int io_symlinkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_link *sl = io_kiocb_to_cmd(req, struct io_link); const char __user *oldpath, *newpath; if (sqe->len || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in) return -EINVAL; if (unlikely(req->flags & REQ_F_FIXED_FILE)) return -EBADF; sl->new_dfd = READ_ONCE(sqe->fd); oldpath = u64_to_user_ptr(READ_ONCE(sqe->addr)); newpath = u64_to_user_ptr(READ_ONCE(sqe->addr2)); sl->oldpath = getname(oldpath); if (IS_ERR(sl->oldpath)) return PTR_ERR(sl->oldpath); sl->newpath = getname(newpath); if (IS_ERR(sl->newpath)) { putname(sl->oldpath); return PTR_ERR(sl->newpath); } req->flags |= REQ_F_NEED_CLEANUP; req->flags |= REQ_F_FORCE_ASYNC; return 0; } int io_symlinkat(struct io_kiocb *req, unsigned int issue_flags) { struct io_link *sl = io_kiocb_to_cmd(req, struct io_link); int ret; WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); ret = do_symlinkat(sl->oldpath, sl->new_dfd, sl->newpath); req->flags &= ~REQ_F_NEED_CLEANUP; io_req_set_res(req, ret, 0); return IOU_OK; } int io_linkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_link *lnk = io_kiocb_to_cmd(req, struct io_link); const char __user *oldf, *newf; if (sqe->buf_index || sqe->splice_fd_in) return -EINVAL; if (unlikely(req->flags & REQ_F_FIXED_FILE)) return -EBADF; lnk->old_dfd = READ_ONCE(sqe->fd); lnk->new_dfd = READ_ONCE(sqe->len); oldf = u64_to_user_ptr(READ_ONCE(sqe->addr)); newf = u64_to_user_ptr(READ_ONCE(sqe->addr2)); lnk->flags = READ_ONCE(sqe->hardlink_flags); lnk->oldpath = getname_uflags(oldf, lnk->flags); if (IS_ERR(lnk->oldpath)) return PTR_ERR(lnk->oldpath); lnk->newpath = getname(newf); if (IS_ERR(lnk->newpath)) { putname(lnk->oldpath); return PTR_ERR(lnk->newpath); } req->flags |= REQ_F_NEED_CLEANUP; req->flags |= REQ_F_FORCE_ASYNC; return 0; } int io_linkat(struct io_kiocb *req, unsigned int issue_flags) { struct io_link *lnk = io_kiocb_to_cmd(req, struct io_link); int ret; WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); ret = do_linkat(lnk->old_dfd, lnk->oldpath, lnk->new_dfd, lnk->newpath, lnk->flags); req->flags &= ~REQ_F_NEED_CLEANUP; io_req_set_res(req, ret, 0); return IOU_OK; } void io_link_cleanup(struct io_kiocb *req) { struct io_link *sl = io_kiocb_to_cmd(req, struct io_link); putname(sl->oldpath); putname(sl->newpath); } |
12 12 11 12 12 12 10 10 12 14 14 473 413 409 410 474 462 462 410 211 410 409 410 176 177 35 57 56 56 29 31 31 200 201 200 200 177 57 147 147 22 23 21 2 21 21 21 3 23 20 20 9 11 11 11 11 11 7 4 4 7 155 147 154 155 154 155 155 154 155 154 145 154 9 146 146 146 155 74 74 45 74 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 | // SPDX-License-Identifier: GPL-2.0-or-later /* Generic associative array implementation. * * See Documentation/core-api/assoc_array.rst for information. * * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ //#define DEBUG #include <linux/rcupdate.h> #include <linux/slab.h> #include <linux/err.h> #include <linux/assoc_array_priv.h> /* * Iterate over an associative array. The caller must hold the RCU read lock * or better. */ static int assoc_array_subtree_iterate(const struct assoc_array_ptr *root, const struct assoc_array_ptr *stop, int (*iterator)(const void *leaf, void *iterator_data), void *iterator_data) { const struct assoc_array_shortcut *shortcut; const struct assoc_array_node *node; const struct assoc_array_ptr *cursor, *ptr, *parent; unsigned long has_meta; int slot, ret; cursor = root; begin_node: if (assoc_array_ptr_is_shortcut(cursor)) { /* Descend through a shortcut */ shortcut = assoc_array_ptr_to_shortcut(cursor); cursor = READ_ONCE(shortcut->next_node); /* Address dependency. */ } node = assoc_array_ptr_to_node(cursor); slot = 0; /* We perform two passes of each node. * * The first pass does all the leaves in this node. This means we * don't miss any leaves if the node is split up by insertion whilst * we're iterating over the branches rooted here (we may, however, see * some leaves twice). */ has_meta = 0; for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ has_meta |= (unsigned long)ptr; if (ptr && assoc_array_ptr_is_leaf(ptr)) { /* We need a barrier between the read of the pointer, * which is supplied by the above READ_ONCE(). */ /* Invoke the callback */ ret = iterator(assoc_array_ptr_to_leaf(ptr), iterator_data); if (ret) return ret; } } /* The second pass attends to all the metadata pointers. If we follow * one of these we may find that we don't come back here, but rather go * back to a replacement node with the leaves in a different layout. * * We are guaranteed to make progress, however, as the slot number for * a particular portion of the key space cannot change - and we * continue at the back pointer + 1. */ if (!(has_meta & ASSOC_ARRAY_PTR_META_TYPE)) goto finished_node; slot = 0; continue_node: node = assoc_array_ptr_to_node(cursor); for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ if (assoc_array_ptr_is_meta(ptr)) { cursor = ptr; goto begin_node; } } finished_node: /* Move up to the parent (may need to skip back over a shortcut) */ parent = READ_ONCE(node->back_pointer); /* Address dependency. */ slot = node->parent_slot; if (parent == stop) return 0; if (assoc_array_ptr_is_shortcut(parent)) { shortcut = assoc_array_ptr_to_shortcut(parent); cursor = parent; parent = READ_ONCE(shortcut->back_pointer); /* Address dependency. */ slot = shortcut->parent_slot; if (parent == stop) return 0; } /* Ascend to next slot in parent node */ cursor = parent; slot++; goto continue_node; } /** * assoc_array_iterate - Pass all objects in the array to a callback * @array: The array to iterate over. * @iterator: The callback function. * @iterator_data: Private data for the callback function. * * Iterate over all the objects in an associative array. Each one will be * presented to the iterator function. * * If the array is being modified concurrently with the iteration then it is * possible that some objects in the array will be passed to the iterator * callback more than once - though every object should be passed at least * once. If this is undesirable then the caller must lock against modification * for the duration of this function. * * The function will return 0 if no objects were in the array or else it will * return the result of the last iterator function called. Iteration stops * immediately if any call to the iteration function results in a non-zero * return. * * The caller should hold the RCU read lock or better if concurrent * modification is possible. */ int assoc_array_iterate(const struct assoc_array *array, int (*iterator)(const void *object, void *iterator_data), void *iterator_data) { struct assoc_array_ptr *root = READ_ONCE(array->root); /* Address dependency. */ if (!root) return 0; return assoc_array_subtree_iterate(root, NULL, iterator, iterator_data); } enum assoc_array_walk_status { assoc_array_walk_tree_empty, assoc_array_walk_found_terminal_node, assoc_array_walk_found_wrong_shortcut, }; struct assoc_array_walk_result { struct { struct assoc_array_node *node; /* Node in which leaf might be found */ int level; int slot; } terminal_node; struct { struct assoc_array_shortcut *shortcut; int level; int sc_level; unsigned long sc_segments; unsigned long dissimilarity; } wrong_shortcut; }; /* * Navigate through the internal tree looking for the closest node to the key. */ static enum assoc_array_walk_status assoc_array_walk(const struct assoc_array *array, const struct assoc_array_ops *ops, const void *index_key, struct assoc_array_walk_result *result) { struct assoc_array_shortcut *shortcut; struct assoc_array_node *node; struct assoc_array_ptr *cursor, *ptr; unsigned long sc_segments, dissimilarity; unsigned long segments; int level, sc_level, next_sc_level; int slot; pr_devel("-->%s()\n", __func__); cursor = READ_ONCE(array->root); /* Address dependency. */ if (!cursor) return assoc_array_walk_tree_empty; level = 0; /* Use segments from the key for the new leaf to navigate through the * internal tree, skipping through nodes and shortcuts that are on * route to the destination. Eventually we'll come to a slot that is * either empty or contains a leaf at which point we've found a node in * which the leaf we're looking for might be found or into which it * should be inserted. */ jumped: segments = ops->get_key_chunk(index_key, level); pr_devel("segments[%d]: %lx\n", level, segments); if (assoc_array_ptr_is_shortcut(cursor)) goto follow_shortcut; consider_node: node = assoc_array_ptr_to_node(cursor); slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK); slot &= ASSOC_ARRAY_FAN_MASK; ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ pr_devel("consider slot %x [ix=%d type=%lu]\n", slot, level, (unsigned long)ptr & 3); if (!assoc_array_ptr_is_meta(ptr)) { /* The node doesn't have a node/shortcut pointer in the slot * corresponding to the index key that we have to follow. */ result->terminal_node.node = node; result->terminal_node.level = level; result->terminal_node.slot = slot; pr_devel("<--%s() = terminal_node\n", __func__); return assoc_array_walk_found_terminal_node; } if (assoc_array_ptr_is_node(ptr)) { /* There is a pointer to a node in the slot corresponding to * this index key segment, so we need to follow it. */ cursor = ptr; level += ASSOC_ARRAY_LEVEL_STEP; if ((level & ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) goto consider_node; goto jumped; } /* There is a shortcut in the slot corresponding to the index key * segment. We follow the shortcut if its partial index key matches * this leaf's. Otherwise we need to split the shortcut. */ cursor = ptr; follow_shortcut: shortcut = assoc_array_ptr_to_shortcut(cursor); pr_devel("shortcut to %d\n", shortcut->skip_to_level); sc_level = level + ASSOC_ARRAY_LEVEL_STEP; BUG_ON(sc_level > shortcut->skip_to_level); do { /* Check the leaf against the shortcut's index key a word at a * time, trimming the final word (the shortcut stores the index * key completely from the root to the shortcut's target). */ if ((sc_level & ASSOC_ARRAY_KEY_CHUNK_MASK) == 0) segments = ops->get_key_chunk(index_key, sc_level); sc_segments = shortcut->index_key[sc_level >> ASSOC_ARRAY_KEY_CHUNK_SHIFT]; dissimilarity = segments ^ sc_segments; if (round_up(sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE) > shortcut->skip_to_level) { /* Trim segments that are beyond the shortcut */ int shift = shortcut->skip_to_level & ASSOC_ARRAY_KEY_CHUNK_MASK; dissimilarity &= ~(ULONG_MAX << shift); next_sc_level = shortcut->skip_to_level; } else { next_sc_level = sc_level + ASSOC_ARRAY_KEY_CHUNK_SIZE; next_sc_level = round_down(next_sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); } if (dissimilarity != 0) { /* This shortcut points elsewhere */ result->wrong_shortcut.shortcut = shortcut; result->wrong_shortcut.level = level; result->wrong_shortcut.sc_level = sc_level; result->wrong_shortcut.sc_segments = sc_segments; result->wrong_shortcut.dissimilarity = dissimilarity; return assoc_array_walk_found_wrong_shortcut; } sc_level = next_sc_level; } while (sc_level < shortcut->skip_to_level); /* The shortcut matches the leaf's index to this point. */ cursor = READ_ONCE(shortcut->next_node); /* Address dependency. */ if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) { level = sc_level; goto jumped; } else { level = sc_level; goto consider_node; } } /** * assoc_array_find - Find an object by index key * @array: The associative array to search. * @ops: The operations to use. * @index_key: The key to the object. * * Find an object in an associative array by walking through the internal tree * to the node that should contain the object and then searching the leaves * there. NULL is returned if the requested object was not found in the array. * * The caller must hold the RCU read lock or better. */ void *assoc_array_find(const struct assoc_array *array, const struct assoc_array_ops *ops, const void *index_key) { struct assoc_array_walk_result result; const struct assoc_array_node *node; const struct assoc_array_ptr *ptr; const void *leaf; int slot; if (assoc_array_walk(array, ops, index_key, &result) != assoc_array_walk_found_terminal_node) return NULL; node = result.terminal_node.node; /* If the target key is available to us, it's has to be pointed to by * the terminal node. */ for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ if (ptr && assoc_array_ptr_is_leaf(ptr)) { /* We need a barrier between the read of the pointer * and dereferencing the pointer - but only if we are * actually going to dereference it. */ leaf = assoc_array_ptr_to_leaf(ptr); if (ops->compare_object(leaf, index_key)) return (void *)leaf; } } return NULL; } /* * Destructively iterate over an associative array. The caller must prevent * other simultaneous accesses. */ static void assoc_array_destroy_subtree(struct assoc_array_ptr *root, const struct assoc_array_ops *ops) { struct assoc_array_shortcut *shortcut; struct assoc_array_node *node; struct assoc_array_ptr *cursor, *parent = NULL; int slot = -1; pr_devel("-->%s()\n", __func__); cursor = root; if (!cursor) { pr_devel("empty\n"); return; } move_to_meta: if (assoc_array_ptr_is_shortcut(cursor)) { /* Descend through a shortcut */ pr_devel("[%d] shortcut\n", slot); BUG_ON(!assoc_array_ptr_is_shortcut(cursor)); shortcut = assoc_array_ptr_to_shortcut(cursor); BUG_ON(shortcut->back_pointer != parent); BUG_ON(slot != -1 && shortcut->parent_slot != slot); parent = cursor; cursor = shortcut->next_node; slot = -1; BUG_ON(!assoc_array_ptr_is_node(cursor)); } pr_devel("[%d] node\n", slot); node = assoc_array_ptr_to_node(cursor); BUG_ON(node->back_pointer != parent); BUG_ON(slot != -1 && node->parent_slot != slot); slot = 0; continue_node: pr_devel("Node %p [back=%p]\n", node, node->back_pointer); for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { struct assoc_array_ptr *ptr = node->slots[slot]; if (!ptr) continue; if (assoc_array_ptr_is_meta(ptr)) { parent = cursor; cursor = ptr; goto move_to_meta; } if (ops) { pr_devel("[%d] free leaf\n", slot); ops->free_object(assoc_array_ptr_to_leaf(ptr)); } } parent = node->back_pointer; slot = node->parent_slot; pr_devel("free node\n"); kfree(node); if (!parent) return; /* Done */ /* Move back up to the parent (may need to free a shortcut on * the way up) */ if (assoc_array_ptr_is_shortcut(parent)) { shortcut = assoc_array_ptr_to_shortcut(parent); BUG_ON(shortcut->next_node != cursor); cursor = parent; parent = shortcut->back_pointer; slot = shortcut->parent_slot; pr_devel("free shortcut\n"); kfree(shortcut); if (!parent) return; BUG_ON(!assoc_array_ptr_is_node(parent)); } /* Ascend to next slot in parent node */ pr_devel("ascend to %p[%d]\n", parent, slot); cursor = parent; node = assoc_array_ptr_to_node(cursor); slot++; goto continue_node; } /** * assoc_array_destroy - Destroy an associative array * @array: The array to destroy. * @ops: The operations to use. * * Discard all metadata and free all objects in an associative array. The * array will be empty and ready to use again upon completion. This function * cannot fail. * * The caller must prevent all other accesses whilst this takes place as no * attempt is made to adjust pointers gracefully to permit RCU readlock-holding * accesses to continue. On the other hand, no memory allocation is required. */ void assoc_array_destroy(struct assoc_array *array, const struct assoc_array_ops *ops) { assoc_array_destroy_subtree(array->root, ops); array->root = NULL; } /* * Handle insertion into an empty tree. */ static bool assoc_array_insert_in_empty_tree(struct assoc_array_edit *edit) { struct assoc_array_node *new_n0; pr_devel("-->%s()\n", __func__); new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); if (!new_n0) return false; edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); edit->leaf_p = &new_n0->slots[0]; edit->adjust_count_on = new_n0; edit->set[0].ptr = &edit->array->root; edit->set[0].to = assoc_array_node_to_ptr(new_n0); pr_devel("<--%s() = ok [no root]\n", __func__); return true; } /* * Handle insertion into a terminal node. */ static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit, const struct assoc_array_ops *ops, const void *index_key, struct assoc_array_walk_result *result) { struct assoc_array_shortcut *shortcut, *new_s0; struct assoc_array_node *node, *new_n0, *new_n1, *side; struct assoc_array_ptr *ptr; unsigned long dissimilarity, base_seg, blank; size_t keylen; bool have_meta; int level, diff; int slot, next_slot, free_slot, i, j; node = result->terminal_node.node; level = result->terminal_node.level; edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = result->terminal_node.slot; pr_devel("-->%s()\n", __func__); /* We arrived at a node which doesn't have an onward node or shortcut * pointer that we have to follow. This means that (a) the leaf we * want must go here (either by insertion or replacement) or (b) we * need to split this node and insert in one of the fragments. */ free_slot = -1; /* Firstly, we have to check the leaves in this node to see if there's * a matching one we should replace in place. */ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { ptr = node->slots[i]; if (!ptr) { free_slot = i; continue; } if (assoc_array_ptr_is_leaf(ptr) && ops->compare_object(assoc_array_ptr_to_leaf(ptr), index_key)) { pr_devel("replace in slot %d\n", i); edit->leaf_p = &node->slots[i]; edit->dead_leaf = node->slots[i]; pr_devel("<--%s() = ok [replace]\n", __func__); return true; } } /* If there is a free slot in this node then we can just insert the * leaf here. */ if (free_slot >= 0) { pr_devel("insert in free slot %d\n", free_slot); edit->leaf_p = &node->slots[free_slot]; edit->adjust_count_on = node; pr_devel("<--%s() = ok [insert]\n", __func__); return true; } /* The node has no spare slots - so we're either going to have to split * it or insert another node before it. * * Whatever, we're going to need at least two new nodes - so allocate * those now. We may also need a new shortcut, but we deal with that * when we need it. */ new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); if (!new_n0) return false; edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); new_n1 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); if (!new_n1) return false; edit->new_meta[1] = assoc_array_node_to_ptr(new_n1); /* We need to find out how similar the leaves are. */ pr_devel("no spare slots\n"); have_meta = false; for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { ptr = node->slots[i]; if (assoc_array_ptr_is_meta(ptr)) { edit->segment_cache[i] = 0xff; have_meta = true; continue; } base_seg = ops->get_object_key_chunk( assoc_array_ptr_to_leaf(ptr), level); base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK; } if (have_meta) { pr_devel("have meta\n"); goto split_node; } /* The node contains only leaves */ dissimilarity = 0; base_seg = edit->segment_cache[0]; for (i = 1; i < ASSOC_ARRAY_FAN_OUT; i++) dissimilarity |= edit->segment_cache[i] ^ base_seg; pr_devel("only leaves; dissimilarity=%lx\n", dissimilarity); if ((dissimilarity & ASSOC_ARRAY_FAN_MASK) == 0) { /* The old leaves all cluster in the same slot. We will need * to insert a shortcut if the new node wants to cluster with them. */ if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0) goto all_leaves_cluster_together; /* Otherwise all the old leaves cluster in the same slot, but * the new leaf wants to go into a different slot - so we * create a new node (n0) to hold the new leaf and a pointer to * a new node (n1) holding all the old leaves. * * This can be done by falling through to the node splitting * path. */ pr_devel("present leaves cluster but not new leaf\n"); } split_node: pr_devel("split node\n"); /* We need to split the current node. The node must contain anything * from a single leaf (in the one leaf case, this leaf will cluster * with the new leaf) and the rest meta-pointers, to all leaves, some * of which may cluster. * * It won't contain the case in which all the current leaves plus the * new leaves want to cluster in the same slot. * * We need to expel at least two leaves out of a set consisting of the * leaves in the node and the new leaf. The current meta pointers can * just be copied as they shouldn't cluster with any of the leaves. * * We need a new node (n0) to replace the current one and a new node to * take the expelled nodes (n1). */ edit->set[0].to = assoc_array_node_to_ptr(new_n0); new_n0->back_pointer = node->back_pointer; new_n0->parent_slot = node->parent_slot; new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); new_n1->parent_slot = -1; /* Need to calculate this */ do_split_node: pr_devel("do_split_node\n"); new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; new_n1->nr_leaves_on_branch = 0; /* Begin by finding two matching leaves. There have to be at least two * that match - even if there are meta pointers - because any leaf that * would match a slot with a meta pointer in it must be somewhere * behind that meta pointer and cannot be here. Further, given N * remaining leaf slots, we now have N+1 leaves to go in them. */ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { slot = edit->segment_cache[i]; if (slot != 0xff) for (j = i + 1; j < ASSOC_ARRAY_FAN_OUT + 1; j++) if (edit->segment_cache[j] == slot) goto found_slot_for_multiple_occupancy; } found_slot_for_multiple_occupancy: pr_devel("same slot: %x %x [%02x]\n", i, j, slot); BUG_ON(i >= ASSOC_ARRAY_FAN_OUT); BUG_ON(j >= ASSOC_ARRAY_FAN_OUT + 1); BUG_ON(slot >= ASSOC_ARRAY_FAN_OUT); new_n1->parent_slot = slot; /* Metadata pointers cannot change slot */ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) if (assoc_array_ptr_is_meta(node->slots[i])) new_n0->slots[i] = node->slots[i]; else new_n0->slots[i] = NULL; BUG_ON(new_n0->slots[slot] != NULL); new_n0->slots[slot] = assoc_array_node_to_ptr(new_n1); /* Filter the leaf pointers between the new nodes */ free_slot = -1; next_slot = 0; for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { if (assoc_array_ptr_is_meta(node->slots[i])) continue; if (edit->segment_cache[i] == slot) { new_n1->slots[next_slot++] = node->slots[i]; new_n1->nr_leaves_on_branch++; } else { do { free_slot++; } while (new_n0->slots[free_slot] != NULL); new_n0->slots[free_slot] = node->slots[i]; } } pr_devel("filtered: f=%x n=%x\n", free_slot, next_slot); if (edit->segment_cache[ASSOC_ARRAY_FAN_OUT] != slot) { do { free_slot++; } while (new_n0->slots[free_slot] != NULL); edit->leaf_p = &new_n0->slots[free_slot]; edit->adjust_count_on = new_n0; } else { edit->leaf_p = &new_n1->slots[next_slot++]; edit->adjust_count_on = new_n1; } BUG_ON(next_slot <= 1); edit->set_backpointers_to = assoc_array_node_to_ptr(new_n0); for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { if (edit->segment_cache[i] == 0xff) { ptr = node->slots[i]; BUG_ON(assoc_array_ptr_is_leaf(ptr)); if (assoc_array_ptr_is_node(ptr)) { side = assoc_array_ptr_to_node(ptr); edit->set_backpointers[i] = &side->back_pointer; } else { shortcut = assoc_array_ptr_to_shortcut(ptr); edit->set_backpointers[i] = &shortcut->back_pointer; } } } ptr = node->back_pointer; if (!ptr) edit->set[0].ptr = &edit->array->root; else if (assoc_array_ptr_is_node(ptr)) edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot]; else edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node; edit->excised_meta[0] = assoc_array_node_to_ptr(node); pr_devel("<--%s() = ok [split node]\n", __func__); return true; all_leaves_cluster_together: /* All the leaves, new and old, want to cluster together in this node * in the same slot, so we have to replace this node with a shortcut to * skip over the identical parts of the key and then place a pair of * nodes, one inside the other, at the end of the shortcut and * distribute the keys between them. * * Firstly we need to work out where the leaves start diverging as a * bit position into their keys so that we know how big the shortcut * needs to be. * * We only need to make a single pass of N of the N+1 leaves because if * any keys differ between themselves at bit X then at least one of * them must also differ with the base key at bit X or before. */ pr_devel("all leaves cluster together\n"); diff = INT_MAX; for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { int x = ops->diff_objects(assoc_array_ptr_to_leaf(node->slots[i]), index_key); if (x < diff) { BUG_ON(x < 0); diff = x; } } BUG_ON(diff == INT_MAX); BUG_ON(diff < level + ASSOC_ARRAY_LEVEL_STEP); keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE); keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; new_s0 = kzalloc(struct_size(new_s0, index_key, keylen), GFP_KERNEL); if (!new_s0) return false; edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0); edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0); new_s0->back_pointer = node->back_pointer; new_s0->parent_slot = node->parent_slot; new_s0->next_node = assoc_array_node_to_ptr(new_n0); new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0); new_n0->parent_slot = 0; new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); new_n1->parent_slot = -1; /* Need to calculate this */ new_s0->skip_to_level = level = diff & ~ASSOC_ARRAY_LEVEL_STEP_MASK; pr_devel("skip_to_level = %d [diff %d]\n", level, diff); BUG_ON(level <= 0); for (i = 0; i < keylen; i++) new_s0->index_key[i] = ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE); if (level & ASSOC_ARRAY_KEY_CHUNK_MASK) { blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK); pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank); new_s0->index_key[keylen - 1] &= ~blank; } /* This now reduces to a node splitting exercise for which we'll need * to regenerate the disparity table. */ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { ptr = node->slots[i]; base_seg = ops->get_object_key_chunk(assoc_array_ptr_to_leaf(ptr), level); base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK; } base_seg = ops->get_key_chunk(index_key, level); base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = base_seg & ASSOC_ARRAY_FAN_MASK; goto do_split_node; } /* * Handle insertion into the middle of a shortcut. */ static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit, const struct assoc_array_ops *ops, struct assoc_array_walk_result *result) { struct assoc_array_shortcut *shortcut, *new_s0, *new_s1; struct assoc_array_node *node, *new_n0, *side; unsigned long sc_segments, dissimilarity, blank; size_t keylen; int level, sc_level, diff; int sc_slot; shortcut = result->wrong_shortcut.shortcut; level = result->wrong_shortcut.level; sc_level = result->wrong_shortcut.sc_level; sc_segments = result->wrong_shortcut.sc_segments; dissimilarity = result->wrong_shortcut.dissimilarity; pr_devel("-->%s(ix=%d dis=%lx scix=%d)\n", __func__, level, dissimilarity, sc_level); /* We need to split a shortcut and insert a node between the two * pieces. Zero-length pieces will be dispensed with entirely. * * First of all, we need to find out in which level the first * difference was. */ diff = __ffs(dissimilarity); diff &= ~ASSOC_ARRAY_LEVEL_STEP_MASK; diff += sc_level & ~ASSOC_ARRAY_KEY_CHUNK_MASK; pr_devel("diff=%d\n", diff); if (!shortcut->back_pointer) { edit->set[0].ptr = &edit->array->root; } else if (assoc_array_ptr_is_node(shortcut->back_pointer)) { node = assoc_array_ptr_to_node(shortcut->back_pointer); edit->set[0].ptr = &node->slots[shortcut->parent_slot]; } else { BUG(); } edit->excised_meta[0] = assoc_array_shortcut_to_ptr(shortcut); /* Create a new node now since we're going to need it anyway */ new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); if (!new_n0) return false; edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); edit->adjust_count_on = new_n0; /* Insert a new shortcut before the new node if this segment isn't of * zero length - otherwise we just connect the new node directly to the * parent. */ level += ASSOC_ARRAY_LEVEL_STEP; if (diff > level) { pr_devel("pre-shortcut %d...%d\n", level, diff); keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE); keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; new_s0 = kzalloc(struct_size(new_s0, index_key, keylen), GFP_KERNEL); if (!new_s0) return false; edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0); edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0); new_s0->back_pointer = shortcut->back_pointer; new_s0->parent_slot = shortcut->parent_slot; new_s0->next_node = assoc_array_node_to_ptr(new_n0); new_s0->skip_to_level = diff; new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0); new_n0->parent_slot = 0; memcpy(new_s0->index_key, shortcut->index_key, flex_array_size(new_s0, index_key, keylen)); blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK); pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank); new_s0->index_key[keylen - 1] &= ~blank; } else { pr_devel("no pre-shortcut\n"); edit->set[0].to = assoc_array_node_to_ptr(new_n0); new_n0->back_pointer = shortcut->back_pointer; new_n0->parent_slot = shortcut->parent_slot; } side = assoc_array_ptr_to_node(shortcut->next_node); new_n0->nr_leaves_on_branch = side->nr_leaves_on_branch; /* We need to know which slot in the new node is going to take a * metadata pointer. */ sc_slot = sc_segments >> (diff & ASSOC_ARRAY_KEY_CHUNK_MASK); sc_slot &= ASSOC_ARRAY_FAN_MASK; pr_devel("new slot %lx >> %d -> %d\n", sc_segments, diff & ASSOC_ARRAY_KEY_CHUNK_MASK, sc_slot); /* Determine whether we need to follow the new node with a replacement * for the current shortcut. We could in theory reuse the current * shortcut if its parent slot number doesn't change - but that's a * 1-in-16 chance so not worth expending the code upon. */ level = diff + ASSOC_ARRAY_LEVEL_STEP; if (level < shortcut->skip_to_level) { pr_devel("post-shortcut %d...%d\n", level, shortcut->skip_to_level); keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; new_s1 = kzalloc(struct_size(new_s1, index_key, keylen), GFP_KERNEL); if (!new_s1) return false; edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1); new_s1->back_pointer = assoc_array_node_to_ptr(new_n0); new_s1->parent_slot = sc_slot; new_s1->next_node = shortcut->next_node; new_s1->skip_to_level = shortcut->skip_to_level; new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1); memcpy(new_s1->index_key, shortcut->index_key, flex_array_size(new_s1, index_key, keylen)); edit->set[1].ptr = &side->back_pointer; edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1); } else { pr_devel("no post-shortcut\n"); /* We don't have to replace the pointed-to node as long as we * use memory barriers to make sure the parent slot number is * changed before the back pointer (the parent slot number is * irrelevant to the old parent shortcut). */ new_n0->slots[sc_slot] = shortcut->next_node; edit->set_parent_slot[0].p = &side->parent_slot; edit->set_parent_slot[0].to = sc_slot; edit->set[1].ptr = &side->back_pointer; edit->set[1].to = assoc_array_node_to_ptr(new_n0); } /* Install the new leaf in a spare slot in the new node. */ if (sc_slot == 0) edit->leaf_p = &new_n0->slots[1]; else edit->leaf_p = &new_n0->slots[0]; pr_devel("<--%s() = ok [split shortcut]\n", __func__); return true; } /** * assoc_array_insert - Script insertion of an object into an associative array * @array: The array to insert into. * @ops: The operations to use. * @index_key: The key to insert at. * @object: The object to insert. * * Precalculate and preallocate a script for the insertion or replacement of an * object in an associative array. This results in an edit script that can * either be applied or cancelled. * * The function returns a pointer to an edit script or -ENOMEM. * * The caller should lock against other modifications and must continue to hold * the lock until assoc_array_apply_edit() has been called. * * Accesses to the tree may take place concurrently with this function, * provided they hold the RCU read lock. */ struct assoc_array_edit *assoc_array_insert(struct assoc_array *array, const struct assoc_array_ops *ops, const void *index_key, void *object) { struct assoc_array_walk_result result; struct assoc_array_edit *edit; pr_devel("-->%s()\n", __func__); /* The leaf pointer we're given must not have the bottom bit set as we * use those for type-marking the pointer. NULL pointers are also not * allowed as they indicate an empty slot but we have to allow them * here as they can be updated later. */ BUG_ON(assoc_array_ptr_is_meta(object)); edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); if (!edit) return ERR_PTR(-ENOMEM); edit->array = array; edit->ops = ops; edit->leaf = assoc_array_leaf_to_ptr(object); edit->adjust_count_by = 1; switch (assoc_array_walk(array, ops, index_key, &result)) { case assoc_array_walk_tree_empty: /* Allocate a root node if there isn't one yet */ if (!assoc_array_insert_in_empty_tree(edit)) goto enomem; return edit; case assoc_array_walk_found_terminal_node: /* We found a node that doesn't have a node/shortcut pointer in * the slot corresponding to the index key that we have to * follow. */ if (!assoc_array_insert_into_terminal_node(edit, ops, index_key, &result)) goto enomem; return edit; case assoc_array_walk_found_wrong_shortcut: /* We found a shortcut that didn't match our key in a slot we * needed to follow. */ if (!assoc_array_insert_mid_shortcut(edit, ops, &result)) goto enomem; return edit; } enomem: /* Clean up after an out of memory error */ pr_devel("enomem\n"); assoc_array_cancel_edit(edit); return ERR_PTR(-ENOMEM); } /** * assoc_array_insert_set_object - Set the new object pointer in an edit script * @edit: The edit script to modify. * @object: The object pointer to set. * * Change the object to be inserted in an edit script. The object pointed to * by the old object is not freed. This must be done prior to applying the * script. */ void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object) { BUG_ON(!object); edit->leaf = assoc_array_leaf_to_ptr(object); } struct assoc_array_delete_collapse_context { struct assoc_array_node *node; const void *skip_leaf; int slot; }; /* * Subtree collapse to node iterator. */ static int assoc_array_delete_collapse_iterator(const void *leaf, void *iterator_data) { struct assoc_array_delete_collapse_context *collapse = iterator_data; if (leaf == collapse->skip_leaf) return 0; BUG_ON(collapse->slot >= ASSOC_ARRAY_FAN_OUT); collapse->node->slots[collapse->slot++] = assoc_array_leaf_to_ptr(leaf); return 0; } /** * assoc_array_delete - Script deletion of an object from an associative array * @array: The array to search. * @ops: The operations to use. * @index_key: The key to the object. * * Precalculate and preallocate a script for the deletion of an object from an * associative array. This results in an edit script that can either be * applied or cancelled. * * The function returns a pointer to an edit script if the object was found, * NULL if the object was not found or -ENOMEM. * * The caller should lock against other modifications and must continue to hold * the lock until assoc_array_apply_edit() has been called. * * Accesses to the tree may take place concurrently with this function, * provided they hold the RCU read lock. */ struct assoc_array_edit *assoc_array_delete(struct assoc_array *array, const struct assoc_array_ops *ops, const void *index_key) { struct assoc_array_delete_collapse_context collapse; struct assoc_array_walk_result result; struct assoc_array_node *node, *new_n0; struct assoc_array_edit *edit; struct assoc_array_ptr *ptr; bool has_meta; int slot, i; pr_devel("-->%s()\n", __func__); edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); if (!edit) return ERR_PTR(-ENOMEM); edit->array = array; edit->ops = ops; edit->adjust_count_by = -1; switch (assoc_array_walk(array, ops, index_key, &result)) { case assoc_array_walk_found_terminal_node: /* We found a node that should contain the leaf we've been * asked to remove - *if* it's in the tree. */ pr_devel("terminal_node\n"); node = result.terminal_node.node; for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { ptr = node->slots[slot]; if (ptr && assoc_array_ptr_is_leaf(ptr) && ops->compare_object(assoc_array_ptr_to_leaf(ptr), index_key)) goto found_leaf; } fallthrough; case assoc_array_walk_tree_empty: case assoc_array_walk_found_wrong_shortcut: default: assoc_array_cancel_edit(edit); pr_devel("not found\n"); return NULL; } found_leaf: BUG_ON(array->nr_leaves_on_tree <= 0); /* In the simplest form of deletion we just clear the slot and release * the leaf after a suitable interval. */ edit->dead_leaf = node->slots[slot]; edit->set[0].ptr = &node->slots[slot]; edit->set[0].to = NULL; edit->adjust_count_on = node; /* If that concludes erasure of the last leaf, then delete the entire * internal array. */ if (array->nr_leaves_on_tree == 1) { edit->set[1].ptr = &array->root; edit->set[1].to = NULL; edit->adjust_count_on = NULL; edit->excised_subtree = array->root; pr_devel("all gone\n"); return edit; } /* However, we'd also like to clear up some metadata blocks if we * possibly can. * * We go for a simple algorithm of: if this node has FAN_OUT or fewer * leaves in it, then attempt to collapse it - and attempt to * recursively collapse up the tree. * * We could also try and collapse in partially filled subtrees to take * up space in this node. */ if (node->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) { struct assoc_array_node *parent, *grandparent; struct assoc_array_ptr *ptr; /* First of all, we need to know if this node has metadata so * that we don't try collapsing if all the leaves are already * here. */ has_meta = false; for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { ptr = node->slots[i]; if (assoc_array_ptr_is_meta(ptr)) { has_meta = true; break; } } pr_devel("leaves: %ld [m=%d]\n", node->nr_leaves_on_branch - 1, has_meta); /* Look further up the tree to see if we can collapse this node * into a more proximal node too. */ parent = node; collapse_up: pr_devel("collapse subtree: %ld\n", parent->nr_leaves_on_branch); ptr = parent->back_pointer; if (!ptr) goto do_collapse; if (assoc_array_ptr_is_shortcut(ptr)) { struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(ptr); ptr = s->back_pointer; if (!ptr) goto do_collapse; } grandparent = assoc_array_ptr_to_node(ptr); if (grandparent->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) { parent = grandparent; goto collapse_up; } do_collapse: /* There's no point collapsing if the original node has no meta * pointers to discard and if we didn't merge into one of that * node's ancestry. */ if (has_meta || parent != node) { node = parent; /* Create a new node to collapse into */ new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); if (!new_n0) goto enomem; edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); new_n0->back_pointer = node->back_pointer; new_n0->parent_slot = node->parent_slot; new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; edit->adjust_count_on = new_n0; collapse.node = new_n0; collapse.skip_leaf = assoc_array_ptr_to_leaf(edit->dead_leaf); collapse.slot = 0; assoc_array_subtree_iterate(assoc_array_node_to_ptr(node), node->back_pointer, assoc_array_delete_collapse_iterator, &collapse); pr_devel("collapsed %d,%lu\n", collapse.slot, new_n0->nr_leaves_on_branch); BUG_ON(collapse.slot != new_n0->nr_leaves_on_branch - 1); if (!node->back_pointer) { edit->set[1].ptr = &array->root; } else if (assoc_array_ptr_is_leaf(node->back_pointer)) { BUG(); } else if (assoc_array_ptr_is_node(node->back_pointer)) { struct assoc_array_node *p = assoc_array_ptr_to_node(node->back_pointer); edit->set[1].ptr = &p->slots[node->parent_slot]; } else if (assoc_array_ptr_is_shortcut(node->back_pointer)) { struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(node->back_pointer); edit->set[1].ptr = &s->next_node; } edit->set[1].to = assoc_array_node_to_ptr(new_n0); edit->excised_subtree = assoc_array_node_to_ptr(node); } } return edit; enomem: /* Clean up after an out of memory error */ pr_devel("enomem\n"); assoc_array_cancel_edit(edit); return ERR_PTR(-ENOMEM); } /** * assoc_array_clear - Script deletion of all objects from an associative array * @array: The array to clear. * @ops: The operations to use. * * Precalculate and preallocate a script for the deletion of all the objects * from an associative array. This results in an edit script that can either * be applied or cancelled. * * The function returns a pointer to an edit script if there are objects to be * deleted, NULL if there are no objects in the array or -ENOMEM. * * The caller should lock against other modifications and must continue to hold * the lock until assoc_array_apply_edit() has been called. * * Accesses to the tree may take place concurrently with this function, * provided they hold the RCU read lock. */ struct assoc_array_edit *assoc_array_clear(struct assoc_array *array, const struct assoc_array_ops *ops) { struct assoc_array_edit *edit; pr_devel("-->%s()\n", __func__); if (!array->root) return NULL; edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); if (!edit) return ERR_PTR(-ENOMEM); edit->array = array; edit->ops = ops; edit->set[1].ptr = &array->root; edit->set[1].to = NULL; edit->excised_subtree = array->root; edit->ops_for_excised_subtree = ops; pr_devel("all gone\n"); return edit; } /* * Handle the deferred destruction after an applied edit. */ static void assoc_array_rcu_cleanup(struct rcu_head *head) { struct assoc_array_edit *edit = container_of(head, struct assoc_array_edit, rcu); int i; pr_devel("-->%s()\n", __func__); if (edit->dead_leaf) edit->ops->free_object(assoc_array_ptr_to_leaf(edit->dead_leaf)); for (i = 0; i < ARRAY_SIZE(edit->excised_meta); i++) if (edit->excised_meta[i]) kfree(assoc_array_ptr_to_node(edit->excised_meta[i])); if (edit->excised_subtree) { BUG_ON(assoc_array_ptr_is_leaf(edit->excised_subtree)); if (assoc_array_ptr_is_node(edit->excised_subtree)) { struct assoc_array_node *n = assoc_array_ptr_to_node(edit->excised_subtree); n->back_pointer = NULL; } else { struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(edit->excised_subtree); s->back_pointer = NULL; } assoc_array_destroy_subtree(edit->excised_subtree, edit->ops_for_excised_subtree); } kfree(edit); } /** * assoc_array_apply_edit - Apply an edit script to an associative array * @edit: The script to apply. * * Apply an edit script to an associative array to effect an insertion, * deletion or clearance. As the edit script includes preallocated memory, * this is guaranteed not to fail. * * The edit script, dead objects and dead metadata will be scheduled for * destruction after an RCU grace period to permit those doing read-only * accesses on the array to continue to do so under the RCU read lock whilst * the edit is taking place. */ void assoc_array_apply_edit(struct assoc_array_edit *edit) { struct assoc_array_shortcut *shortcut; struct assoc_array_node *node; struct assoc_array_ptr *ptr; int i; pr_devel("-->%s()\n", __func__); smp_wmb(); if (edit->leaf_p) *edit->leaf_p = edit->leaf; smp_wmb(); for (i = 0; i < ARRAY_SIZE(edit->set_parent_slot); i++) if (edit->set_parent_slot[i].p) *edit->set_parent_slot[i].p = edit->set_parent_slot[i].to; smp_wmb(); for (i = 0; i < ARRAY_SIZE(edit->set_backpointers); i++) if (edit->set_backpointers[i]) *edit->set_backpointers[i] = edit->set_backpointers_to; smp_wmb(); for (i = 0; i < ARRAY_SIZE(edit->set); i++) if (edit->set[i].ptr) *edit->set[i].ptr = edit->set[i].to; if (edit->array->root == NULL) { edit->array->nr_leaves_on_tree = 0; } else if (edit->adjust_count_on) { node = edit->adjust_count_on; for (;;) { node->nr_leaves_on_branch += edit->adjust_count_by; ptr = node->back_pointer; if (!ptr) break; if (assoc_array_ptr_is_shortcut(ptr)) { shortcut = assoc_array_ptr_to_shortcut(ptr); ptr = shortcut->back_pointer; if (!ptr) break; } BUG_ON(!assoc_array_ptr_is_node(ptr)); node = assoc_array_ptr_to_node(ptr); } edit->array->nr_leaves_on_tree += edit->adjust_count_by; } call_rcu(&edit->rcu, assoc_array_rcu_cleanup); } /** * assoc_array_cancel_edit - Discard an edit script. * @edit: The script to discard. * * Free an edit script and all the preallocated data it holds without making * any changes to the associative array it was intended for. * * NOTE! In the case of an insertion script, this does _not_ release the leaf * that was to be inserted. That is left to the caller. */ void assoc_array_cancel_edit(struct assoc_array_edit *edit) { struct assoc_array_ptr *ptr; int i; pr_devel("-->%s()\n", __func__); /* Clean up after an out of memory error */ for (i = 0; i < ARRAY_SIZE(edit->new_meta); i++) { ptr = edit->new_meta[i]; if (ptr) { if (assoc_array_ptr_is_node(ptr)) kfree(assoc_array_ptr_to_node(ptr)); else kfree(assoc_array_ptr_to_shortcut(ptr)); } } kfree(edit); } /** * assoc_array_gc - Garbage collect an associative array. * @array: The array to clean. * @ops: The operations to use. * @iterator: A callback function to pass judgement on each object. * @iterator_data: Private data for the callback function. * * Collect garbage from an associative array and pack down the internal tree to * save memory. * * The iterator function is asked to pass judgement upon each object in the * array. If it returns false, the object is discard and if it returns true, * the object is kept. If it returns true, it must increment the object's * usage count (or whatever it needs to do to retain it) before returning. * * This function returns 0 if successful or -ENOMEM if out of memory. In the * latter case, the array is not changed. * * The caller should lock against other modifications and must continue to hold * the lock until assoc_array_apply_edit() has been called. * * Accesses to the tree may take place concurrently with this function, * provided they hold the RCU read lock. */ int assoc_array_gc(struct assoc_array *array, const struct assoc_array_ops *ops, bool (*iterator)(void *object, void *iterator_data), void *iterator_data) { struct assoc_array_shortcut *shortcut, *new_s; struct assoc_array_node *node, *new_n; struct assoc_array_edit *edit; struct assoc_array_ptr *cursor, *ptr; struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp; unsigned long nr_leaves_on_tree; bool retained; int keylen, slot, nr_free, next_slot, i; pr_devel("-->%s()\n", __func__); if (!array->root) return 0; edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); if (!edit) return -ENOMEM; edit->array = array; edit->ops = ops; edit->ops_for_excised_subtree = ops; edit->set[0].ptr = &array->root; edit->excised_subtree = array->root; new_root = new_parent = NULL; new_ptr_pp = &new_root; cursor = array->root; descend: /* If this point is a shortcut, then we need to duplicate it and * advance the target cursor. */ if (assoc_array_ptr_is_shortcut(cursor)) { shortcut = assoc_array_ptr_to_shortcut(cursor); keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; new_s = kmalloc(struct_size(new_s, index_key, keylen), GFP_KERNEL); if (!new_s) goto enomem; pr_devel("dup shortcut %p -> %p\n", shortcut, new_s); memcpy(new_s, shortcut, struct_size(new_s, index_key, keylen)); new_s->back_pointer = new_parent; new_s->parent_slot = shortcut->parent_slot; *new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s); new_ptr_pp = &new_s->next_node; cursor = shortcut->next_node; } /* Duplicate the node at this position */ node = assoc_array_ptr_to_node(cursor); new_n = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); if (!new_n) goto enomem; pr_devel("dup node %p -> %p\n", node, new_n); new_n->back_pointer = new_parent; new_n->parent_slot = node->parent_slot; *new_ptr_pp = new_parent = assoc_array_node_to_ptr(new_n); new_ptr_pp = NULL; slot = 0; continue_node: /* Filter across any leaves and gc any subtrees */ for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { ptr = node->slots[slot]; if (!ptr) continue; if (assoc_array_ptr_is_leaf(ptr)) { if (iterator(assoc_array_ptr_to_leaf(ptr), iterator_data)) /* The iterator will have done any reference * counting on the object for us. */ new_n->slots[slot] = ptr; continue; } new_ptr_pp = &new_n->slots[slot]; cursor = ptr; goto descend; } retry_compress: pr_devel("-- compress node %p --\n", new_n); /* Count up the number of empty slots in this node and work out the * subtree leaf count. */ new_n->nr_leaves_on_branch = 0; nr_free = 0; for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { ptr = new_n->slots[slot]; if (!ptr) nr_free++; else if (assoc_array_ptr_is_leaf(ptr)) new_n->nr_leaves_on_branch++; } pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch); /* See what we can fold in */ retained = false; next_slot = 0; for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { struct assoc_array_shortcut *s; struct assoc_array_node *child; ptr = new_n->slots[slot]; if (!ptr || assoc_array_ptr_is_leaf(ptr)) continue; s = NULL; if (assoc_array_ptr_is_shortcut(ptr)) { s = assoc_array_ptr_to_shortcut(ptr); ptr = s->next_node; } child = assoc_array_ptr_to_node(ptr); new_n->nr_leaves_on_branch += child->nr_leaves_on_branch; if (child->nr_leaves_on_branch <= nr_free + 1) { /* Fold the child node into this one */ pr_devel("[%d] fold node %lu/%d [nx %d]\n", slot, child->nr_leaves_on_branch, nr_free + 1, next_slot); /* We would already have reaped an intervening shortcut * on the way back up the tree. */ BUG_ON(s); new_n->slots[slot] = NULL; nr_free++; if (slot < next_slot) next_slot = slot; for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { struct assoc_array_ptr *p = child->slots[i]; if (!p) continue; BUG_ON(assoc_array_ptr_is_meta(p)); while (new_n->slots[next_slot]) next_slot++; BUG_ON(next_slot >= ASSOC_ARRAY_FAN_OUT); new_n->slots[next_slot++] = p; nr_free--; } kfree(child); } else { pr_devel("[%d] retain node %lu/%d [nx %d]\n", slot, child->nr_leaves_on_branch, nr_free + 1, next_slot); retained = true; } } if (retained && new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) { pr_devel("internal nodes remain despite enough space, retrying\n"); goto retry_compress; } pr_devel("after: %lu\n", new_n->nr_leaves_on_branch); nr_leaves_on_tree = new_n->nr_leaves_on_branch; /* Excise this node if it is singly occupied by a shortcut */ if (nr_free == ASSOC_ARRAY_FAN_OUT - 1) { for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) if ((ptr = new_n->slots[slot])) break; if (assoc_array_ptr_is_meta(ptr) && assoc_array_ptr_is_shortcut(ptr)) { pr_devel("excise node %p with 1 shortcut\n", new_n); new_s = assoc_array_ptr_to_shortcut(ptr); new_parent = new_n->back_pointer; slot = new_n->parent_slot; kfree(new_n); if (!new_parent) { new_s->back_pointer = NULL; new_s->parent_slot = 0; new_root = ptr; goto gc_complete; } if (assoc_array_ptr_is_shortcut(new_parent)) { /* We can discard any preceding shortcut also */ struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(new_parent); pr_devel("excise preceding shortcut\n"); new_parent = new_s->back_pointer = s->back_pointer; slot = new_s->parent_slot = s->parent_slot; kfree(s); if (!new_parent) { new_s->back_pointer = NULL; new_s->parent_slot = 0; new_root = ptr; goto gc_complete; } } new_s->back_pointer = new_parent; new_s->parent_slot = slot; new_n = assoc_array_ptr_to_node(new_parent); new_n->slots[slot] = ptr; goto ascend_old_tree; } } /* Excise any shortcuts we might encounter that point to nodes that * only contain leaves. */ ptr = new_n->back_pointer; if (!ptr) goto gc_complete; if (assoc_array_ptr_is_shortcut(ptr)) { new_s = assoc_array_ptr_to_shortcut(ptr); new_parent = new_s->back_pointer; slot = new_s->parent_slot; if (new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) { struct assoc_array_node *n; pr_devel("excise shortcut\n"); new_n->back_pointer = new_parent; new_n->parent_slot = slot; kfree(new_s); if (!new_parent) { new_root = assoc_array_node_to_ptr(new_n); goto gc_complete; } n = assoc_array_ptr_to_node(new_parent); n->slots[slot] = assoc_array_node_to_ptr(new_n); } } else { new_parent = ptr; } new_n = assoc_array_ptr_to_node(new_parent); ascend_old_tree: ptr = node->back_pointer; if (assoc_array_ptr_is_shortcut(ptr)) { shortcut = assoc_array_ptr_to_shortcut(ptr); slot = shortcut->parent_slot; cursor = shortcut->back_pointer; if (!cursor) goto gc_complete; } else { slot = node->parent_slot; cursor = ptr; } BUG_ON(!cursor); node = assoc_array_ptr_to_node(cursor); slot++; goto continue_node; gc_complete: edit->set[0].to = new_root; assoc_array_apply_edit(edit); array->nr_leaves_on_tree = nr_leaves_on_tree; return 0; enomem: pr_devel("enomem\n"); assoc_array_destroy_subtree(new_root, edit->ops); kfree(edit); return -ENOMEM; } |
279 280 3 3 164 164 163 3 3 1 160 160 160 160 89 88 88 3 88 89 9 9 9 2 2 1 1 1 2 2 9 8 8 8 8 3 3 8 8 8 8 8 8 42 42 42 5 5 42 5 42 45 45 45 44 2 45 2 45 1 44 7 7 6 2 1 7 4 9 6 9 8 1 7 3 4 4 4 1 4 4 4 3 9 9 8 9 20 19 1 18 1 17 11 2 9 6 9 1 8 8 8 8 8 8 10 10 8 7 7 7 4 5 4 3 3 7 6 7 9 9 8 8 7 1 8 7 7 5 8 6 8 4 4 3 3 2 1 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright © 2017 Keith Packard <keithp@keithp.com> */ #include <linux/file.h> #include <linux/uaccess.h> #include <drm/drm_auth.h> #include <drm/drm_crtc.h> #include <drm/drm_drv.h> #include <drm/drm_file.h> #include <drm/drm_lease.h> #include <drm/drm_print.h> #include "drm_crtc_internal.h" #include "drm_internal.h" /** * DOC: drm leasing * * DRM leases provide information about whether a DRM master may control a DRM * mode setting object. This enables the creation of multiple DRM masters that * manage subsets of display resources. * * The original DRM master of a device 'owns' the available drm resources. It * may create additional DRM masters and 'lease' resources which it controls * to the new DRM master. This gives the new DRM master control over the * leased resources until the owner revokes the lease, or the new DRM master * is closed. Some helpful terminology: * * - An 'owner' is a &struct drm_master that is not leasing objects from * another &struct drm_master, and hence 'owns' the objects. The owner can be * identified as the &struct drm_master for which &drm_master.lessor is NULL. * * - A 'lessor' is a &struct drm_master which is leasing objects to one or more * other &struct drm_master. Currently, lessees are not allowed to * create sub-leases, hence the lessor is the same as the owner. * * - A 'lessee' is a &struct drm_master which is leasing objects from some * other &struct drm_master. Each lessee only leases resources from a single * lessor recorded in &drm_master.lessor, and holds the set of objects that * it is leasing in &drm_master.leases. * * - A 'lease' is a contract between the lessor and lessee that identifies * which resources may be controlled by the lessee. All of the resources * that are leased must be owned by or leased to the lessor, and lessors are * not permitted to lease the same object to multiple lessees. * * The set of objects any &struct drm_master 'controls' is limited to the set * of objects it leases (for lessees) or all objects (for owners). * * Objects not controlled by a &struct drm_master cannot be modified through * the various state manipulating ioctls, and any state reported back to user * space will be edited to make them appear idle and/or unusable. For * instance, connectors always report 'disconnected', while encoders * report no possible crtcs or clones. * * Since each lessee may lease objects from a single lessor, display resource * leases form a tree of &struct drm_master. As lessees are currently not * allowed to create sub-leases, the tree depth is limited to 1. All of * these get activated simultaneously when the top level device owner changes * through the SETMASTER or DROPMASTER IOCTL, so &drm_device.master points to * the owner at the top of the lease tree (i.e. the &struct drm_master for which * &drm_master.lessor is NULL). The full list of lessees that are leasing * objects from the owner can be searched via the owner's * &drm_master.lessee_idr. */ #define drm_for_each_lessee(lessee, lessor) \ list_for_each_entry((lessee), &(lessor)->lessees, lessee_list) static uint64_t drm_lease_idr_object; struct drm_master *drm_lease_owner(struct drm_master *master) { while (master->lessor != NULL) master = master->lessor; return master; } static struct drm_master* _drm_find_lessee(struct drm_master *master, int lessee_id) { lockdep_assert_held(&master->dev->mode_config.idr_mutex); return idr_find(&drm_lease_owner(master)->lessee_idr, lessee_id); } static int _drm_lease_held_master(struct drm_master *master, int id) { lockdep_assert_held(&master->dev->mode_config.idr_mutex); if (master->lessor) return idr_find(&master->leases, id) != NULL; return true; } /* Checks if the given object has been leased to some lessee of drm_master */ static bool _drm_has_leased(struct drm_master *master, int id) { struct drm_master *lessee; lockdep_assert_held(&master->dev->mode_config.idr_mutex); drm_for_each_lessee(lessee, master) if (_drm_lease_held_master(lessee, id)) return true; return false; } /* Called with idr_mutex held */ bool _drm_lease_held(struct drm_file *file_priv, int id) { bool ret; struct drm_master *master; if (!file_priv) return true; master = drm_file_get_master(file_priv); if (!master) return true; ret = _drm_lease_held_master(master, id); drm_master_put(&master); return ret; } bool drm_lease_held(struct drm_file *file_priv, int id) { struct drm_master *master; bool ret; if (!file_priv) return true; master = drm_file_get_master(file_priv); if (!master) return true; if (!master->lessor) { ret = true; goto out; } mutex_lock(&master->dev->mode_config.idr_mutex); ret = _drm_lease_held_master(master, id); mutex_unlock(&master->dev->mode_config.idr_mutex); out: drm_master_put(&master); return ret; } /* * Given a bitmask of crtcs to check, reconstructs a crtc mask based on the * crtcs which are visible through the specified file. */ uint32_t drm_lease_filter_crtcs(struct drm_file *file_priv, uint32_t crtcs_in) { struct drm_master *master; struct drm_device *dev; struct drm_crtc *crtc; int count_in, count_out; uint32_t crtcs_out = 0; if (!file_priv) return crtcs_in; master = drm_file_get_master(file_priv); if (!master) return crtcs_in; if (!master->lessor) { crtcs_out = crtcs_in; goto out; } dev = master->dev; count_in = count_out = 0; mutex_lock(&master->dev->mode_config.idr_mutex); list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { if (_drm_lease_held_master(master, crtc->base.id)) { uint32_t mask_in = 1ul << count_in; if ((crtcs_in & mask_in) != 0) { uint32_t mask_out = 1ul << count_out; crtcs_out |= mask_out; } count_out++; } count_in++; } mutex_unlock(&master->dev->mode_config.idr_mutex); out: drm_master_put(&master); return crtcs_out; } /* * Uses drm_master_create to allocate a new drm_master, then checks to * make sure all of the desired objects can be leased, atomically * leasing them to the new drmmaster. * * ERR_PTR(-EACCES) some other master holds the title to any object * ERR_PTR(-ENOENT) some object is not a valid DRM object for this device * ERR_PTR(-EBUSY) some other lessee holds title to this object * ERR_PTR(-EEXIST) same object specified more than once in the provided list * ERR_PTR(-ENOMEM) allocation failed */ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr *leases) { struct drm_device *dev = lessor->dev; int error; struct drm_master *lessee; int object; int id; void *entry; drm_dbg_lease(dev, "lessor %d\n", lessor->lessee_id); lessee = drm_master_create(lessor->dev); if (!lessee) { drm_dbg_lease(dev, "drm_master_create failed\n"); return ERR_PTR(-ENOMEM); } mutex_lock(&dev->mode_config.idr_mutex); idr_for_each_entry(leases, entry, object) { error = 0; if (!idr_find(&dev->mode_config.object_idr, object)) error = -ENOENT; else if (_drm_has_leased(lessor, object)) error = -EBUSY; if (error != 0) { drm_dbg_lease(dev, "object %d failed %d\n", object, error); goto out_lessee; } } /* Insert the new lessee into the tree */ id = idr_alloc(&(drm_lease_owner(lessor)->lessee_idr), lessee, 1, 0, GFP_KERNEL); if (id < 0) { error = id; goto out_lessee; } lessee->lessee_id = id; lessee->lessor = drm_master_get(lessor); list_add_tail(&lessee->lessee_list, &lessor->lessees); /* Move the leases over */ lessee->leases = *leases; drm_dbg_lease(dev, "new lessee %d %p, lessor %d %p\n", lessee->lessee_id, lessee, lessor->lessee_id, lessor); mutex_unlock(&dev->mode_config.idr_mutex); return lessee; out_lessee: mutex_unlock(&dev->mode_config.idr_mutex); drm_master_put(&lessee); return ERR_PTR(error); } void drm_lease_destroy(struct drm_master *master) { struct drm_device *dev = master->dev; mutex_lock(&dev->mode_config.idr_mutex); drm_dbg_lease(dev, "drm_lease_destroy %d\n", master->lessee_id); /* This master is referenced by all lessees, hence it cannot be destroyed * until all of them have been */ WARN_ON(!list_empty(&master->lessees)); /* Remove this master from the lessee idr in the owner */ if (master->lessee_id != 0) { drm_dbg_lease(dev, "remove master %d from device list of lessees\n", master->lessee_id); idr_remove(&(drm_lease_owner(master)->lessee_idr), master->lessee_id); } /* Remove this master from any lessee list it may be on */ list_del(&master->lessee_list); mutex_unlock(&dev->mode_config.idr_mutex); if (master->lessor) { /* Tell the master to check the lessee list */ drm_sysfs_lease_event(dev); drm_master_put(&master->lessor); } drm_dbg_lease(dev, "drm_lease_destroy done %d\n", master->lessee_id); } static void _drm_lease_revoke(struct drm_master *top) { int object; void *entry; struct drm_master *master = top; lockdep_assert_held(&top->dev->mode_config.idr_mutex); /* * Walk the tree starting at 'top' emptying all leases. Because * the tree is fully connected, we can do this without recursing */ for (;;) { drm_dbg_lease(master->dev, "revoke leases for %p %d\n", master, master->lessee_id); /* Evacuate the lease */ idr_for_each_entry(&master->leases, entry, object) idr_remove(&master->leases, object); /* Depth-first list walk */ /* Down */ if (!list_empty(&master->lessees)) { master = list_first_entry(&master->lessees, struct drm_master, lessee_list); } else { /* Up */ while (master != top && master == list_last_entry(&master->lessor->lessees, struct drm_master, lessee_list)) master = master->lessor; if (master == top) break; /* Over */ master = list_next_entry(master, lessee_list); } } } void drm_lease_revoke(struct drm_master *top) { mutex_lock(&top->dev->mode_config.idr_mutex); _drm_lease_revoke(top); mutex_unlock(&top->dev->mode_config.idr_mutex); } static int validate_lease(struct drm_device *dev, int object_count, struct drm_mode_object **objects, bool universal_planes) { int o; int has_crtc = -1; int has_connector = -1; int has_plane = -1; /* we want to confirm that there is at least one crtc, plane connector object. */ for (o = 0; o < object_count; o++) { if (objects[o]->type == DRM_MODE_OBJECT_CRTC && has_crtc == -1) { has_crtc = o; } if (objects[o]->type == DRM_MODE_OBJECT_CONNECTOR && has_connector == -1) has_connector = o; if (universal_planes) { if (objects[o]->type == DRM_MODE_OBJECT_PLANE && has_plane == -1) has_plane = o; } } if (has_crtc == -1 || has_connector == -1) return -EINVAL; if (universal_planes && has_plane == -1) return -EINVAL; return 0; } static int fill_object_idr(struct drm_device *dev, struct drm_file *lessor_priv, struct idr *leases, int object_count, u32 *object_ids) { struct drm_mode_object **objects; u32 o; int ret; bool universal_planes = READ_ONCE(lessor_priv->universal_planes); objects = kcalloc(object_count, sizeof(struct drm_mode_object *), GFP_KERNEL); if (!objects) return -ENOMEM; /* step one - get references to all the mode objects and check for validity. */ for (o = 0; o < object_count; o++) { objects[o] = drm_mode_object_find(dev, lessor_priv, object_ids[o], DRM_MODE_OBJECT_ANY); if (!objects[o]) { ret = -ENOENT; goto out_free_objects; } if (!drm_mode_object_lease_required(objects[o]->type)) { DRM_DEBUG_KMS("invalid object for lease\n"); ret = -EINVAL; goto out_free_objects; } } ret = validate_lease(dev, object_count, objects, universal_planes); if (ret) { drm_dbg_lease(dev, "lease validation failed\n"); goto out_free_objects; } /* add their IDs to the lease request - taking into account universal planes */ for (o = 0; o < object_count; o++) { struct drm_mode_object *obj = objects[o]; u32 object_id = objects[o]->id; drm_dbg_lease(dev, "Adding object %d to lease\n", object_id); /* * We're using an IDR to hold the set of leased * objects, but we don't need to point at the object's * data structure from the lease as the main object_idr * will be used to actually find that. Instead, all we * really want is a 'leased/not-leased' result, for * which any non-NULL pointer will work fine. */ ret = idr_alloc(leases, &drm_lease_idr_object , object_id, object_id + 1, GFP_KERNEL); if (ret < 0) { drm_dbg_lease(dev, "Object %d cannot be inserted into leases (%d)\n", object_id, ret); goto out_free_objects; } if (obj->type == DRM_MODE_OBJECT_CRTC && !universal_planes) { struct drm_crtc *crtc = obj_to_crtc(obj); ret = idr_alloc(leases, &drm_lease_idr_object, crtc->primary->base.id, crtc->primary->base.id + 1, GFP_KERNEL); if (ret < 0) { drm_dbg_lease(dev, "Object primary plane %d cannot be inserted into leases (%d)\n", object_id, ret); goto out_free_objects; } if (crtc->cursor) { ret = idr_alloc(leases, &drm_lease_idr_object, crtc->cursor->base.id, crtc->cursor->base.id + 1, GFP_KERNEL); if (ret < 0) { drm_dbg_lease(dev, "Object cursor plane %d cannot be inserted into leases (%d)\n", object_id, ret); goto out_free_objects; } } } } ret = 0; out_free_objects: for (o = 0; o < object_count; o++) { if (objects[o]) drm_mode_object_put(objects[o]); } kfree(objects); return ret; } /* * The master associated with the specified file will have a lease * created containing the objects specified in the ioctl structure. * A file descriptor will be allocated for that and returned to the * application. */ int drm_mode_create_lease_ioctl(struct drm_device *dev, void *data, struct drm_file *lessor_priv) { struct drm_mode_create_lease *cl = data; size_t object_count; int ret = 0; struct idr leases; struct drm_master *lessor; struct drm_master *lessee = NULL; struct file *lessee_file = NULL; struct file *lessor_file = lessor_priv->filp; struct drm_file *lessee_priv; int fd = -1; uint32_t *object_ids; /* Can't lease without MODESET */ if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; if (cl->flags && (cl->flags & ~(O_CLOEXEC | O_NONBLOCK))) { drm_dbg_lease(dev, "invalid flags\n"); return -EINVAL; } lessor = drm_file_get_master(lessor_priv); /* Do not allow sub-leases */ if (lessor->lessor) { drm_dbg_lease(dev, "recursive leasing not allowed\n"); ret = -EINVAL; goto out_lessor; } object_count = cl->object_count; /* Handle leased objects, if any */ idr_init(&leases); if (object_count != 0) { object_ids = memdup_array_user(u64_to_user_ptr(cl->object_ids), object_count, sizeof(__u32)); if (IS_ERR(object_ids)) { ret = PTR_ERR(object_ids); idr_destroy(&leases); goto out_lessor; } /* fill and validate the object idr */ ret = fill_object_idr(dev, lessor_priv, &leases, object_count, object_ids); kfree(object_ids); if (ret) { drm_dbg_lease(dev, "lease object lookup failed: %i\n", ret); idr_destroy(&leases); goto out_lessor; } } /* Allocate a file descriptor for the lease */ fd = get_unused_fd_flags(cl->flags & (O_CLOEXEC | O_NONBLOCK)); if (fd < 0) { idr_destroy(&leases); ret = fd; goto out_lessor; } drm_dbg_lease(dev, "Creating lease\n"); /* lessee will take the ownership of leases */ lessee = drm_lease_create(lessor, &leases); if (IS_ERR(lessee)) { ret = PTR_ERR(lessee); idr_destroy(&leases); goto out_leases; } /* Clone the lessor file to create a new file for us */ drm_dbg_lease(dev, "Allocating lease file\n"); lessee_file = file_clone_open(lessor_file); if (IS_ERR(lessee_file)) { ret = PTR_ERR(lessee_file); goto out_lessee; } lessee_priv = lessee_file->private_data; /* Change the file to a master one */ drm_master_put(&lessee_priv->master); lessee_priv->master = lessee; lessee_priv->is_master = 1; lessee_priv->authenticated = 1; /* Pass fd back to userspace */ drm_dbg_lease(dev, "Returning fd %d id %d\n", fd, lessee->lessee_id); cl->fd = fd; cl->lessee_id = lessee->lessee_id; /* Hook up the fd */ fd_install(fd, lessee_file); drm_master_put(&lessor); drm_dbg_lease(dev, "drm_mode_create_lease_ioctl succeeded\n"); return 0; out_lessee: drm_master_put(&lessee); out_leases: put_unused_fd(fd); out_lessor: drm_master_put(&lessor); drm_dbg_lease(dev, "drm_mode_create_lease_ioctl failed: %d\n", ret); return ret; } int drm_mode_list_lessees_ioctl(struct drm_device *dev, void *data, struct drm_file *lessor_priv) { struct drm_mode_list_lessees *arg = data; __u32 __user *lessee_ids = (__u32 __user *) (uintptr_t) (arg->lessees_ptr); __u32 count_lessees = arg->count_lessees; struct drm_master *lessor, *lessee; int count; int ret = 0; if (arg->pad) return -EINVAL; /* Can't lease without MODESET */ if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; lessor = drm_file_get_master(lessor_priv); drm_dbg_lease(dev, "List lessees for %d\n", lessor->lessee_id); mutex_lock(&dev->mode_config.idr_mutex); count = 0; drm_for_each_lessee(lessee, lessor) { /* Only list un-revoked leases */ if (!idr_is_empty(&lessee->leases)) { if (count_lessees > count) { drm_dbg_lease(dev, "Add lessee %d\n", lessee->lessee_id); ret = put_user(lessee->lessee_id, lessee_ids + count); if (ret) break; } count++; } } drm_dbg_lease(dev, "Lessor leases to %d\n", count); if (ret == 0) arg->count_lessees = count; mutex_unlock(&dev->mode_config.idr_mutex); drm_master_put(&lessor); return ret; } /* Return the list of leased objects for the specified lessee */ int drm_mode_get_lease_ioctl(struct drm_device *dev, void *data, struct drm_file *lessee_priv) { struct drm_mode_get_lease *arg = data; __u32 __user *object_ids = (__u32 __user *) (uintptr_t) (arg->objects_ptr); __u32 count_objects = arg->count_objects; struct drm_master *lessee; struct idr *object_idr; int count; void *entry; int object; int ret = 0; if (arg->pad) return -EINVAL; /* Can't lease without MODESET */ if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; lessee = drm_file_get_master(lessee_priv); drm_dbg_lease(dev, "get lease for %d\n", lessee->lessee_id); mutex_lock(&dev->mode_config.idr_mutex); if (lessee->lessor == NULL) /* owner can use all objects */ object_idr = &lessee->dev->mode_config.object_idr; else /* lessee can only use allowed object */ object_idr = &lessee->leases; count = 0; idr_for_each_entry(object_idr, entry, object) { if (count_objects > count) { drm_dbg_lease(dev, "adding object %d\n", object); ret = put_user(object, object_ids + count); if (ret) break; } count++; } DRM_DEBUG("lease holds %d objects\n", count); if (ret == 0) arg->count_objects = count; mutex_unlock(&dev->mode_config.idr_mutex); drm_master_put(&lessee); return ret; } /* * This removes all of the objects from the lease without * actually getting rid of the lease itself; that way all * references to it still work correctly */ int drm_mode_revoke_lease_ioctl(struct drm_device *dev, void *data, struct drm_file *lessor_priv) { struct drm_mode_revoke_lease *arg = data; struct drm_master *lessor; struct drm_master *lessee; int ret = 0; drm_dbg_lease(dev, "revoke lease for %d\n", arg->lessee_id); /* Can't lease without MODESET */ if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; lessor = drm_file_get_master(lessor_priv); mutex_lock(&dev->mode_config.idr_mutex); lessee = _drm_find_lessee(lessor, arg->lessee_id); /* No such lessee */ if (!lessee) { ret = -ENOENT; goto fail; } /* Lease is not held by lessor */ if (lessee->lessor != lessor) { ret = -EACCES; goto fail; } _drm_lease_revoke(lessee); fail: mutex_unlock(&dev->mode_config.idr_mutex); drm_master_put(&lessor); return ret; } |
328 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_PATH_H #define _LINUX_PATH_H struct dentry; struct vfsmount; struct path { struct vfsmount *mnt; struct dentry *dentry; } __randomize_layout; extern void path_get(const struct path *); extern void path_put(const struct path *); static inline int path_equal(const struct path *path1, const struct path *path2) { return path1->mnt == path2->mnt && path1->dentry == path2->dentry; } /* * Cleanup macro for use with __free(path_put). Avoids dereference and * copying @path unlike DEFINE_FREE(). path_put() will handle the empty * path correctly just ensure @path is initialized: * * struct path path __free(path_put) = {}; */ #define __free_path_put path_put #endif /* _LINUX_PATH_H */ |
57 35 13 30 30 29 24 30 31 31 31 25 25 37 37 37 37 37 3 3 37 2 24 30 35 35 35 26 26 13 9 13 13 35 26 26 26 26 26 26 26 26 26 35 35 35 862 859 9 860 6 6 6 6 4 26 26 26 26 26 26 26 26 26 1 1 1 1 29 29 29 24 24 24 24 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 | // SPDX-License-Identifier: GPL-2.0 /* * drivers/base/devres.c - device resource management * * Copyright (c) 2006 SUSE Linux Products GmbH * Copyright (c) 2006 Tejun Heo <teheo@suse.de> */ #include <linux/device.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/percpu.h> #include <asm/sections.h> #include "base.h" #include "trace.h" struct devres_node { struct list_head entry; dr_release_t release; const char *name; size_t size; }; struct devres { struct devres_node node; /* * Some archs want to perform DMA into kmalloc caches * and need a guaranteed alignment larger than * the alignment of a 64-bit integer. * Thus we use ARCH_DMA_MINALIGN for data[] which will force the same * alignment for struct devres when allocated by kmalloc(). */ u8 __aligned(ARCH_DMA_MINALIGN) data[]; }; struct devres_group { struct devres_node node[2]; void *id; int color; /* -- 8 pointers */ }; static void set_node_dbginfo(struct devres_node *node, const char *name, size_t size) { node->name = name; node->size = size; } #ifdef CONFIG_DEBUG_DEVRES static int log_devres = 0; module_param_named(log, log_devres, int, S_IRUGO | S_IWUSR); static void devres_dbg(struct device *dev, struct devres_node *node, const char *op) { if (unlikely(log_devres)) dev_err(dev, "DEVRES %3s %p %s (%zu bytes)\n", op, node, node->name, node->size); } #else /* CONFIG_DEBUG_DEVRES */ #define devres_dbg(dev, node, op) do {} while (0) #endif /* CONFIG_DEBUG_DEVRES */ static void devres_log(struct device *dev, struct devres_node *node, const char *op) { trace_devres_log(dev, op, node, node->name, node->size); devres_dbg(dev, node, op); } /* * Release functions for devres group. These callbacks are used only * for identification. */ static void group_open_release(struct device *dev, void *res) { /* noop */ } static void group_close_release(struct device *dev, void *res) { /* noop */ } static struct devres_group *node_to_group(struct devres_node *node) { if (node->release == &group_open_release) return container_of(node, struct devres_group, node[0]); if (node->release == &group_close_release) return container_of(node, struct devres_group, node[1]); return NULL; } static bool check_dr_size(size_t size, size_t *tot_size) { /* We must catch any near-SIZE_MAX cases that could overflow. */ if (unlikely(check_add_overflow(sizeof(struct devres), size, tot_size))) return false; /* Actually allocate the full kmalloc bucket size. */ *tot_size = kmalloc_size_roundup(*tot_size); return true; } static __always_inline struct devres *alloc_dr(dr_release_t release, size_t size, gfp_t gfp, int nid) { size_t tot_size; struct devres *dr; if (!check_dr_size(size, &tot_size)) return NULL; dr = kmalloc_node_track_caller(tot_size, gfp, nid); if (unlikely(!dr)) return NULL; /* No need to clear memory twice */ if (!(gfp & __GFP_ZERO)) memset(dr, 0, offsetof(struct devres, data)); INIT_LIST_HEAD(&dr->node.entry); dr->node.release = release; return dr; } static void add_dr(struct device *dev, struct devres_node *node) { devres_log(dev, node, "ADD"); BUG_ON(!list_empty(&node->entry)); list_add_tail(&node->entry, &dev->devres_head); } static void replace_dr(struct device *dev, struct devres_node *old, struct devres_node *new) { devres_log(dev, old, "REPLACE"); BUG_ON(!list_empty(&new->entry)); list_replace(&old->entry, &new->entry); } /** * __devres_alloc_node - Allocate device resource data * @release: Release function devres will be associated with * @size: Allocation size * @gfp: Allocation flags * @nid: NUMA node * @name: Name of the resource * * Allocate devres of @size bytes. The allocated area is zeroed, then * associated with @release. The returned pointer can be passed to * other devres_*() functions. * * RETURNS: * Pointer to allocated devres on success, NULL on failure. */ void *__devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp, int nid, const char *name) { struct devres *dr; dr = alloc_dr(release, size, gfp | __GFP_ZERO, nid); if (unlikely(!dr)) return NULL; set_node_dbginfo(&dr->node, name, size); return dr->data; } EXPORT_SYMBOL_GPL(__devres_alloc_node); /** * devres_for_each_res - Resource iterator * @dev: Device to iterate resource from * @release: Look for resources associated with this release function * @match: Match function (optional) * @match_data: Data for the match function * @fn: Function to be called for each matched resource. * @data: Data for @fn, the 3rd parameter of @fn * * Call @fn for each devres of @dev which is associated with @release * and for which @match returns 1. * * RETURNS: * void */ void devres_for_each_res(struct device *dev, dr_release_t release, dr_match_t match, void *match_data, void (*fn)(struct device *, void *, void *), void *data) { struct devres_node *node; struct devres_node *tmp; unsigned long flags; if (!fn) return; spin_lock_irqsave(&dev->devres_lock, flags); list_for_each_entry_safe_reverse(node, tmp, &dev->devres_head, entry) { struct devres *dr = container_of(node, struct devres, node); if (node->release != release) continue; if (match && !match(dev, dr->data, match_data)) continue; fn(dev, dr->data, data); } spin_unlock_irqrestore(&dev->devres_lock, flags); } EXPORT_SYMBOL_GPL(devres_for_each_res); /** * devres_free - Free device resource data * @res: Pointer to devres data to free * * Free devres created with devres_alloc(). */ void devres_free(void *res) { if (res) { struct devres *dr = container_of(res, struct devres, data); BUG_ON(!list_empty(&dr->node.entry)); kfree(dr); } } EXPORT_SYMBOL_GPL(devres_free); /** * devres_add - Register device resource * @dev: Device to add resource to * @res: Resource to register * * Register devres @res to @dev. @res should have been allocated * using devres_alloc(). On driver detach, the associated release * function will be invoked and devres will be freed automatically. */ void devres_add(struct device *dev, void *res) { struct devres *dr = container_of(res, struct devres, data); unsigned long flags; spin_lock_irqsave(&dev->devres_lock, flags); add_dr(dev, &dr->node); spin_unlock_irqrestore(&dev->devres_lock, flags); } EXPORT_SYMBOL_GPL(devres_add); static struct devres *find_dr(struct device *dev, dr_release_t release, dr_match_t match, void *match_data) { struct devres_node *node; list_for_each_entry_reverse(node, &dev->devres_head, entry) { struct devres *dr = container_of(node, struct devres, node); if (node->release != release) continue; if (match && !match(dev, dr->data, match_data)) continue; return dr; } return NULL; } /** * devres_find - Find device resource * @dev: Device to lookup resource from * @release: Look for resources associated with this release function * @match: Match function (optional) * @match_data: Data for the match function * * Find the latest devres of @dev which is associated with @release * and for which @match returns 1. If @match is NULL, it's considered * to match all. * * RETURNS: * Pointer to found devres, NULL if not found. */ void *devres_find(struct device *dev, dr_release_t release, dr_match_t match, void *match_data) { struct devres *dr; unsigned long flags; spin_lock_irqsave(&dev->devres_lock, flags); dr = find_dr(dev, release, match, match_data); spin_unlock_irqrestore(&dev->devres_lock, flags); if (dr) return dr->data; return NULL; } EXPORT_SYMBOL_GPL(devres_find); /** * devres_get - Find devres, if non-existent, add one atomically * @dev: Device to lookup or add devres for * @new_res: Pointer to new initialized devres to add if not found * @match: Match function (optional) * @match_data: Data for the match function * * Find the latest devres of @dev which has the same release function * as @new_res and for which @match return 1. If found, @new_res is * freed; otherwise, @new_res is added atomically. * * RETURNS: * Pointer to found or added devres. */ void *devres_get(struct device *dev, void *new_res, dr_match_t match, void *match_data) { struct devres *new_dr = container_of(new_res, struct devres, data); struct devres *dr; unsigned long flags; spin_lock_irqsave(&dev->devres_lock, flags); dr = find_dr(dev, new_dr->node.release, match, match_data); if (!dr) { add_dr(dev, &new_dr->node); dr = new_dr; new_res = NULL; } spin_unlock_irqrestore(&dev->devres_lock, flags); devres_free(new_res); return dr->data; } EXPORT_SYMBOL_GPL(devres_get); /** * devres_remove - Find a device resource and remove it * @dev: Device to find resource from * @release: Look for resources associated with this release function * @match: Match function (optional) * @match_data: Data for the match function * * Find the latest devres of @dev associated with @release and for * which @match returns 1. If @match is NULL, it's considered to * match all. If found, the resource is removed atomically and * returned. * * RETURNS: * Pointer to removed devres on success, NULL if not found. */ void *devres_remove(struct device *dev, dr_release_t release, dr_match_t match, void *match_data) { struct devres *dr; unsigned long flags; spin_lock_irqsave(&dev->devres_lock, flags); dr = find_dr(dev, release, match, match_data); if (dr) { list_del_init(&dr->node.entry); devres_log(dev, &dr->node, "REM"); } spin_unlock_irqrestore(&dev->devres_lock, flags); if (dr) return dr->data; return NULL; } EXPORT_SYMBOL_GPL(devres_remove); /** * devres_destroy - Find a device resource and destroy it * @dev: Device to find resource from * @release: Look for resources associated with this release function * @match: Match function (optional) * @match_data: Data for the match function * * Find the latest devres of @dev associated with @release and for * which @match returns 1. If @match is NULL, it's considered to * match all. If found, the resource is removed atomically and freed. * * Note that the release function for the resource will not be called, * only the devres-allocated data will be freed. The caller becomes * responsible for freeing any other data. * * RETURNS: * 0 if devres is found and freed, -ENOENT if not found. */ int devres_destroy(struct device *dev, dr_release_t release, dr_match_t match, void *match_data) { void *res; res = devres_remove(dev, release, match, match_data); if (unlikely(!res)) return -ENOENT; devres_free(res); return 0; } EXPORT_SYMBOL_GPL(devres_destroy); /** * devres_release - Find a device resource and destroy it, calling release * @dev: Device to find resource from * @release: Look for resources associated with this release function * @match: Match function (optional) * @match_data: Data for the match function * * Find the latest devres of @dev associated with @release and for * which @match returns 1. If @match is NULL, it's considered to * match all. If found, the resource is removed atomically, the * release function called and the resource freed. * * RETURNS: * 0 if devres is found and freed, -ENOENT if not found. */ int devres_release(struct device *dev, dr_release_t release, dr_match_t match, void *match_data) { void *res; res = devres_remove(dev, release, match, match_data); if (unlikely(!res)) return -ENOENT; (*release)(dev, res); devres_free(res); return 0; } EXPORT_SYMBOL_GPL(devres_release); static int remove_nodes(struct device *dev, struct list_head *first, struct list_head *end, struct list_head *todo) { struct devres_node *node, *n; int cnt = 0, nr_groups = 0; /* First pass - move normal devres entries to @todo and clear * devres_group colors. */ node = list_entry(first, struct devres_node, entry); list_for_each_entry_safe_from(node, n, end, entry) { struct devres_group *grp; grp = node_to_group(node); if (grp) { /* clear color of group markers in the first pass */ grp->color = 0; nr_groups++; } else { /* regular devres entry */ if (&node->entry == first) first = first->next; list_move_tail(&node->entry, todo); cnt++; } } if (!nr_groups) return cnt; /* Second pass - Scan groups and color them. A group gets * color value of two iff the group is wholly contained in * [current node, end). That is, for a closed group, both opening * and closing markers should be in the range, while just the * opening marker is enough for an open group. */ node = list_entry(first, struct devres_node, entry); list_for_each_entry_safe_from(node, n, end, entry) { struct devres_group *grp; grp = node_to_group(node); BUG_ON(!grp || list_empty(&grp->node[0].entry)); grp->color++; if (list_empty(&grp->node[1].entry)) grp->color++; BUG_ON(grp->color <= 0 || grp->color > 2); if (grp->color == 2) { /* No need to update current node or end. The removed * nodes are always before both. */ list_move_tail(&grp->node[0].entry, todo); list_del_init(&grp->node[1].entry); } } return cnt; } static void release_nodes(struct device *dev, struct list_head *todo) { struct devres *dr, *tmp; /* Release. Note that both devres and devres_group are * handled as devres in the following loop. This is safe. */ list_for_each_entry_safe_reverse(dr, tmp, todo, node.entry) { devres_log(dev, &dr->node, "REL"); dr->node.release(dev, dr->data); kfree(dr); } } /** * devres_release_all - Release all managed resources * @dev: Device to release resources for * * Release all resources associated with @dev. This function is * called on driver detach. */ int devres_release_all(struct device *dev) { unsigned long flags; LIST_HEAD(todo); int cnt; /* Looks like an uninitialized device structure */ if (WARN_ON(dev->devres_head.next == NULL)) return -ENODEV; /* Nothing to release if list is empty */ if (list_empty(&dev->devres_head)) return 0; spin_lock_irqsave(&dev->devres_lock, flags); cnt = remove_nodes(dev, dev->devres_head.next, &dev->devres_head, &todo); spin_unlock_irqrestore(&dev->devres_lock, flags); release_nodes(dev, &todo); return cnt; } /** * devres_open_group - Open a new devres group * @dev: Device to open devres group for * @id: Separator ID * @gfp: Allocation flags * * Open a new devres group for @dev with @id. For @id, using a * pointer to an object which won't be used for another group is * recommended. If @id is NULL, address-wise unique ID is created. * * RETURNS: * ID of the new group, NULL on failure. */ void *devres_open_group(struct device *dev, void *id, gfp_t gfp) { struct devres_group *grp; unsigned long flags; grp = kmalloc(sizeof(*grp), gfp); if (unlikely(!grp)) return NULL; grp->node[0].release = &group_open_release; grp->node[1].release = &group_close_release; INIT_LIST_HEAD(&grp->node[0].entry); INIT_LIST_HEAD(&grp->node[1].entry); set_node_dbginfo(&grp->node[0], "grp<", 0); set_node_dbginfo(&grp->node[1], "grp>", 0); grp->id = grp; if (id) grp->id = id; grp->color = 0; spin_lock_irqsave(&dev->devres_lock, flags); add_dr(dev, &grp->node[0]); spin_unlock_irqrestore(&dev->devres_lock, flags); return grp->id; } EXPORT_SYMBOL_GPL(devres_open_group); /* * Find devres group with ID @id. If @id is NULL, look for the latest open * group. */ static struct devres_group *find_group(struct device *dev, void *id) { struct devres_node *node; list_for_each_entry_reverse(node, &dev->devres_head, entry) { struct devres_group *grp; if (node->release != &group_open_release) continue; grp = container_of(node, struct devres_group, node[0]); if (id) { if (grp->id == id) return grp; } else if (list_empty(&grp->node[1].entry)) return grp; } return NULL; } /** * devres_close_group - Close a devres group * @dev: Device to close devres group for * @id: ID of target group, can be NULL * * Close the group identified by @id. If @id is NULL, the latest open * group is selected. */ void devres_close_group(struct device *dev, void *id) { struct devres_group *grp; unsigned long flags; spin_lock_irqsave(&dev->devres_lock, flags); grp = find_group(dev, id); if (grp) add_dr(dev, &grp->node[1]); else WARN_ON(1); spin_unlock_irqrestore(&dev->devres_lock, flags); } EXPORT_SYMBOL_GPL(devres_close_group); /** * devres_remove_group - Remove a devres group * @dev: Device to remove group for * @id: ID of target group, can be NULL * * Remove the group identified by @id. If @id is NULL, the latest * open group is selected. Note that removing a group doesn't affect * any other resources. */ void devres_remove_group(struct device *dev, void *id) { struct devres_group *grp; unsigned long flags; spin_lock_irqsave(&dev->devres_lock, flags); grp = find_group(dev, id); if (grp) { list_del_init(&grp->node[0].entry); list_del_init(&grp->node[1].entry); devres_log(dev, &grp->node[0], "REM"); } else WARN_ON(1); spin_unlock_irqrestore(&dev->devres_lock, flags); kfree(grp); } EXPORT_SYMBOL_GPL(devres_remove_group); /** * devres_release_group - Release resources in a devres group * @dev: Device to release group for * @id: ID of target group, can be NULL * * Release all resources in the group identified by @id. If @id is * NULL, the latest open group is selected. The selected group and * groups properly nested inside the selected group are removed. * * RETURNS: * The number of released non-group resources. */ int devres_release_group(struct device *dev, void *id) { struct devres_group *grp; unsigned long flags; LIST_HEAD(todo); int cnt = 0; spin_lock_irqsave(&dev->devres_lock, flags); grp = find_group(dev, id); if (grp) { struct list_head *first = &grp->node[0].entry; struct list_head *end = &dev->devres_head; if (!list_empty(&grp->node[1].entry)) end = grp->node[1].entry.next; cnt = remove_nodes(dev, first, end, &todo); spin_unlock_irqrestore(&dev->devres_lock, flags); release_nodes(dev, &todo); } else if (list_empty(&dev->devres_head)) { /* * dev is probably dying via devres_release_all(): groups * have already been removed and are on the process of * being released - don't touch and don't warn. */ spin_unlock_irqrestore(&dev->devres_lock, flags); } else { WARN_ON(1); spin_unlock_irqrestore(&dev->devres_lock, flags); } return cnt; } EXPORT_SYMBOL_GPL(devres_release_group); /* * Custom devres actions allow inserting a simple function call * into the teardown sequence. */ struct action_devres { void *data; void (*action)(void *); }; static int devm_action_match(struct device *dev, void *res, void *p) { struct action_devres *devres = res; struct action_devres *target = p; return devres->action == target->action && devres->data == target->data; } static void devm_action_release(struct device *dev, void *res) { struct action_devres *devres = res; devres->action(devres->data); } /** * __devm_add_action() - add a custom action to list of managed resources * @dev: Device that owns the action * @action: Function that should be called * @data: Pointer to data passed to @action implementation * @name: Name of the resource (for debugging purposes) * * This adds a custom action to the list of managed resources so that * it gets executed as part of standard resource unwinding. */ int __devm_add_action(struct device *dev, void (*action)(void *), void *data, const char *name) { struct action_devres *devres; devres = __devres_alloc_node(devm_action_release, sizeof(struct action_devres), GFP_KERNEL, NUMA_NO_NODE, name); if (!devres) return -ENOMEM; devres->data = data; devres->action = action; devres_add(dev, devres); return 0; } EXPORT_SYMBOL_GPL(__devm_add_action); /** * devm_remove_action_nowarn() - removes previously added custom action * @dev: Device that owns the action * @action: Function implementing the action * @data: Pointer to data passed to @action implementation * * Removes instance of @action previously added by devm_add_action(). * Both action and data should match one of the existing entries. * * In contrast to devm_remove_action(), this function does not WARN() if no * entry could have been found. * * This should only be used if the action is contained in an object with * independent lifetime management, e.g. the Devres rust abstraction. * * Causing the warning from regular driver code most likely indicates an abuse * of the devres API. * * Returns: 0 on success, -ENOENT if no entry could have been found. */ int devm_remove_action_nowarn(struct device *dev, void (*action)(void *), void *data) { struct action_devres devres = { .data = data, .action = action, }; return devres_destroy(dev, devm_action_release, devm_action_match, &devres); } EXPORT_SYMBOL_GPL(devm_remove_action_nowarn); /** * devm_release_action() - release previously added custom action * @dev: Device that owns the action * @action: Function implementing the action * @data: Pointer to data passed to @action implementation * * Releases and removes instance of @action previously added by * devm_add_action(). Both action and data should match one of the * existing entries. */ void devm_release_action(struct device *dev, void (*action)(void *), void *data) { struct action_devres devres = { .data = data, .action = action, }; WARN_ON(devres_release(dev, devm_action_release, devm_action_match, &devres)); } EXPORT_SYMBOL_GPL(devm_release_action); /* * Managed kmalloc/kfree */ static void devm_kmalloc_release(struct device *dev, void *res) { /* noop */ } static int devm_kmalloc_match(struct device *dev, void *res, void *data) { return res == data; } /** * devm_kmalloc - Resource-managed kmalloc * @dev: Device to allocate memory for * @size: Allocation size * @gfp: Allocation gfp flags * * Managed kmalloc. Memory allocated with this function is * automatically freed on driver detach. Like all other devres * resources, guaranteed alignment is unsigned long long. * * RETURNS: * Pointer to allocated memory on success, NULL on failure. */ void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp) { struct devres *dr; if (unlikely(!size)) return ZERO_SIZE_PTR; /* use raw alloc_dr for kmalloc caller tracing */ dr = alloc_dr(devm_kmalloc_release, size, gfp, dev_to_node(dev)); if (unlikely(!dr)) return NULL; /* * This is named devm_kzalloc_release for historical reasons * The initial implementation did not support kmalloc, only kzalloc */ set_node_dbginfo(&dr->node, "devm_kzalloc_release", size); devres_add(dev, dr->data); return dr->data; } EXPORT_SYMBOL_GPL(devm_kmalloc); /** * devm_krealloc - Resource-managed krealloc() * @dev: Device to re-allocate memory for * @ptr: Pointer to the memory chunk to re-allocate * @new_size: New allocation size * @gfp: Allocation gfp flags * * Managed krealloc(). Resizes the memory chunk allocated with devm_kmalloc(). * Behaves similarly to regular krealloc(): if @ptr is NULL or ZERO_SIZE_PTR, * it's the equivalent of devm_kmalloc(). If new_size is zero, it frees the * previously allocated memory and returns ZERO_SIZE_PTR. This function doesn't * change the order in which the release callback for the re-alloc'ed devres * will be called (except when falling back to devm_kmalloc() or when freeing * resources when new_size is zero). The contents of the memory are preserved * up to the lesser of new and old sizes. */ void *devm_krealloc(struct device *dev, void *ptr, size_t new_size, gfp_t gfp) { size_t total_new_size, total_old_size; struct devres *old_dr, *new_dr; unsigned long flags; if (unlikely(!new_size)) { devm_kfree(dev, ptr); return ZERO_SIZE_PTR; } if (unlikely(ZERO_OR_NULL_PTR(ptr))) return devm_kmalloc(dev, new_size, gfp); if (WARN_ON(is_kernel_rodata((unsigned long)ptr))) /* * We cannot reliably realloc a const string returned by * devm_kstrdup_const(). */ return NULL; if (!check_dr_size(new_size, &total_new_size)) return NULL; total_old_size = ksize(container_of(ptr, struct devres, data)); if (total_old_size == 0) { WARN(1, "Pointer doesn't point to dynamically allocated memory."); return NULL; } /* * If new size is smaller or equal to the actual number of bytes * allocated previously - just return the same pointer. */ if (total_new_size <= total_old_size) return ptr; /* * Otherwise: allocate new, larger chunk. We need to allocate before * taking the lock as most probably the caller uses GFP_KERNEL. * alloc_dr() will call check_dr_size() to reserve extra memory * for struct devres automatically, so size @new_size user request * is delivered to it directly as devm_kmalloc() does. */ new_dr = alloc_dr(devm_kmalloc_release, new_size, gfp, dev_to_node(dev)); if (!new_dr) return NULL; /* * The spinlock protects the linked list against concurrent * modifications but not the resource itself. */ spin_lock_irqsave(&dev->devres_lock, flags); old_dr = find_dr(dev, devm_kmalloc_release, devm_kmalloc_match, ptr); if (!old_dr) { spin_unlock_irqrestore(&dev->devres_lock, flags); kfree(new_dr); WARN(1, "Memory chunk not managed or managed by a different device."); return NULL; } replace_dr(dev, &old_dr->node, &new_dr->node); spin_unlock_irqrestore(&dev->devres_lock, flags); /* * We can copy the memory contents after releasing the lock as we're * no longer modifying the list links. */ memcpy(new_dr->data, old_dr->data, total_old_size - offsetof(struct devres, data)); /* * Same for releasing the old devres - it's now been removed from the * list. This is also the reason why we must not use devm_kfree() - the * links are no longer valid. */ kfree(old_dr); return new_dr->data; } EXPORT_SYMBOL_GPL(devm_krealloc); /** * devm_kstrdup - Allocate resource managed space and * copy an existing string into that. * @dev: Device to allocate memory for * @s: the string to duplicate * @gfp: the GFP mask used in the devm_kmalloc() call when * allocating memory * RETURNS: * Pointer to allocated string on success, NULL on failure. */ char *devm_kstrdup(struct device *dev, const char *s, gfp_t gfp) { size_t size; char *buf; if (!s) return NULL; size = strlen(s) + 1; buf = devm_kmalloc(dev, size, gfp); if (buf) memcpy(buf, s, size); return buf; } EXPORT_SYMBOL_GPL(devm_kstrdup); /** * devm_kstrdup_const - resource managed conditional string duplication * @dev: device for which to duplicate the string * @s: the string to duplicate * @gfp: the GFP mask used in the kmalloc() call when allocating memory * * Strings allocated by devm_kstrdup_const will be automatically freed when * the associated device is detached. * * RETURNS: * Source string if it is in .rodata section otherwise it falls back to * devm_kstrdup. */ const char *devm_kstrdup_const(struct device *dev, const char *s, gfp_t gfp) { if (is_kernel_rodata((unsigned long)s)) return s; return devm_kstrdup(dev, s, gfp); } EXPORT_SYMBOL_GPL(devm_kstrdup_const); /** * devm_kvasprintf - Allocate resource managed space and format a string * into that. * @dev: Device to allocate memory for * @gfp: the GFP mask used in the devm_kmalloc() call when * allocating memory * @fmt: The printf()-style format string * @ap: Arguments for the format string * RETURNS: * Pointer to allocated string on success, NULL on failure. */ char *devm_kvasprintf(struct device *dev, gfp_t gfp, const char *fmt, va_list ap) { unsigned int len; char *p; va_list aq; va_copy(aq, ap); len = vsnprintf(NULL, 0, fmt, aq); va_end(aq); p = devm_kmalloc(dev, len+1, gfp); if (!p) return NULL; vsnprintf(p, len+1, fmt, ap); return p; } EXPORT_SYMBOL(devm_kvasprintf); /** * devm_kasprintf - Allocate resource managed space and format a string * into that. * @dev: Device to allocate memory for * @gfp: the GFP mask used in the devm_kmalloc() call when * allocating memory * @fmt: The printf()-style format string * @...: Arguments for the format string * RETURNS: * Pointer to allocated string on success, NULL on failure. */ char *devm_kasprintf(struct device *dev, gfp_t gfp, const char *fmt, ...) { va_list ap; char *p; va_start(ap, fmt); p = devm_kvasprintf(dev, gfp, fmt, ap); va_end(ap); return p; } EXPORT_SYMBOL_GPL(devm_kasprintf); /** * devm_kfree - Resource-managed kfree * @dev: Device this memory belongs to * @p: Memory to free * * Free memory allocated with devm_kmalloc(). */ void devm_kfree(struct device *dev, const void *p) { int rc; /* * Special cases: pointer to a string in .rodata returned by * devm_kstrdup_const() or NULL/ZERO ptr. */ if (unlikely(is_kernel_rodata((unsigned long)p) || ZERO_OR_NULL_PTR(p))) return; rc = devres_destroy(dev, devm_kmalloc_release, devm_kmalloc_match, (void *)p); WARN_ON(rc); } EXPORT_SYMBOL_GPL(devm_kfree); /** * devm_kmemdup - Resource-managed kmemdup * @dev: Device this memory belongs to * @src: Memory region to duplicate * @len: Memory region length * @gfp: GFP mask to use * * Duplicate region of a memory using resource managed kmalloc */ void *devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp) { void *p; p = devm_kmalloc(dev, len, gfp); if (p) memcpy(p, src, len); return p; } EXPORT_SYMBOL_GPL(devm_kmemdup); struct pages_devres { unsigned long addr; unsigned int order; }; static int devm_pages_match(struct device *dev, void *res, void *p) { struct pages_devres *devres = res; struct pages_devres *target = p; return devres->addr == target->addr; } static void devm_pages_release(struct device *dev, void *res) { struct pages_devres *devres = res; free_pages(devres->addr, devres->order); } /** * devm_get_free_pages - Resource-managed __get_free_pages * @dev: Device to allocate memory for * @gfp_mask: Allocation gfp flags * @order: Allocation size is (1 << order) pages * * Managed get_free_pages. Memory allocated with this function is * automatically freed on driver detach. * * RETURNS: * Address of allocated memory on success, 0 on failure. */ unsigned long devm_get_free_pages(struct device *dev, gfp_t gfp_mask, unsigned int order) { struct pages_devres *devres; unsigned long addr; addr = __get_free_pages(gfp_mask, order); if (unlikely(!addr)) return 0; devres = devres_alloc(devm_pages_release, sizeof(struct pages_devres), GFP_KERNEL); if (unlikely(!devres)) { free_pages(addr, order); return 0; } devres->addr = addr; devres->order = order; devres_add(dev, devres); return addr; } EXPORT_SYMBOL_GPL(devm_get_free_pages); /** * devm_free_pages - Resource-managed free_pages * @dev: Device this memory belongs to * @addr: Memory to free * * Free memory allocated with devm_get_free_pages(). Unlike free_pages, * there is no need to supply the @order. */ void devm_free_pages(struct device *dev, unsigned long addr) { struct pages_devres devres = { .addr = addr }; WARN_ON(devres_release(dev, devm_pages_release, devm_pages_match, &devres)); } EXPORT_SYMBOL_GPL(devm_free_pages); static void devm_percpu_release(struct device *dev, void *pdata) { void __percpu *p; p = *(void __percpu **)pdata; free_percpu(p); } static int devm_percpu_match(struct device *dev, void *data, void *p) { struct devres *devr = container_of(data, struct devres, data); return *(void **)devr->data == p; } /** * __devm_alloc_percpu - Resource-managed alloc_percpu * @dev: Device to allocate per-cpu memory for * @size: Size of per-cpu memory to allocate * @align: Alignment of per-cpu memory to allocate * * Managed alloc_percpu. Per-cpu memory allocated with this function is * automatically freed on driver detach. * * RETURNS: * Pointer to allocated memory on success, NULL on failure. */ void __percpu *__devm_alloc_percpu(struct device *dev, size_t size, size_t align) { void *p; void __percpu *pcpu; pcpu = __alloc_percpu(size, align); if (!pcpu) return NULL; p = devres_alloc(devm_percpu_release, sizeof(void *), GFP_KERNEL); if (!p) { free_percpu(pcpu); return NULL; } *(void __percpu **)p = pcpu; devres_add(dev, p); return pcpu; } EXPORT_SYMBOL_GPL(__devm_alloc_percpu); /** * devm_free_percpu - Resource-managed free_percpu * @dev: Device this memory belongs to * @pdata: Per-cpu memory to free * * Free memory allocated with devm_alloc_percpu(). */ void devm_free_percpu(struct device *dev, void __percpu *pdata) { /* * Use devres_release() to prevent memory leakage as * devm_free_pages() does. */ WARN_ON(devres_release(dev, devm_percpu_release, devm_percpu_match, (void *)(__force unsigned long)pdata)); } EXPORT_SYMBOL_GPL(devm_free_percpu); |
16 1 1 1 1 4 2 2 2 2 4 12 12 12 12 1 1 1 1 12 1 1 1 13 13 12 12 1 13 12 2 12 13 13 13 13 13 11 12 13 13 13 1 12 13 13 14 14 14 14 14 14 14 4 1 3 3 2 2 1 2 2 4 8 8 8 8 8 10 7 4 4 3 3 2 4 2 2 2 4 2 2 4 10 6 4 6 4 3 3 2 2 2 4 4 1 1 4 15 15 2 1 2 3 3 2 2 2 1 1 1 1 2 2 4 4 2 24 5 4 24 5 5 5 4 4 4 3 5 4 10 12 10 12 12 4 6 2 5 2 3 3 3 5 6 5 6 18 18 15 3 13 11 6 18 8 10 10 16 16 16 16 10 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 | // SPDX-License-Identifier: GPL-2.0 /* * The USB Monitor, inspired by Dave Harding's USBMon. * * This is a binary format reader. * * Copyright (C) 2006 Paolo Abeni (paolo.abeni@email.it) * Copyright (C) 2006,2007 Pete Zaitcev (zaitcev@redhat.com) */ #include <linux/kernel.h> #include <linux/sched/signal.h> #include <linux/types.h> #include <linux/fs.h> #include <linux/cdev.h> #include <linux/export.h> #include <linux/usb.h> #include <linux/poll.h> #include <linux/compat.h> #include <linux/mm.h> #include <linux/scatterlist.h> #include <linux/slab.h> #include <linux/time64.h> #include <linux/uaccess.h> #include "usb_mon.h" /* * Defined by USB 2.0 clause 9.3, table 9.2. */ #define SETUP_LEN 8 /* ioctl macros */ #define MON_IOC_MAGIC 0x92 #define MON_IOCQ_URB_LEN _IO(MON_IOC_MAGIC, 1) /* #2 used to be MON_IOCX_URB, removed before it got into Linus tree */ #define MON_IOCG_STATS _IOR(MON_IOC_MAGIC, 3, struct mon_bin_stats) #define MON_IOCT_RING_SIZE _IO(MON_IOC_MAGIC, 4) #define MON_IOCQ_RING_SIZE _IO(MON_IOC_MAGIC, 5) #define MON_IOCX_GET _IOW(MON_IOC_MAGIC, 6, struct mon_bin_get) #define MON_IOCX_MFETCH _IOWR(MON_IOC_MAGIC, 7, struct mon_bin_mfetch) #define MON_IOCH_MFLUSH _IO(MON_IOC_MAGIC, 8) /* #9 was MON_IOCT_SETAPI */ #define MON_IOCX_GETX _IOW(MON_IOC_MAGIC, 10, struct mon_bin_get) #ifdef CONFIG_COMPAT #define MON_IOCX_GET32 _IOW(MON_IOC_MAGIC, 6, struct mon_bin_get32) #define MON_IOCX_MFETCH32 _IOWR(MON_IOC_MAGIC, 7, struct mon_bin_mfetch32) #define MON_IOCX_GETX32 _IOW(MON_IOC_MAGIC, 10, struct mon_bin_get32) #endif /* * Some architectures have enormous basic pages (16KB for ia64, 64KB for ppc). * But it's all right. Just use a simple way to make sure the chunk is never * smaller than a page. * * N.B. An application does not know our chunk size. * * Woops, get_zeroed_page() returns a single page. I guess we're stuck with * page-sized chunks for the time being. */ #define CHUNK_SIZE PAGE_SIZE #define CHUNK_ALIGN(x) (((x)+CHUNK_SIZE-1) & ~(CHUNK_SIZE-1)) /* * The magic limit was calculated so that it allows the monitoring * application to pick data once in two ticks. This way, another application, * which presumably drives the bus, gets to hog CPU, yet we collect our data. * If HZ is 100, a 480 mbit/s bus drives 614 KB every jiffy. USB has an * enormous overhead built into the bus protocol, so we need about 1000 KB. * * This is still too much for most cases, where we just snoop a few * descriptor fetches for enumeration. So, the default is a "reasonable" * amount for systems with HZ=250 and incomplete bus saturation. * * XXX What about multi-megabyte URBs which take minutes to transfer? */ #define BUFF_MAX CHUNK_ALIGN(1200*1024) #define BUFF_DFL CHUNK_ALIGN(300*1024) #define BUFF_MIN CHUNK_ALIGN(8*1024) /* * The per-event API header (2 per URB). * * This structure is seen in userland as defined by the documentation. */ struct mon_bin_hdr { u64 id; /* URB ID - from submission to callback */ unsigned char type; /* Same as in text API; extensible. */ unsigned char xfer_type; /* ISO, Intr, Control, Bulk */ unsigned char epnum; /* Endpoint number and transfer direction */ unsigned char devnum; /* Device address */ unsigned short busnum; /* Bus number */ char flag_setup; char flag_data; s64 ts_sec; /* ktime_get_real_ts64 */ s32 ts_usec; /* ktime_get_real_ts64 */ int status; unsigned int len_urb; /* Length of data (submitted or actual) */ unsigned int len_cap; /* Delivered length */ union { unsigned char setup[SETUP_LEN]; /* Only for Control S-type */ struct iso_rec { int error_count; int numdesc; } iso; } s; int interval; int start_frame; unsigned int xfer_flags; unsigned int ndesc; /* Actual number of ISO descriptors */ }; /* * ISO vector, packed into the head of data stream. * This has to take 16 bytes to make sure that the end of buffer * wrap is not happening in the middle of a descriptor. */ struct mon_bin_isodesc { int iso_status; unsigned int iso_off; unsigned int iso_len; u32 _pad; }; /* per file statistic */ struct mon_bin_stats { u32 queued; u32 dropped; }; struct mon_bin_get { struct mon_bin_hdr __user *hdr; /* Can be 48 bytes or 64. */ void __user *data; size_t alloc; /* Length of data (can be zero) */ }; struct mon_bin_mfetch { u32 __user *offvec; /* Vector of events fetched */ u32 nfetch; /* Number of events to fetch (out: fetched) */ u32 nflush; /* Number of events to flush */ }; #ifdef CONFIG_COMPAT struct mon_bin_get32 { u32 hdr32; u32 data32; u32 alloc32; }; struct mon_bin_mfetch32 { u32 offvec32; u32 nfetch32; u32 nflush32; }; #endif /* Having these two values same prevents wrapping of the mon_bin_hdr */ #define PKT_ALIGN 64 #define PKT_SIZE 64 #define PKT_SZ_API0 48 /* API 0 (2.6.20) size */ #define PKT_SZ_API1 64 /* API 1 size: extra fields */ #define ISODESC_MAX 128 /* Same number as usbfs allows, 2048 bytes. */ /* max number of USB bus supported */ #define MON_BIN_MAX_MINOR 128 /* * The buffer: map of used pages. */ struct mon_pgmap { struct page *pg; unsigned char *ptr; /* XXX just use page_to_virt everywhere? */ }; /* * This gets associated with an open file struct. */ struct mon_reader_bin { /* The buffer: one per open. */ spinlock_t b_lock; /* Protect b_cnt, b_in */ unsigned int b_size; /* Current size of the buffer - bytes */ unsigned int b_cnt; /* Bytes used */ unsigned int b_in, b_out; /* Offsets into buffer - bytes */ unsigned int b_read; /* Amount of read data in curr. pkt. */ struct mon_pgmap *b_vec; /* The map array */ wait_queue_head_t b_wait; /* Wait for data here */ struct mutex fetch_lock; /* Protect b_read, b_out */ int mmap_active; /* A list of these is needed for "bus 0". Some time later. */ struct mon_reader r; /* Stats */ unsigned int cnt_lost; }; static inline struct mon_bin_hdr *MON_OFF2HDR(const struct mon_reader_bin *rp, unsigned int offset) { return (struct mon_bin_hdr *) (rp->b_vec[offset / CHUNK_SIZE].ptr + offset % CHUNK_SIZE); } #define MON_RING_EMPTY(rp) ((rp)->b_cnt == 0) static unsigned char xfer_to_pipe[4] = { PIPE_CONTROL, PIPE_ISOCHRONOUS, PIPE_BULK, PIPE_INTERRUPT }; static const struct class mon_bin_class = { .name = "usbmon", }; static dev_t mon_bin_dev0; static struct cdev mon_bin_cdev; static void mon_buff_area_fill(const struct mon_reader_bin *rp, unsigned int offset, unsigned int size); static int mon_bin_wait_event(struct file *file, struct mon_reader_bin *rp); static int mon_alloc_buff(struct mon_pgmap *map, int npages); static void mon_free_buff(struct mon_pgmap *map, int npages); /* * This is a "chunked memcpy". It does not manipulate any counters. */ static unsigned int mon_copy_to_buff(const struct mon_reader_bin *this, unsigned int off, const unsigned char *from, unsigned int length) { unsigned int step_len; unsigned char *buf; unsigned int in_page; while (length) { /* * Determine step_len. */ step_len = length; in_page = CHUNK_SIZE - (off & (CHUNK_SIZE-1)); if (in_page < step_len) step_len = in_page; /* * Copy data and advance pointers. */ buf = this->b_vec[off / CHUNK_SIZE].ptr + off % CHUNK_SIZE; memcpy(buf, from, step_len); if ((off += step_len) >= this->b_size) off = 0; from += step_len; length -= step_len; } return off; } /* * This is a little worse than the above because it's "chunked copy_to_user". * The return value is an error code, not an offset. */ static int copy_from_buf(const struct mon_reader_bin *this, unsigned int off, char __user *to, int length) { unsigned int step_len; unsigned char *buf; unsigned int in_page; while (length) { /* * Determine step_len. */ step_len = length; in_page = CHUNK_SIZE - (off & (CHUNK_SIZE-1)); if (in_page < step_len) step_len = in_page; /* * Copy data and advance pointers. */ buf = this->b_vec[off / CHUNK_SIZE].ptr + off % CHUNK_SIZE; if (copy_to_user(to, buf, step_len)) return -EINVAL; if ((off += step_len) >= this->b_size) off = 0; to += step_len; length -= step_len; } return 0; } /* * Allocate an (aligned) area in the buffer. * This is called under b_lock. * Returns ~0 on failure. */ static unsigned int mon_buff_area_alloc(struct mon_reader_bin *rp, unsigned int size) { unsigned int offset; size = (size + PKT_ALIGN-1) & ~(PKT_ALIGN-1); if (rp->b_cnt + size > rp->b_size) return ~0; offset = rp->b_in; rp->b_cnt += size; if ((rp->b_in += size) >= rp->b_size) rp->b_in -= rp->b_size; return offset; } /* * This is the same thing as mon_buff_area_alloc, only it does not allow * buffers to wrap. This is needed by applications which pass references * into mmap-ed buffers up their stacks (libpcap can do that). * * Currently, we always have the header stuck with the data, although * it is not strictly speaking necessary. * * When a buffer would wrap, we place a filler packet to mark the space. */ static unsigned int mon_buff_area_alloc_contiguous(struct mon_reader_bin *rp, unsigned int size) { unsigned int offset; unsigned int fill_size; size = (size + PKT_ALIGN-1) & ~(PKT_ALIGN-1); if (rp->b_cnt + size > rp->b_size) return ~0; if (rp->b_in + size > rp->b_size) { /* * This would wrap. Find if we still have space after * skipping to the end of the buffer. If we do, place * a filler packet and allocate a new packet. */ fill_size = rp->b_size - rp->b_in; if (rp->b_cnt + size + fill_size > rp->b_size) return ~0; mon_buff_area_fill(rp, rp->b_in, fill_size); offset = 0; rp->b_in = size; rp->b_cnt += size + fill_size; } else if (rp->b_in + size == rp->b_size) { offset = rp->b_in; rp->b_in = 0; rp->b_cnt += size; } else { offset = rp->b_in; rp->b_in += size; rp->b_cnt += size; } return offset; } /* * Return a few (kilo-)bytes to the head of the buffer. * This is used if a data fetch fails. */ static void mon_buff_area_shrink(struct mon_reader_bin *rp, unsigned int size) { /* size &= ~(PKT_ALIGN-1); -- we're called with aligned size */ rp->b_cnt -= size; if (rp->b_in < size) rp->b_in += rp->b_size; rp->b_in -= size; } /* * This has to be called under both b_lock and fetch_lock, because * it accesses both b_cnt and b_out. */ static void mon_buff_area_free(struct mon_reader_bin *rp, unsigned int size) { size = (size + PKT_ALIGN-1) & ~(PKT_ALIGN-1); rp->b_cnt -= size; if ((rp->b_out += size) >= rp->b_size) rp->b_out -= rp->b_size; } static void mon_buff_area_fill(const struct mon_reader_bin *rp, unsigned int offset, unsigned int size) { struct mon_bin_hdr *ep; ep = MON_OFF2HDR(rp, offset); memset(ep, 0, PKT_SIZE); ep->type = '@'; ep->len_cap = size - PKT_SIZE; } static inline char mon_bin_get_setup(unsigned char *setupb, const struct urb *urb, char ev_type) { if (urb->setup_packet == NULL) return 'Z'; memcpy(setupb, urb->setup_packet, SETUP_LEN); return 0; } static unsigned int mon_bin_get_data(const struct mon_reader_bin *rp, unsigned int offset, struct urb *urb, unsigned int length, char *flag) { int i; struct scatterlist *sg; unsigned int this_len; *flag = 0; if (urb->num_sgs == 0) { if (urb->transfer_buffer == NULL) { *flag = 'Z'; return length; } mon_copy_to_buff(rp, offset, urb->transfer_buffer, length); length = 0; } else { /* If IOMMU coalescing occurred, we cannot trust sg_page */ if (urb->transfer_flags & URB_DMA_SG_COMBINED) { *flag = 'D'; return length; } /* Copy up to the first non-addressable segment */ for_each_sg(urb->sg, sg, urb->num_sgs, i) { if (length == 0 || PageHighMem(sg_page(sg))) break; this_len = min_t(unsigned int, sg->length, length); offset = mon_copy_to_buff(rp, offset, sg_virt(sg), this_len); length -= this_len; } if (i == 0) *flag = 'D'; } return length; } /* * This is the look-ahead pass in case of 'C Zi', when actual_length cannot * be used to determine the length of the whole contiguous buffer. */ static unsigned int mon_bin_collate_isodesc(const struct mon_reader_bin *rp, struct urb *urb, unsigned int ndesc) { struct usb_iso_packet_descriptor *fp; unsigned int length; length = 0; fp = urb->iso_frame_desc; while (ndesc-- != 0) { if (fp->actual_length != 0) { if (fp->offset + fp->actual_length > length) length = fp->offset + fp->actual_length; } fp++; } return length; } static void mon_bin_get_isodesc(const struct mon_reader_bin *rp, unsigned int offset, struct urb *urb, char ev_type, unsigned int ndesc) { struct mon_bin_isodesc *dp; struct usb_iso_packet_descriptor *fp; fp = urb->iso_frame_desc; while (ndesc-- != 0) { dp = (struct mon_bin_isodesc *) (rp->b_vec[offset / CHUNK_SIZE].ptr + offset % CHUNK_SIZE); dp->iso_status = fp->status; dp->iso_off = fp->offset; dp->iso_len = (ev_type == 'S') ? fp->length : fp->actual_length; dp->_pad = 0; if ((offset += sizeof(struct mon_bin_isodesc)) >= rp->b_size) offset = 0; fp++; } } static void mon_bin_event(struct mon_reader_bin *rp, struct urb *urb, char ev_type, int status) { const struct usb_endpoint_descriptor *epd = &urb->ep->desc; struct timespec64 ts; unsigned long flags; unsigned int urb_length; unsigned int offset; unsigned int length; unsigned int delta; unsigned int ndesc, lendesc; unsigned char dir; struct mon_bin_hdr *ep; char data_tag = 0; ktime_get_real_ts64(&ts); spin_lock_irqsave(&rp->b_lock, flags); /* * Find the maximum allowable length, then allocate space. */ urb_length = (ev_type == 'S') ? urb->transfer_buffer_length : urb->actual_length; length = urb_length; if (usb_endpoint_xfer_isoc(epd)) { if (urb->number_of_packets < 0) { ndesc = 0; } else if (urb->number_of_packets >= ISODESC_MAX) { ndesc = ISODESC_MAX; } else { ndesc = urb->number_of_packets; } if (ev_type == 'C' && usb_urb_dir_in(urb)) length = mon_bin_collate_isodesc(rp, urb, ndesc); } else { ndesc = 0; } lendesc = ndesc*sizeof(struct mon_bin_isodesc); /* not an issue unless there's a subtle bug in a HCD somewhere */ if (length >= urb->transfer_buffer_length) length = urb->transfer_buffer_length; if (length >= rp->b_size/5) length = rp->b_size/5; if (usb_urb_dir_in(urb)) { if (ev_type == 'S') { length = 0; data_tag = '<'; } /* Cannot rely on endpoint number in case of control ep.0 */ dir = USB_DIR_IN; } else { if (ev_type == 'C') { length = 0; data_tag = '>'; } dir = 0; } if (rp->mmap_active) { offset = mon_buff_area_alloc_contiguous(rp, length + PKT_SIZE + lendesc); } else { offset = mon_buff_area_alloc(rp, length + PKT_SIZE + lendesc); } if (offset == ~0) { rp->cnt_lost++; spin_unlock_irqrestore(&rp->b_lock, flags); return; } ep = MON_OFF2HDR(rp, offset); if ((offset += PKT_SIZE) >= rp->b_size) offset = 0; /* * Fill the allocated area. */ memset(ep, 0, PKT_SIZE); ep->type = ev_type; ep->xfer_type = xfer_to_pipe[usb_endpoint_type(epd)]; ep->epnum = dir | usb_endpoint_num(epd); ep->devnum = urb->dev->devnum; ep->busnum = urb->dev->bus->busnum; ep->id = (unsigned long) urb; ep->ts_sec = ts.tv_sec; ep->ts_usec = ts.tv_nsec / NSEC_PER_USEC; ep->status = status; ep->len_urb = urb_length; ep->len_cap = length + lendesc; ep->xfer_flags = urb->transfer_flags; if (usb_endpoint_xfer_int(epd)) { ep->interval = urb->interval; } else if (usb_endpoint_xfer_isoc(epd)) { ep->interval = urb->interval; ep->start_frame = urb->start_frame; ep->s.iso.error_count = urb->error_count; ep->s.iso.numdesc = urb->number_of_packets; } if (usb_endpoint_xfer_control(epd) && ev_type == 'S') { ep->flag_setup = mon_bin_get_setup(ep->s.setup, urb, ev_type); } else { ep->flag_setup = '-'; } if (ndesc != 0) { ep->ndesc = ndesc; mon_bin_get_isodesc(rp, offset, urb, ev_type, ndesc); if ((offset += lendesc) >= rp->b_size) offset -= rp->b_size; } if (length != 0) { length = mon_bin_get_data(rp, offset, urb, length, &ep->flag_data); if (length > 0) { delta = (ep->len_cap + PKT_ALIGN-1) & ~(PKT_ALIGN-1); ep->len_cap -= length; delta -= (ep->len_cap + PKT_ALIGN-1) & ~(PKT_ALIGN-1); mon_buff_area_shrink(rp, delta); } } else { ep->flag_data = data_tag; } spin_unlock_irqrestore(&rp->b_lock, flags); wake_up(&rp->b_wait); } static void mon_bin_submit(void *data, struct urb *urb) { struct mon_reader_bin *rp = data; mon_bin_event(rp, urb, 'S', -EINPROGRESS); } static void mon_bin_complete(void *data, struct urb *urb, int status) { struct mon_reader_bin *rp = data; mon_bin_event(rp, urb, 'C', status); } static void mon_bin_error(void *data, struct urb *urb, int error) { struct mon_reader_bin *rp = data; struct timespec64 ts; unsigned long flags; unsigned int offset; struct mon_bin_hdr *ep; ktime_get_real_ts64(&ts); spin_lock_irqsave(&rp->b_lock, flags); offset = mon_buff_area_alloc(rp, PKT_SIZE); if (offset == ~0) { /* Not incrementing cnt_lost. Just because. */ spin_unlock_irqrestore(&rp->b_lock, flags); return; } ep = MON_OFF2HDR(rp, offset); memset(ep, 0, PKT_SIZE); ep->type = 'E'; ep->xfer_type = xfer_to_pipe[usb_endpoint_type(&urb->ep->desc)]; ep->epnum = usb_urb_dir_in(urb) ? USB_DIR_IN : 0; ep->epnum |= usb_endpoint_num(&urb->ep->desc); ep->devnum = urb->dev->devnum; ep->busnum = urb->dev->bus->busnum; ep->id = (unsigned long) urb; ep->ts_sec = ts.tv_sec; ep->ts_usec = ts.tv_nsec / NSEC_PER_USEC; ep->status = error; ep->flag_setup = '-'; ep->flag_data = 'E'; spin_unlock_irqrestore(&rp->b_lock, flags); wake_up(&rp->b_wait); } static int mon_bin_open(struct inode *inode, struct file *file) { struct mon_bus *mbus; struct mon_reader_bin *rp; size_t size; int rc; mutex_lock(&mon_lock); mbus = mon_bus_lookup(iminor(inode)); if (mbus == NULL) { mutex_unlock(&mon_lock); return -ENODEV; } if (mbus != &mon_bus0 && mbus->u_bus == NULL) { printk(KERN_ERR TAG ": consistency error on open\n"); mutex_unlock(&mon_lock); return -ENODEV; } rp = kzalloc(sizeof(struct mon_reader_bin), GFP_KERNEL); if (rp == NULL) { rc = -ENOMEM; goto err_alloc; } spin_lock_init(&rp->b_lock); init_waitqueue_head(&rp->b_wait); mutex_init(&rp->fetch_lock); rp->b_size = BUFF_DFL; size = sizeof(struct mon_pgmap) * (rp->b_size/CHUNK_SIZE); if ((rp->b_vec = kzalloc(size, GFP_KERNEL)) == NULL) { rc = -ENOMEM; goto err_allocvec; } if ((rc = mon_alloc_buff(rp->b_vec, rp->b_size/CHUNK_SIZE)) < 0) goto err_allocbuff; rp->r.m_bus = mbus; rp->r.r_data = rp; rp->r.rnf_submit = mon_bin_submit; rp->r.rnf_error = mon_bin_error; rp->r.rnf_complete = mon_bin_complete; mon_reader_add(mbus, &rp->r); file->private_data = rp; mutex_unlock(&mon_lock); return 0; err_allocbuff: kfree(rp->b_vec); err_allocvec: kfree(rp); err_alloc: mutex_unlock(&mon_lock); return rc; } /* * Extract an event from buffer and copy it to user space. * Wait if there is no event ready. * Returns zero or error. */ static int mon_bin_get_event(struct file *file, struct mon_reader_bin *rp, struct mon_bin_hdr __user *hdr, unsigned int hdrbytes, void __user *data, unsigned int nbytes) { unsigned long flags; struct mon_bin_hdr *ep; size_t step_len; unsigned int offset; int rc; mutex_lock(&rp->fetch_lock); if ((rc = mon_bin_wait_event(file, rp)) < 0) { mutex_unlock(&rp->fetch_lock); return rc; } ep = MON_OFF2HDR(rp, rp->b_out); if (copy_to_user(hdr, ep, hdrbytes)) { mutex_unlock(&rp->fetch_lock); return -EFAULT; } step_len = min(ep->len_cap, nbytes); if ((offset = rp->b_out + PKT_SIZE) >= rp->b_size) offset = 0; if (copy_from_buf(rp, offset, data, step_len)) { mutex_unlock(&rp->fetch_lock); return -EFAULT; } spin_lock_irqsave(&rp->b_lock, flags); mon_buff_area_free(rp, PKT_SIZE + ep->len_cap); spin_unlock_irqrestore(&rp->b_lock, flags); rp->b_read = 0; mutex_unlock(&rp->fetch_lock); return 0; } static int mon_bin_release(struct inode *inode, struct file *file) { struct mon_reader_bin *rp = file->private_data; struct mon_bus* mbus = rp->r.m_bus; mutex_lock(&mon_lock); if (mbus->nreaders <= 0) { printk(KERN_ERR TAG ": consistency error on close\n"); mutex_unlock(&mon_lock); return 0; } mon_reader_del(mbus, &rp->r); mon_free_buff(rp->b_vec, rp->b_size/CHUNK_SIZE); kfree(rp->b_vec); kfree(rp); mutex_unlock(&mon_lock); return 0; } static ssize_t mon_bin_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { struct mon_reader_bin *rp = file->private_data; unsigned int hdrbytes = PKT_SZ_API0; unsigned long flags; struct mon_bin_hdr *ep; unsigned int offset; size_t step_len; char *ptr; ssize_t done = 0; int rc; mutex_lock(&rp->fetch_lock); if ((rc = mon_bin_wait_event(file, rp)) < 0) { mutex_unlock(&rp->fetch_lock); return rc; } ep = MON_OFF2HDR(rp, rp->b_out); if (rp->b_read < hdrbytes) { step_len = min_t(size_t, nbytes, hdrbytes - rp->b_read); ptr = ((char *)ep) + rp->b_read; if (step_len && copy_to_user(buf, ptr, step_len)) { mutex_unlock(&rp->fetch_lock); return -EFAULT; } nbytes -= step_len; buf += step_len; rp->b_read += step_len; done += step_len; } if (rp->b_read >= hdrbytes) { step_len = ep->len_cap; step_len -= rp->b_read - hdrbytes; if (step_len > nbytes) step_len = nbytes; offset = rp->b_out + PKT_SIZE; offset += rp->b_read - hdrbytes; if (offset >= rp->b_size) offset -= rp->b_size; if (copy_from_buf(rp, offset, buf, step_len)) { mutex_unlock(&rp->fetch_lock); return -EFAULT; } nbytes -= step_len; buf += step_len; rp->b_read += step_len; done += step_len; } /* * Check if whole packet was read, and if so, jump to the next one. */ if (rp->b_read >= hdrbytes + ep->len_cap) { spin_lock_irqsave(&rp->b_lock, flags); mon_buff_area_free(rp, PKT_SIZE + ep->len_cap); spin_unlock_irqrestore(&rp->b_lock, flags); rp->b_read = 0; } mutex_unlock(&rp->fetch_lock); return done; } /* * Remove at most nevents from chunked buffer. * Returns the number of removed events. */ static int mon_bin_flush(struct mon_reader_bin *rp, unsigned nevents) { unsigned long flags; struct mon_bin_hdr *ep; int i; mutex_lock(&rp->fetch_lock); spin_lock_irqsave(&rp->b_lock, flags); for (i = 0; i < nevents; ++i) { if (MON_RING_EMPTY(rp)) break; ep = MON_OFF2HDR(rp, rp->b_out); mon_buff_area_free(rp, PKT_SIZE + ep->len_cap); } spin_unlock_irqrestore(&rp->b_lock, flags); rp->b_read = 0; mutex_unlock(&rp->fetch_lock); return i; } /* * Fetch at most max event offsets into the buffer and put them into vec. * The events are usually freed later with mon_bin_flush. * Return the effective number of events fetched. */ static int mon_bin_fetch(struct file *file, struct mon_reader_bin *rp, u32 __user *vec, unsigned int max) { unsigned int cur_out; unsigned int bytes, avail; unsigned int size; unsigned int nevents; struct mon_bin_hdr *ep; unsigned long flags; int rc; mutex_lock(&rp->fetch_lock); if ((rc = mon_bin_wait_event(file, rp)) < 0) { mutex_unlock(&rp->fetch_lock); return rc; } spin_lock_irqsave(&rp->b_lock, flags); avail = rp->b_cnt; spin_unlock_irqrestore(&rp->b_lock, flags); cur_out = rp->b_out; nevents = 0; bytes = 0; while (bytes < avail) { if (nevents >= max) break; ep = MON_OFF2HDR(rp, cur_out); if (put_user(cur_out, &vec[nevents])) { mutex_unlock(&rp->fetch_lock); return -EFAULT; } nevents++; size = ep->len_cap + PKT_SIZE; size = (size + PKT_ALIGN-1) & ~(PKT_ALIGN-1); if ((cur_out += size) >= rp->b_size) cur_out -= rp->b_size; bytes += size; } mutex_unlock(&rp->fetch_lock); return nevents; } /* * Count events. This is almost the same as the above mon_bin_fetch, * only we do not store offsets into user vector, and we have no limit. */ static int mon_bin_queued(struct mon_reader_bin *rp) { unsigned int cur_out; unsigned int bytes, avail; unsigned int size; unsigned int nevents; struct mon_bin_hdr *ep; unsigned long flags; mutex_lock(&rp->fetch_lock); spin_lock_irqsave(&rp->b_lock, flags); avail = rp->b_cnt; spin_unlock_irqrestore(&rp->b_lock, flags); cur_out = rp->b_out; nevents = 0; bytes = 0; while (bytes < avail) { ep = MON_OFF2HDR(rp, cur_out); nevents++; size = ep->len_cap + PKT_SIZE; size = (size + PKT_ALIGN-1) & ~(PKT_ALIGN-1); if ((cur_out += size) >= rp->b_size) cur_out -= rp->b_size; bytes += size; } mutex_unlock(&rp->fetch_lock); return nevents; } /* */ static long mon_bin_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct mon_reader_bin *rp = file->private_data; // struct mon_bus* mbus = rp->r.m_bus; int ret = 0; struct mon_bin_hdr *ep; unsigned long flags; switch (cmd) { case MON_IOCQ_URB_LEN: /* * N.B. This only returns the size of data, without the header. */ spin_lock_irqsave(&rp->b_lock, flags); if (!MON_RING_EMPTY(rp)) { ep = MON_OFF2HDR(rp, rp->b_out); ret = ep->len_cap; } spin_unlock_irqrestore(&rp->b_lock, flags); break; case MON_IOCQ_RING_SIZE: mutex_lock(&rp->fetch_lock); ret = rp->b_size; mutex_unlock(&rp->fetch_lock); break; case MON_IOCT_RING_SIZE: /* * Changing the buffer size will flush it's contents; the new * buffer is allocated before releasing the old one to be sure * the device will stay functional also in case of memory * pressure. */ { int size; struct mon_pgmap *vec; if (arg < BUFF_MIN || arg > BUFF_MAX) return -EINVAL; size = CHUNK_ALIGN(arg); vec = kcalloc(size / CHUNK_SIZE, sizeof(struct mon_pgmap), GFP_KERNEL); if (vec == NULL) { ret = -ENOMEM; break; } ret = mon_alloc_buff(vec, size/CHUNK_SIZE); if (ret < 0) { kfree(vec); break; } mutex_lock(&rp->fetch_lock); spin_lock_irqsave(&rp->b_lock, flags); if (rp->mmap_active) { mon_free_buff(vec, size/CHUNK_SIZE); kfree(vec); ret = -EBUSY; } else { mon_free_buff(rp->b_vec, rp->b_size/CHUNK_SIZE); kfree(rp->b_vec); rp->b_vec = vec; rp->b_size = size; rp->b_read = rp->b_in = rp->b_out = rp->b_cnt = 0; rp->cnt_lost = 0; } spin_unlock_irqrestore(&rp->b_lock, flags); mutex_unlock(&rp->fetch_lock); } break; case MON_IOCH_MFLUSH: ret = mon_bin_flush(rp, arg); break; case MON_IOCX_GET: case MON_IOCX_GETX: { struct mon_bin_get getb; if (copy_from_user(&getb, (void __user *)arg, sizeof(struct mon_bin_get))) return -EFAULT; if (getb.alloc > 0x10000000) /* Want to cast to u32 */ return -EINVAL; ret = mon_bin_get_event(file, rp, getb.hdr, (cmd == MON_IOCX_GET)? PKT_SZ_API0: PKT_SZ_API1, getb.data, (unsigned int)getb.alloc); } break; case MON_IOCX_MFETCH: { struct mon_bin_mfetch mfetch; struct mon_bin_mfetch __user *uptr; uptr = (struct mon_bin_mfetch __user *)arg; if (copy_from_user(&mfetch, uptr, sizeof(mfetch))) return -EFAULT; if (mfetch.nflush) { ret = mon_bin_flush(rp, mfetch.nflush); if (ret < 0) return ret; if (put_user(ret, &uptr->nflush)) return -EFAULT; } ret = mon_bin_fetch(file, rp, mfetch.offvec, mfetch.nfetch); if (ret < 0) return ret; if (put_user(ret, &uptr->nfetch)) return -EFAULT; ret = 0; } break; case MON_IOCG_STATS: { struct mon_bin_stats __user *sp; unsigned int nevents; unsigned int ndropped; spin_lock_irqsave(&rp->b_lock, flags); ndropped = rp->cnt_lost; rp->cnt_lost = 0; spin_unlock_irqrestore(&rp->b_lock, flags); nevents = mon_bin_queued(rp); sp = (struct mon_bin_stats __user *)arg; if (put_user(ndropped, &sp->dropped)) return -EFAULT; if (put_user(nevents, &sp->queued)) return -EFAULT; } break; default: return -ENOTTY; } return ret; } #ifdef CONFIG_COMPAT static long mon_bin_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct mon_reader_bin *rp = file->private_data; int ret; switch (cmd) { case MON_IOCX_GET32: case MON_IOCX_GETX32: { struct mon_bin_get32 getb; if (copy_from_user(&getb, (void __user *)arg, sizeof(struct mon_bin_get32))) return -EFAULT; ret = mon_bin_get_event(file, rp, compat_ptr(getb.hdr32), (cmd == MON_IOCX_GET32)? PKT_SZ_API0: PKT_SZ_API1, compat_ptr(getb.data32), getb.alloc32); if (ret < 0) return ret; } return 0; case MON_IOCX_MFETCH32: { struct mon_bin_mfetch32 mfetch; struct mon_bin_mfetch32 __user *uptr; uptr = (struct mon_bin_mfetch32 __user *) compat_ptr(arg); if (copy_from_user(&mfetch, uptr, sizeof(mfetch))) return -EFAULT; if (mfetch.nflush32) { ret = mon_bin_flush(rp, mfetch.nflush32); if (ret < 0) return ret; if (put_user(ret, &uptr->nflush32)) return -EFAULT; } ret = mon_bin_fetch(file, rp, compat_ptr(mfetch.offvec32), mfetch.nfetch32); if (ret < 0) return ret; if (put_user(ret, &uptr->nfetch32)) return -EFAULT; } return 0; case MON_IOCG_STATS: return mon_bin_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); case MON_IOCQ_URB_LEN: case MON_IOCQ_RING_SIZE: case MON_IOCT_RING_SIZE: case MON_IOCH_MFLUSH: return mon_bin_ioctl(file, cmd, arg); default: ; } return -ENOTTY; } #endif /* CONFIG_COMPAT */ static __poll_t mon_bin_poll(struct file *file, struct poll_table_struct *wait) { struct mon_reader_bin *rp = file->private_data; __poll_t mask = 0; unsigned long flags; if (file->f_mode & FMODE_READ) poll_wait(file, &rp->b_wait, wait); spin_lock_irqsave(&rp->b_lock, flags); if (!MON_RING_EMPTY(rp)) mask |= EPOLLIN | EPOLLRDNORM; /* readable */ spin_unlock_irqrestore(&rp->b_lock, flags); return mask; } /* * open and close: just keep track of how many times the device is * mapped, to use the proper memory allocation function. */ static void mon_bin_vma_open(struct vm_area_struct *vma) { struct mon_reader_bin *rp = vma->vm_private_data; unsigned long flags; spin_lock_irqsave(&rp->b_lock, flags); rp->mmap_active++; spin_unlock_irqrestore(&rp->b_lock, flags); } static void mon_bin_vma_close(struct vm_area_struct *vma) { unsigned long flags; struct mon_reader_bin *rp = vma->vm_private_data; spin_lock_irqsave(&rp->b_lock, flags); rp->mmap_active--; spin_unlock_irqrestore(&rp->b_lock, flags); } /* * Map ring pages to user space. */ static vm_fault_t mon_bin_vma_fault(struct vm_fault *vmf) { struct mon_reader_bin *rp = vmf->vma->vm_private_data; unsigned long offset, chunk_idx; struct page *pageptr; unsigned long flags; spin_lock_irqsave(&rp->b_lock, flags); offset = vmf->pgoff << PAGE_SHIFT; if (offset >= rp->b_size) { spin_unlock_irqrestore(&rp->b_lock, flags); return VM_FAULT_SIGBUS; } chunk_idx = offset / CHUNK_SIZE; pageptr = rp->b_vec[chunk_idx].pg; get_page(pageptr); vmf->page = pageptr; spin_unlock_irqrestore(&rp->b_lock, flags); return 0; } static const struct vm_operations_struct mon_bin_vm_ops = { .open = mon_bin_vma_open, .close = mon_bin_vma_close, .fault = mon_bin_vma_fault, }; static int mon_bin_mmap(struct file *filp, struct vm_area_struct *vma) { /* don't do anything here: "fault" will set up page table entries */ vma->vm_ops = &mon_bin_vm_ops; if (vma->vm_flags & VM_WRITE) return -EPERM; vm_flags_mod(vma, VM_DONTEXPAND | VM_DONTDUMP, VM_MAYWRITE); vma->vm_private_data = filp->private_data; mon_bin_vma_open(vma); return 0; } static const struct file_operations mon_fops_binary = { .owner = THIS_MODULE, .open = mon_bin_open, .read = mon_bin_read, /* .write = mon_text_write, */ .poll = mon_bin_poll, .unlocked_ioctl = mon_bin_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = mon_bin_compat_ioctl, #endif .release = mon_bin_release, .mmap = mon_bin_mmap, }; static int mon_bin_wait_event(struct file *file, struct mon_reader_bin *rp) { DECLARE_WAITQUEUE(waita, current); unsigned long flags; add_wait_queue(&rp->b_wait, &waita); set_current_state(TASK_INTERRUPTIBLE); spin_lock_irqsave(&rp->b_lock, flags); while (MON_RING_EMPTY(rp)) { spin_unlock_irqrestore(&rp->b_lock, flags); if (file->f_flags & O_NONBLOCK) { set_current_state(TASK_RUNNING); remove_wait_queue(&rp->b_wait, &waita); return -EWOULDBLOCK; /* Same as EAGAIN in Linux */ } schedule(); if (signal_pending(current)) { remove_wait_queue(&rp->b_wait, &waita); return -EINTR; } set_current_state(TASK_INTERRUPTIBLE); spin_lock_irqsave(&rp->b_lock, flags); } spin_unlock_irqrestore(&rp->b_lock, flags); set_current_state(TASK_RUNNING); remove_wait_queue(&rp->b_wait, &waita); return 0; } static int mon_alloc_buff(struct mon_pgmap *map, int npages) { int n; unsigned long vaddr; for (n = 0; n < npages; n++) { vaddr = get_zeroed_page(GFP_KERNEL); if (vaddr == 0) { while (n-- != 0) free_page((unsigned long) map[n].ptr); return -ENOMEM; } map[n].ptr = (unsigned char *) vaddr; map[n].pg = virt_to_page((void *) vaddr); } return 0; } static void mon_free_buff(struct mon_pgmap *map, int npages) { int n; for (n = 0; n < npages; n++) free_page((unsigned long) map[n].ptr); } int mon_bin_add(struct mon_bus *mbus, const struct usb_bus *ubus) { struct device *dev; unsigned minor = ubus? ubus->busnum: 0; if (minor >= MON_BIN_MAX_MINOR) return 0; dev = device_create(&mon_bin_class, ubus ? ubus->controller : NULL, MKDEV(MAJOR(mon_bin_dev0), minor), NULL, "usbmon%d", minor); if (IS_ERR(dev)) return 0; mbus->classdev = dev; return 1; } void mon_bin_del(struct mon_bus *mbus) { device_destroy(&mon_bin_class, mbus->classdev->devt); } int __init mon_bin_init(void) { int rc; rc = class_register(&mon_bin_class); if (rc) goto err_class; rc = alloc_chrdev_region(&mon_bin_dev0, 0, MON_BIN_MAX_MINOR, "usbmon"); if (rc < 0) goto err_dev; cdev_init(&mon_bin_cdev, &mon_fops_binary); mon_bin_cdev.owner = THIS_MODULE; rc = cdev_add(&mon_bin_cdev, mon_bin_dev0, MON_BIN_MAX_MINOR); if (rc < 0) goto err_add; return 0; err_add: unregister_chrdev_region(mon_bin_dev0, MON_BIN_MAX_MINOR); err_dev: class_unregister(&mon_bin_class); err_class: return rc; } void mon_bin_exit(void) { cdev_del(&mon_bin_cdev); unregister_chrdev_region(mon_bin_dev0, MON_BIN_MAX_MINOR); class_unregister(&mon_bin_class); } |
18 18 18 18 18 18 20 20 6 6 6 6 20 20 19 2 2 2 2 2 2 18 18 18 2 6 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 | // SPDX-License-Identifier: GPL-2.0-or-later /* Client connection-specific management code. * * Copyright (C) 2016, 2020 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * Client connections need to be cached for a little while after they've made a * call so as to handle retransmitted DATA packets in case the server didn't * receive the final ACK or terminating ABORT we sent it. * * There are flags of relevance to the cache: * * (2) DONT_REUSE - The connection should be discarded as soon as possible and * should not be reused. This is set when an exclusive connection is used * or a call ID counter overflows. * * The caching state may only be changed if the cache lock is held. * * There are two idle client connection expiry durations. If the total number * of connections is below the reap threshold, we use the normal duration; if * it's above, we use the fast duration. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/slab.h> #include <linux/idr.h> #include <linux/timer.h> #include <linux/sched/signal.h> #include "ar-internal.h" __read_mostly unsigned int rxrpc_reap_client_connections = 900; __read_mostly unsigned long rxrpc_conn_idle_client_expiry = 2 * 60 * HZ; __read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ; static void rxrpc_activate_bundle(struct rxrpc_bundle *bundle) { atomic_inc(&bundle->active); } /* * Release a connection ID for a client connection. */ static void rxrpc_put_client_connection_id(struct rxrpc_local *local, struct rxrpc_connection *conn) { idr_remove(&local->conn_ids, conn->proto.cid >> RXRPC_CIDSHIFT); } /* * Destroy the client connection ID tree. */ static void rxrpc_destroy_client_conn_ids(struct rxrpc_local *local) { struct rxrpc_connection *conn; int id; if (!idr_is_empty(&local->conn_ids)) { idr_for_each_entry(&local->conn_ids, conn, id) { pr_err("AF_RXRPC: Leaked client conn %p {%d}\n", conn, refcount_read(&conn->ref)); } BUG(); } idr_destroy(&local->conn_ids); } /* * Allocate a connection bundle. */ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_call *call, gfp_t gfp) { static atomic_t rxrpc_bundle_id; struct rxrpc_bundle *bundle; bundle = kzalloc(sizeof(*bundle), gfp); if (bundle) { bundle->local = call->local; bundle->peer = rxrpc_get_peer(call->peer, rxrpc_peer_get_bundle); bundle->key = key_get(call->key); bundle->security = call->security; bundle->exclusive = test_bit(RXRPC_CALL_EXCLUSIVE, &call->flags); bundle->upgrade = test_bit(RXRPC_CALL_UPGRADE, &call->flags); bundle->service_id = call->dest_srx.srx_service; bundle->security_level = call->security_level; bundle->debug_id = atomic_inc_return(&rxrpc_bundle_id); refcount_set(&bundle->ref, 1); atomic_set(&bundle->active, 1); INIT_LIST_HEAD(&bundle->waiting_calls); trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_new); write_lock(&bundle->local->rxnet->conn_lock); list_add_tail(&bundle->proc_link, &bundle->local->rxnet->bundle_proc_list); write_unlock(&bundle->local->rxnet->conn_lock); } return bundle; } struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *bundle, enum rxrpc_bundle_trace why) { int r; __refcount_inc(&bundle->ref, &r); trace_rxrpc_bundle(bundle->debug_id, r + 1, why); return bundle; } static void rxrpc_free_bundle(struct rxrpc_bundle *bundle) { trace_rxrpc_bundle(bundle->debug_id, refcount_read(&bundle->ref), rxrpc_bundle_free); write_lock(&bundle->local->rxnet->conn_lock); list_del(&bundle->proc_link); write_unlock(&bundle->local->rxnet->conn_lock); rxrpc_put_peer(bundle->peer, rxrpc_peer_put_bundle); key_put(bundle->key); kfree(bundle); } void rxrpc_put_bundle(struct rxrpc_bundle *bundle, enum rxrpc_bundle_trace why) { unsigned int id; bool dead; int r; if (bundle) { id = bundle->debug_id; dead = __refcount_dec_and_test(&bundle->ref, &r); trace_rxrpc_bundle(id, r - 1, why); if (dead) rxrpc_free_bundle(bundle); } } /* * Get rid of outstanding client connection preallocations when a local * endpoint is destroyed. */ void rxrpc_purge_client_connections(struct rxrpc_local *local) { rxrpc_destroy_client_conn_ids(local); } /* * Allocate a client connection. */ static struct rxrpc_connection * rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle) { struct rxrpc_connection *conn; struct rxrpc_local *local = bundle->local; struct rxrpc_net *rxnet = local->rxnet; int id; _enter(""); conn = rxrpc_alloc_connection(rxnet, GFP_ATOMIC | __GFP_NOWARN); if (!conn) return ERR_PTR(-ENOMEM); id = idr_alloc_cyclic(&local->conn_ids, conn, 1, 0x40000000, GFP_ATOMIC | __GFP_NOWARN); if (id < 0) { kfree(conn); return ERR_PTR(id); } refcount_set(&conn->ref, 1); conn->proto.cid = id << RXRPC_CIDSHIFT; conn->proto.epoch = local->rxnet->epoch; conn->out_clientflag = RXRPC_CLIENT_INITIATED; conn->bundle = rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_conn); conn->local = rxrpc_get_local(bundle->local, rxrpc_local_get_client_conn); conn->peer = rxrpc_get_peer(bundle->peer, rxrpc_peer_get_client_conn); conn->key = key_get(bundle->key); conn->security = bundle->security; conn->exclusive = bundle->exclusive; conn->upgrade = bundle->upgrade; conn->orig_service_id = bundle->service_id; conn->security_level = bundle->security_level; conn->state = RXRPC_CONN_CLIENT_UNSECURED; conn->service_id = conn->orig_service_id; if (conn->security == &rxrpc_no_security) conn->state = RXRPC_CONN_CLIENT; atomic_inc(&rxnet->nr_conns); write_lock(&rxnet->conn_lock); list_add_tail(&conn->proc_link, &rxnet->conn_proc_list); write_unlock(&rxnet->conn_lock); rxrpc_see_connection(conn, rxrpc_conn_new_client); atomic_inc(&rxnet->nr_client_conns); trace_rxrpc_client(conn, -1, rxrpc_client_alloc); return conn; } /* * Determine if a connection may be reused. */ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn) { struct rxrpc_net *rxnet; int id_cursor, id, distance, limit; if (!conn) goto dont_reuse; rxnet = conn->rxnet; if (test_bit(RXRPC_CONN_DONT_REUSE, &conn->flags)) goto dont_reuse; if ((conn->state != RXRPC_CONN_CLIENT_UNSECURED && conn->state != RXRPC_CONN_CLIENT) || conn->proto.epoch != rxnet->epoch) goto mark_dont_reuse; /* The IDR tree gets very expensive on memory if the connection IDs are * widely scattered throughout the number space, so we shall want to * kill off connections that, say, have an ID more than about four * times the maximum number of client conns away from the current * allocation point to try and keep the IDs concentrated. */ id_cursor = idr_get_cursor(&conn->local->conn_ids); id = conn->proto.cid >> RXRPC_CIDSHIFT; distance = id - id_cursor; if (distance < 0) distance = -distance; limit = umax(atomic_read(&rxnet->nr_conns) * 4, 1024); if (distance > limit) goto mark_dont_reuse; return true; mark_dont_reuse: set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); dont_reuse: return false; } /* * Look up the conn bundle that matches the connection parameters, adding it if * it doesn't yet exist. */ int rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp) { struct rxrpc_bundle *bundle, *candidate; struct rxrpc_local *local = call->local; struct rb_node *p, **pp, *parent; long diff; bool upgrade = test_bit(RXRPC_CALL_UPGRADE, &call->flags); _enter("{%px,%x,%u,%u}", call->peer, key_serial(call->key), call->security_level, upgrade); if (test_bit(RXRPC_CALL_EXCLUSIVE, &call->flags)) { call->bundle = rxrpc_alloc_bundle(call, gfp); return call->bundle ? 0 : -ENOMEM; } /* First, see if the bundle is already there. */ _debug("search 1"); spin_lock(&local->client_bundles_lock); p = local->client_bundles.rb_node; while (p) { bundle = rb_entry(p, struct rxrpc_bundle, local_node); #define cmp(X, Y) ((long)(X) - (long)(Y)) diff = (cmp(bundle->peer, call->peer) ?: cmp(bundle->key, call->key) ?: cmp(bundle->security_level, call->security_level) ?: cmp(bundle->upgrade, upgrade)); #undef cmp if (diff < 0) p = p->rb_left; else if (diff > 0) p = p->rb_right; else goto found_bundle; } spin_unlock(&local->client_bundles_lock); _debug("not found"); /* It wasn't. We need to add one. */ candidate = rxrpc_alloc_bundle(call, gfp); if (!candidate) return -ENOMEM; _debug("search 2"); spin_lock(&local->client_bundles_lock); pp = &local->client_bundles.rb_node; parent = NULL; while (*pp) { parent = *pp; bundle = rb_entry(parent, struct rxrpc_bundle, local_node); #define cmp(X, Y) ((long)(X) - (long)(Y)) diff = (cmp(bundle->peer, call->peer) ?: cmp(bundle->key, call->key) ?: cmp(bundle->security_level, call->security_level) ?: cmp(bundle->upgrade, upgrade)); #undef cmp if (diff < 0) pp = &(*pp)->rb_left; else if (diff > 0) pp = &(*pp)->rb_right; else goto found_bundle_free; } _debug("new bundle"); rb_link_node(&candidate->local_node, parent, pp); rb_insert_color(&candidate->local_node, &local->client_bundles); call->bundle = rxrpc_get_bundle(candidate, rxrpc_bundle_get_client_call); spin_unlock(&local->client_bundles_lock); _leave(" = B=%u [new]", call->bundle->debug_id); return 0; found_bundle_free: rxrpc_free_bundle(candidate); found_bundle: call->bundle = rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_call); rxrpc_activate_bundle(bundle); spin_unlock(&local->client_bundles_lock); _leave(" = B=%u [found]", call->bundle->debug_id); return 0; } /* * Allocate a new connection and add it into a bundle. */ static bool rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, unsigned int slot) { struct rxrpc_connection *conn, *old; unsigned int shift = slot * RXRPC_MAXCALLS; unsigned int i; old = bundle->conns[slot]; if (old) { bundle->conns[slot] = NULL; bundle->conn_ids[slot] = 0; trace_rxrpc_client(old, -1, rxrpc_client_replace); rxrpc_put_connection(old, rxrpc_conn_put_noreuse); } conn = rxrpc_alloc_client_connection(bundle); if (IS_ERR(conn)) { bundle->alloc_error = PTR_ERR(conn); return false; } rxrpc_activate_bundle(bundle); conn->bundle_shift = shift; bundle->conns[slot] = conn; bundle->conn_ids[slot] = conn->debug_id; for (i = 0; i < RXRPC_MAXCALLS; i++) set_bit(shift + i, &bundle->avail_chans); return true; } /* * Add a connection to a bundle if there are no usable connections or we have * connections waiting for extra capacity. */ static bool rxrpc_bundle_has_space(struct rxrpc_bundle *bundle) { int slot = -1, i, usable; _enter(""); bundle->alloc_error = 0; /* See if there are any usable connections. */ usable = 0; for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) { if (rxrpc_may_reuse_conn(bundle->conns[i])) usable++; else if (slot == -1) slot = i; } if (!usable && bundle->upgrade) bundle->try_upgrade = true; if (!usable) goto alloc_conn; if (!bundle->avail_chans && !bundle->try_upgrade && usable < ARRAY_SIZE(bundle->conns)) goto alloc_conn; _leave(""); return usable; alloc_conn: return slot >= 0 ? rxrpc_add_conn_to_bundle(bundle, slot) : false; } /* * Assign a channel to the call at the front of the queue and wake the call up. * We don't increment the callNumber counter until this number has been exposed * to the world. */ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, unsigned int channel) { struct rxrpc_channel *chan = &conn->channels[channel]; struct rxrpc_bundle *bundle = conn->bundle; struct rxrpc_call *call = list_entry(bundle->waiting_calls.next, struct rxrpc_call, wait_link); u32 call_id = chan->call_counter + 1; _enter("C=%x,%u", conn->debug_id, channel); list_del_init(&call->wait_link); trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate); /* Cancel the final ACK on the previous call if it hasn't been sent yet * as the DATA packet will implicitly ACK it. */ clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags); clear_bit(conn->bundle_shift + channel, &bundle->avail_chans); rxrpc_see_call(call, rxrpc_call_see_activate_client); call->conn = rxrpc_get_connection(conn, rxrpc_conn_get_activate_call); call->cid = conn->proto.cid | channel; call->call_id = call_id; call->dest_srx.srx_service = conn->service_id; call->cong_ssthresh = call->peer->cong_ssthresh; if (call->cong_cwnd >= call->cong_ssthresh) call->cong_ca_state = RXRPC_CA_CONGEST_AVOIDANCE; else call->cong_ca_state = RXRPC_CA_SLOW_START; chan->call_id = call_id; chan->call_debug_id = call->debug_id; chan->call = call; rxrpc_see_call(call, rxrpc_call_see_connected); trace_rxrpc_connect_call(call); call->tx_last_sent = ktime_get_real(); rxrpc_start_call_timer(call); rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_SEND_REQUEST); wake_up(&call->waitq); } /* * Remove a connection from the idle list if it's on it. */ static void rxrpc_unidle_conn(struct rxrpc_connection *conn) { if (!list_empty(&conn->cache_link)) { list_del_init(&conn->cache_link); rxrpc_put_connection(conn, rxrpc_conn_put_unidle); } } /* * Assign channels and callNumbers to waiting calls. */ static void rxrpc_activate_channels(struct rxrpc_bundle *bundle) { struct rxrpc_connection *conn; unsigned long avail, mask; unsigned int channel, slot; trace_rxrpc_client(NULL, -1, rxrpc_client_activate_chans); if (bundle->try_upgrade) mask = 1; else mask = ULONG_MAX; while (!list_empty(&bundle->waiting_calls)) { avail = bundle->avail_chans & mask; if (!avail) break; channel = __ffs(avail); clear_bit(channel, &bundle->avail_chans); slot = channel / RXRPC_MAXCALLS; conn = bundle->conns[slot]; if (!conn) break; if (bundle->try_upgrade) set_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags); rxrpc_unidle_conn(conn); channel &= (RXRPC_MAXCALLS - 1); conn->act_chans |= 1 << channel; rxrpc_activate_one_channel(conn, channel); } } /* * Connect waiting channels (called from the I/O thread). */ void rxrpc_connect_client_calls(struct rxrpc_local *local) { struct rxrpc_call *call; LIST_HEAD(new_client_calls); spin_lock_irq(&local->client_call_lock); list_splice_tail_init(&local->new_client_calls, &new_client_calls); spin_unlock_irq(&local->client_call_lock); while ((call = list_first_entry_or_null(&new_client_calls, struct rxrpc_call, wait_link))) { struct rxrpc_bundle *bundle = call->bundle; list_move_tail(&call->wait_link, &bundle->waiting_calls); rxrpc_see_call(call, rxrpc_call_see_waiting_call); if (rxrpc_bundle_has_space(bundle)) rxrpc_activate_channels(bundle); } } /* * Note that a call, and thus a connection, is about to be exposed to the * world. */ void rxrpc_expose_client_call(struct rxrpc_call *call) { unsigned int channel = call->cid & RXRPC_CHANNELMASK; struct rxrpc_connection *conn = call->conn; struct rxrpc_channel *chan = &conn->channels[channel]; if (!test_and_set_bit(RXRPC_CALL_EXPOSED, &call->flags)) { /* Mark the call ID as being used. If the callNumber counter * exceeds ~2 billion, we kill the connection after its * outstanding calls have finished so that the counter doesn't * wrap. */ chan->call_counter++; if (chan->call_counter >= INT_MAX) set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); trace_rxrpc_client(conn, channel, rxrpc_client_exposed); spin_lock_irq(&call->peer->lock); hlist_add_head(&call->error_link, &call->peer->error_targets); spin_unlock_irq(&call->peer->lock); } } /* * Set the reap timer. */ static void rxrpc_set_client_reap_timer(struct rxrpc_local *local) { if (!local->kill_all_client_conns) { unsigned long now = jiffies; unsigned long reap_at = now + rxrpc_conn_idle_client_expiry; if (local->rxnet->live) timer_reduce(&local->client_conn_reap_timer, reap_at); } } /* * Disconnect a client call. */ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call *call) { struct rxrpc_connection *conn; struct rxrpc_channel *chan = NULL; struct rxrpc_local *local = bundle->local; unsigned int channel; bool may_reuse; u32 cid; _enter("c=%x", call->debug_id); /* Calls that have never actually been assigned a channel can simply be * discarded. */ conn = call->conn; if (!conn) { _debug("call is waiting"); ASSERTCMP(call->call_id, ==, 0); ASSERT(!test_bit(RXRPC_CALL_EXPOSED, &call->flags)); /* May still be on ->new_client_calls. */ spin_lock_irq(&local->client_call_lock); list_del_init(&call->wait_link); spin_unlock_irq(&local->client_call_lock); return; } cid = call->cid; channel = cid & RXRPC_CHANNELMASK; chan = &conn->channels[channel]; trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect); if (WARN_ON(chan->call != call)) return; may_reuse = rxrpc_may_reuse_conn(conn); /* If a client call was exposed to the world, we save the result for * retransmission. * * We use a barrier here so that the call number and abort code can be * read without needing to take a lock. * * TODO: Make the incoming packet handler check this and handle * terminal retransmission without requiring access to the call. */ if (test_bit(RXRPC_CALL_EXPOSED, &call->flags)) { _debug("exposed %u,%u", call->call_id, call->abort_code); __rxrpc_disconnect_call(conn, call); if (test_and_clear_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags)) { trace_rxrpc_client(conn, channel, rxrpc_client_to_active); bundle->try_upgrade = false; if (may_reuse) rxrpc_activate_channels(bundle); } } /* See if we can pass the channel directly to another call. */ if (may_reuse && !list_empty(&bundle->waiting_calls)) { trace_rxrpc_client(conn, channel, rxrpc_client_chan_pass); rxrpc_activate_one_channel(conn, channel); return; } /* Schedule the final ACK to be transmitted in a short while so that it * can be skipped if we find a follow-on call. The first DATA packet * of the follow on call will implicitly ACK this call. */ if (call->completion == RXRPC_CALL_SUCCEEDED && test_bit(RXRPC_CALL_EXPOSED, &call->flags)) { unsigned long final_ack_at = jiffies + 2; chan->final_ack_at = final_ack_at; smp_wmb(); /* vs rxrpc_process_delayed_final_acks() */ set_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags); rxrpc_reduce_conn_timer(conn, final_ack_at); } /* Deactivate the channel. */ chan->call = NULL; set_bit(conn->bundle_shift + channel, &conn->bundle->avail_chans); conn->act_chans &= ~(1 << channel); /* If no channels remain active, then put the connection on the idle * list for a short while. Give it a ref to stop it going away if it * becomes unbundled. */ if (!conn->act_chans) { trace_rxrpc_client(conn, channel, rxrpc_client_to_idle); conn->idle_timestamp = jiffies; rxrpc_get_connection(conn, rxrpc_conn_get_idle); list_move_tail(&conn->cache_link, &local->idle_client_conns); rxrpc_set_client_reap_timer(local); } } /* * Remove a connection from a bundle. */ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) { struct rxrpc_bundle *bundle = conn->bundle; unsigned int bindex; int i; _enter("C=%x", conn->debug_id); if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK) rxrpc_process_delayed_final_acks(conn, true); bindex = conn->bundle_shift / RXRPC_MAXCALLS; if (bundle->conns[bindex] == conn) { _debug("clear slot %u", bindex); bundle->conns[bindex] = NULL; bundle->conn_ids[bindex] = 0; for (i = 0; i < RXRPC_MAXCALLS; i++) clear_bit(conn->bundle_shift + i, &bundle->avail_chans); rxrpc_put_client_connection_id(bundle->local, conn); rxrpc_deactivate_bundle(bundle); rxrpc_put_connection(conn, rxrpc_conn_put_unbundle); } } /* * Drop the active count on a bundle. */ void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle) { struct rxrpc_local *local; bool need_put = false; if (!bundle) return; local = bundle->local; if (atomic_dec_and_lock(&bundle->active, &local->client_bundles_lock)) { if (!bundle->exclusive) { _debug("erase bundle"); rb_erase(&bundle->local_node, &local->client_bundles); need_put = true; } spin_unlock(&local->client_bundles_lock); if (need_put) rxrpc_put_bundle(bundle, rxrpc_bundle_put_discard); } } /* * Clean up a dead client connection. */ void rxrpc_kill_client_conn(struct rxrpc_connection *conn) { struct rxrpc_local *local = conn->local; struct rxrpc_net *rxnet = local->rxnet; _enter("C=%x", conn->debug_id); trace_rxrpc_client(conn, -1, rxrpc_client_cleanup); atomic_dec(&rxnet->nr_client_conns); rxrpc_put_client_connection_id(local, conn); } /* * Discard expired client connections from the idle list. Each conn in the * idle list has been exposed and holds an extra ref because of that. * * This may be called from conn setup or from a work item so cannot be * considered non-reentrant. */ void rxrpc_discard_expired_client_conns(struct rxrpc_local *local) { struct rxrpc_connection *conn; unsigned long expiry, conn_expires_at, now; unsigned int nr_conns; _enter(""); /* We keep an estimate of what the number of conns ought to be after * we've discarded some so that we don't overdo the discarding. */ nr_conns = atomic_read(&local->rxnet->nr_client_conns); next: conn = list_first_entry_or_null(&local->idle_client_conns, struct rxrpc_connection, cache_link); if (!conn) return; if (!local->kill_all_client_conns) { /* If the number of connections is over the reap limit, we * expedite discard by reducing the expiry timeout. We must, * however, have at least a short grace period to be able to do * final-ACK or ABORT retransmission. */ expiry = rxrpc_conn_idle_client_expiry; if (nr_conns > rxrpc_reap_client_connections) expiry = rxrpc_conn_idle_client_fast_expiry; if (conn->local->service_closed) expiry = rxrpc_closed_conn_expiry * HZ; conn_expires_at = conn->idle_timestamp + expiry; now = jiffies; if (time_after(conn_expires_at, now)) goto not_yet_expired; } atomic_dec(&conn->active); trace_rxrpc_client(conn, -1, rxrpc_client_discard); list_del_init(&conn->cache_link); rxrpc_unbundle_conn(conn); /* Drop the ->cache_link ref */ rxrpc_put_connection(conn, rxrpc_conn_put_discard_idle); nr_conns--; goto next; not_yet_expired: /* The connection at the front of the queue hasn't yet expired, so * schedule the work item for that point if we discarded something. * * We don't worry if the work item is already scheduled - it can look * after rescheduling itself at a later time. We could cancel it, but * then things get messier. */ _debug("not yet"); if (!local->kill_all_client_conns) timer_reduce(&local->client_conn_reap_timer, conn_expires_at); _leave(""); } /* * Clean up the client connections on a local endpoint. */ void rxrpc_clean_up_local_conns(struct rxrpc_local *local) { struct rxrpc_connection *conn; _enter(""); local->kill_all_client_conns = true; timer_delete_sync(&local->client_conn_reap_timer); while ((conn = list_first_entry_or_null(&local->idle_client_conns, struct rxrpc_connection, cache_link))) { list_del_init(&conn->cache_link); atomic_dec(&conn->active); trace_rxrpc_client(conn, -1, rxrpc_client_discard); rxrpc_unbundle_conn(conn); rxrpc_put_connection(conn, rxrpc_conn_put_local_dead); } _leave(" [culled]"); } |
142 142 259 2449 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_QSPINLOCK_H #define _ASM_X86_QSPINLOCK_H #include <linux/jump_label.h> #include <asm/cpufeature.h> #include <asm-generic/qspinlock_types.h> #include <asm/paravirt.h> #include <asm/rmwcc.h> #define _Q_PENDING_LOOPS (1 << 9) #define queued_fetch_set_pending_acquire queued_fetch_set_pending_acquire static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock) { u32 val; /* * We can't use GEN_BINARY_RMWcc() inside an if() stmt because asm goto * and CONFIG_PROFILE_ALL_BRANCHES=y results in a label inside a * statement expression, which GCC doesn't like. */ val = GEN_BINARY_RMWcc(LOCK_PREFIX "btsl", lock->val.counter, c, "I", _Q_PENDING_OFFSET) * _Q_PENDING_VAL; val |= atomic_read(&lock->val) & ~_Q_PENDING_MASK; return val; } #ifdef CONFIG_PARAVIRT_SPINLOCKS extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); extern void __pv_init_lock_hash(void); extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock); extern bool nopvspin; #define queued_spin_unlock queued_spin_unlock /** * queued_spin_unlock - release a queued spinlock * @lock : Pointer to queued spinlock structure * * A smp_store_release() on the least-significant byte. */ static inline void native_queued_spin_unlock(struct qspinlock *lock) { smp_store_release(&lock->locked, 0); } static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) { pv_queued_spin_lock_slowpath(lock, val); } static inline void queued_spin_unlock(struct qspinlock *lock) { kcsan_release(); pv_queued_spin_unlock(lock); } #define vcpu_is_preempted vcpu_is_preempted static inline bool vcpu_is_preempted(long cpu) { return pv_vcpu_is_preempted(cpu); } #endif #ifdef CONFIG_PARAVIRT /* * virt_spin_lock_key - disables by default the virt_spin_lock() hijack. * * Native (and PV wanting native due to vCPU pinning) should keep this key * disabled. Native does not touch the key. * * When in a guest then native_pv_lock_init() enables the key first and * KVM/XEN might conditionally disable it later in the boot process again. */ DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key); /* * Shortcut for the queued_spin_lock_slowpath() function that allows * virt to hijack it. * * Returns: * true - lock has been negotiated, all done; * false - queued_spin_lock_slowpath() will do its thing. */ #define virt_spin_lock virt_spin_lock static inline bool virt_spin_lock(struct qspinlock *lock) { int val; if (!static_branch_likely(&virt_spin_lock_key)) return false; /* * On hypervisors without PARAVIRT_SPINLOCKS support we fall * back to a Test-and-Set spinlock, because fair locks have * horrible lock 'holder' preemption issues. */ __retry: val = atomic_read(&lock->val); if (val || !atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)) { cpu_relax(); goto __retry; } return true; } #endif /* CONFIG_PARAVIRT */ #include <asm-generic/qspinlock.h> #endif /* _ASM_X86_QSPINLOCK_H */ |
70 130 120 43 60 55 15 15 12 12 11 11 101 46 36 53 26 9 9 9 29 29 29 21 21 21 14 14 14 14 10 14 14 10 14 14 7 14 14 14 14 14 14 14 14 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 | // SPDX-License-Identifier: GPL-2.0-or-later /* * xfrm algorithm interface * * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> */ #include <crypto/acompress.h> #include <crypto/aead.h> #include <crypto/hash.h> #include <crypto/skcipher.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/pfkeyv2.h> #include <linux/scatterlist.h> #include <net/xfrm.h> #if IS_ENABLED(CONFIG_INET_ESP) || IS_ENABLED(CONFIG_INET6_ESP) #include <net/esp.h> #endif /* * Algorithms supported by IPsec. These entries contain properties which * are used in key negotiation and xfrm processing, and are used to verify * that instantiated crypto transforms have correct parameters for IPsec * purposes. */ static struct xfrm_algo_desc aead_list[] = { { .name = "rfc4106(gcm(aes))", .uinfo = { .aead = { .geniv = "seqiv", .icv_truncbits = 64, } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_EALG_AES_GCM_ICV8, .sadb_alg_ivlen = 8, .sadb_alg_minbits = 128, .sadb_alg_maxbits = 256 } }, { .name = "rfc4106(gcm(aes))", .uinfo = { .aead = { .geniv = "seqiv", .icv_truncbits = 96, } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_EALG_AES_GCM_ICV12, .sadb_alg_ivlen = 8, .sadb_alg_minbits = 128, .sadb_alg_maxbits = 256 } }, { .name = "rfc4106(gcm(aes))", .uinfo = { .aead = { .geniv = "seqiv", .icv_truncbits = 128, } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_EALG_AES_GCM_ICV16, .sadb_alg_ivlen = 8, .sadb_alg_minbits = 128, .sadb_alg_maxbits = 256 } }, { .name = "rfc4309(ccm(aes))", .uinfo = { .aead = { .geniv = "seqiv", .icv_truncbits = 64, } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_EALG_AES_CCM_ICV8, .sadb_alg_ivlen = 8, .sadb_alg_minbits = 128, .sadb_alg_maxbits = 256 } }, { .name = "rfc4309(ccm(aes))", .uinfo = { .aead = { .geniv = "seqiv", .icv_truncbits = 96, } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_EALG_AES_CCM_ICV12, .sadb_alg_ivlen = 8, .sadb_alg_minbits = 128, .sadb_alg_maxbits = 256 } }, { .name = "rfc4309(ccm(aes))", .uinfo = { .aead = { .geniv = "seqiv", .icv_truncbits = 128, } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_EALG_AES_CCM_ICV16, .sadb_alg_ivlen = 8, .sadb_alg_minbits = 128, .sadb_alg_maxbits = 256 } }, { .name = "rfc4543(gcm(aes))", .uinfo = { .aead = { .geniv = "seqiv", .icv_truncbits = 128, } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_EALG_NULL_AES_GMAC, .sadb_alg_ivlen = 8, .sadb_alg_minbits = 128, .sadb_alg_maxbits = 256 } }, { .name = "rfc7539esp(chacha20,poly1305)", .uinfo = { .aead = { .geniv = "seqiv", .icv_truncbits = 128, } }, .pfkey_supported = 0, }, }; static struct xfrm_algo_desc aalg_list[] = { { .name = "digest_null", .uinfo = { .auth = { .icv_truncbits = 0, .icv_fullbits = 0, } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_AALG_NULL, .sadb_alg_ivlen = 0, .sadb_alg_minbits = 0, .sadb_alg_maxbits = 0 } }, { .name = "hmac(md5)", .compat = "md5", .uinfo = { .auth = { .icv_truncbits = 96, .icv_fullbits = 128, } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_AALG_MD5HMAC, .sadb_alg_ivlen = 0, .sadb_alg_minbits = 128, .sadb_alg_maxbits = 128 } }, { .name = "hmac(sha1)", .compat = "sha1", .uinfo = { .auth = { .icv_truncbits = 96, .icv_fullbits = 160, } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_AALG_SHA1HMAC, .sadb_alg_ivlen = 0, .sadb_alg_minbits = 160, .sadb_alg_maxbits = 160 } }, { .name = "hmac(sha256)", .compat = "sha256", .uinfo = { .auth = { .icv_truncbits = 96, .icv_fullbits = 256, } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_AALG_SHA2_256HMAC, .sadb_alg_ivlen = 0, .sadb_alg_minbits = 256, .sadb_alg_maxbits = 256 } }, { .name = "hmac(sha384)", .uinfo = { .auth = { .icv_truncbits = 192, .icv_fullbits = 384, } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_AALG_SHA2_384HMAC, .sadb_alg_ivlen = 0, .sadb_alg_minbits = 384, .sadb_alg_maxbits = 384 } }, { .name = "hmac(sha512)", .uinfo = { .auth = { .icv_truncbits = 256, .icv_fullbits = 512, } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_AALG_SHA2_512HMAC, .sadb_alg_ivlen = 0, .sadb_alg_minbits = 512, .sadb_alg_maxbits = 512 } }, { .name = "hmac(rmd160)", .compat = "rmd160", .uinfo = { .auth = { .icv_truncbits = 96, .icv_fullbits = 160, } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_AALG_RIPEMD160HMAC, .sadb_alg_ivlen = 0, .sadb_alg_minbits = 160, .sadb_alg_maxbits = 160 } }, { .name = "xcbc(aes)", .uinfo = { .auth = { .icv_truncbits = 96, .icv_fullbits = 128, } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_AALG_AES_XCBC_MAC, .sadb_alg_ivlen = 0, .sadb_alg_minbits = 128, .sadb_alg_maxbits = 128 } }, { /* rfc4494 */ .name = "cmac(aes)", .uinfo = { .auth = { .icv_truncbits = 96, .icv_fullbits = 128, } }, .pfkey_supported = 0, }, { .name = "hmac(sm3)", .compat = "sm3", .uinfo = { .auth = { .icv_truncbits = 256, .icv_fullbits = 256, } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_AALG_SM3_256HMAC, .sadb_alg_ivlen = 0, .sadb_alg_minbits = 256, .sadb_alg_maxbits = 256 } }, }; static struct xfrm_algo_desc ealg_list[] = { { .name = "ecb(cipher_null)", .compat = "cipher_null", .uinfo = { .encr = { .blockbits = 8, .defkeybits = 0, } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_EALG_NULL, .sadb_alg_ivlen = 0, .sadb_alg_minbits = 0, .sadb_alg_maxbits = 0 } }, { .name = "cbc(des)", .compat = "des", .uinfo = { .encr = { .geniv = "echainiv", .blockbits = 64, .defkeybits = 64, } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_EALG_DESCBC, .sadb_alg_ivlen = 8, .sadb_alg_minbits = 64, .sadb_alg_maxbits = 64 } }, { .name = "cbc(des3_ede)", .compat = "des3_ede", .uinfo = { .encr = { .geniv = "echainiv", .blockbits = 64, .defkeybits = 192, } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_EALG_3DESCBC, .sadb_alg_ivlen = 8, .sadb_alg_minbits = 192, .sadb_alg_maxbits = 192 } }, { .name = "cbc(cast5)", .compat = "cast5", .uinfo = { .encr = { .geniv = "echainiv", .blockbits = 64, .defkeybits = 128, } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_EALG_CASTCBC, .sadb_alg_ivlen = 8, .sadb_alg_minbits = 40, .sadb_alg_maxbits = 128 } }, { .name = "cbc(blowfish)", .compat = "blowfish", .uinfo = { .encr = { .geniv = "echainiv", .blockbits = 64, .defkeybits = 128, } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_EALG_BLOWFISHCBC, .sadb_alg_ivlen = 8, .sadb_alg_minbits = 40, .sadb_alg_maxbits = 448 } }, { .name = "cbc(aes)", .compat = "aes", .uinfo = { .encr = { .geniv = "echainiv", .blockbits = 128, .defkeybits = 128, } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_EALG_AESCBC, .sadb_alg_ivlen = 8, .sadb_alg_minbits = 128, .sadb_alg_maxbits = 256 } }, { .name = "cbc(serpent)", .compat = "serpent", .uinfo = { .encr = { .geniv = "echainiv", .blockbits = 128, .defkeybits = 128, } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_EALG_SERPENTCBC, .sadb_alg_ivlen = 8, .sadb_alg_minbits = 128, .sadb_alg_maxbits = 256, } }, { .name = "cbc(camellia)", .compat = "camellia", .uinfo = { .encr = { .geniv = "echainiv", .blockbits = 128, .defkeybits = 128, } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_EALG_CAMELLIACBC, .sadb_alg_ivlen = 8, .sadb_alg_minbits = 128, .sadb_alg_maxbits = 256 } }, { .name = "cbc(twofish)", .compat = "twofish", .uinfo = { .encr = { .geniv = "echainiv", .blockbits = 128, .defkeybits = 128, } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_EALG_TWOFISHCBC, .sadb_alg_ivlen = 8, .sadb_alg_minbits = 128, .sadb_alg_maxbits = 256 } }, { .name = "rfc3686(ctr(aes))", .uinfo = { .encr = { .geniv = "seqiv", .blockbits = 128, .defkeybits = 160, /* 128-bit key + 32-bit nonce */ } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_EALG_AESCTR, .sadb_alg_ivlen = 8, .sadb_alg_minbits = 160, .sadb_alg_maxbits = 288 } }, { .name = "cbc(sm4)", .compat = "sm4", .uinfo = { .encr = { .geniv = "echainiv", .blockbits = 128, .defkeybits = 128, } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_EALG_SM4CBC, .sadb_alg_ivlen = 16, .sadb_alg_minbits = 128, .sadb_alg_maxbits = 256 } }, }; static struct xfrm_algo_desc calg_list[] = { { .name = "deflate", .uinfo = { .comp = { .threshold = 90, } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_CALG_DEFLATE } }, { .name = "lzs", .uinfo = { .comp = { .threshold = 90, } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_CALG_LZS } }, { .name = "lzjh", .uinfo = { .comp = { .threshold = 50, } }, .pfkey_supported = 1, .desc = { .sadb_alg_id = SADB_X_CALG_LZJH } }, }; static inline int aalg_entries(void) { return ARRAY_SIZE(aalg_list); } static inline int ealg_entries(void) { return ARRAY_SIZE(ealg_list); } static inline int calg_entries(void) { return ARRAY_SIZE(calg_list); } struct xfrm_algo_list { int (*find)(const char *name, u32 type, u32 mask); struct xfrm_algo_desc *algs; int entries; }; static const struct xfrm_algo_list xfrm_aead_list = { .find = crypto_has_aead, .algs = aead_list, .entries = ARRAY_SIZE(aead_list), }; static const struct xfrm_algo_list xfrm_aalg_list = { .find = crypto_has_ahash, .algs = aalg_list, .entries = ARRAY_SIZE(aalg_list), }; static const struct xfrm_algo_list xfrm_ealg_list = { .find = crypto_has_skcipher, .algs = ealg_list, .entries = ARRAY_SIZE(ealg_list), }; static const struct xfrm_algo_list xfrm_calg_list = { .find = crypto_has_acomp, .algs = calg_list, .entries = ARRAY_SIZE(calg_list), }; static struct xfrm_algo_desc *xfrm_find_algo( const struct xfrm_algo_list *algo_list, int match(const struct xfrm_algo_desc *entry, const void *data), const void *data, int probe) { struct xfrm_algo_desc *list = algo_list->algs; int i, status; for (i = 0; i < algo_list->entries; i++) { if (!match(list + i, data)) continue; if (list[i].available) return &list[i]; if (!probe) break; status = algo_list->find(list[i].name, 0, 0); if (!status) break; list[i].available = status; return &list[i]; } return NULL; } static int xfrm_alg_id_match(const struct xfrm_algo_desc *entry, const void *data) { return entry->desc.sadb_alg_id == (unsigned long)data; } struct xfrm_algo_desc *xfrm_aalg_get_byid(int alg_id) { return xfrm_find_algo(&xfrm_aalg_list, xfrm_alg_id_match, (void *)(unsigned long)alg_id, 1); } EXPORT_SYMBOL_GPL(xfrm_aalg_get_byid); struct xfrm_algo_desc *xfrm_ealg_get_byid(int alg_id) { return xfrm_find_algo(&xfrm_ealg_list, xfrm_alg_id_match, (void *)(unsigned long)alg_id, 1); } EXPORT_SYMBOL_GPL(xfrm_ealg_get_byid); struct xfrm_algo_desc *xfrm_calg_get_byid(int alg_id) { return xfrm_find_algo(&xfrm_calg_list, xfrm_alg_id_match, (void *)(unsigned long)alg_id, 1); } EXPORT_SYMBOL_GPL(xfrm_calg_get_byid); static int xfrm_alg_name_match(const struct xfrm_algo_desc *entry, const void *data) { const char *name = data; return name && (!strcmp(name, entry->name) || (entry->compat && !strcmp(name, entry->compat))); } struct xfrm_algo_desc *xfrm_aalg_get_byname(const char *name, int probe) { return xfrm_find_algo(&xfrm_aalg_list, xfrm_alg_name_match, name, probe); } EXPORT_SYMBOL_GPL(xfrm_aalg_get_byname); struct xfrm_algo_desc *xfrm_ealg_get_byname(const char *name, int probe) { return xfrm_find_algo(&xfrm_ealg_list, xfrm_alg_name_match, name, probe); } EXPORT_SYMBOL_GPL(xfrm_ealg_get_byname); struct xfrm_algo_desc *xfrm_calg_get_byname(const char *name, int probe) { return xfrm_find_algo(&xfrm_calg_list, xfrm_alg_name_match, name, probe); } EXPORT_SYMBOL_GPL(xfrm_calg_get_byname); struct xfrm_aead_name { const char *name; int icvbits; }; static int xfrm_aead_name_match(const struct xfrm_algo_desc *entry, const void *data) { const struct xfrm_aead_name *aead = data; const char *name = aead->name; return aead->icvbits == entry->uinfo.aead.icv_truncbits && name && !strcmp(name, entry->name); } struct xfrm_algo_desc *xfrm_aead_get_byname(const char *name, int icv_len, int probe) { struct xfrm_aead_name data = { .name = name, .icvbits = icv_len, }; return xfrm_find_algo(&xfrm_aead_list, xfrm_aead_name_match, &data, probe); } EXPORT_SYMBOL_GPL(xfrm_aead_get_byname); struct xfrm_algo_desc *xfrm_aalg_get_byidx(unsigned int idx) { if (idx >= aalg_entries()) return NULL; return &aalg_list[idx]; } EXPORT_SYMBOL_GPL(xfrm_aalg_get_byidx); struct xfrm_algo_desc *xfrm_ealg_get_byidx(unsigned int idx) { if (idx >= ealg_entries()) return NULL; return &ealg_list[idx]; } EXPORT_SYMBOL_GPL(xfrm_ealg_get_byidx); /* * Probe for the availability of crypto algorithms, and set the available * flag for any algorithms found on the system. This is typically called by * pfkey during userspace SA add, update or register. */ void xfrm_probe_algs(void) { int i, status; BUG_ON(in_softirq()); for (i = 0; i < aalg_entries(); i++) { status = crypto_has_ahash(aalg_list[i].name, 0, 0); if (aalg_list[i].available != status) aalg_list[i].available = status; } for (i = 0; i < ealg_entries(); i++) { status = crypto_has_skcipher(ealg_list[i].name, 0, 0); if (ealg_list[i].available != status) ealg_list[i].available = status; } for (i = 0; i < calg_entries(); i++) { status = crypto_has_acomp(calg_list[i].name, 0, 0); if (calg_list[i].available != status) calg_list[i].available = status; } } EXPORT_SYMBOL_GPL(xfrm_probe_algs); int xfrm_count_pfkey_auth_supported(void) { int i, n; for (i = 0, n = 0; i < aalg_entries(); i++) if (aalg_list[i].available && aalg_list[i].pfkey_supported) n++; return n; } EXPORT_SYMBOL_GPL(xfrm_count_pfkey_auth_supported); int xfrm_count_pfkey_enc_supported(void) { int i, n; for (i = 0, n = 0; i < ealg_entries(); i++) if (ealg_list[i].available && ealg_list[i].pfkey_supported) n++; return n; } EXPORT_SYMBOL_GPL(xfrm_count_pfkey_enc_supported); MODULE_DESCRIPTION("XFRM Algorithm interface"); MODULE_LICENSE("GPL"); |
5 5 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 | // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/befs/debug.c * * Copyright (C) 2001 Will Dyson (will_dyson at pobox.com) * * With help from the ntfs-tng driver by Anton Altparmakov * * Copyright (C) 1999 Makoto Kato (m_kato@ga2.so-net.ne.jp) * * debug functions */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #ifdef __KERNEL__ #include <linux/stdarg.h> #include <linux/string.h> #include <linux/spinlock.h> #include <linux/kernel.h> #include <linux/fs.h> #include <linux/slab.h> #endif /* __KERNEL__ */ #include "befs.h" void befs_error(const struct super_block *sb, const char *fmt, ...) { struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; pr_err("(%s): %pV\n", sb->s_id, &vaf); va_end(args); } void befs_warning(const struct super_block *sb, const char *fmt, ...) { struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; pr_warn("(%s): %pV\n", sb->s_id, &vaf); va_end(args); } void befs_debug(const struct super_block *sb, const char *fmt, ...) { #ifdef CONFIG_BEFS_DEBUG struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; pr_debug("(%s): %pV\n", sb->s_id, &vaf); va_end(args); #endif //CONFIG_BEFS_DEBUG } void befs_dump_inode(const struct super_block *sb, befs_inode *inode) { #ifdef CONFIG_BEFS_DEBUG befs_block_run tmp_run; befs_debug(sb, "befs_inode information"); befs_debug(sb, " magic1 %08x", fs32_to_cpu(sb, inode->magic1)); tmp_run = fsrun_to_cpu(sb, inode->inode_num); befs_debug(sb, " inode_num %u, %hu, %hu", tmp_run.allocation_group, tmp_run.start, tmp_run.len); befs_debug(sb, " uid %u", fs32_to_cpu(sb, inode->uid)); befs_debug(sb, " gid %u", fs32_to_cpu(sb, inode->gid)); befs_debug(sb, " mode %08x", fs32_to_cpu(sb, inode->mode)); befs_debug(sb, " flags %08x", fs32_to_cpu(sb, inode->flags)); befs_debug(sb, " create_time %llu", fs64_to_cpu(sb, inode->create_time)); befs_debug(sb, " last_modified_time %llu", fs64_to_cpu(sb, inode->last_modified_time)); tmp_run = fsrun_to_cpu(sb, inode->parent); befs_debug(sb, " parent [%u, %hu, %hu]", tmp_run.allocation_group, tmp_run.start, tmp_run.len); tmp_run = fsrun_to_cpu(sb, inode->attributes); befs_debug(sb, " attributes [%u, %hu, %hu]", tmp_run.allocation_group, tmp_run.start, tmp_run.len); befs_debug(sb, " type %08x", fs32_to_cpu(sb, inode->type)); befs_debug(sb, " inode_size %u", fs32_to_cpu(sb, inode->inode_size)); if (S_ISLNK(fs32_to_cpu(sb, inode->mode))) { befs_debug(sb, " Symbolic link [%s]", inode->data.symlink); } else { int i; for (i = 0; i < BEFS_NUM_DIRECT_BLOCKS; i++) { tmp_run = fsrun_to_cpu(sb, inode->data.datastream.direct[i]); befs_debug(sb, " direct %d [%u, %hu, %hu]", i, tmp_run.allocation_group, tmp_run.start, tmp_run.len); } befs_debug(sb, " max_direct_range %llu", fs64_to_cpu(sb, inode->data.datastream. max_direct_range)); tmp_run = fsrun_to_cpu(sb, inode->data.datastream.indirect); befs_debug(sb, " indirect [%u, %hu, %hu]", tmp_run.allocation_group, tmp_run.start, tmp_run.len); befs_debug(sb, " max_indirect_range %llu", fs64_to_cpu(sb, inode->data.datastream. max_indirect_range)); tmp_run = fsrun_to_cpu(sb, inode->data.datastream.double_indirect); befs_debug(sb, " double indirect [%u, %hu, %hu]", tmp_run.allocation_group, tmp_run.start, tmp_run.len); befs_debug(sb, " max_double_indirect_range %llu", fs64_to_cpu(sb, inode->data.datastream. max_double_indirect_range)); befs_debug(sb, " size %llu", fs64_to_cpu(sb, inode->data.datastream.size)); } #endif //CONFIG_BEFS_DEBUG } /* * Display super block structure for debug. */ void befs_dump_super_block(const struct super_block *sb, befs_super_block *sup) { #ifdef CONFIG_BEFS_DEBUG befs_block_run tmp_run; befs_debug(sb, "befs_super_block information"); befs_debug(sb, " name %s", sup->name); befs_debug(sb, " magic1 %08x", fs32_to_cpu(sb, sup->magic1)); befs_debug(sb, " fs_byte_order %08x", fs32_to_cpu(sb, sup->fs_byte_order)); befs_debug(sb, " block_size %u", fs32_to_cpu(sb, sup->block_size)); befs_debug(sb, " block_shift %u", fs32_to_cpu(sb, sup->block_shift)); befs_debug(sb, " num_blocks %llu", fs64_to_cpu(sb, sup->num_blocks)); befs_debug(sb, " used_blocks %llu", fs64_to_cpu(sb, sup->used_blocks)); befs_debug(sb, " inode_size %u", fs32_to_cpu(sb, sup->inode_size)); befs_debug(sb, " magic2 %08x", fs32_to_cpu(sb, sup->magic2)); befs_debug(sb, " blocks_per_ag %u", fs32_to_cpu(sb, sup->blocks_per_ag)); befs_debug(sb, " ag_shift %u", fs32_to_cpu(sb, sup->ag_shift)); befs_debug(sb, " num_ags %u", fs32_to_cpu(sb, sup->num_ags)); befs_debug(sb, " flags %08x", fs32_to_cpu(sb, sup->flags)); tmp_run = fsrun_to_cpu(sb, sup->log_blocks); befs_debug(sb, " log_blocks %u, %hu, %hu", tmp_run.allocation_group, tmp_run.start, tmp_run.len); befs_debug(sb, " log_start %lld", fs64_to_cpu(sb, sup->log_start)); befs_debug(sb, " log_end %lld", fs64_to_cpu(sb, sup->log_end)); befs_debug(sb, " magic3 %08x", fs32_to_cpu(sb, sup->magic3)); tmp_run = fsrun_to_cpu(sb, sup->root_dir); befs_debug(sb, " root_dir %u, %hu, %hu", tmp_run.allocation_group, tmp_run.start, tmp_run.len); tmp_run = fsrun_to_cpu(sb, sup->indices); befs_debug(sb, " indices %u, %hu, %hu", tmp_run.allocation_group, tmp_run.start, tmp_run.len); #endif //CONFIG_BEFS_DEBUG } #if 0 /* unused */ void befs_dump_small_data(const struct super_block *sb, befs_small_data *sd) { } /* unused */ void befs_dump_run(const struct super_block *sb, befs_disk_block_run run) { #ifdef CONFIG_BEFS_DEBUG befs_block_run n = fsrun_to_cpu(sb, run); befs_debug(sb, "[%u, %hu, %hu]", n.allocation_group, n.start, n.len); #endif //CONFIG_BEFS_DEBUG } #endif /* 0 */ void befs_dump_index_entry(const struct super_block *sb, befs_disk_btree_super *super) { #ifdef CONFIG_BEFS_DEBUG befs_debug(sb, "Btree super structure"); befs_debug(sb, " magic %08x", fs32_to_cpu(sb, super->magic)); befs_debug(sb, " node_size %u", fs32_to_cpu(sb, super->node_size)); befs_debug(sb, " max_depth %08x", fs32_to_cpu(sb, super->max_depth)); befs_debug(sb, " data_type %08x", fs32_to_cpu(sb, super->data_type)); befs_debug(sb, " root_node_pointer %016LX", fs64_to_cpu(sb, super->root_node_ptr)); befs_debug(sb, " free_node_pointer %016LX", fs64_to_cpu(sb, super->free_node_ptr)); befs_debug(sb, " maximum size %016LX", fs64_to_cpu(sb, super->max_size)); #endif //CONFIG_BEFS_DEBUG } void befs_dump_index_node(const struct super_block *sb, befs_btree_nodehead *node) { #ifdef CONFIG_BEFS_DEBUG befs_debug(sb, "Btree node structure"); befs_debug(sb, " left %016LX", fs64_to_cpu(sb, node->left)); befs_debug(sb, " right %016LX", fs64_to_cpu(sb, node->right)); befs_debug(sb, " overflow %016LX", fs64_to_cpu(sb, node->overflow)); befs_debug(sb, " all_key_count %hu", fs16_to_cpu(sb, node->all_key_count)); befs_debug(sb, " all_key_length %hu", fs16_to_cpu(sb, node->all_key_length)); #endif //CONFIG_BEFS_DEBUG } |
67 67 67 67 66 67 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Video for Linux Two * * A generic video device interface for the LINUX operating system * using a set of device structures/vectors for low level operations. * * This file replaces the videodev.c file that comes with the * regular kernel distribution. * * Author: Bill Dirks <bill@thedirks.org> * based on code by Alan Cox, <alan@cymru.net> */ /* * Video capture interface for Linux * * A generic video device interface for the LINUX operating system * using a set of device structures/vectors for low level operations. * * Author: Alan Cox, <alan@lxorguk.ukuu.org.uk> * * Fixes: */ /* * Video4linux 1/2 integration by Justin Schoeman * <justin@suntiger.ee.up.ac.za> * 2.4 PROCFS support ported from 2.4 kernels by * Iñaki GarcÃa Etxebarria <garetxe@euskalnet.net> * Makefile fix by "W. Michael Petullo" <mike@flyn.org> * 2.4 devfs support ported from 2.4 kernels by * Dan Merillat <dan@merillat.org> * Added Gerd Knorrs v4l1 enhancements (Justin Schoeman) */ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/uaccess.h> #include <asm/io.h> #include <asm/div64.h> #include <media/v4l2-common.h> #include <media/v4l2-device.h> #include <media/v4l2-ctrls.h> #include <linux/videodev2.h> /* * * V 4 L 2 D R I V E R H E L P E R A P I * */ /* * Video Standard Operations (contributed by Michael Schimek) */ /* Helper functions for control handling */ /* Fill in a struct v4l2_queryctrl */ int v4l2_ctrl_query_fill(struct v4l2_queryctrl *qctrl, s32 _min, s32 _max, s32 _step, s32 _def) { const char *name; s64 min = _min; s64 max = _max; u64 step = _step; s64 def = _def; v4l2_ctrl_fill(qctrl->id, &name, &qctrl->type, &min, &max, &step, &def, &qctrl->flags); if (name == NULL) return -EINVAL; qctrl->minimum = min; qctrl->maximum = max; qctrl->step = step; qctrl->default_value = def; qctrl->reserved[0] = qctrl->reserved[1] = 0; strscpy(qctrl->name, name, sizeof(qctrl->name)); return 0; } EXPORT_SYMBOL(v4l2_ctrl_query_fill); /* Clamp x to be between min and max, aligned to a multiple of 2^align. min * and max don't have to be aligned, but there must be at least one valid * value. E.g., min=17,max=31,align=4 is not allowed as there are no multiples * of 16 between 17 and 31. */ static unsigned int clamp_align(unsigned int x, unsigned int min, unsigned int max, unsigned int align) { /* Bits that must be zero to be aligned */ unsigned int mask = ~((1 << align) - 1); /* Clamp to aligned min and max */ x = clamp(x, (min + ~mask) & mask, max & mask); /* Round to nearest aligned value */ if (align) x = (x + (1 << (align - 1))) & mask; return x; } static unsigned int clamp_roundup(unsigned int x, unsigned int min, unsigned int max, unsigned int alignment) { x = clamp(x, min, max); if (alignment) x = round_up(x, alignment); return x; } void v4l_bound_align_image(u32 *w, unsigned int wmin, unsigned int wmax, unsigned int walign, u32 *h, unsigned int hmin, unsigned int hmax, unsigned int halign, unsigned int salign) { *w = clamp_align(*w, wmin, wmax, walign); *h = clamp_align(*h, hmin, hmax, halign); /* Usually we don't need to align the size and are done now. */ if (!salign) return; /* How much alignment do we have? */ walign = __ffs(*w); halign = __ffs(*h); /* Enough to satisfy the image alignment? */ if (walign + halign < salign) { /* Max walign where there is still a valid width */ unsigned int wmaxa = __fls(wmax ^ (wmin - 1)); /* Max halign where there is still a valid height */ unsigned int hmaxa = __fls(hmax ^ (hmin - 1)); /* up the smaller alignment until we have enough */ do { if (halign >= hmaxa || (walign <= halign && walign < wmaxa)) { *w = clamp_align(*w, wmin, wmax, walign + 1); walign = __ffs(*w); } else { *h = clamp_align(*h, hmin, hmax, halign + 1); halign = __ffs(*h); } } while (halign + walign < salign); } } EXPORT_SYMBOL_GPL(v4l_bound_align_image); const void * __v4l2_find_nearest_size(const void *array, size_t array_size, size_t entry_size, size_t width_offset, size_t height_offset, s32 width, s32 height) { u32 error, min_error = U32_MAX; const void *best = NULL; unsigned int i; if (!array) return NULL; for (i = 0; i < array_size; i++, array += entry_size) { const u32 *entry_width = array + width_offset; const u32 *entry_height = array + height_offset; error = abs(*entry_width - width) + abs(*entry_height - height); if (error > min_error) continue; min_error = error; best = array; if (!error) break; } return best; } EXPORT_SYMBOL_GPL(__v4l2_find_nearest_size); int v4l2_g_parm_cap(struct video_device *vdev, struct v4l2_subdev *sd, struct v4l2_streamparm *a) { struct v4l2_subdev_frame_interval ival = { 0 }; int ret; if (a->type != V4L2_BUF_TYPE_VIDEO_CAPTURE && a->type != V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) return -EINVAL; if (vdev->device_caps & V4L2_CAP_READWRITE) a->parm.capture.readbuffers = 2; if (v4l2_subdev_has_op(sd, pad, get_frame_interval)) a->parm.capture.capability = V4L2_CAP_TIMEPERFRAME; ret = v4l2_subdev_call_state_active(sd, pad, get_frame_interval, &ival); if (!ret) a->parm.capture.timeperframe = ival.interval; return ret; } EXPORT_SYMBOL_GPL(v4l2_g_parm_cap); int v4l2_s_parm_cap(struct video_device *vdev, struct v4l2_subdev *sd, struct v4l2_streamparm *a) { struct v4l2_subdev_frame_interval ival = { .interval = a->parm.capture.timeperframe }; int ret; if (a->type != V4L2_BUF_TYPE_VIDEO_CAPTURE && a->type != V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) return -EINVAL; memset(&a->parm, 0, sizeof(a->parm)); if (vdev->device_caps & V4L2_CAP_READWRITE) a->parm.capture.readbuffers = 2; else a->parm.capture.readbuffers = 0; if (v4l2_subdev_has_op(sd, pad, get_frame_interval)) a->parm.capture.capability = V4L2_CAP_TIMEPERFRAME; ret = v4l2_subdev_call_state_active(sd, pad, set_frame_interval, &ival); if (!ret) a->parm.capture.timeperframe = ival.interval; return ret; } EXPORT_SYMBOL_GPL(v4l2_s_parm_cap); const struct v4l2_format_info *v4l2_format_info(u32 format) { static const struct v4l2_format_info formats[] = { /* RGB formats */ { .format = V4L2_PIX_FMT_BGR24, .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 3, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_RGB24, .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 3, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_HSV24, .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 3, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_BGR32, .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_XBGR32, .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_BGRX32, .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_RGB32, .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_XRGB32, .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_RGBX32, .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_HSV32, .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_ARGB32, .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_RGBA32, .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_ABGR32, .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_BGRA32, .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_RGB565, .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_RGB555, .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_BGR666, .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_BGR48_12, .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 6, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_BGR48, .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 6, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_RGB48, .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 6, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_ABGR64_12, .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 8, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_RGBA1010102, .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_RGBX1010102, .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_ARGB2101010, .pixel_enc = V4L2_PIXEL_ENC_RGB, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, /* YUV packed formats */ { .format = V4L2_PIX_FMT_YUYV, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 1 }, { .format = V4L2_PIX_FMT_YVYU, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 1 }, { .format = V4L2_PIX_FMT_UYVY, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 1 }, { .format = V4L2_PIX_FMT_VYUY, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 1 }, { .format = V4L2_PIX_FMT_Y210, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 1 }, { .format = V4L2_PIX_FMT_Y212, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 1 }, { .format = V4L2_PIX_FMT_Y216, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 1, .bpp = { 4, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 1 }, { .format = V4L2_PIX_FMT_YUV48_12, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 1, .bpp = { 6, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_MT2110T, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 2, .comp_planes = 2, .bpp = { 5, 10, 0, 0 }, .bpp_div = { 4, 4, 1, 1 }, .hdiv = 2, .vdiv = 2, .block_w = { 16, 8, 0, 0 }, .block_h = { 32, 16, 0, 0 }}, { .format = V4L2_PIX_FMT_MT2110R, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 2, .comp_planes = 2, .bpp = { 5, 10, 0, 0 }, .bpp_div = { 4, 4, 1, 1 }, .hdiv = 2, .vdiv = 2, .block_w = { 16, 8, 0, 0 }, .block_h = { 32, 16, 0, 0 }}, /* YUV planar formats */ { .format = V4L2_PIX_FMT_NV12, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 2 }, { .format = V4L2_PIX_FMT_NV21, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 2 }, { .format = V4L2_PIX_FMT_NV16, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 1 }, { .format = V4L2_PIX_FMT_NV61, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 1 }, { .format = V4L2_PIX_FMT_NV24, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_NV42, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_P010, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 2, .bpp = { 2, 2, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 1 }, { .format = V4L2_PIX_FMT_P012, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 2, .bpp = { 2, 4, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 2 }, { .format = V4L2_PIX_FMT_YUV410, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 4, .vdiv = 4 }, { .format = V4L2_PIX_FMT_YVU410, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 4, .vdiv = 4 }, { .format = V4L2_PIX_FMT_YUV411P, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 4, .vdiv = 1 }, { .format = V4L2_PIX_FMT_YUV420, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 2 }, { .format = V4L2_PIX_FMT_YVU420, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 2 }, { .format = V4L2_PIX_FMT_YUV422P, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 1 }, { .format = V4L2_PIX_FMT_GREY, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, /* Tiled YUV formats */ { .format = V4L2_PIX_FMT_NV12_4L4, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 2 }, { .format = V4L2_PIX_FMT_NV15_4L4, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 2, .bpp = { 5, 10, 0, 0 }, .bpp_div = { 4, 4, 1, 1 }, .hdiv = 2, .vdiv = 2, .block_w = { 4, 2, 0, 0 }, .block_h = { 1, 1, 0, 0 }}, { .format = V4L2_PIX_FMT_P010_4L4, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 1, .comp_planes = 2, .bpp = { 2, 4, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 2 }, /* YUV planar formats, non contiguous variant */ { .format = V4L2_PIX_FMT_YUV420M, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 3, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 2 }, { .format = V4L2_PIX_FMT_YVU420M, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 3, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 2 }, { .format = V4L2_PIX_FMT_YUV422M, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 3, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 1 }, { .format = V4L2_PIX_FMT_YVU422M, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 3, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 1 }, { .format = V4L2_PIX_FMT_YUV444M, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 3, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_YVU444M, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 3, .comp_planes = 3, .bpp = { 1, 1, 1, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_NV12M, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 2, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 2 }, { .format = V4L2_PIX_FMT_NV21M, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 2, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 2 }, { .format = V4L2_PIX_FMT_NV16M, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 2, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 1 }, { .format = V4L2_PIX_FMT_NV61M, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 2, .comp_planes = 2, .bpp = { 1, 2, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 1 }, { .format = V4L2_PIX_FMT_P012M, .pixel_enc = V4L2_PIXEL_ENC_YUV, .mem_planes = 2, .comp_planes = 2, .bpp = { 2, 4, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 2, .vdiv = 2 }, /* Bayer RGB formats */ { .format = V4L2_PIX_FMT_SBGGR8, .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_SGBRG8, .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_SGRBG8, .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_SRGGB8, .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_SBGGR10, .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_SGBRG10, .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_SGRBG10, .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_SRGGB10, .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_SBGGR10ALAW8, .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_SGBRG10ALAW8, .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_SGRBG10ALAW8, .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_SRGGB10ALAW8, .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_SBGGR10DPCM8, .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_SGBRG10DPCM8, .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_SGRBG10DPCM8, .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_SRGGB10DPCM8, .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 1, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_SBGGR12, .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_SGBRG12, .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_SGRBG12, .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, { .format = V4L2_PIX_FMT_SRGGB12, .pixel_enc = V4L2_PIXEL_ENC_BAYER, .mem_planes = 1, .comp_planes = 1, .bpp = { 2, 0, 0, 0 }, .bpp_div = { 1, 1, 1, 1 }, .hdiv = 1, .vdiv = 1 }, }; unsigned int i; for (i = 0; i < ARRAY_SIZE(formats); ++i) if (formats[i].format == format) return &formats[i]; return NULL; } EXPORT_SYMBOL(v4l2_format_info); static inline unsigned int v4l2_format_block_width(const struct v4l2_format_info *info, int plane) { if (!info->block_w[plane]) return 1; return info->block_w[plane]; } static inline unsigned int v4l2_format_block_height(const struct v4l2_format_info *info, int plane) { if (!info->block_h[plane]) return 1; return info->block_h[plane]; } void v4l2_apply_frmsize_constraints(u32 *width, u32 *height, const struct v4l2_frmsize_stepwise *frmsize) { if (!frmsize) return; /* * Clamp width/height to meet min/max constraints and round it up to * macroblock alignment. */ *width = clamp_roundup(*width, frmsize->min_width, frmsize->max_width, frmsize->step_width); *height = clamp_roundup(*height, frmsize->min_height, frmsize->max_height, frmsize->step_height); } EXPORT_SYMBOL_GPL(v4l2_apply_frmsize_constraints); int v4l2_fill_pixfmt_mp(struct v4l2_pix_format_mplane *pixfmt, u32 pixelformat, u32 width, u32 height) { const struct v4l2_format_info *info; struct v4l2_plane_pix_format *plane; int i; info = v4l2_format_info(pixelformat); if (!info) return -EINVAL; pixfmt->width = width; pixfmt->height = height; pixfmt->pixelformat = pixelformat; pixfmt->num_planes = info->mem_planes; if (info->mem_planes == 1) { plane = &pixfmt->plane_fmt[0]; plane->bytesperline = ALIGN(width, v4l2_format_block_width(info, 0)) * info->bpp[0] / info->bpp_div[0]; plane->sizeimage = 0; for (i = 0; i < info->comp_planes; i++) { unsigned int hdiv = (i == 0) ? 1 : info->hdiv; unsigned int vdiv = (i == 0) ? 1 : info->vdiv; unsigned int aligned_width; unsigned int aligned_height; aligned_width = ALIGN(width, v4l2_format_block_width(info, i)); aligned_height = ALIGN(height, v4l2_format_block_height(info, i)); plane->sizeimage += info->bpp[i] * DIV_ROUND_UP(aligned_width, hdiv) * DIV_ROUND_UP(aligned_height, vdiv) / info->bpp_div[i]; } } else { for (i = 0; i < info->comp_planes; i++) { unsigned int hdiv = (i == 0) ? 1 : info->hdiv; unsigned int vdiv = (i == 0) ? 1 : info->vdiv; unsigned int aligned_width; unsigned int aligned_height; aligned_width = ALIGN(width, v4l2_format_block_width(info, i)); aligned_height = ALIGN(height, v4l2_format_block_height(info, i)); plane = &pixfmt->plane_fmt[i]; plane->bytesperline = info->bpp[i] * DIV_ROUND_UP(aligned_width, hdiv) / info->bpp_div[i]; plane->sizeimage = plane->bytesperline * DIV_ROUND_UP(aligned_height, vdiv); } } return 0; } EXPORT_SYMBOL_GPL(v4l2_fill_pixfmt_mp); int v4l2_fill_pixfmt(struct v4l2_pix_format *pixfmt, u32 pixelformat, u32 width, u32 height) { const struct v4l2_format_info *info; int i; info = v4l2_format_info(pixelformat); if (!info) return -EINVAL; /* Single planar API cannot be used for multi plane formats. */ if (info->mem_planes > 1) return -EINVAL; pixfmt->width = width; pixfmt->height = height; pixfmt->pixelformat = pixelformat; pixfmt->bytesperline = ALIGN(width, v4l2_format_block_width(info, 0)) * info->bpp[0] / info->bpp_div[0]; pixfmt->sizeimage = 0; for (i = 0; i < info->comp_planes; i++) { unsigned int hdiv = (i == 0) ? 1 : info->hdiv; unsigned int vdiv = (i == 0) ? 1 : info->vdiv; unsigned int aligned_width; unsigned int aligned_height; aligned_width = ALIGN(width, v4l2_format_block_width(info, i)); aligned_height = ALIGN(height, v4l2_format_block_height(info, i)); pixfmt->sizeimage += info->bpp[i] * DIV_ROUND_UP(aligned_width, hdiv) * DIV_ROUND_UP(aligned_height, vdiv) / info->bpp_div[i]; } return 0; } EXPORT_SYMBOL_GPL(v4l2_fill_pixfmt); s64 __v4l2_get_link_freq_ctrl(struct v4l2_ctrl_handler *handler, unsigned int mul, unsigned int div) { struct v4l2_ctrl *ctrl; s64 freq; ctrl = v4l2_ctrl_find(handler, V4L2_CID_LINK_FREQ); if (ctrl) { struct v4l2_querymenu qm = { .id = V4L2_CID_LINK_FREQ }; int ret; qm.index = v4l2_ctrl_g_ctrl(ctrl); ret = v4l2_querymenu(handler, &qm); if (ret) return -ENOENT; freq = qm.value; } else { if (!mul || !div) return -ENOENT; ctrl = v4l2_ctrl_find(handler, V4L2_CID_PIXEL_RATE); if (!ctrl) return -ENOENT; freq = div_u64(v4l2_ctrl_g_ctrl_int64(ctrl) * mul, div); pr_warn("%s: Link frequency estimated using pixel rate: result might be inaccurate\n", __func__); pr_warn("%s: Consider implementing support for V4L2_CID_LINK_FREQ in the transmitter driver\n", __func__); } return freq > 0 ? freq : -EINVAL; } EXPORT_SYMBOL_GPL(__v4l2_get_link_freq_ctrl); #ifdef CONFIG_MEDIA_CONTROLLER s64 __v4l2_get_link_freq_pad(struct media_pad *pad, unsigned int mul, unsigned int div) { struct v4l2_mbus_config mbus_config = {}; struct v4l2_subdev *sd; int ret; sd = media_entity_to_v4l2_subdev(pad->entity); ret = v4l2_subdev_call(sd, pad, get_mbus_config, pad->index, &mbus_config); if (ret < 0 && ret != -ENOIOCTLCMD) return ret; if (mbus_config.link_freq) return mbus_config.link_freq; /* * Fall back to using the link frequency control if the media bus config * doesn't provide a link frequency. */ return __v4l2_get_link_freq_ctrl(sd->ctrl_handler, mul, div); } EXPORT_SYMBOL_GPL(__v4l2_get_link_freq_pad); #endif /* CONFIG_MEDIA_CONTROLLER */ /* * Simplify a fraction using a simple continued fraction decomposition. The * idea here is to convert fractions such as 333333/10000000 to 1/30 using * 32 bit arithmetic only. The algorithm is not perfect and relies upon two * arbitrary parameters to remove non-significative terms from the simple * continued fraction decomposition. Using 8 and 333 for n_terms and threshold * respectively seems to give nice results. */ void v4l2_simplify_fraction(u32 *numerator, u32 *denominator, unsigned int n_terms, unsigned int threshold) { u32 *an; u32 x, y, r; unsigned int i, n; an = kmalloc_array(n_terms, sizeof(*an), GFP_KERNEL); if (an == NULL) return; /* * Convert the fraction to a simple continued fraction. See * https://en.wikipedia.org/wiki/Continued_fraction * Stop if the current term is bigger than or equal to the given * threshold. */ x = *numerator; y = *denominator; for (n = 0; n < n_terms && y != 0; ++n) { an[n] = x / y; if (an[n] >= threshold) { if (n < 2) n++; break; } r = x - an[n] * y; x = y; y = r; } /* Expand the simple continued fraction back to an integer fraction. */ x = 0; y = 1; for (i = n; i > 0; --i) { r = y; y = an[i-1] * y + x; x = r; } *numerator = y; *denominator = x; kfree(an); } EXPORT_SYMBOL_GPL(v4l2_simplify_fraction); /* * Convert a fraction to a frame interval in 100ns multiples. The idea here is * to compute numerator / denominator * 10000000 using 32 bit fixed point * arithmetic only. */ u32 v4l2_fraction_to_interval(u32 numerator, u32 denominator) { u32 multiplier; /* Saturate the result if the operation would overflow. */ if (denominator == 0 || numerator/denominator >= ((u32)-1)/10000000) return (u32)-1; /* * Divide both the denominator and the multiplier by two until * numerator * multiplier doesn't overflow. If anyone knows a better * algorithm please let me know. */ multiplier = 10000000; while (numerator > ((u32)-1)/multiplier) { multiplier /= 2; denominator /= 2; } return denominator ? numerator * multiplier / denominator : 0; } EXPORT_SYMBOL_GPL(v4l2_fraction_to_interval); int v4l2_link_freq_to_bitmap(struct device *dev, const u64 *fw_link_freqs, unsigned int num_of_fw_link_freqs, const s64 *driver_link_freqs, unsigned int num_of_driver_link_freqs, unsigned long *bitmap) { unsigned int i; *bitmap = 0; if (!num_of_fw_link_freqs) { dev_err(dev, "no link frequencies in firmware\n"); return -ENODATA; } for (i = 0; i < num_of_fw_link_freqs; i++) { unsigned int j; for (j = 0; j < num_of_driver_link_freqs; j++) { if (fw_link_freqs[i] != driver_link_freqs[j]) continue; dev_dbg(dev, "enabling link frequency %lld Hz\n", driver_link_freqs[j]); *bitmap |= BIT(j); break; } } if (!*bitmap) { dev_err(dev, "no matching link frequencies found\n"); dev_dbg(dev, "specified in firmware:\n"); for (i = 0; i < num_of_fw_link_freqs; i++) dev_dbg(dev, "\t%llu Hz\n", fw_link_freqs[i]); dev_dbg(dev, "driver supported:\n"); for (i = 0; i < num_of_driver_link_freqs; i++) dev_dbg(dev, "\t%lld Hz\n", driver_link_freqs[i]); return -ENOENT; } return 0; } EXPORT_SYMBOL_GPL(v4l2_link_freq_to_bitmap); |
11301 1004 1001 11309 987 10654 674 10796 10694 11323 11312 11306 11251 11248 11253 11244 47 261 275 48 268 269 274 266 263 263 102 260 260 263 260 3 264 263 264 265 260 261 259 3 3 104 33 82 104 27 27 2 11 10 2 26 27 4 27 21 21 4 21 2 5 4 1 2 2 1 1 1 1 1 5 4 5 9523 1091 287 148 682 530 529 530 530 530 528 530 328 26 494 472 530 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 | // SPDX-License-Identifier: GPL-2.0-only /* * 32bit Socket syscall emulation. Based on arch/sparc64/kernel/sys_sparc32.c. * * Copyright (C) 2000 VA Linux Co * Copyright (C) 2000 Don Dugger <n0ano@valinux.com> * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com> * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) * Copyright (C) 2000 Hewlett-Packard Co. * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com> * Copyright (C) 2000,2001 Andi Kleen, SuSE Labs */ #include <linux/kernel.h> #include <linux/gfp.h> #include <linux/fs.h> #include <linux/types.h> #include <linux/file.h> #include <linux/icmpv6.h> #include <linux/socket.h> #include <linux/syscalls.h> #include <linux/filter.h> #include <linux/compat.h> #include <linux/security.h> #include <linux/audit.h> #include <linux/export.h> #include <net/scm.h> #include <net/sock.h> #include <net/ip.h> #include <net/ipv6.h> #include <linux/uaccess.h> #include <net/compat.h> int __get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr *msg, struct sockaddr __user **save_addr) { ssize_t err; kmsg->msg_flags = msg->msg_flags; kmsg->msg_namelen = msg->msg_namelen; if (!msg->msg_name) kmsg->msg_namelen = 0; if (kmsg->msg_namelen < 0) return -EINVAL; if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) kmsg->msg_namelen = sizeof(struct sockaddr_storage); kmsg->msg_control_is_user = true; kmsg->msg_get_inq = 0; kmsg->msg_control_user = compat_ptr(msg->msg_control); kmsg->msg_controllen = msg->msg_controllen; if (save_addr) *save_addr = compat_ptr(msg->msg_name); if (msg->msg_name && kmsg->msg_namelen) { if (!save_addr) { err = move_addr_to_kernel(compat_ptr(msg->msg_name), kmsg->msg_namelen, kmsg->msg_name); if (err < 0) return err; } } else { kmsg->msg_name = NULL; kmsg->msg_namelen = 0; } if (msg->msg_iovlen > UIO_MAXIOV) return -EMSGSIZE; kmsg->msg_iocb = NULL; kmsg->msg_ubuf = NULL; return 0; } int get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg, struct sockaddr __user **save_addr, struct iovec **iov) { struct compat_msghdr msg; ssize_t err; if (copy_from_user(&msg, umsg, sizeof(*umsg))) return -EFAULT; err = __get_compat_msghdr(kmsg, &msg, save_addr); if (err) return err; err = import_iovec(save_addr ? ITER_DEST : ITER_SOURCE, compat_ptr(msg.msg_iov), msg.msg_iovlen, UIO_FASTIOV, iov, &kmsg->msg_iter); return err < 0 ? err : 0; } /* Bleech... */ #define CMSG_COMPAT_ALIGN(len) ALIGN((len), sizeof(s32)) #define CMSG_COMPAT_DATA(cmsg) \ ((void __user *)((char __user *)(cmsg) + sizeof(struct compat_cmsghdr))) #define CMSG_COMPAT_SPACE(len) \ (sizeof(struct compat_cmsghdr) + CMSG_COMPAT_ALIGN(len)) #define CMSG_COMPAT_LEN(len) \ (sizeof(struct compat_cmsghdr) + (len)) #define CMSG_COMPAT_FIRSTHDR(msg) \ (((msg)->msg_controllen) >= sizeof(struct compat_cmsghdr) ? \ (struct compat_cmsghdr __user *)((msg)->msg_control_user) : \ (struct compat_cmsghdr __user *)NULL) #define CMSG_COMPAT_OK(ucmlen, ucmsg, mhdr) \ ((ucmlen) >= sizeof(struct compat_cmsghdr) && \ (ucmlen) <= (unsigned long) \ ((mhdr)->msg_controllen - \ ((char __user *)(ucmsg) - (char __user *)(mhdr)->msg_control_user))) static inline struct compat_cmsghdr __user *cmsg_compat_nxthdr(struct msghdr *msg, struct compat_cmsghdr __user *cmsg, int cmsg_len) { char __user *ptr = (char __user *)cmsg + CMSG_COMPAT_ALIGN(cmsg_len); if ((unsigned long)(ptr + 1 - (char __user *)msg->msg_control_user) > msg->msg_controllen) return NULL; return (struct compat_cmsghdr __user *)ptr; } /* There is a lot of hair here because the alignment rules (and * thus placement) of cmsg headers and length are different for * 32-bit apps. -DaveM */ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk, unsigned char *stackbuf, int stackbuf_size) { struct compat_cmsghdr __user *ucmsg; struct cmsghdr *kcmsg, *kcmsg_base; compat_size_t ucmlen; __kernel_size_t kcmlen, tmp; int err = -EFAULT; BUILD_BUG_ON(sizeof(struct compat_cmsghdr) != CMSG_COMPAT_ALIGN(sizeof(struct compat_cmsghdr))); kcmlen = 0; kcmsg_base = kcmsg = (struct cmsghdr *)stackbuf; ucmsg = CMSG_COMPAT_FIRSTHDR(kmsg); while (ucmsg != NULL) { if (get_user(ucmlen, &ucmsg->cmsg_len)) return -EFAULT; /* Catch bogons. */ if (!CMSG_COMPAT_OK(ucmlen, ucmsg, kmsg)) return -EINVAL; tmp = ((ucmlen - sizeof(*ucmsg)) + sizeof(struct cmsghdr)); tmp = CMSG_ALIGN(tmp); kcmlen += tmp; ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen); } if (kcmlen == 0) return -EINVAL; /* The kcmlen holds the 64-bit version of the control length. * It may not be modified as we do not stick it into the kmsg * until we have successfully copied over all of the data * from the user. */ if (kcmlen > stackbuf_size) kcmsg_base = kcmsg = sock_kmalloc(sk, kcmlen, GFP_KERNEL); if (kcmsg == NULL) return -ENOMEM; /* Now copy them over neatly. */ memset(kcmsg, 0, kcmlen); ucmsg = CMSG_COMPAT_FIRSTHDR(kmsg); while (ucmsg != NULL) { struct compat_cmsghdr cmsg; if (copy_from_user(&cmsg, ucmsg, sizeof(cmsg))) goto Efault; if (!CMSG_COMPAT_OK(cmsg.cmsg_len, ucmsg, kmsg)) goto Einval; tmp = ((cmsg.cmsg_len - sizeof(*ucmsg)) + sizeof(struct cmsghdr)); if ((char *)kcmsg_base + kcmlen - (char *)kcmsg < CMSG_ALIGN(tmp)) goto Einval; kcmsg->cmsg_len = tmp; kcmsg->cmsg_level = cmsg.cmsg_level; kcmsg->cmsg_type = cmsg.cmsg_type; tmp = CMSG_ALIGN(tmp); if (copy_from_user(CMSG_DATA(kcmsg), CMSG_COMPAT_DATA(ucmsg), (cmsg.cmsg_len - sizeof(*ucmsg)))) goto Efault; /* Advance. */ kcmsg = (struct cmsghdr *)((char *)kcmsg + tmp); ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, cmsg.cmsg_len); } /* * check the length of messages copied in is the same as the * what we get from the first loop */ if ((char *)kcmsg - (char *)kcmsg_base != kcmlen) goto Einval; /* Ok, looks like we made it. Hook it up and return success. */ kmsg->msg_control_is_user = false; kmsg->msg_control = kcmsg_base; kmsg->msg_controllen = kcmlen; return 0; Einval: err = -EINVAL; Efault: if (kcmsg_base != (struct cmsghdr *)stackbuf) sock_kfree_s(sk, kcmsg_base, kcmlen); return err; } int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *data) { struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *) kmsg->msg_control_user; struct compat_cmsghdr cmhdr; struct old_timeval32 ctv; struct old_timespec32 cts[3]; int cmlen; if (cm == NULL || kmsg->msg_controllen < sizeof(*cm)) { kmsg->msg_flags |= MSG_CTRUNC; return 0; /* XXX: return error? check spec. */ } if (!COMPAT_USE_64BIT_TIME) { if (level == SOL_SOCKET && type == SO_TIMESTAMP_OLD) { struct __kernel_old_timeval *tv = (struct __kernel_old_timeval *)data; ctv.tv_sec = tv->tv_sec; ctv.tv_usec = tv->tv_usec; data = &ctv; len = sizeof(ctv); } if (level == SOL_SOCKET && (type == SO_TIMESTAMPNS_OLD || type == SO_TIMESTAMPING_OLD)) { int count = type == SO_TIMESTAMPNS_OLD ? 1 : 3; int i; struct __kernel_old_timespec *ts = data; for (i = 0; i < count; i++) { cts[i].tv_sec = ts[i].tv_sec; cts[i].tv_nsec = ts[i].tv_nsec; } data = &cts; len = sizeof(cts[0]) * count; } } cmlen = CMSG_COMPAT_LEN(len); if (kmsg->msg_controllen < cmlen) { kmsg->msg_flags |= MSG_CTRUNC; cmlen = kmsg->msg_controllen; } cmhdr.cmsg_level = level; cmhdr.cmsg_type = type; cmhdr.cmsg_len = cmlen; if (copy_to_user(cm, &cmhdr, sizeof cmhdr)) return -EFAULT; if (copy_to_user(CMSG_COMPAT_DATA(cm), data, cmlen - sizeof(struct compat_cmsghdr))) return -EFAULT; cmlen = CMSG_COMPAT_SPACE(len); if (kmsg->msg_controllen < cmlen) cmlen = kmsg->msg_controllen; kmsg->msg_control_user += cmlen; kmsg->msg_controllen -= cmlen; return 0; } static int scm_max_fds_compat(struct msghdr *msg) { if (msg->msg_controllen <= sizeof(struct compat_cmsghdr)) return 0; return (msg->msg_controllen - sizeof(struct compat_cmsghdr)) / sizeof(int); } void scm_detach_fds_compat(struct msghdr *msg, struct scm_cookie *scm) { struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *)msg->msg_control_user; unsigned int o_flags = (msg->msg_flags & MSG_CMSG_CLOEXEC) ? O_CLOEXEC : 0; int fdmax = min_t(int, scm_max_fds_compat(msg), scm->fp->count); int __user *cmsg_data = CMSG_COMPAT_DATA(cm); int err = 0, i; for (i = 0; i < fdmax; i++) { err = scm_recv_one_fd(scm->fp->fp[i], cmsg_data + i, o_flags); if (err < 0) break; } if (i > 0) { int cmlen = CMSG_COMPAT_LEN(i * sizeof(int)); err = put_user(SOL_SOCKET, &cm->cmsg_level); if (!err) err = put_user(SCM_RIGHTS, &cm->cmsg_type); if (!err) err = put_user(cmlen, &cm->cmsg_len); if (!err) { cmlen = CMSG_COMPAT_SPACE(i * sizeof(int)); if (msg->msg_controllen < cmlen) cmlen = msg->msg_controllen; msg->msg_control_user += cmlen; msg->msg_controllen -= cmlen; } } if (i < scm->fp->count || (scm->fp->count && fdmax <= 0)) msg->msg_flags |= MSG_CTRUNC; /* * All of the files that fit in the message have had their usage counts * incremented, so we just free the list. */ __scm_destroy(scm); } /* Argument list sizes for compat_sys_socketcall */ #define AL(x) ((x) * sizeof(u32)) static unsigned char nas[21] = { AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), AL(4), AL(5), AL(4) }; #undef AL static inline long __compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags) { return __sys_sendmsg(fd, (struct user_msghdr __user *)msg, flags | MSG_CMSG_COMPAT, false); } COMPAT_SYSCALL_DEFINE3(sendmsg, int, fd, struct compat_msghdr __user *, msg, unsigned int, flags) { return __compat_sys_sendmsg(fd, msg, flags); } static inline long __compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, unsigned int vlen, unsigned int flags) { return __sys_sendmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, flags | MSG_CMSG_COMPAT, false); } COMPAT_SYSCALL_DEFINE4(sendmmsg, int, fd, struct compat_mmsghdr __user *, mmsg, unsigned int, vlen, unsigned int, flags) { return __compat_sys_sendmmsg(fd, mmsg, vlen, flags); } static inline long __compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags) { return __sys_recvmsg(fd, (struct user_msghdr __user *)msg, flags | MSG_CMSG_COMPAT, false); } COMPAT_SYSCALL_DEFINE3(recvmsg, int, fd, struct compat_msghdr __user *, msg, unsigned int, flags) { return __compat_sys_recvmsg(fd, msg, flags); } static inline long __compat_sys_recvfrom(int fd, void __user *buf, compat_size_t len, unsigned int flags, struct sockaddr __user *addr, int __user *addrlen) { return __sys_recvfrom(fd, buf, len, flags | MSG_CMSG_COMPAT, addr, addrlen); } COMPAT_SYSCALL_DEFINE4(recv, int, fd, void __user *, buf, compat_size_t, len, unsigned int, flags) { return __compat_sys_recvfrom(fd, buf, len, flags, NULL, NULL); } COMPAT_SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, buf, compat_size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int __user *, addrlen) { return __compat_sys_recvfrom(fd, buf, len, flags, addr, addrlen); } COMPAT_SYSCALL_DEFINE5(recvmmsg_time64, int, fd, struct compat_mmsghdr __user *, mmsg, unsigned int, vlen, unsigned int, flags, struct __kernel_timespec __user *, timeout) { return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, flags | MSG_CMSG_COMPAT, timeout, NULL); } #ifdef CONFIG_COMPAT_32BIT_TIME COMPAT_SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct compat_mmsghdr __user *, mmsg, unsigned int, vlen, unsigned int, flags, struct old_timespec32 __user *, timeout) { return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, flags | MSG_CMSG_COMPAT, NULL, timeout); } #endif COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args) { u32 a[AUDITSC_ARGS]; unsigned int len; u32 a0, a1; int ret; if (call < SYS_SOCKET || call > SYS_SENDMMSG) return -EINVAL; len = nas[call]; if (len > sizeof(a)) return -EINVAL; if (copy_from_user(a, args, len)) return -EFAULT; ret = audit_socketcall_compat(len / sizeof(a[0]), a); if (ret) return ret; a0 = a[0]; a1 = a[1]; switch (call) { case SYS_SOCKET: ret = __sys_socket(a0, a1, a[2]); break; case SYS_BIND: ret = __sys_bind(a0, compat_ptr(a1), a[2]); break; case SYS_CONNECT: ret = __sys_connect(a0, compat_ptr(a1), a[2]); break; case SYS_LISTEN: ret = __sys_listen(a0, a1); break; case SYS_ACCEPT: ret = __sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), 0); break; case SYS_GETSOCKNAME: ret = __sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2])); break; case SYS_GETPEERNAME: ret = __sys_getpeername(a0, compat_ptr(a1), compat_ptr(a[2])); break; case SYS_SOCKETPAIR: ret = __sys_socketpair(a0, a1, a[2], compat_ptr(a[3])); break; case SYS_SEND: ret = __sys_sendto(a0, compat_ptr(a1), a[2], a[3], NULL, 0); break; case SYS_SENDTO: ret = __sys_sendto(a0, compat_ptr(a1), a[2], a[3], compat_ptr(a[4]), a[5]); break; case SYS_RECV: ret = __compat_sys_recvfrom(a0, compat_ptr(a1), a[2], a[3], NULL, NULL); break; case SYS_RECVFROM: ret = __compat_sys_recvfrom(a0, compat_ptr(a1), a[2], a[3], compat_ptr(a[4]), compat_ptr(a[5])); break; case SYS_SHUTDOWN: ret = __sys_shutdown(a0, a1); break; case SYS_SETSOCKOPT: ret = __sys_setsockopt(a0, a1, a[2], compat_ptr(a[3]), a[4]); break; case SYS_GETSOCKOPT: ret = __sys_getsockopt(a0, a1, a[2], compat_ptr(a[3]), compat_ptr(a[4])); break; case SYS_SENDMSG: ret = __compat_sys_sendmsg(a0, compat_ptr(a1), a[2]); break; case SYS_SENDMMSG: ret = __compat_sys_sendmmsg(a0, compat_ptr(a1), a[2], a[3]); break; case SYS_RECVMSG: ret = __compat_sys_recvmsg(a0, compat_ptr(a1), a[2]); break; case SYS_RECVMMSG: ret = __sys_recvmmsg(a0, compat_ptr(a1), a[2], a[3] | MSG_CMSG_COMPAT, NULL, compat_ptr(a[4])); break; case SYS_ACCEPT4: ret = __sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]); break; default: ret = -EINVAL; break; } return ret; } |
2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 | // SPDX-License-Identifier: GPL-2.0-or-later /* paravirtual clock -- common code used by kvm/xen */ #include <linux/clocksource.h> #include <linux/kernel.h> #include <linux/percpu.h> #include <linux/notifier.h> #include <linux/sched.h> #include <linux/gfp.h> #include <linux/memblock.h> #include <linux/nmi.h> #include <asm/fixmap.h> #include <asm/pvclock.h> #include <asm/vgtod.h> static u8 valid_flags __read_mostly = 0; static struct pvclock_vsyscall_time_info *pvti_cpu0_va __read_mostly; void pvclock_set_flags(u8 flags) { valid_flags = flags; } unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src) { u64 pv_tsc_khz = 1000000ULL << 32; do_div(pv_tsc_khz, src->tsc_to_system_mul); if (src->tsc_shift < 0) pv_tsc_khz <<= -src->tsc_shift; else pv_tsc_khz >>= src->tsc_shift; return pv_tsc_khz; } void pvclock_touch_watchdogs(void) { touch_softlockup_watchdog_sync(); clocksource_touch_watchdog(); rcu_cpu_stall_reset(); reset_hung_task_detector(); } static atomic64_t last_value = ATOMIC64_INIT(0); void pvclock_resume(void) { atomic64_set(&last_value, 0); } u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src) { unsigned version; u8 flags; do { version = pvclock_read_begin(src); flags = src->flags; } while (pvclock_read_retry(src, version)); return flags & valid_flags; } static __always_inline u64 __pvclock_clocksource_read(struct pvclock_vcpu_time_info *src, bool dowd) { unsigned version; u64 ret; u64 last; u8 flags; do { version = pvclock_read_begin(src); ret = __pvclock_read_cycles(src, rdtsc_ordered()); flags = src->flags; } while (pvclock_read_retry(src, version)); if (dowd && unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) { src->flags &= ~PVCLOCK_GUEST_STOPPED; pvclock_touch_watchdogs(); } if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) && (flags & PVCLOCK_TSC_STABLE_BIT)) return ret; /* * Assumption here is that last_value, a global accumulator, always goes * forward. If we are less than that, we should not be much smaller. * We assume there is an error margin we're inside, and then the correction * does not sacrifice accuracy. * * For reads: global may have changed between test and return, * but this means someone else updated poked the clock at a later time. * We just need to make sure we are not seeing a backwards event. * * For updates: last_value = ret is not enough, since two vcpus could be * updating at the same time, and one of them could be slightly behind, * making the assumption that last_value always go forward fail to hold. */ last = raw_atomic64_read(&last_value); do { if (ret <= last) return last; } while (!raw_atomic64_try_cmpxchg(&last_value, &last, ret)); return ret; } u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) { return __pvclock_clocksource_read(src, true); } noinstr u64 pvclock_clocksource_read_nowd(struct pvclock_vcpu_time_info *src) { return __pvclock_clocksource_read(src, false); } void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, struct pvclock_vcpu_time_info *vcpu_time, struct timespec64 *ts) { u32 version; u64 delta; struct timespec64 now; /* get wallclock at system boot */ do { version = wall_clock->version; rmb(); /* fetch version before time */ /* * Note: wall_clock->sec is a u32 value, so it can * only store dates between 1970 and 2106. To allow * times beyond that, we need to create a new hypercall * interface with an extended pvclock_wall_clock structure * like ARM has. */ now.tv_sec = wall_clock->sec; now.tv_nsec = wall_clock->nsec; rmb(); /* fetch time before checking version */ } while ((wall_clock->version & 1) || (version != wall_clock->version)); delta = pvclock_clocksource_read(vcpu_time); /* time since system boot */ delta += now.tv_sec * NSEC_PER_SEC + now.tv_nsec; now.tv_nsec = do_div(delta, NSEC_PER_SEC); now.tv_sec = delta; set_normalized_timespec64(ts, now.tv_sec, now.tv_nsec); } void pvclock_set_pvti_cpu0_va(struct pvclock_vsyscall_time_info *pvti) { WARN_ON(vclock_was_used(VDSO_CLOCKMODE_PVCLOCK)); pvti_cpu0_va = pvti; } struct pvclock_vsyscall_time_info *pvclock_get_pvti_cpu0_va(void) { return pvti_cpu0_va; } EXPORT_SYMBOL_GPL(pvclock_get_pvti_cpu0_va); |
2390 2392 2395 2009 610 1815 1518 1833 953 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 | // SPDX-License-Identifier: GPL-2.0 /* * linux/lib/kasprintf.c * * Copyright (C) 1991, 1992 Linus Torvalds */ #include <linux/stdarg.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/string.h> /* Simplified asprintf. */ char *kvasprintf(gfp_t gfp, const char *fmt, va_list ap) { unsigned int first, second; char *p; va_list aq; va_copy(aq, ap); first = vsnprintf(NULL, 0, fmt, aq); va_end(aq); p = kmalloc_track_caller(first+1, gfp); if (!p) return NULL; second = vsnprintf(p, first+1, fmt, ap); WARN(first != second, "different return values (%u and %u) from vsnprintf(\"%s\", ...)", first, second, fmt); return p; } EXPORT_SYMBOL(kvasprintf); /* * If fmt contains no % (or is exactly %s), use kstrdup_const. If fmt * (or the sole vararg) points to rodata, we will then save a memory * allocation and string copy. In any case, the return value should be * freed using kfree_const(). */ const char *kvasprintf_const(gfp_t gfp, const char *fmt, va_list ap) { if (!strchr(fmt, '%')) return kstrdup_const(fmt, gfp); if (!strcmp(fmt, "%s")) return kstrdup_const(va_arg(ap, const char*), gfp); return kvasprintf(gfp, fmt, ap); } EXPORT_SYMBOL(kvasprintf_const); char *kasprintf(gfp_t gfp, const char *fmt, ...) { va_list ap; char *p; va_start(ap, fmt); p = kvasprintf(gfp, fmt, ap); va_end(ap); return p; } EXPORT_SYMBOL(kasprintf); |
59 146 151 173 126 1 2 2 1 55 109 109 153 295 1 295 153 55 67 184 127 186 178 66 67 66 295 186 186 295 294 295 295 295 56 56 56 56 56 1 56 295 118 62 67 118 118 294 56 56 56 54 55 54 55 24 24 23 19 4 43 43 3 6 34 38 38 38 22 22 1 22 22 129 129 129 129 59 59 59 59 136 136 136 136 136 7 5 4 4 2 2 2 2 135 136 135 4 4 4 4 1 4 4 4 4 5 6 5 1 5 2 5 1 5 1 1 126 170 171 55 1 171 126 126 171 126 126 125 55 55 55 55 17 17 17 17 166 17 180 180 180 178 1 180 165 42 180 179 177 180 121 179 168 180 178 24 22 1 24 249 249 249 244 245 124 249 7 138 138 81 23 23 22 1 1 1 23 7 7 1 7 124 122 21 21 124 56 55 56 56 50 3 123 73 1 1 1 1 3 3 1 8 8 1 1 58 56 56 56 26 1 1 1 1 1 4 139 138 83 8 8 69 69 69 69 68 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 | // SPDX-License-Identifier: GPL-2.0+ /* * dummy_hcd.c -- Dummy/Loopback USB host and device emulator driver. * * Maintainer: Alan Stern <stern@rowland.harvard.edu> * * Copyright (C) 2003 David Brownell * Copyright (C) 2003-2005 Alan Stern */ /* * This exposes a device side "USB gadget" API, driven by requests to a * Linux-USB host controller driver. USB traffic is simulated; there's * no need for USB hardware. Use this with two other drivers: * * - Gadget driver, responding to requests (device); * - Host-side device driver, as already familiar in Linux. * * Having this all in one kernel can help some stages of development, * bypassing some hardware (and driver) issues. UML could help too. * * Note: The emulation does not include isochronous transfers! */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/delay.h> #include <linux/ioport.h> #include <linux/slab.h> #include <linux/string_choices.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/hrtimer.h> #include <linux/list.h> #include <linux/interrupt.h> #include <linux/platform_device.h> #include <linux/usb.h> #include <linux/usb/gadget.h> #include <linux/usb/hcd.h> #include <linux/scatterlist.h> #include <asm/byteorder.h> #include <linux/io.h> #include <asm/irq.h> #include <linux/unaligned.h> #define DRIVER_DESC "USB Host+Gadget Emulator" #define DRIVER_VERSION "02 May 2005" #define POWER_BUDGET 500 /* in mA; use 8 for low-power port testing */ #define POWER_BUDGET_3 900 /* in mA */ #define DUMMY_TIMER_INT_NSECS 125000 /* 1 microframe */ static const char driver_name[] = "dummy_hcd"; static const char driver_desc[] = "USB Host+Gadget Emulator"; static const char gadget_name[] = "dummy_udc"; MODULE_DESCRIPTION(DRIVER_DESC); MODULE_AUTHOR("David Brownell"); MODULE_LICENSE("GPL"); struct dummy_hcd_module_parameters { bool is_super_speed; bool is_high_speed; unsigned int num; }; static struct dummy_hcd_module_parameters mod_data = { .is_super_speed = false, .is_high_speed = true, .num = 1, }; module_param_named(is_super_speed, mod_data.is_super_speed, bool, S_IRUGO); MODULE_PARM_DESC(is_super_speed, "true to simulate SuperSpeed connection"); module_param_named(is_high_speed, mod_data.is_high_speed, bool, S_IRUGO); MODULE_PARM_DESC(is_high_speed, "true to simulate HighSpeed connection"); module_param_named(num, mod_data.num, uint, S_IRUGO); MODULE_PARM_DESC(num, "number of emulated controllers"); /*-------------------------------------------------------------------------*/ /* gadget side driver data structures */ struct dummy_ep { struct list_head queue; unsigned long last_io; /* jiffies timestamp */ struct usb_gadget *gadget; const struct usb_endpoint_descriptor *desc; struct usb_ep ep; unsigned halted:1; unsigned wedged:1; unsigned already_seen:1; unsigned setup_stage:1; unsigned stream_en:1; }; struct dummy_request { struct list_head queue; /* ep's requests */ struct usb_request req; }; static inline struct dummy_ep *usb_ep_to_dummy_ep(struct usb_ep *_ep) { return container_of(_ep, struct dummy_ep, ep); } static inline struct dummy_request *usb_request_to_dummy_request (struct usb_request *_req) { return container_of(_req, struct dummy_request, req); } /*-------------------------------------------------------------------------*/ /* * Every device has ep0 for control requests, plus up to 30 more endpoints, * in one of two types: * * - Configurable: direction (in/out), type (bulk, iso, etc), and endpoint * number can be changed. Names like "ep-a" are used for this type. * * - Fixed Function: in other cases. some characteristics may be mutable; * that'd be hardware-specific. Names like "ep12out-bulk" are used. * * Gadget drivers are responsible for not setting up conflicting endpoint * configurations, illegal or unsupported packet lengths, and so on. */ static const char ep0name[] = "ep0"; static const struct { const char *name; const struct usb_ep_caps caps; } ep_info[] = { #define EP_INFO(_name, _caps) \ { \ .name = _name, \ .caps = _caps, \ } /* we don't provide isochronous endpoints since we don't support them */ #define TYPE_BULK_OR_INT (USB_EP_CAPS_TYPE_BULK | USB_EP_CAPS_TYPE_INT) /* everyone has ep0 */ EP_INFO(ep0name, USB_EP_CAPS(USB_EP_CAPS_TYPE_CONTROL, USB_EP_CAPS_DIR_ALL)), /* act like a pxa250: fifteen fixed function endpoints */ EP_INFO("ep1in-bulk", USB_EP_CAPS(USB_EP_CAPS_TYPE_BULK, USB_EP_CAPS_DIR_IN)), EP_INFO("ep2out-bulk", USB_EP_CAPS(USB_EP_CAPS_TYPE_BULK, USB_EP_CAPS_DIR_OUT)), /* EP_INFO("ep3in-iso", USB_EP_CAPS(USB_EP_CAPS_TYPE_ISO, USB_EP_CAPS_DIR_IN)), EP_INFO("ep4out-iso", USB_EP_CAPS(USB_EP_CAPS_TYPE_ISO, USB_EP_CAPS_DIR_OUT)), */ EP_INFO("ep5in-int", USB_EP_CAPS(USB_EP_CAPS_TYPE_INT, USB_EP_CAPS_DIR_IN)), EP_INFO("ep6in-bulk", USB_EP_CAPS(USB_EP_CAPS_TYPE_BULK, USB_EP_CAPS_DIR_IN)), EP_INFO("ep7out-bulk", USB_EP_CAPS(USB_EP_CAPS_TYPE_BULK, USB_EP_CAPS_DIR_OUT)), /* EP_INFO("ep8in-iso", USB_EP_CAPS(USB_EP_CAPS_TYPE_ISO, USB_EP_CAPS_DIR_IN)), EP_INFO("ep9out-iso", USB_EP_CAPS(USB_EP_CAPS_TYPE_ISO, USB_EP_CAPS_DIR_OUT)), */ EP_INFO("ep10in-int", USB_EP_CAPS(USB_EP_CAPS_TYPE_INT, USB_EP_CAPS_DIR_IN)), EP_INFO("ep11in-bulk", USB_EP_CAPS(USB_EP_CAPS_TYPE_BULK, USB_EP_CAPS_DIR_IN)), EP_INFO("ep12out-bulk", USB_EP_CAPS(USB_EP_CAPS_TYPE_BULK, USB_EP_CAPS_DIR_OUT)), /* EP_INFO("ep13in-iso", USB_EP_CAPS(USB_EP_CAPS_TYPE_ISO, USB_EP_CAPS_DIR_IN)), EP_INFO("ep14out-iso", USB_EP_CAPS(USB_EP_CAPS_TYPE_ISO, USB_EP_CAPS_DIR_OUT)), */ EP_INFO("ep15in-int", USB_EP_CAPS(USB_EP_CAPS_TYPE_INT, USB_EP_CAPS_DIR_IN)), /* or like sa1100: two fixed function endpoints */ EP_INFO("ep1out-bulk", USB_EP_CAPS(USB_EP_CAPS_TYPE_BULK, USB_EP_CAPS_DIR_OUT)), EP_INFO("ep2in-bulk", USB_EP_CAPS(USB_EP_CAPS_TYPE_BULK, USB_EP_CAPS_DIR_IN)), /* and now some generic EPs so we have enough in multi config */ EP_INFO("ep-aout", USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_OUT)), EP_INFO("ep-bin", USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_IN)), EP_INFO("ep-cout", USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_OUT)), EP_INFO("ep-dout", USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_OUT)), EP_INFO("ep-ein", USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_IN)), EP_INFO("ep-fout", USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_OUT)), EP_INFO("ep-gin", USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_IN)), EP_INFO("ep-hout", USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_OUT)), EP_INFO("ep-iout", USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_OUT)), EP_INFO("ep-jin", USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_IN)), EP_INFO("ep-kout", USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_OUT)), EP_INFO("ep-lin", USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_IN)), EP_INFO("ep-mout", USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_OUT)), #undef EP_INFO }; #define DUMMY_ENDPOINTS ARRAY_SIZE(ep_info) /*-------------------------------------------------------------------------*/ #define FIFO_SIZE 64 struct urbp { struct urb *urb; struct list_head urbp_list; struct sg_mapping_iter miter; u32 miter_started; }; enum dummy_rh_state { DUMMY_RH_RESET, DUMMY_RH_SUSPENDED, DUMMY_RH_RUNNING }; struct dummy_hcd { struct dummy *dum; enum dummy_rh_state rh_state; struct hrtimer timer; u32 port_status; u32 old_status; unsigned long re_timeout; struct usb_device *udev; struct list_head urbp_list; struct urbp *next_frame_urbp; u32 stream_en_ep; u8 num_stream[30 / 2]; unsigned timer_pending:1; unsigned active:1; unsigned old_active:1; unsigned resuming:1; }; struct dummy { spinlock_t lock; /* * DEVICE/GADGET side support */ struct dummy_ep ep[DUMMY_ENDPOINTS]; int address; int callback_usage; struct usb_gadget gadget; struct usb_gadget_driver *driver; struct dummy_request fifo_req; u8 fifo_buf[FIFO_SIZE]; u16 devstatus; unsigned ints_enabled:1; unsigned udc_suspended:1; unsigned pullup:1; /* * HOST side support */ struct dummy_hcd *hs_hcd; struct dummy_hcd *ss_hcd; }; static inline struct dummy_hcd *hcd_to_dummy_hcd(struct usb_hcd *hcd) { return (struct dummy_hcd *) (hcd->hcd_priv); } static inline struct usb_hcd *dummy_hcd_to_hcd(struct dummy_hcd *dum) { return container_of((void *) dum, struct usb_hcd, hcd_priv); } static inline struct device *dummy_dev(struct dummy_hcd *dum) { return dummy_hcd_to_hcd(dum)->self.controller; } static inline struct device *udc_dev(struct dummy *dum) { return dum->gadget.dev.parent; } static inline struct dummy *ep_to_dummy(struct dummy_ep *ep) { return container_of(ep->gadget, struct dummy, gadget); } static inline struct dummy_hcd *gadget_to_dummy_hcd(struct usb_gadget *gadget) { struct dummy *dum = container_of(gadget, struct dummy, gadget); if (dum->gadget.speed == USB_SPEED_SUPER) return dum->ss_hcd; else return dum->hs_hcd; } static inline struct dummy *gadget_dev_to_dummy(struct device *dev) { return container_of(dev, struct dummy, gadget.dev); } /*-------------------------------------------------------------------------*/ /* DEVICE/GADGET SIDE UTILITY ROUTINES */ /* called with spinlock held */ static void nuke(struct dummy *dum, struct dummy_ep *ep) { while (!list_empty(&ep->queue)) { struct dummy_request *req; req = list_entry(ep->queue.next, struct dummy_request, queue); list_del_init(&req->queue); req->req.status = -ESHUTDOWN; spin_unlock(&dum->lock); usb_gadget_giveback_request(&ep->ep, &req->req); spin_lock(&dum->lock); } } /* caller must hold lock */ static void stop_activity(struct dummy *dum) { int i; /* prevent any more requests */ dum->address = 0; /* The timer is left running so that outstanding URBs can fail */ /* nuke any pending requests first, so driver i/o is quiesced */ for (i = 0; i < DUMMY_ENDPOINTS; ++i) nuke(dum, &dum->ep[i]); /* driver now does any non-usb quiescing necessary */ } /** * set_link_state_by_speed() - Sets the current state of the link according to * the hcd speed * @dum_hcd: pointer to the dummy_hcd structure to update the link state for * * This function updates the port_status according to the link state and the * speed of the hcd. */ static void set_link_state_by_speed(struct dummy_hcd *dum_hcd) { struct dummy *dum = dum_hcd->dum; if (dummy_hcd_to_hcd(dum_hcd)->speed == HCD_USB3) { if ((dum_hcd->port_status & USB_SS_PORT_STAT_POWER) == 0) { dum_hcd->port_status = 0; } else if (!dum->pullup || dum->udc_suspended) { /* UDC suspend must cause a disconnect */ dum_hcd->port_status &= ~(USB_PORT_STAT_CONNECTION | USB_PORT_STAT_ENABLE); if ((dum_hcd->old_status & USB_PORT_STAT_CONNECTION) != 0) dum_hcd->port_status |= (USB_PORT_STAT_C_CONNECTION << 16); } else { /* device is connected and not suspended */ dum_hcd->port_status |= (USB_PORT_STAT_CONNECTION | USB_PORT_STAT_SPEED_5GBPS) ; if ((dum_hcd->old_status & USB_PORT_STAT_CONNECTION) == 0) dum_hcd->port_status |= (USB_PORT_STAT_C_CONNECTION << 16); if ((dum_hcd->port_status & USB_PORT_STAT_ENABLE) && (dum_hcd->port_status & USB_PORT_STAT_LINK_STATE) == USB_SS_PORT_LS_U0 && dum_hcd->rh_state != DUMMY_RH_SUSPENDED) dum_hcd->active = 1; } } else { if ((dum_hcd->port_status & USB_PORT_STAT_POWER) == 0) { dum_hcd->port_status = 0; } else if (!dum->pullup || dum->udc_suspended) { /* UDC suspend must cause a disconnect */ dum_hcd->port_status &= ~(USB_PORT_STAT_CONNECTION | USB_PORT_STAT_ENABLE | USB_PORT_STAT_LOW_SPEED | USB_PORT_STAT_HIGH_SPEED | USB_PORT_STAT_SUSPEND); if ((dum_hcd->old_status & USB_PORT_STAT_CONNECTION) != 0) dum_hcd->port_status |= (USB_PORT_STAT_C_CONNECTION << 16); } else { dum_hcd->port_status |= USB_PORT_STAT_CONNECTION; if ((dum_hcd->old_status & USB_PORT_STAT_CONNECTION) == 0) dum_hcd->port_status |= (USB_PORT_STAT_C_CONNECTION << 16); if ((dum_hcd->port_status & USB_PORT_STAT_ENABLE) == 0) dum_hcd->port_status &= ~USB_PORT_STAT_SUSPEND; else if ((dum_hcd->port_status & USB_PORT_STAT_SUSPEND) == 0 && dum_hcd->rh_state != DUMMY_RH_SUSPENDED) dum_hcd->active = 1; } } } /* caller must hold lock */ static void set_link_state(struct dummy_hcd *dum_hcd) __must_hold(&dum->lock) { struct dummy *dum = dum_hcd->dum; unsigned int power_bit; dum_hcd->active = 0; if (dum->pullup) if ((dummy_hcd_to_hcd(dum_hcd)->speed == HCD_USB3 && dum->gadget.speed != USB_SPEED_SUPER) || (dummy_hcd_to_hcd(dum_hcd)->speed != HCD_USB3 && dum->gadget.speed == USB_SPEED_SUPER)) return; set_link_state_by_speed(dum_hcd); power_bit = (dummy_hcd_to_hcd(dum_hcd)->speed == HCD_USB3 ? USB_SS_PORT_STAT_POWER : USB_PORT_STAT_POWER); if ((dum_hcd->port_status & USB_PORT_STAT_ENABLE) == 0 || dum_hcd->active) dum_hcd->resuming = 0; /* Currently !connected or in reset */ if ((dum_hcd->port_status & power_bit) == 0 || (dum_hcd->port_status & USB_PORT_STAT_RESET) != 0) { unsigned int disconnect = power_bit & dum_hcd->old_status & (~dum_hcd->port_status); unsigned int reset = USB_PORT_STAT_RESET & (~dum_hcd->old_status) & dum_hcd->port_status; /* Report reset and disconnect events to the driver */ if (dum->ints_enabled && (disconnect || reset)) { stop_activity(dum); ++dum->callback_usage; spin_unlock(&dum->lock); if (reset) usb_gadget_udc_reset(&dum->gadget, dum->driver); else dum->driver->disconnect(&dum->gadget); spin_lock(&dum->lock); --dum->callback_usage; } } else if (dum_hcd->active != dum_hcd->old_active && dum->ints_enabled) { ++dum->callback_usage; spin_unlock(&dum->lock); if (dum_hcd->old_active && dum->driver->suspend) dum->driver->suspend(&dum->gadget); else if (!dum_hcd->old_active && dum->driver->resume) dum->driver->resume(&dum->gadget); spin_lock(&dum->lock); --dum->callback_usage; } dum_hcd->old_status = dum_hcd->port_status; dum_hcd->old_active = dum_hcd->active; } /*-------------------------------------------------------------------------*/ /* DEVICE/GADGET SIDE DRIVER * * This only tracks gadget state. All the work is done when the host * side tries some (emulated) i/o operation. Real device controller * drivers would do real i/o using dma, fifos, irqs, timers, etc. */ #define is_enabled(dum) \ (dum->port_status & USB_PORT_STAT_ENABLE) static int dummy_enable(struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc) { struct dummy *dum; struct dummy_hcd *dum_hcd; struct dummy_ep *ep; unsigned max; int retval; ep = usb_ep_to_dummy_ep(_ep); if (!_ep || !desc || ep->desc || _ep->name == ep0name || desc->bDescriptorType != USB_DT_ENDPOINT) return -EINVAL; dum = ep_to_dummy(ep); if (!dum->driver) return -ESHUTDOWN; dum_hcd = gadget_to_dummy_hcd(&dum->gadget); if (!is_enabled(dum_hcd)) return -ESHUTDOWN; /* * For HS/FS devices only bits 0..10 of the wMaxPacketSize represent the * maximum packet size. * For SS devices the wMaxPacketSize is limited by 1024. */ max = usb_endpoint_maxp(desc); /* drivers must not request bad settings, since lower levels * (hardware or its drivers) may not check. some endpoints * can't do iso, many have maxpacket limitations, etc. * * since this "hardware" driver is here to help debugging, we * have some extra sanity checks. (there could be more though, * especially for "ep9out" style fixed function ones.) */ retval = -EINVAL; switch (usb_endpoint_type(desc)) { case USB_ENDPOINT_XFER_BULK: if (strstr(ep->ep.name, "-iso") || strstr(ep->ep.name, "-int")) { goto done; } switch (dum->gadget.speed) { case USB_SPEED_SUPER: if (max == 1024) break; goto done; case USB_SPEED_HIGH: if (max == 512) break; goto done; case USB_SPEED_FULL: if (max == 8 || max == 16 || max == 32 || max == 64) /* we'll fake any legal size */ break; /* save a return statement */ fallthrough; default: goto done; } break; case USB_ENDPOINT_XFER_INT: if (strstr(ep->ep.name, "-iso")) /* bulk is ok */ goto done; /* real hardware might not handle all packet sizes */ switch (dum->gadget.speed) { case USB_SPEED_SUPER: case USB_SPEED_HIGH: if (max <= 1024) break; /* save a return statement */ fallthrough; case USB_SPEED_FULL: if (max <= 64) break; /* save a return statement */ fallthrough; default: if (max <= 8) break; goto done; } break; case USB_ENDPOINT_XFER_ISOC: if (strstr(ep->ep.name, "-bulk") || strstr(ep->ep.name, "-int")) goto done; /* real hardware might not handle all packet sizes */ switch (dum->gadget.speed) { case USB_SPEED_SUPER: case USB_SPEED_HIGH: if (max <= 1024) break; /* save a return statement */ fallthrough; case USB_SPEED_FULL: if (max <= 1023) break; /* save a return statement */ fallthrough; default: goto done; } break; default: /* few chips support control except on ep0 */ goto done; } _ep->maxpacket = max; if (usb_ss_max_streams(_ep->comp_desc)) { if (!usb_endpoint_xfer_bulk(desc)) { dev_err(udc_dev(dum), "Can't enable stream support on " "non-bulk ep %s\n", _ep->name); return -EINVAL; } ep->stream_en = 1; } ep->desc = desc; dev_dbg(udc_dev(dum), "enabled %s (ep%d%s-%s) maxpacket %d stream %s\n", _ep->name, desc->bEndpointAddress & 0x0f, (desc->bEndpointAddress & USB_DIR_IN) ? "in" : "out", usb_ep_type_string(usb_endpoint_type(desc)), max, str_enabled_disabled(ep->stream_en)); /* at this point real hardware should be NAKing transfers * to that endpoint, until a buffer is queued to it. */ ep->halted = ep->wedged = 0; retval = 0; done: return retval; } static int dummy_disable(struct usb_ep *_ep) { struct dummy_ep *ep; struct dummy *dum; unsigned long flags; ep = usb_ep_to_dummy_ep(_ep); if (!_ep || !ep->desc || _ep->name == ep0name) return -EINVAL; dum = ep_to_dummy(ep); spin_lock_irqsave(&dum->lock, flags); ep->desc = NULL; ep->stream_en = 0; nuke(dum, ep); spin_unlock_irqrestore(&dum->lock, flags); dev_dbg(udc_dev(dum), "disabled %s\n", _ep->name); return 0; } static struct usb_request *dummy_alloc_request(struct usb_ep *_ep, gfp_t mem_flags) { struct dummy_request *req; if (!_ep) return NULL; req = kzalloc(sizeof(*req), mem_flags); if (!req) return NULL; INIT_LIST_HEAD(&req->queue); return &req->req; } static void dummy_free_request(struct usb_ep *_ep, struct usb_request *_req) { struct dummy_request *req; if (!_ep || !_req) { WARN_ON(1); return; } req = usb_request_to_dummy_request(_req); WARN_ON(!list_empty(&req->queue)); kfree(req); } static void fifo_complete(struct usb_ep *ep, struct usb_request *req) { } static int dummy_queue(struct usb_ep *_ep, struct usb_request *_req, gfp_t mem_flags) { struct dummy_ep *ep; struct dummy_request *req; struct dummy *dum; struct dummy_hcd *dum_hcd; unsigned long flags; req = usb_request_to_dummy_request(_req); if (!_req || !list_empty(&req->queue) || !_req->complete) return -EINVAL; ep = usb_ep_to_dummy_ep(_ep); if (!_ep || (!ep->desc && _ep->name != ep0name)) return -EINVAL; dum = ep_to_dummy(ep); dum_hcd = gadget_to_dummy_hcd(&dum->gadget); if (!dum->driver || !is_enabled(dum_hcd)) return -ESHUTDOWN; #if 0 dev_dbg(udc_dev(dum), "ep %p queue req %p to %s, len %d buf %p\n", ep, _req, _ep->name, _req->length, _req->buf); #endif _req->status = -EINPROGRESS; _req->actual = 0; spin_lock_irqsave(&dum->lock, flags); /* implement an emulated single-request FIFO */ if (ep->desc && (ep->desc->bEndpointAddress & USB_DIR_IN) && list_empty(&dum->fifo_req.queue) && list_empty(&ep->queue) && _req->length <= FIFO_SIZE) { req = &dum->fifo_req; req->req = *_req; req->req.buf = dum->fifo_buf; memcpy(dum->fifo_buf, _req->buf, _req->length); req->req.context = dum; req->req.complete = fifo_complete; list_add_tail(&req->queue, &ep->queue); spin_unlock(&dum->lock); _req->actual = _req->length; _req->status = 0; usb_gadget_giveback_request(_ep, _req); spin_lock(&dum->lock); } else list_add_tail(&req->queue, &ep->queue); spin_unlock_irqrestore(&dum->lock, flags); /* real hardware would likely enable transfers here, in case * it'd been left NAKing. */ return 0; } static int dummy_dequeue(struct usb_ep *_ep, struct usb_request *_req) { struct dummy_ep *ep; struct dummy *dum; int retval = -EINVAL; unsigned long flags; struct dummy_request *req = NULL, *iter; if (!_ep || !_req) return retval; ep = usb_ep_to_dummy_ep(_ep); dum = ep_to_dummy(ep); if (!dum->driver) return -ESHUTDOWN; local_irq_save(flags); spin_lock(&dum->lock); list_for_each_entry(iter, &ep->queue, queue) { if (&iter->req != _req) continue; list_del_init(&iter->queue); _req->status = -ECONNRESET; req = iter; retval = 0; break; } spin_unlock(&dum->lock); if (retval == 0) { dev_dbg(udc_dev(dum), "dequeued req %p from %s, len %d buf %p\n", req, _ep->name, _req->length, _req->buf); usb_gadget_giveback_request(_ep, _req); } local_irq_restore(flags); return retval; } static int dummy_set_halt_and_wedge(struct usb_ep *_ep, int value, int wedged) { struct dummy_ep *ep; struct dummy *dum; if (!_ep) return -EINVAL; ep = usb_ep_to_dummy_ep(_ep); dum = ep_to_dummy(ep); if (!dum->driver) return -ESHUTDOWN; if (!value) ep->halted = ep->wedged = 0; else if (ep->desc && (ep->desc->bEndpointAddress & USB_DIR_IN) && !list_empty(&ep->queue)) return -EAGAIN; else { ep->halted = 1; if (wedged) ep->wedged = 1; } /* FIXME clear emulated data toggle too */ return 0; } static int dummy_set_halt(struct usb_ep *_ep, int value) { return dummy_set_halt_and_wedge(_ep, value, 0); } static int dummy_set_wedge(struct usb_ep *_ep) { if (!_ep || _ep->name == ep0name) return -EINVAL; return dummy_set_halt_and_wedge(_ep, 1, 1); } static const struct usb_ep_ops dummy_ep_ops = { .enable = dummy_enable, .disable = dummy_disable, .alloc_request = dummy_alloc_request, .free_request = dummy_free_request, .queue = dummy_queue, .dequeue = dummy_dequeue, .set_halt = dummy_set_halt, .set_wedge = dummy_set_wedge, }; /*-------------------------------------------------------------------------*/ /* there are both host and device side versions of this call ... */ static int dummy_g_get_frame(struct usb_gadget *_gadget) { struct timespec64 ts64; ktime_get_ts64(&ts64); return ts64.tv_nsec / NSEC_PER_MSEC; } static int dummy_wakeup(struct usb_gadget *_gadget) { struct dummy_hcd *dum_hcd; dum_hcd = gadget_to_dummy_hcd(_gadget); if (!(dum_hcd->dum->devstatus & ((1 << USB_DEVICE_B_HNP_ENABLE) | (1 << USB_DEVICE_REMOTE_WAKEUP)))) return -EINVAL; if ((dum_hcd->port_status & USB_PORT_STAT_CONNECTION) == 0) return -ENOLINK; if ((dum_hcd->port_status & USB_PORT_STAT_SUSPEND) == 0 && dum_hcd->rh_state != DUMMY_RH_SUSPENDED) return -EIO; /* FIXME: What if the root hub is suspended but the port isn't? */ /* hub notices our request, issues downstream resume, etc */ dum_hcd->resuming = 1; dum_hcd->re_timeout = jiffies + msecs_to_jiffies(20); mod_timer(&dummy_hcd_to_hcd(dum_hcd)->rh_timer, dum_hcd->re_timeout); return 0; } static int dummy_set_selfpowered(struct usb_gadget *_gadget, int value) { struct dummy *dum; _gadget->is_selfpowered = (value != 0); dum = gadget_to_dummy_hcd(_gadget)->dum; if (value) dum->devstatus |= (1 << USB_DEVICE_SELF_POWERED); else dum->devstatus &= ~(1 << USB_DEVICE_SELF_POWERED); return 0; } static void dummy_udc_update_ep0(struct dummy *dum) { if (dum->gadget.speed == USB_SPEED_SUPER) dum->ep[0].ep.maxpacket = 9; else dum->ep[0].ep.maxpacket = 64; } static int dummy_pullup(struct usb_gadget *_gadget, int value) { struct dummy_hcd *dum_hcd; struct dummy *dum; unsigned long flags; dum = gadget_dev_to_dummy(&_gadget->dev); dum_hcd = gadget_to_dummy_hcd(_gadget); spin_lock_irqsave(&dum->lock, flags); dum->pullup = (value != 0); set_link_state(dum_hcd); if (value == 0) { /* * Emulate synchronize_irq(): wait for callbacks to finish. * This seems to be the best place to emulate the call to * synchronize_irq() that's in usb_gadget_remove_driver(). * Doing it in dummy_udc_stop() would be too late since it * is called after the unbind callback and unbind shouldn't * be invoked until all the other callbacks are finished. */ while (dum->callback_usage > 0) { spin_unlock_irqrestore(&dum->lock, flags); usleep_range(1000, 2000); spin_lock_irqsave(&dum->lock, flags); } } spin_unlock_irqrestore(&dum->lock, flags); usb_hcd_poll_rh_status(dummy_hcd_to_hcd(dum_hcd)); return 0; } static void dummy_udc_set_speed(struct usb_gadget *_gadget, enum usb_device_speed speed) { struct dummy *dum; dum = gadget_dev_to_dummy(&_gadget->dev); dum->gadget.speed = speed; dummy_udc_update_ep0(dum); } static void dummy_udc_async_callbacks(struct usb_gadget *_gadget, bool enable) { struct dummy *dum = gadget_dev_to_dummy(&_gadget->dev); spin_lock_irq(&dum->lock); dum->ints_enabled = enable; spin_unlock_irq(&dum->lock); } static int dummy_udc_start(struct usb_gadget *g, struct usb_gadget_driver *driver); static int dummy_udc_stop(struct usb_gadget *g); static const struct usb_gadget_ops dummy_ops = { .get_frame = dummy_g_get_frame, .wakeup = dummy_wakeup, .set_selfpowered = dummy_set_selfpowered, .pullup = dummy_pullup, .udc_start = dummy_udc_start, .udc_stop = dummy_udc_stop, .udc_set_speed = dummy_udc_set_speed, .udc_async_callbacks = dummy_udc_async_callbacks, }; /*-------------------------------------------------------------------------*/ /* "function" sysfs attribute */ static ssize_t function_show(struct device *dev, struct device_attribute *attr, char *buf) { struct dummy *dum = gadget_dev_to_dummy(dev); if (!dum->driver || !dum->driver->function) return 0; return scnprintf(buf, PAGE_SIZE, "%s\n", dum->driver->function); } static DEVICE_ATTR_RO(function); /*-------------------------------------------------------------------------*/ /* * Driver registration/unregistration. * * This is basically hardware-specific; there's usually only one real USB * device (not host) controller since that's how USB devices are intended * to work. So most implementations of these api calls will rely on the * fact that only one driver will ever bind to the hardware. But curious * hardware can be built with discrete components, so the gadget API doesn't * require that assumption. * * For this emulator, it might be convenient to create a usb device * for each driver that registers: just add to a big root hub. */ static int dummy_udc_start(struct usb_gadget *g, struct usb_gadget_driver *driver) { struct dummy_hcd *dum_hcd = gadget_to_dummy_hcd(g); struct dummy *dum = dum_hcd->dum; switch (g->speed) { /* All the speeds we support */ case USB_SPEED_LOW: case USB_SPEED_FULL: case USB_SPEED_HIGH: case USB_SPEED_SUPER: break; default: dev_err(dummy_dev(dum_hcd), "Unsupported driver max speed %d\n", driver->max_speed); return -EINVAL; } /* * DEVICE side init ... the layer above hardware, which * can't enumerate without help from the driver we're binding. */ spin_lock_irq(&dum->lock); dum->devstatus = 0; dum->driver = driver; spin_unlock_irq(&dum->lock); return 0; } static int dummy_udc_stop(struct usb_gadget *g) { struct dummy_hcd *dum_hcd = gadget_to_dummy_hcd(g); struct dummy *dum = dum_hcd->dum; spin_lock_irq(&dum->lock); dum->ints_enabled = 0; stop_activity(dum); dum->driver = NULL; spin_unlock_irq(&dum->lock); return 0; } #undef is_enabled /* The gadget structure is stored inside the hcd structure and will be * released along with it. */ static void init_dummy_udc_hw(struct dummy *dum) { int i; INIT_LIST_HEAD(&dum->gadget.ep_list); for (i = 0; i < DUMMY_ENDPOINTS; i++) { struct dummy_ep *ep = &dum->ep[i]; if (!ep_info[i].name) break; ep->ep.name = ep_info[i].name; ep->ep.caps = ep_info[i].caps; ep->ep.ops = &dummy_ep_ops; list_add_tail(&ep->ep.ep_list, &dum->gadget.ep_list); ep->halted = ep->wedged = ep->already_seen = ep->setup_stage = 0; usb_ep_set_maxpacket_limit(&ep->ep, ~0); ep->ep.max_streams = 16; ep->last_io = jiffies; ep->gadget = &dum->gadget; ep->desc = NULL; INIT_LIST_HEAD(&ep->queue); } dum->gadget.ep0 = &dum->ep[0].ep; list_del_init(&dum->ep[0].ep.ep_list); INIT_LIST_HEAD(&dum->fifo_req.queue); #ifdef CONFIG_USB_OTG dum->gadget.is_otg = 1; #endif } static int dummy_udc_probe(struct platform_device *pdev) { struct dummy *dum; int rc; dum = *((void **)dev_get_platdata(&pdev->dev)); /* Clear usb_gadget region for new registration to udc-core */ memzero_explicit(&dum->gadget, sizeof(struct usb_gadget)); dum->gadget.name = gadget_name; dum->gadget.ops = &dummy_ops; if (mod_data.is_super_speed) dum->gadget.max_speed = USB_SPEED_SUPER; else if (mod_data.is_high_speed) dum->gadget.max_speed = USB_SPEED_HIGH; else dum->gadget.max_speed = USB_SPEED_FULL; dum->gadget.dev.parent = &pdev->dev; init_dummy_udc_hw(dum); rc = usb_add_gadget_udc(&pdev->dev, &dum->gadget); if (rc < 0) goto err_udc; rc = device_create_file(&dum->gadget.dev, &dev_attr_function); if (rc < 0) goto err_dev; platform_set_drvdata(pdev, dum); return rc; err_dev: usb_del_gadget_udc(&dum->gadget); err_udc: return rc; } static void dummy_udc_remove(struct platform_device *pdev) { struct dummy *dum = platform_get_drvdata(pdev); device_remove_file(&dum->gadget.dev, &dev_attr_function); usb_del_gadget_udc(&dum->gadget); } static void dummy_udc_pm(struct dummy *dum, struct dummy_hcd *dum_hcd, int suspend) { spin_lock_irq(&dum->lock); dum->udc_suspended = suspend; set_link_state(dum_hcd); spin_unlock_irq(&dum->lock); } static int dummy_udc_suspend(struct platform_device *pdev, pm_message_t state) { struct dummy *dum = platform_get_drvdata(pdev); struct dummy_hcd *dum_hcd = gadget_to_dummy_hcd(&dum->gadget); dev_dbg(&pdev->dev, "%s\n", __func__); dummy_udc_pm(dum, dum_hcd, 1); usb_hcd_poll_rh_status(dummy_hcd_to_hcd(dum_hcd)); return 0; } static int dummy_udc_resume(struct platform_device *pdev) { struct dummy *dum = platform_get_drvdata(pdev); struct dummy_hcd *dum_hcd = gadget_to_dummy_hcd(&dum->gadget); dev_dbg(&pdev->dev, "%s\n", __func__); dummy_udc_pm(dum, dum_hcd, 0); usb_hcd_poll_rh_status(dummy_hcd_to_hcd(dum_hcd)); return 0; } static struct platform_driver dummy_udc_driver = { .probe = dummy_udc_probe, .remove = dummy_udc_remove, .suspend = dummy_udc_suspend, .resume = dummy_udc_resume, .driver = { .name = gadget_name, }, }; /*-------------------------------------------------------------------------*/ static unsigned int dummy_get_ep_idx(const struct usb_endpoint_descriptor *desc) { unsigned int index; index = usb_endpoint_num(desc) << 1; if (usb_endpoint_dir_in(desc)) index |= 1; return index; } /* HOST SIDE DRIVER * * this uses the hcd framework to hook up to host side drivers. * its root hub will only have one device, otherwise it acts like * a normal host controller. * * when urbs are queued, they're just stuck on a list that we * scan in a timer callback. that callback connects writes from * the host with reads from the device, and so on, based on the * usb 2.0 rules. */ static int dummy_ep_stream_en(struct dummy_hcd *dum_hcd, struct urb *urb) { const struct usb_endpoint_descriptor *desc = &urb->ep->desc; u32 index; if (!usb_endpoint_xfer_bulk(desc)) return 0; index = dummy_get_ep_idx(desc); return (1 << index) & dum_hcd->stream_en_ep; } /* * The max stream number is saved as a nibble so for the 30 possible endpoints * we only 15 bytes of memory. Therefore we are limited to max 16 streams (0 * means we use only 1 stream). The maximum according to the spec is 16bit so * if the 16 stream limit is about to go, the array size should be incremented * to 30 elements of type u16. */ static int get_max_streams_for_pipe(struct dummy_hcd *dum_hcd, unsigned int pipe) { int max_streams; max_streams = dum_hcd->num_stream[usb_pipeendpoint(pipe)]; if (usb_pipeout(pipe)) max_streams >>= 4; else max_streams &= 0xf; max_streams++; return max_streams; } static void set_max_streams_for_pipe(struct dummy_hcd *dum_hcd, unsigned int pipe, unsigned int streams) { int max_streams; streams--; max_streams = dum_hcd->num_stream[usb_pipeendpoint(pipe)]; if (usb_pipeout(pipe)) { streams <<= 4; max_streams &= 0xf; } else { max_streams &= 0xf0; } max_streams |= streams; dum_hcd->num_stream[usb_pipeendpoint(pipe)] = max_streams; } static int dummy_validate_stream(struct dummy_hcd *dum_hcd, struct urb *urb) { unsigned int max_streams; int enabled; enabled = dummy_ep_stream_en(dum_hcd, urb); if (!urb->stream_id) { if (enabled) return -EINVAL; return 0; } if (!enabled) return -EINVAL; max_streams = get_max_streams_for_pipe(dum_hcd, usb_pipeendpoint(urb->pipe)); if (urb->stream_id > max_streams) { dev_err(dummy_dev(dum_hcd), "Stream id %d is out of range.\n", urb->stream_id); BUG(); return -EINVAL; } return 0; } static int dummy_urb_enqueue( struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags ) { struct dummy_hcd *dum_hcd; struct urbp *urbp; unsigned long flags; int rc; urbp = kmalloc(sizeof *urbp, mem_flags); if (!urbp) return -ENOMEM; urbp->urb = urb; urbp->miter_started = 0; dum_hcd = hcd_to_dummy_hcd(hcd); spin_lock_irqsave(&dum_hcd->dum->lock, flags); rc = dummy_validate_stream(dum_hcd, urb); if (rc) { kfree(urbp); goto done; } rc = usb_hcd_link_urb_to_ep(hcd, urb); if (rc) { kfree(urbp); goto done; } if (!dum_hcd->udev) { dum_hcd->udev = urb->dev; usb_get_dev(dum_hcd->udev); } else if (unlikely(dum_hcd->udev != urb->dev)) dev_err(dummy_dev(dum_hcd), "usb_device address has changed!\n"); list_add_tail(&urbp->urbp_list, &dum_hcd->urbp_list); urb->hcpriv = urbp; if (!dum_hcd->next_frame_urbp) dum_hcd->next_frame_urbp = urbp; if (usb_pipetype(urb->pipe) == PIPE_CONTROL) urb->error_count = 1; /* mark as a new urb */ /* kick the scheduler, it'll do the rest */ if (!dum_hcd->timer_pending) { dum_hcd->timer_pending = 1; hrtimer_start(&dum_hcd->timer, ns_to_ktime(DUMMY_TIMER_INT_NSECS), HRTIMER_MODE_REL_SOFT); } done: spin_unlock_irqrestore(&dum_hcd->dum->lock, flags); return rc; } static int dummy_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) { struct dummy_hcd *dum_hcd; unsigned long flags; int rc; /* giveback happens automatically in timer callback, * so make sure the callback happens */ dum_hcd = hcd_to_dummy_hcd(hcd); spin_lock_irqsave(&dum_hcd->dum->lock, flags); rc = usb_hcd_check_unlink_urb(hcd, urb, status); if (rc == 0 && !dum_hcd->timer_pending) { dum_hcd->timer_pending = 1; hrtimer_start(&dum_hcd->timer, ns_to_ktime(0), HRTIMER_MODE_REL_SOFT); } spin_unlock_irqrestore(&dum_hcd->dum->lock, flags); return rc; } static int dummy_perform_transfer(struct urb *urb, struct dummy_request *req, u32 len) { void *ubuf, *rbuf; struct urbp *urbp = urb->hcpriv; int to_host; struct sg_mapping_iter *miter = &urbp->miter; u32 trans = 0; u32 this_sg; bool next_sg; to_host = usb_urb_dir_in(urb); rbuf = req->req.buf + req->req.actual; if (!urb->num_sgs) { ubuf = urb->transfer_buffer + urb->actual_length; if (to_host) memcpy(ubuf, rbuf, len); else memcpy(rbuf, ubuf, len); return len; } if (!urbp->miter_started) { u32 flags = SG_MITER_ATOMIC; if (to_host) flags |= SG_MITER_TO_SG; else flags |= SG_MITER_FROM_SG; sg_miter_start(miter, urb->sg, urb->num_sgs, flags); urbp->miter_started = 1; } next_sg = sg_miter_next(miter); if (next_sg == false) { WARN_ON_ONCE(1); return -EINVAL; } do { ubuf = miter->addr; this_sg = min_t(u32, len, miter->length); miter->consumed = this_sg; trans += this_sg; if (to_host) memcpy(ubuf, rbuf, this_sg); else memcpy(rbuf, ubuf, this_sg); len -= this_sg; if (!len) break; next_sg = sg_miter_next(miter); if (next_sg == false) { WARN_ON_ONCE(1); return -EINVAL; } rbuf += this_sg; } while (1); sg_miter_stop(miter); return trans; } /* transfer up to a frame's worth; caller must own lock */ static int transfer(struct dummy_hcd *dum_hcd, struct urb *urb, struct dummy_ep *ep, int limit, int *status) { struct dummy *dum = dum_hcd->dum; struct dummy_request *req; int sent = 0; top: /* if there's no request queued, the device is NAKing; return */ list_for_each_entry(req, &ep->queue, queue) { unsigned host_len, dev_len, len; int is_short, to_host; int rescan = 0; if (dummy_ep_stream_en(dum_hcd, urb)) { if ((urb->stream_id != req->req.stream_id)) continue; } /* 1..N packets of ep->ep.maxpacket each ... the last one * may be short (including zero length). * * writer can send a zlp explicitly (length 0) or implicitly * (length mod maxpacket zero, and 'zero' flag); they always * terminate reads. */ host_len = urb->transfer_buffer_length - urb->actual_length; dev_len = req->req.length - req->req.actual; len = min(host_len, dev_len); /* FIXME update emulated data toggle too */ to_host = usb_urb_dir_in(urb); if (unlikely(len == 0)) is_short = 1; else { /* not enough bandwidth left? */ if (limit < ep->ep.maxpacket && limit < len) break; len = min_t(unsigned, len, limit); if (len == 0) break; /* send multiple of maxpacket first, then remainder */ if (len >= ep->ep.maxpacket) { is_short = 0; if (len % ep->ep.maxpacket) rescan = 1; len -= len % ep->ep.maxpacket; } else { is_short = 1; } len = dummy_perform_transfer(urb, req, len); ep->last_io = jiffies; if ((int)len < 0) { req->req.status = len; } else { limit -= len; sent += len; urb->actual_length += len; req->req.actual += len; } } /* short packets terminate, maybe with overflow/underflow. * it's only really an error to write too much. * * partially filling a buffer optionally blocks queue advances * (so completion handlers can clean up the queue) but we don't * need to emulate such data-in-flight. */ if (is_short) { if (host_len == dev_len) { req->req.status = 0; *status = 0; } else if (to_host) { req->req.status = 0; if (dev_len > host_len) *status = -EOVERFLOW; else *status = 0; } else { *status = 0; if (host_len > dev_len) req->req.status = -EOVERFLOW; else req->req.status = 0; } /* * many requests terminate without a short packet. * send a zlp if demanded by flags. */ } else { if (req->req.length == req->req.actual) { if (req->req.zero && to_host) rescan = 1; else req->req.status = 0; } if (urb->transfer_buffer_length == urb->actual_length) { if (urb->transfer_flags & URB_ZERO_PACKET && !to_host) rescan = 1; else *status = 0; } } /* device side completion --> continuable */ if (req->req.status != -EINPROGRESS) { list_del_init(&req->queue); spin_unlock(&dum->lock); usb_gadget_giveback_request(&ep->ep, &req->req); spin_lock(&dum->lock); /* requests might have been unlinked... */ rescan = 1; } /* host side completion --> terminate */ if (*status != -EINPROGRESS) break; /* rescan to continue with any other queued i/o */ if (rescan) goto top; } return sent; } static int periodic_bytes(struct dummy *dum, struct dummy_ep *ep) { int limit = ep->ep.maxpacket; if (dum->gadget.speed == USB_SPEED_HIGH) { int tmp; /* high bandwidth mode */ tmp = usb_endpoint_maxp_mult(ep->desc); tmp *= 8 /* applies to entire frame */; limit += limit * tmp; } if (dum->gadget.speed == USB_SPEED_SUPER) { switch (usb_endpoint_type(ep->desc)) { case USB_ENDPOINT_XFER_ISOC: /* Sec. 4.4.8.2 USB3.0 Spec */ limit = 3 * 16 * 1024 * 8; break; case USB_ENDPOINT_XFER_INT: /* Sec. 4.4.7.2 USB3.0 Spec */ limit = 3 * 1024 * 8; break; case USB_ENDPOINT_XFER_BULK: default: break; } } return limit; } #define is_active(dum_hcd) ((dum_hcd->port_status & \ (USB_PORT_STAT_CONNECTION | USB_PORT_STAT_ENABLE | \ USB_PORT_STAT_SUSPEND)) \ == (USB_PORT_STAT_CONNECTION | USB_PORT_STAT_ENABLE)) static struct dummy_ep *find_endpoint(struct dummy *dum, u8 address) { int i; if (!is_active((dum->gadget.speed == USB_SPEED_SUPER ? dum->ss_hcd : dum->hs_hcd))) return NULL; if (!dum->ints_enabled) return NULL; if ((address & ~USB_DIR_IN) == 0) return &dum->ep[0]; for (i = 1; i < DUMMY_ENDPOINTS; i++) { struct dummy_ep *ep = &dum->ep[i]; if (!ep->desc) continue; if (ep->desc->bEndpointAddress == address) return ep; } return NULL; } #undef is_active #define Dev_Request (USB_TYPE_STANDARD | USB_RECIP_DEVICE) #define Dev_InRequest (Dev_Request | USB_DIR_IN) #define Intf_Request (USB_TYPE_STANDARD | USB_RECIP_INTERFACE) #define Intf_InRequest (Intf_Request | USB_DIR_IN) #define Ep_Request (USB_TYPE_STANDARD | USB_RECIP_ENDPOINT) #define Ep_InRequest (Ep_Request | USB_DIR_IN) /** * handle_control_request() - handles all control transfers * @dum_hcd: pointer to dummy (the_controller) * @urb: the urb request to handle * @setup: pointer to the setup data for a USB device control * request * @status: pointer to request handling status * * Return 0 - if the request was handled * 1 - if the request wasn't handles * error code on error */ static int handle_control_request(struct dummy_hcd *dum_hcd, struct urb *urb, struct usb_ctrlrequest *setup, int *status) { struct dummy_ep *ep2; struct dummy *dum = dum_hcd->dum; int ret_val = 1; unsigned w_index; unsigned w_value; w_index = le16_to_cpu(setup->wIndex); w_value = le16_to_cpu(setup->wValue); switch (setup->bRequest) { case USB_REQ_SET_ADDRESS: if (setup->bRequestType != Dev_Request) break; dum->address = w_value; *status = 0; dev_dbg(udc_dev(dum), "set_address = %d\n", w_value); ret_val = 0; break; case USB_REQ_SET_FEATURE: if (setup->bRequestType == Dev_Request) { ret_val = 0; switch (w_value) { case USB_DEVICE_REMOTE_WAKEUP: break; case USB_DEVICE_B_HNP_ENABLE: dum->gadget.b_hnp_enable = 1; break; case USB_DEVICE_A_HNP_SUPPORT: dum->gadget.a_hnp_support = 1; break; case USB_DEVICE_A_ALT_HNP_SUPPORT: dum->gadget.a_alt_hnp_support = 1; break; case USB_DEVICE_U1_ENABLE: if (dummy_hcd_to_hcd(dum_hcd)->speed == HCD_USB3) w_value = USB_DEV_STAT_U1_ENABLED; else ret_val = -EOPNOTSUPP; break; case USB_DEVICE_U2_ENABLE: if (dummy_hcd_to_hcd(dum_hcd)->speed == HCD_USB3) w_value = USB_DEV_STAT_U2_ENABLED; else ret_val = -EOPNOTSUPP; break; case USB_DEVICE_LTM_ENABLE: if (dummy_hcd_to_hcd(dum_hcd)->speed == HCD_USB3) w_value = USB_DEV_STAT_LTM_ENABLED; else ret_val = -EOPNOTSUPP; break; default: ret_val = -EOPNOTSUPP; } if (ret_val == 0) { dum->devstatus |= (1 << w_value); *status = 0; } } else if (setup->bRequestType == Ep_Request) { /* endpoint halt */ ep2 = find_endpoint(dum, w_index); if (!ep2 || ep2->ep.name == ep0name) { ret_val = -EOPNOTSUPP; break; } ep2->halted = 1; ret_val = 0; *status = 0; } break; case USB_REQ_CLEAR_FEATURE: if (setup->bRequestType == Dev_Request) { ret_val = 0; switch (w_value) { case USB_DEVICE_REMOTE_WAKEUP: w_value = USB_DEVICE_REMOTE_WAKEUP; break; case USB_DEVICE_U1_ENABLE: if (dummy_hcd_to_hcd(dum_hcd)->speed == HCD_USB3) w_value = USB_DEV_STAT_U1_ENABLED; else ret_val = -EOPNOTSUPP; break; case USB_DEVICE_U2_ENABLE: if (dummy_hcd_to_hcd(dum_hcd)->speed == HCD_USB3) w_value = USB_DEV_STAT_U2_ENABLED; else ret_val = -EOPNOTSUPP; break; case USB_DEVICE_LTM_ENABLE: if (dummy_hcd_to_hcd(dum_hcd)->speed == HCD_USB3) w_value = USB_DEV_STAT_LTM_ENABLED; else ret_val = -EOPNOTSUPP; break; default: ret_val = -EOPNOTSUPP; break; } if (ret_val == 0) { dum->devstatus &= ~(1 << w_value); *status = 0; } } else if (setup->bRequestType == Ep_Request) { /* endpoint halt */ ep2 = find_endpoint(dum, w_index); if (!ep2) { ret_val = -EOPNOTSUPP; break; } if (!ep2->wedged) ep2->halted = 0; ret_val = 0; *status = 0; } break; case USB_REQ_GET_STATUS: if (setup->bRequestType == Dev_InRequest || setup->bRequestType == Intf_InRequest || setup->bRequestType == Ep_InRequest) { char *buf; /* * device: remote wakeup, selfpowered * interface: nothing * endpoint: halt */ buf = (char *)urb->transfer_buffer; if (urb->transfer_buffer_length > 0) { if (setup->bRequestType == Ep_InRequest) { ep2 = find_endpoint(dum, w_index); if (!ep2) { ret_val = -EOPNOTSUPP; break; } buf[0] = ep2->halted; } else if (setup->bRequestType == Dev_InRequest) { buf[0] = (u8)dum->devstatus; } else buf[0] = 0; } if (urb->transfer_buffer_length > 1) buf[1] = 0; urb->actual_length = min_t(u32, 2, urb->transfer_buffer_length); ret_val = 0; *status = 0; } break; } return ret_val; } /* * Drive both sides of the transfers; looks like irq handlers to both * drivers except that the callbacks are invoked from soft interrupt * context. */ static enum hrtimer_restart dummy_timer(struct hrtimer *t) { struct dummy_hcd *dum_hcd = from_timer(dum_hcd, t, timer); struct dummy *dum = dum_hcd->dum; struct urbp *urbp, *tmp; unsigned long flags; int limit, total; int i; /* simplistic model for one frame's bandwidth */ /* FIXME: account for transaction and packet overhead */ switch (dum->gadget.speed) { case USB_SPEED_LOW: total = 8/*bytes*/ * 12/*packets*/; break; case USB_SPEED_FULL: total = 64/*bytes*/ * 19/*packets*/; break; case USB_SPEED_HIGH: total = 512/*bytes*/ * 13/*packets*/ * 8/*uframes*/; break; case USB_SPEED_SUPER: /* Bus speed is 500000 bytes/ms, so use a little less */ total = 490000; break; default: /* Can't happen */ dev_err(dummy_dev(dum_hcd), "bogus device speed\n"); total = 0; break; } /* look at each urb queued by the host side driver */ spin_lock_irqsave(&dum->lock, flags); dum_hcd->timer_pending = 0; if (!dum_hcd->udev) { dev_err(dummy_dev(dum_hcd), "timer fired with no URBs pending?\n"); spin_unlock_irqrestore(&dum->lock, flags); return HRTIMER_NORESTART; } dum_hcd->next_frame_urbp = NULL; for (i = 0; i < DUMMY_ENDPOINTS; i++) { if (!ep_info[i].name) break; dum->ep[i].already_seen = 0; } restart: list_for_each_entry_safe(urbp, tmp, &dum_hcd->urbp_list, urbp_list) { struct urb *urb; struct dummy_request *req; u8 address; struct dummy_ep *ep = NULL; int status = -EINPROGRESS; /* stop when we reach URBs queued after the timer interrupt */ if (urbp == dum_hcd->next_frame_urbp) break; urb = urbp->urb; if (urb->unlinked) goto return_urb; else if (dum_hcd->rh_state != DUMMY_RH_RUNNING) continue; /* Used up this frame's bandwidth? */ if (total <= 0) continue; /* find the gadget's ep for this request (if configured) */ address = usb_pipeendpoint (urb->pipe); if (usb_urb_dir_in(urb)) address |= USB_DIR_IN; ep = find_endpoint(dum, address); if (!ep) { /* set_configuration() disagreement */ dev_dbg(dummy_dev(dum_hcd), "no ep configured for urb %p\n", urb); status = -EPROTO; goto return_urb; } if (ep->already_seen) continue; ep->already_seen = 1; if (ep == &dum->ep[0] && urb->error_count) { ep->setup_stage = 1; /* a new urb */ urb->error_count = 0; } if (ep->halted && !ep->setup_stage) { /* NOTE: must not be iso! */ dev_dbg(dummy_dev(dum_hcd), "ep %s halted, urb %p\n", ep->ep.name, urb); status = -EPIPE; goto return_urb; } /* FIXME make sure both ends agree on maxpacket */ /* handle control requests */ if (ep == &dum->ep[0] && ep->setup_stage) { struct usb_ctrlrequest setup; int value; setup = *(struct usb_ctrlrequest *) urb->setup_packet; /* paranoia, in case of stale queued data */ list_for_each_entry(req, &ep->queue, queue) { list_del_init(&req->queue); req->req.status = -EOVERFLOW; dev_dbg(udc_dev(dum), "stale req = %p\n", req); spin_unlock(&dum->lock); usb_gadget_giveback_request(&ep->ep, &req->req); spin_lock(&dum->lock); ep->already_seen = 0; goto restart; } /* gadget driver never sees set_address or operations * on standard feature flags. some hardware doesn't * even expose them. */ ep->last_io = jiffies; ep->setup_stage = 0; ep->halted = 0; value = handle_control_request(dum_hcd, urb, &setup, &status); /* gadget driver handles all other requests. block * until setup() returns; no reentrancy issues etc. */ if (value > 0) { ++dum->callback_usage; spin_unlock(&dum->lock); value = dum->driver->setup(&dum->gadget, &setup); spin_lock(&dum->lock); --dum->callback_usage; if (value >= 0) { /* no delays (max 64KB data stage) */ limit = 64*1024; goto treat_control_like_bulk; } /* error, see below */ } if (value < 0) { if (value != -EOPNOTSUPP) dev_dbg(udc_dev(dum), "setup --> %d\n", value); status = -EPIPE; urb->actual_length = 0; } goto return_urb; } /* non-control requests */ limit = total; switch (usb_pipetype(urb->pipe)) { case PIPE_ISOCHRONOUS: /* * We don't support isochronous. But if we did, * here are some of the issues we'd have to face: * * Is it urb->interval since the last xfer? * Use urb->iso_frame_desc[i]. * Complete whether or not ep has requests queued. * Report random errors, to debug drivers. */ limit = max(limit, periodic_bytes(dum, ep)); status = -EINVAL; /* fail all xfers */ break; case PIPE_INTERRUPT: /* FIXME is it urb->interval since the last xfer? * this almost certainly polls too fast. */ limit = max(limit, periodic_bytes(dum, ep)); fallthrough; default: treat_control_like_bulk: ep->last_io = jiffies; total -= transfer(dum_hcd, urb, ep, limit, &status); break; } /* incomplete transfer? */ if (status == -EINPROGRESS) continue; return_urb: list_del(&urbp->urbp_list); kfree(urbp); if (ep) ep->already_seen = ep->setup_stage = 0; usb_hcd_unlink_urb_from_ep(dummy_hcd_to_hcd(dum_hcd), urb); spin_unlock(&dum->lock); usb_hcd_giveback_urb(dummy_hcd_to_hcd(dum_hcd), urb, status); spin_lock(&dum->lock); goto restart; } if (list_empty(&dum_hcd->urbp_list)) { usb_put_dev(dum_hcd->udev); dum_hcd->udev = NULL; } else if (!dum_hcd->timer_pending && dum_hcd->rh_state == DUMMY_RH_RUNNING) { /* want a 1 msec delay here */ dum_hcd->timer_pending = 1; hrtimer_start(&dum_hcd->timer, ns_to_ktime(DUMMY_TIMER_INT_NSECS), HRTIMER_MODE_REL_SOFT); } spin_unlock_irqrestore(&dum->lock, flags); return HRTIMER_NORESTART; } /*-------------------------------------------------------------------------*/ #define PORT_C_MASK \ ((USB_PORT_STAT_C_CONNECTION \ | USB_PORT_STAT_C_ENABLE \ | USB_PORT_STAT_C_SUSPEND \ | USB_PORT_STAT_C_OVERCURRENT \ | USB_PORT_STAT_C_RESET) << 16) static int dummy_hub_status(struct usb_hcd *hcd, char *buf) { struct dummy_hcd *dum_hcd; unsigned long flags; int retval = 0; dum_hcd = hcd_to_dummy_hcd(hcd); spin_lock_irqsave(&dum_hcd->dum->lock, flags); if (!HCD_HW_ACCESSIBLE(hcd)) goto done; if (dum_hcd->resuming && time_after_eq(jiffies, dum_hcd->re_timeout)) { dum_hcd->port_status |= (USB_PORT_STAT_C_SUSPEND << 16); dum_hcd->port_status &= ~USB_PORT_STAT_SUSPEND; set_link_state(dum_hcd); } if ((dum_hcd->port_status & PORT_C_MASK) != 0) { *buf = (1 << 1); dev_dbg(dummy_dev(dum_hcd), "port status 0x%08x has changes\n", dum_hcd->port_status); retval = 1; if (dum_hcd->rh_state == DUMMY_RH_SUSPENDED) usb_hcd_resume_root_hub(hcd); } done: spin_unlock_irqrestore(&dum_hcd->dum->lock, flags); return retval; } /* usb 3.0 root hub device descriptor */ static struct { struct usb_bos_descriptor bos; struct usb_ss_cap_descriptor ss_cap; } __packed usb3_bos_desc = { .bos = { .bLength = USB_DT_BOS_SIZE, .bDescriptorType = USB_DT_BOS, .wTotalLength = cpu_to_le16(sizeof(usb3_bos_desc)), .bNumDeviceCaps = 1, }, .ss_cap = { .bLength = USB_DT_USB_SS_CAP_SIZE, .bDescriptorType = USB_DT_DEVICE_CAPABILITY, .bDevCapabilityType = USB_SS_CAP_TYPE, .wSpeedSupported = cpu_to_le16(USB_5GBPS_OPERATION), .bFunctionalitySupport = ilog2(USB_5GBPS_OPERATION), }, }; static inline void ss_hub_descriptor(struct usb_hub_descriptor *desc) { memset(desc, 0, sizeof *desc); desc->bDescriptorType = USB_DT_SS_HUB; desc->bDescLength = 12; desc->wHubCharacteristics = cpu_to_le16( HUB_CHAR_INDV_PORT_LPSM | HUB_CHAR_COMMON_OCPM); desc->bNbrPorts = 1; desc->u.ss.bHubHdrDecLat = 0x04; /* Worst case: 0.4 micro sec*/ desc->u.ss.DeviceRemovable = 0; } static inline void hub_descriptor(struct usb_hub_descriptor *desc) { memset(desc, 0, sizeof *desc); desc->bDescriptorType = USB_DT_HUB; desc->bDescLength = 9; desc->wHubCharacteristics = cpu_to_le16( HUB_CHAR_INDV_PORT_LPSM | HUB_CHAR_COMMON_OCPM); desc->bNbrPorts = 1; desc->u.hs.DeviceRemovable[0] = 0; desc->u.hs.DeviceRemovable[1] = 0xff; /* PortPwrCtrlMask */ } static int dummy_hub_control( struct usb_hcd *hcd, u16 typeReq, u16 wValue, u16 wIndex, char *buf, u16 wLength ) { struct dummy_hcd *dum_hcd; int retval = 0; unsigned long flags; if (!HCD_HW_ACCESSIBLE(hcd)) return -ETIMEDOUT; dum_hcd = hcd_to_dummy_hcd(hcd); spin_lock_irqsave(&dum_hcd->dum->lock, flags); switch (typeReq) { case ClearHubFeature: break; case ClearPortFeature: switch (wValue) { case USB_PORT_FEAT_SUSPEND: if (hcd->speed == HCD_USB3) { dev_dbg(dummy_dev(dum_hcd), "USB_PORT_FEAT_SUSPEND req not " "supported for USB 3.0 roothub\n"); goto error; } if (dum_hcd->port_status & USB_PORT_STAT_SUSPEND) { /* 20msec resume signaling */ dum_hcd->resuming = 1; dum_hcd->re_timeout = jiffies + msecs_to_jiffies(20); } break; case USB_PORT_FEAT_POWER: dev_dbg(dummy_dev(dum_hcd), "power-off\n"); if (hcd->speed == HCD_USB3) dum_hcd->port_status &= ~USB_SS_PORT_STAT_POWER; else dum_hcd->port_status &= ~USB_PORT_STAT_POWER; set_link_state(dum_hcd); break; case USB_PORT_FEAT_ENABLE: case USB_PORT_FEAT_C_ENABLE: case USB_PORT_FEAT_C_SUSPEND: /* Not allowed for USB-3 */ if (hcd->speed == HCD_USB3) goto error; fallthrough; case USB_PORT_FEAT_C_CONNECTION: case USB_PORT_FEAT_C_RESET: dum_hcd->port_status &= ~(1 << wValue); set_link_state(dum_hcd); break; default: /* Disallow INDICATOR and C_OVER_CURRENT */ goto error; } break; case GetHubDescriptor: if (hcd->speed == HCD_USB3 && (wLength < USB_DT_SS_HUB_SIZE || wValue != (USB_DT_SS_HUB << 8))) { dev_dbg(dummy_dev(dum_hcd), "Wrong hub descriptor type for " "USB 3.0 roothub.\n"); goto error; } if (hcd->speed == HCD_USB3) ss_hub_descriptor((struct usb_hub_descriptor *) buf); else hub_descriptor((struct usb_hub_descriptor *) buf); break; case DeviceRequest | USB_REQ_GET_DESCRIPTOR: if (hcd->speed != HCD_USB3) goto error; if ((wValue >> 8) != USB_DT_BOS) goto error; memcpy(buf, &usb3_bos_desc, sizeof(usb3_bos_desc)); retval = sizeof(usb3_bos_desc); break; case GetHubStatus: *(__le32 *) buf = cpu_to_le32(0); break; case GetPortStatus: if (wIndex != 1) retval = -EPIPE; /* whoever resets or resumes must GetPortStatus to * complete it!! */ if (dum_hcd->resuming && time_after_eq(jiffies, dum_hcd->re_timeout)) { dum_hcd->port_status |= (USB_PORT_STAT_C_SUSPEND << 16); dum_hcd->port_status &= ~USB_PORT_STAT_SUSPEND; } if ((dum_hcd->port_status & USB_PORT_STAT_RESET) != 0 && time_after_eq(jiffies, dum_hcd->re_timeout)) { dum_hcd->port_status |= (USB_PORT_STAT_C_RESET << 16); dum_hcd->port_status &= ~USB_PORT_STAT_RESET; if (dum_hcd->dum->pullup) { dum_hcd->port_status |= USB_PORT_STAT_ENABLE; if (hcd->speed < HCD_USB3) { switch (dum_hcd->dum->gadget.speed) { case USB_SPEED_HIGH: dum_hcd->port_status |= USB_PORT_STAT_HIGH_SPEED; break; case USB_SPEED_LOW: dum_hcd->dum->gadget.ep0-> maxpacket = 8; dum_hcd->port_status |= USB_PORT_STAT_LOW_SPEED; break; default: break; } } } } set_link_state(dum_hcd); ((__le16 *) buf)[0] = cpu_to_le16(dum_hcd->port_status); ((__le16 *) buf)[1] = cpu_to_le16(dum_hcd->port_status >> 16); break; case SetHubFeature: retval = -EPIPE; break; case SetPortFeature: switch (wValue) { case USB_PORT_FEAT_LINK_STATE: if (hcd->speed != HCD_USB3) { dev_dbg(dummy_dev(dum_hcd), "USB_PORT_FEAT_LINK_STATE req not " "supported for USB 2.0 roothub\n"); goto error; } /* * Since this is dummy we don't have an actual link so * there is nothing to do for the SET_LINK_STATE cmd */ break; case USB_PORT_FEAT_U1_TIMEOUT: case USB_PORT_FEAT_U2_TIMEOUT: /* TODO: add suspend/resume support! */ if (hcd->speed != HCD_USB3) { dev_dbg(dummy_dev(dum_hcd), "USB_PORT_FEAT_U1/2_TIMEOUT req not " "supported for USB 2.0 roothub\n"); goto error; } break; case USB_PORT_FEAT_SUSPEND: /* Applicable only for USB2.0 hub */ if (hcd->speed == HCD_USB3) { dev_dbg(dummy_dev(dum_hcd), "USB_PORT_FEAT_SUSPEND req not " "supported for USB 3.0 roothub\n"); goto error; } if (dum_hcd->active) { dum_hcd->port_status |= USB_PORT_STAT_SUSPEND; /* HNP would happen here; for now we * assume b_bus_req is always true. */ set_link_state(dum_hcd); if (((1 << USB_DEVICE_B_HNP_ENABLE) & dum_hcd->dum->devstatus) != 0) dev_dbg(dummy_dev(dum_hcd), "no HNP yet!\n"); } break; case USB_PORT_FEAT_POWER: if (hcd->speed == HCD_USB3) dum_hcd->port_status |= USB_SS_PORT_STAT_POWER; else dum_hcd->port_status |= USB_PORT_STAT_POWER; set_link_state(dum_hcd); break; case USB_PORT_FEAT_BH_PORT_RESET: /* Applicable only for USB3.0 hub */ if (hcd->speed != HCD_USB3) { dev_dbg(dummy_dev(dum_hcd), "USB_PORT_FEAT_BH_PORT_RESET req not " "supported for USB 2.0 roothub\n"); goto error; } fallthrough; case USB_PORT_FEAT_RESET: if (!(dum_hcd->port_status & USB_PORT_STAT_CONNECTION)) break; /* if it's already enabled, disable */ if (hcd->speed == HCD_USB3) { dum_hcd->port_status = (USB_SS_PORT_STAT_POWER | USB_PORT_STAT_CONNECTION | USB_PORT_STAT_RESET); } else { dum_hcd->port_status &= ~(USB_PORT_STAT_ENABLE | USB_PORT_STAT_LOW_SPEED | USB_PORT_STAT_HIGH_SPEED); dum_hcd->port_status |= USB_PORT_STAT_RESET; } /* * We want to reset device status. All but the * Self powered feature */ dum_hcd->dum->devstatus &= (1 << USB_DEVICE_SELF_POWERED); /* * FIXME USB3.0: what is the correct reset signaling * interval? Is it still 50msec as for HS? */ dum_hcd->re_timeout = jiffies + msecs_to_jiffies(50); set_link_state(dum_hcd); break; case USB_PORT_FEAT_C_CONNECTION: case USB_PORT_FEAT_C_RESET: case USB_PORT_FEAT_C_ENABLE: case USB_PORT_FEAT_C_SUSPEND: /* Not allowed for USB-3, and ignored for USB-2 */ if (hcd->speed == HCD_USB3) goto error; break; default: /* Disallow TEST, INDICATOR, and C_OVER_CURRENT */ goto error; } break; case GetPortErrorCount: if (hcd->speed != HCD_USB3) { dev_dbg(dummy_dev(dum_hcd), "GetPortErrorCount req not " "supported for USB 2.0 roothub\n"); goto error; } /* We'll always return 0 since this is a dummy hub */ *(__le32 *) buf = cpu_to_le32(0); break; case SetHubDepth: if (hcd->speed != HCD_USB3) { dev_dbg(dummy_dev(dum_hcd), "SetHubDepth req not supported for " "USB 2.0 roothub\n"); goto error; } break; default: dev_dbg(dummy_dev(dum_hcd), "hub control req%04x v%04x i%04x l%d\n", typeReq, wValue, wIndex, wLength); error: /* "protocol stall" on error */ retval = -EPIPE; } spin_unlock_irqrestore(&dum_hcd->dum->lock, flags); if ((dum_hcd->port_status & PORT_C_MASK) != 0) usb_hcd_poll_rh_status(hcd); return retval; } static int dummy_bus_suspend(struct usb_hcd *hcd) { struct dummy_hcd *dum_hcd = hcd_to_dummy_hcd(hcd); dev_dbg(&hcd->self.root_hub->dev, "%s\n", __func__); spin_lock_irq(&dum_hcd->dum->lock); dum_hcd->rh_state = DUMMY_RH_SUSPENDED; set_link_state(dum_hcd); hcd->state = HC_STATE_SUSPENDED; spin_unlock_irq(&dum_hcd->dum->lock); return 0; } static int dummy_bus_resume(struct usb_hcd *hcd) { struct dummy_hcd *dum_hcd = hcd_to_dummy_hcd(hcd); int rc = 0; dev_dbg(&hcd->self.root_hub->dev, "%s\n", __func__); spin_lock_irq(&dum_hcd->dum->lock); if (!HCD_HW_ACCESSIBLE(hcd)) { rc = -ESHUTDOWN; } else { dum_hcd->rh_state = DUMMY_RH_RUNNING; set_link_state(dum_hcd); if (!list_empty(&dum_hcd->urbp_list)) { dum_hcd->timer_pending = 1; hrtimer_start(&dum_hcd->timer, ns_to_ktime(0), HRTIMER_MODE_REL_SOFT); } hcd->state = HC_STATE_RUNNING; } spin_unlock_irq(&dum_hcd->dum->lock); return rc; } /*-------------------------------------------------------------------------*/ static inline ssize_t show_urb(char *buf, size_t size, struct urb *urb) { int ep = usb_pipeendpoint(urb->pipe); return scnprintf(buf, size, "urb/%p %s ep%d%s%s len %d/%d\n", urb, ({ char *s; switch (urb->dev->speed) { case USB_SPEED_LOW: s = "ls"; break; case USB_SPEED_FULL: s = "fs"; break; case USB_SPEED_HIGH: s = "hs"; break; case USB_SPEED_SUPER: s = "ss"; break; default: s = "?"; break; } s; }), ep, ep ? (usb_urb_dir_in(urb) ? "in" : "out") : "", ({ char *s; \ switch (usb_pipetype(urb->pipe)) { \ case PIPE_CONTROL: \ s = ""; \ break; \ case PIPE_BULK: \ s = "-bulk"; \ break; \ case PIPE_INTERRUPT: \ s = "-int"; \ break; \ default: \ s = "-iso"; \ break; \ } s; }), urb->actual_length, urb->transfer_buffer_length); } static ssize_t urbs_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_hcd *hcd = dev_get_drvdata(dev); struct dummy_hcd *dum_hcd = hcd_to_dummy_hcd(hcd); struct urbp *urbp; size_t size = 0; unsigned long flags; spin_lock_irqsave(&dum_hcd->dum->lock, flags); list_for_each_entry(urbp, &dum_hcd->urbp_list, urbp_list) { size_t temp; temp = show_urb(buf, PAGE_SIZE - size, urbp->urb); buf += temp; size += temp; } spin_unlock_irqrestore(&dum_hcd->dum->lock, flags); return size; } static DEVICE_ATTR_RO(urbs); static int dummy_start_ss(struct dummy_hcd *dum_hcd) { hrtimer_setup(&dum_hcd->timer, dummy_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); dum_hcd->rh_state = DUMMY_RH_RUNNING; dum_hcd->stream_en_ep = 0; INIT_LIST_HEAD(&dum_hcd->urbp_list); dummy_hcd_to_hcd(dum_hcd)->power_budget = POWER_BUDGET_3; dummy_hcd_to_hcd(dum_hcd)->state = HC_STATE_RUNNING; dummy_hcd_to_hcd(dum_hcd)->uses_new_polling = 1; #ifdef CONFIG_USB_OTG dummy_hcd_to_hcd(dum_hcd)->self.otg_port = 1; #endif return 0; /* FIXME 'urbs' should be a per-device thing, maybe in usbcore */ return device_create_file(dummy_dev(dum_hcd), &dev_attr_urbs); } static int dummy_start(struct usb_hcd *hcd) { struct dummy_hcd *dum_hcd = hcd_to_dummy_hcd(hcd); /* * HOST side init ... we emulate a root hub that'll only ever * talk to one device (the gadget side). Also appears in sysfs, * just like more familiar pci-based HCDs. */ if (!usb_hcd_is_primary_hcd(hcd)) return dummy_start_ss(dum_hcd); spin_lock_init(&dum_hcd->dum->lock); hrtimer_setup(&dum_hcd->timer, dummy_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); dum_hcd->rh_state = DUMMY_RH_RUNNING; INIT_LIST_HEAD(&dum_hcd->urbp_list); hcd->power_budget = POWER_BUDGET; hcd->state = HC_STATE_RUNNING; hcd->uses_new_polling = 1; #ifdef CONFIG_USB_OTG hcd->self.otg_port = 1; #endif /* FIXME 'urbs' should be a per-device thing, maybe in usbcore */ return device_create_file(dummy_dev(dum_hcd), &dev_attr_urbs); } static void dummy_stop(struct usb_hcd *hcd) { struct dummy_hcd *dum_hcd = hcd_to_dummy_hcd(hcd); hrtimer_cancel(&dum_hcd->timer); dum_hcd->timer_pending = 0; device_remove_file(dummy_dev(dum_hcd), &dev_attr_urbs); dev_info(dummy_dev(dum_hcd), "stopped\n"); } /*-------------------------------------------------------------------------*/ static int dummy_h_get_frame(struct usb_hcd *hcd) { return dummy_g_get_frame(NULL); } static int dummy_setup(struct usb_hcd *hcd) { struct dummy *dum; dum = *((void **)dev_get_platdata(hcd->self.controller)); hcd->self.sg_tablesize = ~0; if (usb_hcd_is_primary_hcd(hcd)) { dum->hs_hcd = hcd_to_dummy_hcd(hcd); dum->hs_hcd->dum = dum; /* * Mark the first roothub as being USB 2.0. * The USB 3.0 roothub will be registered later by * dummy_hcd_probe() */ hcd->speed = HCD_USB2; hcd->self.root_hub->speed = USB_SPEED_HIGH; } else { dum->ss_hcd = hcd_to_dummy_hcd(hcd); dum->ss_hcd->dum = dum; hcd->speed = HCD_USB3; hcd->self.root_hub->speed = USB_SPEED_SUPER; } return 0; } /* Change a group of bulk endpoints to support multiple stream IDs */ static int dummy_alloc_streams(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint **eps, unsigned int num_eps, unsigned int num_streams, gfp_t mem_flags) { struct dummy_hcd *dum_hcd = hcd_to_dummy_hcd(hcd); unsigned long flags; int max_stream; int ret_streams = num_streams; unsigned int index; unsigned int i; if (!num_eps) return -EINVAL; spin_lock_irqsave(&dum_hcd->dum->lock, flags); for (i = 0; i < num_eps; i++) { index = dummy_get_ep_idx(&eps[i]->desc); if ((1 << index) & dum_hcd->stream_en_ep) { ret_streams = -EINVAL; goto out; } max_stream = usb_ss_max_streams(&eps[i]->ss_ep_comp); if (!max_stream) { ret_streams = -EINVAL; goto out; } if (max_stream < ret_streams) { dev_dbg(dummy_dev(dum_hcd), "Ep 0x%x only supports %u " "stream IDs.\n", eps[i]->desc.bEndpointAddress, max_stream); ret_streams = max_stream; } } for (i = 0; i < num_eps; i++) { index = dummy_get_ep_idx(&eps[i]->desc); dum_hcd->stream_en_ep |= 1 << index; set_max_streams_for_pipe(dum_hcd, usb_endpoint_num(&eps[i]->desc), ret_streams); } out: spin_unlock_irqrestore(&dum_hcd->dum->lock, flags); return ret_streams; } /* Reverts a group of bulk endpoints back to not using stream IDs. */ static int dummy_free_streams(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint **eps, unsigned int num_eps, gfp_t mem_flags) { struct dummy_hcd *dum_hcd = hcd_to_dummy_hcd(hcd); unsigned long flags; int ret; unsigned int index; unsigned int i; spin_lock_irqsave(&dum_hcd->dum->lock, flags); for (i = 0; i < num_eps; i++) { index = dummy_get_ep_idx(&eps[i]->desc); if (!((1 << index) & dum_hcd->stream_en_ep)) { ret = -EINVAL; goto out; } } for (i = 0; i < num_eps; i++) { index = dummy_get_ep_idx(&eps[i]->desc); dum_hcd->stream_en_ep &= ~(1 << index); set_max_streams_for_pipe(dum_hcd, usb_endpoint_num(&eps[i]->desc), 0); } ret = 0; out: spin_unlock_irqrestore(&dum_hcd->dum->lock, flags); return ret; } static struct hc_driver dummy_hcd = { .description = (char *) driver_name, .product_desc = "Dummy host controller", .hcd_priv_size = sizeof(struct dummy_hcd), .reset = dummy_setup, .start = dummy_start, .stop = dummy_stop, .urb_enqueue = dummy_urb_enqueue, .urb_dequeue = dummy_urb_dequeue, .get_frame_number = dummy_h_get_frame, .hub_status_data = dummy_hub_status, .hub_control = dummy_hub_control, .bus_suspend = dummy_bus_suspend, .bus_resume = dummy_bus_resume, .alloc_streams = dummy_alloc_streams, .free_streams = dummy_free_streams, }; static int dummy_hcd_probe(struct platform_device *pdev) { struct dummy *dum; struct usb_hcd *hs_hcd; struct usb_hcd *ss_hcd; int retval; dev_info(&pdev->dev, "%s, driver " DRIVER_VERSION "\n", driver_desc); dum = *((void **)dev_get_platdata(&pdev->dev)); if (mod_data.is_super_speed) dummy_hcd.flags = HCD_USB3 | HCD_SHARED; else if (mod_data.is_high_speed) dummy_hcd.flags = HCD_USB2; else dummy_hcd.flags = HCD_USB11; hs_hcd = usb_create_hcd(&dummy_hcd, &pdev->dev, dev_name(&pdev->dev)); if (!hs_hcd) return -ENOMEM; hs_hcd->has_tt = 1; retval = usb_add_hcd(hs_hcd, 0, 0); if (retval) goto put_usb2_hcd; if (mod_data.is_super_speed) { ss_hcd = usb_create_shared_hcd(&dummy_hcd, &pdev->dev, dev_name(&pdev->dev), hs_hcd); if (!ss_hcd) { retval = -ENOMEM; goto dealloc_usb2_hcd; } retval = usb_add_hcd(ss_hcd, 0, 0); if (retval) goto put_usb3_hcd; } return 0; put_usb3_hcd: usb_put_hcd(ss_hcd); dealloc_usb2_hcd: usb_remove_hcd(hs_hcd); put_usb2_hcd: usb_put_hcd(hs_hcd); dum->hs_hcd = dum->ss_hcd = NULL; return retval; } static void dummy_hcd_remove(struct platform_device *pdev) { struct dummy *dum; dum = hcd_to_dummy_hcd(platform_get_drvdata(pdev))->dum; if (dum->ss_hcd) { usb_remove_hcd(dummy_hcd_to_hcd(dum->ss_hcd)); usb_put_hcd(dummy_hcd_to_hcd(dum->ss_hcd)); } usb_remove_hcd(dummy_hcd_to_hcd(dum->hs_hcd)); usb_put_hcd(dummy_hcd_to_hcd(dum->hs_hcd)); dum->hs_hcd = NULL; dum->ss_hcd = NULL; } static int dummy_hcd_suspend(struct platform_device *pdev, pm_message_t state) { struct usb_hcd *hcd; struct dummy_hcd *dum_hcd; int rc = 0; dev_dbg(&pdev->dev, "%s\n", __func__); hcd = platform_get_drvdata(pdev); dum_hcd = hcd_to_dummy_hcd(hcd); if (dum_hcd->rh_state == DUMMY_RH_RUNNING) { dev_warn(&pdev->dev, "Root hub isn't suspended!\n"); rc = -EBUSY; } else clear_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags); return rc; } static int dummy_hcd_resume(struct platform_device *pdev) { struct usb_hcd *hcd; dev_dbg(&pdev->dev, "%s\n", __func__); hcd = platform_get_drvdata(pdev); set_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags); usb_hcd_poll_rh_status(hcd); return 0; } static struct platform_driver dummy_hcd_driver = { .probe = dummy_hcd_probe, .remove = dummy_hcd_remove, .suspend = dummy_hcd_suspend, .resume = dummy_hcd_resume, .driver = { .name = driver_name, }, }; /*-------------------------------------------------------------------------*/ #define MAX_NUM_UDC 32 static struct platform_device *the_udc_pdev[MAX_NUM_UDC]; static struct platform_device *the_hcd_pdev[MAX_NUM_UDC]; static int __init dummy_hcd_init(void) { int retval = -ENOMEM; int i; struct dummy *dum[MAX_NUM_UDC] = {}; if (usb_disabled()) return -ENODEV; if (!mod_data.is_high_speed && mod_data.is_super_speed) return -EINVAL; if (mod_data.num < 1 || mod_data.num > MAX_NUM_UDC) { pr_err("Number of emulated UDC must be in range of 1...%d\n", MAX_NUM_UDC); return -EINVAL; } for (i = 0; i < mod_data.num; i++) { the_hcd_pdev[i] = platform_device_alloc(driver_name, i); if (!the_hcd_pdev[i]) { i--; while (i >= 0) platform_device_put(the_hcd_pdev[i--]); return retval; } } for (i = 0; i < mod_data.num; i++) { the_udc_pdev[i] = platform_device_alloc(gadget_name, i); if (!the_udc_pdev[i]) { i--; while (i >= 0) platform_device_put(the_udc_pdev[i--]); goto err_alloc_udc; } } for (i = 0; i < mod_data.num; i++) { dum[i] = kzalloc(sizeof(struct dummy), GFP_KERNEL); if (!dum[i]) { retval = -ENOMEM; goto err_add_pdata; } retval = platform_device_add_data(the_hcd_pdev[i], &dum[i], sizeof(void *)); if (retval) goto err_add_pdata; retval = platform_device_add_data(the_udc_pdev[i], &dum[i], sizeof(void *)); if (retval) goto err_add_pdata; } retval = platform_driver_register(&dummy_hcd_driver); if (retval < 0) goto err_add_pdata; retval = platform_driver_register(&dummy_udc_driver); if (retval < 0) goto err_register_udc_driver; for (i = 0; i < mod_data.num; i++) { retval = platform_device_add(the_hcd_pdev[i]); if (retval < 0) { i--; while (i >= 0) platform_device_del(the_hcd_pdev[i--]); goto err_add_hcd; } } for (i = 0; i < mod_data.num; i++) { if (!dum[i]->hs_hcd || (!dum[i]->ss_hcd && mod_data.is_super_speed)) { /* * The hcd was added successfully but its probe * function failed for some reason. */ retval = -EINVAL; goto err_add_udc; } } for (i = 0; i < mod_data.num; i++) { retval = platform_device_add(the_udc_pdev[i]); if (retval < 0) { i--; while (i >= 0) platform_device_del(the_udc_pdev[i--]); goto err_add_udc; } } for (i = 0; i < mod_data.num; i++) { if (!platform_get_drvdata(the_udc_pdev[i])) { /* * The udc was added successfully but its probe * function failed for some reason. */ retval = -EINVAL; goto err_probe_udc; } } return retval; err_probe_udc: for (i = 0; i < mod_data.num; i++) platform_device_del(the_udc_pdev[i]); err_add_udc: for (i = 0; i < mod_data.num; i++) platform_device_del(the_hcd_pdev[i]); err_add_hcd: platform_driver_unregister(&dummy_udc_driver); err_register_udc_driver: platform_driver_unregister(&dummy_hcd_driver); err_add_pdata: for (i = 0; i < mod_data.num; i++) kfree(dum[i]); for (i = 0; i < mod_data.num; i++) platform_device_put(the_udc_pdev[i]); err_alloc_udc: for (i = 0; i < mod_data.num; i++) platform_device_put(the_hcd_pdev[i]); return retval; } module_init(dummy_hcd_init); static void __exit dummy_hcd_cleanup(void) { int i; for (i = 0; i < mod_data.num; i++) { struct dummy *dum; dum = *((void **)dev_get_platdata(&the_udc_pdev[i]->dev)); platform_device_unregister(the_udc_pdev[i]); platform_device_unregister(the_hcd_pdev[i]); kfree(dum); } platform_driver_unregister(&dummy_udc_driver); platform_driver_unregister(&dummy_hcd_driver); } module_exit(dummy_hcd_cleanup); |
22 22 20 21 22 22 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 | // SPDX-License-Identifier: GPL-2.0-only /* * File: sysctl.c * * Phonet /proc/sys/net/phonet interface implementation * * Copyright (C) 2008 Nokia Corporation. * * Author: Rémi Denis-Courmont */ #include <linux/seqlock.h> #include <linux/sysctl.h> #include <linux/errno.h> #include <linux/init.h> #include <net/sock.h> #include <linux/phonet.h> #include <net/phonet/phonet.h> #define DYNAMIC_PORT_MIN 0x40 #define DYNAMIC_PORT_MAX 0x7f static DEFINE_SEQLOCK(local_port_range_lock); static int local_port_range_min[2] = {0, 0}; static int local_port_range_max[2] = {1023, 1023}; static int local_port_range[2] = {DYNAMIC_PORT_MIN, DYNAMIC_PORT_MAX}; static struct ctl_table_header *phonet_table_hrd; static void set_local_port_range(int range[2]) { write_seqlock(&local_port_range_lock); local_port_range[0] = range[0]; local_port_range[1] = range[1]; write_sequnlock(&local_port_range_lock); } void phonet_get_local_port_range(int *min, int *max) { unsigned int seq; do { seq = read_seqbegin(&local_port_range_lock); if (min) *min = local_port_range[0]; if (max) *max = local_port_range[1]; } while (read_seqretry(&local_port_range_lock, seq)); } static int proc_local_port_range(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; int range[2] = {local_port_range[0], local_port_range[1]}; struct ctl_table tmp = { .data = &range, .maxlen = sizeof(range), .mode = table->mode, .extra1 = &local_port_range_min, .extra2 = &local_port_range_max, }; ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); if (write && ret == 0) { if (range[1] < range[0]) ret = -EINVAL; else set_local_port_range(range); } return ret; } static struct ctl_table phonet_table[] = { { .procname = "local_port_range", .data = &local_port_range, .maxlen = sizeof(local_port_range), .mode = 0644, .proc_handler = proc_local_port_range, }, }; int __init phonet_sysctl_init(void) { phonet_table_hrd = register_net_sysctl(&init_net, "net/phonet", phonet_table); return phonet_table_hrd == NULL ? -ENOMEM : 0; } void phonet_sysctl_exit(void) { unregister_net_sysctl_table(phonet_table_hrd); } |
2 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 2 1 1 1 2 2 2 2 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 | // SPDX-License-Identifier: GPL-2.0-or-later /* XTS: as defined in IEEE1619/D16 * http://grouper.ieee.org/groups/1619/email/pdf00086.pdf * * Copyright (c) 2007 Rik Snel <rsnel@cube.dyndns.org> * * Based on ecb.c * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> */ #include <crypto/internal/cipher.h> #include <crypto/internal/skcipher.h> #include <crypto/scatterwalk.h> #include <linux/err.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/scatterlist.h> #include <linux/slab.h> #include <crypto/xts.h> #include <crypto/b128ops.h> #include <crypto/gf128mul.h> struct xts_tfm_ctx { struct crypto_skcipher *child; struct crypto_cipher *tweak; }; struct xts_instance_ctx { struct crypto_skcipher_spawn spawn; struct crypto_cipher_spawn tweak_spawn; }; struct xts_request_ctx { le128 t; struct scatterlist *tail; struct scatterlist sg[2]; struct skcipher_request subreq; }; static int xts_setkey(struct crypto_skcipher *parent, const u8 *key, unsigned int keylen) { struct xts_tfm_ctx *ctx = crypto_skcipher_ctx(parent); struct crypto_skcipher *child; struct crypto_cipher *tweak; int err; err = xts_verify_key(parent, key, keylen); if (err) return err; keylen /= 2; /* we need two cipher instances: one to compute the initial 'tweak' * by encrypting the IV (usually the 'plain' iv) and the other * one to encrypt and decrypt the data */ /* tweak cipher, uses Key2 i.e. the second half of *key */ tweak = ctx->tweak; crypto_cipher_clear_flags(tweak, CRYPTO_TFM_REQ_MASK); crypto_cipher_set_flags(tweak, crypto_skcipher_get_flags(parent) & CRYPTO_TFM_REQ_MASK); err = crypto_cipher_setkey(tweak, key + keylen, keylen); if (err) return err; /* data cipher, uses Key1 i.e. the first half of *key */ child = ctx->child; crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) & CRYPTO_TFM_REQ_MASK); return crypto_skcipher_setkey(child, key, keylen); } /* * We compute the tweak masks twice (both before and after the ECB encryption or * decryption) to avoid having to allocate a temporary buffer and/or make * mutliple calls to the 'ecb(..)' instance, which usually would be slower than * just doing the gf128mul_x_ble() calls again. */ static int xts_xor_tweak(struct skcipher_request *req, bool second_pass, bool enc) { struct xts_request_ctx *rctx = skcipher_request_ctx(req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); const bool cts = (req->cryptlen % XTS_BLOCK_SIZE); const int bs = XTS_BLOCK_SIZE; struct skcipher_walk w; le128 t = rctx->t; int err; if (second_pass) { req = &rctx->subreq; /* set to our TFM to enforce correct alignment: */ skcipher_request_set_tfm(req, tfm); } err = skcipher_walk_virt(&w, req, false); while (w.nbytes) { unsigned int avail = w.nbytes; const le128 *wsrc; le128 *wdst; wsrc = w.src.virt.addr; wdst = w.dst.virt.addr; do { if (unlikely(cts) && w.total - w.nbytes + avail < 2 * XTS_BLOCK_SIZE) { if (!enc) { if (second_pass) rctx->t = t; gf128mul_x_ble(&t, &t); } le128_xor(wdst, &t, wsrc); if (enc && second_pass) gf128mul_x_ble(&rctx->t, &t); skcipher_walk_done(&w, avail - bs); return 0; } le128_xor(wdst++, &t, wsrc++); gf128mul_x_ble(&t, &t); } while ((avail -= bs) >= bs); err = skcipher_walk_done(&w, avail); } return err; } static int xts_xor_tweak_pre(struct skcipher_request *req, bool enc) { return xts_xor_tweak(req, false, enc); } static int xts_xor_tweak_post(struct skcipher_request *req, bool enc) { return xts_xor_tweak(req, true, enc); } static void xts_cts_done(void *data, int err) { struct skcipher_request *req = data; le128 b; if (!err) { struct xts_request_ctx *rctx = skcipher_request_ctx(req); scatterwalk_map_and_copy(&b, rctx->tail, 0, XTS_BLOCK_SIZE, 0); le128_xor(&b, &rctx->t, &b); scatterwalk_map_and_copy(&b, rctx->tail, 0, XTS_BLOCK_SIZE, 1); } skcipher_request_complete(req, err); } static int xts_cts_final(struct skcipher_request *req, int (*crypt)(struct skcipher_request *req)) { const struct xts_tfm_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); int offset = req->cryptlen & ~(XTS_BLOCK_SIZE - 1); struct xts_request_ctx *rctx = skcipher_request_ctx(req); struct skcipher_request *subreq = &rctx->subreq; int tail = req->cryptlen % XTS_BLOCK_SIZE; le128 b[2]; int err; rctx->tail = scatterwalk_ffwd(rctx->sg, req->dst, offset - XTS_BLOCK_SIZE); scatterwalk_map_and_copy(b, rctx->tail, 0, XTS_BLOCK_SIZE, 0); b[1] = b[0]; scatterwalk_map_and_copy(b, req->src, offset, tail, 0); le128_xor(b, &rctx->t, b); scatterwalk_map_and_copy(b, rctx->tail, 0, XTS_BLOCK_SIZE + tail, 1); skcipher_request_set_tfm(subreq, ctx->child); skcipher_request_set_callback(subreq, req->base.flags, xts_cts_done, req); skcipher_request_set_crypt(subreq, rctx->tail, rctx->tail, XTS_BLOCK_SIZE, NULL); err = crypt(subreq); if (err) return err; scatterwalk_map_and_copy(b, rctx->tail, 0, XTS_BLOCK_SIZE, 0); le128_xor(b, &rctx->t, b); scatterwalk_map_and_copy(b, rctx->tail, 0, XTS_BLOCK_SIZE, 1); return 0; } static void xts_encrypt_done(void *data, int err) { struct skcipher_request *req = data; if (!err) { struct xts_request_ctx *rctx = skcipher_request_ctx(req); rctx->subreq.base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG; err = xts_xor_tweak_post(req, true); if (!err && unlikely(req->cryptlen % XTS_BLOCK_SIZE)) { err = xts_cts_final(req, crypto_skcipher_encrypt); if (err == -EINPROGRESS || err == -EBUSY) return; } } skcipher_request_complete(req, err); } static void xts_decrypt_done(void *data, int err) { struct skcipher_request *req = data; if (!err) { struct xts_request_ctx *rctx = skcipher_request_ctx(req); rctx->subreq.base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG; err = xts_xor_tweak_post(req, false); if (!err && unlikely(req->cryptlen % XTS_BLOCK_SIZE)) { err = xts_cts_final(req, crypto_skcipher_decrypt); if (err == -EINPROGRESS || err == -EBUSY) return; } } skcipher_request_complete(req, err); } static int xts_init_crypt(struct skcipher_request *req, crypto_completion_t compl) { const struct xts_tfm_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); struct xts_request_ctx *rctx = skcipher_request_ctx(req); struct skcipher_request *subreq = &rctx->subreq; if (req->cryptlen < XTS_BLOCK_SIZE) return -EINVAL; skcipher_request_set_tfm(subreq, ctx->child); skcipher_request_set_callback(subreq, req->base.flags, compl, req); skcipher_request_set_crypt(subreq, req->dst, req->dst, req->cryptlen & ~(XTS_BLOCK_SIZE - 1), NULL); /* calculate first value of T */ crypto_cipher_encrypt_one(ctx->tweak, (u8 *)&rctx->t, req->iv); return 0; } static int xts_encrypt(struct skcipher_request *req) { struct xts_request_ctx *rctx = skcipher_request_ctx(req); struct skcipher_request *subreq = &rctx->subreq; int err; err = xts_init_crypt(req, xts_encrypt_done) ?: xts_xor_tweak_pre(req, true) ?: crypto_skcipher_encrypt(subreq) ?: xts_xor_tweak_post(req, true); if (err || likely((req->cryptlen % XTS_BLOCK_SIZE) == 0)) return err; return xts_cts_final(req, crypto_skcipher_encrypt); } static int xts_decrypt(struct skcipher_request *req) { struct xts_request_ctx *rctx = skcipher_request_ctx(req); struct skcipher_request *subreq = &rctx->subreq; int err; err = xts_init_crypt(req, xts_decrypt_done) ?: xts_xor_tweak_pre(req, false) ?: crypto_skcipher_decrypt(subreq) ?: xts_xor_tweak_post(req, false); if (err || likely((req->cryptlen % XTS_BLOCK_SIZE) == 0)) return err; return xts_cts_final(req, crypto_skcipher_decrypt); } static int xts_init_tfm(struct crypto_skcipher *tfm) { struct skcipher_instance *inst = skcipher_alg_instance(tfm); struct xts_instance_ctx *ictx = skcipher_instance_ctx(inst); struct xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm); struct crypto_skcipher *child; struct crypto_cipher *tweak; child = crypto_spawn_skcipher(&ictx->spawn); if (IS_ERR(child)) return PTR_ERR(child); ctx->child = child; tweak = crypto_spawn_cipher(&ictx->tweak_spawn); if (IS_ERR(tweak)) { crypto_free_skcipher(ctx->child); return PTR_ERR(tweak); } ctx->tweak = tweak; crypto_skcipher_set_reqsize(tfm, crypto_skcipher_reqsize(child) + sizeof(struct xts_request_ctx)); return 0; } static void xts_exit_tfm(struct crypto_skcipher *tfm) { struct xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm); crypto_free_skcipher(ctx->child); crypto_free_cipher(ctx->tweak); } static void xts_free_instance(struct skcipher_instance *inst) { struct xts_instance_ctx *ictx = skcipher_instance_ctx(inst); crypto_drop_skcipher(&ictx->spawn); crypto_drop_cipher(&ictx->tweak_spawn); kfree(inst); } static int xts_create(struct crypto_template *tmpl, struct rtattr **tb) { struct skcipher_alg_common *alg; char name[CRYPTO_MAX_ALG_NAME]; struct skcipher_instance *inst; struct xts_instance_ctx *ctx; const char *cipher_name; u32 mask; int err; err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER, &mask); if (err) return err; cipher_name = crypto_attr_alg_name(tb[1]); if (IS_ERR(cipher_name)) return PTR_ERR(cipher_name); inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); if (!inst) return -ENOMEM; ctx = skcipher_instance_ctx(inst); err = crypto_grab_skcipher(&ctx->spawn, skcipher_crypto_instance(inst), cipher_name, 0, mask); if (err == -ENOENT) { err = -ENAMETOOLONG; if (snprintf(name, CRYPTO_MAX_ALG_NAME, "ecb(%s)", cipher_name) >= CRYPTO_MAX_ALG_NAME) goto err_free_inst; err = crypto_grab_skcipher(&ctx->spawn, skcipher_crypto_instance(inst), name, 0, mask); } if (err) goto err_free_inst; alg = crypto_spawn_skcipher_alg_common(&ctx->spawn); err = -EINVAL; if (alg->base.cra_blocksize != XTS_BLOCK_SIZE) goto err_free_inst; if (alg->ivsize) goto err_free_inst; err = crypto_inst_setname(skcipher_crypto_instance(inst), "xts", &alg->base); if (err) goto err_free_inst; err = -EINVAL; cipher_name = alg->base.cra_name; /* Alas we screwed up the naming so we have to mangle the * cipher name. */ if (!strncmp(cipher_name, "ecb(", 4)) { int len; len = strscpy(name, cipher_name + 4, sizeof(name)); if (len < 2) goto err_free_inst; if (name[len - 1] != ')') goto err_free_inst; name[len - 1] = 0; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, "xts(%s)", name) >= CRYPTO_MAX_ALG_NAME) { err = -ENAMETOOLONG; goto err_free_inst; } } else goto err_free_inst; err = crypto_grab_cipher(&ctx->tweak_spawn, skcipher_crypto_instance(inst), name, 0, mask); if (err) goto err_free_inst; inst->alg.base.cra_priority = alg->base.cra_priority; inst->alg.base.cra_blocksize = XTS_BLOCK_SIZE; inst->alg.base.cra_alignmask = alg->base.cra_alignmask | (__alignof__(u64) - 1); inst->alg.ivsize = XTS_BLOCK_SIZE; inst->alg.min_keysize = alg->min_keysize * 2; inst->alg.max_keysize = alg->max_keysize * 2; inst->alg.base.cra_ctxsize = sizeof(struct xts_tfm_ctx); inst->alg.init = xts_init_tfm; inst->alg.exit = xts_exit_tfm; inst->alg.setkey = xts_setkey; inst->alg.encrypt = xts_encrypt; inst->alg.decrypt = xts_decrypt; inst->free = xts_free_instance; err = skcipher_register_instance(tmpl, inst); if (err) { err_free_inst: xts_free_instance(inst); } return err; } static struct crypto_template xts_tmpl = { .name = "xts", .create = xts_create, .module = THIS_MODULE, }; static int __init xts_module_init(void) { return crypto_register_template(&xts_tmpl); } static void __exit xts_module_exit(void) { crypto_unregister_template(&xts_tmpl); } subsys_initcall(xts_module_init); module_exit(xts_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("XTS block cipher mode"); MODULE_ALIAS_CRYPTO("xts"); MODULE_IMPORT_NS("CRYPTO_INTERNAL"); MODULE_SOFTDEP("pre: ecb"); |
4 4 4 4 14068 14058 3134 13387 1447 1451 1450 1445 1630 1630 1632 1632 1630 1633 1631 1630 1420 1418 1407 1402 1408 1411 1406 1411 160 159 158 2 2 229 230 230 226 222 224 94 95 95 2 2 2 2 94 94 229 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 | // SPDX-License-Identifier: GPL-2.0 /* * Fast batching percpu counters. */ #include <linux/percpu_counter.h> #include <linux/mutex.h> #include <linux/init.h> #include <linux/cpu.h> #include <linux/module.h> #include <linux/debugobjects.h> #ifdef CONFIG_HOTPLUG_CPU static LIST_HEAD(percpu_counters); static DEFINE_SPINLOCK(percpu_counters_lock); #endif #ifdef CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER static const struct debug_obj_descr percpu_counter_debug_descr; static bool percpu_counter_fixup_free(void *addr, enum debug_obj_state state) { struct percpu_counter *fbc = addr; switch (state) { case ODEBUG_STATE_ACTIVE: percpu_counter_destroy(fbc); debug_object_free(fbc, &percpu_counter_debug_descr); return true; default: return false; } } static const struct debug_obj_descr percpu_counter_debug_descr = { .name = "percpu_counter", .fixup_free = percpu_counter_fixup_free, }; static inline void debug_percpu_counter_activate(struct percpu_counter *fbc) { debug_object_init(fbc, &percpu_counter_debug_descr); debug_object_activate(fbc, &percpu_counter_debug_descr); } static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc) { debug_object_deactivate(fbc, &percpu_counter_debug_descr); debug_object_free(fbc, &percpu_counter_debug_descr); } #else /* CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER */ static inline void debug_percpu_counter_activate(struct percpu_counter *fbc) { } static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc) { } #endif /* CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER */ void percpu_counter_set(struct percpu_counter *fbc, s64 amount) { int cpu; unsigned long flags; raw_spin_lock_irqsave(&fbc->lock, flags); for_each_possible_cpu(cpu) { s32 *pcount = per_cpu_ptr(fbc->counters, cpu); *pcount = 0; } fbc->count = amount; raw_spin_unlock_irqrestore(&fbc->lock, flags); } EXPORT_SYMBOL(percpu_counter_set); /* * Add to a counter while respecting batch size. * * There are 2 implementations, both dealing with the following problem: * * The decision slow path/fast path and the actual update must be atomic. * Otherwise a call in process context could check the current values and * decide that the fast path can be used. If now an interrupt occurs before * the this_cpu_add(), and the interrupt updates this_cpu(*fbc->counters), * then the this_cpu_add() that is executed after the interrupt has completed * can produce values larger than "batch" or even overflows. */ #ifdef CONFIG_HAVE_CMPXCHG_LOCAL /* * Safety against interrupts is achieved in 2 ways: * 1. the fast path uses local cmpxchg (note: no lock prefix) * 2. the slow path operates with interrupts disabled */ void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) { s64 count; unsigned long flags; count = this_cpu_read(*fbc->counters); do { if (unlikely(abs(count + amount) >= batch)) { raw_spin_lock_irqsave(&fbc->lock, flags); /* * Note: by now we might have migrated to another CPU * or the value might have changed. */ count = __this_cpu_read(*fbc->counters); fbc->count += count + amount; __this_cpu_sub(*fbc->counters, count); raw_spin_unlock_irqrestore(&fbc->lock, flags); return; } } while (!this_cpu_try_cmpxchg(*fbc->counters, &count, count + amount)); } #else /* * local_irq_save() is used to make the function irq safe: * - The slow path would be ok as protected by an irq-safe spinlock. * - this_cpu_add would be ok as it is irq-safe by definition. */ void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) { s64 count; unsigned long flags; local_irq_save(flags); count = __this_cpu_read(*fbc->counters) + amount; if (abs(count) >= batch) { raw_spin_lock(&fbc->lock); fbc->count += count; __this_cpu_sub(*fbc->counters, count - amount); raw_spin_unlock(&fbc->lock); } else { this_cpu_add(*fbc->counters, amount); } local_irq_restore(flags); } #endif EXPORT_SYMBOL(percpu_counter_add_batch); /* * For percpu_counter with a big batch, the devication of its count could * be big, and there is requirement to reduce the deviation, like when the * counter's batch could be runtime decreased to get a better accuracy, * which can be achieved by running this sync function on each CPU. */ void percpu_counter_sync(struct percpu_counter *fbc) { unsigned long flags; s64 count; raw_spin_lock_irqsave(&fbc->lock, flags); count = __this_cpu_read(*fbc->counters); fbc->count += count; __this_cpu_sub(*fbc->counters, count); raw_spin_unlock_irqrestore(&fbc->lock, flags); } EXPORT_SYMBOL(percpu_counter_sync); /* * Add up all the per-cpu counts, return the result. This is a more accurate * but much slower version of percpu_counter_read_positive(). * * We use the cpu mask of (cpu_online_mask | cpu_dying_mask) to capture sums * from CPUs that are in the process of being taken offline. Dying cpus have * been removed from the online mask, but may not have had the hotplug dead * notifier called to fold the percpu count back into the global counter sum. * By including dying CPUs in the iteration mask, we avoid this race condition * so __percpu_counter_sum() just does the right thing when CPUs are being taken * offline. */ s64 __percpu_counter_sum(struct percpu_counter *fbc) { s64 ret; int cpu; unsigned long flags; raw_spin_lock_irqsave(&fbc->lock, flags); ret = fbc->count; for_each_cpu_or(cpu, cpu_online_mask, cpu_dying_mask) { s32 *pcount = per_cpu_ptr(fbc->counters, cpu); ret += *pcount; } raw_spin_unlock_irqrestore(&fbc->lock, flags); return ret; } EXPORT_SYMBOL(__percpu_counter_sum); int __percpu_counter_init_many(struct percpu_counter *fbc, s64 amount, gfp_t gfp, u32 nr_counters, struct lock_class_key *key) { unsigned long flags __maybe_unused; size_t counter_size; s32 __percpu *counters; u32 i; counter_size = ALIGN(sizeof(*counters), __alignof__(*counters)); counters = __alloc_percpu_gfp(nr_counters * counter_size, __alignof__(*counters), gfp); if (!counters) { fbc[0].counters = NULL; return -ENOMEM; } for (i = 0; i < nr_counters; i++) { raw_spin_lock_init(&fbc[i].lock); lockdep_set_class(&fbc[i].lock, key); #ifdef CONFIG_HOTPLUG_CPU INIT_LIST_HEAD(&fbc[i].list); #endif fbc[i].count = amount; fbc[i].counters = (void __percpu *)counters + i * counter_size; debug_percpu_counter_activate(&fbc[i]); } #ifdef CONFIG_HOTPLUG_CPU spin_lock_irqsave(&percpu_counters_lock, flags); for (i = 0; i < nr_counters; i++) list_add(&fbc[i].list, &percpu_counters); spin_unlock_irqrestore(&percpu_counters_lock, flags); #endif return 0; } EXPORT_SYMBOL(__percpu_counter_init_many); void percpu_counter_destroy_many(struct percpu_counter *fbc, u32 nr_counters) { unsigned long flags __maybe_unused; u32 i; if (WARN_ON_ONCE(!fbc)) return; if (!fbc[0].counters) return; for (i = 0; i < nr_counters; i++) debug_percpu_counter_deactivate(&fbc[i]); #ifdef CONFIG_HOTPLUG_CPU spin_lock_irqsave(&percpu_counters_lock, flags); for (i = 0; i < nr_counters; i++) list_del(&fbc[i].list); spin_unlock_irqrestore(&percpu_counters_lock, flags); #endif free_percpu(fbc[0].counters); for (i = 0; i < nr_counters; i++) fbc[i].counters = NULL; } EXPORT_SYMBOL(percpu_counter_destroy_many); int percpu_counter_batch __read_mostly = 32; EXPORT_SYMBOL(percpu_counter_batch); static int compute_batch_value(unsigned int cpu) { int nr = num_online_cpus(); percpu_counter_batch = max(32, nr*2); return 0; } static int percpu_counter_cpu_dead(unsigned int cpu) { #ifdef CONFIG_HOTPLUG_CPU struct percpu_counter *fbc; compute_batch_value(cpu); spin_lock_irq(&percpu_counters_lock); list_for_each_entry(fbc, &percpu_counters, list) { s32 *pcount; raw_spin_lock(&fbc->lock); pcount = per_cpu_ptr(fbc->counters, cpu); fbc->count += *pcount; *pcount = 0; raw_spin_unlock(&fbc->lock); } spin_unlock_irq(&percpu_counters_lock); #endif return 0; } /* * Compare counter against given value. * Return 1 if greater, 0 if equal and -1 if less */ int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch) { s64 count; count = percpu_counter_read(fbc); /* Check to see if rough count will be sufficient for comparison */ if (abs(count - rhs) > (batch * num_online_cpus())) { if (count > rhs) return 1; else return -1; } /* Need to use precise count */ count = percpu_counter_sum(fbc); if (count > rhs) return 1; else if (count < rhs) return -1; else return 0; } EXPORT_SYMBOL(__percpu_counter_compare); /* * Compare counter, and add amount if total is: less than or equal to limit if * amount is positive, or greater than or equal to limit if amount is negative. * Return true if amount is added, or false if total would be beyond the limit. * * Negative limit is allowed, but unusual. * When negative amounts (subs) are given to percpu_counter_limited_add(), * the limit would most naturally be 0 - but other limits are also allowed. * * Overflow beyond S64_MAX is not allowed for: counter, limit and amount * are all assumed to be sane (far from S64_MIN and S64_MAX). */ bool __percpu_counter_limited_add(struct percpu_counter *fbc, s64 limit, s64 amount, s32 batch) { s64 count; s64 unknown; unsigned long flags; bool good = false; if (amount == 0) return true; local_irq_save(flags); unknown = batch * num_online_cpus(); count = __this_cpu_read(*fbc->counters); /* Skip taking the lock when safe */ if (abs(count + amount) <= batch && ((amount > 0 && fbc->count + unknown <= limit) || (amount < 0 && fbc->count - unknown >= limit))) { this_cpu_add(*fbc->counters, amount); local_irq_restore(flags); return true; } raw_spin_lock(&fbc->lock); count = fbc->count + amount; /* Skip percpu_counter_sum() when safe */ if (amount > 0) { if (count - unknown > limit) goto out; if (count + unknown <= limit) good = true; } else { if (count + unknown < limit) goto out; if (count - unknown >= limit) good = true; } if (!good) { s32 *pcount; int cpu; for_each_cpu_or(cpu, cpu_online_mask, cpu_dying_mask) { pcount = per_cpu_ptr(fbc->counters, cpu); count += *pcount; } if (amount > 0) { if (count > limit) goto out; } else { if (count < limit) goto out; } good = true; } count = __this_cpu_read(*fbc->counters); fbc->count += count + amount; __this_cpu_sub(*fbc->counters, count); out: raw_spin_unlock(&fbc->lock); local_irq_restore(flags); return good; } static int __init percpu_counter_startup(void) { int ret; ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "lib/percpu_cnt:online", compute_batch_value, NULL); WARN_ON(ret < 0); ret = cpuhp_setup_state_nocalls(CPUHP_PERCPU_CNT_DEAD, "lib/percpu_cnt:dead", NULL, percpu_counter_cpu_dead); WARN_ON(ret < 0); return 0; } module_init(percpu_counter_startup); |
141 91 178 1460 1078 1445 1075 1551 1078 1078 693 1077 96 8 7 177 1384 1445 94 1468 1376 33 96 60 37 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* include/asm-generic/tlb.h * * Generic TLB shootdown code * * Copyright 2001 Red Hat, Inc. * Based on code from mm/memory.c Copyright Linus Torvalds and others. * * Copyright 2011 Red Hat, Inc., Peter Zijlstra */ #ifndef _ASM_GENERIC__TLB_H #define _ASM_GENERIC__TLB_H #include <linux/mmu_notifier.h> #include <linux/swap.h> #include <linux/hugetlb_inline.h> #include <asm/tlbflush.h> #include <asm/cacheflush.h> /* * Blindly accessing user memory from NMI context can be dangerous * if we're in the middle of switching the current user task or switching * the loaded mm. */ #ifndef nmi_uaccess_okay # define nmi_uaccess_okay() true #endif #ifdef CONFIG_MMU /* * Generic MMU-gather implementation. * * The mmu_gather data structure is used by the mm code to implement the * correct and efficient ordering of freeing pages and TLB invalidations. * * This correct ordering is: * * 1) unhook page * 2) TLB invalidate page * 3) free page * * That is, we must never free a page before we have ensured there are no live * translations left to it. Otherwise it might be possible to observe (or * worse, change) the page content after it has been reused. * * The mmu_gather API consists of: * * - tlb_gather_mmu() / tlb_gather_mmu_fullmm() / tlb_finish_mmu() * * start and finish a mmu_gather * * Finish in particular will issue a (final) TLB invalidate and free * all (remaining) queued pages. * * - tlb_start_vma() / tlb_end_vma(); marks the start / end of a VMA * * Defaults to flushing at tlb_end_vma() to reset the range; helps when * there's large holes between the VMAs. * * - tlb_remove_table() * * tlb_remove_table() is the basic primitive to free page-table directories * (__p*_free_tlb()). In it's most primitive form it is an alias for * tlb_remove_page() below, for when page directories are pages and have no * additional constraints. * * See also MMU_GATHER_TABLE_FREE and MMU_GATHER_RCU_TABLE_FREE. * * - tlb_remove_page() / tlb_remove_page_size() * - __tlb_remove_folio_pages() / __tlb_remove_page_size() * - __tlb_remove_folio_pages_size() * * __tlb_remove_folio_pages_size() is the basic primitive that queues pages * for freeing. It will return a boolean indicating if the queue is (now) * full and a call to tlb_flush_mmu() is required. * * tlb_remove_page() and tlb_remove_page_size() imply the call to * tlb_flush_mmu() when required and has no return value. * * __tlb_remove_folio_pages() is similar to __tlb_remove_page_size(), * however, instead of removing a single page, assume PAGE_SIZE and remove * the given number of consecutive pages that are all part of the * same (large) folio. * * - tlb_change_page_size() * * call before __tlb_remove_page*() to set the current page-size; implies a * possible tlb_flush_mmu() call. * * - tlb_flush_mmu() / tlb_flush_mmu_tlbonly() * * tlb_flush_mmu_tlbonly() - does the TLB invalidate (and resets * related state, like the range) * * tlb_flush_mmu() - in addition to the above TLB invalidate, also frees * whatever pages are still batched. * * - mmu_gather::fullmm * * A flag set by tlb_gather_mmu_fullmm() to indicate we're going to free * the entire mm; this allows a number of optimizations. * * - We can ignore tlb_{start,end}_vma(); because we don't * care about ranges. Everything will be shot down. * * - (RISC) architectures that use ASIDs can cycle to a new ASID * and delay the invalidation until ASID space runs out. * * - mmu_gather::need_flush_all * * A flag that can be set by the arch code if it wants to force * flush the entire TLB irrespective of the range. For instance * x86-PAE needs this when changing top-level entries. * * And allows the architecture to provide and implement tlb_flush(): * * tlb_flush() may, in addition to the above mentioned mmu_gather fields, make * use of: * * - mmu_gather::start / mmu_gather::end * * which provides the range that needs to be flushed to cover the pages to * be freed. * * - mmu_gather::freed_tables * * set when we freed page table pages * * - tlb_get_unmap_shift() / tlb_get_unmap_size() * * returns the smallest TLB entry size unmapped in this range. * * If an architecture does not provide tlb_flush() a default implementation * based on flush_tlb_range() will be used, unless MMU_GATHER_NO_RANGE is * specified, in which case we'll default to flush_tlb_mm(). * * Additionally there are a few opt-in features: * * MMU_GATHER_PAGE_SIZE * * This ensures we call tlb_flush() every time tlb_change_page_size() actually * changes the size and provides mmu_gather::page_size to tlb_flush(). * * This might be useful if your architecture has size specific TLB * invalidation instructions. * * MMU_GATHER_TABLE_FREE * * This provides tlb_remove_table(), to be used instead of tlb_remove_page() * for page directores (__p*_free_tlb()). * * Useful if your architecture has non-page page directories. * * When used, an architecture is expected to provide __tlb_remove_table() or * use the generic __tlb_remove_table(), which does the actual freeing of these * pages. * * MMU_GATHER_RCU_TABLE_FREE * * Like MMU_GATHER_TABLE_FREE, and adds semi-RCU semantics to the free (see * comment below). * * Useful if your architecture doesn't use IPIs for remote TLB invalidates * and therefore doesn't naturally serialize with software page-table walkers. * * MMU_GATHER_NO_FLUSH_CACHE * * Indicates the architecture has flush_cache_range() but it needs *NOT* be called * before unmapping a VMA. * * NOTE: strictly speaking we shouldn't have this knob and instead rely on * flush_cache_range() being a NOP, except Sparc64 seems to be * different here. * * MMU_GATHER_MERGE_VMAS * * Indicates the architecture wants to merge ranges over VMAs; typical when * multiple range invalidates are more expensive than a full invalidate. * * MMU_GATHER_NO_RANGE * * Use this if your architecture lacks an efficient flush_tlb_range(). This * option implies MMU_GATHER_MERGE_VMAS above. * * MMU_GATHER_NO_GATHER * * If the option is set the mmu_gather will not track individual pages for * delayed page free anymore. A platform that enables the option needs to * provide its own implementation of the __tlb_remove_page_size() function to * free pages. * * This is useful if your architecture already flushes TLB entries in the * various ptep_get_and_clear() functions. */ #ifdef CONFIG_MMU_GATHER_TABLE_FREE struct mmu_table_batch { #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE struct rcu_head rcu; #endif unsigned int nr; void *tables[]; }; #define MAX_TABLE_BATCH \ ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *)) #ifndef __HAVE_ARCH_TLB_REMOVE_TABLE static inline void __tlb_remove_table(void *table) { struct ptdesc *ptdesc = (struct ptdesc *)table; pagetable_dtor_free(ptdesc); } #endif extern void tlb_remove_table(struct mmu_gather *tlb, void *table); #else /* !CONFIG_MMU_GATHER_TABLE_FREE */ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page); /* * Without MMU_GATHER_TABLE_FREE the architecture is assumed to have page based * page directories and we can use the normal page batching to free them. */ static inline void tlb_remove_table(struct mmu_gather *tlb, void *table) { struct ptdesc *ptdesc = (struct ptdesc *)table; pagetable_dtor(ptdesc); tlb_remove_page(tlb, ptdesc_page(ptdesc)); } #endif /* CONFIG_MMU_GATHER_TABLE_FREE */ #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE /* * This allows an architecture that does not use the linux page-tables for * hardware to skip the TLBI when freeing page tables. */ #ifndef tlb_needs_table_invalidate #define tlb_needs_table_invalidate() (true) #endif void tlb_remove_table_sync_one(void); #else #ifdef tlb_needs_table_invalidate #error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE #endif static inline void tlb_remove_table_sync_one(void) { } #endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */ #ifndef CONFIG_MMU_GATHER_NO_GATHER /* * If we can't allocate a page to make a big batch of page pointers * to work on, then just handle a few from the on-stack structure. */ #define MMU_GATHER_BUNDLE 8 struct mmu_gather_batch { struct mmu_gather_batch *next; unsigned int nr; unsigned int max; struct encoded_page *encoded_pages[]; }; #define MAX_GATHER_BATCH \ ((PAGE_SIZE - sizeof(struct mmu_gather_batch)) / sizeof(void *)) /* * Limit the maximum number of mmu_gather batches to reduce a risk of soft * lockups for non-preemptible kernels on huge machines when a lot of memory * is zapped during unmapping. * 10K pages freed at once should be safe even without a preemption point. */ #define MAX_GATHER_BATCH_COUNT (10000UL/MAX_GATHER_BATCH) extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, bool delay_rmap, int page_size); bool __tlb_remove_folio_pages(struct mmu_gather *tlb, struct page *page, unsigned int nr_pages, bool delay_rmap); #ifdef CONFIG_SMP /* * This both sets 'delayed_rmap', and returns true. It would be an inline * function, except we define it before the 'struct mmu_gather'. */ #define tlb_delay_rmap(tlb) (((tlb)->delayed_rmap = 1), true) extern void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma); #endif #endif /* * We have a no-op version of the rmap removal that doesn't * delay anything. That is used on S390, which flushes remote * TLBs synchronously, and on UP, which doesn't have any * remote TLBs to flush and is not preemptible due to this * all happening under the page table lock. */ #ifndef tlb_delay_rmap #define tlb_delay_rmap(tlb) (false) static inline void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma) { } #endif /* * struct mmu_gather is an opaque type used by the mm code for passing around * any data needed by arch specific code for tlb_remove_page. */ struct mmu_gather { struct mm_struct *mm; #ifdef CONFIG_MMU_GATHER_TABLE_FREE struct mmu_table_batch *batch; #endif unsigned long start; unsigned long end; /* * we are in the middle of an operation to clear * a full mm and can make some optimizations */ unsigned int fullmm : 1; /* * we have performed an operation which * requires a complete flush of the tlb */ unsigned int need_flush_all : 1; /* * we have removed page directories */ unsigned int freed_tables : 1; /* * Do we have pending delayed rmap removals? */ unsigned int delayed_rmap : 1; /* * at which levels have we cleared entries? */ unsigned int cleared_ptes : 1; unsigned int cleared_pmds : 1; unsigned int cleared_puds : 1; unsigned int cleared_p4ds : 1; /* * tracks VM_EXEC | VM_HUGETLB in tlb_start_vma */ unsigned int vma_exec : 1; unsigned int vma_huge : 1; unsigned int vma_pfn : 1; unsigned int batch_count; #ifndef CONFIG_MMU_GATHER_NO_GATHER struct mmu_gather_batch *active; struct mmu_gather_batch local; struct page *__pages[MMU_GATHER_BUNDLE]; #ifdef CONFIG_MMU_GATHER_PAGE_SIZE unsigned int page_size; #endif #endif }; void tlb_flush_mmu(struct mmu_gather *tlb); static inline void __tlb_adjust_range(struct mmu_gather *tlb, unsigned long address, unsigned int range_size) { tlb->start = min(tlb->start, address); tlb->end = max(tlb->end, address + range_size); } static inline void __tlb_reset_range(struct mmu_gather *tlb) { if (tlb->fullmm) { tlb->start = tlb->end = ~0; } else { tlb->start = TASK_SIZE; tlb->end = 0; } tlb->freed_tables = 0; tlb->cleared_ptes = 0; tlb->cleared_pmds = 0; tlb->cleared_puds = 0; tlb->cleared_p4ds = 0; /* * Do not reset mmu_gather::vma_* fields here, we do not * call into tlb_start_vma() again to set them if there is an * intermediate flush. */ } #ifdef CONFIG_MMU_GATHER_NO_RANGE #if defined(tlb_flush) #error MMU_GATHER_NO_RANGE relies on default tlb_flush() #endif /* * When an architecture does not have efficient means of range flushing TLBs * there is no point in doing intermediate flushes on tlb_end_vma() to keep the * range small. We equally don't have to worry about page granularity or other * things. * * All we need to do is issue a full flush for any !0 range. */ static inline void tlb_flush(struct mmu_gather *tlb) { if (tlb->end) flush_tlb_mm(tlb->mm); } #else /* CONFIG_MMU_GATHER_NO_RANGE */ #ifndef tlb_flush /* * When an architecture does not provide its own tlb_flush() implementation * but does have a reasonably efficient flush_vma_range() implementation * use that. */ static inline void tlb_flush(struct mmu_gather *tlb) { if (tlb->fullmm || tlb->need_flush_all) { flush_tlb_mm(tlb->mm); } else if (tlb->end) { struct vm_area_struct vma = { .vm_mm = tlb->mm, .vm_flags = (tlb->vma_exec ? VM_EXEC : 0) | (tlb->vma_huge ? VM_HUGETLB : 0), }; flush_tlb_range(&vma, tlb->start, tlb->end); } } #endif #endif /* CONFIG_MMU_GATHER_NO_RANGE */ static inline void tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { /* * flush_tlb_range() implementations that look at VM_HUGETLB (tile, * mips-4k) flush only large pages. * * flush_tlb_range() implementations that flush I-TLB also flush D-TLB * (tile, xtensa, arm), so it's ok to just add VM_EXEC to an existing * range. * * We rely on tlb_end_vma() to issue a flush, such that when we reset * these values the batch is empty. */ tlb->vma_huge = is_vm_hugetlb_page(vma); tlb->vma_exec = !!(vma->vm_flags & VM_EXEC); tlb->vma_pfn = !!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)); } static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) { /* * Anything calling __tlb_adjust_range() also sets at least one of * these bits. */ if (!(tlb->freed_tables || tlb->cleared_ptes || tlb->cleared_pmds || tlb->cleared_puds || tlb->cleared_p4ds)) return; tlb_flush(tlb); __tlb_reset_range(tlb); } static inline void tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size) { if (__tlb_remove_page_size(tlb, page, false, page_size)) tlb_flush_mmu(tlb); } static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) { return tlb_remove_page_size(tlb, page, PAGE_SIZE); } static inline void tlb_remove_ptdesc(struct mmu_gather *tlb, struct ptdesc *pt) { tlb_remove_table(tlb, pt); } static inline void tlb_change_page_size(struct mmu_gather *tlb, unsigned int page_size) { #ifdef CONFIG_MMU_GATHER_PAGE_SIZE if (tlb->page_size && tlb->page_size != page_size) { if (!tlb->fullmm && !tlb->need_flush_all) tlb_flush_mmu(tlb); } tlb->page_size = page_size; #endif } static inline unsigned long tlb_get_unmap_shift(struct mmu_gather *tlb) { if (tlb->cleared_ptes) return PAGE_SHIFT; if (tlb->cleared_pmds) return PMD_SHIFT; if (tlb->cleared_puds) return PUD_SHIFT; if (tlb->cleared_p4ds) return P4D_SHIFT; return PAGE_SHIFT; } static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb) { return 1UL << tlb_get_unmap_shift(tlb); } /* * In the case of tlb vma handling, we can optimise these away in the * case where we're doing a full MM flush. When we're doing a munmap, * the vmas are adjusted to only cover the region to be torn down. */ static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { if (tlb->fullmm) return; tlb_update_vma_flags(tlb, vma); #ifndef CONFIG_MMU_GATHER_NO_FLUSH_CACHE flush_cache_range(vma, vma->vm_start, vma->vm_end); #endif } static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { if (tlb->fullmm) return; /* * VM_PFNMAP is more fragile because the core mm will not track the * page mapcount -- there might not be page-frames for these PFNs after * all. Force flush TLBs for such ranges to avoid munmap() vs * unmap_mapping_range() races. */ if (tlb->vma_pfn || !IS_ENABLED(CONFIG_MMU_GATHER_MERGE_VMAS)) { /* * Do a TLB flush and reset the range at VMA boundaries; this avoids * the ranges growing with the unused space between consecutive VMAs. */ tlb_flush_mmu_tlbonly(tlb); } } /* * tlb_flush_{pte|pmd|pud|p4d}_range() adjust the tlb->start and tlb->end, * and set corresponding cleared_*. */ static inline void tlb_flush_pte_range(struct mmu_gather *tlb, unsigned long address, unsigned long size) { __tlb_adjust_range(tlb, address, size); tlb->cleared_ptes = 1; } static inline void tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address, unsigned long size) { __tlb_adjust_range(tlb, address, size); tlb->cleared_pmds = 1; } static inline void tlb_flush_pud_range(struct mmu_gather *tlb, unsigned long address, unsigned long size) { __tlb_adjust_range(tlb, address, size); tlb->cleared_puds = 1; } static inline void tlb_flush_p4d_range(struct mmu_gather *tlb, unsigned long address, unsigned long size) { __tlb_adjust_range(tlb, address, size); tlb->cleared_p4ds = 1; } #ifndef __tlb_remove_tlb_entry static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long address) { } #endif /** * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation. * * Record the fact that pte's were really unmapped by updating the range, * so we can later optimise away the tlb invalidate. This helps when * userspace is unmapping already-unmapped pages, which happens quite a lot. */ #define tlb_remove_tlb_entry(tlb, ptep, address) \ do { \ tlb_flush_pte_range(tlb, address, PAGE_SIZE); \ __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) /** * tlb_remove_tlb_entries - remember unmapping of multiple consecutive ptes for * later tlb invalidation. * * Similar to tlb_remove_tlb_entry(), but remember unmapping of multiple * consecutive ptes instead of only a single one. */ static inline void tlb_remove_tlb_entries(struct mmu_gather *tlb, pte_t *ptep, unsigned int nr, unsigned long address) { tlb_flush_pte_range(tlb, address, PAGE_SIZE * nr); for (;;) { __tlb_remove_tlb_entry(tlb, ptep, address); if (--nr == 0) break; ptep++; address += PAGE_SIZE; } } #define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ do { \ unsigned long _sz = huge_page_size(h); \ if (_sz >= P4D_SIZE) \ tlb_flush_p4d_range(tlb, address, _sz); \ else if (_sz >= PUD_SIZE) \ tlb_flush_pud_range(tlb, address, _sz); \ else if (_sz >= PMD_SIZE) \ tlb_flush_pmd_range(tlb, address, _sz); \ else \ tlb_flush_pte_range(tlb, address, _sz); \ __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) /** * tlb_remove_pmd_tlb_entry - remember a pmd mapping for later tlb invalidation * This is a nop so far, because only x86 needs it. */ #ifndef __tlb_remove_pmd_tlb_entry #define __tlb_remove_pmd_tlb_entry(tlb, pmdp, address) do {} while (0) #endif #define tlb_remove_pmd_tlb_entry(tlb, pmdp, address) \ do { \ tlb_flush_pmd_range(tlb, address, HPAGE_PMD_SIZE); \ __tlb_remove_pmd_tlb_entry(tlb, pmdp, address); \ } while (0) /** * tlb_remove_pud_tlb_entry - remember a pud mapping for later tlb * invalidation. This is a nop so far, because only x86 needs it. */ #ifndef __tlb_remove_pud_tlb_entry #define __tlb_remove_pud_tlb_entry(tlb, pudp, address) do {} while (0) #endif #define tlb_remove_pud_tlb_entry(tlb, pudp, address) \ do { \ tlb_flush_pud_range(tlb, address, HPAGE_PUD_SIZE); \ __tlb_remove_pud_tlb_entry(tlb, pudp, address); \ } while (0) /* * For things like page tables caches (ie caching addresses "inside" the * page tables, like x86 does), for legacy reasons, flushing an * individual page had better flush the page table caches behind it. This * is definitely how x86 works, for example. And if you have an * architected non-legacy page table cache (which I'm not aware of * anybody actually doing), you're going to have some architecturally * explicit flushing for that, likely *separate* from a regular TLB entry * flush, and thus you'd need more than just some range expansion.. * * So if we ever find an architecture * that would want something that odd, I think it is up to that * architecture to do its own odd thing, not cause pain for others * http://lkml.kernel.org/r/CA+55aFzBggoXtNXQeng5d_mRoDnaMBE5Y+URs+PHR67nUpMtaw@mail.gmail.com * * For now w.r.t page table cache, mark the range_size as PAGE_SIZE */ #ifndef pte_free_tlb #define pte_free_tlb(tlb, ptep, address) \ do { \ tlb_flush_pmd_range(tlb, address, PAGE_SIZE); \ tlb->freed_tables = 1; \ __pte_free_tlb(tlb, ptep, address); \ } while (0) #endif #ifndef pmd_free_tlb #define pmd_free_tlb(tlb, pmdp, address) \ do { \ tlb_flush_pud_range(tlb, address, PAGE_SIZE); \ tlb->freed_tables = 1; \ __pmd_free_tlb(tlb, pmdp, address); \ } while (0) #endif #ifndef pud_free_tlb #define pud_free_tlb(tlb, pudp, address) \ do { \ tlb_flush_p4d_range(tlb, address, PAGE_SIZE); \ tlb->freed_tables = 1; \ __pud_free_tlb(tlb, pudp, address); \ } while (0) #endif #ifndef p4d_free_tlb #define p4d_free_tlb(tlb, pudp, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ tlb->freed_tables = 1; \ __p4d_free_tlb(tlb, pudp, address); \ } while (0) #endif #ifndef pte_needs_flush static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte) { return true; } #endif #ifndef huge_pmd_needs_flush static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd) { return true; } #endif #endif /* CONFIG_MMU */ #endif /* _ASM_GENERIC__TLB_H */ |
1493 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_TIMEX_H #define _ASM_X86_TIMEX_H #include <asm/processor.h> #include <asm/tsc.h> static inline unsigned long random_get_entropy(void) { if (!IS_ENABLED(CONFIG_X86_TSC) && !cpu_feature_enabled(X86_FEATURE_TSC)) return random_get_entropy_fallback(); return rdtsc(); } #define random_get_entropy random_get_entropy /* Assume we use the PIT time source for the clock tick */ #define CLOCK_TICK_RATE PIT_TICK_RATE #define ARCH_HAS_READ_CURRENT_TIMER #endif /* _ASM_X86_TIMEX_H */ |
13 19 16 12 2 1768 39 1765 39 3 2 18 18 18 18 43 43 41 4 3 40 13 2 1 2 1 2 1 2 1 13 3 2 3 2 3 2 3 2 11 5 2 5 1 5 3 2 2 11 6 1 6 3 3 2 4 3 27 3 1 3 1 3 2 26 6 3 6 5 6 4 22 10 2 10 5 5 4 23 8 5 3 4 36 3 18 30 10 11 5 5 3 3 2 1 1 24 12 12 6 5 3 4 2 1 1 18 2 28 1 28 10 4 43 7 23 23 22 22 21 8 1 1 8 6 6 5 4 4 3 2 2 2 2 23 1 1 24 24 23 23 22 2 1 1 1 24 111 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 | // SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) /* gw.c - CAN frame Gateway/Router/Bridge with netlink interface * * Copyright (c) 2019 Volkswagen Group Electronic Research * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of Volkswagen nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * Alternatively, provided that this notice is retained in full, this * software may be distributed under the terms of the GNU General * Public License ("GPL") version 2, in which case the provisions of the * GPL apply INSTEAD OF those given above. * * The provided data structures and external interfaces from this code * are not restricted to be used by modules with a GPL compatible license. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * */ #include <linux/module.h> #include <linux/init.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/rcupdate.h> #include <linux/rculist.h> #include <linux/net.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/skbuff.h> #include <linux/can.h> #include <linux/can/core.h> #include <linux/can/skb.h> #include <linux/can/gw.h> #include <net/rtnetlink.h> #include <net/net_namespace.h> #include <net/sock.h> #define CAN_GW_NAME "can-gw" MODULE_DESCRIPTION("PF_CAN netlink gateway"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Oliver Hartkopp <oliver.hartkopp@volkswagen.de>"); MODULE_ALIAS(CAN_GW_NAME); #define CGW_MIN_HOPS 1 #define CGW_MAX_HOPS 6 #define CGW_DEFAULT_HOPS 1 static unsigned int max_hops __read_mostly = CGW_DEFAULT_HOPS; module_param(max_hops, uint, 0444); MODULE_PARM_DESC(max_hops, "maximum " CAN_GW_NAME " routing hops for CAN frames " "(valid values: " __stringify(CGW_MIN_HOPS) "-" __stringify(CGW_MAX_HOPS) " hops, " "default: " __stringify(CGW_DEFAULT_HOPS) ")"); static struct notifier_block notifier; static struct kmem_cache *cgw_cache __read_mostly; /* structure that contains the (on-the-fly) CAN frame modifications */ struct cf_mod { struct { struct canfd_frame and; struct canfd_frame or; struct canfd_frame xor; struct canfd_frame set; } modframe; struct { u8 and; u8 or; u8 xor; u8 set; } modtype; void (*modfunc[MAX_MODFUNCTIONS])(struct canfd_frame *cf, struct cf_mod *mod); /* CAN frame checksum calculation after CAN frame modifications */ struct { struct cgw_csum_xor xor; struct cgw_csum_crc8 crc8; } csum; struct { void (*xor)(struct canfd_frame *cf, struct cgw_csum_xor *xor); void (*crc8)(struct canfd_frame *cf, struct cgw_csum_crc8 *crc8); } csumfunc; u32 uid; }; /* So far we just support CAN -> CAN routing and frame modifications. * * The internal can_can_gw structure contains data and attributes for * a CAN -> CAN gateway job. */ struct can_can_gw { struct can_filter filter; int src_idx; int dst_idx; }; /* list entry for CAN gateways jobs */ struct cgw_job { struct hlist_node list; struct rcu_head rcu; u32 handled_frames; u32 dropped_frames; u32 deleted_frames; struct cf_mod mod; union { /* CAN frame data source */ struct net_device *dev; } src; union { /* CAN frame data destination */ struct net_device *dev; } dst; union { struct can_can_gw ccgw; /* tbc */ }; u8 gwtype; u8 limit_hops; u16 flags; }; /* modification functions that are invoked in the hot path in can_can_gw_rcv */ #define MODFUNC(func, op) static void func(struct canfd_frame *cf, \ struct cf_mod *mod) { op ; } MODFUNC(mod_and_id, cf->can_id &= mod->modframe.and.can_id) MODFUNC(mod_and_len, cf->len &= mod->modframe.and.len) MODFUNC(mod_and_flags, cf->flags &= mod->modframe.and.flags) MODFUNC(mod_and_data, *(u64 *)cf->data &= *(u64 *)mod->modframe.and.data) MODFUNC(mod_or_id, cf->can_id |= mod->modframe.or.can_id) MODFUNC(mod_or_len, cf->len |= mod->modframe.or.len) MODFUNC(mod_or_flags, cf->flags |= mod->modframe.or.flags) MODFUNC(mod_or_data, *(u64 *)cf->data |= *(u64 *)mod->modframe.or.data) MODFUNC(mod_xor_id, cf->can_id ^= mod->modframe.xor.can_id) MODFUNC(mod_xor_len, cf->len ^= mod->modframe.xor.len) MODFUNC(mod_xor_flags, cf->flags ^= mod->modframe.xor.flags) MODFUNC(mod_xor_data, *(u64 *)cf->data ^= *(u64 *)mod->modframe.xor.data) MODFUNC(mod_set_id, cf->can_id = mod->modframe.set.can_id) MODFUNC(mod_set_len, cf->len = mod->modframe.set.len) MODFUNC(mod_set_flags, cf->flags = mod->modframe.set.flags) MODFUNC(mod_set_data, *(u64 *)cf->data = *(u64 *)mod->modframe.set.data) static void mod_and_fddata(struct canfd_frame *cf, struct cf_mod *mod) { int i; for (i = 0; i < CANFD_MAX_DLEN; i += 8) *(u64 *)(cf->data + i) &= *(u64 *)(mod->modframe.and.data + i); } static void mod_or_fddata(struct canfd_frame *cf, struct cf_mod *mod) { int i; for (i = 0; i < CANFD_MAX_DLEN; i += 8) *(u64 *)(cf->data + i) |= *(u64 *)(mod->modframe.or.data + i); } static void mod_xor_fddata(struct canfd_frame *cf, struct cf_mod *mod) { int i; for (i = 0; i < CANFD_MAX_DLEN; i += 8) *(u64 *)(cf->data + i) ^= *(u64 *)(mod->modframe.xor.data + i); } static void mod_set_fddata(struct canfd_frame *cf, struct cf_mod *mod) { memcpy(cf->data, mod->modframe.set.data, CANFD_MAX_DLEN); } /* retrieve valid CC DLC value and store it into 'len' */ static void mod_retrieve_ccdlc(struct canfd_frame *cf) { struct can_frame *ccf = (struct can_frame *)cf; /* len8_dlc is only valid if len == CAN_MAX_DLEN */ if (ccf->len != CAN_MAX_DLEN) return; /* do we have a valid len8_dlc value from 9 .. 15 ? */ if (ccf->len8_dlc > CAN_MAX_DLEN && ccf->len8_dlc <= CAN_MAX_RAW_DLC) ccf->len = ccf->len8_dlc; } /* convert valid CC DLC value in 'len' into struct can_frame elements */ static void mod_store_ccdlc(struct canfd_frame *cf) { struct can_frame *ccf = (struct can_frame *)cf; /* clear potential leftovers */ ccf->len8_dlc = 0; /* plain data length 0 .. 8 - that was easy */ if (ccf->len <= CAN_MAX_DLEN) return; /* potentially broken values are caught in can_can_gw_rcv() */ if (ccf->len > CAN_MAX_RAW_DLC) return; /* we have a valid dlc value from 9 .. 15 in ccf->len */ ccf->len8_dlc = ccf->len; ccf->len = CAN_MAX_DLEN; } static void mod_and_ccdlc(struct canfd_frame *cf, struct cf_mod *mod) { mod_retrieve_ccdlc(cf); mod_and_len(cf, mod); mod_store_ccdlc(cf); } static void mod_or_ccdlc(struct canfd_frame *cf, struct cf_mod *mod) { mod_retrieve_ccdlc(cf); mod_or_len(cf, mod); mod_store_ccdlc(cf); } static void mod_xor_ccdlc(struct canfd_frame *cf, struct cf_mod *mod) { mod_retrieve_ccdlc(cf); mod_xor_len(cf, mod); mod_store_ccdlc(cf); } static void mod_set_ccdlc(struct canfd_frame *cf, struct cf_mod *mod) { mod_set_len(cf, mod); mod_store_ccdlc(cf); } static void canframecpy(struct canfd_frame *dst, struct can_frame *src) { /* Copy the struct members separately to ensure that no uninitialized * data are copied in the 3 bytes hole of the struct. This is needed * to make easy compares of the data in the struct cf_mod. */ dst->can_id = src->can_id; dst->len = src->len; *(u64 *)dst->data = *(u64 *)src->data; } static void canfdframecpy(struct canfd_frame *dst, struct canfd_frame *src) { /* Copy the struct members separately to ensure that no uninitialized * data are copied in the 2 bytes hole of the struct. This is needed * to make easy compares of the data in the struct cf_mod. */ dst->can_id = src->can_id; dst->flags = src->flags; dst->len = src->len; memcpy(dst->data, src->data, CANFD_MAX_DLEN); } static int cgw_chk_csum_parms(s8 fr, s8 to, s8 re, struct rtcanmsg *r) { s8 dlen = CAN_MAX_DLEN; if (r->flags & CGW_FLAGS_CAN_FD) dlen = CANFD_MAX_DLEN; /* absolute dlc values 0 .. 7 => 0 .. 7, e.g. data [0] * relative to received dlc -1 .. -8 : * e.g. for received dlc = 8 * -1 => index = 7 (data[7]) * -3 => index = 5 (data[5]) * -8 => index = 0 (data[0]) */ if (fr >= -dlen && fr < dlen && to >= -dlen && to < dlen && re >= -dlen && re < dlen) return 0; else return -EINVAL; } static inline int calc_idx(int idx, int rx_len) { if (idx < 0) return rx_len + idx; else return idx; } static void cgw_csum_xor_rel(struct canfd_frame *cf, struct cgw_csum_xor *xor) { int from = calc_idx(xor->from_idx, cf->len); int to = calc_idx(xor->to_idx, cf->len); int res = calc_idx(xor->result_idx, cf->len); u8 val = xor->init_xor_val; int i; if (from < 0 || to < 0 || res < 0) return; if (from <= to) { for (i = from; i <= to; i++) val ^= cf->data[i]; } else { for (i = from; i >= to; i--) val ^= cf->data[i]; } cf->data[res] = val; } static void cgw_csum_xor_pos(struct canfd_frame *cf, struct cgw_csum_xor *xor) { u8 val = xor->init_xor_val; int i; for (i = xor->from_idx; i <= xor->to_idx; i++) val ^= cf->data[i]; cf->data[xor->result_idx] = val; } static void cgw_csum_xor_neg(struct canfd_frame *cf, struct cgw_csum_xor *xor) { u8 val = xor->init_xor_val; int i; for (i = xor->from_idx; i >= xor->to_idx; i--) val ^= cf->data[i]; cf->data[xor->result_idx] = val; } static void cgw_csum_crc8_rel(struct canfd_frame *cf, struct cgw_csum_crc8 *crc8) { int from = calc_idx(crc8->from_idx, cf->len); int to = calc_idx(crc8->to_idx, cf->len); int res = calc_idx(crc8->result_idx, cf->len); u8 crc = crc8->init_crc_val; int i; if (from < 0 || to < 0 || res < 0) return; if (from <= to) { for (i = crc8->from_idx; i <= crc8->to_idx; i++) crc = crc8->crctab[crc ^ cf->data[i]]; } else { for (i = crc8->from_idx; i >= crc8->to_idx; i--) crc = crc8->crctab[crc ^ cf->data[i]]; } switch (crc8->profile) { case CGW_CRC8PRF_1U8: crc = crc8->crctab[crc ^ crc8->profile_data[0]]; break; case CGW_CRC8PRF_16U8: crc = crc8->crctab[crc ^ crc8->profile_data[cf->data[1] & 0xF]]; break; case CGW_CRC8PRF_SFFID_XOR: crc = crc8->crctab[crc ^ (cf->can_id & 0xFF) ^ (cf->can_id >> 8 & 0xFF)]; break; } cf->data[crc8->result_idx] = crc ^ crc8->final_xor_val; } static void cgw_csum_crc8_pos(struct canfd_frame *cf, struct cgw_csum_crc8 *crc8) { u8 crc = crc8->init_crc_val; int i; for (i = crc8->from_idx; i <= crc8->to_idx; i++) crc = crc8->crctab[crc ^ cf->data[i]]; switch (crc8->profile) { case CGW_CRC8PRF_1U8: crc = crc8->crctab[crc ^ crc8->profile_data[0]]; break; case CGW_CRC8PRF_16U8: crc = crc8->crctab[crc ^ crc8->profile_data[cf->data[1] & 0xF]]; break; case CGW_CRC8PRF_SFFID_XOR: crc = crc8->crctab[crc ^ (cf->can_id & 0xFF) ^ (cf->can_id >> 8 & 0xFF)]; break; } cf->data[crc8->result_idx] = crc ^ crc8->final_xor_val; } static void cgw_csum_crc8_neg(struct canfd_frame *cf, struct cgw_csum_crc8 *crc8) { u8 crc = crc8->init_crc_val; int i; for (i = crc8->from_idx; i >= crc8->to_idx; i--) crc = crc8->crctab[crc ^ cf->data[i]]; switch (crc8->profile) { case CGW_CRC8PRF_1U8: crc = crc8->crctab[crc ^ crc8->profile_data[0]]; break; case CGW_CRC8PRF_16U8: crc = crc8->crctab[crc ^ crc8->profile_data[cf->data[1] & 0xF]]; break; case CGW_CRC8PRF_SFFID_XOR: crc = crc8->crctab[crc ^ (cf->can_id & 0xFF) ^ (cf->can_id >> 8 & 0xFF)]; break; } cf->data[crc8->result_idx] = crc ^ crc8->final_xor_val; } /* the receive & process & send function */ static void can_can_gw_rcv(struct sk_buff *skb, void *data) { struct cgw_job *gwj = (struct cgw_job *)data; struct canfd_frame *cf; struct sk_buff *nskb; int modidx = 0; /* process strictly Classic CAN or CAN FD frames */ if (gwj->flags & CGW_FLAGS_CAN_FD) { if (!can_is_canfd_skb(skb)) return; } else { if (!can_is_can_skb(skb)) return; } /* Do not handle CAN frames routed more than 'max_hops' times. * In general we should never catch this delimiter which is intended * to cover a misconfiguration protection (e.g. circular CAN routes). * * The Controller Area Network controllers only accept CAN frames with * correct CRCs - which are not visible in the controller registers. * According to skbuff.h documentation the csum_start element for IP * checksums is undefined/unused when ip_summed == CHECKSUM_UNNECESSARY. * Only CAN skbs can be processed here which already have this property. */ #define cgw_hops(skb) ((skb)->csum_start) BUG_ON(skb->ip_summed != CHECKSUM_UNNECESSARY); if (cgw_hops(skb) >= max_hops) { /* indicate deleted frames due to misconfiguration */ gwj->deleted_frames++; return; } if (!(gwj->dst.dev->flags & IFF_UP)) { gwj->dropped_frames++; return; } /* is sending the skb back to the incoming interface not allowed? */ if (!(gwj->flags & CGW_FLAGS_CAN_IIF_TX_OK) && can_skb_prv(skb)->ifindex == gwj->dst.dev->ifindex) return; /* clone the given skb, which has not been done in can_rcv() * * When there is at least one modification function activated, * we need to copy the skb as we want to modify skb->data. */ if (gwj->mod.modfunc[0]) nskb = skb_copy(skb, GFP_ATOMIC); else nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) { gwj->dropped_frames++; return; } /* put the incremented hop counter in the cloned skb */ cgw_hops(nskb) = cgw_hops(skb) + 1; /* first processing of this CAN frame -> adjust to private hop limit */ if (gwj->limit_hops && cgw_hops(nskb) == 1) cgw_hops(nskb) = max_hops - gwj->limit_hops + 1; nskb->dev = gwj->dst.dev; /* pointer to modifiable CAN frame */ cf = (struct canfd_frame *)nskb->data; /* perform preprocessed modification functions if there are any */ while (modidx < MAX_MODFUNCTIONS && gwj->mod.modfunc[modidx]) (*gwj->mod.modfunc[modidx++])(cf, &gwj->mod); /* Has the CAN frame been modified? */ if (modidx) { /* get available space for the processed CAN frame type */ int max_len = nskb->len - offsetof(struct canfd_frame, data); /* dlc may have changed, make sure it fits to the CAN frame */ if (cf->len > max_len) { /* delete frame due to misconfiguration */ gwj->deleted_frames++; kfree_skb(nskb); return; } /* check for checksum updates */ if (gwj->mod.csumfunc.crc8) (*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8); if (gwj->mod.csumfunc.xor) (*gwj->mod.csumfunc.xor)(cf, &gwj->mod.csum.xor); } /* clear the skb timestamp if not configured the other way */ if (!(gwj->flags & CGW_FLAGS_CAN_SRC_TSTAMP)) nskb->tstamp = 0; /* send to netdevice */ if (can_send(nskb, gwj->flags & CGW_FLAGS_CAN_ECHO)) gwj->dropped_frames++; else gwj->handled_frames++; } static inline int cgw_register_filter(struct net *net, struct cgw_job *gwj) { return can_rx_register(net, gwj->src.dev, gwj->ccgw.filter.can_id, gwj->ccgw.filter.can_mask, can_can_gw_rcv, gwj, "gw", NULL); } static inline void cgw_unregister_filter(struct net *net, struct cgw_job *gwj) { can_rx_unregister(net, gwj->src.dev, gwj->ccgw.filter.can_id, gwj->ccgw.filter.can_mask, can_can_gw_rcv, gwj); } static void cgw_job_free_rcu(struct rcu_head *rcu_head) { struct cgw_job *gwj = container_of(rcu_head, struct cgw_job, rcu); kmem_cache_free(cgw_cache, gwj); } static int cgw_notifier(struct notifier_block *nb, unsigned long msg, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); if (dev->type != ARPHRD_CAN) return NOTIFY_DONE; if (msg == NETDEV_UNREGISTER) { struct cgw_job *gwj = NULL; struct hlist_node *nx; ASSERT_RTNL(); hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) { if (gwj->src.dev == dev || gwj->dst.dev == dev) { hlist_del(&gwj->list); cgw_unregister_filter(net, gwj); call_rcu(&gwj->rcu, cgw_job_free_rcu); } } } return NOTIFY_DONE; } static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, u32 pid, u32 seq, int flags) { struct rtcanmsg *rtcan; struct nlmsghdr *nlh; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtcan), flags); if (!nlh) return -EMSGSIZE; rtcan = nlmsg_data(nlh); rtcan->can_family = AF_CAN; rtcan->gwtype = gwj->gwtype; rtcan->flags = gwj->flags; /* add statistics if available */ if (gwj->handled_frames) { if (nla_put_u32(skb, CGW_HANDLED, gwj->handled_frames) < 0) goto cancel; } if (gwj->dropped_frames) { if (nla_put_u32(skb, CGW_DROPPED, gwj->dropped_frames) < 0) goto cancel; } if (gwj->deleted_frames) { if (nla_put_u32(skb, CGW_DELETED, gwj->deleted_frames) < 0) goto cancel; } /* check non default settings of attributes */ if (gwj->limit_hops) { if (nla_put_u8(skb, CGW_LIM_HOPS, gwj->limit_hops) < 0) goto cancel; } if (gwj->flags & CGW_FLAGS_CAN_FD) { struct cgw_fdframe_mod mb; if (gwj->mod.modtype.and) { memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf)); mb.modtype = gwj->mod.modtype.and; if (nla_put(skb, CGW_FDMOD_AND, sizeof(mb), &mb) < 0) goto cancel; } if (gwj->mod.modtype.or) { memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf)); mb.modtype = gwj->mod.modtype.or; if (nla_put(skb, CGW_FDMOD_OR, sizeof(mb), &mb) < 0) goto cancel; } if (gwj->mod.modtype.xor) { memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf)); mb.modtype = gwj->mod.modtype.xor; if (nla_put(skb, CGW_FDMOD_XOR, sizeof(mb), &mb) < 0) goto cancel; } if (gwj->mod.modtype.set) { memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf)); mb.modtype = gwj->mod.modtype.set; if (nla_put(skb, CGW_FDMOD_SET, sizeof(mb), &mb) < 0) goto cancel; } } else { struct cgw_frame_mod mb; if (gwj->mod.modtype.and) { memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf)); mb.modtype = gwj->mod.modtype.and; if (nla_put(skb, CGW_MOD_AND, sizeof(mb), &mb) < 0) goto cancel; } if (gwj->mod.modtype.or) { memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf)); mb.modtype = gwj->mod.modtype.or; if (nla_put(skb, CGW_MOD_OR, sizeof(mb), &mb) < 0) goto cancel; } if (gwj->mod.modtype.xor) { memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf)); mb.modtype = gwj->mod.modtype.xor; if (nla_put(skb, CGW_MOD_XOR, sizeof(mb), &mb) < 0) goto cancel; } if (gwj->mod.modtype.set) { memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf)); mb.modtype = gwj->mod.modtype.set; if (nla_put(skb, CGW_MOD_SET, sizeof(mb), &mb) < 0) goto cancel; } } if (gwj->mod.uid) { if (nla_put_u32(skb, CGW_MOD_UID, gwj->mod.uid) < 0) goto cancel; } if (gwj->mod.csumfunc.crc8) { if (nla_put(skb, CGW_CS_CRC8, CGW_CS_CRC8_LEN, &gwj->mod.csum.crc8) < 0) goto cancel; } if (gwj->mod.csumfunc.xor) { if (nla_put(skb, CGW_CS_XOR, CGW_CS_XOR_LEN, &gwj->mod.csum.xor) < 0) goto cancel; } if (gwj->gwtype == CGW_TYPE_CAN_CAN) { if (gwj->ccgw.filter.can_id || gwj->ccgw.filter.can_mask) { if (nla_put(skb, CGW_FILTER, sizeof(struct can_filter), &gwj->ccgw.filter) < 0) goto cancel; } if (nla_put_u32(skb, CGW_SRC_IF, gwj->ccgw.src_idx) < 0) goto cancel; if (nla_put_u32(skb, CGW_DST_IF, gwj->ccgw.dst_idx) < 0) goto cancel; } nlmsg_end(skb, nlh); return 0; cancel: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } /* Dump information about all CAN gateway jobs, in response to RTM_GETROUTE */ static int cgw_dump_jobs(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); struct cgw_job *gwj = NULL; int idx = 0; int s_idx = cb->args[0]; rcu_read_lock(); hlist_for_each_entry_rcu(gwj, &net->can.cgw_list, list) { if (idx < s_idx) goto cont; if (cgw_put_job(skb, gwj, RTM_NEWROUTE, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0) break; cont: idx++; } rcu_read_unlock(); cb->args[0] = idx; return skb->len; } static const struct nla_policy cgw_policy[CGW_MAX + 1] = { [CGW_MOD_AND] = { .len = sizeof(struct cgw_frame_mod) }, [CGW_MOD_OR] = { .len = sizeof(struct cgw_frame_mod) }, [CGW_MOD_XOR] = { .len = sizeof(struct cgw_frame_mod) }, [CGW_MOD_SET] = { .len = sizeof(struct cgw_frame_mod) }, [CGW_CS_XOR] = { .len = sizeof(struct cgw_csum_xor) }, [CGW_CS_CRC8] = { .len = sizeof(struct cgw_csum_crc8) }, [CGW_SRC_IF] = { .type = NLA_U32 }, [CGW_DST_IF] = { .type = NLA_U32 }, [CGW_FILTER] = { .len = sizeof(struct can_filter) }, [CGW_LIM_HOPS] = { .type = NLA_U8 }, [CGW_MOD_UID] = { .type = NLA_U32 }, [CGW_FDMOD_AND] = { .len = sizeof(struct cgw_fdframe_mod) }, [CGW_FDMOD_OR] = { .len = sizeof(struct cgw_fdframe_mod) }, [CGW_FDMOD_XOR] = { .len = sizeof(struct cgw_fdframe_mod) }, [CGW_FDMOD_SET] = { .len = sizeof(struct cgw_fdframe_mod) }, }; /* check for common and gwtype specific attributes */ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod, u8 gwtype, void *gwtypeattr, u8 *limhops) { struct nlattr *tb[CGW_MAX + 1]; struct rtcanmsg *r = nlmsg_data(nlh); int modidx = 0; int err = 0; /* initialize modification & checksum data space */ memset(mod, 0, sizeof(*mod)); err = nlmsg_parse_deprecated(nlh, sizeof(struct rtcanmsg), tb, CGW_MAX, cgw_policy, NULL); if (err < 0) return err; if (tb[CGW_LIM_HOPS]) { *limhops = nla_get_u8(tb[CGW_LIM_HOPS]); if (*limhops < 1 || *limhops > max_hops) return -EINVAL; } /* check for AND/OR/XOR/SET modifications */ if (r->flags & CGW_FLAGS_CAN_FD) { struct cgw_fdframe_mod mb; if (tb[CGW_FDMOD_AND]) { nla_memcpy(&mb, tb[CGW_FDMOD_AND], CGW_FDMODATTR_LEN); canfdframecpy(&mod->modframe.and, &mb.cf); mod->modtype.and = mb.modtype; if (mb.modtype & CGW_MOD_ID) mod->modfunc[modidx++] = mod_and_id; if (mb.modtype & CGW_MOD_LEN) mod->modfunc[modidx++] = mod_and_len; if (mb.modtype & CGW_MOD_FLAGS) mod->modfunc[modidx++] = mod_and_flags; if (mb.modtype & CGW_MOD_DATA) mod->modfunc[modidx++] = mod_and_fddata; } if (tb[CGW_FDMOD_OR]) { nla_memcpy(&mb, tb[CGW_FDMOD_OR], CGW_FDMODATTR_LEN); canfdframecpy(&mod->modframe.or, &mb.cf); mod->modtype.or = mb.modtype; if (mb.modtype & CGW_MOD_ID) mod->modfunc[modidx++] = mod_or_id; if (mb.modtype & CGW_MOD_LEN) mod->modfunc[modidx++] = mod_or_len; if (mb.modtype & CGW_MOD_FLAGS) mod->modfunc[modidx++] = mod_or_flags; if (mb.modtype & CGW_MOD_DATA) mod->modfunc[modidx++] = mod_or_fddata; } if (tb[CGW_FDMOD_XOR]) { nla_memcpy(&mb, tb[CGW_FDMOD_XOR], CGW_FDMODATTR_LEN); canfdframecpy(&mod->modframe.xor, &mb.cf); mod->modtype.xor = mb.modtype; if (mb.modtype & CGW_MOD_ID) mod->modfunc[modidx++] = mod_xor_id; if (mb.modtype & CGW_MOD_LEN) mod->modfunc[modidx++] = mod_xor_len; if (mb.modtype & CGW_MOD_FLAGS) mod->modfunc[modidx++] = mod_xor_flags; if (mb.modtype & CGW_MOD_DATA) mod->modfunc[modidx++] = mod_xor_fddata; } if (tb[CGW_FDMOD_SET]) { nla_memcpy(&mb, tb[CGW_FDMOD_SET], CGW_FDMODATTR_LEN); canfdframecpy(&mod->modframe.set, &mb.cf); mod->modtype.set = mb.modtype; if (mb.modtype & CGW_MOD_ID) mod->modfunc[modidx++] = mod_set_id; if (mb.modtype & CGW_MOD_LEN) mod->modfunc[modidx++] = mod_set_len; if (mb.modtype & CGW_MOD_FLAGS) mod->modfunc[modidx++] = mod_set_flags; if (mb.modtype & CGW_MOD_DATA) mod->modfunc[modidx++] = mod_set_fddata; } } else { struct cgw_frame_mod mb; if (tb[CGW_MOD_AND]) { nla_memcpy(&mb, tb[CGW_MOD_AND], CGW_MODATTR_LEN); canframecpy(&mod->modframe.and, &mb.cf); mod->modtype.and = mb.modtype; if (mb.modtype & CGW_MOD_ID) mod->modfunc[modidx++] = mod_and_id; if (mb.modtype & CGW_MOD_DLC) mod->modfunc[modidx++] = mod_and_ccdlc; if (mb.modtype & CGW_MOD_DATA) mod->modfunc[modidx++] = mod_and_data; } if (tb[CGW_MOD_OR]) { nla_memcpy(&mb, tb[CGW_MOD_OR], CGW_MODATTR_LEN); canframecpy(&mod->modframe.or, &mb.cf); mod->modtype.or = mb.modtype; if (mb.modtype & CGW_MOD_ID) mod->modfunc[modidx++] = mod_or_id; if (mb.modtype & CGW_MOD_DLC) mod->modfunc[modidx++] = mod_or_ccdlc; if (mb.modtype & CGW_MOD_DATA) mod->modfunc[modidx++] = mod_or_data; } if (tb[CGW_MOD_XOR]) { nla_memcpy(&mb, tb[CGW_MOD_XOR], CGW_MODATTR_LEN); canframecpy(&mod->modframe.xor, &mb.cf); mod->modtype.xor = mb.modtype; if (mb.modtype & CGW_MOD_ID) mod->modfunc[modidx++] = mod_xor_id; if (mb.modtype & CGW_MOD_DLC) mod->modfunc[modidx++] = mod_xor_ccdlc; if (mb.modtype & CGW_MOD_DATA) mod->modfunc[modidx++] = mod_xor_data; } if (tb[CGW_MOD_SET]) { nla_memcpy(&mb, tb[CGW_MOD_SET], CGW_MODATTR_LEN); canframecpy(&mod->modframe.set, &mb.cf); mod->modtype.set = mb.modtype; if (mb.modtype & CGW_MOD_ID) mod->modfunc[modidx++] = mod_set_id; if (mb.modtype & CGW_MOD_DLC) mod->modfunc[modidx++] = mod_set_ccdlc; if (mb.modtype & CGW_MOD_DATA) mod->modfunc[modidx++] = mod_set_data; } } /* check for checksum operations after CAN frame modifications */ if (modidx) { if (tb[CGW_CS_CRC8]) { struct cgw_csum_crc8 *c = nla_data(tb[CGW_CS_CRC8]); err = cgw_chk_csum_parms(c->from_idx, c->to_idx, c->result_idx, r); if (err) return err; nla_memcpy(&mod->csum.crc8, tb[CGW_CS_CRC8], CGW_CS_CRC8_LEN); /* select dedicated processing function to reduce * runtime operations in receive hot path. */ if (c->from_idx < 0 || c->to_idx < 0 || c->result_idx < 0) mod->csumfunc.crc8 = cgw_csum_crc8_rel; else if (c->from_idx <= c->to_idx) mod->csumfunc.crc8 = cgw_csum_crc8_pos; else mod->csumfunc.crc8 = cgw_csum_crc8_neg; } if (tb[CGW_CS_XOR]) { struct cgw_csum_xor *c = nla_data(tb[CGW_CS_XOR]); err = cgw_chk_csum_parms(c->from_idx, c->to_idx, c->result_idx, r); if (err) return err; nla_memcpy(&mod->csum.xor, tb[CGW_CS_XOR], CGW_CS_XOR_LEN); /* select dedicated processing function to reduce * runtime operations in receive hot path. */ if (c->from_idx < 0 || c->to_idx < 0 || c->result_idx < 0) mod->csumfunc.xor = cgw_csum_xor_rel; else if (c->from_idx <= c->to_idx) mod->csumfunc.xor = cgw_csum_xor_pos; else mod->csumfunc.xor = cgw_csum_xor_neg; } if (tb[CGW_MOD_UID]) nla_memcpy(&mod->uid, tb[CGW_MOD_UID], sizeof(u32)); } if (gwtype == CGW_TYPE_CAN_CAN) { /* check CGW_TYPE_CAN_CAN specific attributes */ struct can_can_gw *ccgw = (struct can_can_gw *)gwtypeattr; memset(ccgw, 0, sizeof(*ccgw)); /* check for can_filter in attributes */ if (tb[CGW_FILTER]) nla_memcpy(&ccgw->filter, tb[CGW_FILTER], sizeof(struct can_filter)); err = -ENODEV; /* specifying two interfaces is mandatory */ if (!tb[CGW_SRC_IF] || !tb[CGW_DST_IF]) return err; ccgw->src_idx = nla_get_u32(tb[CGW_SRC_IF]); ccgw->dst_idx = nla_get_u32(tb[CGW_DST_IF]); /* both indices set to 0 for flushing all routing entries */ if (!ccgw->src_idx && !ccgw->dst_idx) return 0; /* only one index set to 0 is an error */ if (!ccgw->src_idx || !ccgw->dst_idx) return err; } /* add the checks for other gwtypes here */ return 0; } static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct rtcanmsg *r; struct cgw_job *gwj; struct cf_mod mod; struct can_can_gw ccgw; u8 limhops = 0; int err = 0; if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (nlmsg_len(nlh) < sizeof(*r)) return -EINVAL; r = nlmsg_data(nlh); if (r->can_family != AF_CAN) return -EPFNOSUPPORT; /* so far we only support CAN -> CAN routings */ if (r->gwtype != CGW_TYPE_CAN_CAN) return -EINVAL; err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops); if (err < 0) return err; if (mod.uid) { ASSERT_RTNL(); /* check for updating an existing job with identical uid */ hlist_for_each_entry(gwj, &net->can.cgw_list, list) { if (gwj->mod.uid != mod.uid) continue; /* interfaces & filters must be identical */ if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw))) return -EINVAL; /* update modifications with disabled softirq & quit */ local_bh_disable(); memcpy(&gwj->mod, &mod, sizeof(mod)); local_bh_enable(); return 0; } } /* ifindex == 0 is not allowed for job creation */ if (!ccgw.src_idx || !ccgw.dst_idx) return -ENODEV; gwj = kmem_cache_alloc(cgw_cache, GFP_KERNEL); if (!gwj) return -ENOMEM; gwj->handled_frames = 0; gwj->dropped_frames = 0; gwj->deleted_frames = 0; gwj->flags = r->flags; gwj->gwtype = r->gwtype; gwj->limit_hops = limhops; /* insert already parsed information */ memcpy(&gwj->mod, &mod, sizeof(mod)); memcpy(&gwj->ccgw, &ccgw, sizeof(ccgw)); err = -ENODEV; gwj->src.dev = __dev_get_by_index(net, gwj->ccgw.src_idx); if (!gwj->src.dev) goto out; if (gwj->src.dev->type != ARPHRD_CAN) goto out; gwj->dst.dev = __dev_get_by_index(net, gwj->ccgw.dst_idx); if (!gwj->dst.dev) goto out; if (gwj->dst.dev->type != ARPHRD_CAN) goto out; /* is sending the skb back to the incoming interface intended? */ if (gwj->src.dev == gwj->dst.dev && !(gwj->flags & CGW_FLAGS_CAN_IIF_TX_OK)) { err = -EINVAL; goto out; } ASSERT_RTNL(); err = cgw_register_filter(net, gwj); if (!err) hlist_add_head_rcu(&gwj->list, &net->can.cgw_list); out: if (err) kmem_cache_free(cgw_cache, gwj); return err; } static void cgw_remove_all_jobs(struct net *net) { struct cgw_job *gwj = NULL; struct hlist_node *nx; ASSERT_RTNL(); hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) { hlist_del(&gwj->list); cgw_unregister_filter(net, gwj); call_rcu(&gwj->rcu, cgw_job_free_rcu); } } static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct cgw_job *gwj = NULL; struct hlist_node *nx; struct rtcanmsg *r; struct cf_mod mod; struct can_can_gw ccgw; u8 limhops = 0; int err = 0; if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (nlmsg_len(nlh) < sizeof(*r)) return -EINVAL; r = nlmsg_data(nlh); if (r->can_family != AF_CAN) return -EPFNOSUPPORT; /* so far we only support CAN -> CAN routings */ if (r->gwtype != CGW_TYPE_CAN_CAN) return -EINVAL; err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops); if (err < 0) return err; /* two interface indices both set to 0 => remove all entries */ if (!ccgw.src_idx && !ccgw.dst_idx) { cgw_remove_all_jobs(net); return 0; } err = -EINVAL; ASSERT_RTNL(); /* remove only the first matching entry */ hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) { if (gwj->flags != r->flags) continue; if (gwj->limit_hops != limhops) continue; /* we have a match when uid is enabled and identical */ if (gwj->mod.uid || mod.uid) { if (gwj->mod.uid != mod.uid) continue; } else { /* no uid => check for identical modifications */ if (memcmp(&gwj->mod, &mod, sizeof(mod))) continue; } /* if (r->gwtype == CGW_TYPE_CAN_CAN) - is made sure here */ if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw))) continue; hlist_del(&gwj->list); cgw_unregister_filter(net, gwj); call_rcu(&gwj->rcu, cgw_job_free_rcu); err = 0; break; } return err; } static int __net_init cangw_pernet_init(struct net *net) { INIT_HLIST_HEAD(&net->can.cgw_list); return 0; } static void __net_exit cangw_pernet_exit_batch(struct list_head *net_list) { struct net *net; rtnl_lock(); list_for_each_entry(net, net_list, exit_list) cgw_remove_all_jobs(net); rtnl_unlock(); } static struct pernet_operations cangw_pernet_ops = { .init = cangw_pernet_init, .exit_batch = cangw_pernet_exit_batch, }; static const struct rtnl_msg_handler cgw_rtnl_msg_handlers[] __initconst_or_module = { {.owner = THIS_MODULE, .protocol = PF_CAN, .msgtype = RTM_NEWROUTE, .doit = cgw_create_job}, {.owner = THIS_MODULE, .protocol = PF_CAN, .msgtype = RTM_DELROUTE, .doit = cgw_remove_job}, {.owner = THIS_MODULE, .protocol = PF_CAN, .msgtype = RTM_GETROUTE, .dumpit = cgw_dump_jobs}, }; static __init int cgw_module_init(void) { int ret; /* sanitize given module parameter */ max_hops = clamp_t(unsigned int, max_hops, CGW_MIN_HOPS, CGW_MAX_HOPS); pr_info("can: netlink gateway - max_hops=%d\n", max_hops); ret = register_pernet_subsys(&cangw_pernet_ops); if (ret) return ret; ret = -ENOMEM; cgw_cache = kmem_cache_create("can_gw", sizeof(struct cgw_job), 0, 0, NULL); if (!cgw_cache) goto out_cache_create; /* set notifier */ notifier.notifier_call = cgw_notifier; ret = register_netdevice_notifier(¬ifier); if (ret) goto out_register_notifier; ret = rtnl_register_many(cgw_rtnl_msg_handlers); if (ret) goto out_rtnl_register; return 0; out_rtnl_register: unregister_netdevice_notifier(¬ifier); out_register_notifier: kmem_cache_destroy(cgw_cache); out_cache_create: unregister_pernet_subsys(&cangw_pernet_ops); return ret; } static __exit void cgw_module_exit(void) { rtnl_unregister_all(PF_CAN); unregister_netdevice_notifier(¬ifier); unregister_pernet_subsys(&cangw_pernet_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ kmem_cache_destroy(cgw_cache); } module_init(cgw_module_init); module_exit(cgw_module_exit); |
145 140 145 145 145 110 145 38 38 38 239 239 1 1 239 239 239 51 51 51 231 231 231 238 120 239 122 262 26 260 261 261 118 145 145 262 2 2 2 2 2 2 2 2 258 259 260 260 260 260 259 257 260 4 252 248 4 260 242 242 241 239 238 237 1 5 7 2 2 2 2 2 145 2 144 49 145 145 145 145 145 145 145 145 145 2 2 2 2 2 6 6 5 4 4 1 3 2 6 7 6 5 1 4 4 4 3 7 253 254 244 19 242 254 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 | // SPDX-License-Identifier: GPL-2.0-only /* * Fd transport layer. Includes deprecated socket layer. * * Copyright (C) 2006 by Russ Cox <rsc@swtch.com> * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net> * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/in.h> #include <linux/in6.h> #include <linux/module.h> #include <linux/net.h> #include <linux/ipv6.h> #include <linux/kthread.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/un.h> #include <linux/uaccess.h> #include <linux/inet.h> #include <linux/file.h> #include <linux/parser.h> #include <linux/slab.h> #include <linux/seq_file.h> #include <net/9p/9p.h> #include <net/9p/client.h> #include <net/9p/transport.h> #include <linux/syscalls.h> /* killme */ #define P9_PORT 564 #define MAX_SOCK_BUF (1024*1024) #define MAXPOLLWADDR 2 static struct p9_trans_module p9_tcp_trans; static struct p9_trans_module p9_fd_trans; /** * struct p9_fd_opts - per-transport options * @rfd: file descriptor for reading (trans=fd) * @wfd: file descriptor for writing (trans=fd) * @port: port to connect to (trans=tcp) * @privport: port is privileged */ struct p9_fd_opts { int rfd; int wfd; u16 port; bool privport; }; /* * Option Parsing (code inspired by NFS code) * - a little lazy - parse all fd-transport options */ enum { /* Options that take integer arguments */ Opt_port, Opt_rfdno, Opt_wfdno, Opt_err, /* Options that take no arguments */ Opt_privport, }; static const match_table_t tokens = { {Opt_port, "port=%u"}, {Opt_rfdno, "rfdno=%u"}, {Opt_wfdno, "wfdno=%u"}, {Opt_privport, "privport"}, {Opt_err, NULL}, }; enum { Rworksched = 1, /* read work scheduled or running */ Rpending = 2, /* can read */ Wworksched = 4, /* write work scheduled or running */ Wpending = 8, /* can write */ }; struct p9_poll_wait { struct p9_conn *conn; wait_queue_entry_t wait; wait_queue_head_t *wait_addr; }; /** * struct p9_conn - fd mux connection state information * @mux_list: list link for mux to manage multiple connections (?) * @client: reference to client instance for this connection * @err: error state * @req_lock: lock protecting req_list and requests statuses * @req_list: accounting for requests which have been sent * @unsent_req_list: accounting for requests that haven't been sent * @rreq: read request * @wreq: write request * @tmp_buf: temporary buffer to read in header * @rc: temporary fcall for reading current frame * @wpos: write position for current frame * @wsize: amount of data to write for current frame * @wbuf: current write buffer * @poll_pending_link: pending links to be polled per conn * @poll_wait: array of wait_q's for various worker threads * @pt: poll state * @rq: current read work * @wq: current write work * @wsched: ???? * */ struct p9_conn { struct list_head mux_list; struct p9_client *client; int err; spinlock_t req_lock; struct list_head req_list; struct list_head unsent_req_list; struct p9_req_t *rreq; struct p9_req_t *wreq; char tmp_buf[P9_HDRSZ]; struct p9_fcall rc; int wpos; int wsize; char *wbuf; struct list_head poll_pending_link; struct p9_poll_wait poll_wait[MAXPOLLWADDR]; poll_table pt; struct work_struct rq; struct work_struct wq; unsigned long wsched; }; /** * struct p9_trans_fd - transport state * @rd: reference to file to read from * @wr: reference of file to write to * @conn: connection state reference * */ struct p9_trans_fd { struct file *rd; struct file *wr; struct p9_conn conn; }; static void p9_poll_workfn(struct work_struct *work); static DEFINE_SPINLOCK(p9_poll_lock); static LIST_HEAD(p9_poll_pending_list); static DECLARE_WORK(p9_poll_work, p9_poll_workfn); static unsigned int p9_ipport_resv_min = P9_DEF_MIN_RESVPORT; static unsigned int p9_ipport_resv_max = P9_DEF_MAX_RESVPORT; static void p9_mux_poll_stop(struct p9_conn *m) { unsigned long flags; int i; for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) { struct p9_poll_wait *pwait = &m->poll_wait[i]; if (pwait->wait_addr) { remove_wait_queue(pwait->wait_addr, &pwait->wait); pwait->wait_addr = NULL; } } spin_lock_irqsave(&p9_poll_lock, flags); list_del_init(&m->poll_pending_link); spin_unlock_irqrestore(&p9_poll_lock, flags); flush_work(&p9_poll_work); } /** * p9_conn_cancel - cancel all pending requests with error * @m: mux data * @err: error code * */ static void p9_conn_cancel(struct p9_conn *m, int err) { struct p9_req_t *req, *rtmp; LIST_HEAD(cancel_list); p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); spin_lock(&m->req_lock); if (READ_ONCE(m->err)) { spin_unlock(&m->req_lock); return; } WRITE_ONCE(m->err, err); ASSERT_EXCLUSIVE_WRITER(m->err); list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { list_move(&req->req_list, &cancel_list); WRITE_ONCE(req->status, REQ_STATUS_ERROR); } list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { list_move(&req->req_list, &cancel_list); WRITE_ONCE(req->status, REQ_STATUS_ERROR); } spin_unlock(&m->req_lock); list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req); list_del(&req->req_list); if (!req->t_err) req->t_err = err; p9_client_cb(m->client, req, REQ_STATUS_ERROR); } } static __poll_t p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err) { __poll_t ret; struct p9_trans_fd *ts = NULL; if (client && client->status == Connected) ts = client->trans; if (!ts) { if (err) *err = -EREMOTEIO; return EPOLLERR; } ret = vfs_poll(ts->rd, pt); if (ts->rd != ts->wr) ret = (ret & ~EPOLLOUT) | (vfs_poll(ts->wr, pt) & ~EPOLLIN); return ret; } /** * p9_fd_read- read from a fd * @client: client instance * @v: buffer to receive data into * @len: size of receive buffer * */ static int p9_fd_read(struct p9_client *client, void *v, int len) { int ret; struct p9_trans_fd *ts = NULL; loff_t pos; if (client && client->status != Disconnected) ts = client->trans; if (!ts) return -EREMOTEIO; if (!(ts->rd->f_flags & O_NONBLOCK)) p9_debug(P9_DEBUG_ERROR, "blocking read ...\n"); pos = ts->rd->f_pos; ret = kernel_read(ts->rd, v, len, &pos); if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) client->status = Disconnected; return ret; } /** * p9_read_work - called when there is some data to be read from a transport * @work: container of work to be done * */ static void p9_read_work(struct work_struct *work) { __poll_t n; int err; struct p9_conn *m; m = container_of(work, struct p9_conn, rq); if (READ_ONCE(m->err) < 0) return; p9_debug(P9_DEBUG_TRANS, "start mux %p pos %zd\n", m, m->rc.offset); if (!m->rc.sdata) { m->rc.sdata = m->tmp_buf; m->rc.offset = 0; m->rc.capacity = P9_HDRSZ; /* start by reading header */ } clear_bit(Rpending, &m->wsched); p9_debug(P9_DEBUG_TRANS, "read mux %p pos %zd size: %zd = %zd\n", m, m->rc.offset, m->rc.capacity, m->rc.capacity - m->rc.offset); err = p9_fd_read(m->client, m->rc.sdata + m->rc.offset, m->rc.capacity - m->rc.offset); p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err); if (err == -EAGAIN) goto end_clear; if (err <= 0) goto error; m->rc.offset += err; /* header read in */ if ((!m->rreq) && (m->rc.offset == m->rc.capacity)) { p9_debug(P9_DEBUG_TRANS, "got new header\n"); /* Header size */ m->rc.size = P9_HDRSZ; err = p9_parse_header(&m->rc, &m->rc.size, NULL, NULL, 0); if (err) { p9_debug(P9_DEBUG_ERROR, "error parsing header: %d\n", err); goto error; } p9_debug(P9_DEBUG_TRANS, "mux %p pkt: size: %d bytes tag: %d\n", m, m->rc.size, m->rc.tag); m->rreq = p9_tag_lookup(m->client, m->rc.tag); if (!m->rreq || (m->rreq->status != REQ_STATUS_SENT)) { p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n", m->rc.tag); err = -EIO; goto error; } if (m->rc.size > m->rreq->rc.capacity) { p9_debug(P9_DEBUG_ERROR, "requested packet size too big: %d for tag %d with capacity %zd\n", m->rc.size, m->rc.tag, m->rreq->rc.capacity); err = -EIO; goto error; } if (!m->rreq->rc.sdata) { p9_debug(P9_DEBUG_ERROR, "No recv fcall for tag %d (req %p), disconnecting!\n", m->rc.tag, m->rreq); p9_req_put(m->client, m->rreq); m->rreq = NULL; err = -EIO; goto error; } m->rc.sdata = m->rreq->rc.sdata; memcpy(m->rc.sdata, m->tmp_buf, m->rc.capacity); m->rc.capacity = m->rc.size; } /* packet is read in * not an else because some packets (like clunk) have no payload */ if ((m->rreq) && (m->rc.offset == m->rc.capacity)) { p9_debug(P9_DEBUG_TRANS, "got new packet\n"); m->rreq->rc.size = m->rc.offset; spin_lock(&m->req_lock); if (m->rreq->status == REQ_STATUS_SENT) { list_del(&m->rreq->req_list); p9_client_cb(m->client, m->rreq, REQ_STATUS_RCVD); } else if (m->rreq->status == REQ_STATUS_FLSHD) { /* Ignore replies associated with a cancelled request. */ p9_debug(P9_DEBUG_TRANS, "Ignore replies associated with a cancelled request\n"); } else { spin_unlock(&m->req_lock); p9_debug(P9_DEBUG_ERROR, "Request tag %d errored out while we were reading the reply\n", m->rc.tag); err = -EIO; goto error; } spin_unlock(&m->req_lock); m->rc.sdata = NULL; m->rc.offset = 0; m->rc.capacity = 0; p9_req_put(m->client, m->rreq); m->rreq = NULL; } end_clear: clear_bit(Rworksched, &m->wsched); if (!list_empty(&m->req_list)) { if (test_and_clear_bit(Rpending, &m->wsched)) n = EPOLLIN; else n = p9_fd_poll(m->client, NULL, NULL); if ((n & EPOLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) { p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m); schedule_work(&m->rq); } } return; error: p9_conn_cancel(m, err); clear_bit(Rworksched, &m->wsched); } /** * p9_fd_write - write to a socket * @client: client instance * @v: buffer to send data from * @len: size of send buffer * */ static int p9_fd_write(struct p9_client *client, void *v, int len) { ssize_t ret; struct p9_trans_fd *ts = NULL; if (client && client->status != Disconnected) ts = client->trans; if (!ts) return -EREMOTEIO; if (!(ts->wr->f_flags & O_NONBLOCK)) p9_debug(P9_DEBUG_ERROR, "blocking write ...\n"); ret = kernel_write(ts->wr, v, len, &ts->wr->f_pos); if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) client->status = Disconnected; return ret; } /** * p9_write_work - called when a transport can send some data * @work: container for work to be done * */ static void p9_write_work(struct work_struct *work) { __poll_t n; int err; struct p9_conn *m; struct p9_req_t *req; m = container_of(work, struct p9_conn, wq); if (READ_ONCE(m->err) < 0) { clear_bit(Wworksched, &m->wsched); return; } if (!m->wsize) { spin_lock(&m->req_lock); if (list_empty(&m->unsent_req_list)) { clear_bit(Wworksched, &m->wsched); spin_unlock(&m->req_lock); return; } req = list_entry(m->unsent_req_list.next, struct p9_req_t, req_list); WRITE_ONCE(req->status, REQ_STATUS_SENT); p9_debug(P9_DEBUG_TRANS, "move req %p\n", req); list_move_tail(&req->req_list, &m->req_list); m->wbuf = req->tc.sdata; m->wsize = req->tc.size; m->wpos = 0; p9_req_get(req); m->wreq = req; spin_unlock(&m->req_lock); } p9_debug(P9_DEBUG_TRANS, "mux %p pos %d size %d\n", m, m->wpos, m->wsize); clear_bit(Wpending, &m->wsched); err = p9_fd_write(m->client, m->wbuf + m->wpos, m->wsize - m->wpos); p9_debug(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err); if (err == -EAGAIN) goto end_clear; if (err < 0) goto error; else if (err == 0) { err = -EREMOTEIO; goto error; } m->wpos += err; if (m->wpos == m->wsize) { m->wpos = m->wsize = 0; p9_req_put(m->client, m->wreq); m->wreq = NULL; } end_clear: clear_bit(Wworksched, &m->wsched); if (m->wsize || !list_empty(&m->unsent_req_list)) { if (test_and_clear_bit(Wpending, &m->wsched)) n = EPOLLOUT; else n = p9_fd_poll(m->client, NULL, NULL); if ((n & EPOLLOUT) && !test_and_set_bit(Wworksched, &m->wsched)) { p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m); schedule_work(&m->wq); } } return; error: p9_conn_cancel(m, err); clear_bit(Wworksched, &m->wsched); } static int p9_pollwake(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key) { struct p9_poll_wait *pwait = container_of(wait, struct p9_poll_wait, wait); struct p9_conn *m = pwait->conn; unsigned long flags; spin_lock_irqsave(&p9_poll_lock, flags); if (list_empty(&m->poll_pending_link)) list_add_tail(&m->poll_pending_link, &p9_poll_pending_list); spin_unlock_irqrestore(&p9_poll_lock, flags); schedule_work(&p9_poll_work); return 1; } /** * p9_pollwait - add poll task to the wait queue * @filp: file pointer being polled * @wait_address: wait_q to block on * @p: poll state * * called by files poll operation to add v9fs-poll task to files wait queue */ static void p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) { struct p9_conn *m = container_of(p, struct p9_conn, pt); struct p9_poll_wait *pwait = NULL; int i; for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) { if (m->poll_wait[i].wait_addr == NULL) { pwait = &m->poll_wait[i]; break; } } if (!pwait) { p9_debug(P9_DEBUG_ERROR, "not enough wait_address slots\n"); return; } pwait->conn = m; pwait->wait_addr = wait_address; init_waitqueue_func_entry(&pwait->wait, p9_pollwake); add_wait_queue(wait_address, &pwait->wait); } /** * p9_conn_create - initialize the per-session mux data * @client: client instance * * Note: Creates the polling task if this is the first session. */ static void p9_conn_create(struct p9_client *client) { __poll_t n; struct p9_trans_fd *ts = client->trans; struct p9_conn *m = &ts->conn; p9_debug(P9_DEBUG_TRANS, "client %p msize %d\n", client, client->msize); INIT_LIST_HEAD(&m->mux_list); m->client = client; spin_lock_init(&m->req_lock); INIT_LIST_HEAD(&m->req_list); INIT_LIST_HEAD(&m->unsent_req_list); INIT_WORK(&m->rq, p9_read_work); INIT_WORK(&m->wq, p9_write_work); INIT_LIST_HEAD(&m->poll_pending_link); init_poll_funcptr(&m->pt, p9_pollwait); n = p9_fd_poll(client, &m->pt, NULL); if (n & EPOLLIN) { p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m); set_bit(Rpending, &m->wsched); } if (n & EPOLLOUT) { p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m); set_bit(Wpending, &m->wsched); } } /** * p9_poll_mux - polls a mux and schedules read or write works if necessary * @m: connection to poll * */ static void p9_poll_mux(struct p9_conn *m) { __poll_t n; int err = -ECONNRESET; if (READ_ONCE(m->err) < 0) return; n = p9_fd_poll(m->client, NULL, &err); if (n & (EPOLLERR | EPOLLHUP | EPOLLNVAL)) { p9_debug(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n); p9_conn_cancel(m, err); } if (n & EPOLLIN) { set_bit(Rpending, &m->wsched); p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m); if (!test_and_set_bit(Rworksched, &m->wsched)) { p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m); schedule_work(&m->rq); } } if (n & EPOLLOUT) { set_bit(Wpending, &m->wsched); p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m); if ((m->wsize || !list_empty(&m->unsent_req_list)) && !test_and_set_bit(Wworksched, &m->wsched)) { p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m); schedule_work(&m->wq); } } } /** * p9_fd_request - send 9P request * The function can sleep until the request is scheduled for sending. * The function can be interrupted. Return from the function is not * a guarantee that the request is sent successfully. * * @client: client instance * @req: request to be sent * */ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) { __poll_t n; int err; struct p9_trans_fd *ts = client->trans; struct p9_conn *m = &ts->conn; p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n", m, current, &req->tc, req->tc.id); spin_lock(&m->req_lock); err = READ_ONCE(m->err); if (err < 0) { spin_unlock(&m->req_lock); return err; } WRITE_ONCE(req->status, REQ_STATUS_UNSENT); list_add_tail(&req->req_list, &m->unsent_req_list); spin_unlock(&m->req_lock); if (test_and_clear_bit(Wpending, &m->wsched)) n = EPOLLOUT; else n = p9_fd_poll(m->client, NULL, NULL); if (n & EPOLLOUT && !test_and_set_bit(Wworksched, &m->wsched)) schedule_work(&m->wq); return 0; } static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) { struct p9_trans_fd *ts = client->trans; struct p9_conn *m = &ts->conn; int ret = 1; p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req); spin_lock(&m->req_lock); if (req->status == REQ_STATUS_UNSENT) { list_del(&req->req_list); WRITE_ONCE(req->status, REQ_STATUS_FLSHD); p9_req_put(client, req); ret = 0; } spin_unlock(&m->req_lock); return ret; } static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req) { struct p9_trans_fd *ts = client->trans; struct p9_conn *m = &ts->conn; p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req); spin_lock(&m->req_lock); /* Ignore cancelled request if message has been received * before lock. */ if (req->status == REQ_STATUS_RCVD) { spin_unlock(&m->req_lock); return 0; } /* we haven't received a response for oldreq, * remove it from the list. */ list_del(&req->req_list); WRITE_ONCE(req->status, REQ_STATUS_FLSHD); spin_unlock(&m->req_lock); p9_req_put(client, req); return 0; } static int p9_fd_show_options(struct seq_file *m, struct p9_client *clnt) { if (clnt->trans_mod == &p9_tcp_trans) { if (clnt->trans_opts.tcp.port != P9_PORT) seq_printf(m, ",port=%u", clnt->trans_opts.tcp.port); } else if (clnt->trans_mod == &p9_fd_trans) { if (clnt->trans_opts.fd.rfd != ~0) seq_printf(m, ",rfd=%u", clnt->trans_opts.fd.rfd); if (clnt->trans_opts.fd.wfd != ~0) seq_printf(m, ",wfd=%u", clnt->trans_opts.fd.wfd); } return 0; } /** * parse_opts - parse mount options into p9_fd_opts structure * @params: options string passed from mount * @opts: fd transport-specific structure to parse options into * * Returns 0 upon success, -ERRNO upon failure */ static int parse_opts(char *params, struct p9_fd_opts *opts) { char *p; substring_t args[MAX_OPT_ARGS]; int option; char *options, *tmp_options; opts->port = P9_PORT; opts->rfd = ~0; opts->wfd = ~0; opts->privport = false; if (!params) return 0; tmp_options = kstrdup(params, GFP_KERNEL); if (!tmp_options) { p9_debug(P9_DEBUG_ERROR, "failed to allocate copy of option string\n"); return -ENOMEM; } options = tmp_options; while ((p = strsep(&options, ",")) != NULL) { int token; int r; if (!*p) continue; token = match_token(p, tokens, args); if ((token != Opt_err) && (token != Opt_privport)) { r = match_int(&args[0], &option); if (r < 0) { p9_debug(P9_DEBUG_ERROR, "integer field, but no integer?\n"); continue; } } switch (token) { case Opt_port: opts->port = option; break; case Opt_rfdno: opts->rfd = option; break; case Opt_wfdno: opts->wfd = option; break; case Opt_privport: opts->privport = true; break; default: continue; } } kfree(tmp_options); return 0; } static int p9_fd_open(struct p9_client *client, int rfd, int wfd) { struct p9_trans_fd *ts = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); if (!ts) return -ENOMEM; ts->rd = fget(rfd); if (!ts->rd) goto out_free_ts; if (!(ts->rd->f_mode & FMODE_READ)) goto out_put_rd; /* Prevent workers from hanging on IO when fd is a pipe. * It's technically possible for userspace or concurrent mounts to * modify this flag concurrently, which will likely result in a * broken filesystem. However, just having bad flags here should * not crash the kernel or cause any other sort of bug, so mark this * particular data race as intentional so that tooling (like KCSAN) * can allow it and detect further problems. */ data_race(ts->rd->f_flags |= O_NONBLOCK); ts->wr = fget(wfd); if (!ts->wr) goto out_put_rd; if (!(ts->wr->f_mode & FMODE_WRITE)) goto out_put_wr; data_race(ts->wr->f_flags |= O_NONBLOCK); client->trans = ts; client->status = Connected; return 0; out_put_wr: fput(ts->wr); out_put_rd: fput(ts->rd); out_free_ts: kfree(ts); return -EIO; } static int p9_socket_open(struct p9_client *client, struct socket *csocket) { struct p9_trans_fd *p; struct file *file; p = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); if (!p) { sock_release(csocket); return -ENOMEM; } csocket->sk->sk_allocation = GFP_NOIO; csocket->sk->sk_use_task_frag = false; file = sock_alloc_file(csocket, 0, NULL); if (IS_ERR(file)) { pr_err("%s (%d): failed to map fd\n", __func__, task_pid_nr(current)); kfree(p); return PTR_ERR(file); } get_file(file); p->wr = p->rd = file; client->trans = p; client->status = Connected; p->rd->f_flags |= O_NONBLOCK; p9_conn_create(client); return 0; } /** * p9_conn_destroy - cancels all pending requests of mux * @m: mux to destroy * */ static void p9_conn_destroy(struct p9_conn *m) { p9_debug(P9_DEBUG_TRANS, "mux %p prev %p next %p\n", m, m->mux_list.prev, m->mux_list.next); p9_mux_poll_stop(m); cancel_work_sync(&m->rq); if (m->rreq) { p9_req_put(m->client, m->rreq); m->rreq = NULL; } cancel_work_sync(&m->wq); if (m->wreq) { p9_req_put(m->client, m->wreq); m->wreq = NULL; } p9_conn_cancel(m, -ECONNRESET); m->client = NULL; } /** * p9_fd_close - shutdown file descriptor transport * @client: client instance * */ static void p9_fd_close(struct p9_client *client) { struct p9_trans_fd *ts; if (!client) return; ts = client->trans; if (!ts) return; client->status = Disconnected; p9_conn_destroy(&ts->conn); if (ts->rd) fput(ts->rd); if (ts->wr) fput(ts->wr); kfree(ts); } static int p9_bind_privport(struct socket *sock) { struct sockaddr_storage stor = { 0 }; int port, err = -EINVAL; stor.ss_family = sock->ops->family; if (stor.ss_family == AF_INET) ((struct sockaddr_in *)&stor)->sin_addr.s_addr = htonl(INADDR_ANY); else ((struct sockaddr_in6 *)&stor)->sin6_addr = in6addr_any; for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) { if (stor.ss_family == AF_INET) ((struct sockaddr_in *)&stor)->sin_port = htons((ushort)port); else ((struct sockaddr_in6 *)&stor)->sin6_port = htons((ushort)port); err = kernel_bind(sock, (struct sockaddr *)&stor, sizeof(stor)); if (err != -EADDRINUSE) break; } return err; } static int p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) { int err; char port_str[6]; struct socket *csocket; struct sockaddr_storage stor = { 0 }; struct p9_fd_opts opts; err = parse_opts(args, &opts); if (err < 0) return err; if (!addr) return -EINVAL; sprintf(port_str, "%u", opts.port); err = inet_pton_with_scope(current->nsproxy->net_ns, AF_UNSPEC, addr, port_str, &stor); if (err < 0) return err; csocket = NULL; client->trans_opts.tcp.port = opts.port; client->trans_opts.tcp.privport = opts.privport; err = __sock_create(current->nsproxy->net_ns, stor.ss_family, SOCK_STREAM, IPPROTO_TCP, &csocket, 1); if (err) { pr_err("%s (%d): problem creating socket\n", __func__, task_pid_nr(current)); return err; } if (opts.privport) { err = p9_bind_privport(csocket); if (err < 0) { pr_err("%s (%d): problem binding to privport\n", __func__, task_pid_nr(current)); sock_release(csocket); return err; } } err = READ_ONCE(csocket->ops)->connect(csocket, (struct sockaddr *)&stor, sizeof(stor), 0); if (err < 0) { pr_err("%s (%d): problem connecting socket to %s\n", __func__, task_pid_nr(current), addr); sock_release(csocket); return err; } return p9_socket_open(client, csocket); } static int p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) { int err; struct socket *csocket; struct sockaddr_un sun_server; csocket = NULL; if (!addr || !strlen(addr)) return -EINVAL; if (strlen(addr) >= UNIX_PATH_MAX) { pr_err("%s (%d): address too long: %s\n", __func__, task_pid_nr(current), addr); return -ENAMETOOLONG; } sun_server.sun_family = PF_UNIX; strcpy(sun_server.sun_path, addr); err = __sock_create(current->nsproxy->net_ns, PF_UNIX, SOCK_STREAM, 0, &csocket, 1); if (err < 0) { pr_err("%s (%d): problem creating socket\n", __func__, task_pid_nr(current)); return err; } err = READ_ONCE(csocket->ops)->connect(csocket, (struct sockaddr *)&sun_server, sizeof(struct sockaddr_un) - 1, 0); if (err < 0) { pr_err("%s (%d): problem connecting socket: %s: %d\n", __func__, task_pid_nr(current), addr, err); sock_release(csocket); return err; } return p9_socket_open(client, csocket); } static int p9_fd_create(struct p9_client *client, const char *addr, char *args) { int err; struct p9_fd_opts opts; err = parse_opts(args, &opts); if (err < 0) return err; client->trans_opts.fd.rfd = opts.rfd; client->trans_opts.fd.wfd = opts.wfd; if (opts.rfd == ~0 || opts.wfd == ~0) { pr_err("Insufficient options for proto=fd\n"); return -ENOPROTOOPT; } err = p9_fd_open(client, opts.rfd, opts.wfd); if (err < 0) return err; p9_conn_create(client); return 0; } static struct p9_trans_module p9_tcp_trans = { .name = "tcp", .maxsize = MAX_SOCK_BUF, .pooled_rbuffers = false, .def = 0, .create = p9_fd_create_tcp, .close = p9_fd_close, .request = p9_fd_request, .cancel = p9_fd_cancel, .cancelled = p9_fd_cancelled, .show_options = p9_fd_show_options, .owner = THIS_MODULE, }; MODULE_ALIAS_9P("tcp"); static struct p9_trans_module p9_unix_trans = { .name = "unix", .maxsize = MAX_SOCK_BUF, .def = 0, .create = p9_fd_create_unix, .close = p9_fd_close, .request = p9_fd_request, .cancel = p9_fd_cancel, .cancelled = p9_fd_cancelled, .show_options = p9_fd_show_options, .owner = THIS_MODULE, }; MODULE_ALIAS_9P("unix"); static struct p9_trans_module p9_fd_trans = { .name = "fd", .maxsize = MAX_SOCK_BUF, .def = 0, .create = p9_fd_create, .close = p9_fd_close, .request = p9_fd_request, .cancel = p9_fd_cancel, .cancelled = p9_fd_cancelled, .show_options = p9_fd_show_options, .owner = THIS_MODULE, }; MODULE_ALIAS_9P("fd"); /** * p9_poll_workfn - poll worker thread * @work: work queue * * polls all v9fs transports for new events and queues the appropriate * work to the work queue * */ static void p9_poll_workfn(struct work_struct *work) { unsigned long flags; p9_debug(P9_DEBUG_TRANS, "start %p\n", current); spin_lock_irqsave(&p9_poll_lock, flags); while (!list_empty(&p9_poll_pending_list)) { struct p9_conn *conn = list_first_entry(&p9_poll_pending_list, struct p9_conn, poll_pending_link); list_del_init(&conn->poll_pending_link); spin_unlock_irqrestore(&p9_poll_lock, flags); p9_poll_mux(conn); spin_lock_irqsave(&p9_poll_lock, flags); } spin_unlock_irqrestore(&p9_poll_lock, flags); p9_debug(P9_DEBUG_TRANS, "finish\n"); } static int __init p9_trans_fd_init(void) { v9fs_register_trans(&p9_tcp_trans); v9fs_register_trans(&p9_unix_trans); v9fs_register_trans(&p9_fd_trans); return 0; } static void __exit p9_trans_fd_exit(void) { flush_work(&p9_poll_work); v9fs_unregister_trans(&p9_tcp_trans); v9fs_unregister_trans(&p9_unix_trans); v9fs_unregister_trans(&p9_fd_trans); } module_init(p9_trans_fd_init); module_exit(p9_trans_fd_exit); MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>"); MODULE_DESCRIPTION("Filedescriptor Transport for 9P"); MODULE_LICENSE("GPL"); |
144 143 1 24 145 26 132 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 | /* * Copyright (c) 2016 Intel Corporation * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that copyright * notice and this permission notice appear in supporting documentation, and * that the name of the copyright holders not be used in advertising or * publicity pertaining to distribution of the software without specific, * written prior permission. The copyright holders make no representations * about the suitability of this software for any purpose. It is provided "as * is" without express or implied warranty. * * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. */ #ifndef __DRM_CONNECTOR_H__ #define __DRM_CONNECTOR_H__ #include <linux/list.h> #include <linux/llist.h> #include <linux/ctype.h> #include <linux/hdmi.h> #include <linux/notifier.h> #include <drm/drm_mode_object.h> #include <drm/drm_util.h> #include <drm/drm_property.h> #include <uapi/drm/drm_mode.h> struct drm_connector_helper_funcs; struct drm_modeset_acquire_ctx; struct drm_device; struct drm_crtc; struct drm_display_mode; struct drm_encoder; struct drm_panel; struct drm_property; struct drm_property_blob; struct drm_printer; struct drm_privacy_screen; struct drm_edid; struct edid; struct hdmi_codec_daifmt; struct hdmi_codec_params; struct i2c_adapter; enum drm_connector_force { DRM_FORCE_UNSPECIFIED, DRM_FORCE_OFF, DRM_FORCE_ON, /* force on analog part normally */ DRM_FORCE_ON_DIGITAL, /* for DVI-I use digital connector */ }; /** * enum drm_connector_status - status for a &drm_connector * * This enum is used to track the connector status. There are no separate * #defines for the uapi! */ enum drm_connector_status { /** * @connector_status_connected: The connector is definitely connected to * a sink device, and can be enabled. */ connector_status_connected = 1, /** * @connector_status_disconnected: The connector isn't connected to a * sink device which can be autodetect. For digital outputs like DP or * HDMI (which can be realiable probed) this means there's really * nothing there. It is driver-dependent whether a connector with this * status can be lit up or not. */ connector_status_disconnected = 2, /** * @connector_status_unknown: The connector's status could not be * reliably detected. This happens when probing would either cause * flicker (like load-detection when the connector is in use), or when a * hardware resource isn't available (like when load-detection needs a * free CRTC). It should be possible to light up the connector with one * of the listed fallback modes. For default configuration userspace * should only try to light up connectors with unknown status when * there's not connector with @connector_status_connected. */ connector_status_unknown = 3, }; /** * enum drm_connector_registration_state - userspace registration status for * a &drm_connector * * This enum is used to track the status of initializing a connector and * registering it with userspace, so that DRM can prevent bogus modesets on * connectors that no longer exist. */ enum drm_connector_registration_state { /** * @DRM_CONNECTOR_INITIALIZING: The connector has just been created, * but has yet to be exposed to userspace. There should be no * additional restrictions to how the state of this connector may be * modified. */ DRM_CONNECTOR_INITIALIZING = 0, /** * @DRM_CONNECTOR_REGISTERED: The connector has been fully initialized * and registered with sysfs, as such it has been exposed to * userspace. There should be no additional restrictions to how the * state of this connector may be modified. */ DRM_CONNECTOR_REGISTERED = 1, /** * @DRM_CONNECTOR_UNREGISTERED: The connector has either been exposed * to userspace and has since been unregistered and removed from * userspace, or the connector was unregistered before it had a chance * to be exposed to userspace (e.g. still in the * @DRM_CONNECTOR_INITIALIZING state). When a connector is * unregistered, there are additional restrictions to how its state * may be modified: * * - An unregistered connector may only have its DPMS changed from * On->Off. Once DPMS is changed to Off, it may not be switched back * to On. * - Modesets are not allowed on unregistered connectors, unless they * would result in disabling its assigned CRTCs. This means * disabling a CRTC on an unregistered connector is OK, but enabling * one is not. * - Removing a CRTC from an unregistered connector is OK, but new * CRTCs may never be assigned to an unregistered connector. */ DRM_CONNECTOR_UNREGISTERED = 2, }; enum subpixel_order { SubPixelUnknown = 0, SubPixelHorizontalRGB, SubPixelHorizontalBGR, SubPixelVerticalRGB, SubPixelVerticalBGR, SubPixelNone, }; /** * enum drm_connector_tv_mode - Analog TV output mode * * This enum is used to indicate the TV output mode used on an analog TV * connector. * * WARNING: The values of this enum is uABI since they're exposed in the * "TV mode" connector property. */ enum drm_connector_tv_mode { /** * @DRM_MODE_TV_MODE_NTSC: CCIR System M (aka 525-lines) * together with the NTSC Color Encoding. */ DRM_MODE_TV_MODE_NTSC, /** * @DRM_MODE_TV_MODE_NTSC_443: Variant of * @DRM_MODE_TV_MODE_NTSC. Uses a color subcarrier frequency * of 4.43 MHz. */ DRM_MODE_TV_MODE_NTSC_443, /** * @DRM_MODE_TV_MODE_NTSC_J: Variant of @DRM_MODE_TV_MODE_NTSC * used in Japan. Uses a black level equals to the blanking * level. */ DRM_MODE_TV_MODE_NTSC_J, /** * @DRM_MODE_TV_MODE_PAL: CCIR System B together with the PAL * color system. */ DRM_MODE_TV_MODE_PAL, /** * @DRM_MODE_TV_MODE_PAL_M: CCIR System M (aka 525-lines) * together with the PAL color encoding */ DRM_MODE_TV_MODE_PAL_M, /** * @DRM_MODE_TV_MODE_PAL_N: CCIR System N together with the PAL * color encoding. It uses 625 lines, but has a color subcarrier * frequency of 3.58MHz, the SECAM color space, and narrower * channels compared to most of the other PAL variants. */ DRM_MODE_TV_MODE_PAL_N, /** * @DRM_MODE_TV_MODE_SECAM: CCIR System B together with the * SECAM color system. */ DRM_MODE_TV_MODE_SECAM, /** * @DRM_MODE_TV_MODE_MONOCHROME: Use timings appropriate to * the DRM mode, including equalizing pulses for a 525-line * or 625-line mode, with no pedestal or color encoding. */ DRM_MODE_TV_MODE_MONOCHROME, /** * @DRM_MODE_TV_MODE_MAX: Number of analog TV output modes. * * Internal implementation detail; this is not uABI. */ DRM_MODE_TV_MODE_MAX, }; /** * struct drm_scrambling: sink's scrambling support. */ struct drm_scrambling { /** * @supported: scrambling supported for rates > 340 Mhz. */ bool supported; /** * @low_rates: scrambling supported for rates <= 340 Mhz. */ bool low_rates; }; /* * struct drm_scdc - Information about scdc capabilities of a HDMI 2.0 sink * * Provides SCDC register support and capabilities related information on a * HDMI 2.0 sink. In case of a HDMI 1.4 sink, all parameter must be 0. */ struct drm_scdc { /** * @supported: status control & data channel present. */ bool supported; /** * @read_request: sink is capable of generating scdc read request. */ bool read_request; /** * @scrambling: sink's scrambling capabilities */ struct drm_scrambling scrambling; }; /** * struct drm_hdmi_dsc_cap - DSC capabilities of HDMI sink * * Describes the DSC support provided by HDMI 2.1 sink. * The information is fetched fom additional HFVSDB blocks defined * for HDMI 2.1. */ struct drm_hdmi_dsc_cap { /** @v_1p2: flag for dsc1.2 version support by sink */ bool v_1p2; /** @native_420: Does sink support DSC with 4:2:0 compression */ bool native_420; /** * @all_bpp: Does sink support all bpp with 4:4:4: or 4:2:2 * compressed formats */ bool all_bpp; /** * @bpc_supported: compressed bpc supported by sink : 10, 12 or 16 bpc */ u8 bpc_supported; /** @max_slices: maximum number of Horizontal slices supported by */ u8 max_slices; /** @clk_per_slice : max pixel clock in MHz supported per slice */ int clk_per_slice; /** @max_lanes : dsc max lanes supported for Fixed rate Link training */ u8 max_lanes; /** @max_frl_rate_per_lane : maximum frl rate with DSC per lane */ u8 max_frl_rate_per_lane; /** @total_chunk_kbytes: max size of chunks in KBs supported per line*/ u8 total_chunk_kbytes; }; /** * struct drm_hdmi_info - runtime information about the connected HDMI sink * * Describes if a given display supports advanced HDMI 2.0 features. * This information is available in CEA-861-F extension blocks (like HF-VSDB). */ struct drm_hdmi_info { /** @scdc: sink's scdc support and capabilities */ struct drm_scdc scdc; /** * @y420_vdb_modes: bitmap of modes which can support ycbcr420 * output only (not normal RGB/YCBCR444/422 outputs). The max VIC * defined by the CEA-861-G spec is 219, so the size is 256 bits to map * up to 256 VICs. */ unsigned long y420_vdb_modes[BITS_TO_LONGS(256)]; /** * @y420_cmdb_modes: bitmap of modes which can support ycbcr420 * output also, along with normal HDMI outputs. The max VIC defined by * the CEA-861-G spec is 219, so the size is 256 bits to map up to 256 * VICs. */ unsigned long y420_cmdb_modes[BITS_TO_LONGS(256)]; /** @y420_dc_modes: bitmap of deep color support index */ u8 y420_dc_modes; /** @max_frl_rate_per_lane: support fixed rate link */ u8 max_frl_rate_per_lane; /** @max_lanes: supported by sink */ u8 max_lanes; /** @dsc_cap: DSC capabilities of the sink */ struct drm_hdmi_dsc_cap dsc_cap; }; /** * enum drm_link_status - connector's link_status property value * * This enum is used as the connector's link status property value. * It is set to the values defined in uapi. * * @DRM_LINK_STATUS_GOOD: DP Link is Good as a result of successful * link training * @DRM_LINK_STATUS_BAD: DP Link is BAD as a result of link training * failure */ enum drm_link_status { DRM_LINK_STATUS_GOOD = DRM_MODE_LINK_STATUS_GOOD, DRM_LINK_STATUS_BAD = DRM_MODE_LINK_STATUS_BAD, }; /** * enum drm_panel_orientation - panel_orientation info for &drm_display_info * * This enum is used to track the (LCD) panel orientation. There are no * separate #defines for the uapi! * * @DRM_MODE_PANEL_ORIENTATION_UNKNOWN: The drm driver has not provided any * panel orientation information (normal * for non panels) in this case the "panel * orientation" connector prop will not be * attached. * @DRM_MODE_PANEL_ORIENTATION_NORMAL: The top side of the panel matches the * top side of the device's casing. * @DRM_MODE_PANEL_ORIENTATION_BOTTOM_UP: The top side of the panel matches the * bottom side of the device's casing, iow * the panel is mounted upside-down. * @DRM_MODE_PANEL_ORIENTATION_LEFT_UP: The left side of the panel matches the * top side of the device's casing. * @DRM_MODE_PANEL_ORIENTATION_RIGHT_UP: The right side of the panel matches the * top side of the device's casing. */ enum drm_panel_orientation { DRM_MODE_PANEL_ORIENTATION_UNKNOWN = -1, DRM_MODE_PANEL_ORIENTATION_NORMAL = 0, DRM_MODE_PANEL_ORIENTATION_BOTTOM_UP, DRM_MODE_PANEL_ORIENTATION_LEFT_UP, DRM_MODE_PANEL_ORIENTATION_RIGHT_UP, }; /** * enum drm_hdmi_broadcast_rgb - Broadcast RGB Selection for an HDMI @drm_connector */ enum drm_hdmi_broadcast_rgb { /** * @DRM_HDMI_BROADCAST_RGB_AUTO: The RGB range is selected * automatically based on the mode. */ DRM_HDMI_BROADCAST_RGB_AUTO, /** * @DRM_HDMI_BROADCAST_RGB_FULL: Full range RGB is forced. */ DRM_HDMI_BROADCAST_RGB_FULL, /** * @DRM_HDMI_BROADCAST_RGB_LIMITED: Limited range RGB is forced. */ DRM_HDMI_BROADCAST_RGB_LIMITED, }; const char * drm_hdmi_connector_get_broadcast_rgb_name(enum drm_hdmi_broadcast_rgb broadcast_rgb); const char * drm_hdmi_connector_get_output_format_name(enum hdmi_colorspace fmt); /** * struct drm_monitor_range_info - Panel's Monitor range in EDID for * &drm_display_info * * This struct is used to store a frequency range supported by panel * as parsed from EDID's detailed monitor range descriptor block. * * @min_vfreq: This is the min supported refresh rate in Hz from * EDID's detailed monitor range. * @max_vfreq: This is the max supported refresh rate in Hz from * EDID's detailed monitor range */ struct drm_monitor_range_info { u16 min_vfreq; u16 max_vfreq; }; /** * struct drm_luminance_range_info - Panel's luminance range for * &drm_display_info. Calculated using data in EDID * * This struct is used to store a luminance range supported by panel * as calculated using data from EDID's static hdr metadata. * * @min_luminance: This is the min supported luminance value * * @max_luminance: This is the max supported luminance value */ struct drm_luminance_range_info { u32 min_luminance; u32 max_luminance; }; /** * enum drm_privacy_screen_status - privacy screen status * * This enum is used to track and control the state of the integrated privacy * screen present on some display panels, via the "privacy-screen sw-state" * and "privacy-screen hw-state" properties. Note the _LOCKED enum values * are only valid for the "privacy-screen hw-state" property. * * @PRIVACY_SCREEN_DISABLED: * The privacy-screen on the panel is disabled * @PRIVACY_SCREEN_ENABLED: * The privacy-screen on the panel is enabled * @PRIVACY_SCREEN_DISABLED_LOCKED: * The privacy-screen on the panel is disabled and locked (cannot be changed) * @PRIVACY_SCREEN_ENABLED_LOCKED: * The privacy-screen on the panel is enabled and locked (cannot be changed) */ enum drm_privacy_screen_status { PRIVACY_SCREEN_DISABLED = 0, PRIVACY_SCREEN_ENABLED, PRIVACY_SCREEN_DISABLED_LOCKED, PRIVACY_SCREEN_ENABLED_LOCKED, }; /** * enum drm_colorspace - color space * * This enum is a consolidated colorimetry list supported by HDMI and * DP protocol standard. The respective connectors will register * a property with the subset of this list (supported by that * respective protocol). Userspace will set the colorspace through * a colorspace property which will be created and exposed to * userspace. * * DP definitions come from the DP v2.0 spec * HDMI definitions come from the CTA-861-H spec * * @DRM_MODE_COLORIMETRY_DEFAULT: * Driver specific behavior. * @DRM_MODE_COLORIMETRY_NO_DATA: * Driver specific behavior. * @DRM_MODE_COLORIMETRY_SMPTE_170M_YCC: * (HDMI) * SMPTE ST 170M colorimetry format * @DRM_MODE_COLORIMETRY_BT709_YCC: * (HDMI, DP) * ITU-R BT.709 colorimetry format * @DRM_MODE_COLORIMETRY_XVYCC_601: * (HDMI, DP) * xvYCC601 colorimetry format * @DRM_MODE_COLORIMETRY_XVYCC_709: * (HDMI, DP) * xvYCC709 colorimetry format * @DRM_MODE_COLORIMETRY_SYCC_601: * (HDMI, DP) * sYCC601 colorimetry format * @DRM_MODE_COLORIMETRY_OPYCC_601: * (HDMI, DP) * opYCC601 colorimetry format * @DRM_MODE_COLORIMETRY_OPRGB: * (HDMI, DP) * opRGB colorimetry format * @DRM_MODE_COLORIMETRY_BT2020_CYCC: * (HDMI, DP) * ITU-R BT.2020 Y'c C'bc C'rc (constant luminance) colorimetry format * @DRM_MODE_COLORIMETRY_BT2020_RGB: * (HDMI, DP) * ITU-R BT.2020 R' G' B' colorimetry format * @DRM_MODE_COLORIMETRY_BT2020_YCC: * (HDMI, DP) * ITU-R BT.2020 Y' C'b C'r colorimetry format * @DRM_MODE_COLORIMETRY_DCI_P3_RGB_D65: * (HDMI) * SMPTE ST 2113 P3D65 colorimetry format * @DRM_MODE_COLORIMETRY_DCI_P3_RGB_THEATER: * (HDMI) * SMPTE ST 2113 P3DCI colorimetry format * @DRM_MODE_COLORIMETRY_RGB_WIDE_FIXED: * (DP) * RGB wide gamut fixed point colorimetry format * @DRM_MODE_COLORIMETRY_RGB_WIDE_FLOAT: * (DP) * RGB wide gamut floating point * (scRGB (IEC 61966-2-2)) colorimetry format * @DRM_MODE_COLORIMETRY_BT601_YCC: * (DP) * ITU-R BT.601 colorimetry format * The DP spec does not say whether this is the 525 or the 625 * line version. * @DRM_MODE_COLORIMETRY_COUNT: * Not a valid value; merely used four counting */ enum drm_colorspace { /* For Default case, driver will set the colorspace */ DRM_MODE_COLORIMETRY_DEFAULT = 0, /* CEA 861 Normal Colorimetry options */ DRM_MODE_COLORIMETRY_NO_DATA = 0, DRM_MODE_COLORIMETRY_SMPTE_170M_YCC = 1, DRM_MODE_COLORIMETRY_BT709_YCC = 2, /* CEA 861 Extended Colorimetry Options */ DRM_MODE_COLORIMETRY_XVYCC_601 = 3, DRM_MODE_COLORIMETRY_XVYCC_709 = 4, DRM_MODE_COLORIMETRY_SYCC_601 = 5, DRM_MODE_COLORIMETRY_OPYCC_601 = 6, DRM_MODE_COLORIMETRY_OPRGB = 7, DRM_MODE_COLORIMETRY_BT2020_CYCC = 8, DRM_MODE_COLORIMETRY_BT2020_RGB = 9, DRM_MODE_COLORIMETRY_BT2020_YCC = 10, /* Additional Colorimetry extension added as part of CTA 861.G */ DRM_MODE_COLORIMETRY_DCI_P3_RGB_D65 = 11, DRM_MODE_COLORIMETRY_DCI_P3_RGB_THEATER = 12, /* Additional Colorimetry Options added for DP 1.4a VSC Colorimetry Format */ DRM_MODE_COLORIMETRY_RGB_WIDE_FIXED = 13, DRM_MODE_COLORIMETRY_RGB_WIDE_FLOAT = 14, DRM_MODE_COLORIMETRY_BT601_YCC = 15, DRM_MODE_COLORIMETRY_COUNT }; /** * enum drm_bus_flags - bus_flags info for &drm_display_info * * This enum defines signal polarities and clock edge information for signals on * a bus as bitmask flags. * * The clock edge information is conveyed by two sets of symbols, * DRM_BUS_FLAGS_*_DRIVE_\* and DRM_BUS_FLAGS_*_SAMPLE_\*. When this enum is * used to describe a bus from the point of view of the transmitter, the * \*_DRIVE_\* flags should be used. When used from the point of view of the * receiver, the \*_SAMPLE_\* flags should be used. The \*_DRIVE_\* and * \*_SAMPLE_\* flags alias each other, with the \*_SAMPLE_POSEDGE and * \*_SAMPLE_NEGEDGE flags being equal to \*_DRIVE_NEGEDGE and \*_DRIVE_POSEDGE * respectively. This simplifies code as signals are usually sampled on the * opposite edge of the driving edge. Transmitters and receivers may however * need to take other signal timings into account to convert between driving * and sample edges. */ enum drm_bus_flags { /** * @DRM_BUS_FLAG_DE_LOW: * * The Data Enable signal is active low */ DRM_BUS_FLAG_DE_LOW = BIT(0), /** * @DRM_BUS_FLAG_DE_HIGH: * * The Data Enable signal is active high */ DRM_BUS_FLAG_DE_HIGH = BIT(1), /** * @DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE: * * Data is driven on the rising edge of the pixel clock */ DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE = BIT(2), /** * @DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE: * * Data is driven on the falling edge of the pixel clock */ DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE = BIT(3), /** * @DRM_BUS_FLAG_PIXDATA_SAMPLE_POSEDGE: * * Data is sampled on the rising edge of the pixel clock */ DRM_BUS_FLAG_PIXDATA_SAMPLE_POSEDGE = DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE, /** * @DRM_BUS_FLAG_PIXDATA_SAMPLE_NEGEDGE: * * Data is sampled on the falling edge of the pixel clock */ DRM_BUS_FLAG_PIXDATA_SAMPLE_NEGEDGE = DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE, /** * @DRM_BUS_FLAG_DATA_MSB_TO_LSB: * * Data is transmitted MSB to LSB on the bus */ DRM_BUS_FLAG_DATA_MSB_TO_LSB = BIT(4), /** * @DRM_BUS_FLAG_DATA_LSB_TO_MSB: * * Data is transmitted LSB to MSB on the bus */ DRM_BUS_FLAG_DATA_LSB_TO_MSB = BIT(5), /** * @DRM_BUS_FLAG_SYNC_DRIVE_POSEDGE: * * Sync signals are driven on the rising edge of the pixel clock */ DRM_BUS_FLAG_SYNC_DRIVE_POSEDGE = BIT(6), /** * @DRM_BUS_FLAG_SYNC_DRIVE_NEGEDGE: * * Sync signals are driven on the falling edge of the pixel clock */ DRM_BUS_FLAG_SYNC_DRIVE_NEGEDGE = BIT(7), /** * @DRM_BUS_FLAG_SYNC_SAMPLE_POSEDGE: * * Sync signals are sampled on the rising edge of the pixel clock */ DRM_BUS_FLAG_SYNC_SAMPLE_POSEDGE = DRM_BUS_FLAG_SYNC_DRIVE_NEGEDGE, /** * @DRM_BUS_FLAG_SYNC_SAMPLE_NEGEDGE: * * Sync signals are sampled on the falling edge of the pixel clock */ DRM_BUS_FLAG_SYNC_SAMPLE_NEGEDGE = DRM_BUS_FLAG_SYNC_DRIVE_POSEDGE, /** * @DRM_BUS_FLAG_SHARP_SIGNALS: * * Set if the Sharp-specific signals (SPL, CLS, PS, REV) must be used */ DRM_BUS_FLAG_SHARP_SIGNALS = BIT(8), }; /** * struct drm_display_info - runtime data about the connected sink * * Describes a given display (e.g. CRT or flat panel) and its limitations. For * fixed display sinks like built-in panels there's not much difference between * this and &struct drm_connector. But for sinks with a real cable this * structure is meant to describe all the things at the other end of the cable. * * For sinks which provide an EDID this can be filled out by calling * drm_add_edid_modes(). */ struct drm_display_info { /** * @width_mm: Physical width in mm. */ unsigned int width_mm; /** * @height_mm: Physical height in mm. */ unsigned int height_mm; /** * @bpc: Maximum bits per color channel. Used by HDMI and DP outputs. */ unsigned int bpc; /** * @subpixel_order: Subpixel order of LCD panels. */ enum subpixel_order subpixel_order; #define DRM_COLOR_FORMAT_RGB444 (1<<0) #define DRM_COLOR_FORMAT_YCBCR444 (1<<1) #define DRM_COLOR_FORMAT_YCBCR422 (1<<2) #define DRM_COLOR_FORMAT_YCBCR420 (1<<3) /** * @panel_orientation: Read only connector property for built-in panels, * indicating the orientation of the panel vs the device's casing. * drm_connector_init() sets this to DRM_MODE_PANEL_ORIENTATION_UNKNOWN. * When not UNKNOWN this gets used by the drm_fb_helpers to rotate the * fb to compensate and gets exported as prop to userspace. */ int panel_orientation; /** * @color_formats: HDMI Color formats, selects between RGB and YCrCb * modes. Used DRM_COLOR_FORMAT\_ defines, which are _not_ the same ones * as used to describe the pixel format in framebuffers, and also don't * match the formats in @bus_formats which are shared with v4l. */ u32 color_formats; /** * @bus_formats: Pixel data format on the wire, somewhat redundant with * @color_formats. Array of size @num_bus_formats encoded using * MEDIA_BUS_FMT\_ defines shared with v4l and media drivers. */ const u32 *bus_formats; /** * @num_bus_formats: Size of @bus_formats array. */ unsigned int num_bus_formats; /** * @bus_flags: Additional information (like pixel signal polarity) for * the pixel data on the bus, using &enum drm_bus_flags values * DRM_BUS_FLAGS\_. */ u32 bus_flags; /** * @max_tmds_clock: Maximum TMDS clock rate supported by the * sink in kHz. 0 means undefined. */ int max_tmds_clock; /** * @dvi_dual: Dual-link DVI sink? */ bool dvi_dual; /** * @is_hdmi: True if the sink is an HDMI device. * * This field shall be used instead of calling * drm_detect_hdmi_monitor() when possible. */ bool is_hdmi; /** * @has_audio: True if the sink supports audio. * * This field shall be used instead of calling * drm_detect_monitor_audio() when possible. */ bool has_audio; /** * @has_hdmi_infoframe: Does the sink support the HDMI infoframe? */ bool has_hdmi_infoframe; /** * @rgb_quant_range_selectable: Does the sink support selecting * the RGB quantization range? */ bool rgb_quant_range_selectable; /** * @edid_hdmi_rgb444_dc_modes: Mask of supported hdmi deep color modes * in RGB 4:4:4. Even more stuff redundant with @bus_formats. */ u8 edid_hdmi_rgb444_dc_modes; /** * @edid_hdmi_ycbcr444_dc_modes: Mask of supported hdmi deep color * modes in YCbCr 4:4:4. Even more stuff redundant with @bus_formats. */ u8 edid_hdmi_ycbcr444_dc_modes; /** * @cea_rev: CEA revision of the HDMI sink. */ u8 cea_rev; /** * @hdmi: advance features of a HDMI sink. */ struct drm_hdmi_info hdmi; /** * @non_desktop: Non desktop display (HMD). */ bool non_desktop; /** * @monitor_range: Frequency range supported by monitor range descriptor */ struct drm_monitor_range_info monitor_range; /** * @luminance_range: Luminance range supported by panel */ struct drm_luminance_range_info luminance_range; /** * @mso_stream_count: eDP Multi-SST Operation (MSO) stream count from * the DisplayID VESA vendor block. 0 for conventional Single-Stream * Transport (SST), or 2 or 4 MSO streams. */ u8 mso_stream_count; /** * @mso_pixel_overlap: eDP MSO segment pixel overlap, 0-8 pixels. */ u8 mso_pixel_overlap; /** * @max_dsc_bpp: Maximum DSC target bitrate, if it is set to 0 the * monitor's default value is used instead. */ u32 max_dsc_bpp; /** * @vics: Array of vics_len VICs. Internal to EDID parsing. */ u8 *vics; /** * @vics_len: Number of elements in vics. Internal to EDID parsing. */ int vics_len; /** * @quirks: EDID based quirks. Internal to EDID parsing. */ u32 quirks; /** * @source_physical_address: Source Physical Address from HDMI * Vendor-Specific Data Block, for CEC usage. * * Defaults to CEC_PHYS_ADDR_INVALID (0xffff). */ u16 source_physical_address; }; int drm_display_info_set_bus_formats(struct drm_display_info *info, const u32 *formats, unsigned int num_formats); /** * struct drm_connector_tv_margins - TV connector related margins * * Describes the margins in pixels to put around the image on TV * connectors to deal with overscan. */ struct drm_connector_tv_margins { /** * @bottom: Bottom margin in pixels. */ unsigned int bottom; /** * @left: Left margin in pixels. */ unsigned int left; /** * @right: Right margin in pixels. */ unsigned int right; /** * @top: Top margin in pixels. */ unsigned int top; }; /** * struct drm_tv_connector_state - TV connector related states * @select_subconnector: selected subconnector * @subconnector: detected subconnector * @margins: TV margins * @legacy_mode: Legacy TV mode, driver specific value * @mode: TV mode * @brightness: brightness in percent * @contrast: contrast in percent * @flicker_reduction: flicker reduction in percent * @overscan: overscan in percent * @saturation: saturation in percent * @hue: hue in percent */ struct drm_tv_connector_state { enum drm_mode_subconnector select_subconnector; enum drm_mode_subconnector subconnector; struct drm_connector_tv_margins margins; unsigned int legacy_mode; unsigned int mode; unsigned int brightness; unsigned int contrast; unsigned int flicker_reduction; unsigned int overscan; unsigned int saturation; unsigned int hue; }; /** * struct drm_connector_hdmi_infoframe - HDMI Infoframe container */ struct drm_connector_hdmi_infoframe { /** * @data: HDMI Infoframe structure */ union hdmi_infoframe data; /** * @set: Is the content of @data valid? */ bool set; }; /* * struct drm_connector_hdmi_state - HDMI state container */ struct drm_connector_hdmi_state { /** * @broadcast_rgb: Connector property to pass the * Broadcast RGB selection value. */ enum drm_hdmi_broadcast_rgb broadcast_rgb; /** * @infoframes: HDMI Infoframes matching that state */ struct { /** * @avi: AVI Infoframes structure matching our * state. */ struct drm_connector_hdmi_infoframe avi; /** * @hdr_drm: DRM (Dynamic Range and Mastering) * Infoframes structure matching our state. */ struct drm_connector_hdmi_infoframe hdr_drm; /** * @spd: SPD Infoframes structure matching our * state. */ struct drm_connector_hdmi_infoframe spd; /** * @vendor: HDMI Vendor Infoframes structure * matching our state. */ struct drm_connector_hdmi_infoframe hdmi; } infoframes; /** * @is_limited_range: Is the output supposed to use a limited * RGB Quantization Range or not? */ bool is_limited_range; /** * @output_bpc: Bits per color channel to output. */ unsigned int output_bpc; /** * @output_format: Pixel format to output in. */ enum hdmi_colorspace output_format; /** * @tmds_char_rate: TMDS Character Rate, in Hz. */ unsigned long long tmds_char_rate; }; /** * struct drm_connector_state - mutable connector state */ struct drm_connector_state { /** @connector: backpointer to the connector */ struct drm_connector *connector; /** * @crtc: CRTC to connect connector to, NULL if disabled. * * Do not change this directly, use drm_atomic_set_crtc_for_connector() * instead. */ struct drm_crtc *crtc; /** * @best_encoder: * * Used by the atomic helpers to select the encoder, through the * &drm_connector_helper_funcs.atomic_best_encoder or * &drm_connector_helper_funcs.best_encoder callbacks. * * This is also used in the atomic helpers to map encoders to their * current and previous connectors, see * drm_atomic_get_old_connector_for_encoder() and * drm_atomic_get_new_connector_for_encoder(). * * NOTE: Atomic drivers must fill this out (either themselves or through * helpers), for otherwise the GETCONNECTOR and GETENCODER IOCTLs will * not return correct data to userspace. */ struct drm_encoder *best_encoder; /** * @link_status: Connector link_status to keep track of whether link is * GOOD or BAD to notify userspace if retraining is necessary. */ enum drm_link_status link_status; /** @state: backpointer to global drm_atomic_state */ struct drm_atomic_state *state; /** * @commit: Tracks the pending commit to prevent use-after-free conditions. * * Is only set when @crtc is NULL. */ struct drm_crtc_commit *commit; /** @tv: TV connector state */ struct drm_tv_connector_state tv; /** * @self_refresh_aware: * * This tracks whether a connector is aware of the self refresh state. * It should be set to true for those connector implementations which * understand the self refresh state. This is needed since the crtc * registers the self refresh helpers and it doesn't know if the * connectors downstream have implemented self refresh entry/exit. * * Drivers should set this to true in atomic_check if they know how to * handle self_refresh requests. */ bool self_refresh_aware; /** * @picture_aspect_ratio: Connector property to control the * HDMI infoframe aspect ratio setting. * * The %DRM_MODE_PICTURE_ASPECT_\* values much match the * values for &enum hdmi_picture_aspect */ enum hdmi_picture_aspect picture_aspect_ratio; /** * @content_type: Connector property to control the * HDMI infoframe content type setting. * The %DRM_MODE_CONTENT_TYPE_\* values much * match the values. */ unsigned int content_type; /** * @hdcp_content_type: Connector property to pass the type of * protected content. This is most commonly used for HDCP. */ unsigned int hdcp_content_type; /** * @scaling_mode: Connector property to control the * upscaling, mostly used for built-in panels. */ unsigned int scaling_mode; /** * @content_protection: Connector property to request content * protection. This is most commonly used for HDCP. */ unsigned int content_protection; /** * @colorspace: State variable for Connector property to request * colorspace change on Sink. This is most commonly used to switch * to wider color gamuts like BT2020. */ enum drm_colorspace colorspace; /** * @writeback_job: Writeback job for writeback connectors * * Holds the framebuffer and out-fence for a writeback connector. As * the writeback completion may be asynchronous to the normal commit * cycle, the writeback job lifetime is managed separately from the * normal atomic state by this object. * * See also: drm_writeback_queue_job() and * drm_writeback_signal_completion() */ struct drm_writeback_job *writeback_job; /** * @max_requested_bpc: Connector property to limit the maximum bit * depth of the pixels. */ u8 max_requested_bpc; /** * @max_bpc: Connector max_bpc based on the requested max_bpc property * and the connector bpc limitations obtained from edid. */ u8 max_bpc; /** * @privacy_screen_sw_state: See :ref:`Standard Connector * Properties<standard_connector_properties>` */ enum drm_privacy_screen_status privacy_screen_sw_state; /** * @hdr_output_metadata: * DRM blob property for HDR output metadata */ struct drm_property_blob *hdr_output_metadata; /** * @hdmi: HDMI-related variable and properties. Filled by * @drm_atomic_helper_connector_hdmi_check(). */ struct drm_connector_hdmi_state hdmi; }; struct drm_connector_hdmi_audio_funcs { /** * @startup: * * Called when ASoC starts an audio stream setup. The * @startup() is optional. * * Returns: * 0 on success, a negative error code otherwise */ int (*startup)(struct drm_connector *connector); /** * @prepare: * Configures HDMI-encoder for audio stream. Can be called * multiple times for each setup. Mandatory. * * Returns: * 0 on success, a negative error code otherwise */ int (*prepare)(struct drm_connector *connector, struct hdmi_codec_daifmt *fmt, struct hdmi_codec_params *hparms); /** * @shutdown: * * Shut down the audio stream. Mandatory. * * Returns: * 0 on success, a negative error code otherwise */ void (*shutdown)(struct drm_connector *connector); /** * @mute_stream: * * Mute/unmute HDMI audio stream. The @mute_stream callback is * optional. * * Returns: * 0 on success, a negative error code otherwise */ int (*mute_stream)(struct drm_connector *connector, bool enable, int direction); }; /** * struct drm_connector_hdmi_funcs - drm_hdmi_connector control functions */ struct drm_connector_hdmi_funcs { /** * @tmds_char_rate_valid: * * This callback is invoked at atomic_check time to figure out * whether a particular TMDS character rate is supported by the * driver. * * The @tmds_char_rate_valid callback is optional. * * Returns: * * Either &drm_mode_status.MODE_OK or one of the failure reasons * in &enum drm_mode_status. */ enum drm_mode_status (*tmds_char_rate_valid)(const struct drm_connector *connector, const struct drm_display_mode *mode, unsigned long long tmds_rate); /** * @clear_infoframe: * * This callback is invoked through * @drm_atomic_helper_connector_hdmi_update_infoframes during a * commit to clear the infoframes into the hardware. It will be * called multiple times, once for every disabled infoframe * type. * * The @clear_infoframe callback is optional. * * Returns: * 0 on success, a negative error code otherwise */ int (*clear_infoframe)(struct drm_connector *connector, enum hdmi_infoframe_type type); /** * @write_infoframe: * * This callback is invoked through * @drm_atomic_helper_connector_hdmi_update_infoframes during a * commit to program the infoframes into the hardware. It will * be called multiple times, once for every updated infoframe * type. * * The @write_infoframe callback is mandatory. * * Returns: * 0 on success, a negative error code otherwise */ int (*write_infoframe)(struct drm_connector *connector, enum hdmi_infoframe_type type, const u8 *buffer, size_t len); /** * @read_edid: * * This callback is used by the framework as a replacement for reading * the EDID from connector->ddc. It is still recommended to provide * connector->ddc instead of implementing this callback. Returned EDID * should be freed via the drm_edid_free(). * * The @read_edid callback is optional. * * Returns: * Valid EDID on success, NULL in case of failure. */ const struct drm_edid *(*read_edid)(struct drm_connector *connector); }; /** * struct drm_connector_funcs - control connectors on a given device * * Each CRTC may have one or more connectors attached to it. The functions * below allow the core DRM code to control connectors, enumerate available modes, * etc. */ struct drm_connector_funcs { /** * @dpms: * * Legacy entry point to set the per-connector DPMS state. Legacy DPMS * is exposed as a standard property on the connector, but diverted to * this callback in the drm core. Note that atomic drivers don't * implement the 4 level DPMS support on the connector any more, but * instead only have an on/off "ACTIVE" property on the CRTC object. * * This hook is not used by atomic drivers, remapping of the legacy DPMS * property is entirely handled in the DRM core. * * RETURNS: * * 0 on success or a negative error code on failure. */ int (*dpms)(struct drm_connector *connector, int mode); /** * @reset: * * Reset connector hardware and software state to off. This function isn't * called by the core directly, only through drm_mode_config_reset(). * It's not a helper hook only for historical reasons. * * Atomic drivers can use drm_atomic_helper_connector_reset() to reset * atomic state using this hook. */ void (*reset)(struct drm_connector *connector); /** * @detect: * * Check to see if anything is attached to the connector. The parameter * force is set to false whilst polling, true when checking the * connector due to a user request. force can be used by the driver to * avoid expensive, destructive operations during automated probing. * * This callback is optional, if not implemented the connector will be * considered as always being attached. * * FIXME: * * Note that this hook is only called by the probe helper. It's not in * the helper library vtable purely for historical reasons. The only DRM * core entry point to probe connector state is @fill_modes. * * Note that the helper library will already hold * &drm_mode_config.connection_mutex. Drivers which need to grab additional * locks to avoid races with concurrent modeset changes need to use * &drm_connector_helper_funcs.detect_ctx instead. * * Also note that this callback can be called no matter the * state the connector is in. Drivers that need the underlying * device to be powered to perform the detection will first need * to make sure it's been properly enabled. * * RETURNS: * * drm_connector_status indicating the connector's status. */ enum drm_connector_status (*detect)(struct drm_connector *connector, bool force); /** * @force: * * This function is called to update internal encoder state when the * connector is forced to a certain state by userspace, either through * the sysfs interfaces or on the kernel cmdline. In that case the * @detect callback isn't called. * * FIXME: * * Note that this hook is only called by the probe helper. It's not in * the helper library vtable purely for historical reasons. The only DRM * core entry point to probe connector state is @fill_modes. */ void (*force)(struct drm_connector *connector); /** * @fill_modes: * * Entry point for output detection and basic mode validation. The * driver should reprobe the output if needed (e.g. when hotplug * handling is unreliable), add all detected modes to &drm_connector.modes * and filter out any the device can't support in any configuration. It * also needs to filter out any modes wider or higher than the * parameters max_width and max_height indicate. * * The drivers must also prune any modes no longer valid from * &drm_connector.modes. Furthermore it must update * &drm_connector.status and &drm_connector.edid. If no EDID has been * received for this output connector->edid must be NULL. * * Drivers using the probe helpers should use * drm_helper_probe_single_connector_modes() to implement this * function. * * RETURNS: * * The number of modes detected and filled into &drm_connector.modes. */ int (*fill_modes)(struct drm_connector *connector, uint32_t max_width, uint32_t max_height); /** * @set_property: * * This is the legacy entry point to update a property attached to the * connector. * * This callback is optional if the driver does not support any legacy * driver-private properties. For atomic drivers it is not used because * property handling is done entirely in the DRM core. * * RETURNS: * * 0 on success or a negative error code on failure. */ int (*set_property)(struct drm_connector *connector, struct drm_property *property, uint64_t val); /** * @late_register: * * This optional hook can be used to register additional userspace * interfaces attached to the connector, light backlight control, i2c, * DP aux or similar interfaces. It is called late in the driver load * sequence from drm_connector_register() when registering all the * core drm connector interfaces. Everything added from this callback * should be unregistered in the early_unregister callback. * * This is called while holding &drm_connector.mutex. * * Returns: * * 0 on success, or a negative error code on failure. */ int (*late_register)(struct drm_connector *connector); /** * @early_unregister: * * This optional hook should be used to unregister the additional * userspace interfaces attached to the connector from * late_register(). It is called from drm_connector_unregister(), * early in the driver unload sequence to disable userspace access * before data structures are torndown. * * This is called while holding &drm_connector.mutex. */ void (*early_unregister)(struct drm_connector *connector); /** * @destroy: * * Clean up connector resources. This is called at driver unload time * through drm_mode_config_cleanup(). It can also be called at runtime * when a connector is being hot-unplugged for drivers that support * connector hotplugging (e.g. DisplayPort MST). */ void (*destroy)(struct drm_connector *connector); /** * @atomic_duplicate_state: * * Duplicate the current atomic state for this connector and return it. * The core and helpers guarantee that any atomic state duplicated with * this hook and still owned by the caller (i.e. not transferred to the * driver by calling &drm_mode_config_funcs.atomic_commit) will be * cleaned up by calling the @atomic_destroy_state hook in this * structure. * * This callback is mandatory for atomic drivers. * * Atomic drivers which don't subclass &struct drm_connector_state should use * drm_atomic_helper_connector_duplicate_state(). Drivers that subclass the * state structure to extend it with driver-private state should use * __drm_atomic_helper_connector_duplicate_state() to make sure shared state is * duplicated in a consistent fashion across drivers. * * It is an error to call this hook before &drm_connector.state has been * initialized correctly. * * NOTE: * * If the duplicate state references refcounted resources this hook must * acquire a reference for each of them. The driver must release these * references again in @atomic_destroy_state. * * RETURNS: * * Duplicated atomic state or NULL when the allocation failed. */ struct drm_connector_state *(*atomic_duplicate_state)(struct drm_connector *connector); /** * @atomic_destroy_state: * * Destroy a state duplicated with @atomic_duplicate_state and release * or unreference all resources it references * * This callback is mandatory for atomic drivers. */ void (*atomic_destroy_state)(struct drm_connector *connector, struct drm_connector_state *state); /** * @atomic_set_property: * * Decode a driver-private property value and store the decoded value * into the passed-in state structure. Since the atomic core decodes all * standardized properties (even for extensions beyond the core set of * properties which might not be implemented by all drivers) this * requires drivers to subclass the state structure. * * Such driver-private properties should really only be implemented for * truly hardware/vendor specific state. Instead it is preferred to * standardize atomic extension and decode the properties used to expose * such an extension in the core. * * Do not call this function directly, use * drm_atomic_connector_set_property() instead. * * This callback is optional if the driver does not support any * driver-private atomic properties. * * NOTE: * * This function is called in the state assembly phase of atomic * modesets, which can be aborted for any reason (including on * userspace's request to just check whether a configuration would be * possible). Drivers MUST NOT touch any persistent state (hardware or * software) or data structures except the passed in @state parameter. * * Also since userspace controls in which order properties are set this * function must not do any input validation (since the state update is * incomplete and hence likely inconsistent). Instead any such input * validation must be done in the various atomic_check callbacks. * * RETURNS: * * 0 if the property has been found, -EINVAL if the property isn't * implemented by the driver (which shouldn't ever happen, the core only * asks for properties attached to this connector). No other validation * is allowed by the driver. The core already checks that the property * value is within the range (integer, valid enum value, ...) the driver * set when registering the property. */ int (*atomic_set_property)(struct drm_connector *connector, struct drm_connector_state *state, struct drm_property *property, uint64_t val); /** * @atomic_get_property: * * Reads out the decoded driver-private property. This is used to * implement the GETCONNECTOR IOCTL. * * Do not call this function directly, use * drm_atomic_connector_get_property() instead. * * This callback is optional if the driver does not support any * driver-private atomic properties. * * RETURNS: * * 0 on success, -EINVAL if the property isn't implemented by the * driver (which shouldn't ever happen, the core only asks for * properties attached to this connector). */ int (*atomic_get_property)(struct drm_connector *connector, const struct drm_connector_state *state, struct drm_property *property, uint64_t *val); /** * @atomic_print_state: * * If driver subclasses &struct drm_connector_state, it should implement * this optional hook for printing additional driver specific state. * * Do not call this directly, use drm_atomic_connector_print_state() * instead. */ void (*atomic_print_state)(struct drm_printer *p, const struct drm_connector_state *state); /** * @oob_hotplug_event: * * This will get called when a hotplug-event for a drm-connector * has been received from a source outside the display driver / device. */ void (*oob_hotplug_event)(struct drm_connector *connector, enum drm_connector_status status); /** * @debugfs_init: * * Allows connectors to create connector-specific debugfs files. */ void (*debugfs_init)(struct drm_connector *connector, struct dentry *root); }; /** * struct drm_cmdline_mode - DRM Mode passed through the kernel command-line * * Each connector can have an initial mode with additional options * passed through the kernel command line. This structure allows to * express those parameters and will be filled by the command-line * parser. */ struct drm_cmdline_mode { /** * @name: * * Name of the mode. */ char name[DRM_DISPLAY_MODE_LEN]; /** * @specified: * * Has a mode been read from the command-line? */ bool specified; /** * @refresh_specified: * * Did the mode have a preferred refresh rate? */ bool refresh_specified; /** * @bpp_specified: * * Did the mode have a preferred BPP? */ bool bpp_specified; /** * @pixel_clock: * * Pixel Clock in kHz. Optional. */ unsigned int pixel_clock; /** * @xres: * * Active resolution on the X axis, in pixels. */ int xres; /** * @yres: * * Active resolution on the Y axis, in pixels. */ int yres; /** * @bpp: * * Bits per pixels for the mode. */ int bpp; /** * @refresh: * * Refresh rate, in Hertz. */ int refresh; /** * @rb: * * Do we need to use reduced blanking? */ bool rb; /** * @interlace: * * The mode is interlaced. */ bool interlace; /** * @cvt: * * The timings will be calculated using the VESA Coordinated * Video Timings instead of looking up the mode from a table. */ bool cvt; /** * @margins: * * Add margins to the mode calculation (1.8% of xres rounded * down to 8 pixels and 1.8% of yres). */ bool margins; /** * @force: * * Ignore the hotplug state of the connector, and force its * state to one of the DRM_FORCE_* values. */ enum drm_connector_force force; /** * @rotation_reflection: * * Initial rotation and reflection of the mode setup from the * command line. See DRM_MODE_ROTATE_* and * DRM_MODE_REFLECT_*. The only rotations supported are * DRM_MODE_ROTATE_0 and DRM_MODE_ROTATE_180. */ unsigned int rotation_reflection; /** * @panel_orientation: * * drm-connector "panel orientation" property override value, * DRM_MODE_PANEL_ORIENTATION_UNKNOWN if not set. */ enum drm_panel_orientation panel_orientation; /** * @tv_margins: TV margins to apply to the mode. */ struct drm_connector_tv_margins tv_margins; /** * @tv_mode: TV mode standard. See DRM_MODE_TV_MODE_*. */ enum drm_connector_tv_mode tv_mode; /** * @tv_mode_specified: * * Did the mode have a preferred TV mode? */ bool tv_mode_specified; }; /** * struct drm_connector_hdmi_audio - DRM gemeric HDMI Codec-related structure * * HDMI drivers usually incorporate a HDMI Codec. This structure expresses the * generic HDMI Codec as used by the DRM HDMI Codec framework. */ struct drm_connector_hdmi_audio { /** * @funcs: * * Implementation of the HDMI codec functionality to be used by the DRM * HDMI Codec framework. */ const struct drm_connector_hdmi_audio_funcs *funcs; /** * @codec_pdev: * * Platform device created to hold the HDMI Codec. It will be * automatically unregistered during drm_connector_cleanup(). */ struct platform_device *codec_pdev; /** * @lock: * * Mutex to protect @last_state, @plugged_cb and @plugged_cb_dev. */ struct mutex lock; /** * @plugged_cb: * * Callback to be called when the HDMI sink get plugged to or unplugged * from this connector. This is assigned by the framework when * requested by the ASoC code. */ void (*plugged_cb)(struct device *dev, bool plugged); /** * @plugged_cb_dev: * * The data for @plugged_cb(). It is being provided by the ASoC. */ struct device *plugged_cb_dev; /** * @last_state: * * Last plugged state recored by the framework. It is used to correctly * report the state to @plugged_cb(). */ bool last_state; /** * @dai_port: * * The port in DT that is used for the Codec DAI. */ int dai_port; }; /* * struct drm_connector_hdmi - DRM Connector HDMI-related structure */ struct drm_connector_hdmi { #define DRM_CONNECTOR_HDMI_VENDOR_LEN 8 /** * @vendor: HDMI Controller Vendor Name */ unsigned char vendor[DRM_CONNECTOR_HDMI_VENDOR_LEN] __nonstring; #define DRM_CONNECTOR_HDMI_PRODUCT_LEN 16 /** * @product: HDMI Controller Product Name */ unsigned char product[DRM_CONNECTOR_HDMI_PRODUCT_LEN] __nonstring; /** * @supported_formats: Bitmask of @hdmi_colorspace * supported by the controller. */ unsigned long supported_formats; /** * @funcs: HDMI connector Control Functions */ const struct drm_connector_hdmi_funcs *funcs; /** * @infoframes: Current Infoframes output by the connector */ struct { /** * @lock: Mutex protecting against concurrent access to * the infoframes, most notably between KMS and ALSA. */ struct mutex lock; /** * @audio: Current Audio Infoframes structure. Protected * by @lock. */ struct drm_connector_hdmi_infoframe audio; } infoframes; }; /** * struct drm_connector - central DRM connector control structure * * Each connector may be connected to one or more CRTCs, or may be clonable by * another connector if they can share a CRTC. Each connector also has a specific * position in the broader display (referred to as a 'screen' though it could * span multiple monitors). */ struct drm_connector { /** @dev: parent DRM device */ struct drm_device *dev; /** @kdev: kernel device for sysfs attributes */ struct device *kdev; /** @attr: sysfs attributes */ struct device_attribute *attr; /** * @fwnode: associated fwnode supplied by platform firmware * * Drivers can set this to associate a fwnode with a connector, drivers * are expected to get a reference on the fwnode when setting this. * drm_connector_cleanup() will call fwnode_handle_put() on this. */ struct fwnode_handle *fwnode; /** * @head: * * List of all connectors on a @dev, linked from * &drm_mode_config.connector_list. Protected by * &drm_mode_config.connector_list_lock, but please only use * &drm_connector_list_iter to walk this list. */ struct list_head head; /** * @global_connector_list_entry: * * Connector entry in the global connector-list, used by * drm_connector_find_by_fwnode(). */ struct list_head global_connector_list_entry; /** @base: base KMS object */ struct drm_mode_object base; /** @name: human readable name, can be overwritten by the driver */ char *name; /** * @mutex: Lock for general connector state, but currently only protects * @registered. Most of the connector state is still protected by * &drm_mode_config.mutex. */ struct mutex mutex; /** * @index: Compacted connector index, which matches the position inside * the mode_config.list for drivers not supporting hot-add/removing. Can * be used as an array index. It is invariant over the lifetime of the * connector. */ unsigned index; /** * @connector_type: * one of the DRM_MODE_CONNECTOR_<foo> types from drm_mode.h */ int connector_type; /** @connector_type_id: index into connector type enum */ int connector_type_id; /** * @interlace_allowed: * Can this connector handle interlaced modes? Only used by * drm_helper_probe_single_connector_modes() for mode filtering. */ bool interlace_allowed; /** * @doublescan_allowed: * Can this connector handle doublescan? Only used by * drm_helper_probe_single_connector_modes() for mode filtering. */ bool doublescan_allowed; /** * @stereo_allowed: * Can this connector handle stereo modes? Only used by * drm_helper_probe_single_connector_modes() for mode filtering. */ bool stereo_allowed; /** * @ycbcr_420_allowed : This bool indicates if this connector is * capable of handling YCBCR 420 output. While parsing the EDID * blocks it's very helpful to know if the source is capable of * handling YCBCR 420 outputs. */ bool ycbcr_420_allowed; /** * @registration_state: Is this connector initializing, exposed * (registered) with userspace, or unregistered? * * Protected by @mutex. */ enum drm_connector_registration_state registration_state; /** * @modes: * Modes available on this connector (from fill_modes() + user). * Protected by &drm_mode_config.mutex. */ struct list_head modes; /** * @status: * One of the drm_connector_status enums (connected, not, or unknown). * Protected by &drm_mode_config.mutex. */ enum drm_connector_status status; /** * @probed_modes: * These are modes added by probing with DDC or the BIOS, before * filtering is applied. Used by the probe helpers. Protected by * &drm_mode_config.mutex. */ struct list_head probed_modes; /** * @display_info: Display information is filled from EDID information * when a display is detected. For non hot-pluggable displays such as * flat panels in embedded systems, the driver should initialize the * &drm_display_info.width_mm and &drm_display_info.height_mm fields * with the physical size of the display. * * Protected by &drm_mode_config.mutex. */ struct drm_display_info display_info; /** @funcs: connector control functions */ const struct drm_connector_funcs *funcs; /** * @edid_blob_ptr: DRM property containing EDID if present. Protected by * &drm_mode_config.mutex. * * This must be updated only by calling drm_edid_connector_update() or * drm_connector_update_edid_property(). * * This must not be used by drivers directly. */ struct drm_property_blob *edid_blob_ptr; /** @properties: property tracking for this connector */ struct drm_object_properties properties; /** * @scaling_mode_property: Optional atomic property to control the * upscaling. See drm_connector_attach_content_protection_property(). */ struct drm_property *scaling_mode_property; /** * @vrr_capable_property: Optional property to help userspace * query hardware support for variable refresh rate on a connector. * connector. Drivers can add the property to a connector by * calling drm_connector_attach_vrr_capable_property(). * * This should be updated only by calling * drm_connector_set_vrr_capable_property(). */ struct drm_property *vrr_capable_property; /** * @colorspace_property: Connector property to set the suitable * colorspace supported by the sink. */ struct drm_property *colorspace_property; /** * @path_blob_ptr: * * DRM blob property data for the DP MST path property. This should only * be updated by calling drm_connector_set_path_property(). */ struct drm_property_blob *path_blob_ptr; /** * @max_bpc: Maximum bits per color channel the connector supports. */ unsigned int max_bpc; /** * @max_bpc_property: Default connector property for the max bpc to be * driven out of the connector. */ struct drm_property *max_bpc_property; /** @privacy_screen: drm_privacy_screen for this connector, or NULL. */ struct drm_privacy_screen *privacy_screen; /** @privacy_screen_notifier: privacy-screen notifier_block */ struct notifier_block privacy_screen_notifier; /** * @privacy_screen_sw_state_property: Optional atomic property for the * connector to control the integrated privacy screen. */ struct drm_property *privacy_screen_sw_state_property; /** * @privacy_screen_hw_state_property: Optional atomic property for the * connector to report the actual integrated privacy screen state. */ struct drm_property *privacy_screen_hw_state_property; /** * @broadcast_rgb_property: Connector property to set the * Broadcast RGB selection to output with. */ struct drm_property *broadcast_rgb_property; #define DRM_CONNECTOR_POLL_HPD (1 << 0) #define DRM_CONNECTOR_POLL_CONNECT (1 << 1) #define DRM_CONNECTOR_POLL_DISCONNECT (1 << 2) /** * @polled: * * Connector polling mode, a combination of * * DRM_CONNECTOR_POLL_HPD * The connector generates hotplug events and doesn't need to be * periodically polled. The CONNECT and DISCONNECT flags must not * be set together with the HPD flag. * * DRM_CONNECTOR_POLL_CONNECT * Periodically poll the connector for connection. * * DRM_CONNECTOR_POLL_DISCONNECT * Periodically poll the connector for disconnection, without * causing flickering even when the connector is in use. DACs should * rarely do this without a lot of testing. * * Set to 0 for connectors that don't support connection status * discovery. */ uint8_t polled; /** * @dpms: Current dpms state. For legacy drivers the * &drm_connector_funcs.dpms callback must update this. For atomic * drivers, this is handled by the core atomic code, and drivers must * only take &drm_crtc_state.active into account. */ int dpms; /** @helper_private: mid-layer private data */ const struct drm_connector_helper_funcs *helper_private; /** @cmdline_mode: mode line parsed from the kernel cmdline for this connector */ struct drm_cmdline_mode cmdline_mode; /** @force: a DRM_FORCE_<foo> state for forced mode sets */ enum drm_connector_force force; /** * @edid_override: Override EDID set via debugfs. * * Do not modify or access outside of the drm_edid_override_* family of * functions. */ const struct drm_edid *edid_override; /** * @edid_override_mutex: Protect access to edid_override. */ struct mutex edid_override_mutex; /** @epoch_counter: used to detect any other changes in connector, besides status */ u64 epoch_counter; /** * @possible_encoders: Bit mask of encoders that can drive this * connector, drm_encoder_index() determines the index into the bitfield * and the bits are set with drm_connector_attach_encoder(). */ u32 possible_encoders; /** * @encoder: Currently bound encoder driving this connector, if any. * Only really meaningful for non-atomic drivers. Atomic drivers should * instead look at &drm_connector_state.best_encoder, and in case they * need the CRTC driving this output, &drm_connector_state.crtc. */ struct drm_encoder *encoder; #define MAX_ELD_BYTES 128 /** @eld: EDID-like data, if present, protected by @eld_mutex */ uint8_t eld[MAX_ELD_BYTES]; /** @eld_mutex: protection for concurrenct access to @eld */ struct mutex eld_mutex; /** @latency_present: AV delay info from ELD, if found */ bool latency_present[2]; /** * @video_latency: Video latency info from ELD, if found. * [0]: progressive, [1]: interlaced */ int video_latency[2]; /** * @audio_latency: audio latency info from ELD, if found * [0]: progressive, [1]: interlaced */ int audio_latency[2]; /** * @ddc: associated ddc adapter. * A connector usually has its associated ddc adapter. If a driver uses * this field, then an appropriate symbolic link is created in connector * sysfs directory to make it easy for the user to tell which i2c * adapter is for a particular display. * * The field should be set by calling drm_connector_init_with_ddc(). */ struct i2c_adapter *ddc; /** * @null_edid_counter: track sinks that give us all zeros for the EDID. * Needed to workaround some HW bugs where we get all 0s */ int null_edid_counter; /** @bad_edid_counter: track sinks that give us an EDID with invalid checksum */ unsigned bad_edid_counter; /** * @edid_corrupt: Indicates whether the last read EDID was corrupt. Used * in Displayport compliance testing - Displayport Link CTS Core 1.2 * rev1.1 4.2.2.6 */ bool edid_corrupt; /** * @real_edid_checksum: real edid checksum for corrupted edid block. * Required in Displayport 1.4 compliance testing * rev1.1 4.2.2.6 */ u8 real_edid_checksum; /** @debugfs_entry: debugfs directory for this connector */ struct dentry *debugfs_entry; /** * @state: * * Current atomic state for this connector. * * This is protected by &drm_mode_config.connection_mutex. Note that * nonblocking atomic commits access the current connector state without * taking locks. Either by going through the &struct drm_atomic_state * pointers, see for_each_oldnew_connector_in_state(), * for_each_old_connector_in_state() and * for_each_new_connector_in_state(). Or through careful ordering of * atomic commit operations as implemented in the atomic helpers, see * &struct drm_crtc_commit. */ struct drm_connector_state *state; /* DisplayID bits. FIXME: Extract into a substruct? */ /** * @tile_blob_ptr: * * DRM blob property data for the tile property (used mostly by DP MST). * This is meant for screens which are driven through separate display * pipelines represented by &drm_crtc, which might not be running with * genlocked clocks. For tiled panels which are genlocked, like * dual-link LVDS or dual-link DSI, the driver should try to not expose * the tiling and virtualize both &drm_crtc and &drm_plane if needed. * * This should only be updated by calling * drm_connector_set_tile_property(). */ struct drm_property_blob *tile_blob_ptr; /** @has_tile: is this connector connected to a tiled monitor */ bool has_tile; /** @tile_group: tile group for the connected monitor */ struct drm_tile_group *tile_group; /** @tile_is_single_monitor: whether the tile is one monitor housing */ bool tile_is_single_monitor; /** @num_h_tile: number of horizontal tiles in the tile group */ /** @num_v_tile: number of vertical tiles in the tile group */ uint8_t num_h_tile, num_v_tile; /** @tile_h_loc: horizontal location of this tile */ /** @tile_v_loc: vertical location of this tile */ uint8_t tile_h_loc, tile_v_loc; /** @tile_h_size: horizontal size of this tile. */ /** @tile_v_size: vertical size of this tile. */ uint16_t tile_h_size, tile_v_size; /** * @free_node: * * List used only by &drm_connector_list_iter to be able to clean up a * connector from any context, in conjunction with * &drm_mode_config.connector_free_work. */ struct llist_node free_node; /** @hdr_sink_metadata: HDR Metadata Information read from sink */ struct hdr_sink_metadata hdr_sink_metadata; /** * @hdmi: HDMI-related variable and properties. */ struct drm_connector_hdmi hdmi; /** * @hdmi_audio: HDMI codec properties and non-DRM state. */ struct drm_connector_hdmi_audio hdmi_audio; }; #define obj_to_connector(x) container_of(x, struct drm_connector, base) int drm_connector_init(struct drm_device *dev, struct drm_connector *connector, const struct drm_connector_funcs *funcs, int connector_type); int drm_connector_dynamic_init(struct drm_device *dev, struct drm_connector *connector, const struct drm_connector_funcs *funcs, int connector_type, struct i2c_adapter *ddc); int drm_connector_init_with_ddc(struct drm_device *dev, struct drm_connector *connector, const struct drm_connector_funcs *funcs, int connector_type, struct i2c_adapter *ddc); int drmm_connector_init(struct drm_device *dev, struct drm_connector *connector, const struct drm_connector_funcs *funcs, int connector_type, struct i2c_adapter *ddc); int drmm_connector_hdmi_init(struct drm_device *dev, struct drm_connector *connector, const char *vendor, const char *product, const struct drm_connector_funcs *funcs, const struct drm_connector_hdmi_funcs *hdmi_funcs, int connector_type, struct i2c_adapter *ddc, unsigned long supported_formats, unsigned int max_bpc); void drm_connector_attach_edid_property(struct drm_connector *connector); int drm_connector_register(struct drm_connector *connector); int drm_connector_dynamic_register(struct drm_connector *connector); void drm_connector_unregister(struct drm_connector *connector); int drm_connector_attach_encoder(struct drm_connector *connector, struct drm_encoder *encoder); void drm_connector_cleanup(struct drm_connector *connector); static inline unsigned int drm_connector_index(const struct drm_connector *connector) { return connector->index; } static inline u32 drm_connector_mask(const struct drm_connector *connector) { return 1 << connector->index; } /** * drm_connector_lookup - lookup connector object * @dev: DRM device * @file_priv: drm file to check for lease against. * @id: connector object id * * This function looks up the connector object specified by id * add takes a reference to it. */ static inline struct drm_connector *drm_connector_lookup(struct drm_device *dev, struct drm_file *file_priv, uint32_t id) { struct drm_mode_object *mo; mo = drm_mode_object_find(dev, file_priv, id, DRM_MODE_OBJECT_CONNECTOR); return mo ? obj_to_connector(mo) : NULL; } /** * drm_connector_get - acquire a connector reference * @connector: DRM connector * * This function increments the connector's refcount. */ static inline void drm_connector_get(struct drm_connector *connector) { drm_mode_object_get(&connector->base); } /** * drm_connector_put - release a connector reference * @connector: DRM connector * * This function decrements the connector's reference count and frees the * object if the reference count drops to zero. */ static inline void drm_connector_put(struct drm_connector *connector) { drm_mode_object_put(&connector->base); } /** * drm_connector_is_unregistered - has the connector been unregistered from * userspace? * @connector: DRM connector * * Checks whether or not @connector has been unregistered from userspace. * * Returns: * True if the connector was unregistered, false if the connector is * registered or has not yet been registered with userspace. */ static inline bool drm_connector_is_unregistered(struct drm_connector *connector) { return READ_ONCE(connector->registration_state) == DRM_CONNECTOR_UNREGISTERED; } void drm_connector_oob_hotplug_event(struct fwnode_handle *connector_fwnode, enum drm_connector_status status); const char *drm_get_connector_type_name(unsigned int connector_type); const char *drm_get_connector_status_name(enum drm_connector_status status); const char *drm_get_subpixel_order_name(enum subpixel_order order); const char *drm_get_dpms_name(int val); const char *drm_get_dvi_i_subconnector_name(int val); const char *drm_get_dvi_i_select_name(int val); const char *drm_get_tv_mode_name(int val); const char *drm_get_tv_subconnector_name(int val); const char *drm_get_tv_select_name(int val); const char *drm_get_dp_subconnector_name(int val); const char *drm_get_content_protection_name(int val); const char *drm_get_hdcp_content_type_name(int val); int drm_get_tv_mode_from_name(const char *name, size_t len); int drm_mode_create_dvi_i_properties(struct drm_device *dev); void drm_connector_attach_dp_subconnector_property(struct drm_connector *connector); int drm_mode_create_tv_margin_properties(struct drm_device *dev); int drm_mode_create_tv_properties_legacy(struct drm_device *dev, unsigned int num_modes, const char * const modes[]); int drm_mode_create_tv_properties(struct drm_device *dev, unsigned int supported_tv_modes); void drm_connector_attach_tv_margin_properties(struct drm_connector *conn); int drm_mode_create_scaling_mode_property(struct drm_device *dev); int drm_connector_attach_content_type_property(struct drm_connector *dev); int drm_connector_attach_scaling_mode_property(struct drm_connector *connector, u32 scaling_mode_mask); int drm_connector_attach_vrr_capable_property( struct drm_connector *connector); int drm_connector_attach_broadcast_rgb_property(struct drm_connector *connector); int drm_connector_attach_colorspace_property(struct drm_connector *connector); int drm_connector_attach_hdr_output_metadata_property(struct drm_connector *connector); bool drm_connector_atomic_hdr_metadata_equal(struct drm_connector_state *old_state, struct drm_connector_state *new_state); int drm_mode_create_aspect_ratio_property(struct drm_device *dev); int drm_mode_create_hdmi_colorspace_property(struct drm_connector *connector, u32 supported_colorspaces); int drm_mode_create_dp_colorspace_property(struct drm_connector *connector, u32 supported_colorspaces); int drm_mode_create_content_type_property(struct drm_device *dev); int drm_mode_create_suggested_offset_properties(struct drm_device *dev); int drm_connector_set_path_property(struct drm_connector *connector, const char *path); int drm_connector_set_tile_property(struct drm_connector *connector); int drm_connector_update_edid_property(struct drm_connector *connector, const struct edid *edid); void drm_connector_set_link_status_property(struct drm_connector *connector, uint64_t link_status); void drm_connector_set_vrr_capable_property( struct drm_connector *connector, bool capable); int drm_connector_set_panel_orientation( struct drm_connector *connector, enum drm_panel_orientation panel_orientation); int drm_connector_set_panel_orientation_with_quirk( struct drm_connector *connector, enum drm_panel_orientation panel_orientation, int width, int height); int drm_connector_set_orientation_from_panel( struct drm_connector *connector, struct drm_panel *panel); int drm_connector_attach_max_bpc_property(struct drm_connector *connector, int min, int max); void drm_connector_create_privacy_screen_properties(struct drm_connector *conn); void drm_connector_attach_privacy_screen_properties(struct drm_connector *conn); void drm_connector_attach_privacy_screen_provider( struct drm_connector *connector, struct drm_privacy_screen *priv); void drm_connector_update_privacy_screen(const struct drm_connector_state *connector_state); /** * struct drm_tile_group - Tile group metadata * @refcount: reference count * @dev: DRM device * @id: tile group id exposed to userspace * @group_data: Sink-private data identifying this group * * @group_data corresponds to displayid vend/prod/serial for external screens * with an EDID. */ struct drm_tile_group { struct kref refcount; struct drm_device *dev; int id; u8 group_data[8]; }; struct drm_tile_group *drm_mode_create_tile_group(struct drm_device *dev, const char topology[8]); struct drm_tile_group *drm_mode_get_tile_group(struct drm_device *dev, const char topology[8]); void drm_mode_put_tile_group(struct drm_device *dev, struct drm_tile_group *tg); /** * struct drm_connector_list_iter - connector_list iterator * * This iterator tracks state needed to be able to walk the connector_list * within struct drm_mode_config. Only use together with * drm_connector_list_iter_begin(), drm_connector_list_iter_end() and * drm_connector_list_iter_next() respectively the convenience macro * drm_for_each_connector_iter(). * * Note that the return value of drm_connector_list_iter_next() is only valid * up to the next drm_connector_list_iter_next() or * drm_connector_list_iter_end() call. If you want to use the connector later, * then you need to grab your own reference first using drm_connector_get(). */ struct drm_connector_list_iter { /* private: */ struct drm_device *dev; struct drm_connector *conn; }; void drm_connector_list_iter_begin(struct drm_device *dev, struct drm_connector_list_iter *iter); struct drm_connector * drm_connector_list_iter_next(struct drm_connector_list_iter *iter); void drm_connector_list_iter_end(struct drm_connector_list_iter *iter); bool drm_connector_has_possible_encoder(struct drm_connector *connector, struct drm_encoder *encoder); const char *drm_get_colorspace_name(enum drm_colorspace colorspace); /** * drm_for_each_connector_iter - connector_list iterator macro * @connector: &struct drm_connector pointer used as cursor * @iter: &struct drm_connector_list_iter * * Note that @connector is only valid within the list body, if you want to use * @connector after calling drm_connector_list_iter_end() then you need to grab * your own reference first using drm_connector_get(). */ #define drm_for_each_connector_iter(connector, iter) \ while ((connector = drm_connector_list_iter_next(iter))) /** * drm_connector_for_each_possible_encoder - iterate connector's possible encoders * @connector: &struct drm_connector pointer * @encoder: &struct drm_encoder pointer used as cursor */ #define drm_connector_for_each_possible_encoder(connector, encoder) \ drm_for_each_encoder_mask(encoder, (connector)->dev, \ (connector)->possible_encoders) #endif |
3 2 1 2 3 1 1 3 1 2 2 2 2 1 2 16 16 1 1 1 12 10 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 10 3 10 1 1 1 1 3 3 3 3 2 1 2 1 1 1 7 8 6 5 4 3 6 4 2 1 8 1 6 6 6 5 5 6 6 6 5 6 2 4 8 7 6 1 5 4 3 3 8 2 484 489 487 485 490 485 490 484 447 448 444 441 5 4 440 438 445 428 445 418 447 1 445 1 446 436 18 445 435 14 1 442 3 426 275 259 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 | /* * Created: Fri Jan 8 09:01:26 1999 by faith@valinux.com * * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. * All Rights Reserved. * * Author Rickard E. (Rik) Faith <faith@valinux.com> * Author Gareth Hughes <gareth@valinux.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include <linux/export.h> #include <linux/nospec.h> #include <linux/pci.h> #include <linux/uaccess.h> #include <drm/drm_auth.h> #include <drm/drm_crtc.h> #include <drm/drm_drv.h> #include <drm/drm_file.h> #include <drm/drm_ioctl.h> #include <drm/drm_print.h> #include "drm_crtc_internal.h" #include "drm_internal.h" /** * DOC: getunique and setversion story * * BEWARE THE DRAGONS! MIND THE TRAPDOORS! * * In an attempt to warn anyone else who's trying to figure out what's going * on here, I'll try to summarize the story. First things first, let's clear up * the names, because the kernel internals, libdrm and the ioctls are all named * differently: * * - GET_UNIQUE ioctl, implemented by drm_getunique is wrapped up in libdrm * through the drmGetBusid function. * - The libdrm drmSetBusid function is backed by the SET_UNIQUE ioctl. All * that code is nerved in the kernel with drm_invalid_op(). * - The internal set_busid kernel functions and driver callbacks are * exclusively use by the SET_VERSION ioctl, because only drm 1.0 (which is * nerved) allowed userspace to set the busid through the above ioctl. * - Other ioctls and functions involved are named consistently. * * For anyone wondering what's the difference between drm 1.1 and 1.4: Correctly * handling pci domains in the busid on ppc. Doing this correctly was only * implemented in libdrm in 2010, hence can't be nerved yet. No one knows what's * special with drm 1.2 and 1.3. * * Now the actual horror story of how device lookup in drm works. At large, * there's 2 different ways, either by busid, or by device driver name. * * Opening by busid is fairly simple: * * 1. First call SET_VERSION to make sure pci domains are handled properly. As a * side-effect this fills out the unique name in the master structure. * 2. Call GET_UNIQUE to read out the unique name from the master structure, * which matches the busid thanks to step 1. If it doesn't, proceed to try * the next device node. * * Opening by name is slightly different: * * 1. Directly call VERSION to get the version and to match against the driver * name returned by that ioctl. Note that SET_VERSION is not called, which * means the unique name for the master node just opening is _not_ filled * out. This despite that with current drm device nodes are always bound to * one device, and can't be runtime assigned like with drm 1.0. * 2. Match driver name. If it mismatches, proceed to the next device node. * 3. Call GET_UNIQUE, and check whether the unique name has length zero (by * checking that the first byte in the string is 0). If that's not the case * libdrm skips and proceeds to the next device node. Probably this is just * copypasta from drm 1.0 times where a set unique name meant that the driver * was in use already, but that's just conjecture. * * Long story short: To keep the open by name logic working, GET_UNIQUE must * _not_ return a unique string when SET_VERSION hasn't been called yet, * otherwise libdrm breaks. Even when that unique string can't ever change, and * is totally irrelevant for actually opening the device because runtime * assignable device instances were only support in drm 1.0, which is long dead. * But the libdrm code in drmOpenByName somehow survived, hence this can't be * broken. */ /* * Get the bus id. * * \param inode device inode. * \param file_priv DRM file private. * \param cmd command. * \param arg user argument, pointing to a drm_unique structure. * \return zero on success or a negative number on failure. * * Copies the bus id from drm_device::unique into user space. */ int drm_getunique(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_unique *u = data; struct drm_master *master; mutex_lock(&dev->master_mutex); master = file_priv->master; if (u->unique_len >= master->unique_len) { if (copy_to_user(u->unique, master->unique, master->unique_len)) { mutex_unlock(&dev->master_mutex); return -EFAULT; } } u->unique_len = master->unique_len; mutex_unlock(&dev->master_mutex); return 0; } static void drm_unset_busid(struct drm_device *dev, struct drm_master *master) { kfree(master->unique); master->unique = NULL; master->unique_len = 0; } static int drm_set_busid(struct drm_device *dev, struct drm_file *file_priv) { struct drm_master *master = file_priv->master; int ret; if (master->unique != NULL) drm_unset_busid(dev, master); if (dev->dev && dev_is_pci(dev->dev)) { ret = drm_pci_set_busid(dev, master); if (ret) { drm_unset_busid(dev, master); return ret; } } else { WARN_ON(!dev->unique); master->unique = kstrdup(dev->unique, GFP_KERNEL); if (master->unique) master->unique_len = strlen(dev->unique); } return 0; } /* * Get client information. * * \param inode device inode. * \param file_priv DRM file private. * \param cmd command. * \param arg user argument, pointing to a drm_client structure. * * \return zero on success or a negative number on failure. * * Searches for the client with the specified index and copies its information * into userspace */ int drm_getclient(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_client *client = data; /* * Hollowed-out getclient ioctl to keep some dead old drm tests/tools * not breaking completely. Userspace tools stop enumerating one they * get -EINVAL, hence this is the return value we need to hand back for * no clients tracked. * * Unfortunately some clients (*cough* libva *cough*) use this in a fun * attempt to figure out whether they're authenticated or not. Since * that's the only thing they care about, give it to the directly * instead of walking one giant list. */ if (client->idx == 0) { client->auth = file_priv->authenticated; client->pid = task_pid_vnr(current); client->uid = overflowuid; client->magic = 0; client->iocs = 0; return 0; } else { return -EINVAL; } } /* * Get statistics information. * * \param inode device inode. * \param file_priv DRM file private. * \param cmd command. * \param arg user argument, pointing to a drm_stats structure. * * \return zero on success or a negative number on failure. */ static int drm_getstats(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_stats *stats = data; /* Clear stats to prevent userspace from eating its stack garbage. */ memset(stats, 0, sizeof(*stats)); return 0; } /* * Get device/driver capabilities */ static int drm_getcap(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_get_cap *req = data; struct drm_crtc *crtc; req->value = 0; /* Only some caps make sense with UMS/render-only drivers. */ switch (req->capability) { case DRM_CAP_TIMESTAMP_MONOTONIC: req->value = 1; return 0; case DRM_CAP_PRIME: req->value = DRM_PRIME_CAP_IMPORT | DRM_PRIME_CAP_EXPORT; return 0; case DRM_CAP_SYNCOBJ: req->value = drm_core_check_feature(dev, DRIVER_SYNCOBJ); return 0; case DRM_CAP_SYNCOBJ_TIMELINE: req->value = drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE); return 0; } /* Other caps only work with KMS drivers */ if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; switch (req->capability) { case DRM_CAP_DUMB_BUFFER: if (dev->driver->dumb_create) req->value = 1; break; case DRM_CAP_VBLANK_HIGH_CRTC: req->value = 1; break; case DRM_CAP_DUMB_PREFERRED_DEPTH: req->value = dev->mode_config.preferred_depth; break; case DRM_CAP_DUMB_PREFER_SHADOW: req->value = dev->mode_config.prefer_shadow; break; case DRM_CAP_ASYNC_PAGE_FLIP: req->value = dev->mode_config.async_page_flip; break; case DRM_CAP_PAGE_FLIP_TARGET: req->value = 1; drm_for_each_crtc(crtc, dev) { if (!crtc->funcs->page_flip_target) req->value = 0; } break; case DRM_CAP_CURSOR_WIDTH: if (dev->mode_config.cursor_width) req->value = dev->mode_config.cursor_width; else req->value = 64; break; case DRM_CAP_CURSOR_HEIGHT: if (dev->mode_config.cursor_height) req->value = dev->mode_config.cursor_height; else req->value = 64; break; case DRM_CAP_ADDFB2_MODIFIERS: req->value = !dev->mode_config.fb_modifiers_not_supported; break; case DRM_CAP_CRTC_IN_VBLANK_EVENT: req->value = 1; break; case DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP: req->value = drm_core_check_feature(dev, DRIVER_ATOMIC) && dev->mode_config.async_page_flip; break; default: return -EINVAL; } return 0; } /* * Set device/driver capabilities */ static int drm_setclientcap(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_set_client_cap *req = data; /* No render-only settable capabilities for now */ /* Below caps that only works with KMS drivers */ if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; switch (req->capability) { case DRM_CLIENT_CAP_STEREO_3D: if (req->value > 1) return -EINVAL; file_priv->stereo_allowed = req->value; break; case DRM_CLIENT_CAP_UNIVERSAL_PLANES: if (req->value > 1) return -EINVAL; file_priv->universal_planes = req->value; break; case DRM_CLIENT_CAP_ATOMIC: if (!drm_core_check_feature(dev, DRIVER_ATOMIC)) return -EOPNOTSUPP; /* The modesetting DDX has a totally broken idea of atomic. */ if (current->comm[0] == 'X' && req->value == 1) { pr_info("broken atomic modeset userspace detected, disabling atomic\n"); return -EOPNOTSUPP; } if (req->value > 2) return -EINVAL; file_priv->atomic = req->value; file_priv->universal_planes = req->value; /* * No atomic user-space blows up on aspect ratio mode bits. */ file_priv->aspect_ratio_allowed = req->value; break; case DRM_CLIENT_CAP_ASPECT_RATIO: if (req->value > 1) return -EINVAL; file_priv->aspect_ratio_allowed = req->value; break; case DRM_CLIENT_CAP_WRITEBACK_CONNECTORS: if (!file_priv->atomic) return -EINVAL; if (req->value > 1) return -EINVAL; file_priv->writeback_connectors = req->value; break; case DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT: if (!drm_core_check_feature(dev, DRIVER_CURSOR_HOTSPOT)) return -EOPNOTSUPP; if (!file_priv->atomic) return -EINVAL; if (req->value > 1) return -EINVAL; file_priv->supports_virtualized_cursor_plane = req->value; break; default: return -EINVAL; } return 0; } /* * Setversion ioctl. * * \param inode device inode. * \param file_priv DRM file private. * \param cmd command. * \param arg user argument, pointing to a drm_lock structure. * \return zero on success or negative number on failure. * * Sets the requested interface version */ static int drm_setversion(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_set_version *sv = data; int if_version, retcode = 0; mutex_lock(&dev->master_mutex); if (sv->drm_di_major != -1) { if (sv->drm_di_major != DRM_IF_MAJOR || sv->drm_di_minor < 0 || sv->drm_di_minor > DRM_IF_MINOR) { retcode = -EINVAL; goto done; } if_version = DRM_IF_VERSION(sv->drm_di_major, sv->drm_di_minor); dev->if_version = max(if_version, dev->if_version); if (sv->drm_di_minor >= 1) { /* * Version 1.1 includes tying of DRM to specific device * Version 1.4 has proper PCI domain support */ retcode = drm_set_busid(dev, file_priv); if (retcode) goto done; } } if (sv->drm_dd_major != -1) { if (sv->drm_dd_major != dev->driver->major || sv->drm_dd_minor < 0 || sv->drm_dd_minor > dev->driver->minor) { retcode = -EINVAL; goto done; } } done: sv->drm_di_major = DRM_IF_MAJOR; sv->drm_di_minor = DRM_IF_MINOR; sv->drm_dd_major = dev->driver->major; sv->drm_dd_minor = dev->driver->minor; mutex_unlock(&dev->master_mutex); return retcode; } /** * drm_noop - DRM no-op ioctl implementation * @dev: DRM device for the ioctl * @data: data pointer for the ioctl * @file_priv: DRM file for the ioctl call * * This no-op implementation for drm ioctls is useful for deprecated * functionality where we can't return a failure code because existing userspace * checks the result of the ioctl, but doesn't care about the action. * * Always returns successfully with 0. */ int drm_noop(struct drm_device *dev, void *data, struct drm_file *file_priv) { drm_dbg_core(dev, "\n"); return 0; } EXPORT_SYMBOL(drm_noop); /** * drm_invalid_op - DRM invalid ioctl implementation * @dev: DRM device for the ioctl * @data: data pointer for the ioctl * @file_priv: DRM file for the ioctl call * * This no-op implementation for drm ioctls is useful for deprecated * functionality where we really don't want to allow userspace to call the ioctl * any more. This is the case for old ums interfaces for drivers that * transitioned to kms gradually and so kept the old legacy tables around. This * only applies to radeon and i915 kms drivers, other drivers shouldn't need to * use this function. * * Always fails with a return value of -EINVAL. */ int drm_invalid_op(struct drm_device *dev, void *data, struct drm_file *file_priv) { return -EINVAL; } EXPORT_SYMBOL(drm_invalid_op); /* * Copy and IOCTL return string to user space */ static int drm_copy_field(char __user *buf, size_t *buf_len, const char *value) { size_t len; /* don't attempt to copy a NULL pointer */ if (WARN_ONCE(!value, "BUG: the value to copy was not set!")) { *buf_len = 0; return 0; } /* don't overflow userbuf */ len = strlen(value); if (len > *buf_len) len = *buf_len; /* let userspace know exact length of driver value (which could be * larger than the userspace-supplied buffer) */ *buf_len = strlen(value); /* finally, try filling in the userbuf */ if (len && buf) if (copy_to_user(buf, value, len)) return -EFAULT; return 0; } /* * Get version information * * \param inode device inode. * \param filp file pointer. * \param cmd command. * \param arg user argument, pointing to a drm_version structure. * \return zero on success or negative number on failure. * * Fills in the version information in \p arg. */ int drm_version(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_version *version = data; int err; version->version_major = dev->driver->major; version->version_minor = dev->driver->minor; version->version_patchlevel = dev->driver->patchlevel; err = drm_copy_field(version->name, &version->name_len, dev->driver->name); /* Driver date is deprecated. Userspace expects a non-empty string. */ if (!err) err = drm_copy_field(version->date, &version->date_len, "0"); if (!err) err = drm_copy_field(version->desc, &version->desc_len, dev->driver->desc); return err; } /* * Check if the passed string contains control char or spaces or * anything that would mess up a formatted output. */ static int drm_validate_value_string(const char *value, size_t len) { int i; for (i = 0; i < len; i++) { if (!isascii(value[i]) || !isgraph(value[i])) return -EINVAL; } return 0; } static int drm_set_client_name(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_set_client_name *name = data; size_t len = name->name_len; void __user *user_ptr; char *new_name; if (len > DRM_CLIENT_NAME_MAX_LEN) { return -EINVAL; } else if (len) { user_ptr = u64_to_user_ptr(name->name); new_name = memdup_user_nul(user_ptr, len); if (IS_ERR(new_name)) return PTR_ERR(new_name); if (strlen(new_name) != len || drm_validate_value_string(new_name, len) < 0) { kfree(new_name); return -EINVAL; } } else { new_name = NULL; } mutex_lock(&file_priv->client_name_lock); kfree(file_priv->client_name); file_priv->client_name = new_name; mutex_unlock(&file_priv->client_name_lock); return 0; } static int drm_ioctl_permit(u32 flags, struct drm_file *file_priv) { /* ROOT_ONLY is only for CAP_SYS_ADMIN */ if (unlikely((flags & DRM_ROOT_ONLY) && !capable(CAP_SYS_ADMIN))) return -EACCES; /* AUTH is only for authenticated or render client */ if (unlikely((flags & DRM_AUTH) && !drm_is_render_client(file_priv) && !file_priv->authenticated)) return -EACCES; /* MASTER is only for master or control clients */ if (unlikely((flags & DRM_MASTER) && !drm_is_current_master(file_priv))) return -EACCES; /* Render clients must be explicitly allowed */ if (unlikely(!(flags & DRM_RENDER_ALLOW) && drm_is_render_client(file_priv))) return -EACCES; return 0; } #define DRM_IOCTL_DEF(ioctl, _func, _flags) \ [DRM_IOCTL_NR(ioctl)] = { \ .cmd = ioctl, \ .func = _func, \ .flags = _flags, \ .name = #ioctl \ } /* Ioctl table */ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_IOCTL_DEF(DRM_IOCTL_VERSION, drm_version, DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_GET_UNIQUE, drm_getunique, 0), DRM_IOCTL_DEF(DRM_IOCTL_GET_MAGIC, drm_getmagic, 0), DRM_IOCTL_DEF(DRM_IOCTL_GET_CLIENT, drm_getclient, 0), DRM_IOCTL_DEF(DRM_IOCTL_GET_STATS, drm_getstats, 0), DRM_IOCTL_DEF(DRM_IOCTL_GET_CAP, drm_getcap, DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SET_CLIENT_CAP, drm_setclientcap, 0), DRM_IOCTL_DEF(DRM_IOCTL_SET_VERSION, drm_setversion, DRM_MASTER), DRM_IOCTL_DEF(DRM_IOCTL_SET_UNIQUE, drm_invalid_op, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_IOCTL_BLOCK, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_IOCTL_UNBLOCK, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_IOCTL_AUTH_MAGIC, drm_authmagic, DRM_MASTER), DRM_IOCTL_DEF(DRM_IOCTL_SET_MASTER, drm_setmaster_ioctl, 0), DRM_IOCTL_DEF(DRM_IOCTL_DROP_MASTER, drm_dropmaster_ioctl, 0), DRM_IOCTL_DEF(DRM_IOCTL_ADD_DRAW, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_IOCTL_RM_DRAW, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_IOCTL_FINISH, drm_noop, DRM_AUTH), DRM_IOCTL_DEF(DRM_IOCTL_WAIT_VBLANK, drm_wait_vblank_ioctl, 0), DRM_IOCTL_DEF(DRM_IOCTL_UPDATE_DRAW, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_IOCTL_GEM_CLOSE, drm_gem_close_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_GEM_FLINK, drm_gem_flink_ioctl, DRM_AUTH), DRM_IOCTL_DEF(DRM_IOCTL_GEM_OPEN, drm_gem_open_ioctl, DRM_AUTH), DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETRESOURCES, drm_mode_getresources, 0), DRM_IOCTL_DEF(DRM_IOCTL_PRIME_HANDLE_TO_FD, drm_prime_handle_to_fd_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_PRIME_FD_TO_HANDLE, drm_prime_fd_to_handle_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SET_CLIENT_NAME, drm_set_client_name, DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANERESOURCES, drm_mode_getplane_res, 0), DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCRTC, drm_mode_getcrtc, 0), DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETCRTC, drm_mode_setcrtc, DRM_MASTER), DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANE, drm_mode_getplane, 0), DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPLANE, drm_mode_setplane, DRM_MASTER), DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR, drm_mode_cursor_ioctl, DRM_MASTER), DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETGAMMA, drm_mode_gamma_get_ioctl, 0), DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETGAMMA, drm_mode_gamma_set_ioctl, DRM_MASTER), DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETENCODER, drm_mode_getencoder, 0), DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCONNECTOR, drm_mode_getconnector, 0), DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATTACHMODE, drm_noop, DRM_MASTER), DRM_IOCTL_DEF(DRM_IOCTL_MODE_DETACHMODE, drm_noop, DRM_MASTER), DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPERTY, drm_mode_getproperty_ioctl, 0), DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPROPERTY, drm_connector_property_set_ioctl, DRM_MASTER), DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPBLOB, drm_mode_getblob_ioctl, 0), DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETFB, drm_mode_getfb, 0), DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETFB2, drm_mode_getfb2_ioctl, 0), DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB, drm_mode_addfb_ioctl, 0), DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB2, drm_mode_addfb2_ioctl, 0), DRM_IOCTL_DEF(DRM_IOCTL_MODE_RMFB, drm_mode_rmfb_ioctl, 0), DRM_IOCTL_DEF(DRM_IOCTL_MODE_CLOSEFB, drm_mode_closefb_ioctl, 0), DRM_IOCTL_DEF(DRM_IOCTL_MODE_PAGE_FLIP, drm_mode_page_flip_ioctl, DRM_MASTER), DRM_IOCTL_DEF(DRM_IOCTL_MODE_DIRTYFB, drm_mode_dirtyfb_ioctl, DRM_MASTER), DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_DUMB, drm_mode_create_dumb_ioctl, 0), DRM_IOCTL_DEF(DRM_IOCTL_MODE_MAP_DUMB, drm_mode_mmap_dumb_ioctl, 0), DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROY_DUMB, drm_mode_destroy_dumb_ioctl, 0), DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_GETPROPERTIES, drm_mode_obj_get_properties_ioctl, 0), DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_SETPROPERTY, drm_mode_obj_set_property_ioctl, DRM_MASTER), DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR2, drm_mode_cursor2_ioctl, DRM_MASTER), DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATOMIC, drm_mode_atomic_ioctl, DRM_MASTER), DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATEPROPBLOB, drm_mode_createblob_ioctl, 0), DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROYPROPBLOB, drm_mode_destroyblob_ioctl, 0), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_CREATE, drm_syncobj_create_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_DESTROY, drm_syncobj_destroy_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, drm_syncobj_handle_to_fd_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, drm_syncobj_fd_to_handle_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TRANSFER, drm_syncobj_transfer_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, drm_syncobj_timeline_wait_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_EVENTFD, drm_syncobj_eventfd_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_RESET, drm_syncobj_reset_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, drm_syncobj_timeline_signal_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, 0), DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE, drm_crtc_queue_sequence_ioctl, 0), DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, drm_mode_create_lease_ioctl, DRM_MASTER), DRM_IOCTL_DEF(DRM_IOCTL_MODE_LIST_LESSEES, drm_mode_list_lessees_ioctl, DRM_MASTER), DRM_IOCTL_DEF(DRM_IOCTL_MODE_GET_LEASE, drm_mode_get_lease_ioctl, DRM_MASTER), DRM_IOCTL_DEF(DRM_IOCTL_MODE_REVOKE_LEASE, drm_mode_revoke_lease_ioctl, DRM_MASTER), }; #define DRM_CORE_IOCTL_COUNT ARRAY_SIZE(drm_ioctls) /** * DOC: driver specific ioctls * * First things first, driver private IOCTLs should only be needed for drivers * supporting rendering. Kernel modesetting is all standardized, and extended * through properties. There are a few exceptions in some existing drivers, * which define IOCTL for use by the display DRM master, but they all predate * properties. * * Now if you do have a render driver you always have to support it through * driver private properties. There's a few steps needed to wire all the things * up. * * First you need to define the structure for your IOCTL in your driver private * UAPI header in ``include/uapi/drm/my_driver_drm.h``:: * * struct my_driver_operation { * u32 some_thing; * u32 another_thing; * }; * * Please make sure that you follow all the best practices from * ``Documentation/process/botching-up-ioctls.rst``. Note that drm_ioctl() * automatically zero-extends structures, hence make sure you can add more stuff * at the end, i.e. don't put a variable sized array there. * * Then you need to define your IOCTL number, using one of DRM_IO(), DRM_IOR(), * DRM_IOW() or DRM_IOWR(). It must start with the DRM_IOCTL\_ prefix:: * * ##define DRM_IOCTL_MY_DRIVER_OPERATION \ * DRM_IOW(DRM_COMMAND_BASE, struct my_driver_operation) * * DRM driver private IOCTL must be in the range from DRM_COMMAND_BASE to * DRM_COMMAND_END. Finally you need an array of &struct drm_ioctl_desc to wire * up the handlers and set the access rights:: * * static const struct drm_ioctl_desc my_driver_ioctls[] = { * DRM_IOCTL_DEF_DRV(MY_DRIVER_OPERATION, my_driver_operation, * DRM_AUTH|DRM_RENDER_ALLOW), * }; * * And then assign this to the &drm_driver.ioctls field in your driver * structure. * * See the separate chapter on :ref:`file operations<drm_driver_fops>` for how * the driver-specific IOCTLs are wired up. */ long drm_ioctl_kernel(struct file *file, drm_ioctl_t *func, void *kdata, u32 flags) { struct drm_file *file_priv = file->private_data; struct drm_device *dev = file_priv->minor->dev; int ret; /* Update drm_file owner if fd was passed along. */ drm_file_update_pid(file_priv); if (drm_dev_is_unplugged(dev)) return -ENODEV; ret = drm_ioctl_permit(flags, file_priv); if (unlikely(ret)) return ret; return func(dev, kdata, file_priv); } EXPORT_SYMBOL(drm_ioctl_kernel); /** * drm_ioctl - ioctl callback implementation for DRM drivers * @filp: file this ioctl is called on * @cmd: ioctl cmd number * @arg: user argument * * Looks up the ioctl function in the DRM core and the driver dispatch table, * stored in &drm_driver.ioctls. It checks for necessary permission by calling * drm_ioctl_permit(), and dispatches to the respective function. * * Returns: * Zero on success, negative error code on failure. */ long drm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct drm_file *file_priv = filp->private_data; struct drm_device *dev; const struct drm_ioctl_desc *ioctl = NULL; drm_ioctl_t *func; unsigned int nr = DRM_IOCTL_NR(cmd); int retcode = -EINVAL; char stack_kdata[128]; char *kdata = NULL; unsigned int in_size, out_size, drv_size, ksize; bool is_driver_ioctl; dev = file_priv->minor->dev; if (drm_dev_is_unplugged(dev)) return -ENODEV; if (DRM_IOCTL_TYPE(cmd) != DRM_IOCTL_BASE) return -ENOTTY; is_driver_ioctl = nr >= DRM_COMMAND_BASE && nr < DRM_COMMAND_END; if (is_driver_ioctl) { /* driver ioctl */ unsigned int index = nr - DRM_COMMAND_BASE; if (index >= dev->driver->num_ioctls) goto err_i1; index = array_index_nospec(index, dev->driver->num_ioctls); ioctl = &dev->driver->ioctls[index]; } else { /* core ioctl */ if (nr >= DRM_CORE_IOCTL_COUNT) goto err_i1; nr = array_index_nospec(nr, DRM_CORE_IOCTL_COUNT); ioctl = &drm_ioctls[nr]; } drv_size = _IOC_SIZE(ioctl->cmd); out_size = in_size = _IOC_SIZE(cmd); if ((cmd & ioctl->cmd & IOC_IN) == 0) in_size = 0; if ((cmd & ioctl->cmd & IOC_OUT) == 0) out_size = 0; ksize = max(max(in_size, out_size), drv_size); drm_dbg_core(dev, "comm=\"%s\" pid=%d, dev=0x%lx, auth=%d, %s\n", current->comm, task_pid_nr(current), (long)old_encode_dev(file_priv->minor->kdev->devt), file_priv->authenticated, ioctl->name); /* Do not trust userspace, use our own definition */ func = ioctl->func; if (unlikely(!func)) { drm_dbg_core(dev, "no function\n"); retcode = -EINVAL; goto err_i1; } if (ksize <= sizeof(stack_kdata)) { kdata = stack_kdata; } else { kdata = kmalloc(ksize, GFP_KERNEL); if (!kdata) { retcode = -ENOMEM; goto err_i1; } } if (copy_from_user(kdata, (void __user *)arg, in_size) != 0) { retcode = -EFAULT; goto err_i1; } if (ksize > in_size) memset(kdata + in_size, 0, ksize - in_size); retcode = drm_ioctl_kernel(filp, func, kdata, ioctl->flags); if (copy_to_user((void __user *)arg, kdata, out_size) != 0) retcode = -EFAULT; err_i1: if (!ioctl) drm_dbg_core(dev, "invalid ioctl: comm=\"%s\", pid=%d, dev=0x%lx, auth=%d, cmd=0x%02x, nr=0x%02x\n", current->comm, task_pid_nr(current), (long)old_encode_dev(file_priv->minor->kdev->devt), file_priv->authenticated, cmd, nr); if (kdata != stack_kdata) kfree(kdata); if (retcode) drm_dbg_core(dev, "comm=\"%s\", pid=%d, ret=%d\n", current->comm, task_pid_nr(current), retcode); return retcode; } EXPORT_SYMBOL(drm_ioctl); /** * drm_ioctl_flags - Check for core ioctl and return ioctl permission flags * @nr: ioctl number * @flags: where to return the ioctl permission flags * * This ioctl is only used by the vmwgfx driver to augment the access checks * done by the drm core and insofar a pretty decent layering violation. This * shouldn't be used by any drivers. * * Returns: * True if the @nr corresponds to a DRM core ioctl number, false otherwise. */ bool drm_ioctl_flags(unsigned int nr, unsigned int *flags) { if (nr >= DRM_COMMAND_BASE && nr < DRM_COMMAND_END) return false; if (nr >= DRM_CORE_IOCTL_COUNT) return false; nr = array_index_nospec(nr, DRM_CORE_IOCTL_COUNT); *flags = drm_ioctls[nr].flags; return true; } EXPORT_SYMBOL(drm_ioctl_flags); |
1 6 1 1 5 6 9 8 8 8 1 8 9 5 5 1 1 1 1 1 1 1 8 7 8 8 1 13 13 12 5 10 11 3 6 12 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 | // SPDX-License-Identifier: GPL-2.0-only #include "netlink.h" #include "common.h" struct pause_req_info { struct ethnl_req_info base; enum ethtool_mac_stats_src src; }; #define PAUSE_REQINFO(__req_base) \ container_of(__req_base, struct pause_req_info, base) struct pause_reply_data { struct ethnl_reply_data base; struct ethtool_pauseparam pauseparam; struct ethtool_pause_stats pausestat; }; #define PAUSE_REPDATA(__reply_base) \ container_of(__reply_base, struct pause_reply_data, base) const struct nla_policy ethnl_pause_get_policy[] = { [ETHTOOL_A_PAUSE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_stats), [ETHTOOL_A_PAUSE_STATS_SRC] = NLA_POLICY_MAX(NLA_U32, ETHTOOL_MAC_STATS_SRC_PMAC), }; static int pause_parse_request(struct ethnl_req_info *req_base, struct nlattr **tb, struct netlink_ext_ack *extack) { enum ethtool_mac_stats_src src = ETHTOOL_MAC_STATS_SRC_AGGREGATE; struct pause_req_info *req_info = PAUSE_REQINFO(req_base); if (tb[ETHTOOL_A_PAUSE_STATS_SRC]) { if (!(req_base->flags & ETHTOOL_FLAG_STATS)) { NL_SET_ERR_MSG_MOD(extack, "ETHTOOL_FLAG_STATS must be set when requesting a source of stats"); return -EINVAL; } src = nla_get_u32(tb[ETHTOOL_A_PAUSE_STATS_SRC]); } req_info->src = src; return 0; } static int pause_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { const struct pause_req_info *req_info = PAUSE_REQINFO(req_base); struct pause_reply_data *data = PAUSE_REPDATA(reply_base); enum ethtool_mac_stats_src src = req_info->src; struct net_device *dev = reply_base->dev; int ret; if (!dev->ethtool_ops->get_pauseparam) return -EOPNOTSUPP; ethtool_stats_init((u64 *)&data->pausestat, sizeof(data->pausestat) / 8); data->pausestat.src = src; ret = ethnl_ops_begin(dev); if (ret < 0) return ret; if ((src == ETHTOOL_MAC_STATS_SRC_EMAC || src == ETHTOOL_MAC_STATS_SRC_PMAC) && !__ethtool_dev_mm_supported(dev)) { NL_SET_ERR_MSG_MOD(info->extack, "Device does not support MAC merge layer"); ethnl_ops_complete(dev); return -EOPNOTSUPP; } dev->ethtool_ops->get_pauseparam(dev, &data->pauseparam); if (req_base->flags & ETHTOOL_FLAG_STATS && dev->ethtool_ops->get_pause_stats) dev->ethtool_ops->get_pause_stats(dev, &data->pausestat); ethnl_ops_complete(dev); return 0; } static int pause_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { int n = nla_total_size(sizeof(u8)) + /* _PAUSE_AUTONEG */ nla_total_size(sizeof(u8)) + /* _PAUSE_RX */ nla_total_size(sizeof(u8)); /* _PAUSE_TX */ if (req_base->flags & ETHTOOL_FLAG_STATS) n += nla_total_size(0) + /* _PAUSE_STATS */ nla_total_size(sizeof(u32)) + /* _PAUSE_STATS_SRC */ nla_total_size_64bit(sizeof(u64)) * ETHTOOL_PAUSE_STAT_CNT; return n; } static int ethtool_put_stat(struct sk_buff *skb, u64 val, u16 attrtype, u16 padtype) { if (val == ETHTOOL_STAT_NOT_SET) return 0; if (nla_put_u64_64bit(skb, attrtype, val, padtype)) return -EMSGSIZE; return 0; } static int pause_put_stats(struct sk_buff *skb, const struct ethtool_pause_stats *pause_stats) { const u16 pad = ETHTOOL_A_PAUSE_STAT_PAD; struct nlattr *nest; if (nla_put_u32(skb, ETHTOOL_A_PAUSE_STATS_SRC, pause_stats->src)) return -EMSGSIZE; nest = nla_nest_start(skb, ETHTOOL_A_PAUSE_STATS); if (!nest) return -EMSGSIZE; if (ethtool_put_stat(skb, pause_stats->tx_pause_frames, ETHTOOL_A_PAUSE_STAT_TX_FRAMES, pad) || ethtool_put_stat(skb, pause_stats->rx_pause_frames, ETHTOOL_A_PAUSE_STAT_RX_FRAMES, pad)) goto err_cancel; nla_nest_end(skb, nest); return 0; err_cancel: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static int pause_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct pause_reply_data *data = PAUSE_REPDATA(reply_base); const struct ethtool_pauseparam *pauseparam = &data->pauseparam; if (nla_put_u8(skb, ETHTOOL_A_PAUSE_AUTONEG, !!pauseparam->autoneg) || nla_put_u8(skb, ETHTOOL_A_PAUSE_RX, !!pauseparam->rx_pause) || nla_put_u8(skb, ETHTOOL_A_PAUSE_TX, !!pauseparam->tx_pause)) return -EMSGSIZE; if (req_base->flags & ETHTOOL_FLAG_STATS && pause_put_stats(skb, &data->pausestat)) return -EMSGSIZE; return 0; } /* PAUSE_SET */ const struct nla_policy ethnl_pause_set_policy[] = { [ETHTOOL_A_PAUSE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), [ETHTOOL_A_PAUSE_AUTONEG] = { .type = NLA_U8 }, [ETHTOOL_A_PAUSE_RX] = { .type = NLA_U8 }, [ETHTOOL_A_PAUSE_TX] = { .type = NLA_U8 }, }; static int ethnl_set_pause_validate(struct ethnl_req_info *req_info, struct genl_info *info) { const struct ethtool_ops *ops = req_info->dev->ethtool_ops; return ops->get_pauseparam && ops->set_pauseparam ? 1 : -EOPNOTSUPP; } static int ethnl_set_pause(struct ethnl_req_info *req_info, struct genl_info *info) { struct net_device *dev = req_info->dev; struct ethtool_pauseparam params = {}; struct nlattr **tb = info->attrs; bool mod = false; int ret; dev->ethtool_ops->get_pauseparam(dev, ¶ms); ethnl_update_bool32(¶ms.autoneg, tb[ETHTOOL_A_PAUSE_AUTONEG], &mod); ethnl_update_bool32(¶ms.rx_pause, tb[ETHTOOL_A_PAUSE_RX], &mod); ethnl_update_bool32(¶ms.tx_pause, tb[ETHTOOL_A_PAUSE_TX], &mod); if (!mod) return 0; ret = dev->ethtool_ops->set_pauseparam(dev, ¶ms); return ret < 0 ? ret : 1; } const struct ethnl_request_ops ethnl_pause_request_ops = { .request_cmd = ETHTOOL_MSG_PAUSE_GET, .reply_cmd = ETHTOOL_MSG_PAUSE_GET_REPLY, .hdr_attr = ETHTOOL_A_PAUSE_HEADER, .req_info_size = sizeof(struct pause_req_info), .reply_data_size = sizeof(struct pause_reply_data), .parse_request = pause_parse_request, .prepare_data = pause_prepare_data, .reply_size = pause_reply_size, .fill_reply = pause_fill_reply, .set_validate = ethnl_set_pause_validate, .set = ethnl_set_pause, .set_ntf_cmd = ETHTOOL_MSG_PAUSE_NTF, }; |
9557 9880 9873 44 9362 9402 36 39 44 3 788 8 8 3 6 6 4 2 2 2 2 820 804 797 795 18 804 44 5 44 5 228 597 23 21 56 21 21 21 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_NET_SCM_H #define __LINUX_NET_SCM_H #include <linux/limits.h> #include <linux/net.h> #include <linux/cred.h> #include <linux/file.h> #include <linux/security.h> #include <linux/pid.h> #include <linux/nsproxy.h> #include <linux/sched/signal.h> #include <net/compat.h> /* Well, we should have at least one descriptor open * to accept passed FDs 8) */ #define SCM_MAX_FD 253 struct scm_creds { u32 pid; kuid_t uid; kgid_t gid; }; #ifdef CONFIG_UNIX struct unix_edge; #endif struct scm_fp_list { short count; short count_unix; short max; #ifdef CONFIG_UNIX bool inflight; bool dead; struct list_head vertices; struct unix_edge *edges; #endif struct user_struct *user; struct file *fp[SCM_MAX_FD]; }; struct scm_cookie { struct pid *pid; /* Skb credentials */ struct scm_fp_list *fp; /* Passed files */ struct scm_creds creds; /* Skb credentials */ #ifdef CONFIG_SECURITY_NETWORK u32 secid; /* Passed security ID */ #endif }; void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm); void scm_detach_fds_compat(struct msghdr *msg, struct scm_cookie *scm); int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm); void __scm_destroy(struct scm_cookie *scm); struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl); #ifdef CONFIG_SECURITY_NETWORK static __inline__ void unix_get_peersec_dgram(struct socket *sock, struct scm_cookie *scm) { security_socket_getpeersec_dgram(sock, NULL, &scm->secid); } #else static __inline__ void unix_get_peersec_dgram(struct socket *sock, struct scm_cookie *scm) { } #endif /* CONFIG_SECURITY_NETWORK */ static __inline__ void scm_set_cred(struct scm_cookie *scm, struct pid *pid, kuid_t uid, kgid_t gid) { scm->pid = get_pid(pid); scm->creds.pid = pid_vnr(pid); scm->creds.uid = uid; scm->creds.gid = gid; } static __inline__ void scm_destroy_cred(struct scm_cookie *scm) { put_pid(scm->pid); scm->pid = NULL; } static __inline__ void scm_destroy(struct scm_cookie *scm) { scm_destroy_cred(scm); if (scm->fp) __scm_destroy(scm); } static __inline__ int scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm, bool forcecreds) { memset(scm, 0, sizeof(*scm)); scm->creds.uid = INVALID_UID; scm->creds.gid = INVALID_GID; if (forcecreds) scm_set_cred(scm, task_tgid(current), current_uid(), current_gid()); unix_get_peersec_dgram(sock, scm); if (msg->msg_controllen <= 0) return 0; return __scm_send(sock, msg, scm); } #ifdef CONFIG_SECURITY_NETWORK static inline void scm_passec(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm) { struct lsm_context ctx; int err; if (test_bit(SOCK_PASSSEC, &sock->flags)) { err = security_secid_to_secctx(scm->secid, &ctx); if (err >= 0) { put_cmsg(msg, SOL_SOCKET, SCM_SECURITY, ctx.len, ctx.context); security_release_secctx(&ctx); } } } static inline bool scm_has_secdata(struct socket *sock) { return test_bit(SOCK_PASSSEC, &sock->flags); } #else static inline void scm_passec(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm) { } static inline bool scm_has_secdata(struct socket *sock) { return false; } #endif /* CONFIG_SECURITY_NETWORK */ static __inline__ void scm_pidfd_recv(struct msghdr *msg, struct scm_cookie *scm) { struct file *pidfd_file = NULL; int len, pidfd; /* put_cmsg() doesn't return an error if CMSG is truncated, * that's why we need to opencode these checks here. */ if (msg->msg_flags & MSG_CMSG_COMPAT) len = sizeof(struct compat_cmsghdr) + sizeof(int); else len = sizeof(struct cmsghdr) + sizeof(int); if (msg->msg_controllen < len) { msg->msg_flags |= MSG_CTRUNC; return; } if (!scm->pid) return; pidfd = pidfd_prepare(scm->pid, 0, &pidfd_file); if (put_cmsg(msg, SOL_SOCKET, SCM_PIDFD, sizeof(int), &pidfd)) { if (pidfd_file) { put_unused_fd(pidfd); fput(pidfd_file); } return; } if (pidfd_file) fd_install(pidfd, pidfd_file); } static inline bool __scm_recv_common(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm, int flags) { if (!msg->msg_control) { if (test_bit(SOCK_PASSCRED, &sock->flags) || test_bit(SOCK_PASSPIDFD, &sock->flags) || scm->fp || scm_has_secdata(sock)) msg->msg_flags |= MSG_CTRUNC; scm_destroy(scm); return false; } if (test_bit(SOCK_PASSCRED, &sock->flags)) { struct user_namespace *current_ns = current_user_ns(); struct ucred ucreds = { .pid = scm->creds.pid, .uid = from_kuid_munged(current_ns, scm->creds.uid), .gid = from_kgid_munged(current_ns, scm->creds.gid), }; put_cmsg(msg, SOL_SOCKET, SCM_CREDENTIALS, sizeof(ucreds), &ucreds); } scm_passec(sock, msg, scm); if (scm->fp) scm_detach_fds(msg, scm); return true; } static inline void scm_recv(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm, int flags) { if (!__scm_recv_common(sock, msg, scm, flags)) return; scm_destroy_cred(scm); } static inline void scm_recv_unix(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm, int flags) { if (!__scm_recv_common(sock, msg, scm, flags)) return; if (test_bit(SOCK_PASSPIDFD, &sock->flags)) scm_pidfd_recv(msg, scm); scm_destroy_cred(scm); } static inline int scm_recv_one_fd(struct file *f, int __user *ufd, unsigned int flags) { if (!ufd) return -EFAULT; return receive_fd(f, ufd, flags); } #endif /* __LINUX_NET_SCM_H */ |
10886 10832 10776 2742 10822 10821 10793 10757 51 10825 338 122 64 64 22 16 22 42 338 372 368 357 357 371 66 45 45 45 66 402 336 336 336 66 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 | // SPDX-License-Identifier: GPL-2.0-only /* * Access kernel or user memory without faulting. */ #include <linux/export.h> #include <linux/mm.h> #include <linux/uaccess.h> #include <asm/tlb.h> bool __weak copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { return true; } /* * The below only uses kmsan_check_memory() to ensure uninitialized kernel * memory isn't leaked. */ #define copy_from_kernel_nofault_loop(dst, src, len, type, err_label) \ while (len >= sizeof(type)) { \ __get_kernel_nofault(dst, src, type, err_label); \ kmsan_check_memory(src, sizeof(type)); \ dst += sizeof(type); \ src += sizeof(type); \ len -= sizeof(type); \ } long copy_from_kernel_nofault(void *dst, const void *src, size_t size) { unsigned long align = 0; if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) align = (unsigned long)dst | (unsigned long)src; if (!copy_from_kernel_nofault_allowed(src, size)) return -ERANGE; pagefault_disable(); if (!(align & 7)) copy_from_kernel_nofault_loop(dst, src, size, u64, Efault); if (!(align & 3)) copy_from_kernel_nofault_loop(dst, src, size, u32, Efault); if (!(align & 1)) copy_from_kernel_nofault_loop(dst, src, size, u16, Efault); copy_from_kernel_nofault_loop(dst, src, size, u8, Efault); pagefault_enable(); return 0; Efault: pagefault_enable(); return -EFAULT; } EXPORT_SYMBOL_GPL(copy_from_kernel_nofault); #define copy_to_kernel_nofault_loop(dst, src, len, type, err_label) \ while (len >= sizeof(type)) { \ __put_kernel_nofault(dst, src, type, err_label); \ instrument_write(dst, sizeof(type)); \ dst += sizeof(type); \ src += sizeof(type); \ len -= sizeof(type); \ } long copy_to_kernel_nofault(void *dst, const void *src, size_t size) { unsigned long align = 0; if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) align = (unsigned long)dst | (unsigned long)src; pagefault_disable(); if (!(align & 7)) copy_to_kernel_nofault_loop(dst, src, size, u64, Efault); if (!(align & 3)) copy_to_kernel_nofault_loop(dst, src, size, u32, Efault); if (!(align & 1)) copy_to_kernel_nofault_loop(dst, src, size, u16, Efault); copy_to_kernel_nofault_loop(dst, src, size, u8, Efault); pagefault_enable(); return 0; Efault: pagefault_enable(); return -EFAULT; } EXPORT_SYMBOL_GPL(copy_to_kernel_nofault); long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count) { const void *src = unsafe_addr; if (unlikely(count <= 0)) return 0; if (!copy_from_kernel_nofault_allowed(unsafe_addr, count)) return -ERANGE; pagefault_disable(); do { __get_kernel_nofault(dst, src, u8, Efault); dst++; src++; } while (dst[-1] && src - unsafe_addr < count); pagefault_enable(); dst[-1] = '\0'; return src - unsafe_addr; Efault: pagefault_enable(); dst[0] = '\0'; return -EFAULT; } /** * copy_from_user_nofault(): safely attempt to read from a user-space location * @dst: pointer to the buffer that shall take the data * @src: address to read from. This must be a user address. * @size: size of the data chunk * * Safely read from user address @src to the buffer at @dst. If a kernel fault * happens, handle that and return -EFAULT. */ long copy_from_user_nofault(void *dst, const void __user *src, size_t size) { long ret = -EFAULT; if (!__access_ok(src, size)) return ret; if (!nmi_uaccess_okay()) return ret; pagefault_disable(); ret = __copy_from_user_inatomic(dst, src, size); pagefault_enable(); if (ret) return -EFAULT; return 0; } EXPORT_SYMBOL_GPL(copy_from_user_nofault); /** * copy_to_user_nofault(): safely attempt to write to a user-space location * @dst: address to write to * @src: pointer to the data that shall be written * @size: size of the data chunk * * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. */ long copy_to_user_nofault(void __user *dst, const void *src, size_t size) { long ret = -EFAULT; if (access_ok(dst, size)) { pagefault_disable(); ret = __copy_to_user_inatomic(dst, src, size); pagefault_enable(); } if (ret) return -EFAULT; return 0; } EXPORT_SYMBOL_GPL(copy_to_user_nofault); /** * strncpy_from_user_nofault: - Copy a NUL terminated string from unsafe user * address. * @dst: Destination address, in kernel space. This buffer must be at * least @count bytes long. * @unsafe_addr: Unsafe user address. * @count: Maximum number of bytes to copy, including the trailing NUL. * * Copies a NUL-terminated string from unsafe user address to kernel buffer. * * On success, returns the length of the string INCLUDING the trailing NUL. * * If access fails, returns -EFAULT (some data may have been copied * and the trailing NUL added). * * If @count is smaller than the length of the string, copies @count-1 bytes, * sets the last byte of @dst buffer to NUL and returns @count. */ long strncpy_from_user_nofault(char *dst, const void __user *unsafe_addr, long count) { long ret; if (unlikely(count <= 0)) return 0; pagefault_disable(); ret = strncpy_from_user(dst, unsafe_addr, count); pagefault_enable(); if (ret >= count) { ret = count; dst[ret - 1] = '\0'; } else if (ret > 0) { ret++; } return ret; } /** * strnlen_user_nofault: - Get the size of a user string INCLUDING final NUL. * @unsafe_addr: The string to measure. * @count: Maximum count (including NUL) * * Get the size of a NUL-terminated string in user space without pagefault. * * Returns the size of the string INCLUDING the terminating NUL. * * If the string is too long, returns a number larger than @count. User * has to check the return value against "> count". * On exception (or invalid count), returns 0. * * Unlike strnlen_user, this can be used from IRQ handler etc. because * it disables pagefaults. */ long strnlen_user_nofault(const void __user *unsafe_addr, long count) { int ret; pagefault_disable(); ret = strnlen_user(unsafe_addr, count); pagefault_enable(); return ret; } void __copy_overflow(int size, unsigned long count) { WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count); } EXPORT_SYMBOL(__copy_overflow); |
20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 | /* * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include <linux/dma-mapping.h> #include <linux/err.h> #include <linux/idr.h> #include <linux/interrupt.h> #include <linux/rbtree.h> #include <linux/sched.h> #include <linux/spinlock.h> #include <linux/workqueue.h> #include <linux/completion.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/sysctl.h> #include <rdma/iw_cm.h> #include <rdma/ib_addr.h> #include <rdma/iw_portmap.h> #include <rdma/rdma_netlink.h> #include "iwcm.h" MODULE_AUTHOR("Tom Tucker"); MODULE_DESCRIPTION("iWARP CM"); MODULE_LICENSE("Dual BSD/GPL"); static const char * const iwcm_rej_reason_strs[] = { [ECONNRESET] = "reset by remote host", [ECONNREFUSED] = "refused by remote application", [ETIMEDOUT] = "setup timeout", }; const char *__attribute_const__ iwcm_reject_msg(int reason) { size_t index; /* iWARP uses negative errnos */ index = -reason; if (index < ARRAY_SIZE(iwcm_rej_reason_strs) && iwcm_rej_reason_strs[index]) return iwcm_rej_reason_strs[index]; else return "unrecognized reason"; } EXPORT_SYMBOL(iwcm_reject_msg); static struct rdma_nl_cbs iwcm_nl_cb_table[RDMA_NL_IWPM_NUM_OPS] = { [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb}, [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb}, [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb}, [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb}, [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}, [RDMA_NL_IWPM_HELLO] = {.dump = iwpm_hello_cb} }; static struct workqueue_struct *iwcm_wq; struct iwcm_work { struct work_struct work; struct iwcm_id_private *cm_id; struct list_head list; struct iw_cm_event event; struct list_head free_list; }; static unsigned int default_backlog = 256; static struct ctl_table_header *iwcm_ctl_table_hdr; static struct ctl_table iwcm_ctl_table[] = { { .procname = "default_backlog", .data = &default_backlog, .maxlen = sizeof(default_backlog), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, }, }; /* * The following services provide a mechanism for pre-allocating iwcm_work * elements. The design pre-allocates them based on the cm_id type: * LISTENING IDS: Get enough elements preallocated to handle the * listen backlog. * ACTIVE IDS: 4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE * PASSIVE IDS: 3: ESTABLISHED, DISCONNECT, CLOSE * * Allocating them in connect and listen avoids having to deal * with allocation failures on the event upcall from the provider (which * is called in the interrupt context). * * One exception is when creating the cm_id for incoming connection requests. * There are two cases: * 1) in the event upcall, cm_event_handler(), for a listening cm_id. If * the backlog is exceeded, then no more connection request events will * be processed. cm_event_handler() returns -ENOMEM in this case. Its up * to the provider to reject the connection request. * 2) in the connection request workqueue handler, cm_conn_req_handler(). * If work elements cannot be allocated for the new connect request cm_id, * then IWCM will call the provider reject method. This is ok since * cm_conn_req_handler() runs in the workqueue thread context. */ static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv) { struct iwcm_work *work; if (list_empty(&cm_id_priv->work_free_list)) return NULL; work = list_first_entry(&cm_id_priv->work_free_list, struct iwcm_work, free_list); list_del_init(&work->free_list); return work; } static void put_work(struct iwcm_work *work) { list_add(&work->free_list, &work->cm_id->work_free_list); } static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv) { struct list_head *e, *tmp; list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) { list_del(e); kfree(list_entry(e, struct iwcm_work, free_list)); } } static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count) { struct iwcm_work *work; BUG_ON(!list_empty(&cm_id_priv->work_free_list)); while (count--) { work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL); if (!work) { dealloc_work_entries(cm_id_priv); return -ENOMEM; } work->cm_id = cm_id_priv; INIT_LIST_HEAD(&work->list); put_work(work); } return 0; } /* * Save private data from incoming connection requests to * iw_cm_event, so the low level driver doesn't have to. Adjust * the event ptr to point to the local copy. */ static int copy_private_data(struct iw_cm_event *event) { void *p; p = kmemdup(event->private_data, event->private_data_len, GFP_ATOMIC); if (!p) return -ENOMEM; event->private_data = p; return 0; } static void free_cm_id(struct iwcm_id_private *cm_id_priv) { dealloc_work_entries(cm_id_priv); kfree(cm_id_priv); } /* * Release a reference on cm_id. If the last reference is being * released, free the cm_id and return 'true'. */ static bool iwcm_deref_id(struct iwcm_id_private *cm_id_priv) { if (refcount_dec_and_test(&cm_id_priv->refcount)) { BUG_ON(!list_empty(&cm_id_priv->work_list)); free_cm_id(cm_id_priv); return true; } return false; } static void add_ref(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); refcount_inc(&cm_id_priv->refcount); } static void rem_ref(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); (void)iwcm_deref_id(cm_id_priv); } static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event); struct iw_cm_id *iw_create_cm_id(struct ib_device *device, iw_cm_handler cm_handler, void *context) { struct iwcm_id_private *cm_id_priv; cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL); if (!cm_id_priv) return ERR_PTR(-ENOMEM); cm_id_priv->state = IW_CM_STATE_IDLE; cm_id_priv->id.device = device; cm_id_priv->id.cm_handler = cm_handler; cm_id_priv->id.context = context; cm_id_priv->id.event_handler = cm_event_handler; cm_id_priv->id.add_ref = add_ref; cm_id_priv->id.rem_ref = rem_ref; spin_lock_init(&cm_id_priv->lock); refcount_set(&cm_id_priv->refcount, 1); init_waitqueue_head(&cm_id_priv->connect_wait); init_completion(&cm_id_priv->destroy_comp); INIT_LIST_HEAD(&cm_id_priv->work_list); INIT_LIST_HEAD(&cm_id_priv->work_free_list); return &cm_id_priv->id; } EXPORT_SYMBOL(iw_create_cm_id); static int iwcm_modify_qp_err(struct ib_qp *qp) { struct ib_qp_attr qp_attr; if (!qp) return -EINVAL; qp_attr.qp_state = IB_QPS_ERR; return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); } /* * This is really the RDMAC CLOSING state. It is most similar to the * IB SQD QP state. */ static int iwcm_modify_qp_sqd(struct ib_qp *qp) { struct ib_qp_attr qp_attr; BUG_ON(qp == NULL); qp_attr.qp_state = IB_QPS_SQD; return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); } /* * CM_ID <-- CLOSING * * Block if a passive or active connection is currently being processed. Then * process the event as follows: * - If we are ESTABLISHED, move to CLOSING and modify the QP state * based on the abrupt flag * - If the connection is already in the CLOSING or IDLE state, the peer is * disconnecting concurrently with us and we've already seen the * DISCONNECT event -- ignore the request and return 0 * - Disconnect on a listening endpoint returns -EINVAL */ int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt) { struct iwcm_id_private *cm_id_priv; unsigned long flags; int ret = 0; struct ib_qp *qp = NULL; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); /* Wait if we're currently in a connect or accept downcall */ wait_event(cm_id_priv->connect_wait, !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->state) { case IW_CM_STATE_ESTABLISHED: cm_id_priv->state = IW_CM_STATE_CLOSING; /* QP could be <nul> for user-mode client */ if (cm_id_priv->qp) qp = cm_id_priv->qp; else ret = -EINVAL; break; case IW_CM_STATE_LISTEN: ret = -EINVAL; break; case IW_CM_STATE_CLOSING: /* remote peer closed first */ case IW_CM_STATE_IDLE: /* accept or connect returned !0 */ break; case IW_CM_STATE_CONN_RECV: /* * App called disconnect before/without calling accept after * connect_request event delivered. */ break; case IW_CM_STATE_CONN_SENT: /* Can only get here if wait above fails */ default: BUG(); } spin_unlock_irqrestore(&cm_id_priv->lock, flags); if (qp) { if (abrupt) ret = iwcm_modify_qp_err(qp); else ret = iwcm_modify_qp_sqd(qp); /* * If both sides are disconnecting the QP could * already be in ERR or SQD states */ ret = 0; } return ret; } EXPORT_SYMBOL(iw_cm_disconnect); /* * CM_ID <-- DESTROYING * * Clean up all resources associated with the connection and release * the initial reference taken by iw_create_cm_id. * * Returns true if and only if the last cm_id_priv reference has been dropped. */ static bool destroy_cm_id(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; struct ib_qp *qp; unsigned long flags; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); /* * Wait if we're currently in a connect or accept downcall. A * listening endpoint should never block here. */ wait_event(cm_id_priv->connect_wait, !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); /* * Since we're deleting the cm_id, drop any events that * might arrive before the last dereference. */ set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags); spin_lock_irqsave(&cm_id_priv->lock, flags); qp = cm_id_priv->qp; cm_id_priv->qp = NULL; switch (cm_id_priv->state) { case IW_CM_STATE_LISTEN: cm_id_priv->state = IW_CM_STATE_DESTROYING; spin_unlock_irqrestore(&cm_id_priv->lock, flags); /* destroy the listening endpoint */ cm_id->device->ops.iw_destroy_listen(cm_id); spin_lock_irqsave(&cm_id_priv->lock, flags); break; case IW_CM_STATE_ESTABLISHED: cm_id_priv->state = IW_CM_STATE_DESTROYING; spin_unlock_irqrestore(&cm_id_priv->lock, flags); /* Abrupt close of the connection */ (void)iwcm_modify_qp_err(qp); spin_lock_irqsave(&cm_id_priv->lock, flags); break; case IW_CM_STATE_IDLE: case IW_CM_STATE_CLOSING: cm_id_priv->state = IW_CM_STATE_DESTROYING; break; case IW_CM_STATE_CONN_RECV: /* * App called destroy before/without calling accept after * receiving connection request event notification or * returned non zero from the event callback function. * In either case, must tell the provider to reject. */ cm_id_priv->state = IW_CM_STATE_DESTROYING; spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_id->device->ops.iw_reject(cm_id, NULL, 0); spin_lock_irqsave(&cm_id_priv->lock, flags); break; case IW_CM_STATE_CONN_SENT: case IW_CM_STATE_DESTROYING: default: BUG(); break; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); if (qp) cm_id_priv->id.device->ops.iw_rem_ref(qp); if (cm_id->mapped) { iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr); iwpm_remove_mapping(&cm_id->local_addr, RDMA_NL_IWCM); } return iwcm_deref_id(cm_id_priv); } /* * This function is only called by the application thread and cannot * be called by the event thread. The function will wait for all * references to be released on the cm_id and then kfree the cm_id * object. */ void iw_destroy_cm_id(struct iw_cm_id *cm_id) { if (!destroy_cm_id(cm_id)) flush_workqueue(iwcm_wq); } EXPORT_SYMBOL(iw_destroy_cm_id); /** * iw_cm_check_wildcard - If IP address is 0 then use original * @pm_addr: sockaddr containing the ip to check for wildcard * @cm_addr: sockaddr containing the actual IP address * @cm_outaddr: sockaddr to set IP addr which leaving port * * Checks the pm_addr for wildcard and then sets cm_outaddr's * IP to the actual (cm_addr). */ static void iw_cm_check_wildcard(struct sockaddr_storage *pm_addr, struct sockaddr_storage *cm_addr, struct sockaddr_storage *cm_outaddr) { if (pm_addr->ss_family == AF_INET) { struct sockaddr_in *pm4_addr = (struct sockaddr_in *)pm_addr; if (pm4_addr->sin_addr.s_addr == htonl(INADDR_ANY)) { struct sockaddr_in *cm4_addr = (struct sockaddr_in *)cm_addr; struct sockaddr_in *cm4_outaddr = (struct sockaddr_in *)cm_outaddr; cm4_outaddr->sin_addr = cm4_addr->sin_addr; } } else { struct sockaddr_in6 *pm6_addr = (struct sockaddr_in6 *)pm_addr; if (ipv6_addr_type(&pm6_addr->sin6_addr) == IPV6_ADDR_ANY) { struct sockaddr_in6 *cm6_addr = (struct sockaddr_in6 *)cm_addr; struct sockaddr_in6 *cm6_outaddr = (struct sockaddr_in6 *)cm_outaddr; cm6_outaddr->sin6_addr = cm6_addr->sin6_addr; } } } /** * iw_cm_map - Use portmapper to map the ports * @cm_id: connection manager pointer * @active: Indicates the active side when true * returns nonzero for error only if iwpm_create_mapinfo() fails * * Tries to add a mapping for a port using the Portmapper. If * successful in mapping the IP/Port it will check the remote * mapped IP address for a wildcard IP address and replace the * zero IP address with the remote_addr. */ static int iw_cm_map(struct iw_cm_id *cm_id, bool active) { const char *devname = dev_name(&cm_id->device->dev); const char *ifname = cm_id->device->iw_ifname; struct iwpm_dev_data pm_reg_msg = {}; struct iwpm_sa_data pm_msg; int status; if (strlen(devname) >= sizeof(pm_reg_msg.dev_name) || strlen(ifname) >= sizeof(pm_reg_msg.if_name)) return -EINVAL; cm_id->m_local_addr = cm_id->local_addr; cm_id->m_remote_addr = cm_id->remote_addr; strcpy(pm_reg_msg.dev_name, devname); strcpy(pm_reg_msg.if_name, ifname); if (iwpm_register_pid(&pm_reg_msg, RDMA_NL_IWCM) || !iwpm_valid_pid()) return 0; cm_id->mapped = true; pm_msg.loc_addr = cm_id->local_addr; pm_msg.rem_addr = cm_id->remote_addr; pm_msg.flags = (cm_id->device->iw_driver_flags & IW_F_NO_PORT_MAP) ? IWPM_FLAGS_NO_PORT_MAP : 0; if (active) status = iwpm_add_and_query_mapping(&pm_msg, RDMA_NL_IWCM); else status = iwpm_add_mapping(&pm_msg, RDMA_NL_IWCM); if (!status) { cm_id->m_local_addr = pm_msg.mapped_loc_addr; if (active) { cm_id->m_remote_addr = pm_msg.mapped_rem_addr; iw_cm_check_wildcard(&pm_msg.mapped_rem_addr, &cm_id->remote_addr, &cm_id->m_remote_addr); } } return iwpm_create_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr, RDMA_NL_IWCM, pm_msg.flags); } /* * CM_ID <-- LISTEN * * Start listening for connect requests. Generates one CONNECT_REQUEST * event for each inbound connect request. */ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) { struct iwcm_id_private *cm_id_priv; unsigned long flags; int ret; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); if (!backlog) backlog = default_backlog; ret = alloc_work_entries(cm_id_priv, backlog); if (ret) return ret; spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->state) { case IW_CM_STATE_IDLE: cm_id_priv->state = IW_CM_STATE_LISTEN; spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = iw_cm_map(cm_id, false); if (!ret) ret = cm_id->device->ops.iw_create_listen(cm_id, backlog); if (ret) cm_id_priv->state = IW_CM_STATE_IDLE; spin_lock_irqsave(&cm_id_priv->lock, flags); break; default: ret = -EINVAL; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(iw_cm_listen); /* * CM_ID <-- IDLE * * Rejects an inbound connection request. No events are generated. */ int iw_cm_reject(struct iw_cm_id *cm_id, const void *private_data, u8 private_data_len) { struct iwcm_id_private *cm_id_priv; unsigned long flags; int ret; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); return -EINVAL; } cm_id_priv->state = IW_CM_STATE_IDLE; spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = cm_id->device->ops.iw_reject(cm_id, private_data, private_data_len); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); return ret; } EXPORT_SYMBOL(iw_cm_reject); /* * CM_ID <-- ESTABLISHED * * Accepts an inbound connection request and generates an ESTABLISHED * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block * until the ESTABLISHED event is received from the provider. */ int iw_cm_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) { struct iwcm_id_private *cm_id_priv; struct ib_qp *qp; unsigned long flags; int ret; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); return -EINVAL; } /* Get the ib_qp given the QPN */ qp = cm_id->device->ops.iw_get_qp(cm_id->device, iw_param->qpn); if (!qp) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); return -EINVAL; } cm_id->device->ops.iw_add_ref(qp); cm_id_priv->qp = qp; spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = cm_id->device->ops.iw_accept(cm_id, iw_param); if (ret) { /* An error on accept precludes provider events */ BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); cm_id_priv->state = IW_CM_STATE_IDLE; spin_lock_irqsave(&cm_id_priv->lock, flags); qp = cm_id_priv->qp; cm_id_priv->qp = NULL; spin_unlock_irqrestore(&cm_id_priv->lock, flags); if (qp) cm_id->device->ops.iw_rem_ref(qp); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); } return ret; } EXPORT_SYMBOL(iw_cm_accept); /* * Active Side: CM_ID <-- CONN_SENT * * If successful, results in the generation of a CONNECT_REPLY * event. iw_cm_disconnect and iw_cm_destroy will block until the * CONNECT_REPLY event is received from the provider. */ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) { struct iwcm_id_private *cm_id_priv; int ret; unsigned long flags; struct ib_qp *qp = NULL; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); ret = alloc_work_entries(cm_id_priv, 4); if (ret) return ret; set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->state != IW_CM_STATE_IDLE) { ret = -EINVAL; goto err; } /* Get the ib_qp given the QPN */ qp = cm_id->device->ops.iw_get_qp(cm_id->device, iw_param->qpn); if (!qp) { ret = -EINVAL; goto err; } cm_id->device->ops.iw_add_ref(qp); cm_id_priv->qp = qp; cm_id_priv->state = IW_CM_STATE_CONN_SENT; spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = iw_cm_map(cm_id, true); if (!ret) ret = cm_id->device->ops.iw_connect(cm_id, iw_param); if (!ret) return 0; /* success */ spin_lock_irqsave(&cm_id_priv->lock, flags); qp = cm_id_priv->qp; cm_id_priv->qp = NULL; cm_id_priv->state = IW_CM_STATE_IDLE; err: spin_unlock_irqrestore(&cm_id_priv->lock, flags); if (qp) cm_id->device->ops.iw_rem_ref(qp); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); return ret; } EXPORT_SYMBOL(iw_cm_connect); /* * Passive Side: new CM_ID <-- CONN_RECV * * Handles an inbound connect request. The function creates a new * iw_cm_id to represent the new connection and inherits the client * callback function and other attributes from the listening parent. * * The work item contains a pointer to the listen_cm_id and the event. The * listen_cm_id contains the client cm_handler, context and * device. These are copied when the device is cloned. The event * contains the new four tuple. * * An error on the child should not affect the parent, so this * function does not return a value. */ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, struct iw_cm_event *iw_event) { unsigned long flags; struct iw_cm_id *cm_id; struct iwcm_id_private *cm_id_priv; int ret; /* * The provider should never generate a connection request * event with a bad status. */ BUG_ON(iw_event->status); cm_id = iw_create_cm_id(listen_id_priv->id.device, listen_id_priv->id.cm_handler, listen_id_priv->id.context); /* If the cm_id could not be created, ignore the request */ if (IS_ERR(cm_id)) goto out; cm_id->provider_data = iw_event->provider_data; cm_id->m_local_addr = iw_event->local_addr; cm_id->m_remote_addr = iw_event->remote_addr; cm_id->local_addr = listen_id_priv->id.local_addr; ret = iwpm_get_remote_info(&listen_id_priv->id.m_local_addr, &iw_event->remote_addr, &cm_id->remote_addr, RDMA_NL_IWCM); if (ret) { cm_id->remote_addr = iw_event->remote_addr; } else { iw_cm_check_wildcard(&listen_id_priv->id.m_local_addr, &iw_event->local_addr, &cm_id->local_addr); iw_event->local_addr = cm_id->local_addr; iw_event->remote_addr = cm_id->remote_addr; } cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); cm_id_priv->state = IW_CM_STATE_CONN_RECV; /* * We could be destroying the listening id. If so, ignore this * upcall. */ spin_lock_irqsave(&listen_id_priv->lock, flags); if (listen_id_priv->state != IW_CM_STATE_LISTEN) { spin_unlock_irqrestore(&listen_id_priv->lock, flags); iw_cm_reject(cm_id, NULL, 0); iw_destroy_cm_id(cm_id); goto out; } spin_unlock_irqrestore(&listen_id_priv->lock, flags); ret = alloc_work_entries(cm_id_priv, 3); if (ret) { iw_cm_reject(cm_id, NULL, 0); iw_destroy_cm_id(cm_id); goto out; } /* Call the client CM handler */ ret = cm_id->cm_handler(cm_id, iw_event); if (ret) { iw_cm_reject(cm_id, NULL, 0); iw_destroy_cm_id(cm_id); } out: if (iw_event->private_data_len) kfree(iw_event->private_data); } /* * Passive Side: CM_ID <-- ESTABLISHED * * The provider generated an ESTABLISHED event which means that * the MPA negotion has completed successfully and we are now in MPA * FPDU mode. * * This event can only be received in the CONN_RECV state. If the * remote peer closed, the ESTABLISHED event would be received followed * by the CLOSE event. If the app closes, it will block until we wake * it up after processing this event. */ static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { unsigned long flags; int ret; spin_lock_irqsave(&cm_id_priv->lock, flags); /* * We clear the CONNECT_WAIT bit here to allow the callback * function to call iw_cm_disconnect. Calling iw_destroy_cm_id * from a callback handler is not allowed. */ clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); cm_id_priv->state = IW_CM_STATE_ESTABLISHED; spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); wake_up_all(&cm_id_priv->connect_wait); return ret; } /* * Active Side: CM_ID <-- ESTABLISHED * * The app has called connect and is waiting for the established event to * post it's requests to the server. This event will wake up anyone * blocked in iw_cm_disconnect or iw_destroy_id. */ static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { struct ib_qp *qp = NULL; unsigned long flags; int ret; spin_lock_irqsave(&cm_id_priv->lock, flags); /* * Clear the connect wait bit so a callback function calling * iw_cm_disconnect will not wait and deadlock this thread */ clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT); if (iw_event->status == 0) { cm_id_priv->id.m_local_addr = iw_event->local_addr; cm_id_priv->id.m_remote_addr = iw_event->remote_addr; iw_event->local_addr = cm_id_priv->id.local_addr; iw_event->remote_addr = cm_id_priv->id.remote_addr; cm_id_priv->state = IW_CM_STATE_ESTABLISHED; } else { /* REJECTED or RESET */ qp = cm_id_priv->qp; cm_id_priv->qp = NULL; cm_id_priv->state = IW_CM_STATE_IDLE; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); if (qp) cm_id_priv->id.device->ops.iw_rem_ref(qp); ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); if (iw_event->private_data_len) kfree(iw_event->private_data); /* Wake up waiters on connect complete */ wake_up_all(&cm_id_priv->connect_wait); return ret; } /* * CM_ID <-- CLOSING * * If in the ESTABLISHED state, move to CLOSING. */ static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { unsigned long flags; spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED) cm_id_priv->state = IW_CM_STATE_CLOSING; spin_unlock_irqrestore(&cm_id_priv->lock, flags); } /* * CM_ID <-- IDLE * * If in the ESTBLISHED or CLOSING states, the QP will have have been * moved by the provider to the ERR state. Disassociate the CM_ID from * the QP, move to IDLE, and remove the 'connected' reference. * * If in some other state, the cm_id was destroyed asynchronously. * This is the last reference that will result in waking up * the app thread blocked in iw_destroy_cm_id. */ static int cm_close_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { struct ib_qp *qp; unsigned long flags; int ret = 0, notify_event = 0; spin_lock_irqsave(&cm_id_priv->lock, flags); qp = cm_id_priv->qp; cm_id_priv->qp = NULL; switch (cm_id_priv->state) { case IW_CM_STATE_ESTABLISHED: case IW_CM_STATE_CLOSING: cm_id_priv->state = IW_CM_STATE_IDLE; notify_event = 1; break; case IW_CM_STATE_DESTROYING: break; default: BUG(); } spin_unlock_irqrestore(&cm_id_priv->lock, flags); if (qp) cm_id_priv->id.device->ops.iw_rem_ref(qp); if (notify_event) ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); return ret; } static int process_event(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { int ret = 0; switch (iw_event->event) { case IW_CM_EVENT_CONNECT_REQUEST: cm_conn_req_handler(cm_id_priv, iw_event); break; case IW_CM_EVENT_CONNECT_REPLY: ret = cm_conn_rep_handler(cm_id_priv, iw_event); break; case IW_CM_EVENT_ESTABLISHED: ret = cm_conn_est_handler(cm_id_priv, iw_event); break; case IW_CM_EVENT_DISCONNECT: cm_disconnect_handler(cm_id_priv, iw_event); break; case IW_CM_EVENT_CLOSE: ret = cm_close_handler(cm_id_priv, iw_event); break; default: BUG(); } return ret; } /* * Process events on the work_list for the cm_id. If the callback * function requests that the cm_id be deleted, a flag is set in the * cm_id flags to indicate that when the last reference is * removed, the cm_id is to be destroyed. This is necessary to * distinguish between an object that will be destroyed by the app * thread asleep on the destroy_comp list vs. an object destroyed * here synchronously when the last reference is removed. */ static void cm_work_handler(struct work_struct *_work) { struct iwcm_work *work = container_of(_work, struct iwcm_work, work); struct iw_cm_event levent; struct iwcm_id_private *cm_id_priv = work->cm_id; unsigned long flags; int ret = 0; spin_lock_irqsave(&cm_id_priv->lock, flags); while (!list_empty(&cm_id_priv->work_list)) { work = list_first_entry(&cm_id_priv->work_list, struct iwcm_work, list); list_del_init(&work->list); levent = work->event; put_work(work); spin_unlock_irqrestore(&cm_id_priv->lock, flags); if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) { ret = process_event(cm_id_priv, &levent); if (ret) WARN_ON_ONCE(destroy_cm_id(&cm_id_priv->id)); } else pr_debug("dropping event %d\n", levent.event); if (iwcm_deref_id(cm_id_priv)) return; spin_lock_irqsave(&cm_id_priv->lock, flags); } spin_unlock_irqrestore(&cm_id_priv->lock, flags); } /* * This function is called on interrupt context. Schedule events on * the iwcm_wq thread to allow callback functions to downcall into * the CM and/or block. Events are queued to a per-CM_ID * work_list. If this is the first event on the work_list, the work * element is also queued on the iwcm_wq thread. * * Each event holds a reference on the cm_id. Until the last posted * event has been delivered and processed, the cm_id cannot be * deleted. * * Returns: * 0 - the event was handled. * -ENOMEM - the event was not handled due to lack of resources. */ static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *iw_event) { struct iwcm_work *work; struct iwcm_id_private *cm_id_priv; unsigned long flags; int ret = 0; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); work = get_work(cm_id_priv); if (!work) { ret = -ENOMEM; goto out; } INIT_WORK(&work->work, cm_work_handler); work->cm_id = cm_id_priv; work->event = *iw_event; if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST || work->event.event == IW_CM_EVENT_CONNECT_REPLY) && work->event.private_data_len) { ret = copy_private_data(&work->event); if (ret) { put_work(work); goto out; } } refcount_inc(&cm_id_priv->refcount); list_add_tail(&work->list, &cm_id_priv->work_list); queue_work(iwcm_wq, &work->work); out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { unsigned long flags; int ret; spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->state) { case IW_CM_STATE_IDLE: case IW_CM_STATE_CONN_SENT: case IW_CM_STATE_CONN_RECV: case IW_CM_STATE_ESTABLISHED: *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE| IB_ACCESS_REMOTE_READ; ret = 0; break; default: ret = -EINVAL; break; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { unsigned long flags; int ret; spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->state) { case IW_CM_STATE_IDLE: case IW_CM_STATE_CONN_SENT: case IW_CM_STATE_CONN_RECV: case IW_CM_STATE_ESTABLISHED: *qp_attr_mask = 0; ret = 0; break; default: ret = -EINVAL; break; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { struct iwcm_id_private *cm_id_priv; int ret; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); switch (qp_attr->qp_state) { case IB_QPS_INIT: case IB_QPS_RTR: ret = iwcm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask); break; case IB_QPS_RTS: ret = iwcm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask); break; default: ret = -EINVAL; break; } return ret; } EXPORT_SYMBOL(iw_cm_init_qp_attr); static int __init iw_cm_init(void) { int ret; ret = iwpm_init(RDMA_NL_IWCM); if (ret) return ret; iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", WQ_MEM_RECLAIM); if (!iwcm_wq) goto err_alloc; iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm", iwcm_ctl_table); if (!iwcm_ctl_table_hdr) { pr_err("iw_cm: couldn't register sysctl paths\n"); goto err_sysctl; } rdma_nl_register(RDMA_NL_IWCM, iwcm_nl_cb_table); return 0; err_sysctl: destroy_workqueue(iwcm_wq); err_alloc: iwpm_exit(RDMA_NL_IWCM); return -ENOMEM; } static void __exit iw_cm_cleanup(void) { rdma_nl_unregister(RDMA_NL_IWCM); unregister_net_sysctl_table(iwcm_ctl_table_hdr); destroy_workqueue(iwcm_wq); iwpm_exit(RDMA_NL_IWCM); } MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_IWCM, 2); module_init(iw_cm_init); module_exit(iw_cm_cleanup); |
1771 649 43893 575 20396 20437 17848 575 575 20398 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __X86_KERNEL_FPU_CONTEXT_H #define __X86_KERNEL_FPU_CONTEXT_H #include <asm/fpu/xstate.h> #include <asm/trace/fpu.h> /* Functions related to FPU context tracking */ /* * The in-register FPU state for an FPU context on a CPU is assumed to be * valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx * matches the FPU. * * If the FPU register state is valid, the kernel can skip restoring the * FPU state from memory. * * Any code that clobbers the FPU registers or updates the in-memory * FPU state for a task MUST let the rest of the kernel know that the * FPU registers are no longer valid for this task. * * Invalidate a resource you control: CPU if using the CPU for something else * (with preemption disabled), FPU for the current task, or a task that * is prevented from running by the current task. */ static inline void __cpu_invalidate_fpregs_state(void) { __this_cpu_write(fpu_fpregs_owner_ctx, NULL); } static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu) { fpu->last_cpu = -1; } static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu) { return fpu == this_cpu_read(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu; } static inline void fpregs_deactivate(struct fpu *fpu) { __this_cpu_write(fpu_fpregs_owner_ctx, NULL); trace_x86_fpu_regs_deactivated(fpu); } static inline void fpregs_activate(struct fpu *fpu) { __this_cpu_write(fpu_fpregs_owner_ctx, fpu); trace_x86_fpu_regs_activated(fpu); } /* Internal helper for switch_fpu_return() and signal frame setup */ static inline void fpregs_restore_userregs(void) { struct fpu *fpu = ¤t->thread.fpu; int cpu = smp_processor_id(); if (WARN_ON_ONCE(current->flags & (PF_KTHREAD | PF_USER_WORKER))) return; if (!fpregs_state_valid(fpu, cpu)) { /* * This restores _all_ xstate which has not been * established yet. * * If PKRU is enabled, then the PKRU value is already * correct because it was either set in switch_to() or in * flush_thread(). So it is excluded because it might be * not up to date in current->thread.fpu.xsave state. * * XFD state is handled in restore_fpregs_from_fpstate(). */ restore_fpregs_from_fpstate(fpu->fpstate, XFEATURE_MASK_FPSTATE); fpregs_activate(fpu); fpu->last_cpu = cpu; } clear_thread_flag(TIF_NEED_FPU_LOAD); } #endif |
254 254 3 2 253 140 142 132 136 8 3 32 11 256 4 3 19 27 16 21 18 2 9 13 25 25 23 25 23 23 22 21 10 10 10 8 9 8 7 7 30 29 28 26 3 23 23 18 23 7 29 7 3 9 9 9 6 3 8 7 7 4 2 261 16 13 258 254 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 | // SPDX-License-Identifier: GPL-2.0-or-later #include <linux/syscalls.h> #include <linux/time_namespace.h> #include "futex.h" /* * Support for robust futexes: the kernel cleans up held futexes at * thread exit time. * * Implementation: user-space maintains a per-thread list of locks it * is holding. Upon do_exit(), the kernel carefully walks this list, * and marks all locks that are owned by this thread with the * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is * always manipulated with the lock held, so the list is private and * per-thread. Userspace also maintains a per-thread 'list_op_pending' * field, to allow the kernel to clean up if the thread dies after * acquiring the lock, but just before it could have added itself to * the list. There can only be one such pending lock. */ /** * sys_set_robust_list() - Set the robust-futex list head of a task * @head: pointer to the list-head * @len: length of the list-head, as userspace expects */ SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, size_t, len) { /* * The kernel knows only one size for now: */ if (unlikely(len != sizeof(*head))) return -EINVAL; current->robust_list = head; return 0; } /** * sys_get_robust_list() - Get the robust-futex list head of a task * @pid: pid of the process [zero for current task] * @head_ptr: pointer to a list-head pointer, the kernel fills it in * @len_ptr: pointer to a length field, the kernel fills in the header size */ SYSCALL_DEFINE3(get_robust_list, int, pid, struct robust_list_head __user * __user *, head_ptr, size_t __user *, len_ptr) { struct robust_list_head __user *head; unsigned long ret; struct task_struct *p; rcu_read_lock(); ret = -ESRCH; if (!pid) p = current; else { p = find_task_by_vpid(pid); if (!p) goto err_unlock; } ret = -EPERM; if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) goto err_unlock; head = p->robust_list; rcu_read_unlock(); if (put_user(sizeof(*head), len_ptr)) return -EFAULT; return put_user(head, head_ptr); err_unlock: rcu_read_unlock(); return ret; } long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3) { unsigned int flags = futex_to_flags(op); int cmd = op & FUTEX_CMD_MASK; if (flags & FLAGS_CLOCKRT) { if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI && cmd != FUTEX_LOCK_PI2) return -ENOSYS; } switch (cmd) { case FUTEX_WAIT: val3 = FUTEX_BITSET_MATCH_ANY; fallthrough; case FUTEX_WAIT_BITSET: return futex_wait(uaddr, flags, val, timeout, val3); case FUTEX_WAKE: val3 = FUTEX_BITSET_MATCH_ANY; fallthrough; case FUTEX_WAKE_BITSET: return futex_wake(uaddr, flags, val, val3); case FUTEX_REQUEUE: return futex_requeue(uaddr, flags, uaddr2, flags, val, val2, NULL, 0); case FUTEX_CMP_REQUEUE: return futex_requeue(uaddr, flags, uaddr2, flags, val, val2, &val3, 0); case FUTEX_WAKE_OP: return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3); case FUTEX_LOCK_PI: flags |= FLAGS_CLOCKRT; fallthrough; case FUTEX_LOCK_PI2: return futex_lock_pi(uaddr, flags, timeout, 0); case FUTEX_UNLOCK_PI: return futex_unlock_pi(uaddr, flags); case FUTEX_TRYLOCK_PI: return futex_lock_pi(uaddr, flags, NULL, 1); case FUTEX_WAIT_REQUEUE_PI: val3 = FUTEX_BITSET_MATCH_ANY; return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3, uaddr2); case FUTEX_CMP_REQUEUE_PI: return futex_requeue(uaddr, flags, uaddr2, flags, val, val2, &val3, 1); } return -ENOSYS; } static __always_inline bool futex_cmd_has_timeout(u32 cmd) { switch (cmd) { case FUTEX_WAIT: case FUTEX_LOCK_PI: case FUTEX_LOCK_PI2: case FUTEX_WAIT_BITSET: case FUTEX_WAIT_REQUEUE_PI: return true; } return false; } static __always_inline int futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t) { if (!timespec64_valid(ts)) return -EINVAL; *t = timespec64_to_ktime(*ts); if (cmd == FUTEX_WAIT) *t = ktime_add_safe(ktime_get(), *t); else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME)) *t = timens_ktime_to_host(CLOCK_MONOTONIC, *t); return 0; } SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, const struct __kernel_timespec __user *, utime, u32 __user *, uaddr2, u32, val3) { int ret, cmd = op & FUTEX_CMD_MASK; ktime_t t, *tp = NULL; struct timespec64 ts; if (utime && futex_cmd_has_timeout(cmd)) { if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG)))) return -EFAULT; if (get_timespec64(&ts, utime)) return -EFAULT; ret = futex_init_timeout(cmd, op, &ts, &t); if (ret) return ret; tp = &t; } return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3); } /** * futex_parse_waitv - Parse a waitv array from userspace * @futexv: Kernel side list of waiters to be filled * @uwaitv: Userspace list to be parsed * @nr_futexes: Length of futexv * @wake: Wake to call when futex is woken * @wake_data: Data for the wake handler * * Return: Error code on failure, 0 on success */ int futex_parse_waitv(struct futex_vector *futexv, struct futex_waitv __user *uwaitv, unsigned int nr_futexes, futex_wake_fn *wake, void *wake_data) { struct futex_waitv aux; unsigned int i; for (i = 0; i < nr_futexes; i++) { unsigned int flags; if (copy_from_user(&aux, &uwaitv[i], sizeof(aux))) return -EFAULT; if ((aux.flags & ~FUTEX2_VALID_MASK) || aux.__reserved) return -EINVAL; flags = futex2_to_flags(aux.flags); if (!futex_flags_valid(flags)) return -EINVAL; if (!futex_validate_input(flags, aux.val)) return -EINVAL; futexv[i].w.flags = flags; futexv[i].w.val = aux.val; futexv[i].w.uaddr = aux.uaddr; futexv[i].q = futex_q_init; futexv[i].q.wake = wake; futexv[i].q.wake_data = wake_data; } return 0; } static int futex2_setup_timeout(struct __kernel_timespec __user *timeout, clockid_t clockid, struct hrtimer_sleeper *to) { int flag_clkid = 0, flag_init = 0; struct timespec64 ts; ktime_t time; int ret; if (!timeout) return 0; if (clockid == CLOCK_REALTIME) { flag_clkid = FLAGS_CLOCKRT; flag_init = FUTEX_CLOCK_REALTIME; } if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC) return -EINVAL; if (get_timespec64(&ts, timeout)) return -EFAULT; /* * Since there's no opcode for futex_waitv, use * FUTEX_WAIT_BITSET that uses absolute timeout as well */ ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time); if (ret) return ret; futex_setup_timer(&time, to, flag_clkid, 0); return 0; } static inline void futex2_destroy_timeout(struct hrtimer_sleeper *to) { hrtimer_cancel(&to->timer); destroy_hrtimer_on_stack(&to->timer); } /** * sys_futex_waitv - Wait on a list of futexes * @waiters: List of futexes to wait on * @nr_futexes: Length of futexv * @flags: Flag for timeout (monotonic/realtime) * @timeout: Optional absolute timeout. * @clockid: Clock to be used for the timeout, realtime or monotonic. * * Given an array of `struct futex_waitv`, wait on each uaddr. The thread wakes * if a futex_wake() is performed at any uaddr. The syscall returns immediately * if any waiter has *uaddr != val. *timeout is an optional timeout value for * the operation. Each waiter has individual flags. The `flags` argument for * the syscall should be used solely for specifying the timeout as realtime, if * needed. Flags for private futexes, sizes, etc. should be used on the * individual flags of each waiter. * * Returns the array index of one of the woken futexes. No further information * is provided: any number of other futexes may also have been woken by the * same event, and if more than one futex was woken, the retrned index may * refer to any one of them. (It is not necessaryily the futex with the * smallest index, nor the one most recently woken, nor...) */ SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters, unsigned int, nr_futexes, unsigned int, flags, struct __kernel_timespec __user *, timeout, clockid_t, clockid) { struct hrtimer_sleeper to; struct futex_vector *futexv; int ret; /* This syscall supports no flags for now */ if (flags) return -EINVAL; if (!nr_futexes || nr_futexes > FUTEX_WAITV_MAX || !waiters) return -EINVAL; if (timeout && (ret = futex2_setup_timeout(timeout, clockid, &to))) return ret; futexv = kcalloc(nr_futexes, sizeof(*futexv), GFP_KERNEL); if (!futexv) { ret = -ENOMEM; goto destroy_timer; } ret = futex_parse_waitv(futexv, waiters, nr_futexes, futex_wake_mark, NULL); if (!ret) ret = futex_wait_multiple(futexv, nr_futexes, timeout ? &to : NULL); kfree(futexv); destroy_timer: if (timeout) futex2_destroy_timeout(&to); return ret; } /* * sys_futex_wake - Wake a number of futexes * @uaddr: Address of the futex(es) to wake * @mask: bitmask * @nr: Number of the futexes to wake * @flags: FUTEX2 flags * * Identical to the traditional FUTEX_WAKE_BITSET op, except it is part of the * futex2 family of calls. */ SYSCALL_DEFINE4(futex_wake, void __user *, uaddr, unsigned long, mask, int, nr, unsigned int, flags) { if (flags & ~FUTEX2_VALID_MASK) return -EINVAL; flags = futex2_to_flags(flags); if (!futex_flags_valid(flags)) return -EINVAL; if (!futex_validate_input(flags, mask)) return -EINVAL; return futex_wake(uaddr, FLAGS_STRICT | flags, nr, mask); } /* * sys_futex_wait - Wait on a futex * @uaddr: Address of the futex to wait on * @val: Value of @uaddr * @mask: bitmask * @flags: FUTEX2 flags * @timeout: Optional absolute timeout * @clockid: Clock to be used for the timeout, realtime or monotonic * * Identical to the traditional FUTEX_WAIT_BITSET op, except it is part of the * futex2 familiy of calls. */ SYSCALL_DEFINE6(futex_wait, void __user *, uaddr, unsigned long, val, unsigned long, mask, unsigned int, flags, struct __kernel_timespec __user *, timeout, clockid_t, clockid) { struct hrtimer_sleeper to; int ret; if (flags & ~FUTEX2_VALID_MASK) return -EINVAL; flags = futex2_to_flags(flags); if (!futex_flags_valid(flags)) return -EINVAL; if (!futex_validate_input(flags, val) || !futex_validate_input(flags, mask)) return -EINVAL; if (timeout && (ret = futex2_setup_timeout(timeout, clockid, &to))) return ret; ret = __futex_wait(uaddr, flags, val, timeout ? &to : NULL, mask); if (timeout) futex2_destroy_timeout(&to); return ret; } /* * sys_futex_requeue - Requeue a waiter from one futex to another * @waiters: array describing the source and destination futex * @flags: unused * @nr_wake: number of futexes to wake * @nr_requeue: number of futexes to requeue * * Identical to the traditional FUTEX_CMP_REQUEUE op, except it is part of the * futex2 family of calls. */ SYSCALL_DEFINE4(futex_requeue, struct futex_waitv __user *, waiters, unsigned int, flags, int, nr_wake, int, nr_requeue) { struct futex_vector futexes[2]; u32 cmpval; int ret; if (flags) return -EINVAL; if (!waiters) return -EINVAL; ret = futex_parse_waitv(futexes, waiters, 2, futex_wake_mark, NULL); if (ret) return ret; cmpval = futexes[0].w.val; return futex_requeue(u64_to_user_ptr(futexes[0].w.uaddr), futexes[0].w.flags, u64_to_user_ptr(futexes[1].w.uaddr), futexes[1].w.flags, nr_wake, nr_requeue, &cmpval, 0); } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE2(set_robust_list, struct compat_robust_list_head __user *, head, compat_size_t, len) { if (unlikely(len != sizeof(*head))) return -EINVAL; current->compat_robust_list = head; return 0; } COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, compat_uptr_t __user *, head_ptr, compat_size_t __user *, len_ptr) { struct compat_robust_list_head __user *head; unsigned long ret; struct task_struct *p; rcu_read_lock(); ret = -ESRCH; if (!pid) p = current; else { p = find_task_by_vpid(pid); if (!p) goto err_unlock; } ret = -EPERM; if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) goto err_unlock; head = p->compat_robust_list; rcu_read_unlock(); if (put_user(sizeof(*head), len_ptr)) return -EFAULT; return put_user(ptr_to_compat(head), head_ptr); err_unlock: rcu_read_unlock(); return ret; } #endif /* CONFIG_COMPAT */ #ifdef CONFIG_COMPAT_32BIT_TIME SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, const struct old_timespec32 __user *, utime, u32 __user *, uaddr2, u32, val3) { int ret, cmd = op & FUTEX_CMD_MASK; ktime_t t, *tp = NULL; struct timespec64 ts; if (utime && futex_cmd_has_timeout(cmd)) { if (get_old_timespec32(&ts, utime)) return -EFAULT; ret = futex_init_timeout(cmd, op, &ts, &t); if (ret) return ret; tp = &t; } return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3); } #endif /* CONFIG_COMPAT_32BIT_TIME */ |
11 11 11 11 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 | // SPDX-License-Identifier: GPL-2.0-or-later /* * A generic kernel FIFO implementation * * Copyright (C) 2009/2010 Stefani Seibold <stefani@seibold.net> */ #include <linux/dma-mapping.h> #include <linux/err.h> #include <linux/export.h> #include <linux/kfifo.h> #include <linux/log2.h> #include <linux/scatterlist.h> #include <linux/slab.h> #include <linux/uaccess.h> /* * internal helper to calculate the unused elements in a fifo */ static inline unsigned int kfifo_unused(struct __kfifo *fifo) { return (fifo->mask + 1) - (fifo->in - fifo->out); } int __kfifo_alloc(struct __kfifo *fifo, unsigned int size, size_t esize, gfp_t gfp_mask) { /* * round up to the next power of 2, since our 'let the indices * wrap' technique works only in this case. */ size = roundup_pow_of_two(size); fifo->in = 0; fifo->out = 0; fifo->esize = esize; if (size < 2) { fifo->data = NULL; fifo->mask = 0; return -EINVAL; } fifo->data = kmalloc_array(esize, size, gfp_mask); if (!fifo->data) { fifo->mask = 0; return -ENOMEM; } fifo->mask = size - 1; return 0; } EXPORT_SYMBOL(__kfifo_alloc); void __kfifo_free(struct __kfifo *fifo) { kfree(fifo->data); fifo->in = 0; fifo->out = 0; fifo->esize = 0; fifo->data = NULL; fifo->mask = 0; } EXPORT_SYMBOL(__kfifo_free); int __kfifo_init(struct __kfifo *fifo, void *buffer, unsigned int size, size_t esize) { size /= esize; if (!is_power_of_2(size)) size = rounddown_pow_of_two(size); fifo->in = 0; fifo->out = 0; fifo->esize = esize; fifo->data = buffer; if (size < 2) { fifo->mask = 0; return -EINVAL; } fifo->mask = size - 1; return 0; } EXPORT_SYMBOL(__kfifo_init); static void kfifo_copy_in(struct __kfifo *fifo, const void *src, unsigned int len, unsigned int off) { unsigned int size = fifo->mask + 1; unsigned int esize = fifo->esize; unsigned int l; off &= fifo->mask; if (esize != 1) { off *= esize; size *= esize; len *= esize; } l = min(len, size - off); memcpy(fifo->data + off, src, l); memcpy(fifo->data, src + l, len - l); /* * make sure that the data in the fifo is up to date before * incrementing the fifo->in index counter */ smp_wmb(); } unsigned int __kfifo_in(struct __kfifo *fifo, const void *buf, unsigned int len) { unsigned int l; l = kfifo_unused(fifo); if (len > l) len = l; kfifo_copy_in(fifo, buf, len, fifo->in); fifo->in += len; return len; } EXPORT_SYMBOL(__kfifo_in); static void kfifo_copy_out(struct __kfifo *fifo, void *dst, unsigned int len, unsigned int off) { unsigned int size = fifo->mask + 1; unsigned int esize = fifo->esize; unsigned int l; off &= fifo->mask; if (esize != 1) { off *= esize; size *= esize; len *= esize; } l = min(len, size - off); memcpy(dst, fifo->data + off, l); memcpy(dst + l, fifo->data, len - l); /* * make sure that the data is copied before * incrementing the fifo->out index counter */ smp_wmb(); } unsigned int __kfifo_out_peek(struct __kfifo *fifo, void *buf, unsigned int len) { unsigned int l; l = fifo->in - fifo->out; if (len > l) len = l; kfifo_copy_out(fifo, buf, len, fifo->out); return len; } EXPORT_SYMBOL(__kfifo_out_peek); unsigned int __kfifo_out_linear(struct __kfifo *fifo, unsigned int *tail, unsigned int n) { unsigned int size = fifo->mask + 1; unsigned int off = fifo->out & fifo->mask; if (tail) *tail = off; return min3(n, fifo->in - fifo->out, size - off); } EXPORT_SYMBOL(__kfifo_out_linear); unsigned int __kfifo_out(struct __kfifo *fifo, void *buf, unsigned int len) { len = __kfifo_out_peek(fifo, buf, len); fifo->out += len; return len; } EXPORT_SYMBOL(__kfifo_out); static unsigned long kfifo_copy_from_user(struct __kfifo *fifo, const void __user *from, unsigned int len, unsigned int off, unsigned int *copied) { unsigned int size = fifo->mask + 1; unsigned int esize = fifo->esize; unsigned int l; unsigned long ret; off &= fifo->mask; if (esize != 1) { off *= esize; size *= esize; len *= esize; } l = min(len, size - off); ret = copy_from_user(fifo->data + off, from, l); if (unlikely(ret)) ret = DIV_ROUND_UP(ret + len - l, esize); else { ret = copy_from_user(fifo->data, from + l, len - l); if (unlikely(ret)) ret = DIV_ROUND_UP(ret, esize); } /* * make sure that the data in the fifo is up to date before * incrementing the fifo->in index counter */ smp_wmb(); *copied = len - ret * esize; /* return the number of elements which are not copied */ return ret; } int __kfifo_from_user(struct __kfifo *fifo, const void __user *from, unsigned long len, unsigned int *copied) { unsigned int l; unsigned long ret; unsigned int esize = fifo->esize; int err; if (esize != 1) len /= esize; l = kfifo_unused(fifo); if (len > l) len = l; ret = kfifo_copy_from_user(fifo, from, len, fifo->in, copied); if (unlikely(ret)) { len -= ret; err = -EFAULT; } else err = 0; fifo->in += len; return err; } EXPORT_SYMBOL(__kfifo_from_user); static unsigned long kfifo_copy_to_user(struct __kfifo *fifo, void __user *to, unsigned int len, unsigned int off, unsigned int *copied) { unsigned int l; unsigned long ret; unsigned int size = fifo->mask + 1; unsigned int esize = fifo->esize; off &= fifo->mask; if (esize != 1) { off *= esize; size *= esize; len *= esize; } l = min(len, size - off); ret = copy_to_user(to, fifo->data + off, l); if (unlikely(ret)) ret = DIV_ROUND_UP(ret + len - l, esize); else { ret = copy_to_user(to + l, fifo->data, len - l); if (unlikely(ret)) ret = DIV_ROUND_UP(ret, esize); } /* * make sure that the data is copied before * incrementing the fifo->out index counter */ smp_wmb(); *copied = len - ret * esize; /* return the number of elements which are not copied */ return ret; } int __kfifo_to_user(struct __kfifo *fifo, void __user *to, unsigned long len, unsigned int *copied) { unsigned int l; unsigned long ret; unsigned int esize = fifo->esize; int err; if (esize != 1) len /= esize; l = fifo->in - fifo->out; if (len > l) len = l; ret = kfifo_copy_to_user(fifo, to, len, fifo->out, copied); if (unlikely(ret)) { len -= ret; err = -EFAULT; } else err = 0; fifo->out += len; return err; } EXPORT_SYMBOL(__kfifo_to_user); static unsigned int setup_sgl_buf(struct __kfifo *fifo, struct scatterlist *sgl, unsigned int data_offset, int nents, unsigned int len, dma_addr_t dma) { const void *buf = fifo->data + data_offset; if (!nents || !len) return 0; sg_set_buf(sgl, buf, len); if (dma != DMA_MAPPING_ERROR) { sg_dma_address(sgl) = dma + data_offset; sg_dma_len(sgl) = len; } return 1; } static unsigned int setup_sgl(struct __kfifo *fifo, struct scatterlist *sgl, int nents, unsigned int len, unsigned int off, dma_addr_t dma) { unsigned int size = fifo->mask + 1; unsigned int esize = fifo->esize; unsigned int len_to_end; unsigned int n; off &= fifo->mask; if (esize != 1) { off *= esize; size *= esize; len *= esize; } len_to_end = min(len, size - off); n = setup_sgl_buf(fifo, sgl, off, nents, len_to_end, dma); n += setup_sgl_buf(fifo, sgl + n, 0, nents - n, len - len_to_end, dma); return n; } unsigned int __kfifo_dma_in_prepare(struct __kfifo *fifo, struct scatterlist *sgl, int nents, unsigned int len, dma_addr_t dma) { unsigned int l; l = kfifo_unused(fifo); if (len > l) len = l; return setup_sgl(fifo, sgl, nents, len, fifo->in, dma); } EXPORT_SYMBOL(__kfifo_dma_in_prepare); unsigned int __kfifo_dma_out_prepare(struct __kfifo *fifo, struct scatterlist *sgl, int nents, unsigned int len, dma_addr_t dma) { unsigned int l; l = fifo->in - fifo->out; if (len > l) len = l; return setup_sgl(fifo, sgl, nents, len, fifo->out, dma); } EXPORT_SYMBOL(__kfifo_dma_out_prepare); unsigned int __kfifo_max_r(unsigned int len, size_t recsize) { unsigned int max = (1 << (recsize << 3)) - 1; if (len > max) return max; return len; } EXPORT_SYMBOL(__kfifo_max_r); #define __KFIFO_PEEK(data, out, mask) \ ((data)[(out) & (mask)]) /* * __kfifo_peek_n internal helper function for determinate the length of * the next record in the fifo */ static unsigned int __kfifo_peek_n(struct __kfifo *fifo, size_t recsize) { unsigned int l; unsigned int mask = fifo->mask; unsigned char *data = fifo->data; l = __KFIFO_PEEK(data, fifo->out, mask); if (--recsize) l |= __KFIFO_PEEK(data, fifo->out + 1, mask) << 8; return l; } #define __KFIFO_POKE(data, in, mask, val) \ ( \ (data)[(in) & (mask)] = (unsigned char)(val) \ ) /* * __kfifo_poke_n internal helper function for storing the length of * the record into the fifo */ static void __kfifo_poke_n(struct __kfifo *fifo, unsigned int n, size_t recsize) { unsigned int mask = fifo->mask; unsigned char *data = fifo->data; __KFIFO_POKE(data, fifo->in, mask, n); if (recsize > 1) __KFIFO_POKE(data, fifo->in + 1, mask, n >> 8); } unsigned int __kfifo_len_r(struct __kfifo *fifo, size_t recsize) { return __kfifo_peek_n(fifo, recsize); } EXPORT_SYMBOL(__kfifo_len_r); unsigned int __kfifo_in_r(struct __kfifo *fifo, const void *buf, unsigned int len, size_t recsize) { if (len + recsize > kfifo_unused(fifo)) return 0; __kfifo_poke_n(fifo, len, recsize); kfifo_copy_in(fifo, buf, len, fifo->in + recsize); fifo->in += len + recsize; return len; } EXPORT_SYMBOL(__kfifo_in_r); static unsigned int kfifo_out_copy_r(struct __kfifo *fifo, void *buf, unsigned int len, size_t recsize, unsigned int *n) { *n = __kfifo_peek_n(fifo, recsize); if (len > *n) len = *n; kfifo_copy_out(fifo, buf, len, fifo->out + recsize); return len; } unsigned int __kfifo_out_peek_r(struct __kfifo *fifo, void *buf, unsigned int len, size_t recsize) { unsigned int n; if (fifo->in == fifo->out) return 0; return kfifo_out_copy_r(fifo, buf, len, recsize, &n); } EXPORT_SYMBOL(__kfifo_out_peek_r); unsigned int __kfifo_out_linear_r(struct __kfifo *fifo, unsigned int *tail, unsigned int n, size_t recsize) { if (fifo->in == fifo->out) return 0; if (tail) *tail = fifo->out + recsize; return min(n, __kfifo_peek_n(fifo, recsize)); } EXPORT_SYMBOL(__kfifo_out_linear_r); unsigned int __kfifo_out_r(struct __kfifo *fifo, void *buf, unsigned int len, size_t recsize) { unsigned int n; if (fifo->in == fifo->out) return 0; len = kfifo_out_copy_r(fifo, buf, len, recsize, &n); fifo->out += n + recsize; return len; } EXPORT_SYMBOL(__kfifo_out_r); void __kfifo_skip_r(struct __kfifo *fifo, size_t recsize) { unsigned int n; n = __kfifo_peek_n(fifo, recsize); fifo->out += n + recsize; } EXPORT_SYMBOL(__kfifo_skip_r); int __kfifo_from_user_r(struct __kfifo *fifo, const void __user *from, unsigned long len, unsigned int *copied, size_t recsize) { unsigned long ret; len = __kfifo_max_r(len, recsize); if (len + recsize > kfifo_unused(fifo)) { *copied = 0; return 0; } __kfifo_poke_n(fifo, len, recsize); ret = kfifo_copy_from_user(fifo, from, len, fifo->in + recsize, copied); if (unlikely(ret)) { *copied = 0; return -EFAULT; } fifo->in += len + recsize; return 0; } EXPORT_SYMBOL(__kfifo_from_user_r); int __kfifo_to_user_r(struct __kfifo *fifo, void __user *to, unsigned long len, unsigned int *copied, size_t recsize) { unsigned long ret; unsigned int n; if (fifo->in == fifo->out) { *copied = 0; return 0; } n = __kfifo_peek_n(fifo, recsize); if (len > n) len = n; ret = kfifo_copy_to_user(fifo, to, len, fifo->out + recsize, copied); if (unlikely(ret)) { *copied = 0; return -EFAULT; } fifo->out += n + recsize; return 0; } EXPORT_SYMBOL(__kfifo_to_user_r); unsigned int __kfifo_dma_in_prepare_r(struct __kfifo *fifo, struct scatterlist *sgl, int nents, unsigned int len, size_t recsize, dma_addr_t dma) { BUG_ON(!nents); len = __kfifo_max_r(len, recsize); if (len + recsize > kfifo_unused(fifo)) return 0; return setup_sgl(fifo, sgl, nents, len, fifo->in + recsize, dma); } EXPORT_SYMBOL(__kfifo_dma_in_prepare_r); void __kfifo_dma_in_finish_r(struct __kfifo *fifo, unsigned int len, size_t recsize) { len = __kfifo_max_r(len, recsize); __kfifo_poke_n(fifo, len, recsize); fifo->in += len + recsize; } EXPORT_SYMBOL(__kfifo_dma_in_finish_r); unsigned int __kfifo_dma_out_prepare_r(struct __kfifo *fifo, struct scatterlist *sgl, int nents, unsigned int len, size_t recsize, dma_addr_t dma) { BUG_ON(!nents); len = __kfifo_max_r(len, recsize); if (len + recsize > fifo->in - fifo->out) return 0; return setup_sgl(fifo, sgl, nents, len, fifo->out + recsize, dma); } EXPORT_SYMBOL(__kfifo_dma_out_prepare_r); |
124 123 124 124 65 73 73 131 131 129 14 14 130 131 2 1 3 1 7 5 5 5 5 5 1 9 9 2 1 5 5 5 124 11 2 9 9 9 9 9 117 6 5 6 116 18 115 115 114 8 115 99 115 115 115 5 13 124 124 70 70 1 1 1 1 71 70 71 71 3 71 71 70 70 70 69 65 69 3 69 70 70 71 195 155 63 51 3 49 48 48 61 61 61 52 60 14 59 58 58 54 61 52 52 52 52 52 46 48 2 1 49 51 51 4 3 50 47 46 48 48 51 1 1 1 1 1 1 1 93 94 94 94 22 5 22 21 20 1 1 21 5 18 22 7 3 3 3 2 4 50 52 51 1 50 6 14 6 83 3 3 3 1 19 1 18 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 242 242 242 242 131 143 242 1 1 1 8 8 8 2 2 2 4 4 4 4 4 4 4 4 4 4 4 4 4 4 1 15 14 15 18 2 16 18 18 14 14 14 14 14 14 6 6 6 4 4 6 6 2 7 9 2 2 2 9 2 1 2 1 2 1 3 6 2 4 1 3 1 2 1 6 6 1 6 6 1 5 5 1 4 3 1 2 1 5 2 11 11 11 5 6 1 5 5 4 1 4 4 4 4 4 4 2 6 5 5 3 3 3 2 2 2 18 4 4 4 18 17 8 17 17 17 2 2 12 11 10 9 7 6 8 4 7 6 6 3 6 3 6 2 6 3 2 6 2 2 6 12 6 2 2 2 6 4 2 2 2 4 13 12 12 12 13 3 2 7 7 5 3 3 3 2 6 6 5 3 3 2 2 73 2 11 6 6 3 18 12 9 3 8 74 1 1 1 5 10 10 9 9 8 7 7 7 4 4 4 3 10 10 10 10 10 10 10 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Timers abstract layer * Copyright (c) by Jaroslav Kysela <perex@perex.cz> */ #include <linux/delay.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/time.h> #include <linux/mutex.h> #include <linux/device.h> #include <linux/module.h> #include <linux/string.h> #include <linux/sched/signal.h> #include <linux/anon_inodes.h> #include <linux/idr.h> #include <sound/core.h> #include <sound/timer.h> #include <sound/control.h> #include <sound/info.h> #include <sound/minors.h> #include <sound/initval.h> #include <linux/kmod.h> /* internal flags */ #define SNDRV_TIMER_IFLG_PAUSED 0x00010000 #define SNDRV_TIMER_IFLG_DEAD 0x00020000 #if IS_ENABLED(CONFIG_SND_HRTIMER) #define DEFAULT_TIMER_LIMIT 4 #else #define DEFAULT_TIMER_LIMIT 1 #endif static int timer_limit = DEFAULT_TIMER_LIMIT; static int timer_tstamp_monotonic = 1; MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>, Takashi Iwai <tiwai@suse.de>"); MODULE_DESCRIPTION("ALSA timer interface"); MODULE_LICENSE("GPL"); module_param(timer_limit, int, 0444); MODULE_PARM_DESC(timer_limit, "Maximum global timers in system."); module_param(timer_tstamp_monotonic, int, 0444); MODULE_PARM_DESC(timer_tstamp_monotonic, "Use posix monotonic clock source for timestamps (default)."); MODULE_ALIAS_CHARDEV(CONFIG_SND_MAJOR, SNDRV_MINOR_TIMER); MODULE_ALIAS("devname:snd/timer"); enum timer_tread_format { TREAD_FORMAT_NONE = 0, TREAD_FORMAT_TIME64, TREAD_FORMAT_TIME32, }; struct snd_timer_tread32 { int event; s32 tstamp_sec; s32 tstamp_nsec; unsigned int val; }; struct snd_timer_tread64 { int event; u8 pad1[4]; s64 tstamp_sec; s64 tstamp_nsec; unsigned int val; u8 pad2[4]; }; struct snd_timer_user { struct snd_timer_instance *timeri; int tread; /* enhanced read with timestamps and events */ unsigned long ticks; unsigned long overrun; int qhead; int qtail; int qused; int queue_size; bool disconnected; struct snd_timer_read *queue; struct snd_timer_tread64 *tqueue; spinlock_t qlock; unsigned long last_resolution; unsigned int filter; struct timespec64 tstamp; /* trigger tstamp */ wait_queue_head_t qchange_sleep; struct snd_fasync *fasync; struct mutex ioctl_lock; }; struct snd_timer_status32 { s32 tstamp_sec; /* Timestamp - last update */ s32 tstamp_nsec; unsigned int resolution; /* current period resolution in ns */ unsigned int lost; /* counter of master tick lost */ unsigned int overrun; /* count of read queue overruns */ unsigned int queue; /* used queue size */ unsigned char reserved[64]; /* reserved */ }; #define SNDRV_TIMER_IOCTL_STATUS32 _IOR('T', 0x14, struct snd_timer_status32) struct snd_timer_status64 { s64 tstamp_sec; /* Timestamp - last update */ s64 tstamp_nsec; unsigned int resolution; /* current period resolution in ns */ unsigned int lost; /* counter of master tick lost */ unsigned int overrun; /* count of read queue overruns */ unsigned int queue; /* used queue size */ unsigned char reserved[64]; /* reserved */ }; #ifdef CONFIG_SND_UTIMER #define SNDRV_UTIMERS_MAX_COUNT 128 /* Internal data structure for keeping the state of the userspace-driven timer */ struct snd_utimer { char *name; struct snd_timer *timer; unsigned int id; }; #endif #define SNDRV_TIMER_IOCTL_STATUS64 _IOR('T', 0x14, struct snd_timer_status64) /* list of timers */ static LIST_HEAD(snd_timer_list); /* list of slave instances */ static LIST_HEAD(snd_timer_slave_list); /* lock for slave active lists */ static DEFINE_SPINLOCK(slave_active_lock); #define MAX_SLAVE_INSTANCES 1000 static int num_slaves; static DEFINE_MUTEX(register_mutex); static int snd_timer_free(struct snd_timer *timer); static int snd_timer_dev_free(struct snd_device *device); static int snd_timer_dev_register(struct snd_device *device); static int snd_timer_dev_disconnect(struct snd_device *device); static void snd_timer_reschedule(struct snd_timer * timer, unsigned long ticks_left); /* * create a timer instance with the given owner string. */ struct snd_timer_instance *snd_timer_instance_new(const char *owner) { struct snd_timer_instance *timeri; timeri = kzalloc(sizeof(*timeri), GFP_KERNEL); if (timeri == NULL) return NULL; timeri->owner = kstrdup(owner, GFP_KERNEL); if (! timeri->owner) { kfree(timeri); return NULL; } INIT_LIST_HEAD(&timeri->open_list); INIT_LIST_HEAD(&timeri->active_list); INIT_LIST_HEAD(&timeri->ack_list); INIT_LIST_HEAD(&timeri->slave_list_head); INIT_LIST_HEAD(&timeri->slave_active_head); return timeri; } EXPORT_SYMBOL(snd_timer_instance_new); void snd_timer_instance_free(struct snd_timer_instance *timeri) { if (timeri) { if (timeri->private_free) timeri->private_free(timeri); kfree(timeri->owner); kfree(timeri); } } EXPORT_SYMBOL(snd_timer_instance_free); /* * find a timer instance from the given timer id */ static struct snd_timer *snd_timer_find(struct snd_timer_id *tid) { struct snd_timer *timer; list_for_each_entry(timer, &snd_timer_list, device_list) { if (timer->tmr_class != tid->dev_class) continue; if ((timer->tmr_class == SNDRV_TIMER_CLASS_CARD || timer->tmr_class == SNDRV_TIMER_CLASS_PCM) && (timer->card == NULL || timer->card->number != tid->card)) continue; if (timer->tmr_device != tid->device) continue; if (timer->tmr_subdevice != tid->subdevice) continue; return timer; } return NULL; } #ifdef CONFIG_MODULES static void snd_timer_request(struct snd_timer_id *tid) { switch (tid->dev_class) { case SNDRV_TIMER_CLASS_GLOBAL: if (tid->device < timer_limit) request_module("snd-timer-%i", tid->device); break; case SNDRV_TIMER_CLASS_CARD: case SNDRV_TIMER_CLASS_PCM: if (tid->card < snd_ecards_limit) request_module("snd-card-%i", tid->card); break; default: break; } } #endif /* move the slave if it belongs to the master; return 1 if match */ static int check_matching_master_slave(struct snd_timer_instance *master, struct snd_timer_instance *slave) { if (slave->slave_class != master->slave_class || slave->slave_id != master->slave_id) return 0; if (master->timer->num_instances >= master->timer->max_instances) return -EBUSY; list_move_tail(&slave->open_list, &master->slave_list_head); master->timer->num_instances++; guard(spinlock_irq)(&slave_active_lock); guard(spinlock)(&master->timer->lock); slave->master = master; slave->timer = master->timer; if (slave->flags & SNDRV_TIMER_IFLG_RUNNING) list_add_tail(&slave->active_list, &master->slave_active_head); return 1; } /* * look for a master instance matching with the slave id of the given slave. * when found, relink the open_link of the slave. * * call this with register_mutex down. */ static int snd_timer_check_slave(struct snd_timer_instance *slave) { struct snd_timer *timer; struct snd_timer_instance *master; int err = 0; /* FIXME: it's really dumb to look up all entries.. */ list_for_each_entry(timer, &snd_timer_list, device_list) { list_for_each_entry(master, &timer->open_list_head, open_list) { err = check_matching_master_slave(master, slave); if (err != 0) /* match found or error */ goto out; } } out: return err < 0 ? err : 0; } /* * look for slave instances matching with the slave id of the given master. * when found, relink the open_link of slaves. * * call this with register_mutex down. */ static int snd_timer_check_master(struct snd_timer_instance *master) { struct snd_timer_instance *slave, *tmp; int err = 0; /* check all pending slaves */ list_for_each_entry_safe(slave, tmp, &snd_timer_slave_list, open_list) { err = check_matching_master_slave(master, slave); if (err < 0) break; } return err < 0 ? err : 0; } static void snd_timer_close_locked(struct snd_timer_instance *timeri, struct device **card_devp_to_put); /* * open a timer instance * when opening a master, the slave id must be here given. */ int snd_timer_open(struct snd_timer_instance *timeri, struct snd_timer_id *tid, unsigned int slave_id) { struct snd_timer *timer; struct device *card_dev_to_put = NULL; int err; mutex_lock(®ister_mutex); if (tid->dev_class == SNDRV_TIMER_CLASS_SLAVE) { /* open a slave instance */ if (tid->dev_sclass <= SNDRV_TIMER_SCLASS_NONE || tid->dev_sclass > SNDRV_TIMER_SCLASS_OSS_SEQUENCER) { pr_debug("ALSA: timer: invalid slave class %i\n", tid->dev_sclass); err = -EINVAL; goto unlock; } if (num_slaves >= MAX_SLAVE_INSTANCES) { err = -EBUSY; goto unlock; } timeri->slave_class = tid->dev_sclass; timeri->slave_id = tid->device; timeri->flags |= SNDRV_TIMER_IFLG_SLAVE; list_add_tail(&timeri->open_list, &snd_timer_slave_list); num_slaves++; err = snd_timer_check_slave(timeri); goto list_added; } /* open a master instance */ timer = snd_timer_find(tid); #ifdef CONFIG_MODULES if (!timer) { mutex_unlock(®ister_mutex); snd_timer_request(tid); mutex_lock(®ister_mutex); timer = snd_timer_find(tid); } #endif if (!timer) { err = -ENODEV; goto unlock; } if (!list_empty(&timer->open_list_head)) { struct snd_timer_instance *t = list_entry(timer->open_list_head.next, struct snd_timer_instance, open_list); if (t->flags & SNDRV_TIMER_IFLG_EXCLUSIVE) { err = -EBUSY; goto unlock; } } if (timer->num_instances >= timer->max_instances) { err = -EBUSY; goto unlock; } if (!try_module_get(timer->module)) { err = -EBUSY; goto unlock; } /* take a card refcount for safe disconnection */ if (timer->card) { get_device(&timer->card->card_dev); card_dev_to_put = &timer->card->card_dev; } if (list_empty(&timer->open_list_head) && timer->hw.open) { err = timer->hw.open(timer); if (err) { module_put(timer->module); goto unlock; } } timeri->timer = timer; timeri->slave_class = tid->dev_sclass; timeri->slave_id = slave_id; list_add_tail(&timeri->open_list, &timer->open_list_head); timer->num_instances++; err = snd_timer_check_master(timeri); list_added: if (err < 0) snd_timer_close_locked(timeri, &card_dev_to_put); unlock: mutex_unlock(®ister_mutex); /* put_device() is called after unlock for avoiding deadlock */ if (err < 0 && card_dev_to_put) put_device(card_dev_to_put); return err; } EXPORT_SYMBOL(snd_timer_open); /* remove slave links, called from snd_timer_close_locked() below */ static void remove_slave_links(struct snd_timer_instance *timeri, struct snd_timer *timer) { struct snd_timer_instance *slave, *tmp; guard(spinlock_irq)(&slave_active_lock); guard(spinlock)(&timer->lock); timeri->timer = NULL; list_for_each_entry_safe(slave, tmp, &timeri->slave_list_head, open_list) { list_move_tail(&slave->open_list, &snd_timer_slave_list); timer->num_instances--; slave->master = NULL; slave->timer = NULL; list_del_init(&slave->ack_list); list_del_init(&slave->active_list); } } /* * close a timer instance * call this with register_mutex down. */ static void snd_timer_close_locked(struct snd_timer_instance *timeri, struct device **card_devp_to_put) { struct snd_timer *timer = timeri->timer; if (timer) { guard(spinlock_irq)(&timer->lock); timeri->flags |= SNDRV_TIMER_IFLG_DEAD; } if (!list_empty(&timeri->open_list)) { list_del_init(&timeri->open_list); if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) num_slaves--; } /* force to stop the timer */ snd_timer_stop(timeri); if (timer) { timer->num_instances--; /* wait, until the active callback is finished */ spin_lock_irq(&timer->lock); while (timeri->flags & SNDRV_TIMER_IFLG_CALLBACK) { spin_unlock_irq(&timer->lock); udelay(10); spin_lock_irq(&timer->lock); } spin_unlock_irq(&timer->lock); remove_slave_links(timeri, timer); /* slave doesn't need to release timer resources below */ if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) timer = NULL; } if (timer) { if (list_empty(&timer->open_list_head) && timer->hw.close) timer->hw.close(timer); /* release a card refcount for safe disconnection */ if (timer->card) *card_devp_to_put = &timer->card->card_dev; module_put(timer->module); } } /* * close a timer instance */ void snd_timer_close(struct snd_timer_instance *timeri) { struct device *card_dev_to_put = NULL; if (snd_BUG_ON(!timeri)) return; scoped_guard(mutex, ®ister_mutex) snd_timer_close_locked(timeri, &card_dev_to_put); /* put_device() is called after unlock for avoiding deadlock */ if (card_dev_to_put) put_device(card_dev_to_put); } EXPORT_SYMBOL(snd_timer_close); static unsigned long snd_timer_hw_resolution(struct snd_timer *timer) { if (timer->hw.c_resolution) return timer->hw.c_resolution(timer); else return timer->hw.resolution; } unsigned long snd_timer_resolution(struct snd_timer_instance *timeri) { struct snd_timer * timer; unsigned long ret = 0; if (timeri == NULL) return 0; timer = timeri->timer; if (timer) { guard(spinlock_irqsave)(&timer->lock); ret = snd_timer_hw_resolution(timer); } return ret; } EXPORT_SYMBOL(snd_timer_resolution); static void snd_timer_notify1(struct snd_timer_instance *ti, int event) { struct snd_timer *timer = ti->timer; unsigned long resolution = 0; struct snd_timer_instance *ts; struct timespec64 tstamp; if (timer_tstamp_monotonic) ktime_get_ts64(&tstamp); else ktime_get_real_ts64(&tstamp); if (snd_BUG_ON(event < SNDRV_TIMER_EVENT_START || event > SNDRV_TIMER_EVENT_PAUSE)) return; if (timer && (event == SNDRV_TIMER_EVENT_START || event == SNDRV_TIMER_EVENT_CONTINUE)) resolution = snd_timer_hw_resolution(timer); if (ti->ccallback) ti->ccallback(ti, event, &tstamp, resolution); if (ti->flags & SNDRV_TIMER_IFLG_SLAVE) return; if (timer == NULL) return; if (timer->hw.flags & SNDRV_TIMER_HW_SLAVE) return; event += 10; /* convert to SNDRV_TIMER_EVENT_MXXX */ list_for_each_entry(ts, &ti->slave_active_head, active_list) if (ts->ccallback) ts->ccallback(ts, event, &tstamp, resolution); } /* start/continue a master timer */ static int snd_timer_start1(struct snd_timer_instance *timeri, bool start, unsigned long ticks) { struct snd_timer *timer; int result; timer = timeri->timer; if (!timer) return -EINVAL; guard(spinlock_irqsave)(&timer->lock); if (timeri->flags & SNDRV_TIMER_IFLG_DEAD) return -EINVAL; if (timer->card && timer->card->shutdown) return -ENODEV; if (timeri->flags & (SNDRV_TIMER_IFLG_RUNNING | SNDRV_TIMER_IFLG_START)) return -EBUSY; /* check the actual time for the start tick; * bail out as error if it's way too low (< 100us) */ if (start && !(timer->hw.flags & SNDRV_TIMER_HW_SLAVE)) { if ((u64)snd_timer_hw_resolution(timer) * ticks < 100000) return -EINVAL; } if (start) timeri->ticks = timeri->cticks = ticks; else if (!timeri->cticks) timeri->cticks = 1; timeri->pticks = 0; list_move_tail(&timeri->active_list, &timer->active_list_head); if (timer->running) { if (timer->hw.flags & SNDRV_TIMER_HW_SLAVE) goto __start_now; timer->flags |= SNDRV_TIMER_FLG_RESCHED; timeri->flags |= SNDRV_TIMER_IFLG_START; result = 1; /* delayed start */ } else { if (start) timer->sticks = ticks; timer->hw.start(timer); __start_now: timer->running++; timeri->flags |= SNDRV_TIMER_IFLG_RUNNING; result = 0; } snd_timer_notify1(timeri, start ? SNDRV_TIMER_EVENT_START : SNDRV_TIMER_EVENT_CONTINUE); return result; } /* start/continue a slave timer */ static int snd_timer_start_slave(struct snd_timer_instance *timeri, bool start) { guard(spinlock_irqsave)(&slave_active_lock); if (timeri->flags & SNDRV_TIMER_IFLG_DEAD) return -EINVAL; if (timeri->flags & SNDRV_TIMER_IFLG_RUNNING) return -EBUSY; timeri->flags |= SNDRV_TIMER_IFLG_RUNNING; if (timeri->master && timeri->timer) { guard(spinlock)(&timeri->timer->lock); list_add_tail(&timeri->active_list, &timeri->master->slave_active_head); snd_timer_notify1(timeri, start ? SNDRV_TIMER_EVENT_START : SNDRV_TIMER_EVENT_CONTINUE); } return 1; /* delayed start */ } /* stop/pause a master timer */ static int snd_timer_stop1(struct snd_timer_instance *timeri, bool stop) { struct snd_timer *timer; timer = timeri->timer; if (!timer) return -EINVAL; guard(spinlock_irqsave)(&timer->lock); list_del_init(&timeri->ack_list); list_del_init(&timeri->active_list); if (!(timeri->flags & (SNDRV_TIMER_IFLG_RUNNING | SNDRV_TIMER_IFLG_START))) return -EBUSY; if (timer->card && timer->card->shutdown) return 0; if (stop) { timeri->cticks = timeri->ticks; timeri->pticks = 0; } if ((timeri->flags & SNDRV_TIMER_IFLG_RUNNING) && !(--timer->running)) { timer->hw.stop(timer); if (timer->flags & SNDRV_TIMER_FLG_RESCHED) { timer->flags &= ~SNDRV_TIMER_FLG_RESCHED; snd_timer_reschedule(timer, 0); if (timer->flags & SNDRV_TIMER_FLG_CHANGE) { timer->flags &= ~SNDRV_TIMER_FLG_CHANGE; timer->hw.start(timer); } } } timeri->flags &= ~(SNDRV_TIMER_IFLG_RUNNING | SNDRV_TIMER_IFLG_START); if (stop) timeri->flags &= ~SNDRV_TIMER_IFLG_PAUSED; else timeri->flags |= SNDRV_TIMER_IFLG_PAUSED; snd_timer_notify1(timeri, stop ? SNDRV_TIMER_EVENT_STOP : SNDRV_TIMER_EVENT_PAUSE); return 0; } /* stop/pause a slave timer */ static int snd_timer_stop_slave(struct snd_timer_instance *timeri, bool stop) { bool running; guard(spinlock_irqsave)(&slave_active_lock); running = timeri->flags & SNDRV_TIMER_IFLG_RUNNING; timeri->flags &= ~SNDRV_TIMER_IFLG_RUNNING; if (timeri->timer) { guard(spinlock)(&timeri->timer->lock); list_del_init(&timeri->ack_list); list_del_init(&timeri->active_list); if (running) snd_timer_notify1(timeri, stop ? SNDRV_TIMER_EVENT_STOP : SNDRV_TIMER_EVENT_PAUSE); } return running ? 0 : -EBUSY; } /* * start the timer instance */ int snd_timer_start(struct snd_timer_instance *timeri, unsigned int ticks) { if (timeri == NULL || ticks < 1) return -EINVAL; if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) return snd_timer_start_slave(timeri, true); else return snd_timer_start1(timeri, true, ticks); } EXPORT_SYMBOL(snd_timer_start); /* * stop the timer instance. * * do not call this from the timer callback! */ int snd_timer_stop(struct snd_timer_instance *timeri) { if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) return snd_timer_stop_slave(timeri, true); else return snd_timer_stop1(timeri, true); } EXPORT_SYMBOL(snd_timer_stop); /* * start again.. the tick is kept. */ int snd_timer_continue(struct snd_timer_instance *timeri) { /* timer can continue only after pause */ if (!(timeri->flags & SNDRV_TIMER_IFLG_PAUSED)) return -EINVAL; if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) return snd_timer_start_slave(timeri, false); else return snd_timer_start1(timeri, false, 0); } EXPORT_SYMBOL(snd_timer_continue); /* * pause.. remember the ticks left */ int snd_timer_pause(struct snd_timer_instance * timeri) { if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) return snd_timer_stop_slave(timeri, false); else return snd_timer_stop1(timeri, false); } EXPORT_SYMBOL(snd_timer_pause); /* * reschedule the timer * * start pending instances and check the scheduling ticks. * when the scheduling ticks is changed set CHANGE flag to reprogram the timer. */ static void snd_timer_reschedule(struct snd_timer * timer, unsigned long ticks_left) { struct snd_timer_instance *ti; unsigned long ticks = ~0UL; list_for_each_entry(ti, &timer->active_list_head, active_list) { if (ti->flags & SNDRV_TIMER_IFLG_START) { ti->flags &= ~SNDRV_TIMER_IFLG_START; ti->flags |= SNDRV_TIMER_IFLG_RUNNING; timer->running++; } if (ti->flags & SNDRV_TIMER_IFLG_RUNNING) { if (ticks > ti->cticks) ticks = ti->cticks; } } if (ticks == ~0UL) { timer->flags &= ~SNDRV_TIMER_FLG_RESCHED; return; } if (ticks > timer->hw.ticks) ticks = timer->hw.ticks; if (ticks_left != ticks) timer->flags |= SNDRV_TIMER_FLG_CHANGE; timer->sticks = ticks; } /* call callbacks in timer ack list */ static void snd_timer_process_callbacks(struct snd_timer *timer, struct list_head *head) { struct snd_timer_instance *ti; unsigned long resolution, ticks; while (!list_empty(head)) { ti = list_first_entry(head, struct snd_timer_instance, ack_list); /* remove from ack_list and make empty */ list_del_init(&ti->ack_list); if (!(ti->flags & SNDRV_TIMER_IFLG_DEAD)) { ticks = ti->pticks; ti->pticks = 0; resolution = ti->resolution; ti->flags |= SNDRV_TIMER_IFLG_CALLBACK; spin_unlock(&timer->lock); if (ti->callback) ti->callback(ti, resolution, ticks); spin_lock(&timer->lock); ti->flags &= ~SNDRV_TIMER_IFLG_CALLBACK; } } } /* clear pending instances from ack list */ static void snd_timer_clear_callbacks(struct snd_timer *timer, struct list_head *head) { guard(spinlock_irqsave)(&timer->lock); while (!list_empty(head)) list_del_init(head->next); } /* * timer work * */ static void snd_timer_work(struct work_struct *work) { struct snd_timer *timer = container_of(work, struct snd_timer, task_work); if (timer->card && timer->card->shutdown) { snd_timer_clear_callbacks(timer, &timer->sack_list_head); return; } guard(spinlock_irqsave)(&timer->lock); snd_timer_process_callbacks(timer, &timer->sack_list_head); } /* * timer interrupt * * ticks_left is usually equal to timer->sticks. * */ void snd_timer_interrupt(struct snd_timer * timer, unsigned long ticks_left) { struct snd_timer_instance *ti, *ts, *tmp; unsigned long resolution; struct list_head *ack_list_head; if (timer == NULL) return; if (timer->card && timer->card->shutdown) { snd_timer_clear_callbacks(timer, &timer->ack_list_head); return; } guard(spinlock_irqsave)(&timer->lock); /* remember the current resolution */ resolution = snd_timer_hw_resolution(timer); /* loop for all active instances * Here we cannot use list_for_each_entry because the active_list of a * processed instance is relinked to done_list_head before the callback * is called. */ list_for_each_entry_safe(ti, tmp, &timer->active_list_head, active_list) { if (ti->flags & SNDRV_TIMER_IFLG_DEAD) continue; if (!(ti->flags & SNDRV_TIMER_IFLG_RUNNING)) continue; ti->pticks += ticks_left; ti->resolution = resolution; if (ti->cticks < ticks_left) ti->cticks = 0; else ti->cticks -= ticks_left; if (ti->cticks) /* not expired */ continue; if (ti->flags & SNDRV_TIMER_IFLG_AUTO) { ti->cticks = ti->ticks; } else { ti->flags &= ~SNDRV_TIMER_IFLG_RUNNING; --timer->running; list_del_init(&ti->active_list); } if ((timer->hw.flags & SNDRV_TIMER_HW_WORK) || (ti->flags & SNDRV_TIMER_IFLG_FAST)) ack_list_head = &timer->ack_list_head; else ack_list_head = &timer->sack_list_head; if (list_empty(&ti->ack_list)) list_add_tail(&ti->ack_list, ack_list_head); list_for_each_entry(ts, &ti->slave_active_head, active_list) { ts->pticks = ti->pticks; ts->resolution = resolution; if (list_empty(&ts->ack_list)) list_add_tail(&ts->ack_list, ack_list_head); } } if (timer->flags & SNDRV_TIMER_FLG_RESCHED) snd_timer_reschedule(timer, timer->sticks); if (timer->running) { if (timer->hw.flags & SNDRV_TIMER_HW_STOP) { timer->hw.stop(timer); timer->flags |= SNDRV_TIMER_FLG_CHANGE; } if (!(timer->hw.flags & SNDRV_TIMER_HW_AUTO) || (timer->flags & SNDRV_TIMER_FLG_CHANGE)) { /* restart timer */ timer->flags &= ~SNDRV_TIMER_FLG_CHANGE; timer->hw.start(timer); } } else { timer->hw.stop(timer); } /* now process all fast callbacks */ snd_timer_process_callbacks(timer, &timer->ack_list_head); /* do we have any slow callbacks? */ if (!list_empty(&timer->sack_list_head)) queue_work(system_highpri_wq, &timer->task_work); } EXPORT_SYMBOL(snd_timer_interrupt); /* */ int snd_timer_new(struct snd_card *card, char *id, struct snd_timer_id *tid, struct snd_timer **rtimer) { struct snd_timer *timer; int err; static const struct snd_device_ops ops = { .dev_free = snd_timer_dev_free, .dev_register = snd_timer_dev_register, .dev_disconnect = snd_timer_dev_disconnect, }; if (snd_BUG_ON(!tid)) return -EINVAL; if (tid->dev_class == SNDRV_TIMER_CLASS_CARD || tid->dev_class == SNDRV_TIMER_CLASS_PCM) { if (WARN_ON(!card)) return -EINVAL; } if (rtimer) *rtimer = NULL; timer = kzalloc(sizeof(*timer), GFP_KERNEL); if (!timer) return -ENOMEM; timer->tmr_class = tid->dev_class; timer->card = card; timer->tmr_device = tid->device; timer->tmr_subdevice = tid->subdevice; if (id) strscpy(timer->id, id, sizeof(timer->id)); timer->sticks = 1; INIT_LIST_HEAD(&timer->device_list); INIT_LIST_HEAD(&timer->open_list_head); INIT_LIST_HEAD(&timer->active_list_head); INIT_LIST_HEAD(&timer->ack_list_head); INIT_LIST_HEAD(&timer->sack_list_head); spin_lock_init(&timer->lock); INIT_WORK(&timer->task_work, snd_timer_work); timer->max_instances = 1000; /* default limit per timer */ if (card != NULL) { timer->module = card->module; err = snd_device_new(card, SNDRV_DEV_TIMER, timer, &ops); if (err < 0) { snd_timer_free(timer); return err; } } if (rtimer) *rtimer = timer; return 0; } EXPORT_SYMBOL(snd_timer_new); static int snd_timer_free(struct snd_timer *timer) { if (!timer) return 0; guard(mutex)(®ister_mutex); if (! list_empty(&timer->open_list_head)) { struct list_head *p, *n; struct snd_timer_instance *ti; pr_warn("ALSA: timer %p is busy?\n", timer); list_for_each_safe(p, n, &timer->open_list_head) { list_del_init(p); ti = list_entry(p, struct snd_timer_instance, open_list); ti->timer = NULL; } } list_del(&timer->device_list); if (timer->private_free) timer->private_free(timer); kfree(timer); return 0; } static int snd_timer_dev_free(struct snd_device *device) { struct snd_timer *timer = device->device_data; return snd_timer_free(timer); } static int snd_timer_dev_register(struct snd_device *dev) { struct snd_timer *timer = dev->device_data; struct snd_timer *timer1; if (snd_BUG_ON(!timer || !timer->hw.start || !timer->hw.stop)) return -ENXIO; if (!(timer->hw.flags & SNDRV_TIMER_HW_SLAVE) && !timer->hw.resolution && timer->hw.c_resolution == NULL) return -EINVAL; guard(mutex)(®ister_mutex); list_for_each_entry(timer1, &snd_timer_list, device_list) { if (timer1->tmr_class > timer->tmr_class) break; if (timer1->tmr_class < timer->tmr_class) continue; if (timer1->card && timer->card) { if (timer1->card->number > timer->card->number) break; if (timer1->card->number < timer->card->number) continue; } if (timer1->tmr_device > timer->tmr_device) break; if (timer1->tmr_device < timer->tmr_device) continue; if (timer1->tmr_subdevice > timer->tmr_subdevice) break; if (timer1->tmr_subdevice < timer->tmr_subdevice) continue; /* conflicts.. */ return -EBUSY; } list_add_tail(&timer->device_list, &timer1->device_list); return 0; } static int snd_timer_dev_disconnect(struct snd_device *device) { struct snd_timer *timer = device->device_data; struct snd_timer_instance *ti; guard(mutex)(®ister_mutex); list_del_init(&timer->device_list); /* wake up pending sleepers */ list_for_each_entry(ti, &timer->open_list_head, open_list) { if (ti->disconnect) ti->disconnect(ti); } return 0; } void snd_timer_notify(struct snd_timer *timer, int event, struct timespec64 *tstamp) { unsigned long resolution = 0; struct snd_timer_instance *ti, *ts; if (timer->card && timer->card->shutdown) return; if (! (timer->hw.flags & SNDRV_TIMER_HW_SLAVE)) return; if (snd_BUG_ON(event < SNDRV_TIMER_EVENT_MSTART || event > SNDRV_TIMER_EVENT_MRESUME)) return; guard(spinlock_irqsave)(&timer->lock); if (event == SNDRV_TIMER_EVENT_MSTART || event == SNDRV_TIMER_EVENT_MCONTINUE || event == SNDRV_TIMER_EVENT_MRESUME) resolution = snd_timer_hw_resolution(timer); list_for_each_entry(ti, &timer->active_list_head, active_list) { if (ti->ccallback) ti->ccallback(ti, event, tstamp, resolution); list_for_each_entry(ts, &ti->slave_active_head, active_list) if (ts->ccallback) ts->ccallback(ts, event, tstamp, resolution); } } EXPORT_SYMBOL(snd_timer_notify); /* * exported functions for global timers */ int snd_timer_global_new(char *id, int device, struct snd_timer **rtimer) { struct snd_timer_id tid; tid.dev_class = SNDRV_TIMER_CLASS_GLOBAL; tid.dev_sclass = SNDRV_TIMER_SCLASS_NONE; tid.card = -1; tid.device = device; tid.subdevice = 0; return snd_timer_new(NULL, id, &tid, rtimer); } EXPORT_SYMBOL(snd_timer_global_new); int snd_timer_global_free(struct snd_timer *timer) { return snd_timer_free(timer); } EXPORT_SYMBOL(snd_timer_global_free); int snd_timer_global_register(struct snd_timer *timer) { struct snd_device dev; memset(&dev, 0, sizeof(dev)); dev.device_data = timer; return snd_timer_dev_register(&dev); } EXPORT_SYMBOL(snd_timer_global_register); /* * System timer */ struct snd_timer_system_private { struct timer_list tlist; struct snd_timer *snd_timer; unsigned long last_expires; unsigned long last_jiffies; unsigned long correction; }; static void snd_timer_s_function(struct timer_list *t) { struct snd_timer_system_private *priv = from_timer(priv, t, tlist); struct snd_timer *timer = priv->snd_timer; unsigned long jiff = jiffies; if (time_after(jiff, priv->last_expires)) priv->correction += (long)jiff - (long)priv->last_expires; snd_timer_interrupt(timer, (long)jiff - (long)priv->last_jiffies); } static int snd_timer_s_start(struct snd_timer * timer) { struct snd_timer_system_private *priv; unsigned long njiff; priv = (struct snd_timer_system_private *) timer->private_data; njiff = (priv->last_jiffies = jiffies); if (priv->correction > timer->sticks - 1) { priv->correction -= timer->sticks - 1; njiff++; } else { njiff += timer->sticks - priv->correction; priv->correction = 0; } priv->last_expires = njiff; mod_timer(&priv->tlist, njiff); return 0; } static int snd_timer_s_stop(struct snd_timer * timer) { struct snd_timer_system_private *priv; unsigned long jiff; priv = (struct snd_timer_system_private *) timer->private_data; timer_delete(&priv->tlist); jiff = jiffies; if (time_before(jiff, priv->last_expires)) timer->sticks = priv->last_expires - jiff; else timer->sticks = 1; priv->correction = 0; return 0; } static int snd_timer_s_close(struct snd_timer *timer) { struct snd_timer_system_private *priv; priv = (struct snd_timer_system_private *)timer->private_data; timer_delete_sync(&priv->tlist); return 0; } static const struct snd_timer_hardware snd_timer_system = { .flags = SNDRV_TIMER_HW_FIRST | SNDRV_TIMER_HW_WORK, .resolution = NSEC_PER_SEC / HZ, .ticks = 10000000L, .close = snd_timer_s_close, .start = snd_timer_s_start, .stop = snd_timer_s_stop }; static void snd_timer_free_system(struct snd_timer *timer) { kfree(timer->private_data); } static int snd_timer_register_system(void) { struct snd_timer *timer; struct snd_timer_system_private *priv; int err; err = snd_timer_global_new("system", SNDRV_TIMER_GLOBAL_SYSTEM, &timer); if (err < 0) return err; strcpy(timer->name, "system timer"); timer->hw = snd_timer_system; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (priv == NULL) { snd_timer_free(timer); return -ENOMEM; } priv->snd_timer = timer; timer_setup(&priv->tlist, snd_timer_s_function, 0); timer->private_data = priv; timer->private_free = snd_timer_free_system; return snd_timer_global_register(timer); } #ifdef CONFIG_SND_PROC_FS /* * Info interface */ static void snd_timer_proc_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_timer *timer; struct snd_timer_instance *ti; unsigned long resolution; guard(mutex)(®ister_mutex); list_for_each_entry(timer, &snd_timer_list, device_list) { if (timer->card && timer->card->shutdown) continue; switch (timer->tmr_class) { case SNDRV_TIMER_CLASS_GLOBAL: snd_iprintf(buffer, "G%i: ", timer->tmr_device); break; case SNDRV_TIMER_CLASS_CARD: snd_iprintf(buffer, "C%i-%i: ", timer->card->number, timer->tmr_device); break; case SNDRV_TIMER_CLASS_PCM: snd_iprintf(buffer, "P%i-%i-%i: ", timer->card->number, timer->tmr_device, timer->tmr_subdevice); break; default: snd_iprintf(buffer, "?%i-%i-%i-%i: ", timer->tmr_class, timer->card ? timer->card->number : -1, timer->tmr_device, timer->tmr_subdevice); } snd_iprintf(buffer, "%s :", timer->name); scoped_guard(spinlock_irq, &timer->lock) resolution = snd_timer_hw_resolution(timer); if (resolution) snd_iprintf(buffer, " %lu.%03luus (%lu ticks)", resolution / 1000, resolution % 1000, timer->hw.ticks); if (timer->hw.flags & SNDRV_TIMER_HW_SLAVE) snd_iprintf(buffer, " SLAVE"); snd_iprintf(buffer, "\n"); list_for_each_entry(ti, &timer->open_list_head, open_list) snd_iprintf(buffer, " Client %s : %s\n", ti->owner ? ti->owner : "unknown", (ti->flags & (SNDRV_TIMER_IFLG_START | SNDRV_TIMER_IFLG_RUNNING)) ? "running" : "stopped"); } } static struct snd_info_entry *snd_timer_proc_entry; static void __init snd_timer_proc_init(void) { struct snd_info_entry *entry; entry = snd_info_create_module_entry(THIS_MODULE, "timers", NULL); if (entry != NULL) { entry->c.text.read = snd_timer_proc_read; if (snd_info_register(entry) < 0) { snd_info_free_entry(entry); entry = NULL; } } snd_timer_proc_entry = entry; } static void __exit snd_timer_proc_done(void) { snd_info_free_entry(snd_timer_proc_entry); } #else /* !CONFIG_SND_PROC_FS */ #define snd_timer_proc_init() #define snd_timer_proc_done() #endif /* * USER SPACE interface */ static void snd_timer_user_interrupt(struct snd_timer_instance *timeri, unsigned long resolution, unsigned long ticks) { struct snd_timer_user *tu = timeri->callback_data; struct snd_timer_read *r; int prev; guard(spinlock)(&tu->qlock); if (tu->qused > 0) { prev = tu->qtail == 0 ? tu->queue_size - 1 : tu->qtail - 1; r = &tu->queue[prev]; if (r->resolution == resolution) { r->ticks += ticks; goto __wake; } } if (tu->qused >= tu->queue_size) { tu->overrun++; } else { r = &tu->queue[tu->qtail++]; tu->qtail %= tu->queue_size; r->resolution = resolution; r->ticks = ticks; tu->qused++; } __wake: snd_kill_fasync(tu->fasync, SIGIO, POLL_IN); wake_up(&tu->qchange_sleep); } static void snd_timer_user_append_to_tqueue(struct snd_timer_user *tu, struct snd_timer_tread64 *tread) { if (tu->qused >= tu->queue_size) { tu->overrun++; } else { memcpy(&tu->tqueue[tu->qtail++], tread, sizeof(*tread)); tu->qtail %= tu->queue_size; tu->qused++; } } static void snd_timer_user_ccallback(struct snd_timer_instance *timeri, int event, struct timespec64 *tstamp, unsigned long resolution) { struct snd_timer_user *tu = timeri->callback_data; struct snd_timer_tread64 r1; if (event >= SNDRV_TIMER_EVENT_START && event <= SNDRV_TIMER_EVENT_PAUSE) tu->tstamp = *tstamp; if ((tu->filter & (1 << event)) == 0 || !tu->tread) return; memset(&r1, 0, sizeof(r1)); r1.event = event; r1.tstamp_sec = tstamp->tv_sec; r1.tstamp_nsec = tstamp->tv_nsec; r1.val = resolution; scoped_guard(spinlock_irqsave, &tu->qlock) snd_timer_user_append_to_tqueue(tu, &r1); snd_kill_fasync(tu->fasync, SIGIO, POLL_IN); wake_up(&tu->qchange_sleep); } static void snd_timer_user_disconnect(struct snd_timer_instance *timeri) { struct snd_timer_user *tu = timeri->callback_data; tu->disconnected = true; wake_up(&tu->qchange_sleep); } static void snd_timer_user_tinterrupt(struct snd_timer_instance *timeri, unsigned long resolution, unsigned long ticks) { struct snd_timer_user *tu = timeri->callback_data; struct snd_timer_tread64 *r, r1; struct timespec64 tstamp; int prev, append = 0; memset(&r1, 0, sizeof(r1)); memset(&tstamp, 0, sizeof(tstamp)); scoped_guard(spinlock, &tu->qlock) { if ((tu->filter & ((1 << SNDRV_TIMER_EVENT_RESOLUTION) | (1 << SNDRV_TIMER_EVENT_TICK))) == 0) return; if (tu->last_resolution != resolution || ticks > 0) { if (timer_tstamp_monotonic) ktime_get_ts64(&tstamp); else ktime_get_real_ts64(&tstamp); } if ((tu->filter & (1 << SNDRV_TIMER_EVENT_RESOLUTION)) && tu->last_resolution != resolution) { r1.event = SNDRV_TIMER_EVENT_RESOLUTION; r1.tstamp_sec = tstamp.tv_sec; r1.tstamp_nsec = tstamp.tv_nsec; r1.val = resolution; snd_timer_user_append_to_tqueue(tu, &r1); tu->last_resolution = resolution; append++; } if ((tu->filter & (1 << SNDRV_TIMER_EVENT_TICK)) == 0) break; if (ticks == 0) break; if (tu->qused > 0) { prev = tu->qtail == 0 ? tu->queue_size - 1 : tu->qtail - 1; r = &tu->tqueue[prev]; if (r->event == SNDRV_TIMER_EVENT_TICK) { r->tstamp_sec = tstamp.tv_sec; r->tstamp_nsec = tstamp.tv_nsec; r->val += ticks; append++; break; } } r1.event = SNDRV_TIMER_EVENT_TICK; r1.tstamp_sec = tstamp.tv_sec; r1.tstamp_nsec = tstamp.tv_nsec; r1.val = ticks; snd_timer_user_append_to_tqueue(tu, &r1); append++; } if (append == 0) return; snd_kill_fasync(tu->fasync, SIGIO, POLL_IN); wake_up(&tu->qchange_sleep); } static int realloc_user_queue(struct snd_timer_user *tu, int size) { struct snd_timer_read *queue = NULL; struct snd_timer_tread64 *tqueue = NULL; if (tu->tread) { tqueue = kcalloc(size, sizeof(*tqueue), GFP_KERNEL); if (!tqueue) return -ENOMEM; } else { queue = kcalloc(size, sizeof(*queue), GFP_KERNEL); if (!queue) return -ENOMEM; } guard(spinlock_irq)(&tu->qlock); kfree(tu->queue); kfree(tu->tqueue); tu->queue_size = size; tu->queue = queue; tu->tqueue = tqueue; tu->qhead = tu->qtail = tu->qused = 0; return 0; } static int snd_timer_user_open(struct inode *inode, struct file *file) { struct snd_timer_user *tu; int err; err = stream_open(inode, file); if (err < 0) return err; tu = kzalloc(sizeof(*tu), GFP_KERNEL); if (tu == NULL) return -ENOMEM; spin_lock_init(&tu->qlock); init_waitqueue_head(&tu->qchange_sleep); mutex_init(&tu->ioctl_lock); tu->ticks = 1; if (realloc_user_queue(tu, 128) < 0) { kfree(tu); return -ENOMEM; } file->private_data = tu; return 0; } static int snd_timer_user_release(struct inode *inode, struct file *file) { struct snd_timer_user *tu; if (file->private_data) { tu = file->private_data; file->private_data = NULL; scoped_guard(mutex, &tu->ioctl_lock) { if (tu->timeri) { snd_timer_close(tu->timeri); snd_timer_instance_free(tu->timeri); } } snd_fasync_free(tu->fasync); kfree(tu->queue); kfree(tu->tqueue); kfree(tu); } return 0; } static void snd_timer_user_zero_id(struct snd_timer_id *id) { id->dev_class = SNDRV_TIMER_CLASS_NONE; id->dev_sclass = SNDRV_TIMER_SCLASS_NONE; id->card = -1; id->device = -1; id->subdevice = -1; } static void snd_timer_user_copy_id(struct snd_timer_id *id, struct snd_timer *timer) { id->dev_class = timer->tmr_class; id->dev_sclass = SNDRV_TIMER_SCLASS_NONE; id->card = timer->card ? timer->card->number : -1; id->device = timer->tmr_device; id->subdevice = timer->tmr_subdevice; } static void get_next_device(struct snd_timer_id *id) { struct snd_timer *timer; struct list_head *p; if (id->dev_class < 0) { /* first item */ if (list_empty(&snd_timer_list)) snd_timer_user_zero_id(id); else { timer = list_entry(snd_timer_list.next, struct snd_timer, device_list); snd_timer_user_copy_id(id, timer); } } else { switch (id->dev_class) { case SNDRV_TIMER_CLASS_GLOBAL: id->device = id->device < 0 ? 0 : id->device + 1; list_for_each(p, &snd_timer_list) { timer = list_entry(p, struct snd_timer, device_list); if (timer->tmr_class > SNDRV_TIMER_CLASS_GLOBAL) { snd_timer_user_copy_id(id, timer); break; } if (timer->tmr_device >= id->device) { snd_timer_user_copy_id(id, timer); break; } } if (p == &snd_timer_list) snd_timer_user_zero_id(id); break; case SNDRV_TIMER_CLASS_CARD: case SNDRV_TIMER_CLASS_PCM: if (id->card < 0) { id->card = 0; } else { if (id->device < 0) { id->device = 0; } else { if (id->subdevice < 0) id->subdevice = 0; else if (id->subdevice < INT_MAX) id->subdevice++; } } list_for_each(p, &snd_timer_list) { timer = list_entry(p, struct snd_timer, device_list); if (timer->tmr_class > id->dev_class) { snd_timer_user_copy_id(id, timer); break; } if (timer->tmr_class < id->dev_class) continue; if (timer->card->number > id->card) { snd_timer_user_copy_id(id, timer); break; } if (timer->card->number < id->card) continue; if (timer->tmr_device > id->device) { snd_timer_user_copy_id(id, timer); break; } if (timer->tmr_device < id->device) continue; if (timer->tmr_subdevice > id->subdevice) { snd_timer_user_copy_id(id, timer); break; } if (timer->tmr_subdevice < id->subdevice) continue; snd_timer_user_copy_id(id, timer); break; } if (p == &snd_timer_list) snd_timer_user_zero_id(id); break; default: snd_timer_user_zero_id(id); } } } static int snd_timer_user_next_device(struct snd_timer_id __user *_tid) { struct snd_timer_id id; if (copy_from_user(&id, _tid, sizeof(id))) return -EFAULT; scoped_guard(mutex, ®ister_mutex) get_next_device(&id); if (copy_to_user(_tid, &id, sizeof(*_tid))) return -EFAULT; return 0; } static int snd_timer_user_ginfo(struct file *file, struct snd_timer_ginfo __user *_ginfo) { struct snd_timer_ginfo *ginfo __free(kfree) = NULL; struct snd_timer_id tid; struct snd_timer *t; struct list_head *p; ginfo = memdup_user(_ginfo, sizeof(*ginfo)); if (IS_ERR(ginfo)) return PTR_ERR(ginfo); tid = ginfo->tid; memset(ginfo, 0, sizeof(*ginfo)); ginfo->tid = tid; scoped_guard(mutex, ®ister_mutex) { t = snd_timer_find(&tid); if (!t) return -ENODEV; ginfo->card = t->card ? t->card->number : -1; if (t->hw.flags & SNDRV_TIMER_HW_SLAVE) ginfo->flags |= SNDRV_TIMER_FLG_SLAVE; strscpy(ginfo->id, t->id, sizeof(ginfo->id)); strscpy(ginfo->name, t->name, sizeof(ginfo->name)); scoped_guard(spinlock_irq, &t->lock) ginfo->resolution = snd_timer_hw_resolution(t); if (t->hw.resolution_min > 0) { ginfo->resolution_min = t->hw.resolution_min; ginfo->resolution_max = t->hw.resolution_max; } list_for_each(p, &t->open_list_head) { ginfo->clients++; } } if (copy_to_user(_ginfo, ginfo, sizeof(*ginfo))) return -EFAULT; return 0; } static int timer_set_gparams(struct snd_timer_gparams *gparams) { struct snd_timer *t; guard(mutex)(®ister_mutex); t = snd_timer_find(&gparams->tid); if (!t) return -ENODEV; if (!list_empty(&t->open_list_head)) return -EBUSY; if (!t->hw.set_period) return -ENOSYS; return t->hw.set_period(t, gparams->period_num, gparams->period_den); } static int snd_timer_user_gparams(struct file *file, struct snd_timer_gparams __user *_gparams) { struct snd_timer_gparams gparams; if (copy_from_user(&gparams, _gparams, sizeof(gparams))) return -EFAULT; return timer_set_gparams(&gparams); } static int snd_timer_user_gstatus(struct file *file, struct snd_timer_gstatus __user *_gstatus) { struct snd_timer_gstatus gstatus; struct snd_timer_id tid; struct snd_timer *t; if (copy_from_user(&gstatus, _gstatus, sizeof(gstatus))) return -EFAULT; tid = gstatus.tid; memset(&gstatus, 0, sizeof(gstatus)); gstatus.tid = tid; scoped_guard(mutex, ®ister_mutex) { t = snd_timer_find(&tid); if (t != NULL) { guard(spinlock_irq)(&t->lock); gstatus.resolution = snd_timer_hw_resolution(t); if (t->hw.precise_resolution) { t->hw.precise_resolution(t, &gstatus.resolution_num, &gstatus.resolution_den); } else { gstatus.resolution_num = gstatus.resolution; gstatus.resolution_den = 1000000000uL; } } else { return -ENODEV; } } if (copy_to_user(_gstatus, &gstatus, sizeof(gstatus))) return -EFAULT; return 0; } static int snd_timer_user_tselect(struct file *file, struct snd_timer_select __user *_tselect) { struct snd_timer_user *tu; struct snd_timer_select tselect; char str[32]; int err = 0; tu = file->private_data; if (tu->timeri) { snd_timer_close(tu->timeri); snd_timer_instance_free(tu->timeri); tu->timeri = NULL; } if (copy_from_user(&tselect, _tselect, sizeof(tselect))) { err = -EFAULT; goto __err; } sprintf(str, "application %i", current->pid); if (tselect.id.dev_class != SNDRV_TIMER_CLASS_SLAVE) tselect.id.dev_sclass = SNDRV_TIMER_SCLASS_APPLICATION; tu->timeri = snd_timer_instance_new(str); if (!tu->timeri) { err = -ENOMEM; goto __err; } tu->timeri->flags |= SNDRV_TIMER_IFLG_FAST; tu->timeri->callback = tu->tread ? snd_timer_user_tinterrupt : snd_timer_user_interrupt; tu->timeri->ccallback = snd_timer_user_ccallback; tu->timeri->callback_data = (void *)tu; tu->timeri->disconnect = snd_timer_user_disconnect; err = snd_timer_open(tu->timeri, &tselect.id, current->pid); if (err < 0) { snd_timer_instance_free(tu->timeri); tu->timeri = NULL; } __err: return err; } static int snd_timer_user_info(struct file *file, struct snd_timer_info __user *_info) { struct snd_timer_user *tu; struct snd_timer_info *info __free(kfree) = NULL; struct snd_timer *t; tu = file->private_data; if (!tu->timeri) return -EBADFD; t = tu->timeri->timer; if (!t) return -EBADFD; info = kzalloc(sizeof(*info), GFP_KERNEL); if (! info) return -ENOMEM; info->card = t->card ? t->card->number : -1; if (t->hw.flags & SNDRV_TIMER_HW_SLAVE) info->flags |= SNDRV_TIMER_FLG_SLAVE; strscpy(info->id, t->id, sizeof(info->id)); strscpy(info->name, t->name, sizeof(info->name)); scoped_guard(spinlock_irq, &t->lock) info->resolution = snd_timer_hw_resolution(t); if (copy_to_user(_info, info, sizeof(*_info))) return -EFAULT; return 0; } static int snd_timer_user_params(struct file *file, struct snd_timer_params __user *_params) { struct snd_timer_user *tu; struct snd_timer_params params; struct snd_timer *t; int err; tu = file->private_data; if (!tu->timeri) return -EBADFD; t = tu->timeri->timer; if (!t) return -EBADFD; if (copy_from_user(¶ms, _params, sizeof(params))) return -EFAULT; if (!(t->hw.flags & SNDRV_TIMER_HW_SLAVE)) { u64 resolution; if (params.ticks < 1) { err = -EINVAL; goto _end; } /* Don't allow resolution less than 1ms */ resolution = snd_timer_resolution(tu->timeri); resolution *= params.ticks; if (resolution < 1000000) { err = -EINVAL; goto _end; } } if (params.queue_size > 0 && (params.queue_size < 32 || params.queue_size > 1024)) { err = -EINVAL; goto _end; } if (params.filter & ~((1<<SNDRV_TIMER_EVENT_RESOLUTION)| (1<<SNDRV_TIMER_EVENT_TICK)| (1<<SNDRV_TIMER_EVENT_START)| (1<<SNDRV_TIMER_EVENT_STOP)| (1<<SNDRV_TIMER_EVENT_CONTINUE)| (1<<SNDRV_TIMER_EVENT_PAUSE)| (1<<SNDRV_TIMER_EVENT_SUSPEND)| (1<<SNDRV_TIMER_EVENT_RESUME)| (1<<SNDRV_TIMER_EVENT_MSTART)| (1<<SNDRV_TIMER_EVENT_MSTOP)| (1<<SNDRV_TIMER_EVENT_MCONTINUE)| (1<<SNDRV_TIMER_EVENT_MPAUSE)| (1<<SNDRV_TIMER_EVENT_MSUSPEND)| (1<<SNDRV_TIMER_EVENT_MRESUME))) { err = -EINVAL; goto _end; } snd_timer_stop(tu->timeri); scoped_guard(spinlock_irq, &t->lock) { tu->timeri->flags &= ~(SNDRV_TIMER_IFLG_AUTO| SNDRV_TIMER_IFLG_EXCLUSIVE| SNDRV_TIMER_IFLG_EARLY_EVENT); if (params.flags & SNDRV_TIMER_PSFLG_AUTO) tu->timeri->flags |= SNDRV_TIMER_IFLG_AUTO; if (params.flags & SNDRV_TIMER_PSFLG_EXCLUSIVE) tu->timeri->flags |= SNDRV_TIMER_IFLG_EXCLUSIVE; if (params.flags & SNDRV_TIMER_PSFLG_EARLY_EVENT) tu->timeri->flags |= SNDRV_TIMER_IFLG_EARLY_EVENT; } if (params.queue_size > 0 && (unsigned int)tu->queue_size != params.queue_size) { err = realloc_user_queue(tu, params.queue_size); if (err < 0) goto _end; } scoped_guard(spinlock_irq, &tu->qlock) { tu->qhead = tu->qtail = tu->qused = 0; if (tu->timeri->flags & SNDRV_TIMER_IFLG_EARLY_EVENT) { if (tu->tread) { struct snd_timer_tread64 tread; memset(&tread, 0, sizeof(tread)); tread.event = SNDRV_TIMER_EVENT_EARLY; tread.tstamp_sec = 0; tread.tstamp_nsec = 0; tread.val = 0; snd_timer_user_append_to_tqueue(tu, &tread); } else { struct snd_timer_read *r = &tu->queue[0]; r->resolution = 0; r->ticks = 0; tu->qused++; tu->qtail++; } } tu->filter = params.filter; tu->ticks = params.ticks; } err = 0; _end: if (copy_to_user(_params, ¶ms, sizeof(params))) return -EFAULT; return err; } static int snd_timer_user_status32(struct file *file, struct snd_timer_status32 __user *_status) { struct snd_timer_user *tu; struct snd_timer_status32 status; tu = file->private_data; if (!tu->timeri) return -EBADFD; memset(&status, 0, sizeof(status)); status.tstamp_sec = tu->tstamp.tv_sec; status.tstamp_nsec = tu->tstamp.tv_nsec; status.resolution = snd_timer_resolution(tu->timeri); status.lost = tu->timeri->lost; status.overrun = tu->overrun; scoped_guard(spinlock_irq, &tu->qlock) status.queue = tu->qused; if (copy_to_user(_status, &status, sizeof(status))) return -EFAULT; return 0; } static int snd_timer_user_status64(struct file *file, struct snd_timer_status64 __user *_status) { struct snd_timer_user *tu; struct snd_timer_status64 status; tu = file->private_data; if (!tu->timeri) return -EBADFD; memset(&status, 0, sizeof(status)); status.tstamp_sec = tu->tstamp.tv_sec; status.tstamp_nsec = tu->tstamp.tv_nsec; status.resolution = snd_timer_resolution(tu->timeri); status.lost = tu->timeri->lost; status.overrun = tu->overrun; scoped_guard(spinlock_irq, &tu->qlock) status.queue = tu->qused; if (copy_to_user(_status, &status, sizeof(status))) return -EFAULT; return 0; } static int snd_timer_user_start(struct file *file) { int err; struct snd_timer_user *tu; tu = file->private_data; if (!tu->timeri) return -EBADFD; snd_timer_stop(tu->timeri); tu->timeri->lost = 0; tu->last_resolution = 0; err = snd_timer_start(tu->timeri, tu->ticks); if (err < 0) return err; return 0; } static int snd_timer_user_stop(struct file *file) { int err; struct snd_timer_user *tu; tu = file->private_data; if (!tu->timeri) return -EBADFD; err = snd_timer_stop(tu->timeri); if (err < 0) return err; return 0; } static int snd_timer_user_continue(struct file *file) { int err; struct snd_timer_user *tu; tu = file->private_data; if (!tu->timeri) return -EBADFD; /* start timer instead of continue if it's not used before */ if (!(tu->timeri->flags & SNDRV_TIMER_IFLG_PAUSED)) return snd_timer_user_start(file); tu->timeri->lost = 0; err = snd_timer_continue(tu->timeri); if (err < 0) return err; return 0; } static int snd_timer_user_pause(struct file *file) { int err; struct snd_timer_user *tu; tu = file->private_data; if (!tu->timeri) return -EBADFD; err = snd_timer_pause(tu->timeri); if (err < 0) return err; return 0; } static int snd_timer_user_tread(void __user *argp, struct snd_timer_user *tu, unsigned int cmd, bool compat) { int __user *p = argp; int xarg, old_tread; if (tu->timeri) /* too late */ return -EBUSY; if (get_user(xarg, p)) return -EFAULT; old_tread = tu->tread; if (!xarg) tu->tread = TREAD_FORMAT_NONE; else if (cmd == SNDRV_TIMER_IOCTL_TREAD64 || (IS_ENABLED(CONFIG_64BIT) && !compat)) tu->tread = TREAD_FORMAT_TIME64; else tu->tread = TREAD_FORMAT_TIME32; if (tu->tread != old_tread && realloc_user_queue(tu, tu->queue_size) < 0) { tu->tread = old_tread; return -ENOMEM; } return 0; } enum { SNDRV_TIMER_IOCTL_START_OLD = _IO('T', 0x20), SNDRV_TIMER_IOCTL_STOP_OLD = _IO('T', 0x21), SNDRV_TIMER_IOCTL_CONTINUE_OLD = _IO('T', 0x22), SNDRV_TIMER_IOCTL_PAUSE_OLD = _IO('T', 0x23), }; #ifdef CONFIG_SND_UTIMER /* * Since userspace-driven timers are passed to userspace, we need to have an identifier * which will allow us to use them (basically, the subdevice number of udriven timer). */ static DEFINE_IDA(snd_utimer_ids); static void snd_utimer_put_id(struct snd_utimer *utimer) { int timer_id = utimer->id; snd_BUG_ON(timer_id < 0 || timer_id >= SNDRV_UTIMERS_MAX_COUNT); ida_free(&snd_utimer_ids, timer_id); } static int snd_utimer_take_id(void) { return ida_alloc_max(&snd_utimer_ids, SNDRV_UTIMERS_MAX_COUNT - 1, GFP_KERNEL); } static void snd_utimer_free(struct snd_utimer *utimer) { snd_timer_free(utimer->timer); snd_utimer_put_id(utimer); kfree(utimer->name); kfree(utimer); } static int snd_utimer_release(struct inode *inode, struct file *file) { struct snd_utimer *utimer = (struct snd_utimer *)file->private_data; snd_utimer_free(utimer); return 0; } static int snd_utimer_trigger(struct file *file) { struct snd_utimer *utimer = (struct snd_utimer *)file->private_data; snd_timer_interrupt(utimer->timer, utimer->timer->sticks); return 0; } static long snd_utimer_ioctl(struct file *file, unsigned int ioctl, unsigned long arg) { switch (ioctl) { case SNDRV_TIMER_IOCTL_TRIGGER: return snd_utimer_trigger(file); } return -ENOTTY; } static const struct file_operations snd_utimer_fops = { .llseek = noop_llseek, .release = snd_utimer_release, .unlocked_ioctl = snd_utimer_ioctl, }; static int snd_utimer_start(struct snd_timer *t) { return 0; } static int snd_utimer_stop(struct snd_timer *t) { return 0; } static int snd_utimer_open(struct snd_timer *t) { return 0; } static int snd_utimer_close(struct snd_timer *t) { return 0; } static const struct snd_timer_hardware timer_hw = { .flags = SNDRV_TIMER_HW_AUTO | SNDRV_TIMER_HW_WORK, .open = snd_utimer_open, .close = snd_utimer_close, .start = snd_utimer_start, .stop = snd_utimer_stop, }; static int snd_utimer_create(struct snd_timer_uinfo *utimer_info, struct snd_utimer **r_utimer) { struct snd_utimer *utimer; struct snd_timer *timer; struct snd_timer_id tid; int utimer_id; int err = 0; if (!utimer_info || utimer_info->resolution == 0) return -EINVAL; utimer = kzalloc(sizeof(*utimer), GFP_KERNEL); if (!utimer) return -ENOMEM; /* We hold the ioctl lock here so we won't get a race condition when allocating id */ utimer_id = snd_utimer_take_id(); if (utimer_id < 0) { err = utimer_id; goto err_take_id; } utimer->name = kasprintf(GFP_KERNEL, "snd-utimer%d", utimer_id); if (!utimer->name) { err = -ENOMEM; goto err_get_name; } utimer->id = utimer_id; tid.dev_sclass = SNDRV_TIMER_SCLASS_APPLICATION; tid.dev_class = SNDRV_TIMER_CLASS_GLOBAL; tid.card = -1; tid.device = SNDRV_TIMER_GLOBAL_UDRIVEN; tid.subdevice = utimer_id; err = snd_timer_new(NULL, utimer->name, &tid, &timer); if (err < 0) { pr_err("Can't create userspace-driven timer\n"); goto err_timer_new; } timer->module = THIS_MODULE; timer->hw = timer_hw; timer->hw.resolution = utimer_info->resolution; timer->hw.ticks = 1; timer->max_instances = MAX_SLAVE_INSTANCES; utimer->timer = timer; err = snd_timer_global_register(timer); if (err < 0) { pr_err("Can't register a userspace-driven timer\n"); goto err_timer_reg; } *r_utimer = utimer; return 0; err_timer_reg: snd_timer_free(timer); err_timer_new: kfree(utimer->name); err_get_name: snd_utimer_put_id(utimer); err_take_id: kfree(utimer); return err; } static int snd_utimer_ioctl_create(struct file *file, struct snd_timer_uinfo __user *_utimer_info) { struct snd_utimer *utimer; struct snd_timer_uinfo *utimer_info __free(kfree) = NULL; int err, timer_fd; utimer_info = memdup_user(_utimer_info, sizeof(*utimer_info)); if (IS_ERR(utimer_info)) return PTR_ERR(utimer_info); err = snd_utimer_create(utimer_info, &utimer); if (err < 0) return err; utimer_info->id = utimer->id; timer_fd = anon_inode_getfd(utimer->name, &snd_utimer_fops, utimer, O_RDWR | O_CLOEXEC); if (timer_fd < 0) { snd_utimer_free(utimer); return timer_fd; } utimer_info->fd = timer_fd; err = copy_to_user(_utimer_info, utimer_info, sizeof(*utimer_info)); if (err) { /* * "Leak" the fd, as there is nothing we can do about it. * It might have been closed already since anon_inode_getfd * makes it available for userspace. * * We have to rely on the process exit path to do any * necessary cleanup (e.g. releasing the file). */ return -EFAULT; } return 0; } #else static int snd_utimer_ioctl_create(struct file *file, struct snd_timer_uinfo __user *_utimer_info) { return -ENOTTY; } #endif static long __snd_timer_user_ioctl(struct file *file, unsigned int cmd, unsigned long arg, bool compat) { struct snd_timer_user *tu; void __user *argp = (void __user *)arg; int __user *p = argp; tu = file->private_data; switch (cmd) { case SNDRV_TIMER_IOCTL_PVERSION: return put_user(SNDRV_TIMER_VERSION, p) ? -EFAULT : 0; case SNDRV_TIMER_IOCTL_NEXT_DEVICE: return snd_timer_user_next_device(argp); case SNDRV_TIMER_IOCTL_TREAD_OLD: case SNDRV_TIMER_IOCTL_TREAD64: return snd_timer_user_tread(argp, tu, cmd, compat); case SNDRV_TIMER_IOCTL_GINFO: return snd_timer_user_ginfo(file, argp); case SNDRV_TIMER_IOCTL_GPARAMS: return snd_timer_user_gparams(file, argp); case SNDRV_TIMER_IOCTL_GSTATUS: return snd_timer_user_gstatus(file, argp); case SNDRV_TIMER_IOCTL_SELECT: return snd_timer_user_tselect(file, argp); case SNDRV_TIMER_IOCTL_INFO: return snd_timer_user_info(file, argp); case SNDRV_TIMER_IOCTL_PARAMS: return snd_timer_user_params(file, argp); case SNDRV_TIMER_IOCTL_STATUS32: return snd_timer_user_status32(file, argp); case SNDRV_TIMER_IOCTL_STATUS64: return snd_timer_user_status64(file, argp); case SNDRV_TIMER_IOCTL_START: case SNDRV_TIMER_IOCTL_START_OLD: return snd_timer_user_start(file); case SNDRV_TIMER_IOCTL_STOP: case SNDRV_TIMER_IOCTL_STOP_OLD: return snd_timer_user_stop(file); case SNDRV_TIMER_IOCTL_CONTINUE: case SNDRV_TIMER_IOCTL_CONTINUE_OLD: return snd_timer_user_continue(file); case SNDRV_TIMER_IOCTL_PAUSE: case SNDRV_TIMER_IOCTL_PAUSE_OLD: return snd_timer_user_pause(file); case SNDRV_TIMER_IOCTL_CREATE: return snd_utimer_ioctl_create(file, argp); } return -ENOTTY; } static long snd_timer_user_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct snd_timer_user *tu = file->private_data; guard(mutex)(&tu->ioctl_lock); return __snd_timer_user_ioctl(file, cmd, arg, false); } static int snd_timer_user_fasync(int fd, struct file * file, int on) { struct snd_timer_user *tu; tu = file->private_data; return snd_fasync_helper(fd, file, on, &tu->fasync); } static ssize_t snd_timer_user_read(struct file *file, char __user *buffer, size_t count, loff_t *offset) { struct snd_timer_tread64 *tread; struct snd_timer_tread32 tread32; struct snd_timer_user *tu; long result = 0, unit; int qhead; int err = 0; tu = file->private_data; switch (tu->tread) { case TREAD_FORMAT_TIME64: unit = sizeof(struct snd_timer_tread64); break; case TREAD_FORMAT_TIME32: unit = sizeof(struct snd_timer_tread32); break; case TREAD_FORMAT_NONE: unit = sizeof(struct snd_timer_read); break; default: WARN_ONCE(1, "Corrupt snd_timer_user\n"); return -ENOTSUPP; } mutex_lock(&tu->ioctl_lock); spin_lock_irq(&tu->qlock); while ((long)count - result >= unit) { while (!tu->qused) { wait_queue_entry_t wait; if ((file->f_flags & O_NONBLOCK) != 0 || result > 0) { err = -EAGAIN; goto _error; } set_current_state(TASK_INTERRUPTIBLE); init_waitqueue_entry(&wait, current); add_wait_queue(&tu->qchange_sleep, &wait); spin_unlock_irq(&tu->qlock); mutex_unlock(&tu->ioctl_lock); schedule(); mutex_lock(&tu->ioctl_lock); spin_lock_irq(&tu->qlock); remove_wait_queue(&tu->qchange_sleep, &wait); if (tu->disconnected) { err = -ENODEV; goto _error; } if (signal_pending(current)) { err = -ERESTARTSYS; goto _error; } } qhead = tu->qhead++; tu->qhead %= tu->queue_size; tu->qused--; spin_unlock_irq(&tu->qlock); tread = &tu->tqueue[qhead]; switch (tu->tread) { case TREAD_FORMAT_TIME64: if (copy_to_user(buffer, tread, sizeof(struct snd_timer_tread64))) err = -EFAULT; break; case TREAD_FORMAT_TIME32: memset(&tread32, 0, sizeof(tread32)); tread32 = (struct snd_timer_tread32) { .event = tread->event, .tstamp_sec = tread->tstamp_sec, .tstamp_nsec = tread->tstamp_nsec, .val = tread->val, }; if (copy_to_user(buffer, &tread32, sizeof(tread32))) err = -EFAULT; break; case TREAD_FORMAT_NONE: if (copy_to_user(buffer, &tu->queue[qhead], sizeof(struct snd_timer_read))) err = -EFAULT; break; default: err = -ENOTSUPP; break; } spin_lock_irq(&tu->qlock); if (err < 0) goto _error; result += unit; buffer += unit; } _error: spin_unlock_irq(&tu->qlock); mutex_unlock(&tu->ioctl_lock); return result > 0 ? result : err; } static __poll_t snd_timer_user_poll(struct file *file, poll_table * wait) { __poll_t mask; struct snd_timer_user *tu; tu = file->private_data; poll_wait(file, &tu->qchange_sleep, wait); mask = 0; guard(spinlock_irq)(&tu->qlock); if (tu->qused) mask |= EPOLLIN | EPOLLRDNORM; if (tu->disconnected) mask |= EPOLLERR; return mask; } #ifdef CONFIG_COMPAT #include "timer_compat.c" #else #define snd_timer_user_ioctl_compat NULL #endif static const struct file_operations snd_timer_f_ops = { .owner = THIS_MODULE, .read = snd_timer_user_read, .open = snd_timer_user_open, .release = snd_timer_user_release, .poll = snd_timer_user_poll, .unlocked_ioctl = snd_timer_user_ioctl, .compat_ioctl = snd_timer_user_ioctl_compat, .fasync = snd_timer_user_fasync, }; /* unregister the system timer */ static void snd_timer_free_all(void) { struct snd_timer *timer, *n; list_for_each_entry_safe(timer, n, &snd_timer_list, device_list) snd_timer_free(timer); } static struct device *timer_dev; /* * ENTRY functions */ static int __init alsa_timer_init(void) { int err; err = snd_device_alloc(&timer_dev, NULL); if (err < 0) return err; dev_set_name(timer_dev, "timer"); #ifdef SNDRV_OSS_INFO_DEV_TIMERS snd_oss_info_register(SNDRV_OSS_INFO_DEV_TIMERS, SNDRV_CARDS - 1, "system timer"); #endif err = snd_timer_register_system(); if (err < 0) { pr_err("ALSA: unable to register system timer (%i)\n", err); goto put_timer; } err = snd_register_device(SNDRV_DEVICE_TYPE_TIMER, NULL, 0, &snd_timer_f_ops, NULL, timer_dev); if (err < 0) { pr_err("ALSA: unable to register timer device (%i)\n", err); snd_timer_free_all(); goto put_timer; } snd_timer_proc_init(); return 0; put_timer: put_device(timer_dev); return err; } static void __exit alsa_timer_exit(void) { snd_unregister_device(timer_dev); snd_timer_free_all(); put_device(timer_dev); snd_timer_proc_done(); #ifdef SNDRV_OSS_INFO_DEV_TIMERS snd_oss_info_unregister(SNDRV_OSS_INFO_DEV_TIMERS, SNDRV_CARDS - 1); #endif } module_init(alsa_timer_init) module_exit(alsa_timer_exit) |
1 1 1 13 6 13 1 12 1 11 9 13 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 | // SPDX-License-Identifier: GPL-2.0-only /* iptables module for using new netfilter netlink queue * * (C) 2005 by Harald Welte <laforge@netfilter.org> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netfilter.h> #include <linux/netfilter_arp.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_NFQUEUE.h> #include <net/netfilter/nf_queue.h> MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("Xtables: packet forwarding to netlink"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_NFQUEUE"); MODULE_ALIAS("ip6t_NFQUEUE"); MODULE_ALIAS("arpt_NFQUEUE"); static u32 jhash_initval __read_mostly; static unsigned int nfqueue_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_NFQ_info *tinfo = par->targinfo; return NF_QUEUE_NR(tinfo->queuenum); } static unsigned int nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_NFQ_info_v1 *info = par->targinfo; u32 queue = info->queuenum; if (info->queues_total > 1) { queue = nfqueue_hash(skb, queue, info->queues_total, xt_family(par), jhash_initval); } return NF_QUEUE_NR(queue); } static unsigned int nfqueue_tg_v2(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_NFQ_info_v2 *info = par->targinfo; unsigned int ret = nfqueue_tg_v1(skb, par); if (info->bypass) ret |= NF_VERDICT_FLAG_QUEUE_BYPASS; return ret; } static int nfqueue_tg_check(const struct xt_tgchk_param *par) { const struct xt_NFQ_info_v3 *info = par->targinfo; u32 maxid; init_hashrandom(&jhash_initval); if (info->queues_total == 0) { pr_info_ratelimited("number of total queues is 0\n"); return -EINVAL; } maxid = info->queues_total - 1 + info->queuenum; if (maxid > 0xffff) { pr_info_ratelimited("number of queues (%u) out of range (got %u)\n", info->queues_total, maxid); return -ERANGE; } if (par->target->revision == 2 && info->flags > 1) return -EINVAL; if (par->target->revision == 3 && info->flags & ~NFQ_FLAG_MASK) return -EINVAL; return 0; } static unsigned int nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_NFQ_info_v3 *info = par->targinfo; u32 queue = info->queuenum; int ret; if (info->queues_total > 1) { if (info->flags & NFQ_FLAG_CPU_FANOUT) { int cpu = smp_processor_id(); queue = info->queuenum + cpu % info->queues_total; } else { queue = nfqueue_hash(skb, queue, info->queues_total, xt_family(par), jhash_initval); } } ret = NF_QUEUE_NR(queue); if (info->flags & NFQ_FLAG_BYPASS) ret |= NF_VERDICT_FLAG_QUEUE_BYPASS; return ret; } static struct xt_target nfqueue_tg_reg[] __read_mostly = { { .name = "NFQUEUE", .family = NFPROTO_UNSPEC, .target = nfqueue_tg, .targetsize = sizeof(struct xt_NFQ_info), .me = THIS_MODULE, }, { .name = "NFQUEUE", .revision = 1, .family = NFPROTO_UNSPEC, .checkentry = nfqueue_tg_check, .target = nfqueue_tg_v1, .targetsize = sizeof(struct xt_NFQ_info_v1), .me = THIS_MODULE, }, { .name = "NFQUEUE", .revision = 2, .family = NFPROTO_UNSPEC, .checkentry = nfqueue_tg_check, .target = nfqueue_tg_v2, .targetsize = sizeof(struct xt_NFQ_info_v2), .me = THIS_MODULE, }, { .name = "NFQUEUE", .revision = 3, .family = NFPROTO_UNSPEC, .checkentry = nfqueue_tg_check, .target = nfqueue_tg_v3, .targetsize = sizeof(struct xt_NFQ_info_v3), .me = THIS_MODULE, }, }; static int __init nfqueue_tg_init(void) { return xt_register_targets(nfqueue_tg_reg, ARRAY_SIZE(nfqueue_tg_reg)); } static void __exit nfqueue_tg_exit(void) { xt_unregister_targets(nfqueue_tg_reg, ARRAY_SIZE(nfqueue_tg_reg)); } module_init(nfqueue_tg_init); module_exit(nfqueue_tg_exit); |
53 53 53 53 53 53 53 26 23 26 5 5 1 1 1 5 5 3 1 26 6 1 1 1 1 14 13 15 15 15 14 12 15 8 14 13 4 3 2 1 13 10 10 10 9 9 9 9 9 3 4 8 8 2 2 2 2 1 4 4 4 10 10 9 10 9 9 4 4 4 9 9 6 5 4 2 2 2 2 2 2 2 10 4 4 4 4 4 4 14 2 15 15 9 3 3 1 3 2 2 2 2 2 2 1 12 1 15 12 11 11 10 9 8 7 6 5 6 6 6 5 6 6 13 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 4 3 3 2 2 3 4 4 3 2 4 2 5 1 5 8 3 5 8 8 8 5 4 3 8 1 3 1 2 2 1 28 11 11 11 8 3 5 8 8 7 6 1 5 6 6 6 4 2 2 8 4 16 16 10 9 15 16 1 1 4 4 4 1 4 2 3 1 1 2 3 2 1 2 1 1 2 4 2 3 1 1 1 4 4 4 1 4 4 4 4 4 4 4 4 1 4 17 2 2 17 16 8 6 6 1 5 6 6 6 6 5 2 4 4 4 4 1 2 1 1 1 1 1 1 1 17 5 10 10 33 32 30 20 19 19 18 18 18 32 33 32 18 16 8 17 17 17 16 17 17 10 9 10 10 9 10 9 10 15 15 7 7 6 8 2 21 10 19 17 2 16 5 5 8 8 3 8 8 8 16 21 24 23 12 21 3 3 1 2 2 2 2 2 1 2 2 24 42 43 32 33 8 8 111 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 | // SPDX-License-Identifier: GPL-2.0-or-later /* * ebtables * * Author: * Bart De Schuymer <bdschuym@pandora.be> * * ebtables.c,v 2.0, July, 2002 * * This code is strongly inspired by the iptables code which is * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kmod.h> #include <linux/module.h> #include <linux/vmalloc.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/spinlock.h> #include <linux/mutex.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/smp.h> #include <linux/cpumask.h> #include <linux/audit.h> #include <net/sock.h> #include <net/netns/generic.h> /* needed for logical [in,out]-dev filtering */ #include "../br_private.h" /* Each cpu has its own set of counters, so there is no need for write_lock in * the softirq * For reading or updating the counters, the user context needs to * get a write_lock */ /* The size of each set of counters is altered to get cache alignment */ #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) #define COUNTER_OFFSET(n) (SMP_ALIGN(n * sizeof(struct ebt_counter))) #define COUNTER_BASE(c, n, cpu) ((struct ebt_counter *)(((char *)c) + \ COUNTER_OFFSET(n) * cpu)) struct ebt_pernet { struct list_head tables; }; struct ebt_template { struct list_head list; char name[EBT_TABLE_MAXNAMELEN]; struct module *owner; /* called when table is needed in the given netns */ int (*table_init)(struct net *net); }; static unsigned int ebt_pernet_id __read_mostly; static LIST_HEAD(template_tables); static DEFINE_MUTEX(ebt_mutex); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT static void ebt_standard_compat_from_user(void *dst, const void *src) { int v = *(compat_int_t *)src; if (v >= 0) v += xt_compat_calc_jump(NFPROTO_BRIDGE, v); memcpy(dst, &v, sizeof(v)); } static int ebt_standard_compat_to_user(void __user *dst, const void *src) { compat_int_t cv = *(int *)src; if (cv >= 0) cv -= xt_compat_calc_jump(NFPROTO_BRIDGE, cv); return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; } #endif static struct xt_target ebt_standard_target = { .name = "standard", .revision = 0, .family = NFPROTO_BRIDGE, .targetsize = sizeof(int), #ifdef CONFIG_NETFILTER_XTABLES_COMPAT .compatsize = sizeof(compat_int_t), .compat_from_user = ebt_standard_compat_from_user, .compat_to_user = ebt_standard_compat_to_user, #endif }; static inline int ebt_do_watcher(const struct ebt_entry_watcher *w, struct sk_buff *skb, struct xt_action_param *par) { par->target = w->u.watcher; par->targinfo = w->data; w->u.watcher->target(skb, par); /* watchers don't give a verdict */ return 0; } static inline int ebt_do_match(struct ebt_entry_match *m, const struct sk_buff *skb, struct xt_action_param *par) { par->match = m->u.match; par->matchinfo = m->data; return !m->u.match->match(skb, par); } static inline int ebt_dev_check(const char *entry, const struct net_device *device) { int i = 0; const char *devname; if (*entry == '\0') return 0; if (!device) return 1; devname = device->name; /* 1 is the wildcard token */ while (entry[i] != '\0' && entry[i] != 1 && entry[i] == devname[i]) i++; return devname[i] != entry[i] && entry[i] != 1; } /* process standard matches */ static inline int ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out) { const struct ethhdr *h = eth_hdr(skb); const struct net_bridge_port *p; __be16 ethproto; if (skb_vlan_tag_present(skb)) ethproto = htons(ETH_P_8021Q); else ethproto = h->h_proto; if (e->bitmask & EBT_802_3) { if (NF_INVF(e, EBT_IPROTO, eth_proto_is_802_3(ethproto))) return 1; } else if (!(e->bitmask & EBT_NOPROTO) && NF_INVF(e, EBT_IPROTO, e->ethproto != ethproto)) return 1; if (NF_INVF(e, EBT_IIN, ebt_dev_check(e->in, in))) return 1; if (NF_INVF(e, EBT_IOUT, ebt_dev_check(e->out, out))) return 1; /* rcu_read_lock()ed by nf_hook_thresh */ if (in && (p = br_port_get_rcu(in)) != NULL && NF_INVF(e, EBT_ILOGICALIN, ebt_dev_check(e->logical_in, p->br->dev))) return 1; if (out && (p = br_port_get_rcu(out)) != NULL && NF_INVF(e, EBT_ILOGICALOUT, ebt_dev_check(e->logical_out, p->br->dev))) return 1; if (e->bitmask & EBT_SOURCEMAC) { if (NF_INVF(e, EBT_ISOURCE, !ether_addr_equal_masked(h->h_source, e->sourcemac, e->sourcemsk))) return 1; } if (e->bitmask & EBT_DESTMAC) { if (NF_INVF(e, EBT_IDEST, !ether_addr_equal_masked(h->h_dest, e->destmac, e->destmsk))) return 1; } return 0; } static inline struct ebt_entry *ebt_next_entry(const struct ebt_entry *entry) { return (void *)entry + entry->next_offset; } static inline const struct ebt_entry_target * ebt_get_target_c(const struct ebt_entry *e) { return ebt_get_target((struct ebt_entry *)e); } /* Do some firewalling */ unsigned int ebt_do_table(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct ebt_table *table = priv; unsigned int hook = state->hook; int i, nentries; struct ebt_entry *point; struct ebt_counter *counter_base, *cb_base; const struct ebt_entry_target *t; int verdict, sp = 0; struct ebt_chainstack *cs; struct ebt_entries *chaininfo; const char *base; const struct ebt_table_info *private; struct xt_action_param acpar; acpar.state = state; acpar.hotdrop = false; read_lock_bh(&table->lock); private = table->private; cb_base = COUNTER_BASE(private->counters, private->nentries, smp_processor_id()); if (private->chainstack) cs = private->chainstack[smp_processor_id()]; else cs = NULL; chaininfo = private->hook_entry[hook]; nentries = private->hook_entry[hook]->nentries; point = (struct ebt_entry *)(private->hook_entry[hook]->data); counter_base = cb_base + private->hook_entry[hook]->counter_offset; /* base for chain jumps */ base = private->entries; i = 0; while (i < nentries) { if (ebt_basic_match(point, skb, state->in, state->out)) goto letscontinue; if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &acpar) != 0) goto letscontinue; if (acpar.hotdrop) { read_unlock_bh(&table->lock); return NF_DROP; } ADD_COUNTER(*(counter_base + i), skb->len, 1); /* these should only watch: not modify, nor tell us * what to do with the packet */ EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, &acpar); t = ebt_get_target_c(point); /* standard target */ if (!t->u.target->target) verdict = ((struct ebt_standard_target *)t)->verdict; else { acpar.target = t->u.target; acpar.targinfo = t->data; verdict = t->u.target->target(skb, &acpar); } if (verdict == EBT_ACCEPT) { read_unlock_bh(&table->lock); return NF_ACCEPT; } if (verdict == EBT_DROP) { read_unlock_bh(&table->lock); return NF_DROP; } if (verdict == EBT_RETURN) { letsreturn: if (WARN(sp == 0, "RETURN on base chain")) { /* act like this is EBT_CONTINUE */ goto letscontinue; } sp--; /* put all the local variables right */ i = cs[sp].n; chaininfo = cs[sp].chaininfo; nentries = chaininfo->nentries; point = cs[sp].e; counter_base = cb_base + chaininfo->counter_offset; continue; } if (verdict == EBT_CONTINUE) goto letscontinue; if (WARN(verdict < 0, "bogus standard verdict\n")) { read_unlock_bh(&table->lock); return NF_DROP; } /* jump to a udc */ cs[sp].n = i + 1; cs[sp].chaininfo = chaininfo; cs[sp].e = ebt_next_entry(point); i = 0; chaininfo = (struct ebt_entries *) (base + verdict); if (WARN(chaininfo->distinguisher, "jump to non-chain\n")) { read_unlock_bh(&table->lock); return NF_DROP; } nentries = chaininfo->nentries; point = (struct ebt_entry *)chaininfo->data; counter_base = cb_base + chaininfo->counter_offset; sp++; continue; letscontinue: point = ebt_next_entry(point); i++; } /* I actually like this :) */ if (chaininfo->policy == EBT_RETURN) goto letsreturn; if (chaininfo->policy == EBT_ACCEPT) { read_unlock_bh(&table->lock); return NF_ACCEPT; } read_unlock_bh(&table->lock); return NF_DROP; } /* If it succeeds, returns element and locks mutex */ static inline void * find_inlist_lock_noload(struct net *net, const char *name, int *error, struct mutex *mutex) { struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id); struct ebt_template *tmpl; struct ebt_table *table; mutex_lock(mutex); list_for_each_entry(table, &ebt_net->tables, list) { if (strcmp(table->name, name) == 0) return table; } list_for_each_entry(tmpl, &template_tables, list) { if (strcmp(name, tmpl->name) == 0) { struct module *owner = tmpl->owner; if (!try_module_get(owner)) goto out; mutex_unlock(mutex); *error = tmpl->table_init(net); if (*error) { module_put(owner); return NULL; } mutex_lock(mutex); module_put(owner); break; } } list_for_each_entry(table, &ebt_net->tables, list) { if (strcmp(table->name, name) == 0) return table; } out: *error = -ENOENT; mutex_unlock(mutex); return NULL; } static void * find_inlist_lock(struct net *net, const char *name, const char *prefix, int *error, struct mutex *mutex) { return try_then_request_module( find_inlist_lock_noload(net, name, error, mutex), "%s%s", prefix, name); } static inline struct ebt_table * find_table_lock(struct net *net, const char *name, int *error, struct mutex *mutex) { return find_inlist_lock(net, name, "ebtable_", error, mutex); } static inline void ebt_free_table_info(struct ebt_table_info *info) { int i; if (info->chainstack) { for_each_possible_cpu(i) vfree(info->chainstack[i]); vfree(info->chainstack); } } static inline int ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par, unsigned int *cnt) { const struct ebt_entry *e = par->entryinfo; struct xt_match *match; size_t left = ((char *)e + e->watchers_offset) - (char *)m; int ret; if (left < sizeof(struct ebt_entry_match) || left - sizeof(struct ebt_entry_match) < m->match_size) return -EINVAL; match = xt_find_match(NFPROTO_BRIDGE, m->u.name, m->u.revision); if (IS_ERR(match) || match->family != NFPROTO_BRIDGE) { if (!IS_ERR(match)) module_put(match->me); request_module("ebt_%s", m->u.name); match = xt_find_match(NFPROTO_BRIDGE, m->u.name, m->u.revision); } if (IS_ERR(match)) return PTR_ERR(match); m->u.match = match; par->match = match; par->matchinfo = m->data; ret = xt_check_match(par, m->match_size, ntohs(e->ethproto), e->invflags & EBT_IPROTO); if (ret < 0) { module_put(match->me); return ret; } (*cnt)++; return 0; } static inline int ebt_check_watcher(struct ebt_entry_watcher *w, struct xt_tgchk_param *par, unsigned int *cnt) { const struct ebt_entry *e = par->entryinfo; struct xt_target *watcher; size_t left = ((char *)e + e->target_offset) - (char *)w; int ret; if (left < sizeof(struct ebt_entry_watcher) || left - sizeof(struct ebt_entry_watcher) < w->watcher_size) return -EINVAL; watcher = xt_request_find_target(NFPROTO_BRIDGE, w->u.name, 0); if (IS_ERR(watcher)) return PTR_ERR(watcher); if (watcher->family != NFPROTO_BRIDGE) { module_put(watcher->me); return -ENOENT; } w->u.watcher = watcher; par->target = watcher; par->targinfo = w->data; ret = xt_check_target(par, w->watcher_size, ntohs(e->ethproto), e->invflags & EBT_IPROTO); if (ret < 0) { module_put(watcher->me); return ret; } (*cnt)++; return 0; } static int ebt_verify_pointers(const struct ebt_replace *repl, struct ebt_table_info *newinfo) { unsigned int limit = repl->entries_size; unsigned int valid_hooks = repl->valid_hooks; unsigned int offset = 0; int i; for (i = 0; i < NF_BR_NUMHOOKS; i++) newinfo->hook_entry[i] = NULL; newinfo->entries_size = repl->entries_size; newinfo->nentries = repl->nentries; while (offset < limit) { size_t left = limit - offset; struct ebt_entry *e = (void *)newinfo->entries + offset; if (left < sizeof(unsigned int)) break; for (i = 0; i < NF_BR_NUMHOOKS; i++) { if ((valid_hooks & (1 << i)) == 0) continue; if ((char __user *)repl->hook_entry[i] == repl->entries + offset) break; } if (i != NF_BR_NUMHOOKS || !(e->bitmask & EBT_ENTRY_OR_ENTRIES)) { if (e->bitmask != 0) { /* we make userspace set this right, * so there is no misunderstanding */ return -EINVAL; } if (i != NF_BR_NUMHOOKS) newinfo->hook_entry[i] = (struct ebt_entries *)e; if (left < sizeof(struct ebt_entries)) break; offset += sizeof(struct ebt_entries); } else { if (left < sizeof(struct ebt_entry)) break; if (left < e->next_offset) break; if (e->next_offset < sizeof(struct ebt_entry)) return -EINVAL; offset += e->next_offset; } } if (offset != limit) return -EINVAL; /* check if all valid hooks have a chain */ for (i = 0; i < NF_BR_NUMHOOKS; i++) { if (!newinfo->hook_entry[i] && (valid_hooks & (1 << i))) return -EINVAL; } return 0; } /* this one is very careful, as it is the first function * to parse the userspace data */ static inline int ebt_check_entry_size_and_hooks(const struct ebt_entry *e, const struct ebt_table_info *newinfo, unsigned int *n, unsigned int *cnt, unsigned int *totalcnt, unsigned int *udc_cnt) { int i; for (i = 0; i < NF_BR_NUMHOOKS; i++) { if ((void *)e == (void *)newinfo->hook_entry[i]) break; } /* beginning of a new chain * if i == NF_BR_NUMHOOKS it must be a user defined chain */ if (i != NF_BR_NUMHOOKS || !e->bitmask) { /* this checks if the previous chain has as many entries * as it said it has */ if (*n != *cnt) return -EINVAL; if (((struct ebt_entries *)e)->policy != EBT_DROP && ((struct ebt_entries *)e)->policy != EBT_ACCEPT) { /* only RETURN from udc */ if (i != NF_BR_NUMHOOKS || ((struct ebt_entries *)e)->policy != EBT_RETURN) return -EINVAL; } if (i == NF_BR_NUMHOOKS) /* it's a user defined chain */ (*udc_cnt)++; if (((struct ebt_entries *)e)->counter_offset != *totalcnt) return -EINVAL; *n = ((struct ebt_entries *)e)->nentries; *cnt = 0; return 0; } /* a plain old entry, heh */ if (sizeof(struct ebt_entry) > e->watchers_offset || e->watchers_offset > e->target_offset || e->target_offset >= e->next_offset) return -EINVAL; /* this is not checked anywhere else */ if (e->next_offset - e->target_offset < sizeof(struct ebt_entry_target)) return -EINVAL; (*cnt)++; (*totalcnt)++; return 0; } struct ebt_cl_stack { struct ebt_chainstack cs; int from; unsigned int hookmask; }; /* We need these positions to check that the jumps to a different part of the * entries is a jump to the beginning of a new chain. */ static inline int ebt_get_udc_positions(struct ebt_entry *e, struct ebt_table_info *newinfo, unsigned int *n, struct ebt_cl_stack *udc) { int i; /* we're only interested in chain starts */ if (e->bitmask) return 0; for (i = 0; i < NF_BR_NUMHOOKS; i++) { if (newinfo->hook_entry[i] == (struct ebt_entries *)e) break; } /* only care about udc */ if (i != NF_BR_NUMHOOKS) return 0; udc[*n].cs.chaininfo = (struct ebt_entries *)e; /* these initialisations are depended on later in check_chainloops() */ udc[*n].cs.n = 0; udc[*n].hookmask = 0; (*n)++; return 0; } static inline int ebt_cleanup_match(struct ebt_entry_match *m, struct net *net, unsigned int *i) { struct xt_mtdtor_param par; if (i && (*i)-- == 0) return 1; par.net = net; par.match = m->u.match; par.matchinfo = m->data; par.family = NFPROTO_BRIDGE; if (par.match->destroy != NULL) par.match->destroy(&par); module_put(par.match->me); return 0; } static inline int ebt_cleanup_watcher(struct ebt_entry_watcher *w, struct net *net, unsigned int *i) { struct xt_tgdtor_param par; if (i && (*i)-- == 0) return 1; par.net = net; par.target = w->u.watcher; par.targinfo = w->data; par.family = NFPROTO_BRIDGE; if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); return 0; } static inline int ebt_cleanup_entry(struct ebt_entry *e, struct net *net, unsigned int *cnt) { struct xt_tgdtor_param par; struct ebt_entry_target *t; if (e->bitmask == 0) return 0; /* we're done */ if (cnt && (*cnt)-- == 0) return 1; EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, net, NULL); EBT_MATCH_ITERATE(e, ebt_cleanup_match, net, NULL); t = ebt_get_target(e); par.net = net; par.target = t->u.target; par.targinfo = t->data; par.family = NFPROTO_BRIDGE; if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); return 0; } static inline int ebt_check_entry(struct ebt_entry *e, struct net *net, const struct ebt_table_info *newinfo, const char *name, unsigned int *cnt, struct ebt_cl_stack *cl_s, unsigned int udc_cnt) { struct ebt_entry_target *t; struct xt_target *target; unsigned int i, j, hook = 0, hookmask = 0; size_t gap; int ret; struct xt_mtchk_param mtpar; struct xt_tgchk_param tgpar; /* don't mess with the struct ebt_entries */ if (e->bitmask == 0) return 0; if (e->bitmask & ~EBT_F_MASK) return -EINVAL; if (e->invflags & ~EBT_INV_MASK) return -EINVAL; if ((e->bitmask & EBT_NOPROTO) && (e->bitmask & EBT_802_3)) return -EINVAL; /* what hook do we belong to? */ for (i = 0; i < NF_BR_NUMHOOKS; i++) { if (!newinfo->hook_entry[i]) continue; if ((char *)newinfo->hook_entry[i] < (char *)e) hook = i; else break; } /* (1 << NF_BR_NUMHOOKS) tells the check functions the rule is on * a base chain */ if (i < NF_BR_NUMHOOKS) hookmask = (1 << hook) | (1 << NF_BR_NUMHOOKS); else { for (i = 0; i < udc_cnt; i++) if ((char *)(cl_s[i].cs.chaininfo) > (char *)e) break; if (i == 0) hookmask = (1 << hook) | (1 << NF_BR_NUMHOOKS); else hookmask = cl_s[i - 1].hookmask; } i = 0; memset(&mtpar, 0, sizeof(mtpar)); memset(&tgpar, 0, sizeof(tgpar)); mtpar.net = tgpar.net = net; mtpar.table = tgpar.table = name; mtpar.entryinfo = tgpar.entryinfo = e; mtpar.hook_mask = tgpar.hook_mask = hookmask; mtpar.family = tgpar.family = NFPROTO_BRIDGE; ret = EBT_MATCH_ITERATE(e, ebt_check_match, &mtpar, &i); if (ret != 0) goto cleanup_matches; j = 0; ret = EBT_WATCHER_ITERATE(e, ebt_check_watcher, &tgpar, &j); if (ret != 0) goto cleanup_watchers; t = ebt_get_target(e); gap = e->next_offset - e->target_offset; target = xt_request_find_target(NFPROTO_BRIDGE, t->u.name, 0); if (IS_ERR(target)) { ret = PTR_ERR(target); goto cleanup_watchers; } /* Reject UNSPEC, xtables verdicts/return values are incompatible */ if (target->family != NFPROTO_BRIDGE) { module_put(target->me); ret = -ENOENT; goto cleanup_watchers; } t->u.target = target; if (t->u.target == &ebt_standard_target) { if (gap < sizeof(struct ebt_standard_target)) { ret = -EFAULT; goto cleanup_watchers; } if (((struct ebt_standard_target *)t)->verdict < -NUM_STANDARD_TARGETS) { ret = -EFAULT; goto cleanup_watchers; } } else if (t->target_size > gap - sizeof(struct ebt_entry_target)) { module_put(t->u.target->me); ret = -EFAULT; goto cleanup_watchers; } tgpar.target = target; tgpar.targinfo = t->data; ret = xt_check_target(&tgpar, t->target_size, ntohs(e->ethproto), e->invflags & EBT_IPROTO); if (ret < 0) { module_put(target->me); goto cleanup_watchers; } (*cnt)++; return 0; cleanup_watchers: EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, net, &j); cleanup_matches: EBT_MATCH_ITERATE(e, ebt_cleanup_match, net, &i); return ret; } /* checks for loops and sets the hook mask for udc * the hook mask for udc tells us from which base chains the udc can be * accessed. This mask is a parameter to the check() functions of the extensions */ static int check_chainloops(const struct ebt_entries *chain, struct ebt_cl_stack *cl_s, unsigned int udc_cnt, unsigned int hooknr, char *base) { int i, chain_nr = -1, pos = 0, nentries = chain->nentries, verdict; const struct ebt_entry *e = (struct ebt_entry *)chain->data; const struct ebt_entry_target *t; while (pos < nentries || chain_nr != -1) { /* end of udc, go back one 'recursion' step */ if (pos == nentries) { /* put back values of the time when this chain was called */ e = cl_s[chain_nr].cs.e; if (cl_s[chain_nr].from != -1) nentries = cl_s[cl_s[chain_nr].from].cs.chaininfo->nentries; else nentries = chain->nentries; pos = cl_s[chain_nr].cs.n; /* make sure we won't see a loop that isn't one */ cl_s[chain_nr].cs.n = 0; chain_nr = cl_s[chain_nr].from; if (pos == nentries) continue; } t = ebt_get_target_c(e); if (strcmp(t->u.name, EBT_STANDARD_TARGET)) goto letscontinue; if (e->target_offset + sizeof(struct ebt_standard_target) > e->next_offset) return -1; verdict = ((struct ebt_standard_target *)t)->verdict; if (verdict >= 0) { /* jump to another chain */ struct ebt_entries *hlp2 = (struct ebt_entries *)(base + verdict); for (i = 0; i < udc_cnt; i++) if (hlp2 == cl_s[i].cs.chaininfo) break; /* bad destination or loop */ if (i == udc_cnt) return -1; if (cl_s[i].cs.n) return -1; if (cl_s[i].hookmask & (1 << hooknr)) goto letscontinue; /* this can't be 0, so the loop test is correct */ cl_s[i].cs.n = pos + 1; pos = 0; cl_s[i].cs.e = ebt_next_entry(e); e = (struct ebt_entry *)(hlp2->data); nentries = hlp2->nentries; cl_s[i].from = chain_nr; chain_nr = i; /* this udc is accessible from the base chain for hooknr */ cl_s[i].hookmask |= (1 << hooknr); continue; } letscontinue: e = ebt_next_entry(e); pos++; } return 0; } /* do the parsing of the table/chains/entries/matches/watchers/targets, heh */ static int translate_table(struct net *net, const char *name, struct ebt_table_info *newinfo) { unsigned int i, j, k, udc_cnt; int ret; struct ebt_cl_stack *cl_s = NULL; /* used in the checking for chain loops */ i = 0; while (i < NF_BR_NUMHOOKS && !newinfo->hook_entry[i]) i++; if (i == NF_BR_NUMHOOKS) return -EINVAL; if (newinfo->hook_entry[i] != (struct ebt_entries *)newinfo->entries) return -EINVAL; /* make sure chains are ordered after each other in same order * as their corresponding hooks */ for (j = i + 1; j < NF_BR_NUMHOOKS; j++) { if (!newinfo->hook_entry[j]) continue; if (newinfo->hook_entry[j] <= newinfo->hook_entry[i]) return -EINVAL; i = j; } /* do some early checkings and initialize some things */ i = 0; /* holds the expected nr. of entries for the chain */ j = 0; /* holds the up to now counted entries for the chain */ k = 0; /* holds the total nr. of entries, should equal * newinfo->nentries afterwards */ udc_cnt = 0; /* will hold the nr. of user defined chains (udc) */ ret = EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, ebt_check_entry_size_and_hooks, newinfo, &i, &j, &k, &udc_cnt); if (ret != 0) return ret; if (i != j) return -EINVAL; if (k != newinfo->nentries) return -EINVAL; /* get the location of the udc, put them in an array * while we're at it, allocate the chainstack */ if (udc_cnt) { /* this will get free'd in do_replace()/ebt_register_table() * if an error occurs */ newinfo->chainstack = vmalloc(array_size(nr_cpu_ids, sizeof(*(newinfo->chainstack)))); if (!newinfo->chainstack) return -ENOMEM; for_each_possible_cpu(i) { newinfo->chainstack[i] = vmalloc_node(array_size(udc_cnt, sizeof(*(newinfo->chainstack[0]))), cpu_to_node(i)); if (!newinfo->chainstack[i]) { while (i) vfree(newinfo->chainstack[--i]); vfree(newinfo->chainstack); newinfo->chainstack = NULL; return -ENOMEM; } } cl_s = vmalloc(array_size(udc_cnt, sizeof(*cl_s))); if (!cl_s) return -ENOMEM; i = 0; /* the i'th udc */ EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, ebt_get_udc_positions, newinfo, &i, cl_s); /* sanity check */ if (i != udc_cnt) { vfree(cl_s); return -EFAULT; } } /* Check for loops */ for (i = 0; i < NF_BR_NUMHOOKS; i++) if (newinfo->hook_entry[i]) if (check_chainloops(newinfo->hook_entry[i], cl_s, udc_cnt, i, newinfo->entries)) { vfree(cl_s); return -EINVAL; } /* we now know the following (along with E=mc²): * - the nr of entries in each chain is right * - the size of the allocated space is right * - all valid hooks have a corresponding chain * - there are no loops * - wrong data can still be on the level of a single entry * - could be there are jumps to places that are not the * beginning of a chain. This can only occur in chains that * are not accessible from any base chains, so we don't care. */ /* used to know what we need to clean up if something goes wrong */ i = 0; ret = EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, ebt_check_entry, net, newinfo, name, &i, cl_s, udc_cnt); if (ret != 0) { EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, ebt_cleanup_entry, net, &i); } vfree(cl_s); return ret; } /* called under write_lock */ static void get_counters(const struct ebt_counter *oldcounters, struct ebt_counter *counters, unsigned int nentries) { int i, cpu; struct ebt_counter *counter_base; /* counters of cpu 0 */ memcpy(counters, oldcounters, sizeof(struct ebt_counter) * nentries); /* add other counters to those of cpu 0 */ for_each_possible_cpu(cpu) { if (cpu == 0) continue; counter_base = COUNTER_BASE(oldcounters, nentries, cpu); for (i = 0; i < nentries; i++) ADD_COUNTER(counters[i], counter_base[i].bcnt, counter_base[i].pcnt); } } static int do_replace_finish(struct net *net, struct ebt_replace *repl, struct ebt_table_info *newinfo) { int ret; struct ebt_counter *counterstmp = NULL; /* used to be able to unlock earlier */ struct ebt_table_info *table; struct ebt_table *t; /* the user wants counters back * the check on the size is done later, when we have the lock */ if (repl->num_counters) { unsigned long size = repl->num_counters * sizeof(*counterstmp); counterstmp = vmalloc(size); if (!counterstmp) return -ENOMEM; } newinfo->chainstack = NULL; ret = ebt_verify_pointers(repl, newinfo); if (ret != 0) goto free_counterstmp; ret = translate_table(net, repl->name, newinfo); if (ret != 0) goto free_counterstmp; t = find_table_lock(net, repl->name, &ret, &ebt_mutex); if (!t) { ret = -ENOENT; goto free_iterate; } if (repl->valid_hooks != t->valid_hooks) { ret = -EINVAL; goto free_unlock; } if (repl->num_counters && repl->num_counters != t->private->nentries) { ret = -EINVAL; goto free_unlock; } /* we have the mutex lock, so no danger in reading this pointer */ table = t->private; /* make sure the table can only be rmmod'ed if it contains no rules */ if (!table->nentries && newinfo->nentries && !try_module_get(t->me)) { ret = -ENOENT; goto free_unlock; } else if (table->nentries && !newinfo->nentries) module_put(t->me); /* we need an atomic snapshot of the counters */ write_lock_bh(&t->lock); if (repl->num_counters) get_counters(t->private->counters, counterstmp, t->private->nentries); t->private = newinfo; write_unlock_bh(&t->lock); mutex_unlock(&ebt_mutex); /* so, a user can change the chains while having messed up her counter * allocation. Only reason why this is done is because this way the lock * is held only once, while this doesn't bring the kernel into a * dangerous state. */ if (repl->num_counters && copy_to_user(repl->counters, counterstmp, array_size(repl->num_counters, sizeof(struct ebt_counter)))) { /* Silent error, can't fail, new table is already in place */ net_warn_ratelimited("ebtables: counters copy to user failed while replacing table\n"); } /* decrease module count and free resources */ EBT_ENTRY_ITERATE(table->entries, table->entries_size, ebt_cleanup_entry, net, NULL); vfree(table->entries); ebt_free_table_info(table); vfree(table); vfree(counterstmp); audit_log_nfcfg(repl->name, AF_BRIDGE, repl->nentries, AUDIT_XT_OP_REPLACE, GFP_KERNEL); return 0; free_unlock: mutex_unlock(&ebt_mutex); free_iterate: EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, ebt_cleanup_entry, net, NULL); free_counterstmp: vfree(counterstmp); /* can be initialized in translate_table() */ ebt_free_table_info(newinfo); return ret; } /* replace the table */ static int do_replace(struct net *net, sockptr_t arg, unsigned int len) { int ret, countersize; struct ebt_table_info *newinfo; struct ebt_replace tmp; if (len < sizeof(tmp)) return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; if (len != sizeof(tmp) + tmp.entries_size) return -EINVAL; if (tmp.entries_size == 0) return -EINVAL; /* overflow check */ if (tmp.nentries >= ((INT_MAX - sizeof(struct ebt_table_info)) / NR_CPUS - SMP_CACHE_BYTES) / sizeof(struct ebt_counter)) return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter)) return -ENOMEM; tmp.name[sizeof(tmp.name) - 1] = 0; countersize = COUNTER_OFFSET(tmp.nentries) * nr_cpu_ids; newinfo = __vmalloc(sizeof(*newinfo) + countersize, GFP_KERNEL_ACCOUNT); if (!newinfo) return -ENOMEM; if (countersize) memset(newinfo->counters, 0, countersize); newinfo->entries = __vmalloc(tmp.entries_size, GFP_KERNEL_ACCOUNT); if (!newinfo->entries) { ret = -ENOMEM; goto free_newinfo; } if (copy_from_user( newinfo->entries, tmp.entries, tmp.entries_size) != 0) { ret = -EFAULT; goto free_entries; } ret = do_replace_finish(net, &tmp, newinfo); if (ret == 0) return ret; free_entries: vfree(newinfo->entries); free_newinfo: vfree(newinfo); return ret; } static void __ebt_unregister_table(struct net *net, struct ebt_table *table) { mutex_lock(&ebt_mutex); list_del(&table->list); mutex_unlock(&ebt_mutex); audit_log_nfcfg(table->name, AF_BRIDGE, table->private->nentries, AUDIT_XT_OP_UNREGISTER, GFP_KERNEL); EBT_ENTRY_ITERATE(table->private->entries, table->private->entries_size, ebt_cleanup_entry, net, NULL); if (table->private->nentries) module_put(table->me); vfree(table->private->entries); ebt_free_table_info(table->private); vfree(table->private); kfree(table->ops); kfree(table); } int ebt_register_table(struct net *net, const struct ebt_table *input_table, const struct nf_hook_ops *template_ops) { struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id); struct ebt_table_info *newinfo; struct ebt_table *t, *table; struct nf_hook_ops *ops; unsigned int num_ops; struct ebt_replace_kernel *repl; int ret, i, countersize; void *p; if (input_table == NULL || (repl = input_table->table) == NULL || repl->entries == NULL || repl->entries_size == 0 || repl->counters != NULL || input_table->private != NULL) return -EINVAL; /* Don't add one table to multiple lists. */ table = kmemdup(input_table, sizeof(struct ebt_table), GFP_KERNEL); if (!table) { ret = -ENOMEM; goto out; } countersize = COUNTER_OFFSET(repl->nentries) * nr_cpu_ids; newinfo = vmalloc(sizeof(*newinfo) + countersize); ret = -ENOMEM; if (!newinfo) goto free_table; p = vmalloc(repl->entries_size); if (!p) goto free_newinfo; memcpy(p, repl->entries, repl->entries_size); newinfo->entries = p; newinfo->entries_size = repl->entries_size; newinfo->nentries = repl->nentries; if (countersize) memset(newinfo->counters, 0, countersize); /* fill in newinfo and parse the entries */ newinfo->chainstack = NULL; for (i = 0; i < NF_BR_NUMHOOKS; i++) { if ((repl->valid_hooks & (1 << i)) == 0) newinfo->hook_entry[i] = NULL; else newinfo->hook_entry[i] = p + ((char *)repl->hook_entry[i] - repl->entries); } ret = translate_table(net, repl->name, newinfo); if (ret != 0) goto free_chainstack; table->private = newinfo; rwlock_init(&table->lock); mutex_lock(&ebt_mutex); list_for_each_entry(t, &ebt_net->tables, list) { if (strcmp(t->name, table->name) == 0) { ret = -EEXIST; goto free_unlock; } } /* Hold a reference count if the chains aren't empty */ if (newinfo->nentries && !try_module_get(table->me)) { ret = -ENOENT; goto free_unlock; } num_ops = hweight32(table->valid_hooks); if (num_ops == 0) { ret = -EINVAL; goto free_unlock; } ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL); if (!ops) { ret = -ENOMEM; if (newinfo->nentries) module_put(table->me); goto free_unlock; } for (i = 0; i < num_ops; i++) ops[i].priv = table; list_add(&table->list, &ebt_net->tables); mutex_unlock(&ebt_mutex); table->ops = ops; ret = nf_register_net_hooks(net, ops, num_ops); if (ret) __ebt_unregister_table(net, table); audit_log_nfcfg(repl->name, AF_BRIDGE, repl->nentries, AUDIT_XT_OP_REGISTER, GFP_KERNEL); return ret; free_unlock: mutex_unlock(&ebt_mutex); free_chainstack: ebt_free_table_info(newinfo); vfree(newinfo->entries); free_newinfo: vfree(newinfo); free_table: kfree(table); out: return ret; } int ebt_register_template(const struct ebt_table *t, int (*table_init)(struct net *net)) { struct ebt_template *tmpl; mutex_lock(&ebt_mutex); list_for_each_entry(tmpl, &template_tables, list) { if (WARN_ON_ONCE(strcmp(t->name, tmpl->name) == 0)) { mutex_unlock(&ebt_mutex); return -EEXIST; } } tmpl = kzalloc(sizeof(*tmpl), GFP_KERNEL); if (!tmpl) { mutex_unlock(&ebt_mutex); return -ENOMEM; } tmpl->table_init = table_init; strscpy(tmpl->name, t->name, sizeof(tmpl->name)); tmpl->owner = t->me; list_add(&tmpl->list, &template_tables); mutex_unlock(&ebt_mutex); return 0; } EXPORT_SYMBOL(ebt_register_template); void ebt_unregister_template(const struct ebt_table *t) { struct ebt_template *tmpl; mutex_lock(&ebt_mutex); list_for_each_entry(tmpl, &template_tables, list) { if (strcmp(t->name, tmpl->name)) continue; list_del(&tmpl->list); mutex_unlock(&ebt_mutex); kfree(tmpl); return; } mutex_unlock(&ebt_mutex); WARN_ON_ONCE(1); } EXPORT_SYMBOL(ebt_unregister_template); static struct ebt_table *__ebt_find_table(struct net *net, const char *name) { struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id); struct ebt_table *t; mutex_lock(&ebt_mutex); list_for_each_entry(t, &ebt_net->tables, list) { if (strcmp(t->name, name) == 0) { mutex_unlock(&ebt_mutex); return t; } } mutex_unlock(&ebt_mutex); return NULL; } void ebt_unregister_table_pre_exit(struct net *net, const char *name) { struct ebt_table *table = __ebt_find_table(net, name); if (table) nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks)); } EXPORT_SYMBOL(ebt_unregister_table_pre_exit); void ebt_unregister_table(struct net *net, const char *name) { struct ebt_table *table = __ebt_find_table(net, name); if (table) __ebt_unregister_table(net, table); } /* userspace just supplied us with counters */ static int do_update_counters(struct net *net, const char *name, struct ebt_counter __user *counters, unsigned int num_counters, unsigned int len) { int i, ret; struct ebt_counter *tmp; struct ebt_table *t; if (num_counters == 0) return -EINVAL; tmp = vmalloc(array_size(num_counters, sizeof(*tmp))); if (!tmp) return -ENOMEM; t = find_table_lock(net, name, &ret, &ebt_mutex); if (!t) goto free_tmp; if (num_counters != t->private->nentries) { ret = -EINVAL; goto unlock_mutex; } if (copy_from_user(tmp, counters, array_size(num_counters, sizeof(*counters)))) { ret = -EFAULT; goto unlock_mutex; } /* we want an atomic add of the counters */ write_lock_bh(&t->lock); /* we add to the counters of the first cpu */ for (i = 0; i < num_counters; i++) ADD_COUNTER(t->private->counters[i], tmp[i].bcnt, tmp[i].pcnt); write_unlock_bh(&t->lock); ret = 0; unlock_mutex: mutex_unlock(&ebt_mutex); free_tmp: vfree(tmp); return ret; } static int update_counters(struct net *net, sockptr_t arg, unsigned int len) { struct ebt_replace hlp; if (len < sizeof(hlp)) return -EINVAL; if (copy_from_sockptr(&hlp, arg, sizeof(hlp))) return -EFAULT; if (len != sizeof(hlp) + hlp.num_counters * sizeof(struct ebt_counter)) return -EINVAL; return do_update_counters(net, hlp.name, hlp.counters, hlp.num_counters, len); } static inline int ebt_obj_to_user(char __user *um, const char *_name, const char *data, int entrysize, int usersize, int datasize, u8 revision) { char name[EBT_EXTENSION_MAXNAMELEN] = {0}; /* ebtables expects 31 bytes long names but xt_match names are 29 bytes * long. Copy 29 bytes and fill remaining bytes with zeroes. */ strscpy(name, _name, sizeof(name)); if (copy_to_user(um, name, EBT_EXTENSION_MAXNAMELEN) || put_user(revision, (u8 __user *)(um + EBT_EXTENSION_MAXNAMELEN)) || put_user(datasize, (int __user *)(um + EBT_EXTENSION_MAXNAMELEN + 1)) || xt_data_to_user(um + entrysize, data, usersize, datasize, XT_ALIGN(datasize))) return -EFAULT; return 0; } static inline int ebt_match_to_user(const struct ebt_entry_match *m, const char *base, char __user *ubase) { return ebt_obj_to_user(ubase + ((char *)m - base), m->u.match->name, m->data, sizeof(*m), m->u.match->usersize, m->match_size, m->u.match->revision); } static inline int ebt_watcher_to_user(const struct ebt_entry_watcher *w, const char *base, char __user *ubase) { return ebt_obj_to_user(ubase + ((char *)w - base), w->u.watcher->name, w->data, sizeof(*w), w->u.watcher->usersize, w->watcher_size, w->u.watcher->revision); } static inline int ebt_entry_to_user(struct ebt_entry *e, const char *base, char __user *ubase) { int ret; char __user *hlp; const struct ebt_entry_target *t; if (e->bitmask == 0) { /* special case !EBT_ENTRY_OR_ENTRIES */ if (copy_to_user(ubase + ((char *)e - base), e, sizeof(struct ebt_entries))) return -EFAULT; return 0; } if (copy_to_user(ubase + ((char *)e - base), e, sizeof(*e))) return -EFAULT; hlp = ubase + (((char *)e + e->target_offset) - base); t = ebt_get_target_c(e); ret = EBT_MATCH_ITERATE(e, ebt_match_to_user, base, ubase); if (ret != 0) return ret; ret = EBT_WATCHER_ITERATE(e, ebt_watcher_to_user, base, ubase); if (ret != 0) return ret; ret = ebt_obj_to_user(hlp, t->u.target->name, t->data, sizeof(*t), t->u.target->usersize, t->target_size, t->u.target->revision); if (ret != 0) return ret; return 0; } static int copy_counters_to_user(struct ebt_table *t, const struct ebt_counter *oldcounters, void __user *user, unsigned int num_counters, unsigned int nentries) { struct ebt_counter *counterstmp; int ret = 0; /* userspace might not need the counters */ if (num_counters == 0) return 0; if (num_counters != nentries) return -EINVAL; counterstmp = vmalloc(array_size(nentries, sizeof(*counterstmp))); if (!counterstmp) return -ENOMEM; write_lock_bh(&t->lock); get_counters(oldcounters, counterstmp, nentries); write_unlock_bh(&t->lock); if (copy_to_user(user, counterstmp, array_size(nentries, sizeof(struct ebt_counter)))) ret = -EFAULT; vfree(counterstmp); return ret; } /* called with ebt_mutex locked */ static int copy_everything_to_user(struct ebt_table *t, void __user *user, const int *len, int cmd) { struct ebt_replace tmp; const struct ebt_counter *oldcounters; unsigned int entries_size, nentries; int ret; char *entries; if (cmd == EBT_SO_GET_ENTRIES) { entries_size = t->private->entries_size; nentries = t->private->nentries; entries = t->private->entries; oldcounters = t->private->counters; } else { entries_size = t->table->entries_size; nentries = t->table->nentries; entries = t->table->entries; oldcounters = t->table->counters; } if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; if (*len != sizeof(struct ebt_replace) + entries_size + (tmp.num_counters ? nentries * sizeof(struct ebt_counter) : 0)) return -EINVAL; if (tmp.nentries != nentries) return -EINVAL; if (tmp.entries_size != entries_size) return -EINVAL; ret = copy_counters_to_user(t, oldcounters, tmp.counters, tmp.num_counters, nentries); if (ret) return ret; /* set the match/watcher/target names right */ return EBT_ENTRY_ITERATE(entries, entries_size, ebt_entry_to_user, entries, tmp.entries); } #ifdef CONFIG_NETFILTER_XTABLES_COMPAT /* 32 bit-userspace compatibility definitions. */ struct compat_ebt_replace { char name[EBT_TABLE_MAXNAMELEN]; compat_uint_t valid_hooks; compat_uint_t nentries; compat_uint_t entries_size; /* start of the chains */ compat_uptr_t hook_entry[NF_BR_NUMHOOKS]; /* nr of counters userspace expects back */ compat_uint_t num_counters; /* where the kernel will put the old counters. */ compat_uptr_t counters; compat_uptr_t entries; }; /* struct ebt_entry_match, _target and _watcher have same layout */ struct compat_ebt_entry_mwt { union { struct { char name[EBT_EXTENSION_MAXNAMELEN]; u8 revision; }; compat_uptr_t ptr; } u; compat_uint_t match_size; compat_uint_t data[] __aligned(__alignof__(struct compat_ebt_replace)); }; /* account for possible padding between match_size and ->data */ static int ebt_compat_entry_padsize(void) { BUILD_BUG_ON(sizeof(struct ebt_entry_match) < sizeof(struct compat_ebt_entry_mwt)); return (int) sizeof(struct ebt_entry_match) - sizeof(struct compat_ebt_entry_mwt); } static int ebt_compat_match_offset(const struct xt_match *match, unsigned int userlen) { /* ebt_among needs special handling. The kernel .matchsize is * set to -1 at registration time; at runtime an EBT_ALIGN()ed * value is expected. * Example: userspace sends 4500, ebt_among.c wants 4504. */ if (unlikely(match->matchsize == -1)) return XT_ALIGN(userlen) - COMPAT_XT_ALIGN(userlen); return xt_compat_match_offset(match); } static int compat_match_to_user(struct ebt_entry_match *m, void __user **dstptr, unsigned int *size) { const struct xt_match *match = m->u.match; struct compat_ebt_entry_mwt __user *cm = *dstptr; int off = ebt_compat_match_offset(match, m->match_size); compat_uint_t msize = m->match_size - off; if (WARN_ON(off >= m->match_size)) return -EINVAL; if (copy_to_user(cm->u.name, match->name, strlen(match->name) + 1) || put_user(match->revision, &cm->u.revision) || put_user(msize, &cm->match_size)) return -EFAULT; if (match->compat_to_user) { if (match->compat_to_user(cm->data, m->data)) return -EFAULT; } else { if (xt_data_to_user(cm->data, m->data, match->usersize, msize, COMPAT_XT_ALIGN(msize))) return -EFAULT; } *size -= ebt_compat_entry_padsize() + off; *dstptr = cm->data; *dstptr += msize; return 0; } static int compat_target_to_user(struct ebt_entry_target *t, void __user **dstptr, unsigned int *size) { const struct xt_target *target = t->u.target; struct compat_ebt_entry_mwt __user *cm = *dstptr; int off = xt_compat_target_offset(target); compat_uint_t tsize = t->target_size - off; if (WARN_ON(off >= t->target_size)) return -EINVAL; if (copy_to_user(cm->u.name, target->name, strlen(target->name) + 1) || put_user(target->revision, &cm->u.revision) || put_user(tsize, &cm->match_size)) return -EFAULT; if (target->compat_to_user) { if (target->compat_to_user(cm->data, t->data)) return -EFAULT; } else { if (xt_data_to_user(cm->data, t->data, target->usersize, tsize, COMPAT_XT_ALIGN(tsize))) return -EFAULT; } *size -= ebt_compat_entry_padsize() + off; *dstptr = cm->data; *dstptr += tsize; return 0; } static int compat_watcher_to_user(struct ebt_entry_watcher *w, void __user **dstptr, unsigned int *size) { return compat_target_to_user((struct ebt_entry_target *)w, dstptr, size); } static int compat_copy_entry_to_user(struct ebt_entry *e, void __user **dstptr, unsigned int *size) { struct ebt_entry_target *t; struct ebt_entry __user *ce; u32 watchers_offset, target_offset, next_offset; compat_uint_t origsize; int ret; if (e->bitmask == 0) { if (*size < sizeof(struct ebt_entries)) return -EINVAL; if (copy_to_user(*dstptr, e, sizeof(struct ebt_entries))) return -EFAULT; *dstptr += sizeof(struct ebt_entries); *size -= sizeof(struct ebt_entries); return 0; } if (*size < sizeof(*ce)) return -EINVAL; ce = *dstptr; if (copy_to_user(ce, e, sizeof(*ce))) return -EFAULT; origsize = *size; *dstptr += sizeof(*ce); ret = EBT_MATCH_ITERATE(e, compat_match_to_user, dstptr, size); if (ret) return ret; watchers_offset = e->watchers_offset - (origsize - *size); ret = EBT_WATCHER_ITERATE(e, compat_watcher_to_user, dstptr, size); if (ret) return ret; target_offset = e->target_offset - (origsize - *size); t = ebt_get_target(e); ret = compat_target_to_user(t, dstptr, size); if (ret) return ret; next_offset = e->next_offset - (origsize - *size); if (put_user(watchers_offset, &ce->watchers_offset) || put_user(target_offset, &ce->target_offset) || put_user(next_offset, &ce->next_offset)) return -EFAULT; *size -= sizeof(*ce); return 0; } static int compat_calc_match(struct ebt_entry_match *m, int *off) { *off += ebt_compat_match_offset(m->u.match, m->match_size); *off += ebt_compat_entry_padsize(); return 0; } static int compat_calc_watcher(struct ebt_entry_watcher *w, int *off) { *off += xt_compat_target_offset(w->u.watcher); *off += ebt_compat_entry_padsize(); return 0; } static int compat_calc_entry(const struct ebt_entry *e, const struct ebt_table_info *info, const void *base, struct compat_ebt_replace *newinfo) { const struct ebt_entry_target *t; unsigned int entry_offset; int off, ret, i; if (e->bitmask == 0) return 0; off = 0; entry_offset = (void *)e - base; EBT_MATCH_ITERATE(e, compat_calc_match, &off); EBT_WATCHER_ITERATE(e, compat_calc_watcher, &off); t = ebt_get_target_c(e); off += xt_compat_target_offset(t->u.target); off += ebt_compat_entry_padsize(); newinfo->entries_size -= off; ret = xt_compat_add_offset(NFPROTO_BRIDGE, entry_offset, off); if (ret) return ret; for (i = 0; i < NF_BR_NUMHOOKS; i++) { const void *hookptr = info->hook_entry[i]; if (info->hook_entry[i] && (e < (struct ebt_entry *)(base - hookptr))) { newinfo->hook_entry[i] -= off; pr_debug("0x%08X -> 0x%08X\n", newinfo->hook_entry[i] + off, newinfo->hook_entry[i]); } } return 0; } static int ebt_compat_init_offsets(unsigned int number) { if (number > INT_MAX) return -EINVAL; /* also count the base chain policies */ number += NF_BR_NUMHOOKS; return xt_compat_init_offsets(NFPROTO_BRIDGE, number); } static int compat_table_info(const struct ebt_table_info *info, struct compat_ebt_replace *newinfo) { unsigned int size = info->entries_size; const void *entries = info->entries; int ret; newinfo->entries_size = size; ret = ebt_compat_init_offsets(info->nentries); if (ret) return ret; return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info, entries, newinfo); } static int compat_copy_everything_to_user(struct ebt_table *t, void __user *user, int *len, int cmd) { struct compat_ebt_replace repl, tmp; struct ebt_counter *oldcounters; struct ebt_table_info tinfo; int ret; void __user *pos; memset(&tinfo, 0, sizeof(tinfo)); if (cmd == EBT_SO_GET_ENTRIES) { tinfo.entries_size = t->private->entries_size; tinfo.nentries = t->private->nentries; tinfo.entries = t->private->entries; oldcounters = t->private->counters; } else { tinfo.entries_size = t->table->entries_size; tinfo.nentries = t->table->nentries; tinfo.entries = t->table->entries; oldcounters = t->table->counters; } if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; if (tmp.nentries != tinfo.nentries || (tmp.num_counters && tmp.num_counters != tinfo.nentries)) return -EINVAL; memcpy(&repl, &tmp, sizeof(repl)); if (cmd == EBT_SO_GET_ENTRIES) ret = compat_table_info(t->private, &repl); else ret = compat_table_info(&tinfo, &repl); if (ret) return ret; if (*len != sizeof(tmp) + repl.entries_size + (tmp.num_counters? tinfo.nentries * sizeof(struct ebt_counter): 0)) { pr_err("wrong size: *len %d, entries_size %u, replsz %d\n", *len, tinfo.entries_size, repl.entries_size); return -EINVAL; } /* userspace might not need the counters */ ret = copy_counters_to_user(t, oldcounters, compat_ptr(tmp.counters), tmp.num_counters, tinfo.nentries); if (ret) return ret; pos = compat_ptr(tmp.entries); return EBT_ENTRY_ITERATE(tinfo.entries, tinfo.entries_size, compat_copy_entry_to_user, &pos, &tmp.entries_size); } struct ebt_entries_buf_state { char *buf_kern_start; /* kernel buffer to copy (translated) data to */ u32 buf_kern_len; /* total size of kernel buffer */ u32 buf_kern_offset; /* amount of data copied so far */ u32 buf_user_offset; /* read position in userspace buffer */ }; static int ebt_buf_count(struct ebt_entries_buf_state *state, unsigned int sz) { state->buf_kern_offset += sz; return state->buf_kern_offset >= sz ? 0 : -EINVAL; } static int ebt_buf_add(struct ebt_entries_buf_state *state, const void *data, unsigned int sz) { if (state->buf_kern_start == NULL) goto count_only; if (WARN_ON(state->buf_kern_offset + sz > state->buf_kern_len)) return -EINVAL; memcpy(state->buf_kern_start + state->buf_kern_offset, data, sz); count_only: state->buf_user_offset += sz; return ebt_buf_count(state, sz); } static int ebt_buf_add_pad(struct ebt_entries_buf_state *state, unsigned int sz) { char *b = state->buf_kern_start; if (WARN_ON(b && state->buf_kern_offset > state->buf_kern_len)) return -EINVAL; if (b != NULL && sz > 0) memset(b + state->buf_kern_offset, 0, sz); /* do not adjust ->buf_user_offset here, we added kernel-side padding */ return ebt_buf_count(state, sz); } enum compat_mwt { EBT_COMPAT_MATCH, EBT_COMPAT_WATCHER, EBT_COMPAT_TARGET, }; static int compat_mtw_from_user(const struct compat_ebt_entry_mwt *mwt, enum compat_mwt compat_mwt, struct ebt_entries_buf_state *state, const unsigned char *base) { char name[EBT_EXTENSION_MAXNAMELEN]; struct xt_match *match; struct xt_target *wt; void *dst = NULL; int off, pad = 0; unsigned int size_kern, match_size = mwt->match_size; if (strscpy(name, mwt->u.name, sizeof(name)) < 0) return -EINVAL; if (state->buf_kern_start) dst = state->buf_kern_start + state->buf_kern_offset; switch (compat_mwt) { case EBT_COMPAT_MATCH: match = xt_request_find_match(NFPROTO_BRIDGE, name, mwt->u.revision); if (IS_ERR(match)) return PTR_ERR(match); off = ebt_compat_match_offset(match, match_size); if (dst) { if (match->compat_from_user) match->compat_from_user(dst, mwt->data); else memcpy(dst, mwt->data, match_size); } size_kern = match->matchsize; if (unlikely(size_kern == -1)) size_kern = match_size; module_put(match->me); break; case EBT_COMPAT_WATCHER: case EBT_COMPAT_TARGET: wt = xt_request_find_target(NFPROTO_BRIDGE, name, mwt->u.revision); if (IS_ERR(wt)) return PTR_ERR(wt); off = xt_compat_target_offset(wt); if (dst) { if (wt->compat_from_user) wt->compat_from_user(dst, mwt->data); else memcpy(dst, mwt->data, match_size); } size_kern = wt->targetsize; module_put(wt->me); break; default: return -EINVAL; } state->buf_kern_offset += match_size + off; state->buf_user_offset += match_size; pad = XT_ALIGN(size_kern) - size_kern; if (pad > 0 && dst) { if (WARN_ON(state->buf_kern_len <= pad)) return -EINVAL; if (WARN_ON(state->buf_kern_offset - (match_size + off) + size_kern > state->buf_kern_len - pad)) return -EINVAL; memset(dst + size_kern, 0, pad); } return off + match_size; } /* return size of all matches, watchers or target, including necessary * alignment and padding. */ static int ebt_size_mwt(const struct compat_ebt_entry_mwt *match32, unsigned int size_left, enum compat_mwt type, struct ebt_entries_buf_state *state, const void *base) { const char *buf = (const char *)match32; int growth = 0; if (size_left == 0) return 0; do { struct ebt_entry_match *match_kern; int ret; if (size_left < sizeof(*match32)) return -EINVAL; match_kern = (struct ebt_entry_match *) state->buf_kern_start; if (match_kern) { char *tmp; tmp = state->buf_kern_start + state->buf_kern_offset; match_kern = (struct ebt_entry_match *) tmp; } ret = ebt_buf_add(state, buf, sizeof(*match32)); if (ret < 0) return ret; size_left -= sizeof(*match32); /* add padding before match->data (if any) */ ret = ebt_buf_add_pad(state, ebt_compat_entry_padsize()); if (ret < 0) return ret; if (match32->match_size > size_left) return -EINVAL; size_left -= match32->match_size; ret = compat_mtw_from_user(match32, type, state, base); if (ret < 0) return ret; if (WARN_ON(ret < match32->match_size)) return -EINVAL; growth += ret - match32->match_size; growth += ebt_compat_entry_padsize(); buf += sizeof(*match32); buf += match32->match_size; if (match_kern) match_kern->match_size = ret; match32 = (struct compat_ebt_entry_mwt *) buf; } while (size_left); return growth; } /* called for all ebt_entry structures. */ static int size_entry_mwt(const struct ebt_entry *entry, const unsigned char *base, unsigned int *total, struct ebt_entries_buf_state *state) { unsigned int i, j, startoff, next_expected_off, new_offset = 0; /* stores match/watchers/targets & offset of next struct ebt_entry: */ unsigned int offsets[4]; unsigned int *offsets_update = NULL; int ret; char *buf_start; if (*total < sizeof(struct ebt_entries)) return -EINVAL; if (!entry->bitmask) { *total -= sizeof(struct ebt_entries); return ebt_buf_add(state, entry, sizeof(struct ebt_entries)); } if (*total < sizeof(*entry) || entry->next_offset < sizeof(*entry)) return -EINVAL; startoff = state->buf_user_offset; /* pull in most part of ebt_entry, it does not need to be changed. */ ret = ebt_buf_add(state, entry, offsetof(struct ebt_entry, watchers_offset)); if (ret < 0) return ret; offsets[0] = sizeof(struct ebt_entry); /* matches come first */ memcpy(&offsets[1], &entry->offsets, sizeof(entry->offsets)); if (state->buf_kern_start) { buf_start = state->buf_kern_start + state->buf_kern_offset; offsets_update = (unsigned int *) buf_start; } ret = ebt_buf_add(state, &offsets[1], sizeof(offsets) - sizeof(offsets[0])); if (ret < 0) return ret; buf_start = (char *) entry; /* 0: matches offset, always follows ebt_entry. * 1: watchers offset, from ebt_entry structure * 2: target offset, from ebt_entry structure * 3: next ebt_entry offset, from ebt_entry structure * * offsets are relative to beginning of struct ebt_entry (i.e., 0). */ for (i = 0; i < 4 ; ++i) { if (offsets[i] > *total) return -EINVAL; if (i < 3 && offsets[i] == *total) return -EINVAL; if (i == 0) continue; if (offsets[i-1] > offsets[i]) return -EINVAL; } for (i = 0, j = 1 ; j < 4 ; j++, i++) { struct compat_ebt_entry_mwt *match32; unsigned int size; char *buf = buf_start + offsets[i]; if (offsets[i] > offsets[j]) return -EINVAL; match32 = (struct compat_ebt_entry_mwt *) buf; size = offsets[j] - offsets[i]; ret = ebt_size_mwt(match32, size, i, state, base); if (ret < 0) return ret; new_offset += ret; if (offsets_update && new_offset) { pr_debug("change offset %d to %d\n", offsets_update[i], offsets[j] + new_offset); offsets_update[i] = offsets[j] + new_offset; } } if (state->buf_kern_start == NULL) { unsigned int offset = buf_start - (char *) base; ret = xt_compat_add_offset(NFPROTO_BRIDGE, offset, new_offset); if (ret < 0) return ret; } next_expected_off = state->buf_user_offset - startoff; if (next_expected_off != entry->next_offset) return -EINVAL; if (*total < entry->next_offset) return -EINVAL; *total -= entry->next_offset; return 0; } /* repl->entries_size is the size of the ebt_entry blob in userspace. * It might need more memory when copied to a 64 bit kernel in case * userspace is 32-bit. So, first task: find out how much memory is needed. * * Called before validation is performed. */ static int compat_copy_entries(unsigned char *data, unsigned int size_user, struct ebt_entries_buf_state *state) { unsigned int size_remaining = size_user; int ret; ret = EBT_ENTRY_ITERATE(data, size_user, size_entry_mwt, data, &size_remaining, state); if (ret < 0) return ret; if (size_remaining) return -EINVAL; return state->buf_kern_offset; } static int compat_copy_ebt_replace_from_user(struct ebt_replace *repl, sockptr_t arg, unsigned int len) { struct compat_ebt_replace tmp; int i; if (len < sizeof(tmp)) return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp))) return -EFAULT; if (len != sizeof(tmp) + tmp.entries_size) return -EINVAL; if (tmp.entries_size == 0) return -EINVAL; if (tmp.nentries >= ((INT_MAX - sizeof(struct ebt_table_info)) / NR_CPUS - SMP_CACHE_BYTES) / sizeof(struct ebt_counter)) return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter)) return -ENOMEM; memcpy(repl, &tmp, offsetof(struct ebt_replace, hook_entry)); /* starting with hook_entry, 32 vs. 64 bit structures are different */ for (i = 0; i < NF_BR_NUMHOOKS; i++) repl->hook_entry[i] = compat_ptr(tmp.hook_entry[i]); repl->num_counters = tmp.num_counters; repl->counters = compat_ptr(tmp.counters); repl->entries = compat_ptr(tmp.entries); return 0; } static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) { int ret, i, countersize, size64; struct ebt_table_info *newinfo; struct ebt_replace tmp; struct ebt_entries_buf_state state; void *entries_tmp; ret = compat_copy_ebt_replace_from_user(&tmp, arg, len); if (ret) { /* try real handler in case userland supplied needed padding */ if (ret == -EINVAL && do_replace(net, arg, len) == 0) ret = 0; return ret; } countersize = COUNTER_OFFSET(tmp.nentries) * nr_cpu_ids; newinfo = vmalloc(sizeof(*newinfo) + countersize); if (!newinfo) return -ENOMEM; if (countersize) memset(newinfo->counters, 0, countersize); memset(&state, 0, sizeof(state)); newinfo->entries = vmalloc(tmp.entries_size); if (!newinfo->entries) { ret = -ENOMEM; goto free_newinfo; } if (copy_from_user( newinfo->entries, tmp.entries, tmp.entries_size) != 0) { ret = -EFAULT; goto free_entries; } entries_tmp = newinfo->entries; xt_compat_lock(NFPROTO_BRIDGE); ret = ebt_compat_init_offsets(tmp.nentries); if (ret < 0) goto out_unlock; ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state); if (ret < 0) goto out_unlock; pr_debug("tmp.entries_size %d, kern off %d, user off %d delta %d\n", tmp.entries_size, state.buf_kern_offset, state.buf_user_offset, xt_compat_calc_jump(NFPROTO_BRIDGE, tmp.entries_size)); size64 = ret; newinfo->entries = vmalloc(size64); if (!newinfo->entries) { vfree(entries_tmp); ret = -ENOMEM; goto out_unlock; } memset(&state, 0, sizeof(state)); state.buf_kern_start = newinfo->entries; state.buf_kern_len = size64; ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state); if (WARN_ON(ret < 0)) { vfree(entries_tmp); goto out_unlock; } vfree(entries_tmp); tmp.entries_size = size64; for (i = 0; i < NF_BR_NUMHOOKS; i++) { char __user *usrptr; if (tmp.hook_entry[i]) { unsigned int delta; usrptr = (char __user *) tmp.hook_entry[i]; delta = usrptr - tmp.entries; usrptr += xt_compat_calc_jump(NFPROTO_BRIDGE, delta); tmp.hook_entry[i] = (struct ebt_entries __user *)usrptr; } } xt_compat_flush_offsets(NFPROTO_BRIDGE); xt_compat_unlock(NFPROTO_BRIDGE); ret = do_replace_finish(net, &tmp, newinfo); if (ret == 0) return ret; free_entries: vfree(newinfo->entries); free_newinfo: vfree(newinfo); return ret; out_unlock: xt_compat_flush_offsets(NFPROTO_BRIDGE); xt_compat_unlock(NFPROTO_BRIDGE); goto free_entries; } static int compat_update_counters(struct net *net, sockptr_t arg, unsigned int len) { struct compat_ebt_replace hlp; if (len < sizeof(hlp)) return -EINVAL; if (copy_from_sockptr(&hlp, arg, sizeof(hlp))) return -EFAULT; /* try real handler in case userland supplied needed padding */ if (len != sizeof(hlp) + hlp.num_counters * sizeof(struct ebt_counter)) return update_counters(net, arg, len); return do_update_counters(net, hlp.name, compat_ptr(hlp.counters), hlp.num_counters, len); } static int compat_do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { int ret; struct compat_ebt_replace tmp; struct ebt_table *t; struct net *net = sock_net(sk); if ((cmd == EBT_SO_GET_INFO || cmd == EBT_SO_GET_INIT_INFO) && *len != sizeof(struct compat_ebt_replace)) return -EINVAL; if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; tmp.name[sizeof(tmp.name) - 1] = '\0'; t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); if (!t) return ret; xt_compat_lock(NFPROTO_BRIDGE); switch (cmd) { case EBT_SO_GET_INFO: tmp.nentries = t->private->nentries; ret = compat_table_info(t->private, &tmp); if (ret) goto out; tmp.valid_hooks = t->valid_hooks; if (copy_to_user(user, &tmp, *len) != 0) { ret = -EFAULT; break; } ret = 0; break; case EBT_SO_GET_INIT_INFO: tmp.nentries = t->table->nentries; tmp.entries_size = t->table->entries_size; tmp.valid_hooks = t->table->valid_hooks; if (copy_to_user(user, &tmp, *len) != 0) { ret = -EFAULT; break; } ret = 0; break; case EBT_SO_GET_ENTRIES: case EBT_SO_GET_INIT_ENTRIES: /* try real handler first in case of userland-side padding. * in case we are dealing with an 'ordinary' 32 bit binary * without 64bit compatibility padding, this will fail right * after copy_from_user when the *len argument is validated. * * the compat_ variant needs to do one pass over the kernel * data set to adjust for size differences before it the check. */ if (copy_everything_to_user(t, user, len, cmd) == 0) ret = 0; else ret = compat_copy_everything_to_user(t, user, len, cmd); break; default: ret = -EINVAL; } out: xt_compat_flush_offsets(NFPROTO_BRIDGE); xt_compat_unlock(NFPROTO_BRIDGE); mutex_unlock(&ebt_mutex); return ret; } #endif static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { struct net *net = sock_net(sk); struct ebt_replace tmp; struct ebt_table *t; int ret; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; #ifdef CONFIG_NETFILTER_XTABLES_COMPAT /* try real handler in case userland supplied needed padding */ if (in_compat_syscall() && ((cmd != EBT_SO_GET_INFO && cmd != EBT_SO_GET_INIT_INFO) || *len != sizeof(tmp))) return compat_do_ebt_get_ctl(sk, cmd, user, len); #endif if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; tmp.name[sizeof(tmp.name) - 1] = '\0'; t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); if (!t) return ret; switch (cmd) { case EBT_SO_GET_INFO: case EBT_SO_GET_INIT_INFO: if (*len != sizeof(struct ebt_replace)) { ret = -EINVAL; mutex_unlock(&ebt_mutex); break; } if (cmd == EBT_SO_GET_INFO) { tmp.nentries = t->private->nentries; tmp.entries_size = t->private->entries_size; tmp.valid_hooks = t->valid_hooks; } else { tmp.nentries = t->table->nentries; tmp.entries_size = t->table->entries_size; tmp.valid_hooks = t->table->valid_hooks; } mutex_unlock(&ebt_mutex); if (copy_to_user(user, &tmp, *len) != 0) { ret = -EFAULT; break; } ret = 0; break; case EBT_SO_GET_ENTRIES: case EBT_SO_GET_INIT_ENTRIES: ret = copy_everything_to_user(t, user, len, cmd); mutex_unlock(&ebt_mutex); break; default: mutex_unlock(&ebt_mutex); ret = -EINVAL; } return ret; } static int do_ebt_set_ctl(struct sock *sk, int cmd, sockptr_t arg, unsigned int len) { struct net *net = sock_net(sk); int ret; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; switch (cmd) { case EBT_SO_SET_ENTRIES: #ifdef CONFIG_NETFILTER_XTABLES_COMPAT if (in_compat_syscall()) ret = compat_do_replace(net, arg, len); else #endif ret = do_replace(net, arg, len); break; case EBT_SO_SET_COUNTERS: #ifdef CONFIG_NETFILTER_XTABLES_COMPAT if (in_compat_syscall()) ret = compat_update_counters(net, arg, len); else #endif ret = update_counters(net, arg, len); break; default: ret = -EINVAL; } return ret; } static struct nf_sockopt_ops ebt_sockopts = { .pf = PF_INET, .set_optmin = EBT_BASE_CTL, .set_optmax = EBT_SO_SET_MAX + 1, .set = do_ebt_set_ctl, .get_optmin = EBT_BASE_CTL, .get_optmax = EBT_SO_GET_MAX + 1, .get = do_ebt_get_ctl, .owner = THIS_MODULE, }; static int __net_init ebt_pernet_init(struct net *net) { struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id); INIT_LIST_HEAD(&ebt_net->tables); return 0; } static struct pernet_operations ebt_net_ops = { .init = ebt_pernet_init, .id = &ebt_pernet_id, .size = sizeof(struct ebt_pernet), }; static int __init ebtables_init(void) { int ret; ret = xt_register_target(&ebt_standard_target); if (ret < 0) return ret; ret = nf_register_sockopt(&ebt_sockopts); if (ret < 0) { xt_unregister_target(&ebt_standard_target); return ret; } ret = register_pernet_subsys(&ebt_net_ops); if (ret < 0) { nf_unregister_sockopt(&ebt_sockopts); xt_unregister_target(&ebt_standard_target); return ret; } return 0; } static void ebtables_fini(void) { nf_unregister_sockopt(&ebt_sockopts); xt_unregister_target(&ebt_standard_target); unregister_pernet_subsys(&ebt_net_ops); } EXPORT_SYMBOL(ebt_register_table); EXPORT_SYMBOL(ebt_unregister_table); EXPORT_SYMBOL(ebt_do_table); module_init(ebtables_init); module_exit(ebtables_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("ebtables legacy core"); |
118 60 34 28 6 72 27 122 30 39 16 39 354 153 1 26 26 8 121 54 53 46 41 4 4 4 2 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Definitions for the UDP module. * * Version: @(#)udp.h 1.0.2 05/07/93 * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * * Fixes: * Alan Cox : Turned on udp checksums. I don't want to * chase 'memory corruption' bugs that aren't! */ #ifndef _UDP_H #define _UDP_H #include <linux/list.h> #include <linux/bug.h> #include <net/inet_sock.h> #include <net/gso.h> #include <net/sock.h> #include <net/snmp.h> #include <net/ip.h> #include <linux/ipv6.h> #include <linux/seq_file.h> #include <linux/poll.h> #include <linux/indirect_call_wrapper.h> /** * struct udp_skb_cb - UDP(-Lite) private variables * * @header: private variables used by IPv4/IPv6 * @cscov: checksum coverage length (UDP-Lite only) * @partial_cov: if set indicates partial csum coverage */ struct udp_skb_cb { union { struct inet_skb_parm h4; #if IS_ENABLED(CONFIG_IPV6) struct inet6_skb_parm h6; #endif } header; __u16 cscov; __u8 partial_cov; }; #define UDP_SKB_CB(__skb) ((struct udp_skb_cb *)((__skb)->cb)) /** * struct udp_hslot - UDP hash slot used by udp_table.hash/hash4 * * @head: head of list of sockets * @nulls_head: head of list of sockets, only used by hash4 * @count: number of sockets in 'head' list * @lock: spinlock protecting changes to head/count */ struct udp_hslot { union { struct hlist_head head; /* hash4 uses hlist_nulls to avoid moving wrongly onto another * hlist, because rehash() can happen with lookup(). */ struct hlist_nulls_head nulls_head; }; int count; spinlock_t lock; } __aligned(2 * sizeof(long)); /** * struct udp_hslot_main - UDP hash slot used by udp_table.hash2 * * @hslot: basic hash slot * @hash4_cnt: number of sockets in hslot4 of the same * (local port, local address) */ struct udp_hslot_main { struct udp_hslot hslot; /* must be the first member */ #if !IS_ENABLED(CONFIG_BASE_SMALL) u32 hash4_cnt; #endif } __aligned(2 * sizeof(long)); #define UDP_HSLOT_MAIN(__hslot) ((struct udp_hslot_main *)(__hslot)) /** * struct udp_table - UDP table * * @hash: hash table, sockets are hashed on (local port) * @hash2: hash table, sockets are hashed on (local port, local address) * @hash4: hash table, connected sockets are hashed on * (local port, local address, remote port, remote address) * @mask: number of slots in hash tables, minus 1 * @log: log2(number of slots in hash table) */ struct udp_table { struct udp_hslot *hash; struct udp_hslot_main *hash2; #if !IS_ENABLED(CONFIG_BASE_SMALL) struct udp_hslot *hash4; #endif unsigned int mask; unsigned int log; }; extern struct udp_table udp_table; void udp_table_init(struct udp_table *, const char *); static inline struct udp_hslot *udp_hashslot(struct udp_table *table, const struct net *net, unsigned int num) { return &table->hash[udp_hashfn(net, num, table->mask)]; } /* * For secondary hash, net_hash_mix() is performed before calling * udp_hashslot2(), this explains difference with udp_hashslot() */ static inline struct udp_hslot *udp_hashslot2(struct udp_table *table, unsigned int hash) { return &table->hash2[hash & table->mask].hslot; } #if IS_ENABLED(CONFIG_BASE_SMALL) static inline void udp_table_hash4_init(struct udp_table *table) { } static inline struct udp_hslot *udp_hashslot4(struct udp_table *table, unsigned int hash) { BUILD_BUG(); return NULL; } static inline bool udp_hashed4(const struct sock *sk) { return false; } static inline unsigned int udp_hash4_slot_size(void) { return 0; } static inline bool udp_has_hash4(const struct udp_hslot *hslot2) { return false; } static inline void udp_hash4_inc(struct udp_hslot *hslot2) { } static inline void udp_hash4_dec(struct udp_hslot *hslot2) { } #else /* !CONFIG_BASE_SMALL */ /* Must be called with table->hash2 initialized */ static inline void udp_table_hash4_init(struct udp_table *table) { table->hash4 = (void *)(table->hash2 + (table->mask + 1)); for (int i = 0; i <= table->mask; i++) { table->hash2[i].hash4_cnt = 0; INIT_HLIST_NULLS_HEAD(&table->hash4[i].nulls_head, i); table->hash4[i].count = 0; spin_lock_init(&table->hash4[i].lock); } } static inline struct udp_hslot *udp_hashslot4(struct udp_table *table, unsigned int hash) { return &table->hash4[hash & table->mask]; } static inline bool udp_hashed4(const struct sock *sk) { return !hlist_nulls_unhashed(&udp_sk(sk)->udp_lrpa_node); } static inline unsigned int udp_hash4_slot_size(void) { return sizeof(struct udp_hslot); } static inline bool udp_has_hash4(const struct udp_hslot *hslot2) { return UDP_HSLOT_MAIN(hslot2)->hash4_cnt; } static inline void udp_hash4_inc(struct udp_hslot *hslot2) { UDP_HSLOT_MAIN(hslot2)->hash4_cnt++; } static inline void udp_hash4_dec(struct udp_hslot *hslot2) { UDP_HSLOT_MAIN(hslot2)->hash4_cnt--; } #endif /* CONFIG_BASE_SMALL */ extern struct proto udp_prot; extern atomic_long_t udp_memory_allocated; DECLARE_PER_CPU(int, udp_memory_per_cpu_fw_alloc); /* sysctl variables for udp */ extern long sysctl_udp_mem[3]; extern int sysctl_udp_rmem_min; extern int sysctl_udp_wmem_min; struct sk_buff; /* * Generic checksumming routines for UDP(-Lite) v4 and v6 */ static inline __sum16 __udp_lib_checksum_complete(struct sk_buff *skb) { return (UDP_SKB_CB(skb)->cscov == skb->len ? __skb_checksum_complete(skb) : __skb_checksum_complete_head(skb, UDP_SKB_CB(skb)->cscov)); } static inline int udp_lib_checksum_complete(struct sk_buff *skb) { return !skb_csum_unnecessary(skb) && __udp_lib_checksum_complete(skb); } /** * udp_csum_outgoing - compute UDPv4/v6 checksum over fragments * @sk: socket we are writing to * @skb: sk_buff containing the filled-in UDP header * (checksum field must be zeroed out) */ static inline __wsum udp_csum_outgoing(struct sock *sk, struct sk_buff *skb) { __wsum csum = csum_partial(skb_transport_header(skb), sizeof(struct udphdr), 0); skb_queue_walk(&sk->sk_write_queue, skb) { csum = csum_add(csum, skb->csum); } return csum; } static inline __wsum udp_csum(struct sk_buff *skb) { __wsum csum = csum_partial(skb_transport_header(skb), sizeof(struct udphdr), skb->csum); for (skb = skb_shinfo(skb)->frag_list; skb; skb = skb->next) { csum = csum_add(csum, skb->csum); } return csum; } static inline __sum16 udp_v4_check(int len, __be32 saddr, __be32 daddr, __wsum base) { return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base); } void udp_set_csum(bool nocheck, struct sk_buff *skb, __be32 saddr, __be32 daddr, int len); static inline void udp_csum_pull_header(struct sk_buff *skb) { if (!skb->csum_valid && skb->ip_summed == CHECKSUM_NONE) skb->csum = csum_partial(skb->data, sizeof(struct udphdr), skb->csum); skb_pull_rcsum(skb, sizeof(struct udphdr)); UDP_SKB_CB(skb)->cscov -= sizeof(struct udphdr); } typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport, __be16 dport); void udp_v6_early_demux(struct sk_buff *skb); INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *)); struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, netdev_features_t features, bool is_ipv6); static inline void udp_lib_init_sock(struct sock *sk) { struct udp_sock *up = udp_sk(sk); skb_queue_head_init(&up->reader_queue); up->forward_threshold = sk->sk_rcvbuf >> 2; set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags); } /* hash routines shared between UDPv4/6 and UDP-Litev4/6 */ static inline int udp_lib_hash(struct sock *sk) { BUG(); return 0; } void udp_lib_unhash(struct sock *sk); void udp_lib_rehash(struct sock *sk, u16 new_hash, u16 new_hash4); u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport, const __be32 faddr, const __be16 fport); static inline void udp_lib_close(struct sock *sk, long timeout) { sk_common_release(sk); } /* hash4 routines shared between UDPv4/6 */ #if IS_ENABLED(CONFIG_BASE_SMALL) static inline void udp_lib_hash4(struct sock *sk, u16 hash) { } static inline void udp4_hash4(struct sock *sk) { } #else /* !CONFIG_BASE_SMALL */ void udp_lib_hash4(struct sock *sk, u16 hash); void udp4_hash4(struct sock *sk); #endif /* CONFIG_BASE_SMALL */ int udp_lib_get_port(struct sock *sk, unsigned short snum, unsigned int hash2_nulladdr); u32 udp_flow_hashrnd(void); static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb, int min, int max, bool use_eth) { u32 hash; if (min >= max) { /* Use default range */ inet_get_local_port_range(net, &min, &max); } hash = skb_get_hash(skb); if (unlikely(!hash)) { if (use_eth) { /* Can't find a normal hash, caller has indicated an * Ethernet packet so use that to compute a hash. */ hash = jhash(skb->data, 2 * ETH_ALEN, (__force u32) skb->protocol); } else { /* Can't derive any sort of hash for the packet, set * to some consistent random value. */ hash = udp_flow_hashrnd(); } } /* Since this is being sent on the wire obfuscate hash a bit * to minimize possibility that any useful information to an * attacker is leaked. Only upper 16 bits are relevant in the * computation for 16 bit port value. */ hash ^= hash << 16; return htons((((u64) hash * (max - min)) >> 32) + min); } static inline int udp_rqueue_get(struct sock *sk) { return sk_rmem_alloc_get(sk) - READ_ONCE(udp_sk(sk)->forward_deficit); } static inline bool udp_sk_bound_dev_eq(const struct net *net, int bound_dev_if, int dif, int sdif) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_udp_l3mdev_accept), bound_dev_if, dif, sdif); #else return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); #endif } /* net/ipv4/udp.c */ void udp_destruct_common(struct sock *sk); void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len); int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb); void udp_skb_destructor(struct sock *sk, struct sk_buff *skb); struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, int *off, int *err); static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags, int *err) { int off = 0; return __skb_recv_udp(sk, flags, &off, err); } int udp_v4_early_demux(struct sk_buff *skb); bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst); int udp_err(struct sk_buff *, u32); int udp_abort(struct sock *sk, int err); int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); void udp_splice_eof(struct socket *sock); int udp_push_pending_frames(struct sock *sk); void udp_flush_pending_frames(struct sock *sk); int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size); void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst); int udp_rcv(struct sk_buff *skb); int udp_ioctl(struct sock *sk, int cmd, int *karg); int udp_init_sock(struct sock *sk); int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); int __udp_disconnect(struct sock *sk, int flags); int udp_disconnect(struct sock *sk, int flags); __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait); struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, netdev_features_t features, bool is_ipv6); int udp_lib_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); int udp_lib_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen, int (*push_pending_frames)(struct sock *)); struct sock *udp4_lib_lookup(const struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif); struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif, int sdif, struct udp_table *tbl, struct sk_buff *skb); struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport); struct sock *udp6_lib_lookup(const struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, int dif); struct sock *__udp6_lib_lookup(const struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, int dif, int sdif, struct udp_table *tbl, struct sk_buff *skb); struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport); int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor); /* UDP uses skb->dev_scratch to cache as much information as possible and avoid * possibly multiple cache miss on dequeue() */ struct udp_dev_scratch { /* skb->truesize and the stateless bit are embedded in a single field; * do not use a bitfield since the compiler emits better/smaller code * this way */ u32 _tsize_state; #if BITS_PER_LONG == 64 /* len and the bit needed to compute skb_csum_unnecessary * will be on cold cache lines at recvmsg time. * skb->len can be stored on 16 bits since the udp header has been * already validated and pulled. */ u16 len; bool is_linear; bool csum_unnecessary; #endif }; static inline struct udp_dev_scratch *udp_skb_scratch(struct sk_buff *skb) { return (struct udp_dev_scratch *)&skb->dev_scratch; } #if BITS_PER_LONG == 64 static inline unsigned int udp_skb_len(struct sk_buff *skb) { return udp_skb_scratch(skb)->len; } static inline bool udp_skb_csum_unnecessary(struct sk_buff *skb) { return udp_skb_scratch(skb)->csum_unnecessary; } static inline bool udp_skb_is_linear(struct sk_buff *skb) { return udp_skb_scratch(skb)->is_linear; } #else static inline unsigned int udp_skb_len(struct sk_buff *skb) { return skb->len; } static inline bool udp_skb_csum_unnecessary(struct sk_buff *skb) { return skb_csum_unnecessary(skb); } static inline bool udp_skb_is_linear(struct sk_buff *skb) { return !skb_is_nonlinear(skb); } #endif static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, struct iov_iter *to) { return copy_to_iter_full(skb->data + off, len, to) ? 0 : -EFAULT; } /* * SNMP statistics for UDP and UDP-Lite */ #define UDP_INC_STATS(net, field, is_udplite) do { \ if (is_udplite) SNMP_INC_STATS((net)->mib.udplite_statistics, field); \ else SNMP_INC_STATS((net)->mib.udp_statistics, field); } while(0) #define __UDP_INC_STATS(net, field, is_udplite) do { \ if (is_udplite) __SNMP_INC_STATS((net)->mib.udplite_statistics, field); \ else __SNMP_INC_STATS((net)->mib.udp_statistics, field); } while(0) #define __UDP6_INC_STATS(net, field, is_udplite) do { \ if (is_udplite) __SNMP_INC_STATS((net)->mib.udplite_stats_in6, field);\ else __SNMP_INC_STATS((net)->mib.udp_stats_in6, field); \ } while(0) #define UDP6_INC_STATS(net, field, __lite) do { \ if (__lite) SNMP_INC_STATS((net)->mib.udplite_stats_in6, field); \ else SNMP_INC_STATS((net)->mib.udp_stats_in6, field); \ } while(0) #if IS_ENABLED(CONFIG_IPV6) #define __UDPX_MIB(sk, ipv4) \ ({ \ ipv4 ? (IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_statistics : \ sock_net(sk)->mib.udp_statistics) : \ (IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_stats_in6 : \ sock_net(sk)->mib.udp_stats_in6); \ }) #else #define __UDPX_MIB(sk, ipv4) \ ({ \ IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_statistics : \ sock_net(sk)->mib.udp_statistics; \ }) #endif #define __UDPX_INC_STATS(sk, field) \ __SNMP_INC_STATS(__UDPX_MIB(sk, (sk)->sk_family == AF_INET), field) #ifdef CONFIG_PROC_FS struct udp_seq_afinfo { sa_family_t family; struct udp_table *udp_table; }; struct udp_iter_state { struct seq_net_private p; int bucket; }; void *udp_seq_start(struct seq_file *seq, loff_t *pos); void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos); void udp_seq_stop(struct seq_file *seq, void *v); extern const struct seq_operations udp_seq_ops; extern const struct seq_operations udp6_seq_ops; int udp4_proc_init(void); void udp4_proc_exit(void); #endif /* CONFIG_PROC_FS */ int udpv4_offload_init(void); void udp_init(void); DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key); void udp_encap_enable(void); void udp_encap_disable(void); #if IS_ENABLED(CONFIG_IPV6) DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); void udpv6_encap_enable(void); #endif static inline struct sk_buff *udp_rcv_segment(struct sock *sk, struct sk_buff *skb, bool ipv4) { netdev_features_t features = NETIF_F_SG; struct sk_buff *segs; /* Avoid csum recalculation by skb_segment unless userspace explicitly * asks for the final checksum values */ if (!inet_get_convert_csum(sk)) features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; /* UDP segmentation expects packets of type CHECKSUM_PARTIAL or * CHECKSUM_NONE in __udp_gso_segment. UDP GRO indeed builds partial * packets in udp_gro_complete_segment. As does UDP GSO, verified by * udp_send_skb. But when those packets are looped in dev_loopback_xmit * their ip_summed CHECKSUM_NONE is changed to CHECKSUM_UNNECESSARY. * Reset in this specific case, where PARTIAL is both correct and * required. */ if (skb->pkt_type == PACKET_LOOPBACK) skb->ip_summed = CHECKSUM_PARTIAL; /* the GSO CB lays after the UDP one, no need to save and restore any * CB fragment */ segs = __skb_gso_segment(skb, features, false); if (IS_ERR_OR_NULL(segs)) { int segs_nr = skb_shinfo(skb)->gso_segs; atomic_add(segs_nr, &sk->sk_drops); SNMP_ADD_STATS(__UDPX_MIB(sk, ipv4), UDP_MIB_INERRORS, segs_nr); kfree_skb(skb); return NULL; } consume_skb(skb); return segs; } static inline void udp_post_segment_fix_csum(struct sk_buff *skb) { /* UDP-lite can't land here - no GRO */ WARN_ON_ONCE(UDP_SKB_CB(skb)->partial_cov); /* UDP packets generated with UDP_SEGMENT and traversing: * * UDP tunnel(xmit) -> veth (segmentation) -> veth (gro) -> UDP tunnel (rx) * * can reach an UDP socket with CHECKSUM_NONE, because * __iptunnel_pull_header() converts CHECKSUM_PARTIAL into NONE. * SKB_GSO_UDP_L4 or SKB_GSO_FRAGLIST packets with no UDP tunnel will * have a valid checksum, as the GRO engine validates the UDP csum * before the aggregation and nobody strips such info in between. * Instead of adding another check in the tunnel fastpath, we can force * a valid csum after the segmentation. * Additionally fixup the UDP CB. */ UDP_SKB_CB(skb)->cscov = skb->len; if (skb->ip_summed == CHECKSUM_NONE && !skb->csum_valid) skb->csum_valid = 1; } #ifdef CONFIG_BPF_SYSCALL struct sk_psock; int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); #endif #endif /* _UDP_H */ |
14 14 14 14 14 14 9 2 2 2 9 2 2 2 9 13 13 9 9 9 9 9 9 9 9 3 13 9 9 9 9 9 9 9 9 9 6 6 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 | // SPDX-License-Identifier: GPL-2.0-or-later /* * The Serio abstraction module * * Copyright (c) 1999-2004 Vojtech Pavlik * Copyright (c) 2004 Dmitry Torokhov * Copyright (c) 2003 Daniele Bellucci */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/stddef.h> #include <linux/module.h> #include <linux/serio.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/workqueue.h> #include <linux/mutex.h> MODULE_AUTHOR("Vojtech Pavlik <vojtech@ucw.cz>"); MODULE_DESCRIPTION("Serio abstraction core"); MODULE_LICENSE("GPL"); /* * serio_mutex protects entire serio subsystem and is taken every time * serio port or driver registered or unregistered. */ static DEFINE_MUTEX(serio_mutex); static LIST_HEAD(serio_list); static void serio_add_port(struct serio *serio); static int serio_reconnect_port(struct serio *serio); static void serio_disconnect_port(struct serio *serio); static void serio_reconnect_subtree(struct serio *serio); static void serio_attach_driver(struct serio_driver *drv); static int serio_connect_driver(struct serio *serio, struct serio_driver *drv) { guard(mutex)(&serio->drv_mutex); return drv->connect(serio, drv); } static int serio_reconnect_driver(struct serio *serio) { guard(mutex)(&serio->drv_mutex); if (serio->drv && serio->drv->reconnect) return serio->drv->reconnect(serio); return -1; } static void serio_disconnect_driver(struct serio *serio) { guard(mutex)(&serio->drv_mutex); if (serio->drv) serio->drv->disconnect(serio); } static int serio_match_port(const struct serio_device_id *ids, struct serio *serio) { while (ids->type || ids->proto) { if ((ids->type == SERIO_ANY || ids->type == serio->id.type) && (ids->proto == SERIO_ANY || ids->proto == serio->id.proto) && (ids->extra == SERIO_ANY || ids->extra == serio->id.extra) && (ids->id == SERIO_ANY || ids->id == serio->id.id)) return 1; ids++; } return 0; } /* * Basic serio -> driver core mappings */ static int serio_bind_driver(struct serio *serio, struct serio_driver *drv) { int error; if (serio_match_port(drv->id_table, serio)) { serio->dev.driver = &drv->driver; if (serio_connect_driver(serio, drv)) { serio->dev.driver = NULL; return -ENODEV; } error = device_bind_driver(&serio->dev); if (error) { dev_warn(&serio->dev, "device_bind_driver() failed for %s (%s) and %s, error: %d\n", serio->phys, serio->name, drv->description, error); serio_disconnect_driver(serio); serio->dev.driver = NULL; return error; } } return 0; } static void serio_find_driver(struct serio *serio) { int error; error = device_attach(&serio->dev); if (error < 0 && error != -EPROBE_DEFER) dev_warn(&serio->dev, "device_attach() failed for %s (%s), error: %d\n", serio->phys, serio->name, error); } /* * Serio event processing. */ enum serio_event_type { SERIO_RESCAN_PORT, SERIO_RECONNECT_PORT, SERIO_RECONNECT_SUBTREE, SERIO_REGISTER_PORT, SERIO_ATTACH_DRIVER, }; struct serio_event { enum serio_event_type type; void *object; struct module *owner; struct list_head node; }; static DEFINE_SPINLOCK(serio_event_lock); /* protects serio_event_list */ static LIST_HEAD(serio_event_list); static struct serio_event *serio_get_event(void) { struct serio_event *event = NULL; guard(spinlock_irqsave)(&serio_event_lock); if (!list_empty(&serio_event_list)) { event = list_first_entry(&serio_event_list, struct serio_event, node); list_del_init(&event->node); } return event; } static void serio_free_event(struct serio_event *event) { module_put(event->owner); kfree(event); } static void serio_remove_duplicate_events(void *object, enum serio_event_type type) { struct serio_event *e, *next; guard(spinlock_irqsave)(&serio_event_lock); list_for_each_entry_safe(e, next, &serio_event_list, node) { if (object == e->object) { /* * If this event is of different type we should not * look further - we only suppress duplicate events * that were sent back-to-back. */ if (type != e->type) break; list_del_init(&e->node); serio_free_event(e); } } } static void serio_handle_event(struct work_struct *work) { struct serio_event *event; guard(mutex)(&serio_mutex); while ((event = serio_get_event())) { switch (event->type) { case SERIO_REGISTER_PORT: serio_add_port(event->object); break; case SERIO_RECONNECT_PORT: serio_reconnect_port(event->object); break; case SERIO_RESCAN_PORT: serio_disconnect_port(event->object); serio_find_driver(event->object); break; case SERIO_RECONNECT_SUBTREE: serio_reconnect_subtree(event->object); break; case SERIO_ATTACH_DRIVER: serio_attach_driver(event->object); break; } serio_remove_duplicate_events(event->object, event->type); serio_free_event(event); } } static DECLARE_WORK(serio_event_work, serio_handle_event); static int serio_queue_event(void *object, struct module *owner, enum serio_event_type event_type) { struct serio_event *event; guard(spinlock_irqsave)(&serio_event_lock); /* * Scan event list for the other events for the same serio port, * starting with the most recent one. If event is the same we * do not need add new one. If event is of different type we * need to add this event and should not look further because * we need to preseve sequence of distinct events. */ list_for_each_entry_reverse(event, &serio_event_list, node) { if (event->object == object) { if (event->type == event_type) return 0; break; } } event = kmalloc(sizeof(*event), GFP_ATOMIC); if (!event) { pr_err("Not enough memory to queue event %d\n", event_type); return -ENOMEM; } if (!try_module_get(owner)) { pr_warn("Can't get module reference, dropping event %d\n", event_type); kfree(event); return -EINVAL; } event->type = event_type; event->object = object; event->owner = owner; list_add_tail(&event->node, &serio_event_list); queue_work(system_long_wq, &serio_event_work); return 0; } /* * Remove all events that have been submitted for a given * object, be it serio port or driver. */ static void serio_remove_pending_events(void *object) { struct serio_event *event, *next; guard(spinlock_irqsave)(&serio_event_lock); list_for_each_entry_safe(event, next, &serio_event_list, node) { if (event->object == object) { list_del_init(&event->node); serio_free_event(event); } } } /* * Locate child serio port (if any) that has not been fully registered yet. * * Children are registered by driver's connect() handler so there can't be a * grandchild pending registration together with a child. */ static struct serio *serio_get_pending_child(struct serio *parent) { struct serio_event *event; struct serio *serio; guard(spinlock_irqsave)(&serio_event_lock); list_for_each_entry(event, &serio_event_list, node) { if (event->type == SERIO_REGISTER_PORT) { serio = event->object; if (serio->parent == parent) return serio; } } return NULL; } /* * Serio port operations */ static ssize_t serio_show_description(struct device *dev, struct device_attribute *attr, char *buf) { struct serio *serio = to_serio_port(dev); return sprintf(buf, "%s\n", serio->name); } static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf) { struct serio *serio = to_serio_port(dev); return sprintf(buf, "serio:ty%02Xpr%02Xid%02Xex%02X\n", serio->id.type, serio->id.proto, serio->id.id, serio->id.extra); } static ssize_t type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct serio *serio = to_serio_port(dev); return sprintf(buf, "%02x\n", serio->id.type); } static ssize_t proto_show(struct device *dev, struct device_attribute *attr, char *buf) { struct serio *serio = to_serio_port(dev); return sprintf(buf, "%02x\n", serio->id.proto); } static ssize_t id_show(struct device *dev, struct device_attribute *attr, char *buf) { struct serio *serio = to_serio_port(dev); return sprintf(buf, "%02x\n", serio->id.id); } static ssize_t extra_show(struct device *dev, struct device_attribute *attr, char *buf) { struct serio *serio = to_serio_port(dev); return sprintf(buf, "%02x\n", serio->id.extra); } static ssize_t drvctl_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct serio *serio = to_serio_port(dev); struct device_driver *drv; int error; scoped_cond_guard(mutex_intr, return -EINTR, &serio_mutex) { if (!strncmp(buf, "none", count)) { serio_disconnect_port(serio); } else if (!strncmp(buf, "reconnect", count)) { serio_reconnect_subtree(serio); } else if (!strncmp(buf, "rescan", count)) { serio_disconnect_port(serio); serio_find_driver(serio); serio_remove_duplicate_events(serio, SERIO_RESCAN_PORT); } else if ((drv = driver_find(buf, &serio_bus)) != NULL) { serio_disconnect_port(serio); error = serio_bind_driver(serio, to_serio_driver(drv)); serio_remove_duplicate_events(serio, SERIO_RESCAN_PORT); if (error) return error; } else { return -EINVAL; } } return count; } static ssize_t serio_show_bind_mode(struct device *dev, struct device_attribute *attr, char *buf) { struct serio *serio = to_serio_port(dev); return sprintf(buf, "%s\n", serio->manual_bind ? "manual" : "auto"); } static ssize_t serio_set_bind_mode(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct serio *serio = to_serio_port(dev); int retval; retval = count; if (!strncmp(buf, "manual", count)) { serio->manual_bind = true; } else if (!strncmp(buf, "auto", count)) { serio->manual_bind = false; } else { retval = -EINVAL; } return retval; } static ssize_t firmware_id_show(struct device *dev, struct device_attribute *attr, char *buf) { struct serio *serio = to_serio_port(dev); return sprintf(buf, "%s\n", serio->firmware_id); } static DEVICE_ATTR_RO(type); static DEVICE_ATTR_RO(proto); static DEVICE_ATTR_RO(id); static DEVICE_ATTR_RO(extra); static struct attribute *serio_device_id_attrs[] = { &dev_attr_type.attr, &dev_attr_proto.attr, &dev_attr_id.attr, &dev_attr_extra.attr, NULL }; static const struct attribute_group serio_id_attr_group = { .name = "id", .attrs = serio_device_id_attrs, }; static DEVICE_ATTR_RO(modalias); static DEVICE_ATTR_WO(drvctl); static DEVICE_ATTR(description, S_IRUGO, serio_show_description, NULL); static DEVICE_ATTR(bind_mode, S_IWUSR | S_IRUGO, serio_show_bind_mode, serio_set_bind_mode); static DEVICE_ATTR_RO(firmware_id); static struct attribute *serio_device_attrs[] = { &dev_attr_modalias.attr, &dev_attr_description.attr, &dev_attr_drvctl.attr, &dev_attr_bind_mode.attr, &dev_attr_firmware_id.attr, NULL }; static const struct attribute_group serio_device_attr_group = { .attrs = serio_device_attrs, }; static const struct attribute_group *serio_device_attr_groups[] = { &serio_id_attr_group, &serio_device_attr_group, NULL }; static void serio_release_port(struct device *dev) { struct serio *serio = to_serio_port(dev); kfree(serio); module_put(THIS_MODULE); } /* * Prepare serio port for registration. */ static void serio_init_port(struct serio *serio) { static atomic_t serio_no = ATOMIC_INIT(-1); __module_get(THIS_MODULE); INIT_LIST_HEAD(&serio->node); INIT_LIST_HEAD(&serio->child_node); INIT_LIST_HEAD(&serio->children); spin_lock_init(&serio->lock); mutex_init(&serio->drv_mutex); device_initialize(&serio->dev); dev_set_name(&serio->dev, "serio%lu", (unsigned long)atomic_inc_return(&serio_no)); serio->dev.bus = &serio_bus; serio->dev.release = serio_release_port; serio->dev.groups = serio_device_attr_groups; if (serio->parent) { serio->dev.parent = &serio->parent->dev; serio->depth = serio->parent->depth + 1; } else serio->depth = 0; lockdep_set_subclass(&serio->lock, serio->depth); } /* * Complete serio port registration. * Driver core will attempt to find appropriate driver for the port. */ static void serio_add_port(struct serio *serio) { struct serio *parent = serio->parent; int error; if (parent) { guard(serio_pause_rx)(parent); list_add_tail(&serio->child_node, &parent->children); } list_add_tail(&serio->node, &serio_list); if (serio->start) serio->start(serio); error = device_add(&serio->dev); if (error) dev_err(&serio->dev, "device_add() failed for %s (%s), error: %d\n", serio->phys, serio->name, error); } /* * serio_destroy_port() completes unregistration process and removes * port from the system */ static void serio_destroy_port(struct serio *serio) { struct serio *child; while ((child = serio_get_pending_child(serio)) != NULL) { serio_remove_pending_events(child); put_device(&child->dev); } if (serio->stop) serio->stop(serio); if (serio->parent) { guard(serio_pause_rx)(serio->parent); list_del_init(&serio->child_node); serio->parent = NULL; } if (device_is_registered(&serio->dev)) device_del(&serio->dev); list_del_init(&serio->node); serio_remove_pending_events(serio); put_device(&serio->dev); } /* * Reconnect serio port (re-initialize attached device). * If reconnect fails (old device is no longer attached or * there was no device to begin with) we do full rescan in * hope of finding a driver for the port. */ static int serio_reconnect_port(struct serio *serio) { int error = serio_reconnect_driver(serio); if (error) { serio_disconnect_port(serio); serio_find_driver(serio); } return error; } /* * Reconnect serio port and all its children (re-initialize attached * devices). */ static void serio_reconnect_subtree(struct serio *root) { struct serio *s = root; int error; do { error = serio_reconnect_port(s); if (!error) { /* * Reconnect was successful, move on to do the * first child. */ if (!list_empty(&s->children)) { s = list_first_entry(&s->children, struct serio, child_node); continue; } } /* * Either it was a leaf node or reconnect failed and it * became a leaf node. Continue reconnecting starting with * the next sibling of the parent node. */ while (s != root) { struct serio *parent = s->parent; if (!list_is_last(&s->child_node, &parent->children)) { s = list_entry(s->child_node.next, struct serio, child_node); break; } s = parent; } } while (s != root); } /* * serio_disconnect_port() unbinds a port from its driver. As a side effect * all children ports are unbound and destroyed. */ static void serio_disconnect_port(struct serio *serio) { struct serio *s = serio; /* * Children ports should be disconnected and destroyed * first; we travel the tree in depth-first order. */ while (!list_empty(&serio->children)) { /* Locate a leaf */ while (!list_empty(&s->children)) s = list_first_entry(&s->children, struct serio, child_node); /* * Prune this leaf node unless it is the one we * started with. */ if (s != serio) { struct serio *parent = s->parent; device_release_driver(&s->dev); serio_destroy_port(s); s = parent; } } /* * OK, no children left, now disconnect this port. */ device_release_driver(&serio->dev); } void serio_rescan(struct serio *serio) { serio_queue_event(serio, NULL, SERIO_RESCAN_PORT); } EXPORT_SYMBOL(serio_rescan); void serio_reconnect(struct serio *serio) { serio_queue_event(serio, NULL, SERIO_RECONNECT_SUBTREE); } EXPORT_SYMBOL(serio_reconnect); /* * Submits register request to kseriod for subsequent execution. * Note that port registration is always asynchronous. */ void __serio_register_port(struct serio *serio, struct module *owner) { serio_init_port(serio); serio_queue_event(serio, owner, SERIO_REGISTER_PORT); } EXPORT_SYMBOL(__serio_register_port); /* * Synchronously unregisters serio port. */ void serio_unregister_port(struct serio *serio) { guard(mutex)(&serio_mutex); serio_disconnect_port(serio); serio_destroy_port(serio); } EXPORT_SYMBOL(serio_unregister_port); /* * Safely unregisters children ports if they are present. */ void serio_unregister_child_port(struct serio *serio) { struct serio *s, *next; guard(mutex)(&serio_mutex); list_for_each_entry_safe(s, next, &serio->children, child_node) { serio_disconnect_port(s); serio_destroy_port(s); } } EXPORT_SYMBOL(serio_unregister_child_port); /* * Serio driver operations */ static ssize_t description_show(struct device_driver *drv, char *buf) { struct serio_driver *driver = to_serio_driver(drv); return sprintf(buf, "%s\n", driver->description ? driver->description : "(none)"); } static DRIVER_ATTR_RO(description); static ssize_t bind_mode_show(struct device_driver *drv, char *buf) { struct serio_driver *serio_drv = to_serio_driver(drv); return sprintf(buf, "%s\n", serio_drv->manual_bind ? "manual" : "auto"); } static ssize_t bind_mode_store(struct device_driver *drv, const char *buf, size_t count) { struct serio_driver *serio_drv = to_serio_driver(drv); int retval; retval = count; if (!strncmp(buf, "manual", count)) { serio_drv->manual_bind = true; } else if (!strncmp(buf, "auto", count)) { serio_drv->manual_bind = false; } else { retval = -EINVAL; } return retval; } static DRIVER_ATTR_RW(bind_mode); static struct attribute *serio_driver_attrs[] = { &driver_attr_description.attr, &driver_attr_bind_mode.attr, NULL, }; ATTRIBUTE_GROUPS(serio_driver); static int serio_driver_probe(struct device *dev) { struct serio *serio = to_serio_port(dev); struct serio_driver *drv = to_serio_driver(dev->driver); return serio_connect_driver(serio, drv); } static void serio_driver_remove(struct device *dev) { struct serio *serio = to_serio_port(dev); serio_disconnect_driver(serio); } static void serio_cleanup(struct serio *serio) { guard(mutex)(&serio->drv_mutex); if (serio->drv && serio->drv->cleanup) serio->drv->cleanup(serio); } static void serio_shutdown(struct device *dev) { struct serio *serio = to_serio_port(dev); serio_cleanup(serio); } static void serio_attach_driver(struct serio_driver *drv) { int error; error = driver_attach(&drv->driver); if (error) pr_warn("driver_attach() failed for %s with error %d\n", drv->driver.name, error); } int __serio_register_driver(struct serio_driver *drv, struct module *owner, const char *mod_name) { bool manual_bind = drv->manual_bind; int error; drv->driver.bus = &serio_bus; drv->driver.owner = owner; drv->driver.mod_name = mod_name; /* * Temporarily disable automatic binding because probing * takes long time and we are better off doing it in kseriod */ drv->manual_bind = true; error = driver_register(&drv->driver); if (error) { pr_err("driver_register() failed for %s, error: %d\n", drv->driver.name, error); return error; } /* * Restore original bind mode and let kseriod bind the * driver to free ports */ if (!manual_bind) { drv->manual_bind = false; error = serio_queue_event(drv, NULL, SERIO_ATTACH_DRIVER); if (error) { driver_unregister(&drv->driver); return error; } } return 0; } EXPORT_SYMBOL(__serio_register_driver); void serio_unregister_driver(struct serio_driver *drv) { struct serio *serio; guard(mutex)(&serio_mutex); drv->manual_bind = true; /* so serio_find_driver ignores it */ serio_remove_pending_events(drv); start_over: list_for_each_entry(serio, &serio_list, node) { if (serio->drv == drv) { serio_disconnect_port(serio); serio_find_driver(serio); /* we could've deleted some ports, restart */ goto start_over; } } driver_unregister(&drv->driver); } EXPORT_SYMBOL(serio_unregister_driver); static void serio_set_drv(struct serio *serio, struct serio_driver *drv) { guard(serio_pause_rx)(serio); serio->drv = drv; } static int serio_bus_match(struct device *dev, const struct device_driver *drv) { struct serio *serio = to_serio_port(dev); const struct serio_driver *serio_drv = to_serio_driver(drv); if (serio->manual_bind || serio_drv->manual_bind) return 0; return serio_match_port(serio_drv->id_table, serio); } #define SERIO_ADD_UEVENT_VAR(fmt, val...) \ do { \ int err = add_uevent_var(env, fmt, val); \ if (err) \ return err; \ } while (0) static int serio_uevent(const struct device *dev, struct kobj_uevent_env *env) { const struct serio *serio; if (!dev) return -ENODEV; serio = to_serio_port(dev); SERIO_ADD_UEVENT_VAR("SERIO_TYPE=%02x", serio->id.type); SERIO_ADD_UEVENT_VAR("SERIO_PROTO=%02x", serio->id.proto); SERIO_ADD_UEVENT_VAR("SERIO_ID=%02x", serio->id.id); SERIO_ADD_UEVENT_VAR("SERIO_EXTRA=%02x", serio->id.extra); SERIO_ADD_UEVENT_VAR("MODALIAS=serio:ty%02Xpr%02Xid%02Xex%02X", serio->id.type, serio->id.proto, serio->id.id, serio->id.extra); if (serio->firmware_id[0]) SERIO_ADD_UEVENT_VAR("SERIO_FIRMWARE_ID=%s", serio->firmware_id); return 0; } #undef SERIO_ADD_UEVENT_VAR #ifdef CONFIG_PM static int serio_suspend(struct device *dev) { struct serio *serio = to_serio_port(dev); serio_cleanup(serio); return 0; } static int serio_resume(struct device *dev) { struct serio *serio = to_serio_port(dev); int error = -ENOENT; scoped_guard(mutex, &serio->drv_mutex) { if (serio->drv && serio->drv->fast_reconnect) { error = serio->drv->fast_reconnect(serio); if (error && error != -ENOENT) dev_warn(dev, "fast reconnect failed with error %d\n", error); } } if (error) { /* * Driver reconnect can take a while, so better let * kseriod deal with it. */ serio_queue_event(serio, NULL, SERIO_RECONNECT_PORT); } return 0; } static const struct dev_pm_ops serio_pm_ops = { .suspend = serio_suspend, .resume = serio_resume, .poweroff = serio_suspend, .restore = serio_resume, }; #endif /* CONFIG_PM */ /* called from serio_driver->connect/disconnect methods under serio_mutex */ int serio_open(struct serio *serio, struct serio_driver *drv) { serio_set_drv(serio, drv); if (serio->open && serio->open(serio)) { serio_set_drv(serio, NULL); return -1; } return 0; } EXPORT_SYMBOL(serio_open); /* called from serio_driver->connect/disconnect methods under serio_mutex */ void serio_close(struct serio *serio) { if (serio->close) serio->close(serio); serio_set_drv(serio, NULL); } EXPORT_SYMBOL(serio_close); irqreturn_t serio_interrupt(struct serio *serio, unsigned char data, unsigned int dfl) { guard(spinlock_irqsave)(&serio->lock); if (likely(serio->drv)) return serio->drv->interrupt(serio, data, dfl); if (!dfl && device_is_registered(&serio->dev)) { serio_rescan(serio); return IRQ_HANDLED; } return IRQ_NONE; } EXPORT_SYMBOL(serio_interrupt); const struct bus_type serio_bus = { .name = "serio", .drv_groups = serio_driver_groups, .match = serio_bus_match, .uevent = serio_uevent, .probe = serio_driver_probe, .remove = serio_driver_remove, .shutdown = serio_shutdown, #ifdef CONFIG_PM .pm = &serio_pm_ops, #endif }; EXPORT_SYMBOL(serio_bus); static int __init serio_init(void) { int error; error = bus_register(&serio_bus); if (error) { pr_err("Failed to register serio bus, error: %d\n", error); return error; } return 0; } static void __exit serio_exit(void) { bus_unregister(&serio_bus); /* * There should not be any outstanding events but work may * still be scheduled so simply cancel it. */ cancel_work_sync(&serio_event_work); } subsys_initcall(serio_init); module_exit(serio_exit); |
99 99 99 99 38 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Sysfs attributes of bridge * Linux ethernet bridge * * Authors: * Stephen Hemminger <shemminger@osdl.org> */ #include <linux/capability.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/if_bridge.h> #include <linux/rtnetlink.h> #include <linux/spinlock.h> #include <linux/times.h> #include <linux/sched/signal.h> #include "br_private.h" /* IMPORTANT: new bridge options must be added with netlink support only * please do not add new sysfs entries */ #define to_bridge(cd) ((struct net_bridge *)netdev_priv(to_net_dev(cd))) /* * Common code for storing bridge parameters. */ static ssize_t store_bridge_parm(struct device *d, const char *buf, size_t len, int (*set)(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack)) { struct net_bridge *br = to_bridge(d); struct netlink_ext_ack extack = {0}; unsigned long val; int err; if (!ns_capable(dev_net(br->dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; err = kstrtoul(buf, 0, &val); if (err != 0) return err; if (!rtnl_trylock()) return restart_syscall(); err = (*set)(br, val, &extack); if (!err) netdev_state_change(br->dev); if (extack._msg) { if (err) br_err(br, "%s\n", extack._msg); else br_warn(br, "%s\n", extack._msg); } rtnl_unlock(); return err ? err : len; } static ssize_t forward_delay_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->forward_delay)); } static int set_forward_delay(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { return br_set_forward_delay(br, val); } static ssize_t forward_delay_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_forward_delay); } static DEVICE_ATTR_RW(forward_delay); static ssize_t hello_time_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%lu\n", jiffies_to_clock_t(to_bridge(d)->hello_time)); } static int set_hello_time(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { return br_set_hello_time(br, val); } static ssize_t hello_time_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_hello_time); } static DEVICE_ATTR_RW(hello_time); static ssize_t max_age_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%lu\n", jiffies_to_clock_t(to_bridge(d)->max_age)); } static int set_max_age(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { return br_set_max_age(br, val); } static ssize_t max_age_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_max_age); } static DEVICE_ATTR_RW(max_age); static ssize_t ageing_time_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->ageing_time)); } static int set_ageing_time(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { return br_set_ageing_time(br, val); } static ssize_t ageing_time_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_ageing_time); } static DEVICE_ATTR_RW(ageing_time); static ssize_t stp_state_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br->stp_enabled); } static int set_stp_state(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { return br_stp_set_enabled(br, val, extack); } static ssize_t stp_state_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_stp_state); } static DEVICE_ATTR_RW(stp_state); static ssize_t group_fwd_mask_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%#x\n", br->group_fwd_mask); } static int set_group_fwd_mask(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { if (val & BR_GROUPFWD_RESTRICTED) return -EINVAL; br->group_fwd_mask = val; return 0; } static ssize_t group_fwd_mask_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_group_fwd_mask); } static DEVICE_ATTR_RW(group_fwd_mask); static ssize_t priority_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1]); } static int set_priority(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { br_stp_set_bridge_priority(br, (u16) val); return 0; } static ssize_t priority_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_priority); } static DEVICE_ATTR_RW(priority); static ssize_t root_id_show(struct device *d, struct device_attribute *attr, char *buf) { return br_show_bridge_id(buf, &to_bridge(d)->designated_root); } static DEVICE_ATTR_RO(root_id); static ssize_t bridge_id_show(struct device *d, struct device_attribute *attr, char *buf) { return br_show_bridge_id(buf, &to_bridge(d)->bridge_id); } static DEVICE_ATTR_RO(bridge_id); static ssize_t root_port_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%d\n", to_bridge(d)->root_port); } static DEVICE_ATTR_RO(root_port); static ssize_t root_path_cost_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%d\n", to_bridge(d)->root_path_cost); } static DEVICE_ATTR_RO(root_path_cost); static ssize_t topology_change_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%d\n", to_bridge(d)->topology_change); } static DEVICE_ATTR_RO(topology_change); static ssize_t topology_change_detected_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br->topology_change_detected); } static DEVICE_ATTR_RO(topology_change_detected); static ssize_t hello_timer_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->hello_timer)); } static DEVICE_ATTR_RO(hello_timer); static ssize_t tcn_timer_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->tcn_timer)); } static DEVICE_ATTR_RO(tcn_timer); static ssize_t topology_change_timer_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->topology_change_timer)); } static DEVICE_ATTR_RO(topology_change_timer); static ssize_t gc_timer_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->gc_work.timer)); } static DEVICE_ATTR_RO(gc_timer); static ssize_t group_addr_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%pM\n", br->group_addr); } static ssize_t group_addr_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { struct net_bridge *br = to_bridge(d); u8 new_addr[6]; if (!ns_capable(dev_net(br->dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; if (!mac_pton(buf, new_addr)) return -EINVAL; if (!is_link_local_ether_addr(new_addr)) return -EINVAL; if (new_addr[5] == 1 || /* 802.3x Pause address */ new_addr[5] == 2 || /* 802.3ad Slow protocols */ new_addr[5] == 3) /* 802.1X PAE address */ return -EINVAL; if (!rtnl_trylock()) return restart_syscall(); spin_lock_bh(&br->lock); ether_addr_copy(br->group_addr, new_addr); spin_unlock_bh(&br->lock); br_opt_toggle(br, BROPT_GROUP_ADDR_SET, true); br_recalculate_fwd_mask(br); netdev_state_change(br->dev); rtnl_unlock(); return len; } static DEVICE_ATTR_RW(group_addr); static int set_flush(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { struct net_bridge_fdb_flush_desc desc = { .flags_mask = BIT(BR_FDB_STATIC) }; br_fdb_flush(br, &desc); return 0; } static ssize_t flush_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_flush); } static DEVICE_ATTR_WO(flush); static ssize_t no_linklocal_learn_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br_boolopt_get(br, BR_BOOLOPT_NO_LL_LEARN)); } static int set_no_linklocal_learn(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { return br_boolopt_toggle(br, BR_BOOLOPT_NO_LL_LEARN, !!val, extack); } static ssize_t no_linklocal_learn_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_no_linklocal_learn); } static DEVICE_ATTR_RW(no_linklocal_learn); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t multicast_router_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br->multicast_ctx.multicast_router); } static int set_multicast_router(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { return br_multicast_set_router(&br->multicast_ctx, val); } static ssize_t multicast_router_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_multicast_router); } static DEVICE_ATTR_RW(multicast_router); static ssize_t multicast_snooping_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br_opt_get(br, BROPT_MULTICAST_ENABLED)); } static ssize_t multicast_snooping_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_multicast_toggle); } static DEVICE_ATTR_RW(multicast_snooping); static ssize_t multicast_query_use_ifaddr_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br_opt_get(br, BROPT_MULTICAST_QUERY_USE_IFADDR)); } static int set_query_use_ifaddr(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { br_opt_toggle(br, BROPT_MULTICAST_QUERY_USE_IFADDR, !!val); return 0; } static ssize_t multicast_query_use_ifaddr_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_query_use_ifaddr); } static DEVICE_ATTR_RW(multicast_query_use_ifaddr); static ssize_t multicast_querier_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br->multicast_ctx.multicast_querier); } static int set_multicast_querier(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { return br_multicast_set_querier(&br->multicast_ctx, val); } static ssize_t multicast_querier_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_multicast_querier); } static DEVICE_ATTR_RW(multicast_querier); static ssize_t hash_elasticity_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%u\n", RHT_ELASTICITY); } static int set_elasticity(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { /* 16 is RHT_ELASTICITY */ NL_SET_ERR_MSG_MOD(extack, "the hash_elasticity option has been deprecated and is always 16"); return 0; } static ssize_t hash_elasticity_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_elasticity); } static DEVICE_ATTR_RW(hash_elasticity); static ssize_t hash_max_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%u\n", br->hash_max); } static int set_hash_max(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { br->hash_max = val; return 0; } static ssize_t hash_max_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_hash_max); } static DEVICE_ATTR_RW(hash_max); static ssize_t multicast_igmp_version_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%u\n", br->multicast_ctx.multicast_igmp_version); } static int set_multicast_igmp_version(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { return br_multicast_set_igmp_version(&br->multicast_ctx, val); } static ssize_t multicast_igmp_version_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_multicast_igmp_version); } static DEVICE_ATTR_RW(multicast_igmp_version); static ssize_t multicast_last_member_count_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%u\n", br->multicast_ctx.multicast_last_member_count); } static int set_last_member_count(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { br->multicast_ctx.multicast_last_member_count = val; return 0; } static ssize_t multicast_last_member_count_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_last_member_count); } static DEVICE_ATTR_RW(multicast_last_member_count); static ssize_t multicast_startup_query_count_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%u\n", br->multicast_ctx.multicast_startup_query_count); } static int set_startup_query_count(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { br->multicast_ctx.multicast_startup_query_count = val; return 0; } static ssize_t multicast_startup_query_count_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_startup_query_count); } static DEVICE_ATTR_RW(multicast_startup_query_count); static ssize_t multicast_last_member_interval_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->multicast_ctx.multicast_last_member_interval)); } static int set_last_member_interval(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { br->multicast_ctx.multicast_last_member_interval = clock_t_to_jiffies(val); return 0; } static ssize_t multicast_last_member_interval_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_last_member_interval); } static DEVICE_ATTR_RW(multicast_last_member_interval); static ssize_t multicast_membership_interval_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->multicast_ctx.multicast_membership_interval)); } static int set_membership_interval(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { br->multicast_ctx.multicast_membership_interval = clock_t_to_jiffies(val); return 0; } static ssize_t multicast_membership_interval_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_membership_interval); } static DEVICE_ATTR_RW(multicast_membership_interval); static ssize_t multicast_querier_interval_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->multicast_ctx.multicast_querier_interval)); } static int set_querier_interval(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { br->multicast_ctx.multicast_querier_interval = clock_t_to_jiffies(val); return 0; } static ssize_t multicast_querier_interval_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_querier_interval); } static DEVICE_ATTR_RW(multicast_querier_interval); static ssize_t multicast_query_interval_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->multicast_ctx.multicast_query_interval)); } static int set_query_interval(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { br_multicast_set_query_intvl(&br->multicast_ctx, val); return 0; } static ssize_t multicast_query_interval_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_query_interval); } static DEVICE_ATTR_RW(multicast_query_interval); static ssize_t multicast_query_response_interval_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf( buf, "%lu\n", jiffies_to_clock_t(br->multicast_ctx.multicast_query_response_interval)); } static int set_query_response_interval(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { br->multicast_ctx.multicast_query_response_interval = clock_t_to_jiffies(val); return 0; } static ssize_t multicast_query_response_interval_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_query_response_interval); } static DEVICE_ATTR_RW(multicast_query_response_interval); static ssize_t multicast_startup_query_interval_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf( buf, "%lu\n", jiffies_to_clock_t(br->multicast_ctx.multicast_startup_query_interval)); } static int set_startup_query_interval(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { br_multicast_set_startup_query_intvl(&br->multicast_ctx, val); return 0; } static ssize_t multicast_startup_query_interval_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_startup_query_interval); } static DEVICE_ATTR_RW(multicast_startup_query_interval); static ssize_t multicast_stats_enabled_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br_opt_get(br, BROPT_MULTICAST_STATS_ENABLED)); } static int set_stats_enabled(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { br_opt_toggle(br, BROPT_MULTICAST_STATS_ENABLED, !!val); return 0; } static ssize_t multicast_stats_enabled_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_stats_enabled); } static DEVICE_ATTR_RW(multicast_stats_enabled); #if IS_ENABLED(CONFIG_IPV6) static ssize_t multicast_mld_version_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%u\n", br->multicast_ctx.multicast_mld_version); } static int set_multicast_mld_version(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { return br_multicast_set_mld_version(&br->multicast_ctx, val); } static ssize_t multicast_mld_version_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_multicast_mld_version); } static DEVICE_ATTR_RW(multicast_mld_version); #endif #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) static ssize_t nf_call_iptables_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%u\n", br_opt_get(br, BROPT_NF_CALL_IPTABLES)); } static int set_nf_call_iptables(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { br_opt_toggle(br, BROPT_NF_CALL_IPTABLES, !!val); return 0; } static ssize_t nf_call_iptables_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_nf_call_iptables); } static DEVICE_ATTR_RW(nf_call_iptables); static ssize_t nf_call_ip6tables_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%u\n", br_opt_get(br, BROPT_NF_CALL_IP6TABLES)); } static int set_nf_call_ip6tables(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { br_opt_toggle(br, BROPT_NF_CALL_IP6TABLES, !!val); return 0; } static ssize_t nf_call_ip6tables_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_nf_call_ip6tables); } static DEVICE_ATTR_RW(nf_call_ip6tables); static ssize_t nf_call_arptables_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%u\n", br_opt_get(br, BROPT_NF_CALL_ARPTABLES)); } static int set_nf_call_arptables(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { br_opt_toggle(br, BROPT_NF_CALL_ARPTABLES, !!val); return 0; } static ssize_t nf_call_arptables_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_nf_call_arptables); } static DEVICE_ATTR_RW(nf_call_arptables); #endif #ifdef CONFIG_BRIDGE_VLAN_FILTERING static ssize_t vlan_filtering_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br_opt_get(br, BROPT_VLAN_ENABLED)); } static ssize_t vlan_filtering_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_vlan_filter_toggle); } static DEVICE_ATTR_RW(vlan_filtering); static ssize_t vlan_protocol_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%#06x\n", ntohs(br->vlan_proto)); } static ssize_t vlan_protocol_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_vlan_set_proto); } static DEVICE_ATTR_RW(vlan_protocol); static ssize_t default_pvid_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br->default_pvid); } static ssize_t default_pvid_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_vlan_set_default_pvid); } static DEVICE_ATTR_RW(default_pvid); static ssize_t vlan_stats_enabled_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%u\n", br_opt_get(br, BROPT_VLAN_STATS_ENABLED)); } static int set_vlan_stats_enabled(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { return br_vlan_set_stats(br, val); } static ssize_t vlan_stats_enabled_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_vlan_stats_enabled); } static DEVICE_ATTR_RW(vlan_stats_enabled); static ssize_t vlan_stats_per_port_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%u\n", br_opt_get(br, BROPT_VLAN_STATS_PER_PORT)); } static int set_vlan_stats_per_port(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { return br_vlan_set_stats_per_port(br, val); } static ssize_t vlan_stats_per_port_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_vlan_stats_per_port); } static DEVICE_ATTR_RW(vlan_stats_per_port); #endif static struct attribute *bridge_attrs[] = { &dev_attr_forward_delay.attr, &dev_attr_hello_time.attr, &dev_attr_max_age.attr, &dev_attr_ageing_time.attr, &dev_attr_stp_state.attr, &dev_attr_group_fwd_mask.attr, &dev_attr_priority.attr, &dev_attr_bridge_id.attr, &dev_attr_root_id.attr, &dev_attr_root_path_cost.attr, &dev_attr_root_port.attr, &dev_attr_topology_change.attr, &dev_attr_topology_change_detected.attr, &dev_attr_hello_timer.attr, &dev_attr_tcn_timer.attr, &dev_attr_topology_change_timer.attr, &dev_attr_gc_timer.attr, &dev_attr_group_addr.attr, &dev_attr_flush.attr, &dev_attr_no_linklocal_learn.attr, #ifdef CONFIG_BRIDGE_IGMP_SNOOPING &dev_attr_multicast_router.attr, &dev_attr_multicast_snooping.attr, &dev_attr_multicast_querier.attr, &dev_attr_multicast_query_use_ifaddr.attr, &dev_attr_hash_elasticity.attr, &dev_attr_hash_max.attr, &dev_attr_multicast_last_member_count.attr, &dev_attr_multicast_startup_query_count.attr, &dev_attr_multicast_last_member_interval.attr, &dev_attr_multicast_membership_interval.attr, &dev_attr_multicast_querier_interval.attr, &dev_attr_multicast_query_interval.attr, &dev_attr_multicast_query_response_interval.attr, &dev_attr_multicast_startup_query_interval.attr, &dev_attr_multicast_stats_enabled.attr, &dev_attr_multicast_igmp_version.attr, #if IS_ENABLED(CONFIG_IPV6) &dev_attr_multicast_mld_version.attr, #endif #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) &dev_attr_nf_call_iptables.attr, &dev_attr_nf_call_ip6tables.attr, &dev_attr_nf_call_arptables.attr, #endif #ifdef CONFIG_BRIDGE_VLAN_FILTERING &dev_attr_vlan_filtering.attr, &dev_attr_vlan_protocol.attr, &dev_attr_default_pvid.attr, &dev_attr_vlan_stats_enabled.attr, &dev_attr_vlan_stats_per_port.attr, #endif NULL }; static const struct attribute_group bridge_group = { .name = SYSFS_BRIDGE_ATTR, .attrs = bridge_attrs, }; /* * Export the forwarding information table as a binary file * The records are struct __fdb_entry. * * Returns the number of bytes read. */ static ssize_t brforward_read(struct file *filp, struct kobject *kobj, const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj); struct net_bridge *br = to_bridge(dev); int n; /* must read whole records */ if (off % sizeof(struct __fdb_entry) != 0) return -EINVAL; n = br_fdb_fillbuf(br, buf, count / sizeof(struct __fdb_entry), off / sizeof(struct __fdb_entry)); if (n > 0) n *= sizeof(struct __fdb_entry); return n; } static const struct bin_attribute bridge_forward = { .attr = { .name = SYSFS_BRIDGE_FDB, .mode = 0444, }, .read_new = brforward_read, }; /* * Add entries in sysfs onto the existing network class device * for the bridge. * Adds a attribute group "bridge" containing tuning parameters. * Binary attribute containing the forward table * Sub directory to hold links to interfaces. * * Note: the ifobj exists only to be a subdirectory * to hold links. The ifobj exists in same data structure * as it's parent the bridge so reference counting works. */ int br_sysfs_addbr(struct net_device *dev) { struct kobject *brobj = &dev->dev.kobj; struct net_bridge *br = netdev_priv(dev); int err; err = sysfs_create_group(brobj, &bridge_group); if (err) { pr_info("%s: can't create group %s/%s\n", __func__, dev->name, bridge_group.name); goto out1; } err = sysfs_create_bin_file(brobj, &bridge_forward); if (err) { pr_info("%s: can't create attribute file %s/%s\n", __func__, dev->name, bridge_forward.attr.name); goto out2; } br->ifobj = kobject_create_and_add(SYSFS_BRIDGE_PORT_SUBDIR, brobj); if (!br->ifobj) { pr_info("%s: can't add kobject (directory) %s/%s\n", __func__, dev->name, SYSFS_BRIDGE_PORT_SUBDIR); err = -ENOMEM; goto out3; } return 0; out3: sysfs_remove_bin_file(&dev->dev.kobj, &bridge_forward); out2: sysfs_remove_group(&dev->dev.kobj, &bridge_group); out1: return err; } void br_sysfs_delbr(struct net_device *dev) { struct kobject *kobj = &dev->dev.kobj; struct net_bridge *br = netdev_priv(dev); kobject_put(br->ifobj); sysfs_remove_bin_file(kobj, &bridge_forward); sysfs_remove_group(kobj, &bridge_group); } |
11 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * NET Generic infrastructure for Network protocols. * * Authors: Arnaldo Carvalho de Melo <acme@conectiva.com.br> */ #ifndef _TIMEWAIT_SOCK_H #define _TIMEWAIT_SOCK_H #include <linux/slab.h> #include <linux/bug.h> #include <net/sock.h> struct timewait_sock_ops { struct kmem_cache *twsk_slab; char *twsk_slab_name; unsigned int twsk_obj_size; void (*twsk_destructor)(struct sock *sk); }; static inline void twsk_destructor(struct sock *sk) { if (sk->sk_prot->twsk_prot->twsk_destructor != NULL) sk->sk_prot->twsk_prot->twsk_destructor(sk); } #endif /* _TIMEWAIT_SOCK_H */ |
21 20 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 | /* SPDX-License-Identifier: GPL-2.0-only */ /* Copyright (c) 2016 Facebook */ #ifndef __BPF_LRU_LIST_H_ #define __BPF_LRU_LIST_H_ #include <linux/cache.h> #include <linux/list.h> #include <linux/spinlock_types.h> #define NR_BPF_LRU_LIST_T (3) #define NR_BPF_LRU_LIST_COUNT (2) #define NR_BPF_LRU_LOCAL_LIST_T (2) #define BPF_LOCAL_LIST_T_OFFSET NR_BPF_LRU_LIST_T enum bpf_lru_list_type { BPF_LRU_LIST_T_ACTIVE, BPF_LRU_LIST_T_INACTIVE, BPF_LRU_LIST_T_FREE, BPF_LRU_LOCAL_LIST_T_FREE, BPF_LRU_LOCAL_LIST_T_PENDING, }; struct bpf_lru_node { struct list_head list; u16 cpu; u8 type; u8 ref; }; struct bpf_lru_list { struct list_head lists[NR_BPF_LRU_LIST_T]; unsigned int counts[NR_BPF_LRU_LIST_COUNT]; /* The next inactive list rotation starts from here */ struct list_head *next_inactive_rotation; raw_spinlock_t lock ____cacheline_aligned_in_smp; }; struct bpf_lru_locallist { struct list_head lists[NR_BPF_LRU_LOCAL_LIST_T]; u16 next_steal; raw_spinlock_t lock; }; struct bpf_common_lru { struct bpf_lru_list lru_list; struct bpf_lru_locallist __percpu *local_list; }; typedef bool (*del_from_htab_func)(void *arg, struct bpf_lru_node *node); struct bpf_lru { union { struct bpf_common_lru common_lru; struct bpf_lru_list __percpu *percpu_lru; }; del_from_htab_func del_from_htab; void *del_arg; unsigned int hash_offset; unsigned int nr_scans; bool percpu; }; static inline void bpf_lru_node_set_ref(struct bpf_lru_node *node) { if (!READ_ONCE(node->ref)) WRITE_ONCE(node->ref, 1); } int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset, del_from_htab_func del_from_htab, void *delete_arg); void bpf_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset, u32 elem_size, u32 nr_elems); void bpf_lru_destroy(struct bpf_lru *lru); struct bpf_lru_node *bpf_lru_pop_free(struct bpf_lru *lru, u32 hash); void bpf_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node); #endif |
15 15 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Oracle. All rights reserved. */ #include <linux/kernel.h> #include <linux/bio.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/fsnotify.h> #include <linux/pagemap.h> #include <linux/highmem.h> #include <linux/time.h> #include <linux/string.h> #include <linux/backing-dev.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/writeback.h> #include <linux/compat.h> #include <linux/security.h> #include <linux/xattr.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/uuid.h> #include <linux/btrfs.h> #include <linux/uaccess.h> #include <linux/iversion.h> #include <linux/fileattr.h> #include <linux/fsverity.h> #include <linux/sched/xacct.h> #include <linux/io_uring/cmd.h> #include "ctree.h" #include "disk-io.h" #include "export.h" #include "transaction.h" #include "btrfs_inode.h" #include "volumes.h" #include "locking.h" #include "backref.h" #include "send.h" #include "dev-replace.h" #include "props.h" #include "sysfs.h" #include "qgroup.h" #include "tree-log.h" #include "compression.h" #include "space-info.h" #include "block-group.h" #include "fs.h" #include "accessors.h" #include "extent-tree.h" #include "root-tree.h" #include "defrag.h" #include "dir-item.h" #include "uuid-tree.h" #include "ioctl.h" #include "file.h" #include "scrub.h" #include "super.h" #ifdef CONFIG_64BIT /* If we have a 32-bit userspace and 64-bit kernel, then the UAPI * structures are incorrect, as the timespec structure from userspace * is 4 bytes too small. We define these alternatives here to teach * the kernel about the 32-bit struct packing. */ struct btrfs_ioctl_timespec_32 { __u64 sec; __u32 nsec; } __attribute__ ((__packed__)); struct btrfs_ioctl_received_subvol_args_32 { char uuid[BTRFS_UUID_SIZE]; /* in */ __u64 stransid; /* in */ __u64 rtransid; /* out */ struct btrfs_ioctl_timespec_32 stime; /* in */ struct btrfs_ioctl_timespec_32 rtime; /* out */ __u64 flags; /* in */ __u64 reserved[16]; /* in */ } __attribute__ ((__packed__)); #define BTRFS_IOC_SET_RECEIVED_SUBVOL_32 _IOWR(BTRFS_IOCTL_MAGIC, 37, \ struct btrfs_ioctl_received_subvol_args_32) #endif #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) struct btrfs_ioctl_send_args_32 { __s64 send_fd; /* in */ __u64 clone_sources_count; /* in */ compat_uptr_t clone_sources; /* in */ __u64 parent_root; /* in */ __u64 flags; /* in */ __u32 version; /* in */ __u8 reserved[28]; /* in */ } __attribute__ ((__packed__)); #define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \ struct btrfs_ioctl_send_args_32) struct btrfs_ioctl_encoded_io_args_32 { compat_uptr_t iov; compat_ulong_t iovcnt; __s64 offset; __u64 flags; __u64 len; __u64 unencoded_len; __u64 unencoded_offset; __u32 compression; __u32 encryption; __u8 reserved[64]; }; #define BTRFS_IOC_ENCODED_READ_32 _IOR(BTRFS_IOCTL_MAGIC, 64, \ struct btrfs_ioctl_encoded_io_args_32) #define BTRFS_IOC_ENCODED_WRITE_32 _IOW(BTRFS_IOCTL_MAGIC, 64, \ struct btrfs_ioctl_encoded_io_args_32) #endif /* Mask out flags that are inappropriate for the given type of inode. */ static unsigned int btrfs_mask_fsflags_for_type(const struct inode *inode, unsigned int flags) { if (S_ISDIR(inode->i_mode)) return flags; else if (S_ISREG(inode->i_mode)) return flags & ~FS_DIRSYNC_FL; else return flags & (FS_NODUMP_FL | FS_NOATIME_FL); } /* * Export internal inode flags to the format expected by the FS_IOC_GETFLAGS * ioctl. */ static unsigned int btrfs_inode_flags_to_fsflags(const struct btrfs_inode *inode) { unsigned int iflags = 0; u32 flags = inode->flags; u32 ro_flags = inode->ro_flags; if (flags & BTRFS_INODE_SYNC) iflags |= FS_SYNC_FL; if (flags & BTRFS_INODE_IMMUTABLE) iflags |= FS_IMMUTABLE_FL; if (flags & BTRFS_INODE_APPEND) iflags |= FS_APPEND_FL; if (flags & BTRFS_INODE_NODUMP) iflags |= FS_NODUMP_FL; if (flags & BTRFS_INODE_NOATIME) iflags |= FS_NOATIME_FL; if (flags & BTRFS_INODE_DIRSYNC) iflags |= FS_DIRSYNC_FL; if (flags & BTRFS_INODE_NODATACOW) iflags |= FS_NOCOW_FL; if (ro_flags & BTRFS_INODE_RO_VERITY) iflags |= FS_VERITY_FL; if (flags & BTRFS_INODE_NOCOMPRESS) iflags |= FS_NOCOMP_FL; else if (flags & BTRFS_INODE_COMPRESS) iflags |= FS_COMPR_FL; return iflags; } /* * Update inode->i_flags based on the btrfs internal flags. */ void btrfs_sync_inode_flags_to_i_flags(struct btrfs_inode *inode) { unsigned int new_fl = 0; if (inode->flags & BTRFS_INODE_SYNC) new_fl |= S_SYNC; if (inode->flags & BTRFS_INODE_IMMUTABLE) new_fl |= S_IMMUTABLE; if (inode->flags & BTRFS_INODE_APPEND) new_fl |= S_APPEND; if (inode->flags & BTRFS_INODE_NOATIME) new_fl |= S_NOATIME; if (inode->flags & BTRFS_INODE_DIRSYNC) new_fl |= S_DIRSYNC; if (inode->ro_flags & BTRFS_INODE_RO_VERITY) new_fl |= S_VERITY; set_mask_bits(&inode->vfs_inode.i_flags, S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC | S_VERITY, new_fl); } /* * Check if @flags are a supported and valid set of FS_*_FL flags and that * the old and new flags are not conflicting */ static int check_fsflags(unsigned int old_flags, unsigned int flags) { if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ FS_NOATIME_FL | FS_NODUMP_FL | \ FS_SYNC_FL | FS_DIRSYNC_FL | \ FS_NOCOMP_FL | FS_COMPR_FL | FS_NOCOW_FL)) return -EOPNOTSUPP; /* COMPR and NOCOMP on new/old are valid */ if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL)) return -EINVAL; if ((flags & FS_COMPR_FL) && (flags & FS_NOCOW_FL)) return -EINVAL; /* NOCOW and compression options are mutually exclusive */ if ((old_flags & FS_NOCOW_FL) && (flags & (FS_COMPR_FL | FS_NOCOMP_FL))) return -EINVAL; if ((flags & FS_NOCOW_FL) && (old_flags & (FS_COMPR_FL | FS_NOCOMP_FL))) return -EINVAL; return 0; } static int check_fsflags_compatible(const struct btrfs_fs_info *fs_info, unsigned int flags) { if (btrfs_is_zoned(fs_info) && (flags & FS_NOCOW_FL)) return -EPERM; return 0; } int btrfs_check_ioctl_vol_args_path(const struct btrfs_ioctl_vol_args *vol_args) { if (memchr(vol_args->name, 0, sizeof(vol_args->name)) == NULL) return -ENAMETOOLONG; return 0; } static int btrfs_check_ioctl_vol_args2_subvol_name(const struct btrfs_ioctl_vol_args_v2 *vol_args2) { if (memchr(vol_args2->name, 0, sizeof(vol_args2->name)) == NULL) return -ENAMETOOLONG; return 0; } /* * Set flags/xflags from the internal inode flags. The remaining items of * fsxattr are zeroed. */ int btrfs_fileattr_get(struct dentry *dentry, struct fileattr *fa) { const struct btrfs_inode *inode = BTRFS_I(d_inode(dentry)); fileattr_fill_flags(fa, btrfs_inode_flags_to_fsflags(inode)); return 0; } int btrfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { struct btrfs_inode *inode = BTRFS_I(d_inode(dentry)); struct btrfs_root *root = inode->root; struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_trans_handle *trans; unsigned int fsflags, old_fsflags; int ret; const char *comp = NULL; u32 inode_flags; if (btrfs_root_readonly(root)) return -EROFS; if (fileattr_has_fsx(fa)) return -EOPNOTSUPP; fsflags = btrfs_mask_fsflags_for_type(&inode->vfs_inode, fa->flags); old_fsflags = btrfs_inode_flags_to_fsflags(inode); ret = check_fsflags(old_fsflags, fsflags); if (ret) return ret; ret = check_fsflags_compatible(fs_info, fsflags); if (ret) return ret; inode_flags = inode->flags; if (fsflags & FS_SYNC_FL) inode_flags |= BTRFS_INODE_SYNC; else inode_flags &= ~BTRFS_INODE_SYNC; if (fsflags & FS_IMMUTABLE_FL) inode_flags |= BTRFS_INODE_IMMUTABLE; else inode_flags &= ~BTRFS_INODE_IMMUTABLE; if (fsflags & FS_APPEND_FL) inode_flags |= BTRFS_INODE_APPEND; else inode_flags &= ~BTRFS_INODE_APPEND; if (fsflags & FS_NODUMP_FL) inode_flags |= BTRFS_INODE_NODUMP; else inode_flags &= ~BTRFS_INODE_NODUMP; if (fsflags & FS_NOATIME_FL) inode_flags |= BTRFS_INODE_NOATIME; else inode_flags &= ~BTRFS_INODE_NOATIME; /* If coming from FS_IOC_FSSETXATTR then skip unconverted flags */ if (!fa->flags_valid) { /* 1 item for the inode */ trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) return PTR_ERR(trans); goto update_flags; } if (fsflags & FS_DIRSYNC_FL) inode_flags |= BTRFS_INODE_DIRSYNC; else inode_flags &= ~BTRFS_INODE_DIRSYNC; if (fsflags & FS_NOCOW_FL) { if (S_ISREG(inode->vfs_inode.i_mode)) { /* * It's safe to turn csums off here, no extents exist. * Otherwise we want the flag to reflect the real COW * status of the file and will not set it. */ if (inode->vfs_inode.i_size == 0) inode_flags |= BTRFS_INODE_NODATACOW | BTRFS_INODE_NODATASUM; } else { inode_flags |= BTRFS_INODE_NODATACOW; } } else { /* * Revert back under same assumptions as above */ if (S_ISREG(inode->vfs_inode.i_mode)) { if (inode->vfs_inode.i_size == 0) inode_flags &= ~(BTRFS_INODE_NODATACOW | BTRFS_INODE_NODATASUM); } else { inode_flags &= ~BTRFS_INODE_NODATACOW; } } /* * The COMPRESS flag can only be changed by users, while the NOCOMPRESS * flag may be changed automatically if compression code won't make * things smaller. */ if (fsflags & FS_NOCOMP_FL) { inode_flags &= ~BTRFS_INODE_COMPRESS; inode_flags |= BTRFS_INODE_NOCOMPRESS; } else if (fsflags & FS_COMPR_FL) { if (IS_SWAPFILE(&inode->vfs_inode)) return -ETXTBSY; inode_flags |= BTRFS_INODE_COMPRESS; inode_flags &= ~BTRFS_INODE_NOCOMPRESS; comp = btrfs_compress_type2str(fs_info->compress_type); if (!comp || comp[0] == 0) comp = btrfs_compress_type2str(BTRFS_COMPRESS_ZLIB); } else { inode_flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); } /* * 1 for inode item * 2 for properties */ trans = btrfs_start_transaction(root, 3); if (IS_ERR(trans)) return PTR_ERR(trans); if (comp) { ret = btrfs_set_prop(trans, inode, "btrfs.compression", comp, strlen(comp), 0); if (ret) { btrfs_abort_transaction(trans, ret); goto out_end_trans; } } else { ret = btrfs_set_prop(trans, inode, "btrfs.compression", NULL, 0, 0); if (ret && ret != -ENODATA) { btrfs_abort_transaction(trans, ret); goto out_end_trans; } } update_flags: inode->flags = inode_flags; btrfs_update_inode_mapping_flags(inode); btrfs_sync_inode_flags_to_i_flags(inode); inode_inc_iversion(&inode->vfs_inode); inode_set_ctime_current(&inode->vfs_inode); ret = btrfs_update_inode(trans, inode); out_end_trans: btrfs_end_transaction(trans); return ret; } static int btrfs_ioctl_getversion(const struct inode *inode, int __user *arg) { return put_user(inode->i_generation, arg); } static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info, void __user *arg) { struct btrfs_device *device; struct fstrim_range range; u64 minlen = ULLONG_MAX; u64 num_devices = 0; int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; /* * btrfs_trim_block_group() depends on space cache, which is not * available in zoned filesystem. So, disallow fitrim on a zoned * filesystem for now. */ if (btrfs_is_zoned(fs_info)) return -EOPNOTSUPP; /* * If the fs is mounted with nologreplay, which requires it to be * mounted in RO mode as well, we can not allow discard on free space * inside block groups, because log trees refer to extents that are not * pinned in a block group's free space cache (pinning the extents is * precisely the first phase of replaying a log tree). */ if (btrfs_test_opt(fs_info, NOLOGREPLAY)) return -EROFS; rcu_read_lock(); list_for_each_entry_rcu(device, &fs_info->fs_devices->devices, dev_list) { if (!device->bdev || !bdev_max_discard_sectors(device->bdev)) continue; num_devices++; minlen = min_t(u64, bdev_discard_granularity(device->bdev), minlen); } rcu_read_unlock(); if (!num_devices) return -EOPNOTSUPP; if (copy_from_user(&range, arg, sizeof(range))) return -EFAULT; /* * NOTE: Don't truncate the range using super->total_bytes. Bytenr of * block group is in the logical address space, which can be any * sectorsize aligned bytenr in the range [0, U64_MAX]. */ if (range.len < fs_info->sectorsize) return -EINVAL; range.minlen = max(range.minlen, minlen); ret = btrfs_trim_fs(fs_info, &range); if (copy_to_user(arg, &range, sizeof(range))) return -EFAULT; return ret; } /* * Calculate the number of transaction items to reserve for creating a subvolume * or snapshot, not including the inode, directory entries, or parent directory. */ static unsigned int create_subvol_num_items(const struct btrfs_qgroup_inherit *inherit) { /* * 1 to add root block * 1 to add root item * 1 to add root ref * 1 to add root backref * 1 to add UUID item * 1 to add qgroup info * 1 to add qgroup limit * * Ideally the last two would only be accounted if qgroups are enabled, * but that can change between now and the time we would insert them. */ unsigned int num_items = 7; if (inherit) { /* 2 to add qgroup relations for each inherited qgroup */ num_items += 2 * inherit->num_qgroups; } return num_items; } static noinline int create_subvol(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, struct btrfs_qgroup_inherit *inherit) { struct btrfs_fs_info *fs_info = inode_to_fs_info(dir); struct btrfs_trans_handle *trans; struct btrfs_key key; struct btrfs_root_item *root_item; struct btrfs_inode_item *inode_item; struct extent_buffer *leaf; struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_root *new_root; struct btrfs_block_rsv block_rsv; struct timespec64 cur_time = current_time(dir); struct btrfs_new_inode_args new_inode_args = { .dir = dir, .dentry = dentry, .subvol = true, }; unsigned int trans_num_items; int ret; dev_t anon_dev; u64 objectid; u64 qgroup_reserved = 0; root_item = kzalloc(sizeof(*root_item), GFP_KERNEL); if (!root_item) return -ENOMEM; ret = btrfs_get_free_objectid(fs_info->tree_root, &objectid); if (ret) goto out_root_item; /* * Don't create subvolume whose level is not zero. Or qgroup will be * screwed up since it assumes subvolume qgroup's level to be 0. */ if (btrfs_qgroup_level(objectid)) { ret = -ENOSPC; goto out_root_item; } ret = get_anon_bdev(&anon_dev); if (ret < 0) goto out_root_item; new_inode_args.inode = btrfs_new_subvol_inode(idmap, dir); if (!new_inode_args.inode) { ret = -ENOMEM; goto out_anon_dev; } ret = btrfs_new_inode_prepare(&new_inode_args, &trans_num_items); if (ret) goto out_inode; trans_num_items += create_subvol_num_items(inherit); btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP); ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, trans_num_items, false); if (ret) goto out_new_inode_args; qgroup_reserved = block_rsv.qgroup_rsv_reserved; trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out_release_rsv; } btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved); qgroup_reserved = 0; trans->block_rsv = &block_rsv; trans->bytes_reserved = block_rsv.size; ret = btrfs_qgroup_inherit(trans, 0, objectid, btrfs_root_id(root), inherit); if (ret) goto out; leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0, 0, BTRFS_NESTING_NORMAL); if (IS_ERR(leaf)) { ret = PTR_ERR(leaf); goto out; } btrfs_mark_buffer_dirty(trans, leaf); inode_item = &root_item->inode; btrfs_set_stack_inode_generation(inode_item, 1); btrfs_set_stack_inode_size(inode_item, 3); btrfs_set_stack_inode_nlink(inode_item, 1); btrfs_set_stack_inode_nbytes(inode_item, fs_info->nodesize); btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); btrfs_set_root_flags(root_item, 0); btrfs_set_root_limit(root_item, 0); btrfs_set_stack_inode_flags(inode_item, BTRFS_INODE_ROOT_ITEM_INIT); btrfs_set_root_bytenr(root_item, leaf->start); btrfs_set_root_generation(root_item, trans->transid); btrfs_set_root_level(root_item, 0); btrfs_set_root_refs(root_item, 1); btrfs_set_root_used(root_item, leaf->len); btrfs_set_root_last_snapshot(root_item, 0); btrfs_set_root_generation_v2(root_item, btrfs_root_generation(root_item)); generate_random_guid(root_item->uuid); btrfs_set_stack_timespec_sec(&root_item->otime, cur_time.tv_sec); btrfs_set_stack_timespec_nsec(&root_item->otime, cur_time.tv_nsec); root_item->ctime = root_item->otime; btrfs_set_root_ctransid(root_item, trans->transid); btrfs_set_root_otransid(root_item, trans->transid); btrfs_tree_unlock(leaf); btrfs_set_root_dirid(root_item, BTRFS_FIRST_FREE_OBJECTID); key.objectid = objectid; key.type = BTRFS_ROOT_ITEM_KEY; key.offset = 0; ret = btrfs_insert_root(trans, fs_info->tree_root, &key, root_item); if (ret) { int ret2; /* * Since we don't abort the transaction in this case, free the * tree block so that we don't leak space and leave the * filesystem in an inconsistent state (an extent item in the * extent tree with a backreference for a root that does not * exists). */ btrfs_tree_lock(leaf); btrfs_clear_buffer_dirty(trans, leaf); btrfs_tree_unlock(leaf); ret2 = btrfs_free_tree_block(trans, objectid, leaf, 0, 1); if (ret2 < 0) btrfs_abort_transaction(trans, ret2); free_extent_buffer(leaf); goto out; } free_extent_buffer(leaf); leaf = NULL; new_root = btrfs_get_new_fs_root(fs_info, objectid, &anon_dev); if (IS_ERR(new_root)) { ret = PTR_ERR(new_root); btrfs_abort_transaction(trans, ret); goto out; } /* anon_dev is owned by new_root now. */ anon_dev = 0; BTRFS_I(new_inode_args.inode)->root = new_root; /* ... and new_root is owned by new_inode_args.inode now. */ ret = btrfs_record_root_in_trans(trans, new_root); if (ret) { btrfs_abort_transaction(trans, ret); goto out; } ret = btrfs_uuid_tree_add(trans, root_item->uuid, BTRFS_UUID_KEY_SUBVOL, objectid); if (ret) { btrfs_abort_transaction(trans, ret); goto out; } ret = btrfs_create_new_inode(trans, &new_inode_args); if (ret) { btrfs_abort_transaction(trans, ret); goto out; } btrfs_record_new_subvolume(trans, BTRFS_I(dir)); d_instantiate_new(dentry, new_inode_args.inode); new_inode_args.inode = NULL; out: trans->block_rsv = NULL; trans->bytes_reserved = 0; btrfs_end_transaction(trans); out_release_rsv: btrfs_block_rsv_release(fs_info, &block_rsv, (u64)-1, NULL); if (qgroup_reserved) btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved); out_new_inode_args: btrfs_new_inode_args_destroy(&new_inode_args); out_inode: iput(new_inode_args.inode); out_anon_dev: if (anon_dev) free_anon_bdev(anon_dev); out_root_item: kfree(root_item); return ret; } static int create_snapshot(struct btrfs_root *root, struct inode *dir, struct dentry *dentry, bool readonly, struct btrfs_qgroup_inherit *inherit) { struct btrfs_fs_info *fs_info = inode_to_fs_info(dir); struct inode *inode; struct btrfs_pending_snapshot *pending_snapshot; unsigned int trans_num_items; struct btrfs_trans_handle *trans; struct btrfs_block_rsv *block_rsv; u64 qgroup_reserved = 0; int ret; /* We do not support snapshotting right now. */ if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) { btrfs_warn(fs_info, "extent tree v2 doesn't support snapshotting yet"); return -EOPNOTSUPP; } if (btrfs_root_refs(&root->root_item) == 0) return -ENOENT; if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) return -EINVAL; if (atomic_read(&root->nr_swapfiles)) { btrfs_warn(fs_info, "cannot snapshot subvolume with active swapfile"); return -ETXTBSY; } pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_KERNEL); if (!pending_snapshot) return -ENOMEM; ret = get_anon_bdev(&pending_snapshot->anon_dev); if (ret < 0) goto free_pending; pending_snapshot->root_item = kzalloc(sizeof(struct btrfs_root_item), GFP_KERNEL); pending_snapshot->path = btrfs_alloc_path(); if (!pending_snapshot->root_item || !pending_snapshot->path) { ret = -ENOMEM; goto free_pending; } block_rsv = &pending_snapshot->block_rsv; btrfs_init_block_rsv(block_rsv, BTRFS_BLOCK_RSV_TEMP); /* * 1 to add dir item * 1 to add dir index * 1 to update parent inode item */ trans_num_items = create_subvol_num_items(inherit) + 3; ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root, block_rsv, trans_num_items, false); if (ret) goto free_pending; qgroup_reserved = block_rsv->qgroup_rsv_reserved; pending_snapshot->dentry = dentry; pending_snapshot->root = root; pending_snapshot->readonly = readonly; pending_snapshot->dir = BTRFS_I(dir); pending_snapshot->inherit = inherit; trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto fail; } ret = btrfs_record_root_in_trans(trans, BTRFS_I(dir)->root); if (ret) { btrfs_end_transaction(trans); goto fail; } btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved); qgroup_reserved = 0; trans->pending_snapshot = pending_snapshot; ret = btrfs_commit_transaction(trans); if (ret) goto fail; ret = pending_snapshot->error; if (ret) goto fail; ret = btrfs_orphan_cleanup(pending_snapshot->snap); if (ret) goto fail; inode = btrfs_lookup_dentry(d_inode(dentry->d_parent), dentry); if (IS_ERR(inode)) { ret = PTR_ERR(inode); goto fail; } d_instantiate(dentry, inode); ret = 0; pending_snapshot->anon_dev = 0; fail: /* Prevent double freeing of anon_dev */ if (ret && pending_snapshot->snap) pending_snapshot->snap->anon_dev = 0; btrfs_put_root(pending_snapshot->snap); btrfs_block_rsv_release(fs_info, block_rsv, (u64)-1, NULL); if (qgroup_reserved) btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved); free_pending: if (pending_snapshot->anon_dev) free_anon_bdev(pending_snapshot->anon_dev); kfree(pending_snapshot->root_item); btrfs_free_path(pending_snapshot->path); kfree(pending_snapshot); return ret; } /* copy of may_delete in fs/namei.c() * Check whether we can remove a link victim from directory dir, check * whether the type of victim is right. * 1. We can't do it if dir is read-only (done in permission()) * 2. We should have write and exec permissions on dir * 3. We can't remove anything from append-only dir * 4. We can't do anything with immutable dir (done in permission()) * 5. If the sticky bit on dir is set we should either * a. be owner of dir, or * b. be owner of victim, or * c. have CAP_FOWNER capability * 6. If the victim is append-only or immutable we can't do anything with * links pointing to it. * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR. * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR. * 9. We can't remove a root or mountpoint. * 10. We don't allow removal of NFS sillyrenamed files; it's handled by * nfs_async_unlink(). */ static int btrfs_may_delete(struct mnt_idmap *idmap, struct inode *dir, struct dentry *victim, int isdir) { int error; if (d_really_is_negative(victim)) return -ENOENT; /* The @victim is not inside @dir. */ if (d_inode(victim->d_parent) != dir) return -EINVAL; audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE); error = inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC); if (error) return error; if (IS_APPEND(dir)) return -EPERM; if (check_sticky(idmap, dir, d_inode(victim)) || IS_APPEND(d_inode(victim)) || IS_IMMUTABLE(d_inode(victim)) || IS_SWAPFILE(d_inode(victim))) return -EPERM; if (isdir) { if (!d_is_dir(victim)) return -ENOTDIR; if (IS_ROOT(victim)) return -EBUSY; } else if (d_is_dir(victim)) return -EISDIR; if (IS_DEADDIR(dir)) return -ENOENT; if (victim->d_flags & DCACHE_NFSFS_RENAMED) return -EBUSY; return 0; } /* copy of may_create in fs/namei.c() */ static inline int btrfs_may_create(struct mnt_idmap *idmap, struct inode *dir, const struct dentry *child) { if (d_really_is_positive(child)) return -EEXIST; if (IS_DEADDIR(dir)) return -ENOENT; if (!fsuidgid_has_mapping(dir->i_sb, idmap)) return -EOVERFLOW; return inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC); } /* * Create a new subvolume below @parent. This is largely modeled after * sys_mkdirat and vfs_mkdir, but we only do a single component lookup * inside this filesystem so it's quite a bit simpler. */ static noinline int btrfs_mksubvol(const struct path *parent, struct mnt_idmap *idmap, const char *name, int namelen, struct btrfs_root *snap_src, bool readonly, struct btrfs_qgroup_inherit *inherit) { struct inode *dir = d_inode(parent->dentry); struct btrfs_fs_info *fs_info = inode_to_fs_info(dir); struct dentry *dentry; struct fscrypt_str name_str = FSTR_INIT((char *)name, namelen); int error; error = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT); if (error == -EINTR) return error; dentry = lookup_one(idmap, name, parent->dentry, namelen); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_unlock; error = btrfs_may_create(idmap, dir, dentry); if (error) goto out_dput; /* * even if this name doesn't exist, we may get hash collisions. * check for them now when we can safely fail */ error = btrfs_check_dir_item_collision(BTRFS_I(dir)->root, dir->i_ino, &name_str); if (error) goto out_dput; down_read(&fs_info->subvol_sem); if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0) goto out_up_read; if (snap_src) error = create_snapshot(snap_src, dir, dentry, readonly, inherit); else error = create_subvol(idmap, dir, dentry, inherit); if (!error) fsnotify_mkdir(dir, dentry); out_up_read: up_read(&fs_info->subvol_sem); out_dput: dput(dentry); out_unlock: btrfs_inode_unlock(BTRFS_I(dir), 0); return error; } static noinline int btrfs_mksnapshot(const struct path *parent, struct mnt_idmap *idmap, const char *name, int namelen, struct btrfs_root *root, bool readonly, struct btrfs_qgroup_inherit *inherit) { int ret; /* * Force new buffered writes to reserve space even when NOCOW is * possible. This is to avoid later writeback (running dealloc) to * fallback to COW mode and unexpectedly fail with ENOSPC. */ btrfs_drew_read_lock(&root->snapshot_lock); ret = btrfs_start_delalloc_snapshot(root, false); if (ret) goto out; /* * All previous writes have started writeback in NOCOW mode, so now * we force future writes to fallback to COW mode during snapshot * creation. */ atomic_inc(&root->snapshot_force_cow); btrfs_wait_ordered_extents(root, U64_MAX, NULL); ret = btrfs_mksubvol(parent, idmap, name, namelen, root, readonly, inherit); atomic_dec(&root->snapshot_force_cow); out: btrfs_drew_read_unlock(&root->snapshot_lock); return ret; } /* * Try to start exclusive operation @type or cancel it if it's running. * * Return: * 0 - normal mode, newly claimed op started * >0 - normal mode, something else is running, * return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS to user space * ECANCELED - cancel mode, successful cancel * ENOTCONN - cancel mode, operation not running anymore */ static int exclop_start_or_cancel_reloc(struct btrfs_fs_info *fs_info, enum btrfs_exclusive_operation type, bool cancel) { if (!cancel) { /* Start normal op */ if (!btrfs_exclop_start(fs_info, type)) return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; /* Exclusive operation is now claimed */ return 0; } /* Cancel running op */ if (btrfs_exclop_start_try_lock(fs_info, type)) { /* * This blocks any exclop finish from setting it to NONE, so we * request cancellation. Either it runs and we will wait for it, * or it has finished and no waiting will happen. */ atomic_inc(&fs_info->reloc_cancel_req); btrfs_exclop_start_unlock(fs_info); if (test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)) wait_on_bit(&fs_info->flags, BTRFS_FS_RELOC_RUNNING, TASK_INTERRUPTIBLE); return -ECANCELED; } /* Something else is running or none */ return -ENOTCONN; } static noinline int btrfs_ioctl_resize(struct file *file, void __user *arg) { BTRFS_DEV_LOOKUP_ARGS(args); struct btrfs_root *root = BTRFS_I(file_inode(file))->root; struct btrfs_fs_info *fs_info = root->fs_info; u64 new_size; u64 old_size; u64 devid = 1; struct btrfs_ioctl_vol_args *vol_args; struct btrfs_device *device = NULL; char *sizestr; char *devstr = NULL; int ret = 0; int mod = 0; bool cancel; if (!capable(CAP_SYS_ADMIN)) return -EPERM; ret = mnt_want_write_file(file); if (ret) return ret; /* * Read the arguments before checking exclusivity to be able to * distinguish regular resize and cancel */ vol_args = memdup_user(arg, sizeof(*vol_args)); if (IS_ERR(vol_args)) { ret = PTR_ERR(vol_args); goto out_drop; } ret = btrfs_check_ioctl_vol_args_path(vol_args); if (ret < 0) goto out_free; sizestr = vol_args->name; cancel = (strcmp("cancel", sizestr) == 0); ret = exclop_start_or_cancel_reloc(fs_info, BTRFS_EXCLOP_RESIZE, cancel); if (ret) goto out_free; /* Exclusive operation is now claimed */ devstr = strchr(sizestr, ':'); if (devstr) { sizestr = devstr + 1; *devstr = '\0'; devstr = vol_args->name; ret = kstrtoull(devstr, 10, &devid); if (ret) goto out_finish; if (!devid) { ret = -EINVAL; goto out_finish; } btrfs_info(fs_info, "resizing devid %llu", devid); } args.devid = devid; device = btrfs_find_device(fs_info->fs_devices, &args); if (!device) { btrfs_info(fs_info, "resizer unable to find device %llu", devid); ret = -ENODEV; goto out_finish; } if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { btrfs_info(fs_info, "resizer unable to apply on readonly device %llu", devid); ret = -EPERM; goto out_finish; } if (!strcmp(sizestr, "max")) new_size = bdev_nr_bytes(device->bdev); else { char *retptr; if (sizestr[0] == '-') { mod = -1; sizestr++; } else if (sizestr[0] == '+') { mod = 1; sizestr++; } new_size = memparse(sizestr, &retptr); if (*retptr != '\0' || new_size == 0) { ret = -EINVAL; goto out_finish; } } if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) { ret = -EPERM; goto out_finish; } old_size = btrfs_device_get_total_bytes(device); if (mod < 0) { if (new_size > old_size) { ret = -EINVAL; goto out_finish; } new_size = old_size - new_size; } else if (mod > 0) { if (new_size > ULLONG_MAX - old_size) { ret = -ERANGE; goto out_finish; } new_size = old_size + new_size; } if (new_size < SZ_256M) { ret = -EINVAL; goto out_finish; } if (new_size > bdev_nr_bytes(device->bdev)) { ret = -EFBIG; goto out_finish; } new_size = round_down(new_size, fs_info->sectorsize); if (new_size > old_size) { struct btrfs_trans_handle *trans; trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out_finish; } ret = btrfs_grow_device(trans, device, new_size); btrfs_commit_transaction(trans); } else if (new_size < old_size) { ret = btrfs_shrink_device(device, new_size); } /* equal, nothing need to do */ if (ret == 0 && new_size != old_size) btrfs_info_in_rcu(fs_info, "resize device %s (devid %llu) from %llu to %llu", btrfs_dev_name(device), device->devid, old_size, new_size); out_finish: btrfs_exclop_finish(fs_info); out_free: kfree(vol_args); out_drop: mnt_drop_write_file(file); return ret; } static noinline int __btrfs_ioctl_snap_create(struct file *file, struct mnt_idmap *idmap, const char *name, unsigned long fd, int subvol, bool readonly, struct btrfs_qgroup_inherit *inherit) { int namelen; int ret = 0; if (!S_ISDIR(file_inode(file)->i_mode)) return -ENOTDIR; ret = mnt_want_write_file(file); if (ret) goto out; namelen = strlen(name); if (strchr(name, '/')) { ret = -EINVAL; goto out_drop_write; } if (name[0] == '.' && (namelen == 1 || (name[1] == '.' && namelen == 2))) { ret = -EEXIST; goto out_drop_write; } if (subvol) { ret = btrfs_mksubvol(&file->f_path, idmap, name, namelen, NULL, readonly, inherit); } else { CLASS(fd, src)(fd); struct inode *src_inode; if (fd_empty(src)) { ret = -EINVAL; goto out_drop_write; } src_inode = file_inode(fd_file(src)); if (src_inode->i_sb != file_inode(file)->i_sb) { btrfs_info(BTRFS_I(file_inode(file))->root->fs_info, "Snapshot src from another FS"); ret = -EXDEV; } else if (!inode_owner_or_capable(idmap, src_inode)) { /* * Subvolume creation is not restricted, but snapshots * are limited to own subvolumes only */ ret = -EPERM; } else if (btrfs_ino(BTRFS_I(src_inode)) != BTRFS_FIRST_FREE_OBJECTID) { /* * Snapshots must be made with the src_inode referring * to the subvolume inode, otherwise the permission * checking above is useless because we may have * permission on a lower directory but not the subvol * itself. */ ret = -EINVAL; } else { ret = btrfs_mksnapshot(&file->f_path, idmap, name, namelen, BTRFS_I(src_inode)->root, readonly, inherit); } } out_drop_write: mnt_drop_write_file(file); out: return ret; } static noinline int btrfs_ioctl_snap_create(struct file *file, void __user *arg, int subvol) { struct btrfs_ioctl_vol_args *vol_args; int ret; if (!S_ISDIR(file_inode(file)->i_mode)) return -ENOTDIR; vol_args = memdup_user(arg, sizeof(*vol_args)); if (IS_ERR(vol_args)) return PTR_ERR(vol_args); ret = btrfs_check_ioctl_vol_args_path(vol_args); if (ret < 0) goto out; ret = __btrfs_ioctl_snap_create(file, file_mnt_idmap(file), vol_args->name, vol_args->fd, subvol, false, NULL); out: kfree(vol_args); return ret; } static noinline int btrfs_ioctl_snap_create_v2(struct file *file, void __user *arg, int subvol) { struct btrfs_ioctl_vol_args_v2 *vol_args; int ret; bool readonly = false; struct btrfs_qgroup_inherit *inherit = NULL; if (!S_ISDIR(file_inode(file)->i_mode)) return -ENOTDIR; vol_args = memdup_user(arg, sizeof(*vol_args)); if (IS_ERR(vol_args)) return PTR_ERR(vol_args); ret = btrfs_check_ioctl_vol_args2_subvol_name(vol_args); if (ret < 0) goto free_args; if (vol_args->flags & ~BTRFS_SUBVOL_CREATE_ARGS_MASK) { ret = -EOPNOTSUPP; goto free_args; } if (vol_args->flags & BTRFS_SUBVOL_RDONLY) readonly = true; if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) { struct btrfs_fs_info *fs_info = inode_to_fs_info(file_inode(file)); if (vol_args->size < sizeof(*inherit) || vol_args->size > PAGE_SIZE) { ret = -EINVAL; goto free_args; } inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size); if (IS_ERR(inherit)) { ret = PTR_ERR(inherit); goto free_args; } ret = btrfs_qgroup_check_inherit(fs_info, inherit, vol_args->size); if (ret < 0) goto free_inherit; } ret = __btrfs_ioctl_snap_create(file, file_mnt_idmap(file), vol_args->name, vol_args->fd, subvol, readonly, inherit); if (ret) goto free_inherit; free_inherit: kfree(inherit); free_args: kfree(vol_args); return ret; } static noinline int btrfs_ioctl_subvol_getflags(struct btrfs_inode *inode, void __user *arg) { struct btrfs_root *root = inode->root; struct btrfs_fs_info *fs_info = root->fs_info; int ret = 0; u64 flags = 0; if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) return -EINVAL; down_read(&fs_info->subvol_sem); if (btrfs_root_readonly(root)) flags |= BTRFS_SUBVOL_RDONLY; up_read(&fs_info->subvol_sem); if (copy_to_user(arg, &flags, sizeof(flags))) ret = -EFAULT; return ret; } static noinline int btrfs_ioctl_subvol_setflags(struct file *file, void __user *arg) { struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; u64 root_flags; u64 flags; int ret = 0; if (!inode_owner_or_capable(file_mnt_idmap(file), inode)) return -EPERM; ret = mnt_want_write_file(file); if (ret) goto out; if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) { ret = -EINVAL; goto out_drop_write; } if (copy_from_user(&flags, arg, sizeof(flags))) { ret = -EFAULT; goto out_drop_write; } if (flags & ~BTRFS_SUBVOL_RDONLY) { ret = -EOPNOTSUPP; goto out_drop_write; } down_write(&fs_info->subvol_sem); /* nothing to do */ if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root)) goto out_drop_sem; root_flags = btrfs_root_flags(&root->root_item); if (flags & BTRFS_SUBVOL_RDONLY) { btrfs_set_root_flags(&root->root_item, root_flags | BTRFS_ROOT_SUBVOL_RDONLY); } else { /* * Block RO -> RW transition if this subvolume is involved in * send */ spin_lock(&root->root_item_lock); if (root->send_in_progress == 0) { btrfs_set_root_flags(&root->root_item, root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY); spin_unlock(&root->root_item_lock); } else { spin_unlock(&root->root_item_lock); btrfs_warn(fs_info, "Attempt to set subvolume %llu read-write during send", btrfs_root_id(root)); ret = -EPERM; goto out_drop_sem; } } trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out_reset; } ret = btrfs_update_root(trans, fs_info->tree_root, &root->root_key, &root->root_item); if (ret < 0) { btrfs_end_transaction(trans); goto out_reset; } ret = btrfs_commit_transaction(trans); out_reset: if (ret) btrfs_set_root_flags(&root->root_item, root_flags); out_drop_sem: up_write(&fs_info->subvol_sem); out_drop_write: mnt_drop_write_file(file); out: return ret; } static noinline int key_in_sk(const struct btrfs_key *key, const struct btrfs_ioctl_search_key *sk) { struct btrfs_key test; int ret; test.objectid = sk->min_objectid; test.type = sk->min_type; test.offset = sk->min_offset; ret = btrfs_comp_cpu_keys(key, &test); if (ret < 0) return 0; test.objectid = sk->max_objectid; test.type = sk->max_type; test.offset = sk->max_offset; ret = btrfs_comp_cpu_keys(key, &test); if (ret > 0) return 0; return 1; } static noinline int copy_to_sk(struct btrfs_path *path, struct btrfs_key *key, const struct btrfs_ioctl_search_key *sk, u64 *buf_size, char __user *ubuf, unsigned long *sk_offset, int *num_found) { u64 found_transid; struct extent_buffer *leaf; struct btrfs_ioctl_search_header sh; struct btrfs_key test; unsigned long item_off; unsigned long item_len; int nritems; int i; int slot; int ret = 0; leaf = path->nodes[0]; slot = path->slots[0]; nritems = btrfs_header_nritems(leaf); if (btrfs_header_generation(leaf) > sk->max_transid) { i = nritems; goto advance_key; } found_transid = btrfs_header_generation(leaf); for (i = slot; i < nritems; i++) { item_off = btrfs_item_ptr_offset(leaf, i); item_len = btrfs_item_size(leaf, i); btrfs_item_key_to_cpu(leaf, key, i); if (!key_in_sk(key, sk)) continue; if (sizeof(sh) + item_len > *buf_size) { if (*num_found) { ret = 1; goto out; } /* * return one empty item back for v1, which does not * handle -EOVERFLOW */ *buf_size = sizeof(sh) + item_len; item_len = 0; ret = -EOVERFLOW; } if (sizeof(sh) + item_len + *sk_offset > *buf_size) { ret = 1; goto out; } sh.objectid = key->objectid; sh.type = key->type; sh.offset = key->offset; sh.len = item_len; sh.transid = found_transid; /* * Copy search result header. If we fault then loop again so we * can fault in the pages and -EFAULT there if there's a * problem. Otherwise we'll fault and then copy the buffer in * properly this next time through */ if (copy_to_user_nofault(ubuf + *sk_offset, &sh, sizeof(sh))) { ret = 0; goto out; } *sk_offset += sizeof(sh); if (item_len) { char __user *up = ubuf + *sk_offset; /* * Copy the item, same behavior as above, but reset the * * sk_offset so we copy the full thing again. */ if (read_extent_buffer_to_user_nofault(leaf, up, item_off, item_len)) { ret = 0; *sk_offset -= sizeof(sh); goto out; } *sk_offset += item_len; } (*num_found)++; if (ret) /* -EOVERFLOW from above */ goto out; if (*num_found >= sk->nr_items) { ret = 1; goto out; } } advance_key: ret = 0; test.objectid = sk->max_objectid; test.type = sk->max_type; test.offset = sk->max_offset; if (btrfs_comp_cpu_keys(key, &test) >= 0) ret = 1; else if (key->offset < (u64)-1) key->offset++; else if (key->type < (u8)-1) { key->offset = 0; key->type++; } else if (key->objectid < (u64)-1) { key->offset = 0; key->type = 0; key->objectid++; } else ret = 1; out: /* * 0: all items from this leaf copied, continue with next * 1: * more items can be copied, but unused buffer is too small * * all items were found * Either way, it will stops the loop which iterates to the next * leaf * -EOVERFLOW: item was to large for buffer * -EFAULT: could not copy extent buffer back to userspace */ return ret; } static noinline int search_ioctl(struct btrfs_root *root, struct btrfs_ioctl_search_key *sk, u64 *buf_size, char __user *ubuf) { struct btrfs_fs_info *info = root->fs_info; struct btrfs_key key; struct btrfs_path *path; int ret; int num_found = 0; unsigned long sk_offset = 0; if (*buf_size < sizeof(struct btrfs_ioctl_search_header)) { *buf_size = sizeof(struct btrfs_ioctl_search_header); return -EOVERFLOW; } path = btrfs_alloc_path(); if (!path) return -ENOMEM; if (sk->tree_id == 0) { /* Search the root that we got passed. */ root = btrfs_grab_root(root); } else { /* Look up the root from the arguments. */ root = btrfs_get_fs_root(info, sk->tree_id, true); if (IS_ERR(root)) { btrfs_free_path(path); return PTR_ERR(root); } } key.objectid = sk->min_objectid; key.type = sk->min_type; key.offset = sk->min_offset; while (1) { /* * Ensure that the whole user buffer is faulted in at sub-page * granularity, otherwise the loop may live-lock. */ if (fault_in_subpage_writeable(ubuf + sk_offset, *buf_size - sk_offset)) { ret = -EFAULT; break; } ret = btrfs_search_forward(root, &key, path, sk->min_transid); if (ret) break; ret = copy_to_sk(path, &key, sk, buf_size, ubuf, &sk_offset, &num_found); btrfs_release_path(path); if (ret) break; } /* Normalize return values from btrfs_search_forward() and copy_to_sk(). */ if (ret > 0) ret = 0; sk->nr_items = num_found; btrfs_put_root(root); btrfs_free_path(path); return ret; } static noinline int btrfs_ioctl_tree_search(struct btrfs_root *root, void __user *argp) { struct btrfs_ioctl_search_args __user *uargs = argp; struct btrfs_ioctl_search_key sk; int ret; u64 buf_size; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (copy_from_user(&sk, &uargs->key, sizeof(sk))) return -EFAULT; buf_size = sizeof(uargs->buf); ret = search_ioctl(root, &sk, &buf_size, uargs->buf); /* * In the origin implementation an overflow is handled by returning a * search header with a len of zero, so reset ret. */ if (ret == -EOVERFLOW) ret = 0; if (ret == 0 && copy_to_user(&uargs->key, &sk, sizeof(sk))) ret = -EFAULT; return ret; } static noinline int btrfs_ioctl_tree_search_v2(struct btrfs_root *root, void __user *argp) { struct btrfs_ioctl_search_args_v2 __user *uarg = argp; struct btrfs_ioctl_search_args_v2 args; int ret; u64 buf_size; const u64 buf_limit = SZ_16M; if (!capable(CAP_SYS_ADMIN)) return -EPERM; /* copy search header and buffer size */ if (copy_from_user(&args, uarg, sizeof(args))) return -EFAULT; buf_size = args.buf_size; /* limit result size to 16MB */ if (buf_size > buf_limit) buf_size = buf_limit; ret = search_ioctl(root, &args.key, &buf_size, (char __user *)(&uarg->buf[0])); if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key))) ret = -EFAULT; else if (ret == -EOVERFLOW && copy_to_user(&uarg->buf_size, &buf_size, sizeof(buf_size))) ret = -EFAULT; return ret; } /* * Search INODE_REFs to identify path name of 'dirid' directory * in a 'tree_id' tree. and sets path name to 'name'. */ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, u64 tree_id, u64 dirid, char *name) { struct btrfs_root *root; struct btrfs_key key; char *ptr; int ret = -1; int slot; int len; int total_len = 0; struct btrfs_inode_ref *iref; struct extent_buffer *l; struct btrfs_path *path; if (dirid == BTRFS_FIRST_FREE_OBJECTID) { name[0]='\0'; return 0; } path = btrfs_alloc_path(); if (!path) return -ENOMEM; ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX - 1]; root = btrfs_get_fs_root(info, tree_id, true); if (IS_ERR(root)) { ret = PTR_ERR(root); root = NULL; goto out; } key.objectid = dirid; key.type = BTRFS_INODE_REF_KEY; key.offset = (u64)-1; while (1) { ret = btrfs_search_backwards(root, &key, path); if (ret < 0) goto out; else if (ret > 0) { ret = -ENOENT; goto out; } l = path->nodes[0]; slot = path->slots[0]; iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref); len = btrfs_inode_ref_name_len(l, iref); ptr -= len + 1; total_len += len + 1; if (ptr < name) { ret = -ENAMETOOLONG; goto out; } *(ptr + len) = '/'; read_extent_buffer(l, ptr, (unsigned long)(iref + 1), len); if (key.offset == BTRFS_FIRST_FREE_OBJECTID) break; btrfs_release_path(path); key.objectid = key.offset; key.offset = (u64)-1; dirid = key.objectid; } memmove(name, ptr, total_len); name[total_len] = '\0'; ret = 0; out: btrfs_put_root(root); btrfs_free_path(path); return ret; } static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap, struct inode *inode, struct btrfs_ioctl_ino_lookup_user_args *args) { struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; u64 upper_limit = btrfs_ino(BTRFS_I(inode)); u64 treeid = btrfs_root_id(BTRFS_I(inode)->root); u64 dirid = args->dirid; unsigned long item_off; unsigned long item_len; struct btrfs_inode_ref *iref; struct btrfs_root_ref *rref; struct btrfs_root *root = NULL; struct btrfs_path *path; struct btrfs_key key, key2; struct extent_buffer *leaf; char *ptr; int slot; int len; int total_len = 0; int ret; path = btrfs_alloc_path(); if (!path) return -ENOMEM; /* * If the bottom subvolume does not exist directly under upper_limit, * construct the path in from the bottom up. */ if (dirid != upper_limit) { ptr = &args->path[BTRFS_INO_LOOKUP_USER_PATH_MAX - 1]; root = btrfs_get_fs_root(fs_info, treeid, true); if (IS_ERR(root)) { ret = PTR_ERR(root); goto out; } key.objectid = dirid; key.type = BTRFS_INODE_REF_KEY; key.offset = (u64)-1; while (1) { struct btrfs_inode *temp_inode; ret = btrfs_search_backwards(root, &key, path); if (ret < 0) goto out_put; else if (ret > 0) { ret = -ENOENT; goto out_put; } leaf = path->nodes[0]; slot = path->slots[0]; iref = btrfs_item_ptr(leaf, slot, struct btrfs_inode_ref); len = btrfs_inode_ref_name_len(leaf, iref); ptr -= len + 1; total_len += len + 1; if (ptr < args->path) { ret = -ENAMETOOLONG; goto out_put; } *(ptr + len) = '/'; read_extent_buffer(leaf, ptr, (unsigned long)(iref + 1), len); /* Check the read+exec permission of this directory */ ret = btrfs_previous_item(root, path, dirid, BTRFS_INODE_ITEM_KEY); if (ret < 0) { goto out_put; } else if (ret > 0) { ret = -ENOENT; goto out_put; } leaf = path->nodes[0]; slot = path->slots[0]; btrfs_item_key_to_cpu(leaf, &key2, slot); if (key2.objectid != dirid) { ret = -ENOENT; goto out_put; } /* * We don't need the path anymore, so release it and * avoid deadlocks and lockdep warnings in case * btrfs_iget() needs to lookup the inode from its root * btree and lock the same leaf. */ btrfs_release_path(path); temp_inode = btrfs_iget(key2.objectid, root); if (IS_ERR(temp_inode)) { ret = PTR_ERR(temp_inode); goto out_put; } ret = inode_permission(idmap, &temp_inode->vfs_inode, MAY_READ | MAY_EXEC); iput(&temp_inode->vfs_inode); if (ret) { ret = -EACCES; goto out_put; } if (key.offset == upper_limit) break; if (key.objectid == BTRFS_FIRST_FREE_OBJECTID) { ret = -EACCES; goto out_put; } key.objectid = key.offset; key.offset = (u64)-1; dirid = key.objectid; } memmove(args->path, ptr, total_len); args->path[total_len] = '\0'; btrfs_put_root(root); root = NULL; btrfs_release_path(path); } /* Get the bottom subvolume's name from ROOT_REF */ key.objectid = treeid; key.type = BTRFS_ROOT_REF_KEY; key.offset = args->treeid; ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0); if (ret < 0) { goto out; } else if (ret > 0) { ret = -ENOENT; goto out; } leaf = path->nodes[0]; slot = path->slots[0]; btrfs_item_key_to_cpu(leaf, &key, slot); item_off = btrfs_item_ptr_offset(leaf, slot); item_len = btrfs_item_size(leaf, slot); /* Check if dirid in ROOT_REF corresponds to passed dirid */ rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref); if (args->dirid != btrfs_root_ref_dirid(leaf, rref)) { ret = -EINVAL; goto out; } /* Copy subvolume's name */ item_off += sizeof(struct btrfs_root_ref); item_len -= sizeof(struct btrfs_root_ref); read_extent_buffer(leaf, args->name, item_off, item_len); args->name[item_len] = 0; out_put: btrfs_put_root(root); out: btrfs_free_path(path); return ret; } static noinline int btrfs_ioctl_ino_lookup(struct btrfs_root *root, void __user *argp) { struct btrfs_ioctl_ino_lookup_args *args; int ret = 0; args = memdup_user(argp, sizeof(*args)); if (IS_ERR(args)) return PTR_ERR(args); /* * Unprivileged query to obtain the containing subvolume root id. The * path is reset so it's consistent with btrfs_search_path_in_tree. */ if (args->treeid == 0) args->treeid = btrfs_root_id(root); if (args->objectid == BTRFS_FIRST_FREE_OBJECTID) { args->name[0] = 0; goto out; } if (!capable(CAP_SYS_ADMIN)) { ret = -EPERM; goto out; } ret = btrfs_search_path_in_tree(root->fs_info, args->treeid, args->objectid, args->name); out: if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) ret = -EFAULT; kfree(args); return ret; } /* * Version of ino_lookup ioctl (unprivileged) * * The main differences from ino_lookup ioctl are: * * 1. Read + Exec permission will be checked using inode_permission() during * path construction. -EACCES will be returned in case of failure. * 2. Path construction will be stopped at the inode number which corresponds * to the fd with which this ioctl is called. If constructed path does not * exist under fd's inode, -EACCES will be returned. * 3. The name of bottom subvolume is also searched and filled. */ static int btrfs_ioctl_ino_lookup_user(struct file *file, void __user *argp) { struct btrfs_ioctl_ino_lookup_user_args *args; struct inode *inode; int ret; args = memdup_user(argp, sizeof(*args)); if (IS_ERR(args)) return PTR_ERR(args); inode = file_inode(file); if (args->dirid == BTRFS_FIRST_FREE_OBJECTID && btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) { /* * The subvolume does not exist under fd with which this is * called */ kfree(args); return -EACCES; } ret = btrfs_search_path_in_tree_user(file_mnt_idmap(file), inode, args); if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) ret = -EFAULT; kfree(args); return ret; } /* Get the subvolume information in BTRFS_ROOT_ITEM and BTRFS_ROOT_BACKREF */ static int btrfs_ioctl_get_subvol_info(struct inode *inode, void __user *argp) { struct btrfs_ioctl_get_subvol_info_args *subvol_info; struct btrfs_fs_info *fs_info; struct btrfs_root *root; struct btrfs_path *path; struct btrfs_key key; struct btrfs_root_item *root_item; struct btrfs_root_ref *rref; struct extent_buffer *leaf; unsigned long item_off; unsigned long item_len; int slot; int ret = 0; path = btrfs_alloc_path(); if (!path) return -ENOMEM; subvol_info = kzalloc(sizeof(*subvol_info), GFP_KERNEL); if (!subvol_info) { btrfs_free_path(path); return -ENOMEM; } fs_info = BTRFS_I(inode)->root->fs_info; /* Get root_item of inode's subvolume */ key.objectid = btrfs_root_id(BTRFS_I(inode)->root); root = btrfs_get_fs_root(fs_info, key.objectid, true); if (IS_ERR(root)) { ret = PTR_ERR(root); goto out_free; } root_item = &root->root_item; subvol_info->treeid = key.objectid; subvol_info->generation = btrfs_root_generation(root_item); subvol_info->flags = btrfs_root_flags(root_item); memcpy(subvol_info->uuid, root_item->uuid, BTRFS_UUID_SIZE); memcpy(subvol_info->parent_uuid, root_item->parent_uuid, BTRFS_UUID_SIZE); memcpy(subvol_info->received_uuid, root_item->received_uuid, BTRFS_UUID_SIZE); subvol_info->ctransid = btrfs_root_ctransid(root_item); subvol_info->ctime.sec = btrfs_stack_timespec_sec(&root_item->ctime); subvol_info->ctime.nsec = btrfs_stack_timespec_nsec(&root_item->ctime); subvol_info->otransid = btrfs_root_otransid(root_item); subvol_info->otime.sec = btrfs_stack_timespec_sec(&root_item->otime); subvol_info->otime.nsec = btrfs_stack_timespec_nsec(&root_item->otime); subvol_info->stransid = btrfs_root_stransid(root_item); subvol_info->stime.sec = btrfs_stack_timespec_sec(&root_item->stime); subvol_info->stime.nsec = btrfs_stack_timespec_nsec(&root_item->stime); subvol_info->rtransid = btrfs_root_rtransid(root_item); subvol_info->rtime.sec = btrfs_stack_timespec_sec(&root_item->rtime); subvol_info->rtime.nsec = btrfs_stack_timespec_nsec(&root_item->rtime); if (key.objectid != BTRFS_FS_TREE_OBJECTID) { /* Search root tree for ROOT_BACKREF of this subvolume */ key.type = BTRFS_ROOT_BACKREF_KEY; key.offset = 0; ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0); if (ret < 0) { goto out; } else if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { ret = btrfs_next_leaf(fs_info->tree_root, path); if (ret < 0) { goto out; } else if (ret > 0) { ret = -EUCLEAN; goto out; } } leaf = path->nodes[0]; slot = path->slots[0]; btrfs_item_key_to_cpu(leaf, &key, slot); if (key.objectid == subvol_info->treeid && key.type == BTRFS_ROOT_BACKREF_KEY) { subvol_info->parent_id = key.offset; rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref); subvol_info->dirid = btrfs_root_ref_dirid(leaf, rref); item_off = btrfs_item_ptr_offset(leaf, slot) + sizeof(struct btrfs_root_ref); item_len = btrfs_item_size(leaf, slot) - sizeof(struct btrfs_root_ref); read_extent_buffer(leaf, subvol_info->name, item_off, item_len); } else { ret = -ENOENT; goto out; } } btrfs_free_path(path); path = NULL; if (copy_to_user(argp, subvol_info, sizeof(*subvol_info))) ret = -EFAULT; out: btrfs_put_root(root); out_free: btrfs_free_path(path); kfree(subvol_info); return ret; } /* * Return ROOT_REF information of the subvolume containing this inode * except the subvolume name. */ static int btrfs_ioctl_get_subvol_rootref(struct btrfs_root *root, void __user *argp) { struct btrfs_ioctl_get_subvol_rootref_args *rootrefs; struct btrfs_root_ref *rref; struct btrfs_path *path; struct btrfs_key key; struct extent_buffer *leaf; u64 objectid; int slot; int ret; u8 found; path = btrfs_alloc_path(); if (!path) return -ENOMEM; rootrefs = memdup_user(argp, sizeof(*rootrefs)); if (IS_ERR(rootrefs)) { btrfs_free_path(path); return PTR_ERR(rootrefs); } objectid = btrfs_root_id(root); key.objectid = objectid; key.type = BTRFS_ROOT_REF_KEY; key.offset = rootrefs->min_treeid; found = 0; root = root->fs_info->tree_root; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) { goto out; } else if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { ret = btrfs_next_leaf(root, path); if (ret < 0) { goto out; } else if (ret > 0) { ret = -EUCLEAN; goto out; } } while (1) { leaf = path->nodes[0]; slot = path->slots[0]; btrfs_item_key_to_cpu(leaf, &key, slot); if (key.objectid != objectid || key.type != BTRFS_ROOT_REF_KEY) { ret = 0; goto out; } if (found == BTRFS_MAX_ROOTREF_BUFFER_NUM) { ret = -EOVERFLOW; goto out; } rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref); rootrefs->rootref[found].treeid = key.offset; rootrefs->rootref[found].dirid = btrfs_root_ref_dirid(leaf, rref); found++; ret = btrfs_next_item(root, path); if (ret < 0) { goto out; } else if (ret > 0) { ret = -EUCLEAN; goto out; } } out: btrfs_free_path(path); if (!ret || ret == -EOVERFLOW) { rootrefs->num_items = found; /* update min_treeid for next search */ if (found) rootrefs->min_treeid = rootrefs->rootref[found - 1].treeid + 1; if (copy_to_user(argp, rootrefs, sizeof(*rootrefs))) ret = -EFAULT; } kfree(rootrefs); return ret; } static noinline int btrfs_ioctl_snap_destroy(struct file *file, void __user *arg, bool destroy_v2) { struct dentry *parent = file->f_path.dentry; struct dentry *dentry; struct inode *dir = d_inode(parent); struct btrfs_fs_info *fs_info = inode_to_fs_info(dir); struct inode *inode; struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_root *dest = NULL; struct btrfs_ioctl_vol_args *vol_args = NULL; struct btrfs_ioctl_vol_args_v2 *vol_args2 = NULL; struct mnt_idmap *idmap = file_mnt_idmap(file); char *subvol_name, *subvol_name_ptr = NULL; int subvol_namelen; int ret = 0; bool destroy_parent = false; /* We don't support snapshots with extent tree v2 yet. */ if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) { btrfs_err(fs_info, "extent tree v2 doesn't support snapshot deletion yet"); return -EOPNOTSUPP; } if (destroy_v2) { vol_args2 = memdup_user(arg, sizeof(*vol_args2)); if (IS_ERR(vol_args2)) return PTR_ERR(vol_args2); if (vol_args2->flags & ~BTRFS_SUBVOL_DELETE_ARGS_MASK) { ret = -EOPNOTSUPP; goto out; } /* * If SPEC_BY_ID is not set, we are looking for the subvolume by * name, same as v1 currently does. */ if (!(vol_args2->flags & BTRFS_SUBVOL_SPEC_BY_ID)) { ret = btrfs_check_ioctl_vol_args2_subvol_name(vol_args2); if (ret < 0) goto out; subvol_name = vol_args2->name; ret = mnt_want_write_file(file); if (ret) goto out; } else { struct inode *old_dir; if (vol_args2->subvolid < BTRFS_FIRST_FREE_OBJECTID) { ret = -EINVAL; goto out; } ret = mnt_want_write_file(file); if (ret) goto out; dentry = btrfs_get_dentry(fs_info->sb, BTRFS_FIRST_FREE_OBJECTID, vol_args2->subvolid, 0); if (IS_ERR(dentry)) { ret = PTR_ERR(dentry); goto out_drop_write; } /* * Change the default parent since the subvolume being * deleted can be outside of the current mount point. */ parent = btrfs_get_parent(dentry); /* * At this point dentry->d_name can point to '/' if the * subvolume we want to destroy is outsite of the * current mount point, so we need to release the * current dentry and execute the lookup to return a new * one with ->d_name pointing to the * <mount point>/subvol_name. */ dput(dentry); if (IS_ERR(parent)) { ret = PTR_ERR(parent); goto out_drop_write; } old_dir = dir; dir = d_inode(parent); /* * If v2 was used with SPEC_BY_ID, a new parent was * allocated since the subvolume can be outside of the * current mount point. Later on we need to release this * new parent dentry. */ destroy_parent = true; /* * On idmapped mounts, deletion via subvolid is * restricted to subvolumes that are immediate * ancestors of the inode referenced by the file * descriptor in the ioctl. Otherwise the idmapping * could potentially be abused to delete subvolumes * anywhere in the filesystem the user wouldn't be able * to delete without an idmapped mount. */ if (old_dir != dir && idmap != &nop_mnt_idmap) { ret = -EOPNOTSUPP; goto free_parent; } subvol_name_ptr = btrfs_get_subvol_name_from_objectid( fs_info, vol_args2->subvolid); if (IS_ERR(subvol_name_ptr)) { ret = PTR_ERR(subvol_name_ptr); goto free_parent; } /* subvol_name_ptr is already nul terminated */ subvol_name = (char *)kbasename(subvol_name_ptr); } } else { vol_args = memdup_user(arg, sizeof(*vol_args)); if (IS_ERR(vol_args)) return PTR_ERR(vol_args); ret = btrfs_check_ioctl_vol_args_path(vol_args); if (ret < 0) goto out; subvol_name = vol_args->name; ret = mnt_want_write_file(file); if (ret) goto out; } subvol_namelen = strlen(subvol_name); if (strchr(subvol_name, '/') || strncmp(subvol_name, "..", subvol_namelen) == 0) { ret = -EINVAL; goto free_subvol_name; } if (!S_ISDIR(dir->i_mode)) { ret = -ENOTDIR; goto free_subvol_name; } ret = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT); if (ret == -EINTR) goto free_subvol_name; dentry = lookup_one(idmap, subvol_name, parent, subvol_namelen); if (IS_ERR(dentry)) { ret = PTR_ERR(dentry); goto out_unlock_dir; } if (d_really_is_negative(dentry)) { ret = -ENOENT; goto out_dput; } inode = d_inode(dentry); dest = BTRFS_I(inode)->root; if (!capable(CAP_SYS_ADMIN)) { /* * Regular user. Only allow this with a special mount * option, when the user has write+exec access to the * subvol root, and when rmdir(2) would have been * allowed. * * Note that this is _not_ check that the subvol is * empty or doesn't contain data that we wouldn't * otherwise be able to delete. * * Users who want to delete empty subvols should try * rmdir(2). */ ret = -EPERM; if (!btrfs_test_opt(fs_info, USER_SUBVOL_RM_ALLOWED)) goto out_dput; /* * Do not allow deletion if the parent dir is the same * as the dir to be deleted. That means the ioctl * must be called on the dentry referencing the root * of the subvol, not a random directory contained * within it. */ ret = -EINVAL; if (root == dest) goto out_dput; ret = inode_permission(idmap, inode, MAY_WRITE | MAY_EXEC); if (ret) goto out_dput; } /* check if subvolume may be deleted by a user */ ret = btrfs_may_delete(idmap, dir, dentry, 1); if (ret) goto out_dput; if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) { ret = -EINVAL; goto out_dput; } btrfs_inode_lock(BTRFS_I(inode), 0); ret = btrfs_delete_subvolume(BTRFS_I(dir), dentry); btrfs_inode_unlock(BTRFS_I(inode), 0); if (!ret) d_delete_notify(dir, dentry); out_dput: dput(dentry); out_unlock_dir: btrfs_inode_unlock(BTRFS_I(dir), 0); free_subvol_name: kfree(subvol_name_ptr); free_parent: if (destroy_parent) dput(parent); out_drop_write: mnt_drop_write_file(file); out: kfree(vol_args2); kfree(vol_args); return ret; } static int btrfs_ioctl_defrag(struct file *file, void __user *argp) { struct inode *inode = file_inode(file); struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ioctl_defrag_range_args range = {0}; int ret; ret = mnt_want_write_file(file); if (ret) return ret; if (btrfs_root_readonly(root)) { ret = -EROFS; goto out; } switch (inode->i_mode & S_IFMT) { case S_IFDIR: if (!capable(CAP_SYS_ADMIN)) { ret = -EPERM; goto out; } ret = btrfs_defrag_root(root); break; case S_IFREG: /* * Note that this does not check the file descriptor for write * access. This prevents defragmenting executables that are * running and allows defrag on files open in read-only mode. */ if (!capable(CAP_SYS_ADMIN) && inode_permission(&nop_mnt_idmap, inode, MAY_WRITE)) { ret = -EPERM; goto out; } /* * Don't allow defrag on pre-content watched files, as it could * populate the page cache with 0's via readahead. */ if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) { ret = -EINVAL; goto out; } if (argp) { if (copy_from_user(&range, argp, sizeof(range))) { ret = -EFAULT; goto out; } if (range.flags & ~BTRFS_DEFRAG_RANGE_FLAGS_SUPP) { ret = -EOPNOTSUPP; goto out; } /* compression requires us to start the IO */ if ((range.flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { range.flags |= BTRFS_DEFRAG_RANGE_START_IO; range.extent_thresh = (u32)-1; } } else { /* the rest are all set to zero by kzalloc */ range.len = (u64)-1; } ret = btrfs_defrag_file(BTRFS_I(file_inode(file)), &file->f_ra, &range, BTRFS_OLDEST_GENERATION, 0); if (ret > 0) ret = 0; break; default: ret = -EINVAL; } out: mnt_drop_write_file(file); return ret; } static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg) { struct btrfs_ioctl_vol_args *vol_args; bool restore_op = false; int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) { btrfs_err(fs_info, "device add not supported on extent tree v2 yet"); return -EINVAL; } if (fs_info->fs_devices->temp_fsid) { btrfs_err(fs_info, "device add not supported on cloned temp-fsid mount"); return -EINVAL; } if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_ADD)) { if (!btrfs_exclop_start_try_lock(fs_info, BTRFS_EXCLOP_DEV_ADD)) return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; /* * We can do the device add because we have a paused balanced, * change the exclusive op type and remember we should bring * back the paused balance */ fs_info->exclusive_operation = BTRFS_EXCLOP_DEV_ADD; btrfs_exclop_start_unlock(fs_info); restore_op = true; } vol_args = memdup_user(arg, sizeof(*vol_args)); if (IS_ERR(vol_args)) { ret = PTR_ERR(vol_args); goto out; } ret = btrfs_check_ioctl_vol_args_path(vol_args); if (ret < 0) goto out_free; ret = btrfs_init_new_device(fs_info, vol_args->name); if (!ret) btrfs_info(fs_info, "disk added %s", vol_args->name); out_free: kfree(vol_args); out: if (restore_op) btrfs_exclop_balance(fs_info, BTRFS_EXCLOP_BALANCE_PAUSED); else btrfs_exclop_finish(fs_info); return ret; } static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) { BTRFS_DEV_LOOKUP_ARGS(args); struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); struct btrfs_ioctl_vol_args_v2 *vol_args; struct file *bdev_file = NULL; int ret; bool cancel = false; if (!capable(CAP_SYS_ADMIN)) return -EPERM; vol_args = memdup_user(arg, sizeof(*vol_args)); if (IS_ERR(vol_args)) return PTR_ERR(vol_args); if (vol_args->flags & ~BTRFS_DEVICE_REMOVE_ARGS_MASK) { ret = -EOPNOTSUPP; goto out; } ret = btrfs_check_ioctl_vol_args2_subvol_name(vol_args); if (ret < 0) goto out; if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) { args.devid = vol_args->devid; } else if (!strcmp("cancel", vol_args->name)) { cancel = true; } else { ret = btrfs_get_dev_args_from_path(fs_info, &args, vol_args->name); if (ret) goto out; } ret = mnt_want_write_file(file); if (ret) goto out; ret = exclop_start_or_cancel_reloc(fs_info, BTRFS_EXCLOP_DEV_REMOVE, cancel); if (ret) goto err_drop; /* Exclusive operation is now claimed */ ret = btrfs_rm_device(fs_info, &args, &bdev_file); btrfs_exclop_finish(fs_info); if (!ret) { if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) btrfs_info(fs_info, "device deleted: id %llu", vol_args->devid); else btrfs_info(fs_info, "device deleted: %s", vol_args->name); } err_drop: mnt_drop_write_file(file); if (bdev_file) fput(bdev_file); out: btrfs_put_dev_args_from_path(&args); kfree(vol_args); return ret; } static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) { BTRFS_DEV_LOOKUP_ARGS(args); struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); struct btrfs_ioctl_vol_args *vol_args; struct file *bdev_file = NULL; int ret; bool cancel = false; if (!capable(CAP_SYS_ADMIN)) return -EPERM; vol_args = memdup_user(arg, sizeof(*vol_args)); if (IS_ERR(vol_args)) return PTR_ERR(vol_args); ret = btrfs_check_ioctl_vol_args_path(vol_args); if (ret < 0) goto out_free; if (!strcmp("cancel", vol_args->name)) { cancel = true; } else { ret = btrfs_get_dev_args_from_path(fs_info, &args, vol_args->name); if (ret) goto out; } ret = mnt_want_write_file(file); if (ret) goto out; ret = exclop_start_or_cancel_reloc(fs_info, BTRFS_EXCLOP_DEV_REMOVE, cancel); if (ret == 0) { ret = btrfs_rm_device(fs_info, &args, &bdev_file); if (!ret) btrfs_info(fs_info, "disk deleted %s", vol_args->name); btrfs_exclop_finish(fs_info); } mnt_drop_write_file(file); if (bdev_file) fput(bdev_file); out: btrfs_put_dev_args_from_path(&args); out_free: kfree(vol_args); return ret; } static long btrfs_ioctl_fs_info(const struct btrfs_fs_info *fs_info, void __user *arg) { struct btrfs_ioctl_fs_info_args *fi_args; struct btrfs_device *device; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; u64 flags_in; int ret = 0; fi_args = memdup_user(arg, sizeof(*fi_args)); if (IS_ERR(fi_args)) return PTR_ERR(fi_args); flags_in = fi_args->flags; memset(fi_args, 0, sizeof(*fi_args)); rcu_read_lock(); fi_args->num_devices = fs_devices->num_devices; list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) { if (device->devid > fi_args->max_id) fi_args->max_id = device->devid; } rcu_read_unlock(); memcpy(&fi_args->fsid, fs_devices->fsid, sizeof(fi_args->fsid)); fi_args->nodesize = fs_info->nodesize; fi_args->sectorsize = fs_info->sectorsize; fi_args->clone_alignment = fs_info->sectorsize; if (flags_in & BTRFS_FS_INFO_FLAG_CSUM_INFO) { fi_args->csum_type = btrfs_super_csum_type(fs_info->super_copy); fi_args->csum_size = btrfs_super_csum_size(fs_info->super_copy); fi_args->flags |= BTRFS_FS_INFO_FLAG_CSUM_INFO; } if (flags_in & BTRFS_FS_INFO_FLAG_GENERATION) { fi_args->generation = btrfs_get_fs_generation(fs_info); fi_args->flags |= BTRFS_FS_INFO_FLAG_GENERATION; } if (flags_in & BTRFS_FS_INFO_FLAG_METADATA_UUID) { memcpy(&fi_args->metadata_uuid, fs_devices->metadata_uuid, sizeof(fi_args->metadata_uuid)); fi_args->flags |= BTRFS_FS_INFO_FLAG_METADATA_UUID; } if (copy_to_user(arg, fi_args, sizeof(*fi_args))) ret = -EFAULT; kfree(fi_args); return ret; } static long btrfs_ioctl_dev_info(const struct btrfs_fs_info *fs_info, void __user *arg) { BTRFS_DEV_LOOKUP_ARGS(args); struct btrfs_ioctl_dev_info_args *di_args; struct btrfs_device *dev; int ret = 0; di_args = memdup_user(arg, sizeof(*di_args)); if (IS_ERR(di_args)) return PTR_ERR(di_args); args.devid = di_args->devid; if (!btrfs_is_empty_uuid(di_args->uuid)) args.uuid = di_args->uuid; rcu_read_lock(); dev = btrfs_find_device(fs_info->fs_devices, &args); if (!dev) { ret = -ENODEV; goto out; } di_args->devid = dev->devid; di_args->bytes_used = btrfs_device_get_bytes_used(dev); di_args->total_bytes = btrfs_device_get_total_bytes(dev); memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); memcpy(di_args->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE); if (dev->name) strscpy(di_args->path, btrfs_dev_name(dev), sizeof(di_args->path)); else di_args->path[0] = '\0'; out: rcu_read_unlock(); if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args))) ret = -EFAULT; kfree(di_args); return ret; } static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) { struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root *new_root; struct btrfs_dir_item *di; struct btrfs_trans_handle *trans; struct btrfs_path *path = NULL; struct btrfs_disk_key disk_key; struct fscrypt_str name = FSTR_INIT("default", 7); u64 objectid = 0; u64 dir_id; int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; ret = mnt_want_write_file(file); if (ret) return ret; if (copy_from_user(&objectid, argp, sizeof(objectid))) { ret = -EFAULT; goto out; } if (!objectid) objectid = BTRFS_FS_TREE_OBJECTID; new_root = btrfs_get_fs_root(fs_info, objectid, true); if (IS_ERR(new_root)) { ret = PTR_ERR(new_root); goto out; } if (!is_fstree(btrfs_root_id(new_root))) { ret = -ENOENT; goto out_free; } path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; goto out_free; } trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out_free; } dir_id = btrfs_super_root_dir(fs_info->super_copy); di = btrfs_lookup_dir_item(trans, fs_info->tree_root, path, dir_id, &name, 1); if (IS_ERR_OR_NULL(di)) { btrfs_release_path(path); btrfs_end_transaction(trans); btrfs_err(fs_info, "Umm, you don't have the default diritem, this isn't going to work"); ret = -ENOENT; goto out_free; } btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key); btrfs_set_dir_item_key(path->nodes[0], di, &disk_key); btrfs_release_path(path); btrfs_set_fs_incompat(fs_info, DEFAULT_SUBVOL); btrfs_end_transaction(trans); out_free: btrfs_put_root(new_root); btrfs_free_path(path); out: mnt_drop_write_file(file); return ret; } static void get_block_group_info(struct list_head *groups_list, struct btrfs_ioctl_space_info *space) { struct btrfs_block_group *block_group; space->total_bytes = 0; space->used_bytes = 0; space->flags = 0; list_for_each_entry(block_group, groups_list, list) { space->flags = block_group->flags; space->total_bytes += block_group->length; space->used_bytes += block_group->used; } } static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info, void __user *arg) { struct btrfs_ioctl_space_args space_args = { 0 }; struct btrfs_ioctl_space_info space; struct btrfs_ioctl_space_info *dest; struct btrfs_ioctl_space_info *dest_orig; struct btrfs_ioctl_space_info __user *user_dest; struct btrfs_space_info *info; static const u64 types[] = { BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_SYSTEM, BTRFS_BLOCK_GROUP_METADATA, BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA }; int num_types = 4; int alloc_size; int ret = 0; u64 slot_count = 0; int i, c; if (copy_from_user(&space_args, (struct btrfs_ioctl_space_args __user *)arg, sizeof(space_args))) return -EFAULT; for (i = 0; i < num_types; i++) { struct btrfs_space_info *tmp; info = NULL; list_for_each_entry(tmp, &fs_info->space_info, list) { if (tmp->flags == types[i]) { info = tmp; break; } } if (!info) continue; down_read(&info->groups_sem); for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { if (!list_empty(&info->block_groups[c])) slot_count++; } up_read(&info->groups_sem); } /* * Global block reserve, exported as a space_info */ slot_count++; /* space_slots == 0 means they are asking for a count */ if (space_args.space_slots == 0) { space_args.total_spaces = slot_count; goto out; } slot_count = min_t(u64, space_args.space_slots, slot_count); alloc_size = sizeof(*dest) * slot_count; /* we generally have at most 6 or so space infos, one for each raid * level. So, a whole page should be more than enough for everyone */ if (alloc_size > PAGE_SIZE) return -ENOMEM; space_args.total_spaces = 0; dest = kmalloc(alloc_size, GFP_KERNEL); if (!dest) return -ENOMEM; dest_orig = dest; /* now we have a buffer to copy into */ for (i = 0; i < num_types; i++) { struct btrfs_space_info *tmp; if (!slot_count) break; info = NULL; list_for_each_entry(tmp, &fs_info->space_info, list) { if (tmp->flags == types[i]) { info = tmp; break; } } if (!info) continue; down_read(&info->groups_sem); for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { if (!list_empty(&info->block_groups[c])) { get_block_group_info(&info->block_groups[c], &space); memcpy(dest, &space, sizeof(space)); dest++; space_args.total_spaces++; slot_count--; } if (!slot_count) break; } up_read(&info->groups_sem); } /* * Add global block reserve */ if (slot_count) { struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; spin_lock(&block_rsv->lock); space.total_bytes = block_rsv->size; space.used_bytes = block_rsv->size - block_rsv->reserved; spin_unlock(&block_rsv->lock); space.flags = BTRFS_SPACE_INFO_GLOBAL_RSV; memcpy(dest, &space, sizeof(space)); space_args.total_spaces++; } user_dest = (struct btrfs_ioctl_space_info __user *) (arg + sizeof(struct btrfs_ioctl_space_args)); if (copy_to_user(user_dest, dest_orig, alloc_size)) ret = -EFAULT; kfree(dest_orig); out: if (ret == 0 && copy_to_user(arg, &space_args, sizeof(space_args))) ret = -EFAULT; return ret; } static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root, void __user *argp) { struct btrfs_trans_handle *trans; u64 transid; /* * Start orphan cleanup here for the given root in case it hasn't been * started already by other means. Errors are handled in the other * functions during transaction commit. */ btrfs_orphan_cleanup(root); trans = btrfs_attach_transaction_barrier(root); if (IS_ERR(trans)) { if (PTR_ERR(trans) != -ENOENT) return PTR_ERR(trans); /* No running transaction, don't bother */ transid = btrfs_get_last_trans_committed(root->fs_info); goto out; } transid = trans->transid; btrfs_commit_transaction_async(trans); out: if (argp) if (copy_to_user(argp, &transid, sizeof(transid))) return -EFAULT; return 0; } static noinline long btrfs_ioctl_wait_sync(struct btrfs_fs_info *fs_info, void __user *argp) { /* By default wait for the current transaction. */ u64 transid = 0; if (argp) if (copy_from_user(&transid, argp, sizeof(transid))) return -EFAULT; return btrfs_wait_for_commit(fs_info, transid); } static long btrfs_ioctl_scrub(struct file *file, void __user *arg) { struct btrfs_fs_info *fs_info = inode_to_fs_info(file_inode(file)); struct btrfs_ioctl_scrub_args *sa; int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) { btrfs_err(fs_info, "scrub is not supported on extent tree v2 yet"); return -EINVAL; } sa = memdup_user(arg, sizeof(*sa)); if (IS_ERR(sa)) return PTR_ERR(sa); if (sa->flags & ~BTRFS_SCRUB_SUPPORTED_FLAGS) { ret = -EOPNOTSUPP; goto out; } if (!(sa->flags & BTRFS_SCRUB_READONLY)) { ret = mnt_want_write_file(file); if (ret) goto out; } ret = btrfs_scrub_dev(fs_info, sa->devid, sa->start, sa->end, &sa->progress, sa->flags & BTRFS_SCRUB_READONLY, 0); /* * Copy scrub args to user space even if btrfs_scrub_dev() returned an * error. This is important as it allows user space to know how much * progress scrub has done. For example, if scrub is canceled we get * -ECANCELED from btrfs_scrub_dev() and return that error back to user * space. Later user space can inspect the progress from the structure * btrfs_ioctl_scrub_args and resume scrub from where it left off * previously (btrfs-progs does this). * If we fail to copy the btrfs_ioctl_scrub_args structure to user space * then return -EFAULT to signal the structure was not copied or it may * be corrupt and unreliable due to a partial copy. */ if (copy_to_user(arg, sa, sizeof(*sa))) ret = -EFAULT; if (!(sa->flags & BTRFS_SCRUB_READONLY)) mnt_drop_write_file(file); out: kfree(sa); return ret; } static long btrfs_ioctl_scrub_cancel(struct btrfs_fs_info *fs_info) { if (!capable(CAP_SYS_ADMIN)) return -EPERM; return btrfs_scrub_cancel(fs_info); } static long btrfs_ioctl_scrub_progress(struct btrfs_fs_info *fs_info, void __user *arg) { struct btrfs_ioctl_scrub_args *sa; int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; sa = memdup_user(arg, sizeof(*sa)); if (IS_ERR(sa)) return PTR_ERR(sa); ret = btrfs_scrub_progress(fs_info, sa->devid, &sa->progress); if (ret == 0 && copy_to_user(arg, sa, sizeof(*sa))) ret = -EFAULT; kfree(sa); return ret; } static long btrfs_ioctl_get_dev_stats(struct btrfs_fs_info *fs_info, void __user *arg) { struct btrfs_ioctl_get_dev_stats *sa; int ret; sa = memdup_user(arg, sizeof(*sa)); if (IS_ERR(sa)) return PTR_ERR(sa); if ((sa->flags & BTRFS_DEV_STATS_RESET) && !capable(CAP_SYS_ADMIN)) { kfree(sa); return -EPERM; } ret = btrfs_get_dev_stats(fs_info, sa); if (ret == 0 && copy_to_user(arg, sa, sizeof(*sa))) ret = -EFAULT; kfree(sa); return ret; } static long btrfs_ioctl_dev_replace(struct btrfs_fs_info *fs_info, void __user *arg) { struct btrfs_ioctl_dev_replace_args *p; int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) { btrfs_err(fs_info, "device replace not supported on extent tree v2 yet"); return -EINVAL; } p = memdup_user(arg, sizeof(*p)); if (IS_ERR(p)) return PTR_ERR(p); switch (p->cmd) { case BTRFS_IOCTL_DEV_REPLACE_CMD_START: if (sb_rdonly(fs_info->sb)) { ret = -EROFS; goto out; } if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_REPLACE)) { ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; } else { ret = btrfs_dev_replace_by_ioctl(fs_info, p); btrfs_exclop_finish(fs_info); } break; case BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS: btrfs_dev_replace_status(fs_info, p); ret = 0; break; case BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL: p->result = btrfs_dev_replace_cancel(fs_info); ret = 0; break; default: ret = -EINVAL; break; } if ((ret == 0 || ret == -ECANCELED) && copy_to_user(arg, p, sizeof(*p))) ret = -EFAULT; out: kfree(p); return ret; } static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) { int ret = 0; int i; u64 rel_ptr; int size; struct btrfs_ioctl_ino_path_args *ipa = NULL; struct inode_fs_paths *ipath = NULL; struct btrfs_path *path; if (!capable(CAP_DAC_READ_SEARCH)) return -EPERM; path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; goto out; } ipa = memdup_user(arg, sizeof(*ipa)); if (IS_ERR(ipa)) { ret = PTR_ERR(ipa); ipa = NULL; goto out; } size = min_t(u32, ipa->size, 4096); ipath = init_ipath(size, root, path); if (IS_ERR(ipath)) { ret = PTR_ERR(ipath); ipath = NULL; goto out; } ret = paths_from_inode(ipa->inum, ipath); if (ret < 0) goto out; for (i = 0; i < ipath->fspath->elem_cnt; ++i) { rel_ptr = ipath->fspath->val[i] - (u64)(unsigned long)ipath->fspath->val; ipath->fspath->val[i] = rel_ptr; } btrfs_free_path(path); path = NULL; ret = copy_to_user((void __user *)(unsigned long)ipa->fspath, ipath->fspath, size); if (ret) { ret = -EFAULT; goto out; } out: btrfs_free_path(path); free_ipath(ipath); kfree(ipa); return ret; } static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, void __user *arg, int version) { int ret = 0; int size; struct btrfs_ioctl_logical_ino_args *loi; struct btrfs_data_container *inodes = NULL; struct btrfs_path *path = NULL; bool ignore_offset; if (!capable(CAP_SYS_ADMIN)) return -EPERM; loi = memdup_user(arg, sizeof(*loi)); if (IS_ERR(loi)) return PTR_ERR(loi); if (version == 1) { ignore_offset = false; size = min_t(u32, loi->size, SZ_64K); } else { /* All reserved bits must be 0 for now */ if (memchr_inv(loi->reserved, 0, sizeof(loi->reserved))) { ret = -EINVAL; goto out_loi; } /* Only accept flags we have defined so far */ if (loi->flags & ~(BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET)) { ret = -EINVAL; goto out_loi; } ignore_offset = loi->flags & BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET; size = min_t(u32, loi->size, SZ_16M); } inodes = init_data_container(size); if (IS_ERR(inodes)) { ret = PTR_ERR(inodes); goto out_loi; } path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; goto out; } ret = iterate_inodes_from_logical(loi->logical, fs_info, path, inodes, ignore_offset); btrfs_free_path(path); if (ret == -EINVAL) ret = -ENOENT; if (ret < 0) goto out; ret = copy_to_user((void __user *)(unsigned long)loi->inodes, inodes, size); if (ret) ret = -EFAULT; out: kvfree(inodes); out_loi: kfree(loi); return ret; } void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_balance_args *bargs) { struct btrfs_balance_control *bctl = fs_info->balance_ctl; bargs->flags = bctl->flags; if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) bargs->state |= BTRFS_BALANCE_STATE_RUNNING; if (atomic_read(&fs_info->balance_pause_req)) bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ; if (atomic_read(&fs_info->balance_cancel_req)) bargs->state |= BTRFS_BALANCE_STATE_CANCEL_REQ; memcpy(&bargs->data, &bctl->data, sizeof(bargs->data)); memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta)); memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys)); spin_lock(&fs_info->balance_lock); memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat)); spin_unlock(&fs_info->balance_lock); } /* * Try to acquire fs_info::balance_mutex as well as set BTRFS_EXLCOP_BALANCE as * required. * * @fs_info: the filesystem * @excl_acquired: ptr to boolean value which is set to false in case balance * is being resumed * * Return 0 on success in which case both fs_info::balance is acquired as well * as exclusive ops are blocked. In case of failure return an error code. */ static int btrfs_try_lock_balance(struct btrfs_fs_info *fs_info, bool *excl_acquired) { int ret; /* * Exclusive operation is locked. Three possibilities: * (1) some other op is running * (2) balance is running * (3) balance is paused -- special case (think resume) */ while (1) { if (btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) { *excl_acquired = true; mutex_lock(&fs_info->balance_mutex); return 0; } mutex_lock(&fs_info->balance_mutex); if (fs_info->balance_ctl) { /* This is either (2) or (3) */ if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) { /* This is (2) */ ret = -EINPROGRESS; goto out_failure; } else { mutex_unlock(&fs_info->balance_mutex); /* * Lock released to allow other waiters to * continue, we'll reexamine the status again. */ mutex_lock(&fs_info->balance_mutex); if (fs_info->balance_ctl && !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) { /* This is (3) */ *excl_acquired = false; return 0; } } } else { /* This is (1) */ ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; goto out_failure; } mutex_unlock(&fs_info->balance_mutex); } out_failure: mutex_unlock(&fs_info->balance_mutex); *excl_acquired = false; return ret; } static long btrfs_ioctl_balance(struct file *file, void __user *arg) { struct btrfs_root *root = BTRFS_I(file_inode(file))->root; struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_ioctl_balance_args *bargs; struct btrfs_balance_control *bctl; bool need_unlock = true; int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; ret = mnt_want_write_file(file); if (ret) return ret; bargs = memdup_user(arg, sizeof(*bargs)); if (IS_ERR(bargs)) { ret = PTR_ERR(bargs); bargs = NULL; goto out; } ret = btrfs_try_lock_balance(fs_info, &need_unlock); if (ret) goto out; lockdep_assert_held(&fs_info->balance_mutex); if (bargs->flags & BTRFS_BALANCE_RESUME) { if (!fs_info->balance_ctl) { ret = -ENOTCONN; goto out_unlock; } bctl = fs_info->balance_ctl; spin_lock(&fs_info->balance_lock); bctl->flags |= BTRFS_BALANCE_RESUME; spin_unlock(&fs_info->balance_lock); btrfs_exclop_balance(fs_info, BTRFS_EXCLOP_BALANCE); goto do_balance; } if (bargs->flags & ~(BTRFS_BALANCE_ARGS_MASK | BTRFS_BALANCE_TYPE_MASK)) { ret = -EINVAL; goto out_unlock; } if (fs_info->balance_ctl) { ret = -EINPROGRESS; goto out_unlock; } bctl = kzalloc(sizeof(*bctl), GFP_KERNEL); if (!bctl) { ret = -ENOMEM; goto out_unlock; } memcpy(&bctl->data, &bargs->data, sizeof(bctl->data)); memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta)); memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys)); bctl->flags = bargs->flags; do_balance: /* * Ownership of bctl and exclusive operation goes to btrfs_balance. * bctl is freed in reset_balance_state, or, if restriper was paused * all the way until unmount, in free_fs_info. The flag should be * cleared after reset_balance_state. */ need_unlock = false; ret = btrfs_balance(fs_info, bctl, bargs); bctl = NULL; if (ret == 0 || ret == -ECANCELED) { if (copy_to_user(arg, bargs, sizeof(*bargs))) ret = -EFAULT; } kfree(bctl); out_unlock: mutex_unlock(&fs_info->balance_mutex); if (need_unlock) btrfs_exclop_finish(fs_info); out: mnt_drop_write_file(file); kfree(bargs); return ret; } static long btrfs_ioctl_balance_ctl(struct btrfs_fs_info *fs_info, int cmd) { if (!capable(CAP_SYS_ADMIN)) return -EPERM; switch (cmd) { case BTRFS_BALANCE_CTL_PAUSE: return btrfs_pause_balance(fs_info); case BTRFS_BALANCE_CTL_CANCEL: return btrfs_cancel_balance(fs_info); } return -EINVAL; } static long btrfs_ioctl_balance_progress(struct btrfs_fs_info *fs_info, void __user *arg) { struct btrfs_ioctl_balance_args *bargs; int ret = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; mutex_lock(&fs_info->balance_mutex); if (!fs_info->balance_ctl) { ret = -ENOTCONN; goto out; } bargs = kzalloc(sizeof(*bargs), GFP_KERNEL); if (!bargs) { ret = -ENOMEM; goto out; } btrfs_update_ioctl_balance_args(fs_info, bargs); if (copy_to_user(arg, bargs, sizeof(*bargs))) ret = -EFAULT; kfree(bargs); out: mutex_unlock(&fs_info->balance_mutex); return ret; } static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg) { struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); struct btrfs_ioctl_quota_ctl_args *sa; int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; ret = mnt_want_write_file(file); if (ret) return ret; sa = memdup_user(arg, sizeof(*sa)); if (IS_ERR(sa)) { ret = PTR_ERR(sa); goto drop_write; } switch (sa->cmd) { case BTRFS_QUOTA_CTL_ENABLE: case BTRFS_QUOTA_CTL_ENABLE_SIMPLE_QUOTA: down_write(&fs_info->subvol_sem); ret = btrfs_quota_enable(fs_info, sa); up_write(&fs_info->subvol_sem); break; case BTRFS_QUOTA_CTL_DISABLE: /* * Lock the cleaner mutex to prevent races with concurrent * relocation, because relocation may be building backrefs for * blocks of the quota root while we are deleting the root. This * is like dropping fs roots of deleted snapshots/subvolumes, we * need the same protection. * * This also prevents races between concurrent tasks trying to * disable quotas, because we will unlock and relock * qgroup_ioctl_lock across BTRFS_FS_QUOTA_ENABLED changes. * * We take this here because we have the dependency of * * inode_lock -> subvol_sem * * because of rename. With relocation we can prealloc extents, * so that makes the dependency chain * * cleaner_mutex -> inode_lock -> subvol_sem * * so we must take the cleaner_mutex here before we take the * subvol_sem. The deadlock can't actually happen, but this * quiets lockdep. */ mutex_lock(&fs_info->cleaner_mutex); down_write(&fs_info->subvol_sem); ret = btrfs_quota_disable(fs_info); up_write(&fs_info->subvol_sem); mutex_unlock(&fs_info->cleaner_mutex); break; default: ret = -EINVAL; break; } kfree(sa); drop_write: mnt_drop_write_file(file); return ret; } /* * Quick check for ioctl handlers if quotas are enabled. Proper locking must be * done before any operations. */ static bool qgroup_enabled(struct btrfs_fs_info *fs_info) { bool ret = true; mutex_lock(&fs_info->qgroup_ioctl_lock); if (!fs_info->quota_root) ret = false; mutex_unlock(&fs_info->qgroup_ioctl_lock); return ret; } static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) { struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ioctl_qgroup_assign_args *sa; struct btrfs_qgroup_list *prealloc = NULL; struct btrfs_trans_handle *trans; int ret; int err; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (!qgroup_enabled(root->fs_info)) return -ENOTCONN; ret = mnt_want_write_file(file); if (ret) return ret; sa = memdup_user(arg, sizeof(*sa)); if (IS_ERR(sa)) { ret = PTR_ERR(sa); goto drop_write; } if (sa->assign) { prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL); if (!prealloc) { ret = -ENOMEM; goto drop_write; } } trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out; } /* * Prealloc ownership is moved to the relation handler, there it's used * or freed on error. */ if (sa->assign) { ret = btrfs_add_qgroup_relation(trans, sa->src, sa->dst, prealloc); prealloc = NULL; } else { ret = btrfs_del_qgroup_relation(trans, sa->src, sa->dst); } /* update qgroup status and info */ mutex_lock(&fs_info->qgroup_ioctl_lock); err = btrfs_run_qgroups(trans); mutex_unlock(&fs_info->qgroup_ioctl_lock); if (err < 0) btrfs_warn(fs_info, "qgroup status update failed after %s relation, marked as inconsistent", sa->assign ? "adding" : "deleting"); err = btrfs_end_transaction(trans); if (err && !ret) ret = err; out: kfree(prealloc); kfree(sa); drop_write: mnt_drop_write_file(file); return ret; } static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) { struct inode *inode = file_inode(file); struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ioctl_qgroup_create_args *sa; struct btrfs_trans_handle *trans; int ret; int err; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (!qgroup_enabled(root->fs_info)) return -ENOTCONN; ret = mnt_want_write_file(file); if (ret) return ret; sa = memdup_user(arg, sizeof(*sa)); if (IS_ERR(sa)) { ret = PTR_ERR(sa); goto drop_write; } if (!sa->qgroupid) { ret = -EINVAL; goto out; } if (sa->create && is_fstree(sa->qgroupid)) { ret = -EINVAL; goto out; } trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out; } if (sa->create) { ret = btrfs_create_qgroup(trans, sa->qgroupid); } else { ret = btrfs_remove_qgroup(trans, sa->qgroupid); } err = btrfs_end_transaction(trans); if (err && !ret) ret = err; out: kfree(sa); drop_write: mnt_drop_write_file(file); return ret; } static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg) { struct inode *inode = file_inode(file); struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ioctl_qgroup_limit_args *sa; struct btrfs_trans_handle *trans; int ret; int err; u64 qgroupid; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (!qgroup_enabled(root->fs_info)) return -ENOTCONN; ret = mnt_want_write_file(file); if (ret) return ret; sa = memdup_user(arg, sizeof(*sa)); if (IS_ERR(sa)) { ret = PTR_ERR(sa); goto drop_write; } trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out; } qgroupid = sa->qgroupid; if (!qgroupid) { /* take the current subvol as qgroup */ qgroupid = btrfs_root_id(root); } ret = btrfs_limit_qgroup(trans, qgroupid, &sa->lim); err = btrfs_end_transaction(trans); if (err && !ret) ret = err; out: kfree(sa); drop_write: mnt_drop_write_file(file); return ret; } static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg) { struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); struct btrfs_ioctl_quota_rescan_args *qsa; int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (!qgroup_enabled(fs_info)) return -ENOTCONN; ret = mnt_want_write_file(file); if (ret) return ret; qsa = memdup_user(arg, sizeof(*qsa)); if (IS_ERR(qsa)) { ret = PTR_ERR(qsa); goto drop_write; } if (qsa->flags) { ret = -EINVAL; goto out; } ret = btrfs_qgroup_rescan(fs_info); out: kfree(qsa); drop_write: mnt_drop_write_file(file); return ret; } static long btrfs_ioctl_quota_rescan_status(struct btrfs_fs_info *fs_info, void __user *arg) { struct btrfs_ioctl_quota_rescan_args qsa = {0}; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { qsa.flags = 1; qsa.progress = fs_info->qgroup_rescan_progress.objectid; } if (copy_to_user(arg, &qsa, sizeof(qsa))) return -EFAULT; return 0; } static long btrfs_ioctl_quota_rescan_wait(struct btrfs_fs_info *fs_info) { if (!capable(CAP_SYS_ADMIN)) return -EPERM; return btrfs_qgroup_wait_for_completion(fs_info, true); } static long _btrfs_ioctl_set_received_subvol(struct file *file, struct mnt_idmap *idmap, struct btrfs_ioctl_received_subvol_args *sa) { struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root_item *root_item = &root->root_item; struct btrfs_trans_handle *trans; struct timespec64 ct = current_time(inode); int ret = 0; int received_uuid_changed; if (!inode_owner_or_capable(idmap, inode)) return -EPERM; ret = mnt_want_write_file(file); if (ret < 0) return ret; down_write(&fs_info->subvol_sem); if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) { ret = -EINVAL; goto out; } if (btrfs_root_readonly(root)) { ret = -EROFS; goto out; } /* * 1 - root item * 2 - uuid items (received uuid + subvol uuid) */ trans = btrfs_start_transaction(root, 3); if (IS_ERR(trans)) { ret = PTR_ERR(trans); trans = NULL; goto out; } sa->rtransid = trans->transid; sa->rtime.sec = ct.tv_sec; sa->rtime.nsec = ct.tv_nsec; received_uuid_changed = memcmp(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE); if (received_uuid_changed && !btrfs_is_empty_uuid(root_item->received_uuid)) { ret = btrfs_uuid_tree_remove(trans, root_item->received_uuid, BTRFS_UUID_KEY_RECEIVED_SUBVOL, btrfs_root_id(root)); if (ret && ret != -ENOENT) { btrfs_abort_transaction(trans, ret); btrfs_end_transaction(trans); goto out; } } memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE); btrfs_set_root_stransid(root_item, sa->stransid); btrfs_set_root_rtransid(root_item, sa->rtransid); btrfs_set_stack_timespec_sec(&root_item->stime, sa->stime.sec); btrfs_set_stack_timespec_nsec(&root_item->stime, sa->stime.nsec); btrfs_set_stack_timespec_sec(&root_item->rtime, sa->rtime.sec); btrfs_set_stack_timespec_nsec(&root_item->rtime, sa->rtime.nsec); ret = btrfs_update_root(trans, fs_info->tree_root, &root->root_key, &root->root_item); if (ret < 0) { btrfs_end_transaction(trans); goto out; } if (received_uuid_changed && !btrfs_is_empty_uuid(sa->uuid)) { ret = btrfs_uuid_tree_add(trans, sa->uuid, BTRFS_UUID_KEY_RECEIVED_SUBVOL, btrfs_root_id(root)); if (ret < 0 && ret != -EEXIST) { btrfs_abort_transaction(trans, ret); btrfs_end_transaction(trans); goto out; } } ret = btrfs_commit_transaction(trans); out: up_write(&fs_info->subvol_sem); mnt_drop_write_file(file); return ret; } #ifdef CONFIG_64BIT static long btrfs_ioctl_set_received_subvol_32(struct file *file, void __user *arg) { struct btrfs_ioctl_received_subvol_args_32 *args32 = NULL; struct btrfs_ioctl_received_subvol_args *args64 = NULL; int ret = 0; args32 = memdup_user(arg, sizeof(*args32)); if (IS_ERR(args32)) return PTR_ERR(args32); args64 = kmalloc(sizeof(*args64), GFP_KERNEL); if (!args64) { ret = -ENOMEM; goto out; } memcpy(args64->uuid, args32->uuid, BTRFS_UUID_SIZE); args64->stransid = args32->stransid; args64->rtransid = args32->rtransid; args64->stime.sec = args32->stime.sec; args64->stime.nsec = args32->stime.nsec; args64->rtime.sec = args32->rtime.sec; args64->rtime.nsec = args32->rtime.nsec; args64->flags = args32->flags; ret = _btrfs_ioctl_set_received_subvol(file, file_mnt_idmap(file), args64); if (ret) goto out; memcpy(args32->uuid, args64->uuid, BTRFS_UUID_SIZE); args32->stransid = args64->stransid; args32->rtransid = args64->rtransid; args32->stime.sec = args64->stime.sec; args32->stime.nsec = args64->stime.nsec; args32->rtime.sec = args64->rtime.sec; args32->rtime.nsec = args64->rtime.nsec; args32->flags = args64->flags; ret = copy_to_user(arg, args32, sizeof(*args32)); if (ret) ret = -EFAULT; out: kfree(args32); kfree(args64); return ret; } #endif static long btrfs_ioctl_set_received_subvol(struct file *file, void __user *arg) { struct btrfs_ioctl_received_subvol_args *sa = NULL; int ret = 0; sa = memdup_user(arg, sizeof(*sa)); if (IS_ERR(sa)) return PTR_ERR(sa); ret = _btrfs_ioctl_set_received_subvol(file, file_mnt_idmap(file), sa); if (ret) goto out; ret = copy_to_user(arg, sa, sizeof(*sa)); if (ret) ret = -EFAULT; out: kfree(sa); return ret; } static int btrfs_ioctl_get_fslabel(struct btrfs_fs_info *fs_info, void __user *arg) { size_t len; int ret; char label[BTRFS_LABEL_SIZE]; spin_lock(&fs_info->super_lock); memcpy(label, fs_info->super_copy->label, BTRFS_LABEL_SIZE); spin_unlock(&fs_info->super_lock); len = strnlen(label, BTRFS_LABEL_SIZE); if (len == BTRFS_LABEL_SIZE) { btrfs_warn(fs_info, "label is too long, return the first %zu bytes", --len); } ret = copy_to_user(arg, label, len); return ret ? -EFAULT : 0; } static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg) { struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_super_block *super_block = fs_info->super_copy; struct btrfs_trans_handle *trans; char label[BTRFS_LABEL_SIZE]; int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (copy_from_user(label, arg, sizeof(label))) return -EFAULT; if (strnlen(label, BTRFS_LABEL_SIZE) == BTRFS_LABEL_SIZE) { btrfs_err(fs_info, "unable to set label with more than %d bytes", BTRFS_LABEL_SIZE - 1); return -EINVAL; } ret = mnt_want_write_file(file); if (ret) return ret; trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out_unlock; } spin_lock(&fs_info->super_lock); strcpy(super_block->label, label); spin_unlock(&fs_info->super_lock); ret = btrfs_commit_transaction(trans); out_unlock: mnt_drop_write_file(file); return ret; } #define INIT_FEATURE_FLAGS(suffix) \ { .compat_flags = BTRFS_FEATURE_COMPAT_##suffix, \ .compat_ro_flags = BTRFS_FEATURE_COMPAT_RO_##suffix, \ .incompat_flags = BTRFS_FEATURE_INCOMPAT_##suffix } int btrfs_ioctl_get_supported_features(void __user *arg) { static const struct btrfs_ioctl_feature_flags features[3] = { INIT_FEATURE_FLAGS(SUPP), INIT_FEATURE_FLAGS(SAFE_SET), INIT_FEATURE_FLAGS(SAFE_CLEAR) }; if (copy_to_user(arg, &features, sizeof(features))) return -EFAULT; return 0; } static int btrfs_ioctl_get_features(struct btrfs_fs_info *fs_info, void __user *arg) { struct btrfs_super_block *super_block = fs_info->super_copy; struct btrfs_ioctl_feature_flags features; features.compat_flags = btrfs_super_compat_flags(super_block); features.compat_ro_flags = btrfs_super_compat_ro_flags(super_block); features.incompat_flags = btrfs_super_incompat_flags(super_block); if (copy_to_user(arg, &features, sizeof(features))) return -EFAULT; return 0; } static int check_feature_bits(const struct btrfs_fs_info *fs_info, enum btrfs_feature_set set, u64 change_mask, u64 flags, u64 supported_flags, u64 safe_set, u64 safe_clear) { const char *type = btrfs_feature_set_name(set); char *names; u64 disallowed, unsupported; u64 set_mask = flags & change_mask; u64 clear_mask = ~flags & change_mask; unsupported = set_mask & ~supported_flags; if (unsupported) { names = btrfs_printable_features(set, unsupported); if (names) { btrfs_warn(fs_info, "this kernel does not support the %s feature bit%s", names, strchr(names, ',') ? "s" : ""); kfree(names); } else btrfs_warn(fs_info, "this kernel does not support %s bits 0x%llx", type, unsupported); return -EOPNOTSUPP; } disallowed = set_mask & ~safe_set; if (disallowed) { names = btrfs_printable_features(set, disallowed); if (names) { btrfs_warn(fs_info, "can't set the %s feature bit%s while mounted", names, strchr(names, ',') ? "s" : ""); kfree(names); } else btrfs_warn(fs_info, "can't set %s bits 0x%llx while mounted", type, disallowed); return -EPERM; } disallowed = clear_mask & ~safe_clear; if (disallowed) { names = btrfs_printable_features(set, disallowed); if (names) { btrfs_warn(fs_info, "can't clear the %s feature bit%s while mounted", names, strchr(names, ',') ? "s" : ""); kfree(names); } else btrfs_warn(fs_info, "can't clear %s bits 0x%llx while mounted", type, disallowed); return -EPERM; } return 0; } #define check_feature(fs_info, change_mask, flags, mask_base) \ check_feature_bits(fs_info, FEAT_##mask_base, change_mask, flags, \ BTRFS_FEATURE_ ## mask_base ## _SUPP, \ BTRFS_FEATURE_ ## mask_base ## _SAFE_SET, \ BTRFS_FEATURE_ ## mask_base ## _SAFE_CLEAR) static int btrfs_ioctl_set_features(struct file *file, void __user *arg) { struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_super_block *super_block = fs_info->super_copy; struct btrfs_ioctl_feature_flags flags[2]; struct btrfs_trans_handle *trans; u64 newflags; int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (copy_from_user(flags, arg, sizeof(flags))) return -EFAULT; /* Nothing to do */ if (!flags[0].compat_flags && !flags[0].compat_ro_flags && !flags[0].incompat_flags) return 0; ret = check_feature(fs_info, flags[0].compat_flags, flags[1].compat_flags, COMPAT); if (ret) return ret; ret = check_feature(fs_info, flags[0].compat_ro_flags, flags[1].compat_ro_flags, COMPAT_RO); if (ret) return ret; ret = check_feature(fs_info, flags[0].incompat_flags, flags[1].incompat_flags, INCOMPAT); if (ret) return ret; ret = mnt_want_write_file(file); if (ret) return ret; trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out_drop_write; } spin_lock(&fs_info->super_lock); newflags = btrfs_super_compat_flags(super_block); newflags |= flags[0].compat_flags & flags[1].compat_flags; newflags &= ~(flags[0].compat_flags & ~flags[1].compat_flags); btrfs_set_super_compat_flags(super_block, newflags); newflags = btrfs_super_compat_ro_flags(super_block); newflags |= flags[0].compat_ro_flags & flags[1].compat_ro_flags; newflags &= ~(flags[0].compat_ro_flags & ~flags[1].compat_ro_flags); btrfs_set_super_compat_ro_flags(super_block, newflags); newflags = btrfs_super_incompat_flags(super_block); newflags |= flags[0].incompat_flags & flags[1].incompat_flags; newflags &= ~(flags[0].incompat_flags & ~flags[1].incompat_flags); btrfs_set_super_incompat_flags(super_block, newflags); spin_unlock(&fs_info->super_lock); ret = btrfs_commit_transaction(trans); out_drop_write: mnt_drop_write_file(file); return ret; } static int _btrfs_ioctl_send(struct btrfs_root *root, void __user *argp, bool compat) { struct btrfs_ioctl_send_args *arg; int ret; if (compat) { #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) struct btrfs_ioctl_send_args_32 args32 = { 0 }; ret = copy_from_user(&args32, argp, sizeof(args32)); if (ret) return -EFAULT; arg = kzalloc(sizeof(*arg), GFP_KERNEL); if (!arg) return -ENOMEM; arg->send_fd = args32.send_fd; arg->clone_sources_count = args32.clone_sources_count; arg->clone_sources = compat_ptr(args32.clone_sources); arg->parent_root = args32.parent_root; arg->flags = args32.flags; arg->version = args32.version; memcpy(arg->reserved, args32.reserved, sizeof(args32.reserved)); #else return -ENOTTY; #endif } else { arg = memdup_user(argp, sizeof(*arg)); if (IS_ERR(arg)) return PTR_ERR(arg); } ret = btrfs_ioctl_send(root, arg); kfree(arg); return ret; } static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp, bool compat) { struct btrfs_ioctl_encoded_io_args args = { 0 }; size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args, flags); size_t copy_end; struct btrfs_inode *inode = BTRFS_I(file_inode(file)); struct btrfs_fs_info *fs_info = inode->root->fs_info; struct extent_io_tree *io_tree = &inode->io_tree; struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; struct iov_iter iter; loff_t pos; struct kiocb kiocb; ssize_t ret; u64 disk_bytenr, disk_io_size; struct extent_state *cached_state = NULL; if (!capable(CAP_SYS_ADMIN)) { ret = -EPERM; goto out_acct; } if (compat) { #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) struct btrfs_ioctl_encoded_io_args_32 args32; copy_end = offsetofend(struct btrfs_ioctl_encoded_io_args_32, flags); if (copy_from_user(&args32, argp, copy_end)) { ret = -EFAULT; goto out_acct; } args.iov = compat_ptr(args32.iov); args.iovcnt = args32.iovcnt; args.offset = args32.offset; args.flags = args32.flags; #else return -ENOTTY; #endif } else { copy_end = copy_end_kernel; if (copy_from_user(&args, argp, copy_end)) { ret = -EFAULT; goto out_acct; } } if (args.flags != 0) { ret = -EINVAL; goto out_acct; } ret = import_iovec(ITER_DEST, args.iov, args.iovcnt, ARRAY_SIZE(iovstack), &iov, &iter); if (ret < 0) goto out_acct; if (iov_iter_count(&iter) == 0) { ret = 0; goto out_iov; } pos = args.offset; ret = rw_verify_area(READ, file, &pos, args.len); if (ret < 0) goto out_iov; init_sync_kiocb(&kiocb, file); kiocb.ki_pos = pos; ret = btrfs_encoded_read(&kiocb, &iter, &args, &cached_state, &disk_bytenr, &disk_io_size); if (ret == -EIOCBQUEUED) { bool unlocked = false; u64 start, lockend, count; start = ALIGN_DOWN(kiocb.ki_pos, fs_info->sectorsize); lockend = start + BTRFS_MAX_UNCOMPRESSED - 1; if (args.compression) count = disk_io_size; else count = args.len; ret = btrfs_encoded_read_regular(&kiocb, &iter, start, lockend, &cached_state, disk_bytenr, disk_io_size, count, args.compression, &unlocked); if (!unlocked) { unlock_extent(io_tree, start, lockend, &cached_state); btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); } } if (ret >= 0) { fsnotify_access(file); if (copy_to_user(argp + copy_end, (char *)&args + copy_end_kernel, sizeof(args) - copy_end_kernel)) ret = -EFAULT; } out_iov: kfree(iov); out_acct: if (ret > 0) add_rchar(current, ret); inc_syscr(current); return ret; } static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool compat) { struct btrfs_ioctl_encoded_io_args args; struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; struct iov_iter iter; loff_t pos; struct kiocb kiocb; ssize_t ret; if (!capable(CAP_SYS_ADMIN)) { ret = -EPERM; goto out_acct; } if (!(file->f_mode & FMODE_WRITE)) { ret = -EBADF; goto out_acct; } if (compat) { #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) struct btrfs_ioctl_encoded_io_args_32 args32; if (copy_from_user(&args32, argp, sizeof(args32))) { ret = -EFAULT; goto out_acct; } args.iov = compat_ptr(args32.iov); args.iovcnt = args32.iovcnt; args.offset = args32.offset; args.flags = args32.flags; args.len = args32.len; args.unencoded_len = args32.unencoded_len; args.unencoded_offset = args32.unencoded_offset; args.compression = args32.compression; args.encryption = args32.encryption; memcpy(args.reserved, args32.reserved, sizeof(args.reserved)); #else return -ENOTTY; #endif } else { if (copy_from_user(&args, argp, sizeof(args))) { ret = -EFAULT; goto out_acct; } } ret = -EINVAL; if (args.flags != 0) goto out_acct; if (memchr_inv(args.reserved, 0, sizeof(args.reserved))) goto out_acct; if (args.compression == BTRFS_ENCODED_IO_COMPRESSION_NONE && args.encryption == BTRFS_ENCODED_IO_ENCRYPTION_NONE) goto out_acct; if (args.compression >= BTRFS_ENCODED_IO_COMPRESSION_TYPES || args.encryption >= BTRFS_ENCODED_IO_ENCRYPTION_TYPES) goto out_acct; if (args.unencoded_offset > args.unencoded_len) goto out_acct; if (args.len > args.unencoded_len - args.unencoded_offset) goto out_acct; ret = import_iovec(ITER_SOURCE, args.iov, args.iovcnt, ARRAY_SIZE(iovstack), &iov, &iter); if (ret < 0) goto out_acct; if (iov_iter_count(&iter) == 0) { ret = 0; goto out_iov; } pos = args.offset; ret = rw_verify_area(WRITE, file, &pos, args.len); if (ret < 0) goto out_iov; init_sync_kiocb(&kiocb, file); ret = kiocb_set_rw_flags(&kiocb, 0, WRITE); if (ret) goto out_iov; kiocb.ki_pos = pos; file_start_write(file); ret = btrfs_do_write_iter(&kiocb, &iter, &args); if (ret > 0) fsnotify_modify(file); file_end_write(file); out_iov: kfree(iov); out_acct: if (ret > 0) add_wchar(current, ret); inc_syscw(current); return ret; } /* * Context that's attached to an encoded read io_uring command, in cmd->pdu. It * contains the fields in btrfs_uring_read_extent that are necessary to finish * off and cleanup the I/O in btrfs_uring_read_finished. */ struct btrfs_uring_priv { struct io_uring_cmd *cmd; struct page **pages; unsigned long nr_pages; struct kiocb iocb; struct iovec *iov; struct iov_iter iter; struct extent_state *cached_state; u64 count; u64 start; u64 lockend; int err; bool compressed; }; struct io_btrfs_cmd { struct btrfs_uring_priv *priv; }; static void btrfs_uring_read_finished(struct io_uring_cmd *cmd, unsigned int issue_flags) { struct io_btrfs_cmd *bc = io_uring_cmd_to_pdu(cmd, struct io_btrfs_cmd); struct btrfs_uring_priv *priv = bc->priv; struct btrfs_inode *inode = BTRFS_I(file_inode(priv->iocb.ki_filp)); struct extent_io_tree *io_tree = &inode->io_tree; unsigned long index; u64 cur; size_t page_offset; ssize_t ret; /* The inode lock has already been acquired in btrfs_uring_read_extent. */ btrfs_lockdep_inode_acquire(inode, i_rwsem); if (priv->err) { ret = priv->err; goto out; } if (priv->compressed) { index = 0; page_offset = 0; } else { index = (priv->iocb.ki_pos - priv->start) >> PAGE_SHIFT; page_offset = offset_in_page(priv->iocb.ki_pos - priv->start); } cur = 0; while (cur < priv->count) { size_t bytes = min_t(size_t, priv->count - cur, PAGE_SIZE - page_offset); if (copy_page_to_iter(priv->pages[index], page_offset, bytes, &priv->iter) != bytes) { ret = -EFAULT; goto out; } index++; cur += bytes; page_offset = 0; } ret = priv->count; out: unlock_extent(io_tree, priv->start, priv->lockend, &priv->cached_state); btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); io_uring_cmd_done(cmd, ret, 0, issue_flags); add_rchar(current, ret); for (index = 0; index < priv->nr_pages; index++) __free_page(priv->pages[index]); kfree(priv->pages); kfree(priv->iov); kfree(priv); } void btrfs_uring_read_extent_endio(void *ctx, int err) { struct btrfs_uring_priv *priv = ctx; struct io_btrfs_cmd *bc = io_uring_cmd_to_pdu(priv->cmd, struct io_btrfs_cmd); priv->err = err; bc->priv = priv; io_uring_cmd_complete_in_task(priv->cmd, btrfs_uring_read_finished); } static int btrfs_uring_read_extent(struct kiocb *iocb, struct iov_iter *iter, u64 start, u64 lockend, struct extent_state *cached_state, u64 disk_bytenr, u64 disk_io_size, size_t count, bool compressed, struct iovec *iov, struct io_uring_cmd *cmd) { struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp)); struct extent_io_tree *io_tree = &inode->io_tree; struct page **pages; struct btrfs_uring_priv *priv = NULL; unsigned long nr_pages; int ret; nr_pages = DIV_ROUND_UP(disk_io_size, PAGE_SIZE); pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS); if (!pages) return -ENOMEM; ret = btrfs_alloc_page_array(nr_pages, pages, 0); if (ret) { ret = -ENOMEM; goto out_fail; } priv = kmalloc(sizeof(*priv), GFP_NOFS); if (!priv) { ret = -ENOMEM; goto out_fail; } priv->iocb = *iocb; priv->iov = iov; priv->iter = *iter; priv->count = count; priv->cmd = cmd; priv->cached_state = cached_state; priv->compressed = compressed; priv->nr_pages = nr_pages; priv->pages = pages; priv->start = start; priv->lockend = lockend; priv->err = 0; ret = btrfs_encoded_read_regular_fill_pages(inode, disk_bytenr, disk_io_size, pages, priv); if (ret && ret != -EIOCBQUEUED) goto out_fail; /* * If we return -EIOCBQUEUED, we're deferring the cleanup to * btrfs_uring_read_finished(), which will handle unlocking the extent * and inode and freeing the allocations. */ /* * We're returning to userspace with the inode lock held, and that's * okay - it'll get unlocked in a worker thread. Call * btrfs_lockdep_inode_release() to avoid confusing lockdep. */ btrfs_lockdep_inode_release(inode, i_rwsem); return -EIOCBQUEUED; out_fail: unlock_extent(io_tree, start, lockend, &cached_state); btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); kfree(priv); return ret; } struct btrfs_uring_encoded_data { struct btrfs_ioctl_encoded_io_args args; struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov; struct iov_iter iter; }; static int btrfs_uring_encoded_read(struct io_uring_cmd *cmd, unsigned int issue_flags) { size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args, flags); size_t copy_end; int ret; u64 disk_bytenr, disk_io_size; struct file *file; struct btrfs_inode *inode; struct btrfs_fs_info *fs_info; struct extent_io_tree *io_tree; loff_t pos; struct kiocb kiocb; struct extent_state *cached_state = NULL; u64 start, lockend; void __user *sqe_addr; struct btrfs_uring_encoded_data *data = io_uring_cmd_get_async_data(cmd)->op_data; if (!capable(CAP_SYS_ADMIN)) { ret = -EPERM; goto out_acct; } file = cmd->file; inode = BTRFS_I(file->f_inode); fs_info = inode->root->fs_info; io_tree = &inode->io_tree; sqe_addr = u64_to_user_ptr(READ_ONCE(cmd->sqe->addr)); if (issue_flags & IO_URING_F_COMPAT) { #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) copy_end = offsetofend(struct btrfs_ioctl_encoded_io_args_32, flags); #else return -ENOTTY; #endif } else { copy_end = copy_end_kernel; } if (!data) { data = kzalloc(sizeof(*data), GFP_NOFS); if (!data) { ret = -ENOMEM; goto out_acct; } io_uring_cmd_get_async_data(cmd)->op_data = data; if (issue_flags & IO_URING_F_COMPAT) { #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) struct btrfs_ioctl_encoded_io_args_32 args32; if (copy_from_user(&args32, sqe_addr, copy_end)) { ret = -EFAULT; goto out_acct; } data->args.iov = compat_ptr(args32.iov); data->args.iovcnt = args32.iovcnt; data->args.offset = args32.offset; data->args.flags = args32.flags; #endif } else { if (copy_from_user(&data->args, sqe_addr, copy_end)) { ret = -EFAULT; goto out_acct; } } if (data->args.flags != 0) { ret = -EINVAL; goto out_acct; } data->iov = data->iovstack; ret = import_iovec(ITER_DEST, data->args.iov, data->args.iovcnt, ARRAY_SIZE(data->iovstack), &data->iov, &data->iter); if (ret < 0) goto out_acct; if (iov_iter_count(&data->iter) == 0) { ret = 0; goto out_free; } } pos = data->args.offset; ret = rw_verify_area(READ, file, &pos, data->args.len); if (ret < 0) goto out_free; init_sync_kiocb(&kiocb, file); kiocb.ki_pos = pos; if (issue_flags & IO_URING_F_NONBLOCK) kiocb.ki_flags |= IOCB_NOWAIT; start = ALIGN_DOWN(pos, fs_info->sectorsize); lockend = start + BTRFS_MAX_UNCOMPRESSED - 1; ret = btrfs_encoded_read(&kiocb, &data->iter, &data->args, &cached_state, &disk_bytenr, &disk_io_size); if (ret < 0 && ret != -EIOCBQUEUED) goto out_free; file_accessed(file); if (copy_to_user(sqe_addr + copy_end, (const char *)&data->args + copy_end_kernel, sizeof(data->args) - copy_end_kernel)) { if (ret == -EIOCBQUEUED) { unlock_extent(io_tree, start, lockend, &cached_state); btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); } ret = -EFAULT; goto out_free; } if (ret == -EIOCBQUEUED) { u64 count = min_t(u64, iov_iter_count(&data->iter), disk_io_size); /* Match ioctl by not returning past EOF if uncompressed. */ if (!data->args.compression) count = min_t(u64, count, data->args.len); ret = btrfs_uring_read_extent(&kiocb, &data->iter, start, lockend, cached_state, disk_bytenr, disk_io_size, count, data->args.compression, data->iov, cmd); goto out_acct; } out_free: kfree(data->iov); out_acct: if (ret > 0) add_rchar(current, ret); inc_syscr(current); return ret; } static int btrfs_uring_encoded_write(struct io_uring_cmd *cmd, unsigned int issue_flags) { loff_t pos; struct kiocb kiocb; struct file *file; ssize_t ret; void __user *sqe_addr; struct btrfs_uring_encoded_data *data = io_uring_cmd_get_async_data(cmd)->op_data; if (!capable(CAP_SYS_ADMIN)) { ret = -EPERM; goto out_acct; } file = cmd->file; sqe_addr = u64_to_user_ptr(READ_ONCE(cmd->sqe->addr)); if (!(file->f_mode & FMODE_WRITE)) { ret = -EBADF; goto out_acct; } if (!data) { data = kzalloc(sizeof(*data), GFP_NOFS); if (!data) { ret = -ENOMEM; goto out_acct; } io_uring_cmd_get_async_data(cmd)->op_data = data; if (issue_flags & IO_URING_F_COMPAT) { #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) struct btrfs_ioctl_encoded_io_args_32 args32; if (copy_from_user(&args32, sqe_addr, sizeof(args32))) { ret = -EFAULT; goto out_acct; } data->args.iov = compat_ptr(args32.iov); data->args.iovcnt = args32.iovcnt; data->args.offset = args32.offset; data->args.flags = args32.flags; data->args.len = args32.len; data->args.unencoded_len = args32.unencoded_len; data->args.unencoded_offset = args32.unencoded_offset; data->args.compression = args32.compression; data->args.encryption = args32.encryption; memcpy(data->args.reserved, args32.reserved, sizeof(data->args.reserved)); #else ret = -ENOTTY; goto out_acct; #endif } else { if (copy_from_user(&data->args, sqe_addr, sizeof(data->args))) { ret = -EFAULT; goto out_acct; } } ret = -EINVAL; if (data->args.flags != 0) goto out_acct; if (memchr_inv(data->args.reserved, 0, sizeof(data->args.reserved))) goto out_acct; if (data->args.compression == BTRFS_ENCODED_IO_COMPRESSION_NONE && data->args.encryption == BTRFS_ENCODED_IO_ENCRYPTION_NONE) goto out_acct; if (data->args.compression >= BTRFS_ENCODED_IO_COMPRESSION_TYPES || data->args.encryption >= BTRFS_ENCODED_IO_ENCRYPTION_TYPES) goto out_acct; if (data->args.unencoded_offset > data->args.unencoded_len) goto out_acct; if (data->args.len > data->args.unencoded_len - data->args.unencoded_offset) goto out_acct; data->iov = data->iovstack; ret = import_iovec(ITER_SOURCE, data->args.iov, data->args.iovcnt, ARRAY_SIZE(data->iovstack), &data->iov, &data->iter); if (ret < 0) goto out_acct; if (iov_iter_count(&data->iter) == 0) { ret = 0; goto out_iov; } } if (issue_flags & IO_URING_F_NONBLOCK) { ret = -EAGAIN; goto out_acct; } pos = data->args.offset; ret = rw_verify_area(WRITE, file, &pos, data->args.len); if (ret < 0) goto out_iov; init_sync_kiocb(&kiocb, file); ret = kiocb_set_rw_flags(&kiocb, 0, WRITE); if (ret) goto out_iov; kiocb.ki_pos = pos; file_start_write(file); ret = btrfs_do_write_iter(&kiocb, &data->iter, &data->args); if (ret > 0) fsnotify_modify(file); file_end_write(file); out_iov: kfree(data->iov); out_acct: if (ret > 0) add_wchar(current, ret); inc_syscw(current); return ret; } int btrfs_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) { switch (cmd->cmd_op) { case BTRFS_IOC_ENCODED_READ: #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) case BTRFS_IOC_ENCODED_READ_32: #endif return btrfs_uring_encoded_read(cmd, issue_flags); case BTRFS_IOC_ENCODED_WRITE: #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) case BTRFS_IOC_ENCODED_WRITE_32: #endif return btrfs_uring_encoded_write(cmd, issue_flags); } return -EINVAL; } static int btrfs_ioctl_subvol_sync(struct btrfs_fs_info *fs_info, void __user *argp) { struct btrfs_root *root; struct btrfs_ioctl_subvol_wait args = { 0 }; signed long sched_ret; int refs; u64 root_flags; bool wait_for_deletion = false; bool found = false; if (copy_from_user(&args, argp, sizeof(args))) return -EFAULT; switch (args.mode) { case BTRFS_SUBVOL_SYNC_WAIT_FOR_QUEUED: /* * Wait for the first one deleted that waits until all previous * are cleaned. */ spin_lock(&fs_info->trans_lock); if (!list_empty(&fs_info->dead_roots)) { root = list_last_entry(&fs_info->dead_roots, struct btrfs_root, root_list); args.subvolid = btrfs_root_id(root); found = true; } spin_unlock(&fs_info->trans_lock); if (!found) return -ENOENT; fallthrough; case BTRFS_SUBVOL_SYNC_WAIT_FOR_ONE: if ((0 < args.subvolid && args.subvolid < BTRFS_FIRST_FREE_OBJECTID) || BTRFS_LAST_FREE_OBJECTID < args.subvolid) return -EINVAL; break; case BTRFS_SUBVOL_SYNC_COUNT: spin_lock(&fs_info->trans_lock); args.count = list_count_nodes(&fs_info->dead_roots); spin_unlock(&fs_info->trans_lock); if (copy_to_user(argp, &args, sizeof(args))) return -EFAULT; return 0; case BTRFS_SUBVOL_SYNC_PEEK_FIRST: spin_lock(&fs_info->trans_lock); /* Last in the list was deleted first. */ if (!list_empty(&fs_info->dead_roots)) { root = list_last_entry(&fs_info->dead_roots, struct btrfs_root, root_list); args.subvolid = btrfs_root_id(root); } else { args.subvolid = 0; } spin_unlock(&fs_info->trans_lock); if (copy_to_user(argp, &args, sizeof(args))) return -EFAULT; return 0; case BTRFS_SUBVOL_SYNC_PEEK_LAST: spin_lock(&fs_info->trans_lock); /* First in the list was deleted last. */ if (!list_empty(&fs_info->dead_roots)) { root = list_first_entry(&fs_info->dead_roots, struct btrfs_root, root_list); args.subvolid = btrfs_root_id(root); } else { args.subvolid = 0; } spin_unlock(&fs_info->trans_lock); if (copy_to_user(argp, &args, sizeof(args))) return -EFAULT; return 0; default: return -EINVAL; } /* 32bit limitation: fs_roots_radix key is not wide enough. */ if (sizeof(unsigned long) != sizeof(u64) && args.subvolid > U32_MAX) return -EOVERFLOW; while (1) { /* Wait for the specific one. */ if (down_read_interruptible(&fs_info->subvol_sem) == -EINTR) return -EINTR; refs = -1; spin_lock(&fs_info->fs_roots_radix_lock); root = radix_tree_lookup(&fs_info->fs_roots_radix, (unsigned long)args.subvolid); if (root) { spin_lock(&root->root_item_lock); refs = btrfs_root_refs(&root->root_item); root_flags = btrfs_root_flags(&root->root_item); spin_unlock(&root->root_item_lock); } spin_unlock(&fs_info->fs_roots_radix_lock); up_read(&fs_info->subvol_sem); /* Subvolume does not exist. */ if (!root) return -ENOENT; /* Subvolume not deleted at all. */ if (refs > 0) return -EEXIST; /* We've waited and now the subvolume is gone. */ if (wait_for_deletion && refs == -1) { /* Return the one we waited for as the last one. */ if (copy_to_user(argp, &args, sizeof(args))) return -EFAULT; return 0; } /* Subvolume not found on the first try (deleted or never existed). */ if (refs == -1) return -ENOENT; wait_for_deletion = true; ASSERT(root_flags & BTRFS_ROOT_SUBVOL_DEAD); sched_ret = schedule_timeout_interruptible(HZ); /* Early wake up or error. */ if (sched_ret != 0) return -EINTR; } return 0; } long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); struct btrfs_root *root = BTRFS_I(inode)->root; void __user *argp = (void __user *)arg; switch (cmd) { case FS_IOC_GETVERSION: return btrfs_ioctl_getversion(inode, argp); case FS_IOC_GETFSLABEL: return btrfs_ioctl_get_fslabel(fs_info, argp); case FS_IOC_SETFSLABEL: return btrfs_ioctl_set_fslabel(file, argp); case FITRIM: return btrfs_ioctl_fitrim(fs_info, argp); case BTRFS_IOC_SNAP_CREATE: return btrfs_ioctl_snap_create(file, argp, 0); case BTRFS_IOC_SNAP_CREATE_V2: return btrfs_ioctl_snap_create_v2(file, argp, 0); case BTRFS_IOC_SUBVOL_CREATE: return btrfs_ioctl_snap_create(file, argp, 1); case BTRFS_IOC_SUBVOL_CREATE_V2: return btrfs_ioctl_snap_create_v2(file, argp, 1); case BTRFS_IOC_SNAP_DESTROY: return btrfs_ioctl_snap_destroy(file, argp, false); case BTRFS_IOC_SNAP_DESTROY_V2: return btrfs_ioctl_snap_destroy(file, argp, true); case BTRFS_IOC_SUBVOL_GETFLAGS: return btrfs_ioctl_subvol_getflags(BTRFS_I(inode), argp); case BTRFS_IOC_SUBVOL_SETFLAGS: return btrfs_ioctl_subvol_setflags(file, argp); case BTRFS_IOC_DEFAULT_SUBVOL: return btrfs_ioctl_default_subvol(file, argp); case BTRFS_IOC_DEFRAG: return btrfs_ioctl_defrag(file, NULL); case BTRFS_IOC_DEFRAG_RANGE: return btrfs_ioctl_defrag(file, argp); case BTRFS_IOC_RESIZE: return btrfs_ioctl_resize(file, argp); case BTRFS_IOC_ADD_DEV: return btrfs_ioctl_add_dev(fs_info, argp); case BTRFS_IOC_RM_DEV: return btrfs_ioctl_rm_dev(file, argp); case BTRFS_IOC_RM_DEV_V2: return btrfs_ioctl_rm_dev_v2(file, argp); case BTRFS_IOC_FS_INFO: return btrfs_ioctl_fs_info(fs_info, argp); case BTRFS_IOC_DEV_INFO: return btrfs_ioctl_dev_info(fs_info, argp); case BTRFS_IOC_TREE_SEARCH: return btrfs_ioctl_tree_search(root, argp); case BTRFS_IOC_TREE_SEARCH_V2: return btrfs_ioctl_tree_search_v2(root, argp); case BTRFS_IOC_INO_LOOKUP: return btrfs_ioctl_ino_lookup(root, argp); case BTRFS_IOC_INO_PATHS: return btrfs_ioctl_ino_to_path(root, argp); case BTRFS_IOC_LOGICAL_INO: return btrfs_ioctl_logical_to_ino(fs_info, argp, 1); case BTRFS_IOC_LOGICAL_INO_V2: return btrfs_ioctl_logical_to_ino(fs_info, argp, 2); case BTRFS_IOC_SPACE_INFO: return btrfs_ioctl_space_info(fs_info, argp); case BTRFS_IOC_SYNC: { int ret; ret = btrfs_start_delalloc_roots(fs_info, LONG_MAX, false); if (ret) return ret; ret = btrfs_sync_fs(inode->i_sb, 1); /* * There may be work for the cleaner kthread to do (subvolume * deletion, delayed iputs, defrag inodes, etc), so wake it up. */ wake_up_process(fs_info->cleaner_kthread); return ret; } case BTRFS_IOC_START_SYNC: return btrfs_ioctl_start_sync(root, argp); case BTRFS_IOC_WAIT_SYNC: return btrfs_ioctl_wait_sync(fs_info, argp); case BTRFS_IOC_SCRUB: return btrfs_ioctl_scrub(file, argp); case BTRFS_IOC_SCRUB_CANCEL: return btrfs_ioctl_scrub_cancel(fs_info); case BTRFS_IOC_SCRUB_PROGRESS: return btrfs_ioctl_scrub_progress(fs_info, argp); case BTRFS_IOC_BALANCE_V2: return btrfs_ioctl_balance(file, argp); case BTRFS_IOC_BALANCE_CTL: return btrfs_ioctl_balance_ctl(fs_info, arg); case BTRFS_IOC_BALANCE_PROGRESS: return btrfs_ioctl_balance_progress(fs_info, argp); case BTRFS_IOC_SET_RECEIVED_SUBVOL: return btrfs_ioctl_set_received_subvol(file, argp); #ifdef CONFIG_64BIT case BTRFS_IOC_SET_RECEIVED_SUBVOL_32: return btrfs_ioctl_set_received_subvol_32(file, argp); #endif case BTRFS_IOC_SEND: return _btrfs_ioctl_send(root, argp, false); #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) case BTRFS_IOC_SEND_32: return _btrfs_ioctl_send(root, argp, true); #endif case BTRFS_IOC_GET_DEV_STATS: return btrfs_ioctl_get_dev_stats(fs_info, argp); case BTRFS_IOC_QUOTA_CTL: return btrfs_ioctl_quota_ctl(file, argp); case BTRFS_IOC_QGROUP_ASSIGN: return btrfs_ioctl_qgroup_assign(file, argp); case BTRFS_IOC_QGROUP_CREATE: return btrfs_ioctl_qgroup_create(file, argp); case BTRFS_IOC_QGROUP_LIMIT: return btrfs_ioctl_qgroup_limit(file, argp); case BTRFS_IOC_QUOTA_RESCAN: return btrfs_ioctl_quota_rescan(file, argp); case BTRFS_IOC_QUOTA_RESCAN_STATUS: return btrfs_ioctl_quota_rescan_status(fs_info, argp); case BTRFS_IOC_QUOTA_RESCAN_WAIT: return btrfs_ioctl_quota_rescan_wait(fs_info); case BTRFS_IOC_DEV_REPLACE: return btrfs_ioctl_dev_replace(fs_info, argp); case BTRFS_IOC_GET_SUPPORTED_FEATURES: return btrfs_ioctl_get_supported_features(argp); case BTRFS_IOC_GET_FEATURES: return btrfs_ioctl_get_features(fs_info, argp); case BTRFS_IOC_SET_FEATURES: return btrfs_ioctl_set_features(file, argp); case BTRFS_IOC_GET_SUBVOL_INFO: return btrfs_ioctl_get_subvol_info(inode, argp); case BTRFS_IOC_GET_SUBVOL_ROOTREF: return btrfs_ioctl_get_subvol_rootref(root, argp); case BTRFS_IOC_INO_LOOKUP_USER: return btrfs_ioctl_ino_lookup_user(file, argp); case FS_IOC_ENABLE_VERITY: return fsverity_ioctl_enable(file, (const void __user *)argp); case FS_IOC_MEASURE_VERITY: return fsverity_ioctl_measure(file, argp); case FS_IOC_READ_VERITY_METADATA: return fsverity_ioctl_read_metadata(file, argp); case BTRFS_IOC_ENCODED_READ: return btrfs_ioctl_encoded_read(file, argp, false); case BTRFS_IOC_ENCODED_WRITE: return btrfs_ioctl_encoded_write(file, argp, false); #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) case BTRFS_IOC_ENCODED_READ_32: return btrfs_ioctl_encoded_read(file, argp, true); case BTRFS_IOC_ENCODED_WRITE_32: return btrfs_ioctl_encoded_write(file, argp, true); #endif case BTRFS_IOC_SUBVOL_SYNC_WAIT: return btrfs_ioctl_subvol_sync(fs_info, argp); } return -ENOTTY; } #ifdef CONFIG_COMPAT long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { /* * These all access 32-bit values anyway so no further * handling is necessary. */ switch (cmd) { case FS_IOC32_GETVERSION: cmd = FS_IOC_GETVERSION; break; } return btrfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); } #endif |
23 16 16 23 23 23 16 16 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 | // SPDX-License-Identifier: GPL-2.0 #include <linux/ceph/ceph_debug.h> #include <linux/fs.h> #include <linux/kernel.h> #include <linux/sched/signal.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/wait.h> #include <linux/writeback.h> #include <linux/iversion.h> #include <linux/filelock.h> #include <linux/jiffies.h> #include "super.h" #include "mds_client.h" #include "cache.h" #include "crypto.h" #include <linux/ceph/decode.h> #include <linux/ceph/messenger.h> /* * Capability management * * The Ceph metadata servers control client access to inode metadata * and file data by issuing capabilities, granting clients permission * to read and/or write both inode field and file data to OSDs * (storage nodes). Each capability consists of a set of bits * indicating which operations are allowed. * * If the client holds a *_SHARED cap, the client has a coherent value * that can be safely read from the cached inode. * * In the case of a *_EXCL (exclusive) or FILE_WR capabilities, the * client is allowed to change inode attributes (e.g., file size, * mtime), note its dirty state in the ceph_cap, and asynchronously * flush that metadata change to the MDS. * * In the event of a conflicting operation (perhaps by another * client), the MDS will revoke the conflicting client capabilities. * * In order for a client to cache an inode, it must hold a capability * with at least one MDS server. When inodes are released, release * notifications are batched and periodically sent en masse to the MDS * cluster to release server state. */ static u64 __get_oldest_flush_tid(struct ceph_mds_client *mdsc); static void __kick_flushing_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, struct ceph_inode_info *ci, u64 oldest_flush_tid); /* * Generate readable cap strings for debugging output. */ #define MAX_CAP_STR 20 static char cap_str[MAX_CAP_STR][40]; static DEFINE_SPINLOCK(cap_str_lock); static int last_cap_str; static char *gcap_string(char *s, int c) { if (c & CEPH_CAP_GSHARED) *s++ = 's'; if (c & CEPH_CAP_GEXCL) *s++ = 'x'; if (c & CEPH_CAP_GCACHE) *s++ = 'c'; if (c & CEPH_CAP_GRD) *s++ = 'r'; if (c & CEPH_CAP_GWR) *s++ = 'w'; if (c & CEPH_CAP_GBUFFER) *s++ = 'b'; if (c & CEPH_CAP_GWREXTEND) *s++ = 'a'; if (c & CEPH_CAP_GLAZYIO) *s++ = 'l'; return s; } const char *ceph_cap_string(int caps) { int i; char *s; int c; spin_lock(&cap_str_lock); i = last_cap_str++; if (last_cap_str == MAX_CAP_STR) last_cap_str = 0; spin_unlock(&cap_str_lock); s = cap_str[i]; if (caps & CEPH_CAP_PIN) *s++ = 'p'; c = (caps >> CEPH_CAP_SAUTH) & 3; if (c) { *s++ = 'A'; s = gcap_string(s, c); } c = (caps >> CEPH_CAP_SLINK) & 3; if (c) { *s++ = 'L'; s = gcap_string(s, c); } c = (caps >> CEPH_CAP_SXATTR) & 3; if (c) { *s++ = 'X'; s = gcap_string(s, c); } c = caps >> CEPH_CAP_SFILE; if (c) { *s++ = 'F'; s = gcap_string(s, c); } if (s == cap_str[i]) *s++ = '-'; *s = 0; return cap_str[i]; } void ceph_caps_init(struct ceph_mds_client *mdsc) { INIT_LIST_HEAD(&mdsc->caps_list); spin_lock_init(&mdsc->caps_list_lock); } void ceph_caps_finalize(struct ceph_mds_client *mdsc) { struct ceph_cap *cap; spin_lock(&mdsc->caps_list_lock); while (!list_empty(&mdsc->caps_list)) { cap = list_first_entry(&mdsc->caps_list, struct ceph_cap, caps_item); list_del(&cap->caps_item); kmem_cache_free(ceph_cap_cachep, cap); } mdsc->caps_total_count = 0; mdsc->caps_avail_count = 0; mdsc->caps_use_count = 0; mdsc->caps_reserve_count = 0; mdsc->caps_min_count = 0; spin_unlock(&mdsc->caps_list_lock); } void ceph_adjust_caps_max_min(struct ceph_mds_client *mdsc, struct ceph_mount_options *fsopt) { spin_lock(&mdsc->caps_list_lock); mdsc->caps_min_count = fsopt->max_readdir; if (mdsc->caps_min_count < 1024) mdsc->caps_min_count = 1024; mdsc->caps_use_max = fsopt->caps_max; if (mdsc->caps_use_max > 0 && mdsc->caps_use_max < mdsc->caps_min_count) mdsc->caps_use_max = mdsc->caps_min_count; spin_unlock(&mdsc->caps_list_lock); } static void __ceph_unreserve_caps(struct ceph_mds_client *mdsc, int nr_caps) { struct ceph_cap *cap; int i; if (nr_caps) { BUG_ON(mdsc->caps_reserve_count < nr_caps); mdsc->caps_reserve_count -= nr_caps; if (mdsc->caps_avail_count >= mdsc->caps_reserve_count + mdsc->caps_min_count) { mdsc->caps_total_count -= nr_caps; for (i = 0; i < nr_caps; i++) { cap = list_first_entry(&mdsc->caps_list, struct ceph_cap, caps_item); list_del(&cap->caps_item); kmem_cache_free(ceph_cap_cachep, cap); } } else { mdsc->caps_avail_count += nr_caps; } doutc(mdsc->fsc->client, "caps %d = %d used + %d resv + %d avail\n", mdsc->caps_total_count, mdsc->caps_use_count, mdsc->caps_reserve_count, mdsc->caps_avail_count); BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + mdsc->caps_reserve_count + mdsc->caps_avail_count); } } /* * Called under mdsc->mutex. */ int ceph_reserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx, int need) { struct ceph_client *cl = mdsc->fsc->client; int i, j; struct ceph_cap *cap; int have; int alloc = 0; int max_caps; int err = 0; bool trimmed = false; struct ceph_mds_session *s; LIST_HEAD(newcaps); doutc(cl, "ctx=%p need=%d\n", ctx, need); /* first reserve any caps that are already allocated */ spin_lock(&mdsc->caps_list_lock); if (mdsc->caps_avail_count >= need) have = need; else have = mdsc->caps_avail_count; mdsc->caps_avail_count -= have; mdsc->caps_reserve_count += have; BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + mdsc->caps_reserve_count + mdsc->caps_avail_count); spin_unlock(&mdsc->caps_list_lock); for (i = have; i < need; ) { cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); if (cap) { list_add(&cap->caps_item, &newcaps); alloc++; i++; continue; } if (!trimmed) { for (j = 0; j < mdsc->max_sessions; j++) { s = __ceph_lookup_mds_session(mdsc, j); if (!s) continue; mutex_unlock(&mdsc->mutex); mutex_lock(&s->s_mutex); max_caps = s->s_nr_caps - (need - i); ceph_trim_caps(mdsc, s, max_caps); mutex_unlock(&s->s_mutex); ceph_put_mds_session(s); mutex_lock(&mdsc->mutex); } trimmed = true; spin_lock(&mdsc->caps_list_lock); if (mdsc->caps_avail_count) { int more_have; if (mdsc->caps_avail_count >= need - i) more_have = need - i; else more_have = mdsc->caps_avail_count; i += more_have; have += more_have; mdsc->caps_avail_count -= more_have; mdsc->caps_reserve_count += more_have; } spin_unlock(&mdsc->caps_list_lock); continue; } pr_warn_client(cl, "ctx=%p ENOMEM need=%d got=%d\n", ctx, need, have + alloc); err = -ENOMEM; break; } if (!err) { BUG_ON(have + alloc != need); ctx->count = need; ctx->used = 0; } spin_lock(&mdsc->caps_list_lock); mdsc->caps_total_count += alloc; mdsc->caps_reserve_count += alloc; list_splice(&newcaps, &mdsc->caps_list); BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + mdsc->caps_reserve_count + mdsc->caps_avail_count); if (err) __ceph_unreserve_caps(mdsc, have + alloc); spin_unlock(&mdsc->caps_list_lock); doutc(cl, "ctx=%p %d = %d used + %d resv + %d avail\n", ctx, mdsc->caps_total_count, mdsc->caps_use_count, mdsc->caps_reserve_count, mdsc->caps_avail_count); return err; } void ceph_unreserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx) { struct ceph_client *cl = mdsc->fsc->client; bool reclaim = false; if (!ctx->count) return; doutc(cl, "ctx=%p count=%d\n", ctx, ctx->count); spin_lock(&mdsc->caps_list_lock); __ceph_unreserve_caps(mdsc, ctx->count); ctx->count = 0; if (mdsc->caps_use_max > 0 && mdsc->caps_use_count > mdsc->caps_use_max) reclaim = true; spin_unlock(&mdsc->caps_list_lock); if (reclaim) ceph_reclaim_caps_nr(mdsc, ctx->used); } struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx) { struct ceph_client *cl = mdsc->fsc->client; struct ceph_cap *cap = NULL; /* temporary, until we do something about cap import/export */ if (!ctx) { cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); if (cap) { spin_lock(&mdsc->caps_list_lock); mdsc->caps_use_count++; mdsc->caps_total_count++; spin_unlock(&mdsc->caps_list_lock); } else { spin_lock(&mdsc->caps_list_lock); if (mdsc->caps_avail_count) { BUG_ON(list_empty(&mdsc->caps_list)); mdsc->caps_avail_count--; mdsc->caps_use_count++; cap = list_first_entry(&mdsc->caps_list, struct ceph_cap, caps_item); list_del(&cap->caps_item); BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + mdsc->caps_reserve_count + mdsc->caps_avail_count); } spin_unlock(&mdsc->caps_list_lock); } return cap; } spin_lock(&mdsc->caps_list_lock); doutc(cl, "ctx=%p (%d) %d = %d used + %d resv + %d avail\n", ctx, ctx->count, mdsc->caps_total_count, mdsc->caps_use_count, mdsc->caps_reserve_count, mdsc->caps_avail_count); BUG_ON(!ctx->count); BUG_ON(ctx->count > mdsc->caps_reserve_count); BUG_ON(list_empty(&mdsc->caps_list)); ctx->count--; ctx->used++; mdsc->caps_reserve_count--; mdsc->caps_use_count++; cap = list_first_entry(&mdsc->caps_list, struct ceph_cap, caps_item); list_del(&cap->caps_item); BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + mdsc->caps_reserve_count + mdsc->caps_avail_count); spin_unlock(&mdsc->caps_list_lock); return cap; } void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap) { struct ceph_client *cl = mdsc->fsc->client; spin_lock(&mdsc->caps_list_lock); doutc(cl, "%p %d = %d used + %d resv + %d avail\n", cap, mdsc->caps_total_count, mdsc->caps_use_count, mdsc->caps_reserve_count, mdsc->caps_avail_count); mdsc->caps_use_count--; /* * Keep some preallocated caps around (ceph_min_count), to * avoid lots of free/alloc churn. */ if (mdsc->caps_avail_count >= mdsc->caps_reserve_count + mdsc->caps_min_count) { mdsc->caps_total_count--; kmem_cache_free(ceph_cap_cachep, cap); } else { mdsc->caps_avail_count++; list_add(&cap->caps_item, &mdsc->caps_list); } BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + mdsc->caps_reserve_count + mdsc->caps_avail_count); spin_unlock(&mdsc->caps_list_lock); } void ceph_reservation_status(struct ceph_fs_client *fsc, int *total, int *avail, int *used, int *reserved, int *min) { struct ceph_mds_client *mdsc = fsc->mdsc; spin_lock(&mdsc->caps_list_lock); if (total) *total = mdsc->caps_total_count; if (avail) *avail = mdsc->caps_avail_count; if (used) *used = mdsc->caps_use_count; if (reserved) *reserved = mdsc->caps_reserve_count; if (min) *min = mdsc->caps_min_count; spin_unlock(&mdsc->caps_list_lock); } /* * Find ceph_cap for given mds, if any. * * Called with i_ceph_lock held. */ struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds) { struct ceph_cap *cap; struct rb_node *n = ci->i_caps.rb_node; while (n) { cap = rb_entry(n, struct ceph_cap, ci_node); if (mds < cap->mds) n = n->rb_left; else if (mds > cap->mds) n = n->rb_right; else return cap; } return NULL; } struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, int mds) { struct ceph_cap *cap; spin_lock(&ci->i_ceph_lock); cap = __get_cap_for_mds(ci, mds); spin_unlock(&ci->i_ceph_lock); return cap; } /* * Called under i_ceph_lock. */ static void __insert_cap_node(struct ceph_inode_info *ci, struct ceph_cap *new) { struct rb_node **p = &ci->i_caps.rb_node; struct rb_node *parent = NULL; struct ceph_cap *cap = NULL; while (*p) { parent = *p; cap = rb_entry(parent, struct ceph_cap, ci_node); if (new->mds < cap->mds) p = &(*p)->rb_left; else if (new->mds > cap->mds) p = &(*p)->rb_right; else BUG(); } rb_link_node(&new->ci_node, parent, p); rb_insert_color(&new->ci_node, &ci->i_caps); } /* * (re)set cap hold timeouts, which control the delayed release * of unused caps back to the MDS. Should be called on cap use. */ static void __cap_set_timeouts(struct ceph_mds_client *mdsc, struct ceph_inode_info *ci) { struct inode *inode = &ci->netfs.inode; struct ceph_mount_options *opt = mdsc->fsc->mount_options; ci->i_hold_caps_max = round_jiffies(jiffies + opt->caps_wanted_delay_max * HZ); doutc(mdsc->fsc->client, "%p %llx.%llx %lu\n", inode, ceph_vinop(inode), ci->i_hold_caps_max - jiffies); } /* * (Re)queue cap at the end of the delayed cap release list. * * If I_FLUSH is set, leave the inode at the front of the list. * * Caller holds i_ceph_lock * -> we take mdsc->cap_delay_lock */ static void __cap_delay_requeue(struct ceph_mds_client *mdsc, struct ceph_inode_info *ci) { struct inode *inode = &ci->netfs.inode; doutc(mdsc->fsc->client, "%p %llx.%llx flags 0x%lx at %lu\n", inode, ceph_vinop(inode), ci->i_ceph_flags, ci->i_hold_caps_max); if (!mdsc->stopping) { spin_lock(&mdsc->cap_delay_lock); if (!list_empty(&ci->i_cap_delay_list)) { if (ci->i_ceph_flags & CEPH_I_FLUSH) goto no_change; list_del_init(&ci->i_cap_delay_list); } __cap_set_timeouts(mdsc, ci); list_add_tail(&ci->i_cap_delay_list, &mdsc->cap_delay_list); no_change: spin_unlock(&mdsc->cap_delay_lock); } } /* * Queue an inode for immediate writeback. Mark inode with I_FLUSH, * indicating we should send a cap message to flush dirty metadata * asap, and move to the front of the delayed cap list. */ static void __cap_delay_requeue_front(struct ceph_mds_client *mdsc, struct ceph_inode_info *ci) { struct inode *inode = &ci->netfs.inode; doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode, ceph_vinop(inode)); spin_lock(&mdsc->cap_delay_lock); ci->i_ceph_flags |= CEPH_I_FLUSH; if (!list_empty(&ci->i_cap_delay_list)) list_del_init(&ci->i_cap_delay_list); list_add(&ci->i_cap_delay_list, &mdsc->cap_delay_list); spin_unlock(&mdsc->cap_delay_lock); } /* * Cancel delayed work on cap. * * Caller must hold i_ceph_lock. */ static void __cap_delay_cancel(struct ceph_mds_client *mdsc, struct ceph_inode_info *ci) { struct inode *inode = &ci->netfs.inode; doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode, ceph_vinop(inode)); if (list_empty(&ci->i_cap_delay_list)) return; spin_lock(&mdsc->cap_delay_lock); list_del_init(&ci->i_cap_delay_list); spin_unlock(&mdsc->cap_delay_lock); } /* Common issue checks for add_cap, handle_cap_grant. */ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap, unsigned issued) { struct inode *inode = &ci->netfs.inode; struct ceph_client *cl = ceph_inode_to_client(inode); unsigned had = __ceph_caps_issued(ci, NULL); lockdep_assert_held(&ci->i_ceph_lock); /* * Each time we receive FILE_CACHE anew, we increment * i_rdcache_gen. */ if (S_ISREG(ci->netfs.inode.i_mode) && (issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) && (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0) { ci->i_rdcache_gen++; } /* * If FILE_SHARED is newly issued, mark dir not complete. We don't * know what happened to this directory while we didn't have the cap. * If FILE_SHARED is being revoked, also mark dir not complete. It * stops on-going cached readdir. */ if ((issued & CEPH_CAP_FILE_SHARED) != (had & CEPH_CAP_FILE_SHARED)) { if (issued & CEPH_CAP_FILE_SHARED) atomic_inc(&ci->i_shared_gen); if (S_ISDIR(ci->netfs.inode.i_mode)) { doutc(cl, " marking %p NOT complete\n", inode); __ceph_dir_clear_complete(ci); } } /* Wipe saved layout if we're losing DIR_CREATE caps */ if (S_ISDIR(ci->netfs.inode.i_mode) && (had & CEPH_CAP_DIR_CREATE) && !(issued & CEPH_CAP_DIR_CREATE)) { ceph_put_string(rcu_dereference_raw(ci->i_cached_layout.pool_ns)); memset(&ci->i_cached_layout, 0, sizeof(ci->i_cached_layout)); } } /** * change_auth_cap_ses - move inode to appropriate lists when auth caps change * @ci: inode to be moved * @session: new auth caps session */ void change_auth_cap_ses(struct ceph_inode_info *ci, struct ceph_mds_session *session) { lockdep_assert_held(&ci->i_ceph_lock); if (list_empty(&ci->i_dirty_item) && list_empty(&ci->i_flushing_item)) return; spin_lock(&session->s_mdsc->cap_dirty_lock); if (!list_empty(&ci->i_dirty_item)) list_move(&ci->i_dirty_item, &session->s_cap_dirty); if (!list_empty(&ci->i_flushing_item)) list_move_tail(&ci->i_flushing_item, &session->s_cap_flushing); spin_unlock(&session->s_mdsc->cap_dirty_lock); } /* * Add a capability under the given MDS session. * * Caller should hold session snap_rwsem (read) and ci->i_ceph_lock * * @fmode is the open file mode, if we are opening a file, otherwise * it is < 0. (This is so we can atomically add the cap and add an * open file reference to it.) */ void ceph_add_cap(struct inode *inode, struct ceph_mds_session *session, u64 cap_id, unsigned issued, unsigned wanted, unsigned seq, unsigned mseq, u64 realmino, int flags, struct ceph_cap **new_cap) { struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_cap *cap; int mds = session->s_mds; int actual_wanted; u32 gen; lockdep_assert_held(&ci->i_ceph_lock); doutc(cl, "%p %llx.%llx mds%d cap %llx %s seq %d\n", inode, ceph_vinop(inode), session->s_mds, cap_id, ceph_cap_string(issued), seq); gen = atomic_read(&session->s_cap_gen); cap = __get_cap_for_mds(ci, mds); if (!cap) { cap = *new_cap; *new_cap = NULL; cap->issued = 0; cap->implemented = 0; cap->mds = mds; cap->mds_wanted = 0; cap->mseq = 0; cap->ci = ci; __insert_cap_node(ci, cap); /* add to session cap list */ cap->session = session; spin_lock(&session->s_cap_lock); list_add_tail(&cap->session_caps, &session->s_caps); session->s_nr_caps++; atomic64_inc(&mdsc->metric.total_caps); spin_unlock(&session->s_cap_lock); } else { spin_lock(&session->s_cap_lock); list_move_tail(&cap->session_caps, &session->s_caps); spin_unlock(&session->s_cap_lock); if (cap->cap_gen < gen) cap->issued = cap->implemented = CEPH_CAP_PIN; /* * auth mds of the inode changed. we received the cap export * message, but still haven't received the cap import message. * handle_cap_export() updated the new auth MDS' cap. * * "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing * a message that was send before the cap import message. So * don't remove caps. */ if (ceph_seq_cmp(seq, cap->seq) <= 0) { WARN_ON(cap != ci->i_auth_cap); WARN_ON(cap->cap_id != cap_id); seq = cap->seq; mseq = cap->mseq; issued |= cap->issued; flags |= CEPH_CAP_FLAG_AUTH; } } if (!ci->i_snap_realm || ((flags & CEPH_CAP_FLAG_AUTH) && realmino != (u64)-1 && ci->i_snap_realm->ino != realmino)) { /* * add this inode to the appropriate snap realm */ struct ceph_snap_realm *realm = ceph_lookup_snap_realm(mdsc, realmino); if (realm) ceph_change_snap_realm(inode, realm); else WARN(1, "%s: couldn't find snap realm 0x%llx (ino 0x%llx oldrealm 0x%llx)\n", __func__, realmino, ci->i_vino.ino, ci->i_snap_realm ? ci->i_snap_realm->ino : 0); } __check_cap_issue(ci, cap, issued); /* * If we are issued caps we don't want, or the mds' wanted * value appears to be off, queue a check so we'll release * later and/or update the mds wanted value. */ actual_wanted = __ceph_caps_wanted(ci); if ((wanted & ~actual_wanted) || (issued & ~actual_wanted & CEPH_CAP_ANY_WR)) { doutc(cl, "issued %s, mds wanted %s, actual %s, queueing\n", ceph_cap_string(issued), ceph_cap_string(wanted), ceph_cap_string(actual_wanted)); __cap_delay_requeue(mdsc, ci); } if (flags & CEPH_CAP_FLAG_AUTH) { if (!ci->i_auth_cap || ceph_seq_cmp(ci->i_auth_cap->mseq, mseq) < 0) { if (ci->i_auth_cap && ci->i_auth_cap->session != cap->session) change_auth_cap_ses(ci, cap->session); ci->i_auth_cap = cap; cap->mds_wanted = wanted; } } else { WARN_ON(ci->i_auth_cap == cap); } doutc(cl, "inode %p %llx.%llx cap %p %s now %s seq %d mds%d\n", inode, ceph_vinop(inode), cap, ceph_cap_string(issued), ceph_cap_string(issued|cap->issued), seq, mds); cap->cap_id = cap_id; cap->issued = issued; cap->implemented |= issued; if (ceph_seq_cmp(mseq, cap->mseq) > 0) cap->mds_wanted = wanted; else cap->mds_wanted |= wanted; cap->seq = seq; cap->issue_seq = seq; cap->mseq = mseq; cap->cap_gen = gen; wake_up_all(&ci->i_cap_wq); } /* * Return true if cap has not timed out and belongs to the current * generation of the MDS session (i.e. has not gone 'stale' due to * us losing touch with the mds). */ static int __cap_is_valid(struct ceph_cap *cap) { struct inode *inode = &cap->ci->netfs.inode; struct ceph_client *cl = cap->session->s_mdsc->fsc->client; unsigned long ttl; u32 gen; gen = atomic_read(&cap->session->s_cap_gen); ttl = cap->session->s_cap_ttl; if (cap->cap_gen < gen || time_after_eq(jiffies, ttl)) { doutc(cl, "%p %llx.%llx cap %p issued %s but STALE (gen %u vs %u)\n", inode, ceph_vinop(inode), cap, ceph_cap_string(cap->issued), cap->cap_gen, gen); return 0; } return 1; } /* * Return set of valid cap bits issued to us. Note that caps time * out, and may be invalidated in bulk if the client session times out * and session->s_cap_gen is bumped. */ int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented) { struct inode *inode = &ci->netfs.inode; struct ceph_client *cl = ceph_inode_to_client(inode); int have = ci->i_snap_caps; struct ceph_cap *cap; struct rb_node *p; if (implemented) *implemented = 0; for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { cap = rb_entry(p, struct ceph_cap, ci_node); if (!__cap_is_valid(cap)) continue; doutc(cl, "%p %llx.%llx cap %p issued %s\n", inode, ceph_vinop(inode), cap, ceph_cap_string(cap->issued)); have |= cap->issued; if (implemented) *implemented |= cap->implemented; } /* * exclude caps issued by non-auth MDS, but are been revoking * by the auth MDS. The non-auth MDS should be revoking/exporting * these caps, but the message is delayed. */ if (ci->i_auth_cap) { cap = ci->i_auth_cap; have &= ~cap->implemented | cap->issued; } return have; } /* * Get cap bits issued by caps other than @ocap */ int __ceph_caps_issued_other(struct ceph_inode_info *ci, struct ceph_cap *ocap) { int have = ci->i_snap_caps; struct ceph_cap *cap; struct rb_node *p; for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { cap = rb_entry(p, struct ceph_cap, ci_node); if (cap == ocap) continue; if (!__cap_is_valid(cap)) continue; have |= cap->issued; } return have; } /* * Move a cap to the end of the LRU (oldest caps at list head, newest * at list tail). */ static void __touch_cap(struct ceph_cap *cap) { struct inode *inode = &cap->ci->netfs.inode; struct ceph_mds_session *s = cap->session; struct ceph_client *cl = s->s_mdsc->fsc->client; spin_lock(&s->s_cap_lock); if (!s->s_cap_iterator) { doutc(cl, "%p %llx.%llx cap %p mds%d\n", inode, ceph_vinop(inode), cap, s->s_mds); list_move_tail(&cap->session_caps, &s->s_caps); } else { doutc(cl, "%p %llx.%llx cap %p mds%d NOP, iterating over caps\n", inode, ceph_vinop(inode), cap, s->s_mds); } spin_unlock(&s->s_cap_lock); } /* * Check if we hold the given mask. If so, move the cap(s) to the * front of their respective LRUs. (This is the preferred way for * callers to check for caps they want.) */ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch) { struct inode *inode = &ci->netfs.inode; struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_cap *cap; struct rb_node *p; int have = ci->i_snap_caps; if ((have & mask) == mask) { doutc(cl, "mask %p %llx.%llx snap issued %s (mask %s)\n", inode, ceph_vinop(inode), ceph_cap_string(have), ceph_cap_string(mask)); return 1; } for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { cap = rb_entry(p, struct ceph_cap, ci_node); if (!__cap_is_valid(cap)) continue; if ((cap->issued & mask) == mask) { doutc(cl, "mask %p %llx.%llx cap %p issued %s (mask %s)\n", inode, ceph_vinop(inode), cap, ceph_cap_string(cap->issued), ceph_cap_string(mask)); if (touch) __touch_cap(cap); return 1; } /* does a combination of caps satisfy mask? */ have |= cap->issued; if ((have & mask) == mask) { doutc(cl, "mask %p %llx.%llx combo issued %s (mask %s)\n", inode, ceph_vinop(inode), ceph_cap_string(cap->issued), ceph_cap_string(mask)); if (touch) { struct rb_node *q; /* touch this + preceding caps */ __touch_cap(cap); for (q = rb_first(&ci->i_caps); q != p; q = rb_next(q)) { cap = rb_entry(q, struct ceph_cap, ci_node); if (!__cap_is_valid(cap)) continue; if (cap->issued & mask) __touch_cap(cap); } } return 1; } } return 0; } int __ceph_caps_issued_mask_metric(struct ceph_inode_info *ci, int mask, int touch) { struct ceph_fs_client *fsc = ceph_sb_to_fs_client(ci->netfs.inode.i_sb); int r; r = __ceph_caps_issued_mask(ci, mask, touch); if (r) ceph_update_cap_hit(&fsc->mdsc->metric); else ceph_update_cap_mis(&fsc->mdsc->metric); return r; } /* * Return true if mask caps are currently being revoked by an MDS. */ int __ceph_caps_revoking_other(struct ceph_inode_info *ci, struct ceph_cap *ocap, int mask) { struct ceph_cap *cap; struct rb_node *p; for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { cap = rb_entry(p, struct ceph_cap, ci_node); if (cap != ocap && (cap->implemented & ~cap->issued & mask)) return 1; } return 0; } int __ceph_caps_used(struct ceph_inode_info *ci) { int used = 0; if (ci->i_pin_ref) used |= CEPH_CAP_PIN; if (ci->i_rd_ref) used |= CEPH_CAP_FILE_RD; if (ci->i_rdcache_ref || (S_ISREG(ci->netfs.inode.i_mode) && ci->netfs.inode.i_data.nrpages)) used |= CEPH_CAP_FILE_CACHE; if (ci->i_wr_ref) used |= CEPH_CAP_FILE_WR; if (ci->i_wb_ref || ci->i_wrbuffer_ref) used |= CEPH_CAP_FILE_BUFFER; if (ci->i_fx_ref) used |= CEPH_CAP_FILE_EXCL; return used; } #define FMODE_WAIT_BIAS 1000 /* * wanted, by virtue of open file modes */ int __ceph_caps_file_wanted(struct ceph_inode_info *ci) { const int PIN_SHIFT = ffs(CEPH_FILE_MODE_PIN); const int RD_SHIFT = ffs(CEPH_FILE_MODE_RD); const int WR_SHIFT = ffs(CEPH_FILE_MODE_WR); const int LAZY_SHIFT = ffs(CEPH_FILE_MODE_LAZY); struct ceph_mount_options *opt = ceph_inode_to_fs_client(&ci->netfs.inode)->mount_options; unsigned long used_cutoff = jiffies - opt->caps_wanted_delay_max * HZ; unsigned long idle_cutoff = jiffies - opt->caps_wanted_delay_min * HZ; if (S_ISDIR(ci->netfs.inode.i_mode)) { int want = 0; /* use used_cutoff here, to keep dir's wanted caps longer */ if (ci->i_nr_by_mode[RD_SHIFT] > 0 || time_after(ci->i_last_rd, used_cutoff)) want |= CEPH_CAP_ANY_SHARED; if (ci->i_nr_by_mode[WR_SHIFT] > 0 || time_after(ci->i_last_wr, used_cutoff)) { want |= CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL; if (opt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS) want |= CEPH_CAP_ANY_DIR_OPS; } if (want || ci->i_nr_by_mode[PIN_SHIFT] > 0) want |= CEPH_CAP_PIN; return want; } else { int bits = 0; if (ci->i_nr_by_mode[RD_SHIFT] > 0) { if (ci->i_nr_by_mode[RD_SHIFT] >= FMODE_WAIT_BIAS || time_after(ci->i_last_rd, used_cutoff)) bits |= 1 << RD_SHIFT; } else if (time_after(ci->i_last_rd, idle_cutoff)) { bits |= 1 << RD_SHIFT; } if (ci->i_nr_by_mode[WR_SHIFT] > 0) { if (ci->i_nr_by_mode[WR_SHIFT] >= FMODE_WAIT_BIAS || time_after(ci->i_last_wr, used_cutoff)) bits |= 1 << WR_SHIFT; } else if (time_after(ci->i_last_wr, idle_cutoff)) { bits |= 1 << WR_SHIFT; } /* check lazyio only when read/write is wanted */ if ((bits & (CEPH_FILE_MODE_RDWR << 1)) && ci->i_nr_by_mode[LAZY_SHIFT] > 0) bits |= 1 << LAZY_SHIFT; return bits ? ceph_caps_for_mode(bits >> 1) : 0; } } /* * wanted, by virtue of open file modes AND cap refs (buffered/cached data) */ int __ceph_caps_wanted(struct ceph_inode_info *ci) { int w = __ceph_caps_file_wanted(ci) | __ceph_caps_used(ci); if (S_ISDIR(ci->netfs.inode.i_mode)) { /* we want EXCL if holding caps of dir ops */ if (w & CEPH_CAP_ANY_DIR_OPS) w |= CEPH_CAP_FILE_EXCL; } else { /* we want EXCL if dirty data */ if (w & CEPH_CAP_FILE_BUFFER) w |= CEPH_CAP_FILE_EXCL; } return w; } /* * Return caps we have registered with the MDS(s) as 'wanted'. */ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check) { struct ceph_cap *cap; struct rb_node *p; int mds_wanted = 0; for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { cap = rb_entry(p, struct ceph_cap, ci_node); if (check && !__cap_is_valid(cap)) continue; if (cap == ci->i_auth_cap) mds_wanted |= cap->mds_wanted; else mds_wanted |= (cap->mds_wanted & ~CEPH_CAP_ANY_FILE_WR); } return mds_wanted; } int ceph_is_any_caps(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); int ret; spin_lock(&ci->i_ceph_lock); ret = __ceph_is_any_real_caps(ci); spin_unlock(&ci->i_ceph_lock); return ret; } /* * Remove a cap. Take steps to deal with a racing iterate_session_caps. * * caller should hold i_ceph_lock. * caller will not hold session s_mutex if called from destroy_inode. */ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) { struct ceph_mds_session *session = cap->session; struct ceph_client *cl = session->s_mdsc->fsc->client; struct ceph_inode_info *ci = cap->ci; struct inode *inode = &ci->netfs.inode; struct ceph_mds_client *mdsc; int removed = 0; /* 'ci' being NULL means the remove have already occurred */ if (!ci) { doutc(cl, "inode is NULL\n"); return; } lockdep_assert_held(&ci->i_ceph_lock); doutc(cl, "%p from %p %llx.%llx\n", cap, inode, ceph_vinop(inode)); mdsc = ceph_inode_to_fs_client(&ci->netfs.inode)->mdsc; /* remove from inode's cap rbtree, and clear auth cap */ rb_erase(&cap->ci_node, &ci->i_caps); if (ci->i_auth_cap == cap) ci->i_auth_cap = NULL; /* remove from session list */ spin_lock(&session->s_cap_lock); if (session->s_cap_iterator == cap) { /* not yet, we are iterating over this very cap */ doutc(cl, "delaying %p removal from session %p\n", cap, cap->session); } else { list_del_init(&cap->session_caps); session->s_nr_caps--; atomic64_dec(&mdsc->metric.total_caps); cap->session = NULL; removed = 1; } /* protect backpointer with s_cap_lock: see iterate_session_caps */ cap->ci = NULL; /* * s_cap_reconnect is protected by s_cap_lock. no one changes * s_cap_gen while session is in the reconnect state. */ if (queue_release && (!session->s_cap_reconnect || cap->cap_gen == atomic_read(&session->s_cap_gen))) { cap->queue_release = 1; if (removed) { __ceph_queue_cap_release(session, cap); removed = 0; } } else { cap->queue_release = 0; } cap->cap_ino = ci->i_vino.ino; spin_unlock(&session->s_cap_lock); if (removed) ceph_put_cap(mdsc, cap); if (!__ceph_is_any_real_caps(ci)) { /* when reconnect denied, we remove session caps forcibly, * i_wr_ref can be non-zero. If there are ongoing write, * keep i_snap_realm. */ if (ci->i_wr_ref == 0 && ci->i_snap_realm) ceph_change_snap_realm(&ci->netfs.inode, NULL); __cap_delay_cancel(mdsc, ci); } } void ceph_remove_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, bool queue_release) { struct ceph_inode_info *ci = cap->ci; struct ceph_fs_client *fsc; /* 'ci' being NULL means the remove have already occurred */ if (!ci) { doutc(mdsc->fsc->client, "inode is NULL\n"); return; } lockdep_assert_held(&ci->i_ceph_lock); fsc = ceph_inode_to_fs_client(&ci->netfs.inode); WARN_ON_ONCE(ci->i_auth_cap == cap && !list_empty(&ci->i_dirty_item) && !fsc->blocklisted && !ceph_inode_is_shutdown(&ci->netfs.inode)); __ceph_remove_cap(cap, queue_release); } struct cap_msg_args { struct ceph_mds_session *session; u64 ino, cid, follows; u64 flush_tid, oldest_flush_tid, size, max_size; u64 xattr_version; u64 change_attr; struct ceph_buffer *xattr_buf; struct ceph_buffer *old_xattr_buf; struct timespec64 atime, mtime, ctime, btime; int op, caps, wanted, dirty; u32 seq, issue_seq, mseq, time_warp_seq; u32 flags; kuid_t uid; kgid_t gid; umode_t mode; bool inline_data; bool wake; bool encrypted; u32 fscrypt_auth_len; u8 fscrypt_auth[sizeof(struct ceph_fscrypt_auth)]; // for context }; /* Marshal up the cap msg to the MDS */ static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg) { struct ceph_mds_caps *fc; void *p; struct ceph_mds_client *mdsc = arg->session->s_mdsc; struct ceph_osd_client *osdc = &mdsc->fsc->client->osdc; doutc(mdsc->fsc->client, "%s %llx %llx caps %s wanted %s dirty %s seq %u/%u" " tid %llu/%llu mseq %u follows %lld size %llu/%llu" " xattr_ver %llu xattr_len %d\n", ceph_cap_op_name(arg->op), arg->cid, arg->ino, ceph_cap_string(arg->caps), ceph_cap_string(arg->wanted), ceph_cap_string(arg->dirty), arg->seq, arg->issue_seq, arg->flush_tid, arg->oldest_flush_tid, arg->mseq, arg->follows, arg->size, arg->max_size, arg->xattr_version, arg->xattr_buf ? (int)arg->xattr_buf->vec.iov_len : 0); msg->hdr.version = cpu_to_le16(12); msg->hdr.tid = cpu_to_le64(arg->flush_tid); fc = msg->front.iov_base; memset(fc, 0, sizeof(*fc)); fc->cap_id = cpu_to_le64(arg->cid); fc->op = cpu_to_le32(arg->op); fc->seq = cpu_to_le32(arg->seq); fc->issue_seq = cpu_to_le32(arg->issue_seq); fc->migrate_seq = cpu_to_le32(arg->mseq); fc->caps = cpu_to_le32(arg->caps); fc->wanted = cpu_to_le32(arg->wanted); fc->dirty = cpu_to_le32(arg->dirty); fc->ino = cpu_to_le64(arg->ino); fc->snap_follows = cpu_to_le64(arg->follows); #if IS_ENABLED(CONFIG_FS_ENCRYPTION) if (arg->encrypted) fc->size = cpu_to_le64(round_up(arg->size, CEPH_FSCRYPT_BLOCK_SIZE)); else #endif fc->size = cpu_to_le64(arg->size); fc->max_size = cpu_to_le64(arg->max_size); ceph_encode_timespec64(&fc->mtime, &arg->mtime); ceph_encode_timespec64(&fc->atime, &arg->atime); ceph_encode_timespec64(&fc->ctime, &arg->ctime); fc->time_warp_seq = cpu_to_le32(arg->time_warp_seq); fc->uid = cpu_to_le32(from_kuid(&init_user_ns, arg->uid)); fc->gid = cpu_to_le32(from_kgid(&init_user_ns, arg->gid)); fc->mode = cpu_to_le32(arg->mode); fc->xattr_version = cpu_to_le64(arg->xattr_version); if (arg->xattr_buf) { msg->middle = ceph_buffer_get(arg->xattr_buf); fc->xattr_len = cpu_to_le32(arg->xattr_buf->vec.iov_len); msg->hdr.middle_len = cpu_to_le32(arg->xattr_buf->vec.iov_len); } p = fc + 1; /* flock buffer size (version 2) */ ceph_encode_32(&p, 0); /* inline version (version 4) */ ceph_encode_64(&p, arg->inline_data ? 0 : CEPH_INLINE_NONE); /* inline data size */ ceph_encode_32(&p, 0); /* * osd_epoch_barrier (version 5) * The epoch_barrier is protected osdc->lock, so READ_ONCE here in * case it was recently changed */ ceph_encode_32(&p, READ_ONCE(osdc->epoch_barrier)); /* oldest_flush_tid (version 6) */ ceph_encode_64(&p, arg->oldest_flush_tid); /* * caller_uid/caller_gid (version 7) * * Currently, we don't properly track which caller dirtied the caps * last, and force a flush of them when there is a conflict. For now, * just set this to 0:0, to emulate how the MDS has worked up to now. */ ceph_encode_32(&p, 0); ceph_encode_32(&p, 0); /* pool namespace (version 8) (mds always ignores this) */ ceph_encode_32(&p, 0); /* btime and change_attr (version 9) */ ceph_encode_timespec64(p, &arg->btime); p += sizeof(struct ceph_timespec); ceph_encode_64(&p, arg->change_attr); /* Advisory flags (version 10) */ ceph_encode_32(&p, arg->flags); /* dirstats (version 11) - these are r/o on the client */ ceph_encode_64(&p, 0); ceph_encode_64(&p, 0); #if IS_ENABLED(CONFIG_FS_ENCRYPTION) /* * fscrypt_auth and fscrypt_file (version 12) * * fscrypt_auth holds the crypto context (if any). fscrypt_file * tracks the real i_size as an __le64 field (and we use a rounded-up * i_size in the traditional size field). */ ceph_encode_32(&p, arg->fscrypt_auth_len); ceph_encode_copy(&p, arg->fscrypt_auth, arg->fscrypt_auth_len); ceph_encode_32(&p, sizeof(__le64)); ceph_encode_64(&p, arg->size); #else /* CONFIG_FS_ENCRYPTION */ ceph_encode_32(&p, 0); ceph_encode_32(&p, 0); #endif /* CONFIG_FS_ENCRYPTION */ } /* * Queue cap releases when an inode is dropped from our cache. */ void __ceph_remove_caps(struct ceph_inode_info *ci) { struct inode *inode = &ci->netfs.inode; struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; struct rb_node *p; /* lock i_ceph_lock, because ceph_d_revalidate(..., LOOKUP_RCU) * may call __ceph_caps_issued_mask() on a freeing inode. */ spin_lock(&ci->i_ceph_lock); p = rb_first(&ci->i_caps); while (p) { struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node); p = rb_next(p); ceph_remove_cap(mdsc, cap, true); } spin_unlock(&ci->i_ceph_lock); } /* * Prepare to send a cap message to an MDS. Update the cap state, and populate * the arg struct with the parameters that will need to be sent. This should * be done under the i_ceph_lock to guard against changes to cap state. * * Make note of max_size reported/requested from mds, revoked caps * that have now been implemented. */ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap, int op, int flags, int used, int want, int retain, int flushing, u64 flush_tid, u64 oldest_flush_tid) { struct ceph_inode_info *ci = cap->ci; struct inode *inode = &ci->netfs.inode; struct ceph_client *cl = ceph_inode_to_client(inode); int held, revoking; lockdep_assert_held(&ci->i_ceph_lock); held = cap->issued | cap->implemented; revoking = cap->implemented & ~cap->issued; retain &= ~revoking; doutc(cl, "%p %llx.%llx cap %p session %p %s -> %s (revoking %s)\n", inode, ceph_vinop(inode), cap, cap->session, ceph_cap_string(held), ceph_cap_string(held & retain), ceph_cap_string(revoking)); BUG_ON((retain & CEPH_CAP_PIN) == 0); ci->i_ceph_flags &= ~CEPH_I_FLUSH; cap->issued &= retain; /* drop bits we don't want */ /* * Wake up any waiters on wanted -> needed transition. This is due to * the weird transition from buffered to sync IO... we need to flush * dirty pages _before_ allowing sync writes to avoid reordering. */ arg->wake = cap->implemented & ~cap->issued; cap->implemented &= cap->issued | used; cap->mds_wanted = want; arg->session = cap->session; arg->ino = ceph_vino(inode).ino; arg->cid = cap->cap_id; arg->follows = flushing ? ci->i_head_snapc->seq : 0; arg->flush_tid = flush_tid; arg->oldest_flush_tid = oldest_flush_tid; arg->size = i_size_read(inode); ci->i_reported_size = arg->size; arg->max_size = ci->i_wanted_max_size; if (cap == ci->i_auth_cap) { if (want & CEPH_CAP_ANY_FILE_WR) ci->i_requested_max_size = arg->max_size; else ci->i_requested_max_size = 0; } if (flushing & CEPH_CAP_XATTR_EXCL) { arg->old_xattr_buf = __ceph_build_xattrs_blob(ci); arg->xattr_version = ci->i_xattrs.version; arg->xattr_buf = ceph_buffer_get(ci->i_xattrs.blob); } else { arg->xattr_buf = NULL; arg->old_xattr_buf = NULL; } arg->mtime = inode_get_mtime(inode); arg->atime = inode_get_atime(inode); arg->ctime = inode_get_ctime(inode); arg->btime = ci->i_btime; arg->change_attr = inode_peek_iversion_raw(inode); arg->op = op; arg->caps = cap->implemented; arg->wanted = want; arg->dirty = flushing; arg->seq = cap->seq; arg->issue_seq = cap->issue_seq; arg->mseq = cap->mseq; arg->time_warp_seq = ci->i_time_warp_seq; arg->uid = inode->i_uid; arg->gid = inode->i_gid; arg->mode = inode->i_mode; arg->inline_data = ci->i_inline_version != CEPH_INLINE_NONE; if (!(flags & CEPH_CLIENT_CAPS_PENDING_CAPSNAP) && !list_empty(&ci->i_cap_snaps)) { struct ceph_cap_snap *capsnap; list_for_each_entry_reverse(capsnap, &ci->i_cap_snaps, ci_item) { if (capsnap->cap_flush.tid) break; if (capsnap->need_flush) { flags |= CEPH_CLIENT_CAPS_PENDING_CAPSNAP; break; } } } arg->flags = flags; arg->encrypted = IS_ENCRYPTED(inode); #if IS_ENABLED(CONFIG_FS_ENCRYPTION) if (ci->fscrypt_auth_len && WARN_ON_ONCE(ci->fscrypt_auth_len > sizeof(struct ceph_fscrypt_auth))) { /* Don't set this if it's too big */ arg->fscrypt_auth_len = 0; } else { arg->fscrypt_auth_len = ci->fscrypt_auth_len; memcpy(arg->fscrypt_auth, ci->fscrypt_auth, min_t(size_t, ci->fscrypt_auth_len, sizeof(arg->fscrypt_auth))); } #endif /* CONFIG_FS_ENCRYPTION */ } #if IS_ENABLED(CONFIG_FS_ENCRYPTION) #define CAP_MSG_FIXED_FIELDS (sizeof(struct ceph_mds_caps) + \ 4 + 8 + 4 + 4 + 8 + 4 + 4 + 4 + 8 + 8 + 4 + 8 + 8 + 4 + 4 + 8) static inline int cap_msg_size(struct cap_msg_args *arg) { return CAP_MSG_FIXED_FIELDS + arg->fscrypt_auth_len; } #else #define CAP_MSG_FIXED_FIELDS (sizeof(struct ceph_mds_caps) + \ 4 + 8 + 4 + 4 + 8 + 4 + 4 + 4 + 8 + 8 + 4 + 8 + 8 + 4 + 4) static inline int cap_msg_size(struct cap_msg_args *arg) { return CAP_MSG_FIXED_FIELDS; } #endif /* CONFIG_FS_ENCRYPTION */ /* * Send a cap msg on the given inode. * * Caller should hold snap_rwsem (read), s_mutex. */ static void __send_cap(struct cap_msg_args *arg, struct ceph_inode_info *ci) { struct ceph_msg *msg; struct inode *inode = &ci->netfs.inode; struct ceph_client *cl = ceph_inode_to_client(inode); msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, cap_msg_size(arg), GFP_NOFS, false); if (!msg) { pr_err_client(cl, "error allocating cap msg: ino (%llx.%llx)" " flushing %s tid %llu, requeuing cap.\n", ceph_vinop(inode), ceph_cap_string(arg->dirty), arg->flush_tid); spin_lock(&ci->i_ceph_lock); __cap_delay_requeue(arg->session->s_mdsc, ci); spin_unlock(&ci->i_ceph_lock); return; } encode_cap_msg(msg, arg); ceph_con_send(&arg->session->s_con, msg); ceph_buffer_put(arg->old_xattr_buf); ceph_buffer_put(arg->xattr_buf); if (arg->wake) wake_up_all(&ci->i_cap_wq); } static inline int __send_flush_snap(struct inode *inode, struct ceph_mds_session *session, struct ceph_cap_snap *capsnap, u32 mseq, u64 oldest_flush_tid) { struct cap_msg_args arg; struct ceph_msg *msg; arg.session = session; arg.ino = ceph_vino(inode).ino; arg.cid = 0; arg.follows = capsnap->follows; arg.flush_tid = capsnap->cap_flush.tid; arg.oldest_flush_tid = oldest_flush_tid; arg.size = capsnap->size; arg.max_size = 0; arg.xattr_version = capsnap->xattr_version; arg.xattr_buf = capsnap->xattr_blob; arg.old_xattr_buf = NULL; arg.atime = capsnap->atime; arg.mtime = capsnap->mtime; arg.ctime = capsnap->ctime; arg.btime = capsnap->btime; arg.change_attr = capsnap->change_attr; arg.op = CEPH_CAP_OP_FLUSHSNAP; arg.caps = capsnap->issued; arg.wanted = 0; arg.dirty = capsnap->dirty; arg.seq = 0; arg.issue_seq = 0; arg.mseq = mseq; arg.time_warp_seq = capsnap->time_warp_seq; arg.uid = capsnap->uid; arg.gid = capsnap->gid; arg.mode = capsnap->mode; arg.inline_data = capsnap->inline_data; arg.flags = 0; arg.wake = false; arg.encrypted = IS_ENCRYPTED(inode); /* No fscrypt_auth changes from a capsnap.*/ arg.fscrypt_auth_len = 0; msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, cap_msg_size(&arg), GFP_NOFS, false); if (!msg) return -ENOMEM; encode_cap_msg(msg, &arg); ceph_con_send(&arg.session->s_con, msg); return 0; } /* * When a snapshot is taken, clients accumulate dirty metadata on * inodes with capabilities in ceph_cap_snaps to describe the file * state at the time the snapshot was taken. This must be flushed * asynchronously back to the MDS once sync writes complete and dirty * data is written out. * * Called under i_ceph_lock. */ static void __ceph_flush_snaps(struct ceph_inode_info *ci, struct ceph_mds_session *session) __releases(ci->i_ceph_lock) __acquires(ci->i_ceph_lock) { struct inode *inode = &ci->netfs.inode; struct ceph_mds_client *mdsc = session->s_mdsc; struct ceph_client *cl = mdsc->fsc->client; struct ceph_cap_snap *capsnap; u64 oldest_flush_tid = 0; u64 first_tid = 1, last_tid = 0; doutc(cl, "%p %llx.%llx session %p\n", inode, ceph_vinop(inode), session); list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { /* * we need to wait for sync writes to complete and for dirty * pages to be written out. */ if (capsnap->dirty_pages || capsnap->writing) break; /* should be removed by ceph_try_drop_cap_snap() */ BUG_ON(!capsnap->need_flush); /* only flush each capsnap once */ if (capsnap->cap_flush.tid > 0) { doutc(cl, "already flushed %p, skipping\n", capsnap); continue; } spin_lock(&mdsc->cap_dirty_lock); capsnap->cap_flush.tid = ++mdsc->last_cap_flush_tid; list_add_tail(&capsnap->cap_flush.g_list, &mdsc->cap_flush_list); if (oldest_flush_tid == 0) oldest_flush_tid = __get_oldest_flush_tid(mdsc); if (list_empty(&ci->i_flushing_item)) { list_add_tail(&ci->i_flushing_item, &session->s_cap_flushing); } spin_unlock(&mdsc->cap_dirty_lock); list_add_tail(&capsnap->cap_flush.i_list, &ci->i_cap_flush_list); if (first_tid == 1) first_tid = capsnap->cap_flush.tid; last_tid = capsnap->cap_flush.tid; } ci->i_ceph_flags &= ~CEPH_I_FLUSH_SNAPS; while (first_tid <= last_tid) { struct ceph_cap *cap = ci->i_auth_cap; struct ceph_cap_flush *cf = NULL, *iter; int ret; if (!(cap && cap->session == session)) { doutc(cl, "%p %llx.%llx auth cap %p not mds%d, stop\n", inode, ceph_vinop(inode), cap, session->s_mds); break; } ret = -ENOENT; list_for_each_entry(iter, &ci->i_cap_flush_list, i_list) { if (iter->tid >= first_tid) { cf = iter; ret = 0; break; } } if (ret < 0) break; first_tid = cf->tid + 1; capsnap = container_of(cf, struct ceph_cap_snap, cap_flush); refcount_inc(&capsnap->nref); spin_unlock(&ci->i_ceph_lock); doutc(cl, "%p %llx.%llx capsnap %p tid %llu %s\n", inode, ceph_vinop(inode), capsnap, cf->tid, ceph_cap_string(capsnap->dirty)); ret = __send_flush_snap(inode, session, capsnap, cap->mseq, oldest_flush_tid); if (ret < 0) { pr_err_client(cl, "error sending cap flushsnap, " "ino (%llx.%llx) tid %llu follows %llu\n", ceph_vinop(inode), cf->tid, capsnap->follows); } ceph_put_cap_snap(capsnap); spin_lock(&ci->i_ceph_lock); } } void ceph_flush_snaps(struct ceph_inode_info *ci, struct ceph_mds_session **psession) { struct inode *inode = &ci->netfs.inode; struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_mds_session *session = NULL; bool need_put = false; int mds; doutc(cl, "%p %llx.%llx\n", inode, ceph_vinop(inode)); if (psession) session = *psession; retry: spin_lock(&ci->i_ceph_lock); if (!(ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)) { doutc(cl, " no capsnap needs flush, doing nothing\n"); goto out; } if (!ci->i_auth_cap) { doutc(cl, " no auth cap (migrating?), doing nothing\n"); goto out; } mds = ci->i_auth_cap->session->s_mds; if (session && session->s_mds != mds) { doutc(cl, " oops, wrong session %p mutex\n", session); ceph_put_mds_session(session); session = NULL; } if (!session) { spin_unlock(&ci->i_ceph_lock); mutex_lock(&mdsc->mutex); session = __ceph_lookup_mds_session(mdsc, mds); mutex_unlock(&mdsc->mutex); goto retry; } // make sure flushsnap messages are sent in proper order. if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH) __kick_flushing_caps(mdsc, session, ci, 0); __ceph_flush_snaps(ci, session); out: spin_unlock(&ci->i_ceph_lock); if (psession) *psession = session; else ceph_put_mds_session(session); /* we flushed them all; remove this inode from the queue */ spin_lock(&mdsc->snap_flush_lock); if (!list_empty(&ci->i_snap_flush_item)) need_put = true; list_del_init(&ci->i_snap_flush_item); spin_unlock(&mdsc->snap_flush_lock); if (need_put) iput(inode); } /* * Mark caps dirty. If inode is newly dirty, return the dirty flags. * Caller is then responsible for calling __mark_inode_dirty with the * returned flags value. */ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask, struct ceph_cap_flush **pcf) { struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(ci->netfs.inode.i_sb)->mdsc; struct inode *inode = &ci->netfs.inode; struct ceph_client *cl = ceph_inode_to_client(inode); int was = ci->i_dirty_caps; int dirty = 0; lockdep_assert_held(&ci->i_ceph_lock); if (!ci->i_auth_cap) { pr_warn_client(cl, "%p %llx.%llx mask %s, " "but no auth cap (session was closed?)\n", inode, ceph_vinop(inode), ceph_cap_string(mask)); return 0; } doutc(cl, "%p %llx.%llx %s dirty %s -> %s\n", inode, ceph_vinop(inode), ceph_cap_string(mask), ceph_cap_string(was), ceph_cap_string(was | mask)); ci->i_dirty_caps |= mask; if (was == 0) { struct ceph_mds_session *session = ci->i_auth_cap->session; WARN_ON_ONCE(ci->i_prealloc_cap_flush); swap(ci->i_prealloc_cap_flush, *pcf); if (!ci->i_head_snapc) { WARN_ON_ONCE(!rwsem_is_locked(&mdsc->snap_rwsem)); ci->i_head_snapc = ceph_get_snap_context( ci->i_snap_realm->cached_context); } doutc(cl, "%p %llx.%llx now dirty snapc %p auth cap %p\n", inode, ceph_vinop(inode), ci->i_head_snapc, ci->i_auth_cap); BUG_ON(!list_empty(&ci->i_dirty_item)); spin_lock(&mdsc->cap_dirty_lock); list_add(&ci->i_dirty_item, &session->s_cap_dirty); spin_unlock(&mdsc->cap_dirty_lock); if (ci->i_flushing_caps == 0) { ihold(inode); dirty |= I_DIRTY_SYNC; } } else { WARN_ON_ONCE(!ci->i_prealloc_cap_flush); } BUG_ON(list_empty(&ci->i_dirty_item)); if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) && (mask & CEPH_CAP_FILE_BUFFER)) dirty |= I_DIRTY_DATASYNC; __cap_delay_requeue(mdsc, ci); return dirty; } struct ceph_cap_flush *ceph_alloc_cap_flush(void) { struct ceph_cap_flush *cf; cf = kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL); if (!cf) return NULL; cf->is_capsnap = false; return cf; } void ceph_free_cap_flush(struct ceph_cap_flush *cf) { if (cf) kmem_cache_free(ceph_cap_flush_cachep, cf); } static u64 __get_oldest_flush_tid(struct ceph_mds_client *mdsc) { if (!list_empty(&mdsc->cap_flush_list)) { struct ceph_cap_flush *cf = list_first_entry(&mdsc->cap_flush_list, struct ceph_cap_flush, g_list); return cf->tid; } return 0; } /* * Remove cap_flush from the mdsc's or inode's flushing cap list. * Return true if caller needs to wake up flush waiters. */ static bool __detach_cap_flush_from_mdsc(struct ceph_mds_client *mdsc, struct ceph_cap_flush *cf) { struct ceph_cap_flush *prev; bool wake = cf->wake; if (wake && cf->g_list.prev != &mdsc->cap_flush_list) { prev = list_prev_entry(cf, g_list); prev->wake = true; wake = false; } list_del_init(&cf->g_list); return wake; } static bool __detach_cap_flush_from_ci(struct ceph_inode_info *ci, struct ceph_cap_flush *cf) { struct ceph_cap_flush *prev; bool wake = cf->wake; if (wake && cf->i_list.prev != &ci->i_cap_flush_list) { prev = list_prev_entry(cf, i_list); prev->wake = true; wake = false; } list_del_init(&cf->i_list); return wake; } /* * Add dirty inode to the flushing list. Assigned a seq number so we * can wait for caps to flush without starving. * * Called under i_ceph_lock. Returns the flush tid. */ static u64 __mark_caps_flushing(struct inode *inode, struct ceph_mds_session *session, bool wake, u64 *oldest_flush_tid) { struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_cap_flush *cf = NULL; int flushing; lockdep_assert_held(&ci->i_ceph_lock); BUG_ON(ci->i_dirty_caps == 0); BUG_ON(list_empty(&ci->i_dirty_item)); BUG_ON(!ci->i_prealloc_cap_flush); flushing = ci->i_dirty_caps; doutc(cl, "flushing %s, flushing_caps %s -> %s\n", ceph_cap_string(flushing), ceph_cap_string(ci->i_flushing_caps), ceph_cap_string(ci->i_flushing_caps | flushing)); ci->i_flushing_caps |= flushing; ci->i_dirty_caps = 0; doutc(cl, "%p %llx.%llx now !dirty\n", inode, ceph_vinop(inode)); swap(cf, ci->i_prealloc_cap_flush); cf->caps = flushing; cf->wake = wake; spin_lock(&mdsc->cap_dirty_lock); list_del_init(&ci->i_dirty_item); cf->tid = ++mdsc->last_cap_flush_tid; list_add_tail(&cf->g_list, &mdsc->cap_flush_list); *oldest_flush_tid = __get_oldest_flush_tid(mdsc); if (list_empty(&ci->i_flushing_item)) { list_add_tail(&ci->i_flushing_item, &session->s_cap_flushing); mdsc->num_cap_flushing++; } spin_unlock(&mdsc->cap_dirty_lock); list_add_tail(&cf->i_list, &ci->i_cap_flush_list); return cf->tid; } /* * try to invalidate mapping pages without blocking. */ static int try_nonblocking_invalidate(struct inode *inode) __releases(ci->i_ceph_lock) __acquires(ci->i_ceph_lock) { struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); u32 invalidating_gen = ci->i_rdcache_gen; spin_unlock(&ci->i_ceph_lock); ceph_fscache_invalidate(inode, false); invalidate_mapping_pages(&inode->i_data, 0, -1); spin_lock(&ci->i_ceph_lock); if (inode->i_data.nrpages == 0 && invalidating_gen == ci->i_rdcache_gen) { /* success. */ doutc(cl, "%p %llx.%llx success\n", inode, ceph_vinop(inode)); /* save any racing async invalidate some trouble */ ci->i_rdcache_revoking = ci->i_rdcache_gen - 1; return 0; } doutc(cl, "%p %llx.%llx failed\n", inode, ceph_vinop(inode)); return -1; } bool __ceph_should_report_size(struct ceph_inode_info *ci) { loff_t size = i_size_read(&ci->netfs.inode); /* mds will adjust max size according to the reported size */ if (ci->i_flushing_caps & CEPH_CAP_FILE_WR) return false; if (size >= ci->i_max_size) return true; /* half of previous max_size increment has been used */ if (ci->i_max_size > ci->i_reported_size && (size << 1) >= ci->i_max_size + ci->i_reported_size) return true; return false; } /* * Swiss army knife function to examine currently used and wanted * versus held caps. Release, flush, ack revoked caps to mds as * appropriate. * * CHECK_CAPS_AUTHONLY - we should only check the auth cap * CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without * further delay. * CHECK_CAPS_FLUSH_FORCE - we should flush any caps immediately, without * further delay. */ void ceph_check_caps(struct ceph_inode_info *ci, int flags) { struct inode *inode = &ci->netfs.inode; struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_cap *cap; u64 flush_tid, oldest_flush_tid; int file_wanted, used, cap_used; int issued, implemented, want, retain, revoking, flushing = 0; int mds = -1; /* keep track of how far we've gone through i_caps list to avoid an infinite loop on retry */ struct rb_node *p; bool queue_invalidate = false; bool tried_invalidate = false; bool queue_writeback = false; struct ceph_mds_session *session = NULL; spin_lock(&ci->i_ceph_lock); if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE) { ci->i_ceph_flags |= CEPH_I_ASYNC_CHECK_CAPS; /* Don't send messages until we get async create reply */ spin_unlock(&ci->i_ceph_lock); return; } if (ci->i_ceph_flags & CEPH_I_FLUSH) flags |= CHECK_CAPS_FLUSH; retry: /* Caps wanted by virtue of active open files. */ file_wanted = __ceph_caps_file_wanted(ci); /* Caps which have active references against them */ used = __ceph_caps_used(ci); /* * "issued" represents the current caps that the MDS wants us to have. * "implemented" is the set that we have been granted, and includes the * ones that have not yet been returned to the MDS (the "revoking" set, * usually because they have outstanding references). */ issued = __ceph_caps_issued(ci, &implemented); revoking = implemented & ~issued; want = file_wanted; /* The ones we currently want to retain (may be adjusted below) */ retain = file_wanted | used | CEPH_CAP_PIN; if (!mdsc->stopping && inode->i_nlink > 0) { if (file_wanted) { retain |= CEPH_CAP_ANY; /* be greedy */ } else if (S_ISDIR(inode->i_mode) && (issued & CEPH_CAP_FILE_SHARED) && __ceph_dir_is_complete(ci)) { /* * If a directory is complete, we want to keep * the exclusive cap. So that MDS does not end up * revoking the shared cap on every create/unlink * operation. */ if (IS_RDONLY(inode)) { want = CEPH_CAP_ANY_SHARED; } else { want |= CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL; } retain |= want; } else { retain |= CEPH_CAP_ANY_SHARED; /* * keep RD only if we didn't have the file open RW, * because then the mds would revoke it anyway to * journal max_size=0. */ if (ci->i_max_size == 0) retain |= CEPH_CAP_ANY_RD; } } doutc(cl, "%p %llx.%llx file_want %s used %s dirty %s " "flushing %s issued %s revoking %s retain %s %s%s%s%s\n", inode, ceph_vinop(inode), ceph_cap_string(file_wanted), ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps), ceph_cap_string(ci->i_flushing_caps), ceph_cap_string(issued), ceph_cap_string(revoking), ceph_cap_string(retain), (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "", (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "", (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "", (flags & CHECK_CAPS_FLUSH_FORCE) ? " FLUSH_FORCE" : ""); /* * If we no longer need to hold onto old our caps, and we may * have cached pages, but don't want them, then try to invalidate. * If we fail, it's because pages are locked.... try again later. */ if ((!(flags & CHECK_CAPS_NOINVAL) || mdsc->stopping) && S_ISREG(inode->i_mode) && !(ci->i_wb_ref || ci->i_wrbuffer_ref) && /* no dirty pages... */ inode->i_data.nrpages && /* have cached pages */ (revoking & (CEPH_CAP_FILE_CACHE| CEPH_CAP_FILE_LAZYIO)) && /* or revoking cache */ !tried_invalidate) { doutc(cl, "trying to invalidate on %p %llx.%llx\n", inode, ceph_vinop(inode)); if (try_nonblocking_invalidate(inode) < 0) { doutc(cl, "queuing invalidate\n"); queue_invalidate = true; ci->i_rdcache_revoking = ci->i_rdcache_gen; } tried_invalidate = true; goto retry; } for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { int mflags = 0; struct cap_msg_args arg; cap = rb_entry(p, struct ceph_cap, ci_node); /* avoid looping forever */ if (mds >= cap->mds || ((flags & CHECK_CAPS_AUTHONLY) && cap != ci->i_auth_cap)) continue; /* * If we have an auth cap, we don't need to consider any * overlapping caps as used. */ cap_used = used; if (ci->i_auth_cap && cap != ci->i_auth_cap) cap_used &= ~ci->i_auth_cap->issued; revoking = cap->implemented & ~cap->issued; doutc(cl, " mds%d cap %p used %s issued %s implemented %s revoking %s\n", cap->mds, cap, ceph_cap_string(cap_used), ceph_cap_string(cap->issued), ceph_cap_string(cap->implemented), ceph_cap_string(revoking)); /* completed revocation? going down and there are no caps? */ if (revoking) { if ((revoking & cap_used) == 0) { doutc(cl, "completed revocation of %s\n", ceph_cap_string(cap->implemented & ~cap->issued)); goto ack; } /* * If the "i_wrbuffer_ref" was increased by mmap or generic * cache write just before the ceph_check_caps() is called, * the Fb capability revoking will fail this time. Then we * must wait for the BDI's delayed work to flush the dirty * pages and to release the "i_wrbuffer_ref", which will cost * at most 5 seconds. That means the MDS needs to wait at * most 5 seconds to finished the Fb capability's revocation. * * Let's queue a writeback for it. */ if (S_ISREG(inode->i_mode) && ci->i_wrbuffer_ref && (revoking & CEPH_CAP_FILE_BUFFER)) queue_writeback = true; } if (flags & CHECK_CAPS_FLUSH_FORCE) { doutc(cl, "force to flush caps\n"); goto ack; } if (cap == ci->i_auth_cap && (cap->issued & CEPH_CAP_FILE_WR)) { /* request larger max_size from MDS? */ if (ci->i_wanted_max_size > ci->i_max_size && ci->i_wanted_max_size > ci->i_requested_max_size) { doutc(cl, "requesting new max_size\n"); goto ack; } /* approaching file_max? */ if (__ceph_should_report_size(ci)) { doutc(cl, "i_size approaching max_size\n"); goto ack; } } /* flush anything dirty? */ if (cap == ci->i_auth_cap) { if ((flags & CHECK_CAPS_FLUSH) && ci->i_dirty_caps) { doutc(cl, "flushing dirty caps\n"); goto ack; } if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS) { doutc(cl, "flushing snap caps\n"); goto ack; } } /* want more caps from mds? */ if (want & ~cap->mds_wanted) { if (want & ~(cap->mds_wanted | cap->issued)) goto ack; if (!__cap_is_valid(cap)) goto ack; } /* things we might delay */ if ((cap->issued & ~retain) == 0) continue; /* nope, all good */ ack: ceph_put_mds_session(session); session = ceph_get_mds_session(cap->session); /* kick flushing and flush snaps before sending normal * cap message */ if (cap == ci->i_auth_cap && (ci->i_ceph_flags & (CEPH_I_KICK_FLUSH | CEPH_I_FLUSH_SNAPS))) { if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH) __kick_flushing_caps(mdsc, session, ci, 0); if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS) __ceph_flush_snaps(ci, session); goto retry; } if (cap == ci->i_auth_cap && ci->i_dirty_caps) { flushing = ci->i_dirty_caps; flush_tid = __mark_caps_flushing(inode, session, false, &oldest_flush_tid); if (flags & CHECK_CAPS_FLUSH && list_empty(&session->s_cap_dirty)) mflags |= CEPH_CLIENT_CAPS_SYNC; } else { flushing = 0; flush_tid = 0; spin_lock(&mdsc->cap_dirty_lock); oldest_flush_tid = __get_oldest_flush_tid(mdsc); spin_unlock(&mdsc->cap_dirty_lock); } mds = cap->mds; /* remember mds, so we don't repeat */ __prep_cap(&arg, cap, CEPH_CAP_OP_UPDATE, mflags, cap_used, want, retain, flushing, flush_tid, oldest_flush_tid); spin_unlock(&ci->i_ceph_lock); __send_cap(&arg, ci); spin_lock(&ci->i_ceph_lock); goto retry; /* retake i_ceph_lock and restart our cap scan. */ } /* periodically re-calculate caps wanted by open files */ if (__ceph_is_any_real_caps(ci) && list_empty(&ci->i_cap_delay_list) && (file_wanted & ~CEPH_CAP_PIN) && !(used & (CEPH_CAP_FILE_RD | CEPH_CAP_ANY_FILE_WR))) { __cap_delay_requeue(mdsc, ci); } spin_unlock(&ci->i_ceph_lock); ceph_put_mds_session(session); if (queue_writeback) ceph_queue_writeback(inode); if (queue_invalidate) ceph_queue_invalidate(inode); } /* * Try to flush dirty caps back to the auth mds. */ static int try_flush_caps(struct inode *inode, u64 *ptid) { struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); int flushing = 0; u64 flush_tid = 0, oldest_flush_tid = 0; spin_lock(&ci->i_ceph_lock); retry_locked: if (ci->i_dirty_caps && ci->i_auth_cap) { struct ceph_cap *cap = ci->i_auth_cap; struct cap_msg_args arg; struct ceph_mds_session *session = cap->session; if (session->s_state < CEPH_MDS_SESSION_OPEN) { spin_unlock(&ci->i_ceph_lock); goto out; } if (ci->i_ceph_flags & (CEPH_I_KICK_FLUSH | CEPH_I_FLUSH_SNAPS)) { if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH) __kick_flushing_caps(mdsc, session, ci, 0); if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS) __ceph_flush_snaps(ci, session); goto retry_locked; } flushing = ci->i_dirty_caps; flush_tid = __mark_caps_flushing(inode, session, true, &oldest_flush_tid); __prep_cap(&arg, cap, CEPH_CAP_OP_FLUSH, CEPH_CLIENT_CAPS_SYNC, __ceph_caps_used(ci), __ceph_caps_wanted(ci), (cap->issued | cap->implemented), flushing, flush_tid, oldest_flush_tid); spin_unlock(&ci->i_ceph_lock); __send_cap(&arg, ci); } else { if (!list_empty(&ci->i_cap_flush_list)) { struct ceph_cap_flush *cf = list_last_entry(&ci->i_cap_flush_list, struct ceph_cap_flush, i_list); cf->wake = true; flush_tid = cf->tid; } flushing = ci->i_flushing_caps; spin_unlock(&ci->i_ceph_lock); } out: *ptid = flush_tid; return flushing; } /* * Return true if we've flushed caps through the given flush_tid. */ static int caps_are_flushed(struct inode *inode, u64 flush_tid) { struct ceph_inode_info *ci = ceph_inode(inode); int ret = 1; spin_lock(&ci->i_ceph_lock); if (!list_empty(&ci->i_cap_flush_list)) { struct ceph_cap_flush * cf = list_first_entry(&ci->i_cap_flush_list, struct ceph_cap_flush, i_list); if (cf->tid <= flush_tid) ret = 0; } spin_unlock(&ci->i_ceph_lock); return ret; } /* * flush the mdlog and wait for any unsafe requests to complete. */ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) { struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req1 = NULL, *req2 = NULL; int ret, err = 0; spin_lock(&ci->i_unsafe_lock); if (S_ISDIR(inode->i_mode) && !list_empty(&ci->i_unsafe_dirops)) { req1 = list_last_entry(&ci->i_unsafe_dirops, struct ceph_mds_request, r_unsafe_dir_item); ceph_mdsc_get_request(req1); } if (!list_empty(&ci->i_unsafe_iops)) { req2 = list_last_entry(&ci->i_unsafe_iops, struct ceph_mds_request, r_unsafe_target_item); ceph_mdsc_get_request(req2); } spin_unlock(&ci->i_unsafe_lock); /* * Trigger to flush the journal logs in all the relevant MDSes * manually, or in the worst case we must wait at most 5 seconds * to wait the journal logs to be flushed by the MDSes periodically. */ if (req1 || req2) { struct ceph_mds_request *req; struct ceph_mds_session **sessions; struct ceph_mds_session *s; unsigned int max_sessions; int i; mutex_lock(&mdsc->mutex); max_sessions = mdsc->max_sessions; sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL); if (!sessions) { mutex_unlock(&mdsc->mutex); err = -ENOMEM; goto out; } spin_lock(&ci->i_unsafe_lock); if (req1) { list_for_each_entry(req, &ci->i_unsafe_dirops, r_unsafe_dir_item) { s = req->r_session; if (!s) continue; if (!sessions[s->s_mds]) { s = ceph_get_mds_session(s); sessions[s->s_mds] = s; } } } if (req2) { list_for_each_entry(req, &ci->i_unsafe_iops, r_unsafe_target_item) { s = req->r_session; if (!s) continue; if (!sessions[s->s_mds]) { s = ceph_get_mds_session(s); sessions[s->s_mds] = s; } } } spin_unlock(&ci->i_unsafe_lock); /* the auth MDS */ spin_lock(&ci->i_ceph_lock); if (ci->i_auth_cap) { s = ci->i_auth_cap->session; if (!sessions[s->s_mds]) sessions[s->s_mds] = ceph_get_mds_session(s); } spin_unlock(&ci->i_ceph_lock); mutex_unlock(&mdsc->mutex); /* send flush mdlog request to MDSes */ for (i = 0; i < max_sessions; i++) { s = sessions[i]; if (s) { send_flush_mdlog(s); ceph_put_mds_session(s); } } kfree(sessions); } doutc(cl, "%p %llx.%llx wait on tid %llu %llu\n", inode, ceph_vinop(inode), req1 ? req1->r_tid : 0ULL, req2 ? req2->r_tid : 0ULL); if (req1) { ret = !wait_for_completion_timeout(&req1->r_safe_completion, ceph_timeout_jiffies(req1->r_timeout)); if (ret) err = -EIO; } if (req2) { ret = !wait_for_completion_timeout(&req2->r_safe_completion, ceph_timeout_jiffies(req2->r_timeout)); if (ret) err = -EIO; } out: if (req1) ceph_mdsc_put_request(req1); if (req2) ceph_mdsc_put_request(req2); return err; } int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_client *cl = ceph_inode_to_client(inode); u64 flush_tid; int ret, err; int dirty; doutc(cl, "%p %llx.%llx%s\n", inode, ceph_vinop(inode), datasync ? " datasync" : ""); ret = file_write_and_wait_range(file, start, end); if (datasync) goto out; ret = ceph_wait_on_async_create(inode); if (ret) goto out; dirty = try_flush_caps(inode, &flush_tid); doutc(cl, "dirty caps are %s\n", ceph_cap_string(dirty)); err = flush_mdlog_and_wait_inode_unsafe_requests(inode); /* * only wait on non-file metadata writeback (the mds * can recover size and mtime, so we don't need to * wait for that) */ if (!err && (dirty & ~CEPH_CAP_ANY_FILE_WR)) { err = wait_event_interruptible(ci->i_cap_wq, caps_are_flushed(inode, flush_tid)); } if (err < 0) ret = err; err = file_check_and_advance_wb_err(file); if (err < 0) ret = err; out: doutc(cl, "%p %llx.%llx%s result=%d\n", inode, ceph_vinop(inode), datasync ? " datasync" : "", ret); return ret; } /* * Flush any dirty caps back to the mds. If we aren't asked to wait, * queue inode for flush but don't do so immediately, because we can * get by with fewer MDS messages if we wait for data writeback to * complete first. */ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc) { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_client *cl = ceph_inode_to_client(inode); u64 flush_tid; int err = 0; int dirty; int wait = (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync); doutc(cl, "%p %llx.%llx wait=%d\n", inode, ceph_vinop(inode), wait); ceph_fscache_unpin_writeback(inode, wbc); if (wait) { err = ceph_wait_on_async_create(inode); if (err) return err; dirty = try_flush_caps(inode, &flush_tid); if (dirty) err = wait_event_interruptible(ci->i_cap_wq, caps_are_flushed(inode, flush_tid)); } else { struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; spin_lock(&ci->i_ceph_lock); if (__ceph_caps_dirty(ci)) __cap_delay_requeue_front(mdsc, ci); spin_unlock(&ci->i_ceph_lock); } return err; } static void __kick_flushing_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, struct ceph_inode_info *ci, u64 oldest_flush_tid) __releases(ci->i_ceph_lock) __acquires(ci->i_ceph_lock) { struct inode *inode = &ci->netfs.inode; struct ceph_client *cl = mdsc->fsc->client; struct ceph_cap *cap; struct ceph_cap_flush *cf; int ret; u64 first_tid = 0; u64 last_snap_flush = 0; /* Don't do anything until create reply comes in */ if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE) return; ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH; list_for_each_entry_reverse(cf, &ci->i_cap_flush_list, i_list) { if (cf->is_capsnap) { last_snap_flush = cf->tid; break; } } list_for_each_entry(cf, &ci->i_cap_flush_list, i_list) { if (cf->tid < first_tid) continue; cap = ci->i_auth_cap; if (!(cap && cap->session == session)) { pr_err_client(cl, "%p auth cap %p not mds%d ???\n", inode, cap, session->s_mds); break; } first_tid = cf->tid + 1; if (!cf->is_capsnap) { struct cap_msg_args arg; doutc(cl, "%p %llx.%llx cap %p tid %llu %s\n", inode, ceph_vinop(inode), cap, cf->tid, ceph_cap_string(cf->caps)); __prep_cap(&arg, cap, CEPH_CAP_OP_FLUSH, (cf->tid < last_snap_flush ? CEPH_CLIENT_CAPS_PENDING_CAPSNAP : 0), __ceph_caps_used(ci), __ceph_caps_wanted(ci), (cap->issued | cap->implemented), cf->caps, cf->tid, oldest_flush_tid); spin_unlock(&ci->i_ceph_lock); __send_cap(&arg, ci); } else { struct ceph_cap_snap *capsnap = container_of(cf, struct ceph_cap_snap, cap_flush); doutc(cl, "%p %llx.%llx capsnap %p tid %llu %s\n", inode, ceph_vinop(inode), capsnap, cf->tid, ceph_cap_string(capsnap->dirty)); refcount_inc(&capsnap->nref); spin_unlock(&ci->i_ceph_lock); ret = __send_flush_snap(inode, session, capsnap, cap->mseq, oldest_flush_tid); if (ret < 0) { pr_err_client(cl, "error sending cap flushsnap," " %p %llx.%llx tid %llu follows %llu\n", inode, ceph_vinop(inode), cf->tid, capsnap->follows); } ceph_put_cap_snap(capsnap); } spin_lock(&ci->i_ceph_lock); } } void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci; struct ceph_cap *cap; u64 oldest_flush_tid; doutc(cl, "mds%d\n", session->s_mds); spin_lock(&mdsc->cap_dirty_lock); oldest_flush_tid = __get_oldest_flush_tid(mdsc); spin_unlock(&mdsc->cap_dirty_lock); list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) { struct inode *inode = &ci->netfs.inode; spin_lock(&ci->i_ceph_lock); cap = ci->i_auth_cap; if (!(cap && cap->session == session)) { pr_err_client(cl, "%p %llx.%llx auth cap %p not mds%d ???\n", inode, ceph_vinop(inode), cap, session->s_mds); spin_unlock(&ci->i_ceph_lock); continue; } /* * if flushing caps were revoked, we re-send the cap flush * in client reconnect stage. This guarantees MDS * processes * the cap flush message before issuing the flushing caps to * other client. */ if ((cap->issued & ci->i_flushing_caps) != ci->i_flushing_caps) { /* encode_caps_cb() also will reset these sequence * numbers. make sure sequence numbers in cap flush * message match later reconnect message */ cap->seq = 0; cap->issue_seq = 0; cap->mseq = 0; __kick_flushing_caps(mdsc, session, ci, oldest_flush_tid); } else { ci->i_ceph_flags |= CEPH_I_KICK_FLUSH; } spin_unlock(&ci->i_ceph_lock); } } void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci; struct ceph_cap *cap; u64 oldest_flush_tid; lockdep_assert_held(&session->s_mutex); doutc(cl, "mds%d\n", session->s_mds); spin_lock(&mdsc->cap_dirty_lock); oldest_flush_tid = __get_oldest_flush_tid(mdsc); spin_unlock(&mdsc->cap_dirty_lock); list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) { struct inode *inode = &ci->netfs.inode; spin_lock(&ci->i_ceph_lock); cap = ci->i_auth_cap; if (!(cap && cap->session == session)) { pr_err_client(cl, "%p %llx.%llx auth cap %p not mds%d ???\n", inode, ceph_vinop(inode), cap, session->s_mds); spin_unlock(&ci->i_ceph_lock); continue; } if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH) { __kick_flushing_caps(mdsc, session, ci, oldest_flush_tid); } spin_unlock(&ci->i_ceph_lock); } } void ceph_kick_flushing_inode_caps(struct ceph_mds_session *session, struct ceph_inode_info *ci) { struct ceph_mds_client *mdsc = session->s_mdsc; struct ceph_cap *cap = ci->i_auth_cap; struct inode *inode = &ci->netfs.inode; lockdep_assert_held(&ci->i_ceph_lock); doutc(mdsc->fsc->client, "%p %llx.%llx flushing %s\n", inode, ceph_vinop(inode), ceph_cap_string(ci->i_flushing_caps)); if (!list_empty(&ci->i_cap_flush_list)) { u64 oldest_flush_tid; spin_lock(&mdsc->cap_dirty_lock); list_move_tail(&ci->i_flushing_item, &cap->session->s_cap_flushing); oldest_flush_tid = __get_oldest_flush_tid(mdsc); spin_unlock(&mdsc->cap_dirty_lock); __kick_flushing_caps(mdsc, session, ci, oldest_flush_tid); } } /* * Take references to capabilities we hold, so that we don't release * them to the MDS prematurely. */ void ceph_take_cap_refs(struct ceph_inode_info *ci, int got, bool snap_rwsem_locked) { struct inode *inode = &ci->netfs.inode; struct ceph_client *cl = ceph_inode_to_client(inode); lockdep_assert_held(&ci->i_ceph_lock); if (got & CEPH_CAP_PIN) ci->i_pin_ref++; if (got & CEPH_CAP_FILE_RD) ci->i_rd_ref++; if (got & CEPH_CAP_FILE_CACHE) ci->i_rdcache_ref++; if (got & CEPH_CAP_FILE_EXCL) ci->i_fx_ref++; if (got & CEPH_CAP_FILE_WR) { if (ci->i_wr_ref == 0 && !ci->i_head_snapc) { BUG_ON(!snap_rwsem_locked); ci->i_head_snapc = ceph_get_snap_context( ci->i_snap_realm->cached_context); } ci->i_wr_ref++; } if (got & CEPH_CAP_FILE_BUFFER) { if (ci->i_wb_ref == 0) ihold(inode); ci->i_wb_ref++; doutc(cl, "%p %llx.%llx wb %d -> %d (?)\n", inode, ceph_vinop(inode), ci->i_wb_ref-1, ci->i_wb_ref); } } /* * Try to grab cap references. Specify those refs we @want, and the * minimal set we @need. Also include the larger offset we are writing * to (when applicable), and check against max_size here as well. * Note that caller is responsible for ensuring max_size increases are * requested from the MDS. * * Returns 0 if caps were not able to be acquired (yet), 1 if succeed, * or a negative error code. There are 3 special error codes: * -EAGAIN: need to sleep but non-blocking is specified * -EFBIG: ask caller to call check_max_size() and try again. * -EUCLEAN: ask caller to call ceph_renew_caps() and try again. */ enum { /* first 8 bits are reserved for CEPH_FILE_MODE_FOO */ NON_BLOCKING = (1 << 8), CHECK_FILELOCK = (1 << 9), }; static int try_get_cap_refs(struct inode *inode, int need, int want, loff_t endoff, int flags, int *got) { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; struct ceph_client *cl = ceph_inode_to_client(inode); int ret = 0; int have, implemented; bool snap_rwsem_locked = false; doutc(cl, "%p %llx.%llx need %s want %s\n", inode, ceph_vinop(inode), ceph_cap_string(need), ceph_cap_string(want)); again: spin_lock(&ci->i_ceph_lock); if ((flags & CHECK_FILELOCK) && (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK)) { doutc(cl, "%p %llx.%llx error filelock\n", inode, ceph_vinop(inode)); ret = -EIO; goto out_unlock; } /* finish pending truncate */ while (ci->i_truncate_pending) { spin_unlock(&ci->i_ceph_lock); if (snap_rwsem_locked) { up_read(&mdsc->snap_rwsem); snap_rwsem_locked = false; } __ceph_do_pending_vmtruncate(inode); spin_lock(&ci->i_ceph_lock); } have = __ceph_caps_issued(ci, &implemented); if (have & need & CEPH_CAP_FILE_WR) { if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) { doutc(cl, "%p %llx.%llx endoff %llu > maxsize %llu\n", inode, ceph_vinop(inode), endoff, ci->i_max_size); if (endoff > ci->i_requested_max_size) ret = ci->i_auth_cap ? -EFBIG : -EUCLEAN; goto out_unlock; } /* * If a sync write is in progress, we must wait, so that we * can get a final snapshot value for size+mtime. */ if (__ceph_have_pending_cap_snap(ci)) { doutc(cl, "%p %llx.%llx cap_snap_pending\n", inode, ceph_vinop(inode)); goto out_unlock; } } if ((have & need) == need) { /* * Look at (implemented & ~have & not) so that we keep waiting * on transition from wanted -> needed caps. This is needed * for WRBUFFER|WR -> WR to avoid a new WR sync write from * going before a prior buffered writeback happens. * * For RDCACHE|RD -> RD, there is not need to wait and we can * just exclude the revoking caps and force to sync read. */ int not = want & ~(have & need); int revoking = implemented & ~have; int exclude = revoking & not; doutc(cl, "%p %llx.%llx have %s but not %s (revoking %s)\n", inode, ceph_vinop(inode), ceph_cap_string(have), ceph_cap_string(not), ceph_cap_string(revoking)); if (!exclude || !(exclude & CEPH_CAP_FILE_BUFFER)) { if (!snap_rwsem_locked && !ci->i_head_snapc && (need & CEPH_CAP_FILE_WR)) { if (!down_read_trylock(&mdsc->snap_rwsem)) { /* * we can not call down_read() when * task isn't in TASK_RUNNING state */ if (flags & NON_BLOCKING) { ret = -EAGAIN; goto out_unlock; } spin_unlock(&ci->i_ceph_lock); down_read(&mdsc->snap_rwsem); snap_rwsem_locked = true; goto again; } snap_rwsem_locked = true; } if ((have & want) == want) *got = need | (want & ~exclude); else *got = need; ceph_take_cap_refs(ci, *got, true); ret = 1; } } else { int session_readonly = false; int mds_wanted; if (ci->i_auth_cap && (need & (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_EXCL))) { struct ceph_mds_session *s = ci->i_auth_cap->session; spin_lock(&s->s_cap_lock); session_readonly = s->s_readonly; spin_unlock(&s->s_cap_lock); } if (session_readonly) { doutc(cl, "%p %llx.%llx need %s but mds%d readonly\n", inode, ceph_vinop(inode), ceph_cap_string(need), ci->i_auth_cap->mds); ret = -EROFS; goto out_unlock; } if (ceph_inode_is_shutdown(inode)) { doutc(cl, "%p %llx.%llx inode is shutdown\n", inode, ceph_vinop(inode)); ret = -ESTALE; goto out_unlock; } mds_wanted = __ceph_caps_mds_wanted(ci, false); if (need & ~mds_wanted) { doutc(cl, "%p %llx.%llx need %s > mds_wanted %s\n", inode, ceph_vinop(inode), ceph_cap_string(need), ceph_cap_string(mds_wanted)); ret = -EUCLEAN; goto out_unlock; } doutc(cl, "%p %llx.%llx have %s need %s\n", inode, ceph_vinop(inode), ceph_cap_string(have), ceph_cap_string(need)); } out_unlock: __ceph_touch_fmode(ci, mdsc, flags); spin_unlock(&ci->i_ceph_lock); if (snap_rwsem_locked) up_read(&mdsc->snap_rwsem); if (!ret) ceph_update_cap_mis(&mdsc->metric); else if (ret == 1) ceph_update_cap_hit(&mdsc->metric); doutc(cl, "%p %llx.%llx ret %d got %s\n", inode, ceph_vinop(inode), ret, ceph_cap_string(*got)); return ret; } /* * Check the offset we are writing up to against our current * max_size. If necessary, tell the MDS we want to write to * a larger offset. */ static void check_max_size(struct inode *inode, loff_t endoff) { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_client *cl = ceph_inode_to_client(inode); int check = 0; /* do we need to explicitly request a larger max_size? */ spin_lock(&ci->i_ceph_lock); if (endoff >= ci->i_max_size && endoff > ci->i_wanted_max_size) { doutc(cl, "write %p %llx.%llx at large endoff %llu, req max_size\n", inode, ceph_vinop(inode), endoff); ci->i_wanted_max_size = endoff; } /* duplicate ceph_check_caps()'s logic */ if (ci->i_auth_cap && (ci->i_auth_cap->issued & CEPH_CAP_FILE_WR) && ci->i_wanted_max_size > ci->i_max_size && ci->i_wanted_max_size > ci->i_requested_max_size) check = 1; spin_unlock(&ci->i_ceph_lock); if (check) ceph_check_caps(ci, CHECK_CAPS_AUTHONLY); } static inline int get_used_fmode(int caps) { int fmode = 0; if (caps & CEPH_CAP_FILE_RD) fmode |= CEPH_FILE_MODE_RD; if (caps & CEPH_CAP_FILE_WR) fmode |= CEPH_FILE_MODE_WR; return fmode; } int ceph_try_get_caps(struct inode *inode, int need, int want, bool nonblock, int *got) { int ret, flags; BUG_ON(need & ~CEPH_CAP_FILE_RD); BUG_ON(want & ~(CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO | CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL | CEPH_CAP_ANY_DIR_OPS)); if (need) { ret = ceph_pool_perm_check(inode, need); if (ret < 0) return ret; } flags = get_used_fmode(need | want); if (nonblock) flags |= NON_BLOCKING; ret = try_get_cap_refs(inode, need, want, 0, flags, got); /* three special error codes */ if (ret == -EAGAIN || ret == -EFBIG || ret == -EUCLEAN) ret = 0; return ret; } /* * Wait for caps, and take cap references. If we can't get a WR cap * due to a small max_size, make sure we check_max_size (and possibly * ask the mds) so we don't get hung up indefinitely. */ int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need, int want, loff_t endoff, int *got) { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); int ret, _got, flags; ret = ceph_pool_perm_check(inode, need); if (ret < 0) return ret; if (fi && (fi->fmode & CEPH_FILE_MODE_WR) && fi->filp_gen != READ_ONCE(fsc->filp_gen)) return -EBADF; flags = get_used_fmode(need | want); while (true) { flags &= CEPH_FILE_MODE_MASK; if (vfs_inode_has_locks(inode)) flags |= CHECK_FILELOCK; _got = 0; ret = try_get_cap_refs(inode, need, want, endoff, flags, &_got); WARN_ON_ONCE(ret == -EAGAIN); if (!ret) { #ifdef CONFIG_DEBUG_FS struct ceph_mds_client *mdsc = fsc->mdsc; struct cap_wait cw; #endif DEFINE_WAIT_FUNC(wait, woken_wake_function); #ifdef CONFIG_DEBUG_FS cw.ino = ceph_ino(inode); cw.tgid = current->tgid; cw.need = need; cw.want = want; spin_lock(&mdsc->caps_list_lock); list_add(&cw.list, &mdsc->cap_wait_list); spin_unlock(&mdsc->caps_list_lock); #endif /* make sure used fmode not timeout */ ceph_get_fmode(ci, flags, FMODE_WAIT_BIAS); add_wait_queue(&ci->i_cap_wq, &wait); flags |= NON_BLOCKING; while (!(ret = try_get_cap_refs(inode, need, want, endoff, flags, &_got))) { if (signal_pending(current)) { ret = -ERESTARTSYS; break; } wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } remove_wait_queue(&ci->i_cap_wq, &wait); ceph_put_fmode(ci, flags, FMODE_WAIT_BIAS); #ifdef CONFIG_DEBUG_FS spin_lock(&mdsc->caps_list_lock); list_del(&cw.list); spin_unlock(&mdsc->caps_list_lock); #endif if (ret == -EAGAIN) continue; } if (fi && (fi->fmode & CEPH_FILE_MODE_WR) && fi->filp_gen != READ_ONCE(fsc->filp_gen)) { if (ret >= 0 && _got) ceph_put_cap_refs(ci, _got); return -EBADF; } if (ret < 0) { if (ret == -EFBIG || ret == -EUCLEAN) { int ret2 = ceph_wait_on_async_create(inode); if (ret2 < 0) return ret2; } if (ret == -EFBIG) { check_max_size(inode, endoff); continue; } if (ret == -EUCLEAN) { /* session was killed, try renew caps */ ret = ceph_renew_caps(inode, flags); if (ret == 0) continue; } return ret; } if (S_ISREG(ci->netfs.inode.i_mode) && ceph_has_inline_data(ci) && (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) && i_size_read(inode) > 0) { struct page *page = find_get_page(inode->i_mapping, 0); if (page) { bool uptodate = PageUptodate(page); put_page(page); if (uptodate) break; } /* * drop cap refs first because getattr while * holding * caps refs can cause deadlock. */ ceph_put_cap_refs(ci, _got); _got = 0; /* * getattr request will bring inline data into * page cache */ ret = __ceph_do_getattr(inode, NULL, CEPH_STAT_CAP_INLINE_DATA, true); if (ret < 0) return ret; continue; } break; } *got = _got; return 0; } int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got) { struct ceph_file_info *fi = filp->private_data; struct inode *inode = file_inode(filp); return __ceph_get_caps(inode, fi, need, want, endoff, got); } /* * Take cap refs. Caller must already know we hold at least one ref * on the caps in question or we don't know this is safe. */ void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps) { spin_lock(&ci->i_ceph_lock); ceph_take_cap_refs(ci, caps, false); spin_unlock(&ci->i_ceph_lock); } /* * drop cap_snap that is not associated with any snapshot. * we don't need to send FLUSHSNAP message for it. */ static int ceph_try_drop_cap_snap(struct ceph_inode_info *ci, struct ceph_cap_snap *capsnap) { struct inode *inode = &ci->netfs.inode; struct ceph_client *cl = ceph_inode_to_client(inode); if (!capsnap->need_flush && !capsnap->writing && !capsnap->dirty_pages) { doutc(cl, "%p follows %llu\n", capsnap, capsnap->follows); BUG_ON(capsnap->cap_flush.tid > 0); ceph_put_snap_context(capsnap->context); if (!list_is_last(&capsnap->ci_item, &ci->i_cap_snaps)) ci->i_ceph_flags |= CEPH_I_FLUSH_SNAPS; list_del(&capsnap->ci_item); ceph_put_cap_snap(capsnap); return 1; } return 0; } enum put_cap_refs_mode { PUT_CAP_REFS_SYNC = 0, PUT_CAP_REFS_ASYNC, }; /* * Release cap refs. * * If we released the last ref on any given cap, call ceph_check_caps * to release (or schedule a release). * * If we are releasing a WR cap (from a sync write), finalize any affected * cap_snap, and wake up any waiters. */ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had, enum put_cap_refs_mode mode) { struct inode *inode = &ci->netfs.inode; struct ceph_client *cl = ceph_inode_to_client(inode); int last = 0, put = 0, flushsnaps = 0, wake = 0; bool check_flushsnaps = false; spin_lock(&ci->i_ceph_lock); if (had & CEPH_CAP_PIN) --ci->i_pin_ref; if (had & CEPH_CAP_FILE_RD) if (--ci->i_rd_ref == 0) last++; if (had & CEPH_CAP_FILE_CACHE) if (--ci->i_rdcache_ref == 0) last++; if (had & CEPH_CAP_FILE_EXCL) if (--ci->i_fx_ref == 0) last++; if (had & CEPH_CAP_FILE_BUFFER) { if (--ci->i_wb_ref == 0) { last++; /* put the ref held by ceph_take_cap_refs() */ put++; check_flushsnaps = true; } doutc(cl, "%p %llx.%llx wb %d -> %d (?)\n", inode, ceph_vinop(inode), ci->i_wb_ref+1, ci->i_wb_ref); } if (had & CEPH_CAP_FILE_WR) { if (--ci->i_wr_ref == 0) { /* * The Fb caps will always be took and released * together with the Fw caps. */ WARN_ON_ONCE(ci->i_wb_ref); last++; check_flushsnaps = true; if (ci->i_wrbuffer_ref_head == 0 && ci->i_dirty_caps == 0 && ci->i_flushing_caps == 0) { BUG_ON(!ci->i_head_snapc); ceph_put_snap_context(ci->i_head_snapc); ci->i_head_snapc = NULL; } /* see comment in __ceph_remove_cap() */ if (!__ceph_is_any_real_caps(ci) && ci->i_snap_realm) ceph_change_snap_realm(inode, NULL); } } if (check_flushsnaps && __ceph_have_pending_cap_snap(ci)) { struct ceph_cap_snap *capsnap = list_last_entry(&ci->i_cap_snaps, struct ceph_cap_snap, ci_item); capsnap->writing = 0; if (ceph_try_drop_cap_snap(ci, capsnap)) /* put the ref held by ceph_queue_cap_snap() */ put++; else if (__ceph_finish_cap_snap(ci, capsnap)) flushsnaps = 1; wake = 1; } spin_unlock(&ci->i_ceph_lock); doutc(cl, "%p %llx.%llx had %s%s%s\n", inode, ceph_vinop(inode), ceph_cap_string(had), last ? " last" : "", put ? " put" : ""); switch (mode) { case PUT_CAP_REFS_SYNC: if (last) ceph_check_caps(ci, 0); else if (flushsnaps) ceph_flush_snaps(ci, NULL); break; case PUT_CAP_REFS_ASYNC: if (last) ceph_queue_check_caps(inode); else if (flushsnaps) ceph_queue_flush_snaps(inode); break; default: break; } if (wake) wake_up_all(&ci->i_cap_wq); while (put-- > 0) iput(inode); } void ceph_put_cap_refs(struct ceph_inode_info *ci, int had) { __ceph_put_cap_refs(ci, had, PUT_CAP_REFS_SYNC); } void ceph_put_cap_refs_async(struct ceph_inode_info *ci, int had) { __ceph_put_cap_refs(ci, had, PUT_CAP_REFS_ASYNC); } /* * Release @nr WRBUFFER refs on dirty pages for the given @snapc snap * context. Adjust per-snap dirty page accounting as appropriate. * Once all dirty data for a cap_snap is flushed, flush snapped file * metadata back to the MDS. If we dropped the last ref, call * ceph_check_caps. */ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, struct ceph_snap_context *snapc) { struct inode *inode = &ci->netfs.inode; struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_cap_snap *capsnap = NULL, *iter; int put = 0; bool last = false; bool flush_snaps = false; bool complete_capsnap = false; spin_lock(&ci->i_ceph_lock); ci->i_wrbuffer_ref -= nr; if (ci->i_wrbuffer_ref == 0) { last = true; put++; } if (ci->i_head_snapc == snapc) { ci->i_wrbuffer_ref_head -= nr; if (ci->i_wrbuffer_ref_head == 0 && ci->i_wr_ref == 0 && ci->i_dirty_caps == 0 && ci->i_flushing_caps == 0) { BUG_ON(!ci->i_head_snapc); ceph_put_snap_context(ci->i_head_snapc); ci->i_head_snapc = NULL; } doutc(cl, "on %p %llx.%llx head %d/%d -> %d/%d %s\n", inode, ceph_vinop(inode), ci->i_wrbuffer_ref+nr, ci->i_wrbuffer_ref_head+nr, ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head, last ? " LAST" : ""); } else { list_for_each_entry(iter, &ci->i_cap_snaps, ci_item) { if (iter->context == snapc) { capsnap = iter; break; } } if (!capsnap) { /* * The capsnap should already be removed when removing * auth cap in the case of a forced unmount. */ WARN_ON_ONCE(ci->i_auth_cap); goto unlock; } capsnap->dirty_pages -= nr; if (capsnap->dirty_pages == 0) { complete_capsnap = true; if (!capsnap->writing) { if (ceph_try_drop_cap_snap(ci, capsnap)) { put++; } else { ci->i_ceph_flags |= CEPH_I_FLUSH_SNAPS; flush_snaps = true; } } } doutc(cl, "%p %llx.%llx cap_snap %p snap %lld %d/%d -> %d/%d %s%s\n", inode, ceph_vinop(inode), capsnap, capsnap->context->seq, ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr, ci->i_wrbuffer_ref, capsnap->dirty_pages, last ? " (wrbuffer last)" : "", complete_capsnap ? " (complete capsnap)" : ""); } unlock: spin_unlock(&ci->i_ceph_lock); if (last) { ceph_check_caps(ci, 0); } else if (flush_snaps) { ceph_flush_snaps(ci, NULL); } if (complete_capsnap) wake_up_all(&ci->i_cap_wq); while (put-- > 0) { iput(inode); } } /* * Invalidate unlinked inode's aliases, so we can drop the inode ASAP. */ static void invalidate_aliases(struct inode *inode) { struct ceph_client *cl = ceph_inode_to_client(inode); struct dentry *dn, *prev = NULL; doutc(cl, "%p %llx.%llx\n", inode, ceph_vinop(inode)); d_prune_aliases(inode); /* * For non-directory inode, d_find_alias() only returns * hashed dentry. After calling d_invalidate(), the * dentry becomes unhashed. * * For directory inode, d_find_alias() can return * unhashed dentry. But directory inode should have * one alias at most. */ while ((dn = d_find_alias(inode))) { if (dn == prev) { dput(dn); break; } d_invalidate(dn); if (prev) dput(prev); prev = dn; } if (prev) dput(prev); } struct cap_extra_info { struct ceph_string *pool_ns; /* inline data */ u64 inline_version; void *inline_data; u32 inline_len; /* dirstat */ bool dirstat_valid; u64 nfiles; u64 nsubdirs; u64 change_attr; /* currently issued */ int issued; struct timespec64 btime; u8 *fscrypt_auth; u32 fscrypt_auth_len; u64 fscrypt_file_size; }; /* * Handle a cap GRANT message from the MDS. (Note that a GRANT may * actually be a revocation if it specifies a smaller cap set.) * * caller holds s_mutex and i_ceph_lock, we drop both. */ static void handle_cap_grant(struct inode *inode, struct ceph_mds_session *session, struct ceph_cap *cap, struct ceph_mds_caps *grant, struct ceph_buffer *xattr_buf, struct cap_extra_info *extra_info) __releases(ci->i_ceph_lock) __releases(session->s_mdsc->snap_rwsem) { struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); int seq = le32_to_cpu(grant->seq); int newcaps = le32_to_cpu(grant->caps); int used, wanted, dirty; u64 size = le64_to_cpu(grant->size); u64 max_size = le64_to_cpu(grant->max_size); unsigned char check_caps = 0; bool was_stale = cap->cap_gen < atomic_read(&session->s_cap_gen); bool wake = false; bool writeback = false; bool queue_trunc = false; bool queue_invalidate = false; bool deleted_inode = false; bool fill_inline = false; bool revoke_wait = false; int flags = 0; /* * If there is at least one crypto block then we'll trust * fscrypt_file_size. If the real length of the file is 0, then * ignore it (it has probably been truncated down to 0 by the MDS). */ if (IS_ENCRYPTED(inode) && size) size = extra_info->fscrypt_file_size; doutc(cl, "%p %llx.%llx cap %p mds%d seq %d %s\n", inode, ceph_vinop(inode), cap, session->s_mds, seq, ceph_cap_string(newcaps)); doutc(cl, " size %llu max_size %llu, i_size %llu\n", size, max_size, i_size_read(inode)); /* * If CACHE is being revoked, and we have no dirty buffers, * try to invalidate (once). (If there are dirty buffers, we * will invalidate _after_ writeback.) */ if (S_ISREG(inode->i_mode) && /* don't invalidate readdir cache */ ((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) && (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 && !(ci->i_wrbuffer_ref || ci->i_wb_ref)) { if (try_nonblocking_invalidate(inode)) { /* there were locked pages.. invalidate later in a separate thread. */ if (ci->i_rdcache_revoking != ci->i_rdcache_gen) { queue_invalidate = true; ci->i_rdcache_revoking = ci->i_rdcache_gen; } } } if (was_stale) cap->issued = cap->implemented = CEPH_CAP_PIN; /* * auth mds of the inode changed. we received the cap export message, * but still haven't received the cap import message. handle_cap_export * updated the new auth MDS' cap. * * "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing a message * that was sent before the cap import message. So don't remove caps. */ if (ceph_seq_cmp(seq, cap->seq) <= 0) { WARN_ON(cap != ci->i_auth_cap); WARN_ON(cap->cap_id != le64_to_cpu(grant->cap_id)); seq = cap->seq; newcaps |= cap->issued; } /* side effects now are allowed */ cap->cap_gen = atomic_read(&session->s_cap_gen); cap->seq = seq; __check_cap_issue(ci, cap, newcaps); inode_set_max_iversion_raw(inode, extra_info->change_attr); if ((newcaps & CEPH_CAP_AUTH_SHARED) && (extra_info->issued & CEPH_CAP_AUTH_EXCL) == 0) { umode_t mode = le32_to_cpu(grant->mode); if (inode_wrong_type(inode, mode)) pr_warn_once("inode type changed! (ino %llx.%llx is 0%o, mds says 0%o)\n", ceph_vinop(inode), inode->i_mode, mode); else inode->i_mode = mode; inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid)); inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid)); ci->i_btime = extra_info->btime; doutc(cl, "%p %llx.%llx mode 0%o uid.gid %d.%d\n", inode, ceph_vinop(inode), inode->i_mode, from_kuid(&init_user_ns, inode->i_uid), from_kgid(&init_user_ns, inode->i_gid)); #if IS_ENABLED(CONFIG_FS_ENCRYPTION) if (ci->fscrypt_auth_len != extra_info->fscrypt_auth_len || memcmp(ci->fscrypt_auth, extra_info->fscrypt_auth, ci->fscrypt_auth_len)) pr_warn_ratelimited_client(cl, "cap grant attempt to change fscrypt_auth on non-I_NEW inode (old len %d new len %d)\n", ci->fscrypt_auth_len, extra_info->fscrypt_auth_len); #endif } if ((newcaps & CEPH_CAP_LINK_SHARED) && (extra_info->issued & CEPH_CAP_LINK_EXCL) == 0) { set_nlink(inode, le32_to_cpu(grant->nlink)); if (inode->i_nlink == 0) deleted_inode = true; } if ((extra_info->issued & CEPH_CAP_XATTR_EXCL) == 0 && grant->xattr_len) { int len = le32_to_cpu(grant->xattr_len); u64 version = le64_to_cpu(grant->xattr_version); if (version > ci->i_xattrs.version) { doutc(cl, " got new xattrs v%llu on %p %llx.%llx len %d\n", version, inode, ceph_vinop(inode), len); if (ci->i_xattrs.blob) ceph_buffer_put(ci->i_xattrs.blob); ci->i_xattrs.blob = ceph_buffer_get(xattr_buf); ci->i_xattrs.version = version; ceph_forget_all_cached_acls(inode); ceph_security_invalidate_secctx(inode); } } if (newcaps & CEPH_CAP_ANY_RD) { struct timespec64 mtime, atime, ctime; /* ctime/mtime/atime? */ ceph_decode_timespec64(&mtime, &grant->mtime); ceph_decode_timespec64(&atime, &grant->atime); ceph_decode_timespec64(&ctime, &grant->ctime); ceph_fill_file_time(inode, extra_info->issued, le32_to_cpu(grant->time_warp_seq), &ctime, &mtime, &atime); } if ((newcaps & CEPH_CAP_FILE_SHARED) && extra_info->dirstat_valid) { ci->i_files = extra_info->nfiles; ci->i_subdirs = extra_info->nsubdirs; } if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) { /* file layout may have changed */ s64 old_pool = ci->i_layout.pool_id; struct ceph_string *old_ns; ceph_file_layout_from_legacy(&ci->i_layout, &grant->layout); old_ns = rcu_dereference_protected(ci->i_layout.pool_ns, lockdep_is_held(&ci->i_ceph_lock)); rcu_assign_pointer(ci->i_layout.pool_ns, extra_info->pool_ns); if (ci->i_layout.pool_id != old_pool || extra_info->pool_ns != old_ns) ci->i_ceph_flags &= ~CEPH_I_POOL_PERM; extra_info->pool_ns = old_ns; /* size/truncate_seq? */ queue_trunc = ceph_fill_file_size(inode, extra_info->issued, le32_to_cpu(grant->truncate_seq), le64_to_cpu(grant->truncate_size), size); } if (ci->i_auth_cap == cap && (newcaps & CEPH_CAP_ANY_FILE_WR)) { if (max_size != ci->i_max_size) { doutc(cl, "max_size %lld -> %llu\n", ci->i_max_size, max_size); ci->i_max_size = max_size; if (max_size >= ci->i_wanted_max_size) { ci->i_wanted_max_size = 0; /* reset */ ci->i_requested_max_size = 0; } wake = true; } } /* check cap bits */ wanted = __ceph_caps_wanted(ci); used = __ceph_caps_used(ci); dirty = __ceph_caps_dirty(ci); doutc(cl, " my wanted = %s, used = %s, dirty %s\n", ceph_cap_string(wanted), ceph_cap_string(used), ceph_cap_string(dirty)); if ((was_stale || le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) && (wanted & ~(cap->mds_wanted | newcaps))) { /* * If mds is importing cap, prior cap messages that update * 'wanted' may get dropped by mds (migrate seq mismatch). * * We don't send cap message to update 'wanted' if what we * want are already issued. If mds revokes caps, cap message * that releases caps also tells mds what we want. But if * caps got revoked by mds forcedly (session stale). We may * haven't told mds what we want. */ check_caps = 1; } /* revocation, grant, or no-op? */ if (cap->issued & ~newcaps) { int revoking = cap->issued & ~newcaps; doutc(cl, "revocation: %s -> %s (revoking %s)\n", ceph_cap_string(cap->issued), ceph_cap_string(newcaps), ceph_cap_string(revoking)); if (S_ISREG(inode->i_mode) && (revoking & used & CEPH_CAP_FILE_BUFFER)) { writeback = true; /* initiate writeback; will delay ack */ revoke_wait = true; } else if (queue_invalidate && revoking == CEPH_CAP_FILE_CACHE && (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) { revoke_wait = true; /* do nothing yet, invalidation will be queued */ } else if (cap == ci->i_auth_cap) { check_caps = 1; /* check auth cap only */ } else { check_caps = 2; /* check all caps */ } /* If there is new caps, try to wake up the waiters */ if (~cap->issued & newcaps) wake = true; cap->issued = newcaps; cap->implemented |= newcaps; } else if (cap->issued == newcaps) { doutc(cl, "caps unchanged: %s -> %s\n", ceph_cap_string(cap->issued), ceph_cap_string(newcaps)); } else { doutc(cl, "grant: %s -> %s\n", ceph_cap_string(cap->issued), ceph_cap_string(newcaps)); /* non-auth MDS is revoking the newly grant caps ? */ if (cap == ci->i_auth_cap && __ceph_caps_revoking_other(ci, cap, newcaps)) check_caps = 2; cap->issued = newcaps; cap->implemented |= newcaps; /* add bits only, to * avoid stepping on a * pending revocation */ wake = true; } BUG_ON(cap->issued & ~cap->implemented); /* don't let check_caps skip sending a response to MDS for revoke msgs */ if (!revoke_wait && le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { cap->mds_wanted = 0; flags |= CHECK_CAPS_FLUSH_FORCE; if (cap == ci->i_auth_cap) check_caps = 1; /* check auth cap only */ else check_caps = 2; /* check all caps */ } if (extra_info->inline_version > 0 && extra_info->inline_version >= ci->i_inline_version) { ci->i_inline_version = extra_info->inline_version; if (ci->i_inline_version != CEPH_INLINE_NONE && (newcaps & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO))) fill_inline = true; } if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) { if (ci->i_auth_cap == cap) { if (newcaps & ~extra_info->issued) wake = true; if (ci->i_requested_max_size > max_size || !(le32_to_cpu(grant->wanted) & CEPH_CAP_ANY_FILE_WR)) { /* re-request max_size if necessary */ ci->i_requested_max_size = 0; wake = true; } ceph_kick_flushing_inode_caps(session, ci); } up_read(&session->s_mdsc->snap_rwsem); } spin_unlock(&ci->i_ceph_lock); if (fill_inline) ceph_fill_inline_data(inode, NULL, extra_info->inline_data, extra_info->inline_len); if (queue_trunc) ceph_queue_vmtruncate(inode); if (writeback) /* * queue inode for writeback: we can't actually call * filemap_write_and_wait, etc. from message handler * context. */ ceph_queue_writeback(inode); if (queue_invalidate) ceph_queue_invalidate(inode); if (deleted_inode) invalidate_aliases(inode); if (wake) wake_up_all(&ci->i_cap_wq); mutex_unlock(&session->s_mutex); if (check_caps == 1) ceph_check_caps(ci, flags | CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL); else if (check_caps == 2) ceph_check_caps(ci, flags | CHECK_CAPS_NOINVAL); } /* * Handle FLUSH_ACK from MDS, indicating that metadata we sent to the * MDS has been safely committed. */ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, struct ceph_mds_caps *m, struct ceph_mds_session *session, struct ceph_cap *cap) __releases(ci->i_ceph_lock) { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; struct ceph_client *cl = mdsc->fsc->client; struct ceph_cap_flush *cf, *tmp_cf; LIST_HEAD(to_remove); unsigned seq = le32_to_cpu(m->seq); int dirty = le32_to_cpu(m->dirty); int cleaned = 0; bool drop = false; bool wake_ci = false; bool wake_mdsc = false; list_for_each_entry_safe(cf, tmp_cf, &ci->i_cap_flush_list, i_list) { /* Is this the one that was flushed? */ if (cf->tid == flush_tid) cleaned = cf->caps; /* Is this a capsnap? */ if (cf->is_capsnap) continue; if (cf->tid <= flush_tid) { /* * An earlier or current tid. The FLUSH_ACK should * represent a superset of this flush's caps. */ wake_ci |= __detach_cap_flush_from_ci(ci, cf); list_add_tail(&cf->i_list, &to_remove); } else { /* * This is a later one. Any caps in it are still dirty * so don't count them as cleaned. */ cleaned &= ~cf->caps; if (!cleaned) break; } } doutc(cl, "%p %llx.%llx mds%d seq %d on %s cleaned %s, flushing %s -> %s\n", inode, ceph_vinop(inode), session->s_mds, seq, ceph_cap_string(dirty), ceph_cap_string(cleaned), ceph_cap_string(ci->i_flushing_caps), ceph_cap_string(ci->i_flushing_caps & ~cleaned)); if (list_empty(&to_remove) && !cleaned) goto out; ci->i_flushing_caps &= ~cleaned; spin_lock(&mdsc->cap_dirty_lock); list_for_each_entry(cf, &to_remove, i_list) wake_mdsc |= __detach_cap_flush_from_mdsc(mdsc, cf); if (ci->i_flushing_caps == 0) { if (list_empty(&ci->i_cap_flush_list)) { list_del_init(&ci->i_flushing_item); if (!list_empty(&session->s_cap_flushing)) { struct inode *inode = &list_first_entry(&session->s_cap_flushing, struct ceph_inode_info, i_flushing_item)->netfs.inode; doutc(cl, " mds%d still flushing cap on %p %llx.%llx\n", session->s_mds, inode, ceph_vinop(inode)); } } mdsc->num_cap_flushing--; doutc(cl, " %p %llx.%llx now !flushing\n", inode, ceph_vinop(inode)); if (ci->i_dirty_caps == 0) { doutc(cl, " %p %llx.%llx now clean\n", inode, ceph_vinop(inode)); BUG_ON(!list_empty(&ci->i_dirty_item)); drop = true; if (ci->i_wr_ref == 0 && ci->i_wrbuffer_ref_head == 0) { BUG_ON(!ci->i_head_snapc); ceph_put_snap_context(ci->i_head_snapc); ci->i_head_snapc = NULL; } } else { BUG_ON(list_empty(&ci->i_dirty_item)); } } spin_unlock(&mdsc->cap_dirty_lock); out: spin_unlock(&ci->i_ceph_lock); while (!list_empty(&to_remove)) { cf = list_first_entry(&to_remove, struct ceph_cap_flush, i_list); list_del_init(&cf->i_list); if (!cf->is_capsnap) ceph_free_cap_flush(cf); } if (wake_ci) wake_up_all(&ci->i_cap_wq); if (wake_mdsc) wake_up_all(&mdsc->cap_flushing_wq); if (drop) iput(inode); } void __ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap, bool *wake_ci, bool *wake_mdsc) { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; struct ceph_client *cl = mdsc->fsc->client; bool ret; lockdep_assert_held(&ci->i_ceph_lock); doutc(cl, "removing capsnap %p, %p %llx.%llx ci %p\n", capsnap, inode, ceph_vinop(inode), ci); list_del_init(&capsnap->ci_item); ret = __detach_cap_flush_from_ci(ci, &capsnap->cap_flush); if (wake_ci) *wake_ci = ret; spin_lock(&mdsc->cap_dirty_lock); if (list_empty(&ci->i_cap_flush_list)) list_del_init(&ci->i_flushing_item); ret = __detach_cap_flush_from_mdsc(mdsc, &capsnap->cap_flush); if (wake_mdsc) *wake_mdsc = ret; spin_unlock(&mdsc->cap_dirty_lock); } void ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap, bool *wake_ci, bool *wake_mdsc) { struct ceph_inode_info *ci = ceph_inode(inode); lockdep_assert_held(&ci->i_ceph_lock); WARN_ON_ONCE(capsnap->dirty_pages || capsnap->writing); __ceph_remove_capsnap(inode, capsnap, wake_ci, wake_mdsc); } /* * Handle FLUSHSNAP_ACK. MDS has flushed snap data to disk and we can * throw away our cap_snap. * * Caller hold s_mutex. */ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid, struct ceph_mds_caps *m, struct ceph_mds_session *session) { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; struct ceph_client *cl = mdsc->fsc->client; u64 follows = le64_to_cpu(m->snap_follows); struct ceph_cap_snap *capsnap = NULL, *iter; bool wake_ci = false; bool wake_mdsc = false; doutc(cl, "%p %llx.%llx ci %p mds%d follows %lld\n", inode, ceph_vinop(inode), ci, session->s_mds, follows); spin_lock(&ci->i_ceph_lock); list_for_each_entry(iter, &ci->i_cap_snaps, ci_item) { if (iter->follows == follows) { if (iter->cap_flush.tid != flush_tid) { doutc(cl, " cap_snap %p follows %lld " "tid %lld != %lld\n", iter, follows, flush_tid, iter->cap_flush.tid); break; } capsnap = iter; break; } else { doutc(cl, " skipping cap_snap %p follows %lld\n", iter, iter->follows); } } if (capsnap) ceph_remove_capsnap(inode, capsnap, &wake_ci, &wake_mdsc); spin_unlock(&ci->i_ceph_lock); if (capsnap) { ceph_put_snap_context(capsnap->context); ceph_put_cap_snap(capsnap); if (wake_ci) wake_up_all(&ci->i_cap_wq); if (wake_mdsc) wake_up_all(&mdsc->cap_flushing_wq); iput(inode); } } /* * Handle TRUNC from MDS, indicating file truncation. * * caller hold s_mutex. */ static bool handle_cap_trunc(struct inode *inode, struct ceph_mds_caps *trunc, struct ceph_mds_session *session, struct cap_extra_info *extra_info) { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_client *cl = ceph_inode_to_client(inode); int mds = session->s_mds; int seq = le32_to_cpu(trunc->seq); u32 truncate_seq = le32_to_cpu(trunc->truncate_seq); u64 truncate_size = le64_to_cpu(trunc->truncate_size); u64 size = le64_to_cpu(trunc->size); int implemented = 0; int dirty = __ceph_caps_dirty(ci); int issued = __ceph_caps_issued(ceph_inode(inode), &implemented); bool queue_trunc = false; lockdep_assert_held(&ci->i_ceph_lock); issued |= implemented | dirty; /* * If there is at least one crypto block then we'll trust * fscrypt_file_size. If the real length of the file is 0, then * ignore it (it has probably been truncated down to 0 by the MDS). */ if (IS_ENCRYPTED(inode) && size) size = extra_info->fscrypt_file_size; doutc(cl, "%p %llx.%llx mds%d seq %d to %lld truncate seq %d\n", inode, ceph_vinop(inode), mds, seq, truncate_size, truncate_seq); queue_trunc = ceph_fill_file_size(inode, issued, truncate_seq, truncate_size, size); return queue_trunc; } /* * Handle EXPORT from MDS. Cap is being migrated _from_ this mds to a * different one. If we are the most recent migration we've seen (as * indicated by mseq), make note of the migrating cap bits for the * duration (until we see the corresponding IMPORT). * * caller holds s_mutex */ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, struct ceph_mds_cap_peer *ph, struct ceph_mds_session *session) { struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_session *tsession = NULL; struct ceph_cap *cap, *tcap, *new_cap = NULL; struct ceph_inode_info *ci = ceph_inode(inode); u64 t_cap_id; u32 t_issue_seq, t_mseq; int target, issued; int mds = session->s_mds; if (ph) { t_cap_id = le64_to_cpu(ph->cap_id); t_issue_seq = le32_to_cpu(ph->issue_seq); t_mseq = le32_to_cpu(ph->mseq); target = le32_to_cpu(ph->mds); } else { t_cap_id = t_issue_seq = t_mseq = 0; target = -1; } doutc(cl, " cap %llx.%llx export to peer %d piseq %u pmseq %u\n", ceph_vinop(inode), target, t_issue_seq, t_mseq); retry: down_read(&mdsc->snap_rwsem); spin_lock(&ci->i_ceph_lock); cap = __get_cap_for_mds(ci, mds); if (!cap || cap->cap_id != le64_to_cpu(ex->cap_id)) goto out_unlock; if (target < 0) { ceph_remove_cap(mdsc, cap, false); goto out_unlock; } /* * now we know we haven't received the cap import message yet * because the exported cap still exist. */ issued = cap->issued; if (issued != cap->implemented) pr_err_ratelimited_client(cl, "issued != implemented: " "%p %llx.%llx mds%d seq %d mseq %d" " issued %s implemented %s\n", inode, ceph_vinop(inode), mds, cap->seq, cap->mseq, ceph_cap_string(issued), ceph_cap_string(cap->implemented)); tcap = __get_cap_for_mds(ci, target); if (tcap) { /* already have caps from the target */ if (tcap->cap_id == t_cap_id && ceph_seq_cmp(tcap->seq, t_issue_seq) < 0) { doutc(cl, " updating import cap %p mds%d\n", tcap, target); tcap->cap_id = t_cap_id; tcap->seq = t_issue_seq - 1; tcap->issue_seq = t_issue_seq - 1; tcap->issued |= issued; tcap->implemented |= issued; if (cap == ci->i_auth_cap) { ci->i_auth_cap = tcap; change_auth_cap_ses(ci, tcap->session); } } ceph_remove_cap(mdsc, cap, false); goto out_unlock; } else if (tsession) { /* add placeholder for the export target */ int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0; tcap = new_cap; ceph_add_cap(inode, tsession, t_cap_id, issued, 0, t_issue_seq - 1, t_mseq, (u64)-1, flag, &new_cap); if (!list_empty(&ci->i_cap_flush_list) && ci->i_auth_cap == tcap) { spin_lock(&mdsc->cap_dirty_lock); list_move_tail(&ci->i_flushing_item, &tcap->session->s_cap_flushing); spin_unlock(&mdsc->cap_dirty_lock); } ceph_remove_cap(mdsc, cap, false); goto out_unlock; } spin_unlock(&ci->i_ceph_lock); up_read(&mdsc->snap_rwsem); mutex_unlock(&session->s_mutex); /* open target session */ tsession = ceph_mdsc_open_export_target_session(mdsc, target); if (!IS_ERR(tsession)) { if (mds > target) { mutex_lock(&session->s_mutex); mutex_lock_nested(&tsession->s_mutex, SINGLE_DEPTH_NESTING); } else { mutex_lock(&tsession->s_mutex); mutex_lock_nested(&session->s_mutex, SINGLE_DEPTH_NESTING); } new_cap = ceph_get_cap(mdsc, NULL); } else { WARN_ON(1); tsession = NULL; target = -1; mutex_lock(&session->s_mutex); } goto retry; out_unlock: spin_unlock(&ci->i_ceph_lock); up_read(&mdsc->snap_rwsem); mutex_unlock(&session->s_mutex); if (tsession) { mutex_unlock(&tsession->s_mutex); ceph_put_mds_session(tsession); } if (new_cap) ceph_put_cap(mdsc, new_cap); } /* * Handle cap IMPORT. * * caller holds s_mutex. acquires i_ceph_lock */ static void handle_cap_import(struct ceph_mds_client *mdsc, struct inode *inode, struct ceph_mds_caps *im, struct ceph_mds_cap_peer *ph, struct ceph_mds_session *session, struct ceph_cap **target_cap, int *old_issued) { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_client *cl = mdsc->fsc->client; struct ceph_cap *cap, *ocap, *new_cap = NULL; int mds = session->s_mds; int issued; unsigned caps = le32_to_cpu(im->caps); unsigned wanted = le32_to_cpu(im->wanted); unsigned seq = le32_to_cpu(im->seq); unsigned mseq = le32_to_cpu(im->migrate_seq); u64 realmino = le64_to_cpu(im->realm); u64 cap_id = le64_to_cpu(im->cap_id); u64 p_cap_id; u32 piseq = 0; u32 pmseq = 0; int peer; if (ph) { p_cap_id = le64_to_cpu(ph->cap_id); peer = le32_to_cpu(ph->mds); piseq = le32_to_cpu(ph->issue_seq); pmseq = le32_to_cpu(ph->mseq); } else { p_cap_id = 0; peer = -1; } doutc(cl, " cap %llx.%llx import from peer %d piseq %u pmseq %u\n", ceph_vinop(inode), peer, piseq, pmseq); retry: cap = __get_cap_for_mds(ci, mds); if (!cap) { if (!new_cap) { spin_unlock(&ci->i_ceph_lock); new_cap = ceph_get_cap(mdsc, NULL); spin_lock(&ci->i_ceph_lock); goto retry; } cap = new_cap; } else { if (new_cap) { ceph_put_cap(mdsc, new_cap); new_cap = NULL; } } __ceph_caps_issued(ci, &issued); issued |= __ceph_caps_dirty(ci); ceph_add_cap(inode, session, cap_id, caps, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH, &new_cap); ocap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL; if (ocap && ocap->cap_id == p_cap_id) { doutc(cl, " remove export cap %p mds%d flags %d\n", ocap, peer, ph->flags); if ((ph->flags & CEPH_CAP_FLAG_AUTH) && (ocap->seq != piseq || ocap->mseq != pmseq)) { pr_err_ratelimited_client(cl, "mismatched seq/mseq: " "%p %llx.%llx mds%d seq %d mseq %d" " importer mds%d has peer seq %d mseq %d\n", inode, ceph_vinop(inode), peer, ocap->seq, ocap->mseq, mds, piseq, pmseq); } ceph_remove_cap(mdsc, ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE)); } *old_issued = issued; *target_cap = cap; } #ifdef CONFIG_FS_ENCRYPTION static int parse_fscrypt_fields(void **p, void *end, struct cap_extra_info *extra) { u32 len; ceph_decode_32_safe(p, end, extra->fscrypt_auth_len, bad); if (extra->fscrypt_auth_len) { ceph_decode_need(p, end, extra->fscrypt_auth_len, bad); extra->fscrypt_auth = kmalloc(extra->fscrypt_auth_len, GFP_KERNEL); if (!extra->fscrypt_auth) return -ENOMEM; ceph_decode_copy_safe(p, end, extra->fscrypt_auth, extra->fscrypt_auth_len, bad); } ceph_decode_32_safe(p, end, len, bad); if (len >= sizeof(u64)) { ceph_decode_64_safe(p, end, extra->fscrypt_file_size, bad); len -= sizeof(u64); } ceph_decode_skip_n(p, end, len, bad); return 0; bad: return -EIO; } #else static int parse_fscrypt_fields(void **p, void *end, struct cap_extra_info *extra) { u32 len; /* Don't care about these fields unless we're encryption-capable */ ceph_decode_32_safe(p, end, len, bad); if (len) ceph_decode_skip_n(p, end, len, bad); ceph_decode_32_safe(p, end, len, bad); if (len) ceph_decode_skip_n(p, end, len, bad); return 0; bad: return -EIO; } #endif /* * Handle a caps message from the MDS. * * Identify the appropriate session, inode, and call the right handler * based on the cap op. */ void ceph_handle_caps(struct ceph_mds_session *session, struct ceph_msg *msg) { struct ceph_mds_client *mdsc = session->s_mdsc; struct ceph_client *cl = mdsc->fsc->client; struct inode *inode; struct ceph_inode_info *ci; struct ceph_cap *cap; struct ceph_mds_caps *h; struct ceph_mds_cap_peer *peer = NULL; struct ceph_snap_realm *realm = NULL; int op; int msg_version = le16_to_cpu(msg->hdr.version); u32 seq, mseq, issue_seq; struct ceph_vino vino; void *snaptrace; size_t snaptrace_len; void *p, *end; struct cap_extra_info extra_info = {}; bool queue_trunc; bool close_sessions = false; bool do_cap_release = false; if (!ceph_inc_mds_stopping_blocker(mdsc, session)) return; /* decode */ end = msg->front.iov_base + msg->front.iov_len; if (msg->front.iov_len < sizeof(*h)) goto bad; h = msg->front.iov_base; op = le32_to_cpu(h->op); vino.ino = le64_to_cpu(h->ino); vino.snap = CEPH_NOSNAP; seq = le32_to_cpu(h->seq); mseq = le32_to_cpu(h->migrate_seq); issue_seq = le32_to_cpu(h->issue_seq); snaptrace = h + 1; snaptrace_len = le32_to_cpu(h->snap_trace_len); p = snaptrace + snaptrace_len; if (msg_version >= 2) { u32 flock_len; ceph_decode_32_safe(&p, end, flock_len, bad); if (p + flock_len > end) goto bad; p += flock_len; } if (msg_version >= 3) { if (op == CEPH_CAP_OP_IMPORT) { if (p + sizeof(*peer) > end) goto bad; peer = p; p += sizeof(*peer); } else if (op == CEPH_CAP_OP_EXPORT) { /* recorded in unused fields */ peer = (void *)&h->size; } } if (msg_version >= 4) { ceph_decode_64_safe(&p, end, extra_info.inline_version, bad); ceph_decode_32_safe(&p, end, extra_info.inline_len, bad); if (p + extra_info.inline_len > end) goto bad; extra_info.inline_data = p; p += extra_info.inline_len; } if (msg_version >= 5) { struct ceph_osd_client *osdc = &mdsc->fsc->client->osdc; u32 epoch_barrier; ceph_decode_32_safe(&p, end, epoch_barrier, bad); ceph_osdc_update_epoch_barrier(osdc, epoch_barrier); } if (msg_version >= 8) { u32 pool_ns_len; /* version >= 6 */ ceph_decode_skip_64(&p, end, bad); // flush_tid /* version >= 7 */ ceph_decode_skip_32(&p, end, bad); // caller_uid ceph_decode_skip_32(&p, end, bad); // caller_gid /* version >= 8 */ ceph_decode_32_safe(&p, end, pool_ns_len, bad); if (pool_ns_len > 0) { ceph_decode_need(&p, end, pool_ns_len, bad); extra_info.pool_ns = ceph_find_or_create_string(p, pool_ns_len); p += pool_ns_len; } } if (msg_version >= 9) { struct ceph_timespec *btime; if (p + sizeof(*btime) > end) goto bad; btime = p; ceph_decode_timespec64(&extra_info.btime, btime); p += sizeof(*btime); ceph_decode_64_safe(&p, end, extra_info.change_attr, bad); } if (msg_version >= 11) { /* version >= 10 */ ceph_decode_skip_32(&p, end, bad); // flags /* version >= 11 */ extra_info.dirstat_valid = true; ceph_decode_64_safe(&p, end, extra_info.nfiles, bad); ceph_decode_64_safe(&p, end, extra_info.nsubdirs, bad); } if (msg_version >= 12) { if (parse_fscrypt_fields(&p, end, &extra_info)) goto bad; } /* lookup ino */ inode = ceph_find_inode(mdsc->fsc->sb, vino); doutc(cl, " caps mds%d op %s ino %llx.%llx inode %p seq %u iseq %u mseq %u\n", session->s_mds, ceph_cap_op_name(op), vino.ino, vino.snap, inode, seq, issue_seq, mseq); mutex_lock(&session->s_mutex); if (!inode) { doutc(cl, " i don't have ino %llx\n", vino.ino); switch (op) { case CEPH_CAP_OP_IMPORT: case CEPH_CAP_OP_REVOKE: case CEPH_CAP_OP_GRANT: do_cap_release = true; break; default: break; } goto flush_cap_releases; } ci = ceph_inode(inode); /* these will work even if we don't have a cap yet */ switch (op) { case CEPH_CAP_OP_FLUSHSNAP_ACK: handle_cap_flushsnap_ack(inode, le64_to_cpu(msg->hdr.tid), h, session); goto done; case CEPH_CAP_OP_EXPORT: handle_cap_export(inode, h, peer, session); goto done_unlocked; case CEPH_CAP_OP_IMPORT: realm = NULL; if (snaptrace_len) { down_write(&mdsc->snap_rwsem); if (ceph_update_snap_trace(mdsc, snaptrace, snaptrace + snaptrace_len, false, &realm)) { up_write(&mdsc->snap_rwsem); close_sessions = true; goto done; } downgrade_write(&mdsc->snap_rwsem); } else { down_read(&mdsc->snap_rwsem); } spin_lock(&ci->i_ceph_lock); handle_cap_import(mdsc, inode, h, peer, session, &cap, &extra_info.issued); handle_cap_grant(inode, session, cap, h, msg->middle, &extra_info); if (realm) ceph_put_snap_realm(mdsc, realm); goto done_unlocked; } /* the rest require a cap */ spin_lock(&ci->i_ceph_lock); cap = __get_cap_for_mds(ceph_inode(inode), session->s_mds); if (!cap) { doutc(cl, " no cap on %p ino %llx.%llx from mds%d\n", inode, ceph_ino(inode), ceph_snap(inode), session->s_mds); spin_unlock(&ci->i_ceph_lock); switch (op) { case CEPH_CAP_OP_REVOKE: case CEPH_CAP_OP_GRANT: do_cap_release = true; break; default: break; } goto flush_cap_releases; } /* note that each of these drops i_ceph_lock for us */ switch (op) { case CEPH_CAP_OP_REVOKE: case CEPH_CAP_OP_GRANT: __ceph_caps_issued(ci, &extra_info.issued); extra_info.issued |= __ceph_caps_dirty(ci); handle_cap_grant(inode, session, cap, h, msg->middle, &extra_info); goto done_unlocked; case CEPH_CAP_OP_FLUSH_ACK: handle_cap_flush_ack(inode, le64_to_cpu(msg->hdr.tid), h, session, cap); break; case CEPH_CAP_OP_TRUNC: queue_trunc = handle_cap_trunc(inode, h, session, &extra_info); spin_unlock(&ci->i_ceph_lock); if (queue_trunc) ceph_queue_vmtruncate(inode); break; default: spin_unlock(&ci->i_ceph_lock); pr_err_client(cl, "unknown cap op %d %s\n", op, ceph_cap_op_name(op)); } done: mutex_unlock(&session->s_mutex); done_unlocked: iput(inode); out: ceph_dec_mds_stopping_blocker(mdsc); ceph_put_string(extra_info.pool_ns); /* Defer closing the sessions after s_mutex lock being released */ if (close_sessions) ceph_mdsc_close_sessions(mdsc); kfree(extra_info.fscrypt_auth); return; flush_cap_releases: /* * send any cap release message to try to move things * along for the mds (who clearly thinks we still have this * cap). */ if (do_cap_release) { cap = ceph_get_cap(mdsc, NULL); cap->cap_ino = vino.ino; cap->queue_release = 1; cap->cap_id = le64_to_cpu(h->cap_id); cap->mseq = mseq; cap->seq = seq; cap->issue_seq = seq; spin_lock(&session->s_cap_lock); __ceph_queue_cap_release(session, cap); spin_unlock(&session->s_cap_lock); } ceph_flush_session_cap_releases(mdsc, session); goto done; bad: pr_err_client(cl, "corrupt message\n"); ceph_msg_dump(msg); goto out; } /* * Delayed work handler to process end of delayed cap release LRU list. * * If new caps are added to the list while processing it, these won't get * processed in this run. In this case, the ci->i_hold_caps_max will be * returned so that the work can be scheduled accordingly. */ unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc) { struct ceph_client *cl = mdsc->fsc->client; struct inode *inode; struct ceph_inode_info *ci; struct ceph_mount_options *opt = mdsc->fsc->mount_options; unsigned long delay_max = opt->caps_wanted_delay_max * HZ; unsigned long loop_start = jiffies; unsigned long delay = 0; doutc(cl, "begin\n"); spin_lock(&mdsc->cap_delay_lock); while (!list_empty(&mdsc->cap_delay_list)) { ci = list_first_entry(&mdsc->cap_delay_list, struct ceph_inode_info, i_cap_delay_list); if (time_before(loop_start, ci->i_hold_caps_max - delay_max)) { doutc(cl, "caps added recently. Exiting loop"); delay = ci->i_hold_caps_max; break; } if ((ci->i_ceph_flags & CEPH_I_FLUSH) == 0 && time_before(jiffies, ci->i_hold_caps_max)) break; list_del_init(&ci->i_cap_delay_list); inode = igrab(&ci->netfs.inode); if (inode) { spin_unlock(&mdsc->cap_delay_lock); doutc(cl, "on %p %llx.%llx\n", inode, ceph_vinop(inode)); ceph_check_caps(ci, 0); iput(inode); spin_lock(&mdsc->cap_delay_lock); } /* * Make sure too many dirty caps or general * slowness doesn't block mdsc delayed work, * preventing send_renew_caps() from running. */ if (time_after_eq(jiffies, loop_start + 5 * HZ)) break; } spin_unlock(&mdsc->cap_delay_lock); doutc(cl, "done\n"); return delay; } /* * Flush all dirty caps to the mds */ static void flush_dirty_session_caps(struct ceph_mds_session *s) { struct ceph_mds_client *mdsc = s->s_mdsc; struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci; struct inode *inode; doutc(cl, "begin\n"); spin_lock(&mdsc->cap_dirty_lock); while (!list_empty(&s->s_cap_dirty)) { ci = list_first_entry(&s->s_cap_dirty, struct ceph_inode_info, i_dirty_item); inode = &ci->netfs.inode; ihold(inode); doutc(cl, "%p %llx.%llx\n", inode, ceph_vinop(inode)); spin_unlock(&mdsc->cap_dirty_lock); ceph_wait_on_async_create(inode); ceph_check_caps(ci, CHECK_CAPS_FLUSH); iput(inode); spin_lock(&mdsc->cap_dirty_lock); } spin_unlock(&mdsc->cap_dirty_lock); doutc(cl, "done\n"); } void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc) { ceph_mdsc_iterate_sessions(mdsc, flush_dirty_session_caps, true); } /* * Flush all cap releases to the mds */ static void flush_cap_releases(struct ceph_mds_session *s) { struct ceph_mds_client *mdsc = s->s_mdsc; struct ceph_client *cl = mdsc->fsc->client; doutc(cl, "begin\n"); spin_lock(&s->s_cap_lock); if (s->s_num_cap_releases) ceph_flush_session_cap_releases(mdsc, s); spin_unlock(&s->s_cap_lock); doutc(cl, "done\n"); } void ceph_flush_cap_releases(struct ceph_mds_client *mdsc) { ceph_mdsc_iterate_sessions(mdsc, flush_cap_releases, true); } void __ceph_touch_fmode(struct ceph_inode_info *ci, struct ceph_mds_client *mdsc, int fmode) { unsigned long now = jiffies; if (fmode & CEPH_FILE_MODE_RD) ci->i_last_rd = now; if (fmode & CEPH_FILE_MODE_WR) ci->i_last_wr = now; /* queue periodic check */ if (fmode && __ceph_is_any_real_caps(ci) && list_empty(&ci->i_cap_delay_list)) __cap_delay_requeue(mdsc, ci); } void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(ci->netfs.inode.i_sb); int bits = (fmode << 1) | 1; bool already_opened = false; int i; if (count == 1) atomic64_inc(&mdsc->metric.opened_files); spin_lock(&ci->i_ceph_lock); for (i = 0; i < CEPH_FILE_MODE_BITS; i++) { /* * If any of the mode ref is larger than 0, * that means it has been already opened by * others. Just skip checking the PIN ref. */ if (i && ci->i_nr_by_mode[i]) already_opened = true; if (bits & (1 << i)) ci->i_nr_by_mode[i] += count; } if (!already_opened) percpu_counter_inc(&mdsc->metric.opened_inodes); spin_unlock(&ci->i_ceph_lock); } /* * Drop open file reference. If we were the last open file, * we may need to release capabilities to the MDS (or schedule * their delayed release). */ void ceph_put_fmode(struct ceph_inode_info *ci, int fmode, int count) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(ci->netfs.inode.i_sb); int bits = (fmode << 1) | 1; bool is_closed = true; int i; if (count == 1) atomic64_dec(&mdsc->metric.opened_files); spin_lock(&ci->i_ceph_lock); for (i = 0; i < CEPH_FILE_MODE_BITS; i++) { if (bits & (1 << i)) { BUG_ON(ci->i_nr_by_mode[i] < count); ci->i_nr_by_mode[i] -= count; } /* * If any of the mode ref is not 0 after * decreased, that means it is still opened * by others. Just skip checking the PIN ref. */ if (i && ci->i_nr_by_mode[i]) is_closed = false; } if (is_closed) percpu_counter_dec(&mdsc->metric.opened_inodes); spin_unlock(&ci->i_ceph_lock); } /* * For a soon-to-be unlinked file, drop the LINK caps. If it * looks like the link count will hit 0, drop any other caps (other * than PIN) we don't specifically want (due to the file still being * open). */ int ceph_drop_caps_for_unlink(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL; spin_lock(&ci->i_ceph_lock); if (inode->i_nlink == 1) { drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN); if (__ceph_caps_dirty(ci)) { struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode, ceph_vinop(inode)); spin_lock(&mdsc->cap_delay_lock); ci->i_ceph_flags |= CEPH_I_FLUSH; if (!list_empty(&ci->i_cap_delay_list)) list_del_init(&ci->i_cap_delay_list); list_add_tail(&ci->i_cap_delay_list, &mdsc->cap_unlink_delay_list); spin_unlock(&mdsc->cap_delay_lock); /* * Fire the work immediately, because the MDS maybe * waiting for caps release. */ ceph_queue_cap_unlink_work(mdsc); } } spin_unlock(&ci->i_ceph_lock); return drop; } /* * Helpers for embedding cap and dentry lease releases into mds * requests. * * @force is used by dentry_release (below) to force inclusion of a * record for the directory inode, even when there aren't any caps to * drop. */ int ceph_encode_inode_release(void **p, struct inode *inode, int mds, int drop, int unless, int force) { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_cap *cap; struct ceph_mds_request_release *rel = *p; int used, dirty; int ret = 0; spin_lock(&ci->i_ceph_lock); used = __ceph_caps_used(ci); dirty = __ceph_caps_dirty(ci); doutc(cl, "%p %llx.%llx mds%d used|dirty %s drop %s unless %s\n", inode, ceph_vinop(inode), mds, ceph_cap_string(used|dirty), ceph_cap_string(drop), ceph_cap_string(unless)); /* only drop unused, clean caps */ drop &= ~(used | dirty); cap = __get_cap_for_mds(ci, mds); if (cap && __cap_is_valid(cap)) { unless &= cap->issued; if (unless) { if (unless & CEPH_CAP_AUTH_EXCL) drop &= ~CEPH_CAP_AUTH_SHARED; if (unless & CEPH_CAP_LINK_EXCL) drop &= ~CEPH_CAP_LINK_SHARED; if (unless & CEPH_CAP_XATTR_EXCL) drop &= ~CEPH_CAP_XATTR_SHARED; if (unless & CEPH_CAP_FILE_EXCL) drop &= ~CEPH_CAP_FILE_SHARED; } if (force || (cap->issued & drop)) { if (cap->issued & drop) { int wanted = __ceph_caps_wanted(ci); doutc(cl, "%p %llx.%llx cap %p %s -> %s, " "wanted %s -> %s\n", inode, ceph_vinop(inode), cap, ceph_cap_string(cap->issued), ceph_cap_string(cap->issued & ~drop), ceph_cap_string(cap->mds_wanted), ceph_cap_string(wanted)); cap->issued &= ~drop; cap->implemented &= ~drop; cap->mds_wanted = wanted; if (cap == ci->i_auth_cap && !(wanted & CEPH_CAP_ANY_FILE_WR)) ci->i_requested_max_size = 0; } else { doutc(cl, "%p %llx.%llx cap %p %s (force)\n", inode, ceph_vinop(inode), cap, ceph_cap_string(cap->issued)); } rel->ino = cpu_to_le64(ceph_ino(inode)); rel->cap_id = cpu_to_le64(cap->cap_id); rel->seq = cpu_to_le32(cap->seq); rel->issue_seq = cpu_to_le32(cap->issue_seq); rel->mseq = cpu_to_le32(cap->mseq); rel->caps = cpu_to_le32(cap->implemented); rel->wanted = cpu_to_le32(cap->mds_wanted); rel->dname_len = 0; rel->dname_seq = 0; *p += sizeof(*rel); ret = 1; } else { doutc(cl, "%p %llx.%llx cap %p %s (noop)\n", inode, ceph_vinop(inode), cap, ceph_cap_string(cap->issued)); } } spin_unlock(&ci->i_ceph_lock); return ret; } /** * ceph_encode_dentry_release - encode a dentry release into an outgoing request * @p: outgoing request buffer * @dentry: dentry to release * @dir: dir to release it from * @mds: mds that we're speaking to * @drop: caps being dropped * @unless: unless we have these caps * * Encode a dentry release into an outgoing request buffer. Returns 1 if the * thing was released, or a negative error code otherwise. */ int ceph_encode_dentry_release(void **p, struct dentry *dentry, struct inode *dir, int mds, int drop, int unless) { struct ceph_mds_request_release *rel = *p; struct ceph_dentry_info *di = ceph_dentry(dentry); struct ceph_client *cl; int force = 0; int ret; /* This shouldn't happen */ BUG_ON(!dir); /* * force an record for the directory caps if we have a dentry lease. * this is racy (can't take i_ceph_lock and d_lock together), but it * doesn't have to be perfect; the mds will revoke anything we don't * release. */ spin_lock(&dentry->d_lock); if (di->lease_session && di->lease_session->s_mds == mds) force = 1; spin_unlock(&dentry->d_lock); ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force); cl = ceph_inode_to_client(dir); spin_lock(&dentry->d_lock); if (ret && di->lease_session && di->lease_session->s_mds == mds) { doutc(cl, "%p mds%d seq %d\n", dentry, mds, (int)di->lease_seq); rel->dname_seq = cpu_to_le32(di->lease_seq); __ceph_mdsc_drop_dentry_lease(dentry); spin_unlock(&dentry->d_lock); if (IS_ENCRYPTED(dir) && fscrypt_has_encryption_key(dir)) { int ret2 = ceph_encode_encrypted_fname(dir, dentry, *p); if (ret2 < 0) return ret2; rel->dname_len = cpu_to_le32(ret2); *p += ret2; } else { rel->dname_len = cpu_to_le32(dentry->d_name.len); memcpy(*p, dentry->d_name.name, dentry->d_name.len); *p += dentry->d_name.len; } } else { spin_unlock(&dentry->d_lock); } return ret; } static int remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_client *cl = mdsc->fsc->client; struct ceph_cap_snap *capsnap; int capsnap_release = 0; lockdep_assert_held(&ci->i_ceph_lock); doutc(cl, "removing capsnaps, ci is %p, %p %llx.%llx\n", ci, inode, ceph_vinop(inode)); while (!list_empty(&ci->i_cap_snaps)) { capsnap = list_first_entry(&ci->i_cap_snaps, struct ceph_cap_snap, ci_item); __ceph_remove_capsnap(inode, capsnap, NULL, NULL); ceph_put_snap_context(capsnap->context); ceph_put_cap_snap(capsnap); capsnap_release++; } wake_up_all(&ci->i_cap_wq); wake_up_all(&mdsc->cap_flushing_wq); return capsnap_release; } int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invalidate) { struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_client *cl = fsc->client; struct ceph_inode_info *ci = ceph_inode(inode); bool is_auth; bool dirty_dropped = false; int iputs = 0; lockdep_assert_held(&ci->i_ceph_lock); doutc(cl, "removing cap %p, ci is %p, %p %llx.%llx\n", cap, ci, inode, ceph_vinop(inode)); is_auth = (cap == ci->i_auth_cap); __ceph_remove_cap(cap, false); if (is_auth) { struct ceph_cap_flush *cf; if (ceph_inode_is_shutdown(inode)) { if (inode->i_data.nrpages > 0) *invalidate = true; if (ci->i_wrbuffer_ref > 0) mapping_set_error(&inode->i_data, -EIO); } spin_lock(&mdsc->cap_dirty_lock); /* trash all of the cap flushes for this inode */ while (!list_empty(&ci->i_cap_flush_list)) { cf = list_first_entry(&ci->i_cap_flush_list, struct ceph_cap_flush, i_list); list_del_init(&cf->g_list); list_del_init(&cf->i_list); if (!cf->is_capsnap) ceph_free_cap_flush(cf); } if (!list_empty(&ci->i_dirty_item)) { pr_warn_ratelimited_client(cl, " dropping dirty %s state for %p %llx.%llx\n", ceph_cap_string(ci->i_dirty_caps), inode, ceph_vinop(inode)); ci->i_dirty_caps = 0; list_del_init(&ci->i_dirty_item); dirty_dropped = true; } if (!list_empty(&ci->i_flushing_item)) { pr_warn_ratelimited_client(cl, " dropping dirty+flushing %s state for %p %llx.%llx\n", ceph_cap_string(ci->i_flushing_caps), inode, ceph_vinop(inode)); ci->i_flushing_caps = 0; list_del_init(&ci->i_flushing_item); mdsc->num_cap_flushing--; dirty_dropped = true; } spin_unlock(&mdsc->cap_dirty_lock); if (dirty_dropped) { mapping_set_error(inode->i_mapping, -EIO); if (ci->i_wrbuffer_ref_head == 0 && ci->i_wr_ref == 0 && ci->i_dirty_caps == 0 && ci->i_flushing_caps == 0) { ceph_put_snap_context(ci->i_head_snapc); ci->i_head_snapc = NULL; } } if (atomic_read(&ci->i_filelock_ref) > 0) { /* make further file lock syscall return -EIO */ ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK; pr_warn_ratelimited_client(cl, " dropping file locks for %p %llx.%llx\n", inode, ceph_vinop(inode)); } if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) { cf = ci->i_prealloc_cap_flush; ci->i_prealloc_cap_flush = NULL; if (!cf->is_capsnap) ceph_free_cap_flush(cf); } if (!list_empty(&ci->i_cap_snaps)) iputs = remove_capsnaps(mdsc, inode); } if (dirty_dropped) ++iputs; return iputs; } |
11 10 11 8 11 2 10 11 8 8 8 8 8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Cryptographic API. * * RIPEMD-160 - RACE Integrity Primitives Evaluation Message Digest. * * Based on the reference implementation by Antoon Bosselaers, ESAT-COSIC * * Copyright (c) 2008 Adrian-Ken Rueegsegger <ken@codelabs.ch> */ #include <crypto/internal/hash.h> #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> #include <linux/types.h> #include <asm/byteorder.h> #include "ripemd.h" struct rmd160_ctx { u64 byte_count; u32 state[5]; __le32 buffer[16]; }; #define K1 RMD_K1 #define K2 RMD_K2 #define K3 RMD_K3 #define K4 RMD_K4 #define K5 RMD_K5 #define KK1 RMD_K6 #define KK2 RMD_K7 #define KK3 RMD_K8 #define KK4 RMD_K9 #define KK5 RMD_K1 #define F1(x, y, z) (x ^ y ^ z) /* XOR */ #define F2(x, y, z) (z ^ (x & (y ^ z))) /* x ? y : z */ #define F3(x, y, z) ((x | ~y) ^ z) #define F4(x, y, z) (y ^ (z & (x ^ y))) /* z ? x : y */ #define F5(x, y, z) (x ^ (y | ~z)) #define ROUND(a, b, c, d, e, f, k, x, s) { \ (a) += f((b), (c), (d)) + le32_to_cpup(&(x)) + (k); \ (a) = rol32((a), (s)) + (e); \ (c) = rol32((c), 10); \ } static void rmd160_transform(u32 *state, const __le32 *in) { u32 aa, bb, cc, dd, ee, aaa, bbb, ccc, ddd, eee; /* Initialize left lane */ aa = state[0]; bb = state[1]; cc = state[2]; dd = state[3]; ee = state[4]; /* Initialize right lane */ aaa = state[0]; bbb = state[1]; ccc = state[2]; ddd = state[3]; eee = state[4]; /* round 1: left lane */ ROUND(aa, bb, cc, dd, ee, F1, K1, in[0], 11); ROUND(ee, aa, bb, cc, dd, F1, K1, in[1], 14); ROUND(dd, ee, aa, bb, cc, F1, K1, in[2], 15); ROUND(cc, dd, ee, aa, bb, F1, K1, in[3], 12); ROUND(bb, cc, dd, ee, aa, F1, K1, in[4], 5); ROUND(aa, bb, cc, dd, ee, F1, K1, in[5], 8); ROUND(ee, aa, bb, cc, dd, F1, K1, in[6], 7); ROUND(dd, ee, aa, bb, cc, F1, K1, in[7], 9); ROUND(cc, dd, ee, aa, bb, F1, K1, in[8], 11); ROUND(bb, cc, dd, ee, aa, F1, K1, in[9], 13); ROUND(aa, bb, cc, dd, ee, F1, K1, in[10], 14); ROUND(ee, aa, bb, cc, dd, F1, K1, in[11], 15); ROUND(dd, ee, aa, bb, cc, F1, K1, in[12], 6); ROUND(cc, dd, ee, aa, bb, F1, K1, in[13], 7); ROUND(bb, cc, dd, ee, aa, F1, K1, in[14], 9); ROUND(aa, bb, cc, dd, ee, F1, K1, in[15], 8); /* round 2: left lane" */ ROUND(ee, aa, bb, cc, dd, F2, K2, in[7], 7); ROUND(dd, ee, aa, bb, cc, F2, K2, in[4], 6); ROUND(cc, dd, ee, aa, bb, F2, K2, in[13], 8); ROUND(bb, cc, dd, ee, aa, F2, K2, in[1], 13); ROUND(aa, bb, cc, dd, ee, F2, K2, in[10], 11); ROUND(ee, aa, bb, cc, dd, F2, K2, in[6], 9); ROUND(dd, ee, aa, bb, cc, F2, K2, in[15], 7); ROUND(cc, dd, ee, aa, bb, F2, K2, in[3], 15); ROUND(bb, cc, dd, ee, aa, F2, K2, in[12], 7); ROUND(aa, bb, cc, dd, ee, F2, K2, in[0], 12); ROUND(ee, aa, bb, cc, dd, F2, K2, in[9], 15); ROUND(dd, ee, aa, bb, cc, F2, K2, in[5], 9); ROUND(cc, dd, ee, aa, bb, F2, K2, in[2], 11); ROUND(bb, cc, dd, ee, aa, F2, K2, in[14], 7); ROUND(aa, bb, cc, dd, ee, F2, K2, in[11], 13); ROUND(ee, aa, bb, cc, dd, F2, K2, in[8], 12); /* round 3: left lane" */ ROUND(dd, ee, aa, bb, cc, F3, K3, in[3], 11); ROUND(cc, dd, ee, aa, bb, F3, K3, in[10], 13); ROUND(bb, cc, dd, ee, aa, F3, K3, in[14], 6); ROUND(aa, bb, cc, dd, ee, F3, K3, in[4], 7); ROUND(ee, aa, bb, cc, dd, F3, K3, in[9], 14); ROUND(dd, ee, aa, bb, cc, F3, K3, in[15], 9); ROUND(cc, dd, ee, aa, bb, F3, K3, in[8], 13); ROUND(bb, cc, dd, ee, aa, F3, K3, in[1], 15); ROUND(aa, bb, cc, dd, ee, F3, K3, in[2], 14); ROUND(ee, aa, bb, cc, dd, F3, K3, in[7], 8); ROUND(dd, ee, aa, bb, cc, F3, K3, in[0], 13); ROUND(cc, dd, ee, aa, bb, F3, K3, in[6], 6); ROUND(bb, cc, dd, ee, aa, F3, K3, in[13], 5); ROUND(aa, bb, cc, dd, ee, F3, K3, in[11], 12); ROUND(ee, aa, bb, cc, dd, F3, K3, in[5], 7); ROUND(dd, ee, aa, bb, cc, F3, K3, in[12], 5); /* round 4: left lane" */ ROUND(cc, dd, ee, aa, bb, F4, K4, in[1], 11); ROUND(bb, cc, dd, ee, aa, F4, K4, in[9], 12); ROUND(aa, bb, cc, dd, ee, F4, K4, in[11], 14); ROUND(ee, aa, bb, cc, dd, F4, K4, in[10], 15); ROUND(dd, ee, aa, bb, cc, F4, K4, in[0], 14); ROUND(cc, dd, ee, aa, bb, F4, K4, in[8], 15); ROUND(bb, cc, dd, ee, aa, F4, K4, in[12], 9); ROUND(aa, bb, cc, dd, ee, F4, K4, in[4], 8); ROUND(ee, aa, bb, cc, dd, F4, K4, in[13], 9); ROUND(dd, ee, aa, bb, cc, F4, K4, in[3], 14); ROUND(cc, dd, ee, aa, bb, F4, K4, in[7], 5); ROUND(bb, cc, dd, ee, aa, F4, K4, in[15], 6); ROUND(aa, bb, cc, dd, ee, F4, K4, in[14], 8); ROUND(ee, aa, bb, cc, dd, F4, K4, in[5], 6); ROUND(dd, ee, aa, bb, cc, F4, K4, in[6], 5); ROUND(cc, dd, ee, aa, bb, F4, K4, in[2], 12); /* round 5: left lane" */ ROUND(bb, cc, dd, ee, aa, F5, K5, in[4], 9); ROUND(aa, bb, cc, dd, ee, F5, K5, in[0], 15); ROUND(ee, aa, bb, cc, dd, F5, K5, in[5], 5); ROUND(dd, ee, aa, bb, cc, F5, K5, in[9], 11); ROUND(cc, dd, ee, aa, bb, F5, K5, in[7], 6); ROUND(bb, cc, dd, ee, aa, F5, K5, in[12], 8); ROUND(aa, bb, cc, dd, ee, F5, K5, in[2], 13); ROUND(ee, aa, bb, cc, dd, F5, K5, in[10], 12); ROUND(dd, ee, aa, bb, cc, F5, K5, in[14], 5); ROUND(cc, dd, ee, aa, bb, F5, K5, in[1], 12); ROUND(bb, cc, dd, ee, aa, F5, K5, in[3], 13); ROUND(aa, bb, cc, dd, ee, F5, K5, in[8], 14); ROUND(ee, aa, bb, cc, dd, F5, K5, in[11], 11); ROUND(dd, ee, aa, bb, cc, F5, K5, in[6], 8); ROUND(cc, dd, ee, aa, bb, F5, K5, in[15], 5); ROUND(bb, cc, dd, ee, aa, F5, K5, in[13], 6); /* round 1: right lane */ ROUND(aaa, bbb, ccc, ddd, eee, F5, KK1, in[5], 8); ROUND(eee, aaa, bbb, ccc, ddd, F5, KK1, in[14], 9); ROUND(ddd, eee, aaa, bbb, ccc, F5, KK1, in[7], 9); ROUND(ccc, ddd, eee, aaa, bbb, F5, KK1, in[0], 11); ROUND(bbb, ccc, ddd, eee, aaa, F5, KK1, in[9], 13); ROUND(aaa, bbb, ccc, ddd, eee, F5, KK1, in[2], 15); ROUND(eee, aaa, bbb, ccc, ddd, F5, KK1, in[11], 15); ROUND(ddd, eee, aaa, bbb, ccc, F5, KK1, in[4], 5); ROUND(ccc, ddd, eee, aaa, bbb, F5, KK1, in[13], 7); ROUND(bbb, ccc, ddd, eee, aaa, F5, KK1, in[6], 7); ROUND(aaa, bbb, ccc, ddd, eee, F5, KK1, in[15], 8); ROUND(eee, aaa, bbb, ccc, ddd, F5, KK1, in[8], 11); ROUND(ddd, eee, aaa, bbb, ccc, F5, KK1, in[1], 14); ROUND(ccc, ddd, eee, aaa, bbb, F5, KK1, in[10], 14); ROUND(bbb, ccc, ddd, eee, aaa, F5, KK1, in[3], 12); ROUND(aaa, bbb, ccc, ddd, eee, F5, KK1, in[12], 6); /* round 2: right lane */ ROUND(eee, aaa, bbb, ccc, ddd, F4, KK2, in[6], 9); ROUND(ddd, eee, aaa, bbb, ccc, F4, KK2, in[11], 13); ROUND(ccc, ddd, eee, aaa, bbb, F4, KK2, in[3], 15); ROUND(bbb, ccc, ddd, eee, aaa, F4, KK2, in[7], 7); ROUND(aaa, bbb, ccc, ddd, eee, F4, KK2, in[0], 12); ROUND(eee, aaa, bbb, ccc, ddd, F4, KK2, in[13], 8); ROUND(ddd, eee, aaa, bbb, ccc, F4, KK2, in[5], 9); ROUND(ccc, ddd, eee, aaa, bbb, F4, KK2, in[10], 11); ROUND(bbb, ccc, ddd, eee, aaa, F4, KK2, in[14], 7); ROUND(aaa, bbb, ccc, ddd, eee, F4, KK2, in[15], 7); ROUND(eee, aaa, bbb, ccc, ddd, F4, KK2, in[8], 12); ROUND(ddd, eee, aaa, bbb, ccc, F4, KK2, in[12], 7); ROUND(ccc, ddd, eee, aaa, bbb, F4, KK2, in[4], 6); ROUND(bbb, ccc, ddd, eee, aaa, F4, KK2, in[9], 15); ROUND(aaa, bbb, ccc, ddd, eee, F4, KK2, in[1], 13); ROUND(eee, aaa, bbb, ccc, ddd, F4, KK2, in[2], 11); /* round 3: right lane */ ROUND(ddd, eee, aaa, bbb, ccc, F3, KK3, in[15], 9); ROUND(ccc, ddd, eee, aaa, bbb, F3, KK3, in[5], 7); ROUND(bbb, ccc, ddd, eee, aaa, F3, KK3, in[1], 15); ROUND(aaa, bbb, ccc, ddd, eee, F3, KK3, in[3], 11); ROUND(eee, aaa, bbb, ccc, ddd, F3, KK3, in[7], 8); ROUND(ddd, eee, aaa, bbb, ccc, F3, KK3, in[14], 6); ROUND(ccc, ddd, eee, aaa, bbb, F3, KK3, in[6], 6); ROUND(bbb, ccc, ddd, eee, aaa, F3, KK3, in[9], 14); ROUND(aaa, bbb, ccc, ddd, eee, F3, KK3, in[11], 12); ROUND(eee, aaa, bbb, ccc, ddd, F3, KK3, in[8], 13); ROUND(ddd, eee, aaa, bbb, ccc, F3, KK3, in[12], 5); ROUND(ccc, ddd, eee, aaa, bbb, F3, KK3, in[2], 14); ROUND(bbb, ccc, ddd, eee, aaa, F3, KK3, in[10], 13); ROUND(aaa, bbb, ccc, ddd, eee, F3, KK3, in[0], 13); ROUND(eee, aaa, bbb, ccc, ddd, F3, KK3, in[4], 7); ROUND(ddd, eee, aaa, bbb, ccc, F3, KK3, in[13], 5); /* round 4: right lane */ ROUND(ccc, ddd, eee, aaa, bbb, F2, KK4, in[8], 15); ROUND(bbb, ccc, ddd, eee, aaa, F2, KK4, in[6], 5); ROUND(aaa, bbb, ccc, ddd, eee, F2, KK4, in[4], 8); ROUND(eee, aaa, bbb, ccc, ddd, F2, KK4, in[1], 11); ROUND(ddd, eee, aaa, bbb, ccc, F2, KK4, in[3], 14); ROUND(ccc, ddd, eee, aaa, bbb, F2, KK4, in[11], 14); ROUND(bbb, ccc, ddd, eee, aaa, F2, KK4, in[15], 6); ROUND(aaa, bbb, ccc, ddd, eee, F2, KK4, in[0], 14); ROUND(eee, aaa, bbb, ccc, ddd, F2, KK4, in[5], 6); ROUND(ddd, eee, aaa, bbb, ccc, F2, KK4, in[12], 9); ROUND(ccc, ddd, eee, aaa, bbb, F2, KK4, in[2], 12); ROUND(bbb, ccc, ddd, eee, aaa, F2, KK4, in[13], 9); ROUND(aaa, bbb, ccc, ddd, eee, F2, KK4, in[9], 12); ROUND(eee, aaa, bbb, ccc, ddd, F2, KK4, in[7], 5); ROUND(ddd, eee, aaa, bbb, ccc, F2, KK4, in[10], 15); ROUND(ccc, ddd, eee, aaa, bbb, F2, KK4, in[14], 8); /* round 5: right lane */ ROUND(bbb, ccc, ddd, eee, aaa, F1, KK5, in[12], 8); ROUND(aaa, bbb, ccc, ddd, eee, F1, KK5, in[15], 5); ROUND(eee, aaa, bbb, ccc, ddd, F1, KK5, in[10], 12); ROUND(ddd, eee, aaa, bbb, ccc, F1, KK5, in[4], 9); ROUND(ccc, ddd, eee, aaa, bbb, F1, KK5, in[1], 12); ROUND(bbb, ccc, ddd, eee, aaa, F1, KK5, in[5], 5); ROUND(aaa, bbb, ccc, ddd, eee, F1, KK5, in[8], 14); ROUND(eee, aaa, bbb, ccc, ddd, F1, KK5, in[7], 6); ROUND(ddd, eee, aaa, bbb, ccc, F1, KK5, in[6], 8); ROUND(ccc, ddd, eee, aaa, bbb, F1, KK5, in[2], 13); ROUND(bbb, ccc, ddd, eee, aaa, F1, KK5, in[13], 6); ROUND(aaa, bbb, ccc, ddd, eee, F1, KK5, in[14], 5); ROUND(eee, aaa, bbb, ccc, ddd, F1, KK5, in[0], 15); ROUND(ddd, eee, aaa, bbb, ccc, F1, KK5, in[3], 13); ROUND(ccc, ddd, eee, aaa, bbb, F1, KK5, in[9], 11); ROUND(bbb, ccc, ddd, eee, aaa, F1, KK5, in[11], 11); /* combine results */ ddd += cc + state[1]; /* final result for state[0] */ state[1] = state[2] + dd + eee; state[2] = state[3] + ee + aaa; state[3] = state[4] + aa + bbb; state[4] = state[0] + bb + ccc; state[0] = ddd; } static int rmd160_init(struct shash_desc *desc) { struct rmd160_ctx *rctx = shash_desc_ctx(desc); rctx->byte_count = 0; rctx->state[0] = RMD_H0; rctx->state[1] = RMD_H1; rctx->state[2] = RMD_H2; rctx->state[3] = RMD_H3; rctx->state[4] = RMD_H4; memset(rctx->buffer, 0, sizeof(rctx->buffer)); return 0; } static int rmd160_update(struct shash_desc *desc, const u8 *data, unsigned int len) { struct rmd160_ctx *rctx = shash_desc_ctx(desc); const u32 avail = sizeof(rctx->buffer) - (rctx->byte_count & 0x3f); rctx->byte_count += len; /* Enough space in buffer? If so copy and we're done */ if (avail > len) { memcpy((char *)rctx->buffer + (sizeof(rctx->buffer) - avail), data, len); goto out; } memcpy((char *)rctx->buffer + (sizeof(rctx->buffer) - avail), data, avail); rmd160_transform(rctx->state, rctx->buffer); data += avail; len -= avail; while (len >= sizeof(rctx->buffer)) { memcpy(rctx->buffer, data, sizeof(rctx->buffer)); rmd160_transform(rctx->state, rctx->buffer); data += sizeof(rctx->buffer); len -= sizeof(rctx->buffer); } memcpy(rctx->buffer, data, len); out: return 0; } /* Add padding and return the message digest. */ static int rmd160_final(struct shash_desc *desc, u8 *out) { struct rmd160_ctx *rctx = shash_desc_ctx(desc); u32 i, index, padlen; __le64 bits; __le32 *dst = (__le32 *)out; static const u8 padding[64] = { 0x80, }; bits = cpu_to_le64(rctx->byte_count << 3); /* Pad out to 56 mod 64 */ index = rctx->byte_count & 0x3f; padlen = (index < 56) ? (56 - index) : ((64+56) - index); rmd160_update(desc, padding, padlen); /* Append length */ rmd160_update(desc, (const u8 *)&bits, sizeof(bits)); /* Store state in digest */ for (i = 0; i < 5; i++) dst[i] = cpu_to_le32p(&rctx->state[i]); /* Wipe context */ memset(rctx, 0, sizeof(*rctx)); return 0; } static struct shash_alg alg = { .digestsize = RMD160_DIGEST_SIZE, .init = rmd160_init, .update = rmd160_update, .final = rmd160_final, .descsize = sizeof(struct rmd160_ctx), .base = { .cra_name = "rmd160", .cra_driver_name = "rmd160-generic", .cra_blocksize = RMD160_BLOCK_SIZE, .cra_module = THIS_MODULE, } }; static int __init rmd160_mod_init(void) { return crypto_register_shash(&alg); } static void __exit rmd160_mod_fini(void) { crypto_unregister_shash(&alg); } subsys_initcall(rmd160_mod_init); module_exit(rmd160_mod_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Adrian-Ken Rueegsegger <ken@codelabs.ch>"); MODULE_DESCRIPTION("RIPEMD-160 Message Digest"); MODULE_ALIAS_CRYPTO("rmd160"); |
47 47 46 45 47 47 43 46 47 27 47 47 47 47 24 46 46 47 47 47 47 47 47 43 43 43 44 44 44 43 48 44 43 2 44 44 44 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 | // SPDX-License-Identifier: GPL-2.0 /* * drivers/usb/core/sysfs.c * * (C) Copyright 2002 David Brownell * (C) Copyright 2002,2004 Greg Kroah-Hartman * (C) Copyright 2002,2004 IBM Corp. * * All of the sysfs file attributes for usb devices and interfaces. * * Released under the GPLv2 only. */ #include <linux/kernel.h> #include <linux/kstrtox.h> #include <linux/string.h> #include <linux/usb.h> #include <linux/usb/hcd.h> #include <linux/usb/quirks.h> #include <linux/of.h> #include "usb.h" /* Active configuration fields */ #define usb_actconfig_show(field, format_string) \ static ssize_t field##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct usb_device *udev; \ struct usb_host_config *actconfig; \ ssize_t rc; \ \ udev = to_usb_device(dev); \ rc = usb_lock_device_interruptible(udev); \ if (rc < 0) \ return -EINTR; \ actconfig = udev->actconfig; \ if (actconfig) \ rc = sysfs_emit(buf, format_string, \ actconfig->desc.field); \ usb_unlock_device(udev); \ return rc; \ } \ #define usb_actconfig_attr(field, format_string) \ usb_actconfig_show(field, format_string) \ static DEVICE_ATTR_RO(field) usb_actconfig_attr(bNumInterfaces, "%2d\n"); usb_actconfig_attr(bmAttributes, "%2x\n"); static ssize_t bMaxPower_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; struct usb_host_config *actconfig; ssize_t rc; udev = to_usb_device(dev); rc = usb_lock_device_interruptible(udev); if (rc < 0) return -EINTR; actconfig = udev->actconfig; if (actconfig) rc = sysfs_emit(buf, "%dmA\n", usb_get_max_power(udev, actconfig)); usb_unlock_device(udev); return rc; } static DEVICE_ATTR_RO(bMaxPower); static ssize_t configuration_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; struct usb_host_config *actconfig; ssize_t rc; udev = to_usb_device(dev); rc = usb_lock_device_interruptible(udev); if (rc < 0) return -EINTR; actconfig = udev->actconfig; if (actconfig && actconfig->string) rc = sysfs_emit(buf, "%s\n", actconfig->string); usb_unlock_device(udev); return rc; } static DEVICE_ATTR_RO(configuration); /* configuration value is always present, and r/w */ usb_actconfig_show(bConfigurationValue, "%u\n"); static ssize_t bConfigurationValue_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); int config, value, rc; if (sscanf(buf, "%d", &config) != 1 || config < -1 || config > 255) return -EINVAL; rc = usb_lock_device_interruptible(udev); if (rc < 0) return -EINTR; value = usb_set_configuration(udev, config); usb_unlock_device(udev); return (value < 0) ? value : count; } static DEVICE_ATTR_IGNORE_LOCKDEP(bConfigurationValue, S_IRUGO | S_IWUSR, bConfigurationValue_show, bConfigurationValue_store); #ifdef CONFIG_OF static ssize_t devspec_show(struct device *dev, struct device_attribute *attr, char *buf) { struct device_node *of_node = dev->of_node; return sysfs_emit(buf, "%pOF\n", of_node); } static DEVICE_ATTR_RO(devspec); #endif /* String fields */ #define usb_string_attr(name) \ static ssize_t name##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct usb_device *udev; \ int retval; \ \ udev = to_usb_device(dev); \ retval = usb_lock_device_interruptible(udev); \ if (retval < 0) \ return -EINTR; \ retval = sysfs_emit(buf, "%s\n", udev->name); \ usb_unlock_device(udev); \ return retval; \ } \ static DEVICE_ATTR_RO(name) usb_string_attr(product); usb_string_attr(manufacturer); usb_string_attr(serial); static ssize_t speed_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; char *speed; udev = to_usb_device(dev); switch (udev->speed) { case USB_SPEED_LOW: speed = "1.5"; break; case USB_SPEED_UNKNOWN: case USB_SPEED_FULL: speed = "12"; break; case USB_SPEED_HIGH: speed = "480"; break; case USB_SPEED_SUPER: speed = "5000"; break; case USB_SPEED_SUPER_PLUS: if (udev->ssp_rate == USB_SSP_GEN_2x2) speed = "20000"; else speed = "10000"; break; default: speed = "unknown"; } return sysfs_emit(buf, "%s\n", speed); } static DEVICE_ATTR_RO(speed); static ssize_t rx_lanes_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sysfs_emit(buf, "%d\n", udev->rx_lanes); } static DEVICE_ATTR_RO(rx_lanes); static ssize_t tx_lanes_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sysfs_emit(buf, "%d\n", udev->tx_lanes); } static DEVICE_ATTR_RO(tx_lanes); static ssize_t busnum_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sysfs_emit(buf, "%d\n", udev->bus->busnum); } static DEVICE_ATTR_RO(busnum); static ssize_t devnum_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sysfs_emit(buf, "%d\n", udev->devnum); } static DEVICE_ATTR_RO(devnum); static ssize_t devpath_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sysfs_emit(buf, "%s\n", udev->devpath); } static DEVICE_ATTR_RO(devpath); static ssize_t version_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; u16 bcdUSB; udev = to_usb_device(dev); bcdUSB = le16_to_cpu(udev->descriptor.bcdUSB); return sysfs_emit(buf, "%2x.%02x\n", bcdUSB >> 8, bcdUSB & 0xff); } static DEVICE_ATTR_RO(version); static ssize_t maxchild_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sysfs_emit(buf, "%d\n", udev->maxchild); } static DEVICE_ATTR_RO(maxchild); static ssize_t quirks_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sysfs_emit(buf, "0x%x\n", udev->quirks); } static DEVICE_ATTR_RO(quirks); static ssize_t avoid_reset_quirk_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sysfs_emit(buf, "%d\n", !!(udev->quirks & USB_QUIRK_RESET)); } static ssize_t avoid_reset_quirk_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); bool val; int rc; if (kstrtobool(buf, &val) != 0) return -EINVAL; rc = usb_lock_device_interruptible(udev); if (rc < 0) return -EINTR; if (val) udev->quirks |= USB_QUIRK_RESET; else udev->quirks &= ~USB_QUIRK_RESET; usb_unlock_device(udev); return count; } static DEVICE_ATTR_RW(avoid_reset_quirk); static ssize_t urbnum_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sysfs_emit(buf, "%d\n", atomic_read(&udev->urbnum)); } static DEVICE_ATTR_RO(urbnum); static ssize_t ltm_capable_show(struct device *dev, struct device_attribute *attr, char *buf) { if (usb_device_supports_ltm(to_usb_device(dev))) return sysfs_emit(buf, "%s\n", "yes"); return sysfs_emit(buf, "%s\n", "no"); } static DEVICE_ATTR_RO(ltm_capable); #ifdef CONFIG_PM static ssize_t persist_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); return sysfs_emit(buf, "%d\n", udev->persist_enabled); } static ssize_t persist_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); bool value; int rc; /* Hubs are always enabled for USB_PERSIST */ if (udev->descriptor.bDeviceClass == USB_CLASS_HUB) return -EPERM; if (kstrtobool(buf, &value) != 0) return -EINVAL; rc = usb_lock_device_interruptible(udev); if (rc < 0) return -EINTR; udev->persist_enabled = !!value; usb_unlock_device(udev); return count; } static DEVICE_ATTR_RW(persist); static int add_persist_attributes(struct device *dev) { int rc = 0; if (is_usb_device(dev)) { struct usb_device *udev = to_usb_device(dev); /* Hubs are automatically enabled for USB_PERSIST, * no point in creating the attribute file. */ if (udev->descriptor.bDeviceClass != USB_CLASS_HUB) rc = sysfs_add_file_to_group(&dev->kobj, &dev_attr_persist.attr, power_group_name); } return rc; } static void remove_persist_attributes(struct device *dev) { sysfs_remove_file_from_group(&dev->kobj, &dev_attr_persist.attr, power_group_name); } static ssize_t connected_duration_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); return sysfs_emit(buf, "%u\n", jiffies_to_msecs(jiffies - udev->connect_time)); } static DEVICE_ATTR_RO(connected_duration); /* * If the device is resumed, the last time the device was suspended has * been pre-subtracted from active_duration. We add the current time to * get the duration that the device was actually active. * * If the device is suspended, the active_duration is up-to-date. */ static ssize_t active_duration_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); int duration; if (udev->state != USB_STATE_SUSPENDED) duration = jiffies_to_msecs(jiffies + udev->active_duration); else duration = jiffies_to_msecs(udev->active_duration); return sysfs_emit(buf, "%u\n", duration); } static DEVICE_ATTR_RO(active_duration); static ssize_t autosuspend_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", dev->power.autosuspend_delay / 1000); } static ssize_t autosuspend_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int value; if (sscanf(buf, "%d", &value) != 1 || value >= INT_MAX/1000 || value <= -INT_MAX/1000) return -EINVAL; pm_runtime_set_autosuspend_delay(dev, value * 1000); return count; } static DEVICE_ATTR_RW(autosuspend); static const char on_string[] = "on"; static const char auto_string[] = "auto"; static void warn_level(void) { static int level_warned; if (!level_warned) { level_warned = 1; printk(KERN_WARNING "WARNING! power/level is deprecated; " "use power/control instead\n"); } } static ssize_t level_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); const char *p = auto_string; warn_level(); if (udev->state != USB_STATE_SUSPENDED && !udev->dev.power.runtime_auto) p = on_string; return sysfs_emit(buf, "%s\n", p); } static ssize_t level_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); int len = count; char *cp; int rc = count; int rv; warn_level(); cp = memchr(buf, '\n', count); if (cp) len = cp - buf; rv = usb_lock_device_interruptible(udev); if (rv < 0) return -EINTR; if (len == sizeof on_string - 1 && strncmp(buf, on_string, len) == 0) usb_disable_autosuspend(udev); else if (len == sizeof auto_string - 1 && strncmp(buf, auto_string, len) == 0) usb_enable_autosuspend(udev); else rc = -EINVAL; usb_unlock_device(udev); return rc; } static DEVICE_ATTR_RW(level); static ssize_t usb2_hardware_lpm_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); const char *p; if (udev->usb2_hw_lpm_allowed == 1) p = "enabled"; else p = "disabled"; return sysfs_emit(buf, "%s\n", p); } static ssize_t usb2_hardware_lpm_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); bool value; int ret; ret = usb_lock_device_interruptible(udev); if (ret < 0) return -EINTR; ret = kstrtobool(buf, &value); if (!ret) { udev->usb2_hw_lpm_allowed = value; if (value) ret = usb_enable_usb2_hardware_lpm(udev); else ret = usb_disable_usb2_hardware_lpm(udev); } usb_unlock_device(udev); if (!ret) return count; return ret; } static DEVICE_ATTR_RW(usb2_hardware_lpm); static ssize_t usb2_lpm_l1_timeout_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); return sysfs_emit(buf, "%d\n", udev->l1_params.timeout); } static ssize_t usb2_lpm_l1_timeout_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); u16 timeout; if (kstrtou16(buf, 0, &timeout)) return -EINVAL; udev->l1_params.timeout = timeout; return count; } static DEVICE_ATTR_RW(usb2_lpm_l1_timeout); static ssize_t usb2_lpm_besl_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); return sysfs_emit(buf, "%d\n", udev->l1_params.besl); } static ssize_t usb2_lpm_besl_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); u8 besl; if (kstrtou8(buf, 0, &besl) || besl > 15) return -EINVAL; udev->l1_params.besl = besl; return count; } static DEVICE_ATTR_RW(usb2_lpm_besl); static ssize_t usb3_hardware_lpm_u1_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); const char *p; int rc; rc = usb_lock_device_interruptible(udev); if (rc < 0) return -EINTR; if (udev->usb3_lpm_u1_enabled) p = "enabled"; else p = "disabled"; usb_unlock_device(udev); return sysfs_emit(buf, "%s\n", p); } static DEVICE_ATTR_RO(usb3_hardware_lpm_u1); static ssize_t usb3_hardware_lpm_u2_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); const char *p; int rc; rc = usb_lock_device_interruptible(udev); if (rc < 0) return -EINTR; if (udev->usb3_lpm_u2_enabled) p = "enabled"; else p = "disabled"; usb_unlock_device(udev); return sysfs_emit(buf, "%s\n", p); } static DEVICE_ATTR_RO(usb3_hardware_lpm_u2); static struct attribute *usb2_hardware_lpm_attr[] = { &dev_attr_usb2_hardware_lpm.attr, &dev_attr_usb2_lpm_l1_timeout.attr, &dev_attr_usb2_lpm_besl.attr, NULL, }; static const struct attribute_group usb2_hardware_lpm_attr_group = { .name = power_group_name, .attrs = usb2_hardware_lpm_attr, }; static struct attribute *usb3_hardware_lpm_attr[] = { &dev_attr_usb3_hardware_lpm_u1.attr, &dev_attr_usb3_hardware_lpm_u2.attr, NULL, }; static const struct attribute_group usb3_hardware_lpm_attr_group = { .name = power_group_name, .attrs = usb3_hardware_lpm_attr, }; static struct attribute *power_attrs[] = { &dev_attr_autosuspend.attr, &dev_attr_level.attr, &dev_attr_connected_duration.attr, &dev_attr_active_duration.attr, NULL, }; static const struct attribute_group power_attr_group = { .name = power_group_name, .attrs = power_attrs, }; static int add_power_attributes(struct device *dev) { int rc = 0; if (is_usb_device(dev)) { struct usb_device *udev = to_usb_device(dev); rc = sysfs_merge_group(&dev->kobj, &power_attr_group); if (udev->usb2_hw_lpm_capable == 1) rc = sysfs_merge_group(&dev->kobj, &usb2_hardware_lpm_attr_group); if ((udev->speed == USB_SPEED_SUPER || udev->speed == USB_SPEED_SUPER_PLUS) && udev->lpm_capable == 1) rc = sysfs_merge_group(&dev->kobj, &usb3_hardware_lpm_attr_group); } return rc; } static void remove_power_attributes(struct device *dev) { sysfs_unmerge_group(&dev->kobj, &usb3_hardware_lpm_attr_group); sysfs_unmerge_group(&dev->kobj, &usb2_hardware_lpm_attr_group); sysfs_unmerge_group(&dev->kobj, &power_attr_group); } #else #define add_persist_attributes(dev) 0 #define remove_persist_attributes(dev) do {} while (0) #define add_power_attributes(dev) 0 #define remove_power_attributes(dev) do {} while (0) #endif /* CONFIG_PM */ /* Descriptor fields */ #define usb_descriptor_attr_le16(field, format_string) \ static ssize_t \ field##_show(struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ struct usb_device *udev; \ \ udev = to_usb_device(dev); \ return sysfs_emit(buf, format_string, \ le16_to_cpu(udev->descriptor.field)); \ } \ static DEVICE_ATTR_RO(field) usb_descriptor_attr_le16(idVendor, "%04x\n"); usb_descriptor_attr_le16(idProduct, "%04x\n"); usb_descriptor_attr_le16(bcdDevice, "%04x\n"); #define usb_descriptor_attr(field, format_string) \ static ssize_t \ field##_show(struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ struct usb_device *udev; \ \ udev = to_usb_device(dev); \ return sysfs_emit(buf, format_string, udev->descriptor.field); \ } \ static DEVICE_ATTR_RO(field) usb_descriptor_attr(bDeviceClass, "%02x\n"); usb_descriptor_attr(bDeviceSubClass, "%02x\n"); usb_descriptor_attr(bDeviceProtocol, "%02x\n"); usb_descriptor_attr(bNumConfigurations, "%d\n"); usb_descriptor_attr(bMaxPacketSize0, "%d\n"); /* show if the device is authorized (1) or not (0) */ static ssize_t authorized_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *usb_dev = to_usb_device(dev); return sysfs_emit(buf, "%u\n", usb_dev->authorized); } /* * Authorize a device to be used in the system * * Writing a 0 deauthorizes the device, writing a 1 authorizes it. */ static ssize_t authorized_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { ssize_t result; struct usb_device *usb_dev = to_usb_device(dev); bool val; if (kstrtobool(buf, &val) != 0) result = -EINVAL; else if (val) result = usb_authorize_device(usb_dev); else result = usb_deauthorize_device(usb_dev); return result < 0 ? result : size; } static DEVICE_ATTR_IGNORE_LOCKDEP(authorized, S_IRUGO | S_IWUSR, authorized_show, authorized_store); /* "Safely remove a device" */ static ssize_t remove_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); int rc = 0; usb_lock_device(udev); if (udev->state != USB_STATE_NOTATTACHED) { /* To avoid races, first unconfigure and then remove */ usb_set_configuration(udev, -1); rc = usb_remove_device(udev); } if (rc == 0) rc = count; usb_unlock_device(udev); return rc; } static DEVICE_ATTR_IGNORE_LOCKDEP(remove, S_IWUSR, NULL, remove_store); static struct attribute *dev_attrs[] = { /* current configuration's attributes */ &dev_attr_configuration.attr, &dev_attr_bNumInterfaces.attr, &dev_attr_bConfigurationValue.attr, &dev_attr_bmAttributes.attr, &dev_attr_bMaxPower.attr, /* device attributes */ &dev_attr_urbnum.attr, &dev_attr_idVendor.attr, &dev_attr_idProduct.attr, &dev_attr_bcdDevice.attr, &dev_attr_bDeviceClass.attr, &dev_attr_bDeviceSubClass.attr, &dev_attr_bDeviceProtocol.attr, &dev_attr_bNumConfigurations.attr, &dev_attr_bMaxPacketSize0.attr, &dev_attr_speed.attr, &dev_attr_rx_lanes.attr, &dev_attr_tx_lanes.attr, &dev_attr_busnum.attr, &dev_attr_devnum.attr, &dev_attr_devpath.attr, &dev_attr_version.attr, &dev_attr_maxchild.attr, &dev_attr_quirks.attr, &dev_attr_avoid_reset_quirk.attr, &dev_attr_authorized.attr, &dev_attr_remove.attr, &dev_attr_ltm_capable.attr, #ifdef CONFIG_OF &dev_attr_devspec.attr, #endif NULL, }; static const struct attribute_group dev_attr_grp = { .attrs = dev_attrs, }; /* When modifying this list, be sure to modify dev_string_attrs_are_visible() * accordingly. */ static struct attribute *dev_string_attrs[] = { &dev_attr_manufacturer.attr, &dev_attr_product.attr, &dev_attr_serial.attr, NULL }; static umode_t dev_string_attrs_are_visible(struct kobject *kobj, struct attribute *a, int n) { struct device *dev = kobj_to_dev(kobj); struct usb_device *udev = to_usb_device(dev); if (a == &dev_attr_manufacturer.attr) { if (udev->manufacturer == NULL) return 0; } else if (a == &dev_attr_product.attr) { if (udev->product == NULL) return 0; } else if (a == &dev_attr_serial.attr) { if (udev->serial == NULL) return 0; } return a->mode; } static const struct attribute_group dev_string_attr_grp = { .attrs = dev_string_attrs, .is_visible = dev_string_attrs_are_visible, }; /* Binary descriptors */ static ssize_t descriptors_read(struct file *filp, struct kobject *kobj, const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj); struct usb_device *udev = to_usb_device(dev); size_t nleft = count; size_t srclen, n; int cfgno; void *src; /* The binary attribute begins with the device descriptor. * Following that are the raw descriptor entries for all the * configurations (config plus subsidiary descriptors). */ for (cfgno = -1; cfgno < udev->descriptor.bNumConfigurations && nleft > 0; ++cfgno) { if (cfgno < 0) { src = &udev->descriptor; srclen = sizeof(struct usb_device_descriptor); } else { src = udev->rawdescriptors[cfgno]; srclen = le16_to_cpu(udev->config[cfgno].desc. wTotalLength); } if (off < srclen) { n = min(nleft, srclen - (size_t) off); memcpy(buf, src + off, n); nleft -= n; buf += n; off = 0; } else { off -= srclen; } } return count - nleft; } static const BIN_ATTR_RO(descriptors, 18 + 65535); /* dev descr + max-size raw descriptor */ static ssize_t bos_descriptors_read(struct file *filp, struct kobject *kobj, const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj); struct usb_device *udev = to_usb_device(dev); struct usb_host_bos *bos = udev->bos; struct usb_bos_descriptor *desc; size_t desclen, n = 0; if (bos) { desc = bos->desc; desclen = le16_to_cpu(desc->wTotalLength); if (off < desclen) { n = min(count, desclen - (size_t) off); memcpy(buf, (void *) desc + off, n); } } return n; } static const BIN_ATTR_RO(bos_descriptors, 65535); /* max-size BOS */ /* When modifying this list, be sure to modify dev_bin_attrs_are_visible() * accordingly. */ static const struct bin_attribute *const dev_bin_attrs[] = { &bin_attr_descriptors, &bin_attr_bos_descriptors, NULL }; static umode_t dev_bin_attrs_are_visible(struct kobject *kobj, const struct bin_attribute *a, int n) { struct device *dev = kobj_to_dev(kobj); struct usb_device *udev = to_usb_device(dev); /* * There's no need to check if the descriptors attribute should * be visible because all devices have a device descriptor. The * bos_descriptors attribute should be visible if and only if * the device has a BOS, so check if it exists here. */ if (a == &bin_attr_bos_descriptors) { if (udev->bos == NULL) return 0; } return a->attr.mode; } static const struct attribute_group dev_bin_attr_grp = { .bin_attrs_new = dev_bin_attrs, .is_bin_visible = dev_bin_attrs_are_visible, }; const struct attribute_group *usb_device_groups[] = { &dev_attr_grp, &dev_string_attr_grp, &dev_bin_attr_grp, NULL }; /* * Show & store the current value of authorized_default */ static ssize_t authorized_default_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *rh_usb_dev = to_usb_device(dev); struct usb_bus *usb_bus = rh_usb_dev->bus; struct usb_hcd *hcd; hcd = bus_to_hcd(usb_bus); return sysfs_emit(buf, "%u\n", hcd->dev_policy); } static ssize_t authorized_default_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { ssize_t result; unsigned int val; struct usb_device *rh_usb_dev = to_usb_device(dev); struct usb_bus *usb_bus = rh_usb_dev->bus; struct usb_hcd *hcd; hcd = bus_to_hcd(usb_bus); result = sscanf(buf, "%u\n", &val); if (result == 1) { hcd->dev_policy = val <= USB_DEVICE_AUTHORIZE_INTERNAL ? val : USB_DEVICE_AUTHORIZE_ALL; result = size; } else { result = -EINVAL; } return result; } static DEVICE_ATTR_RW(authorized_default); /* * interface_authorized_default_show - show default authorization status * for USB interfaces * * note: interface_authorized_default is the default value * for initializing the authorized attribute of interfaces */ static ssize_t interface_authorized_default_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *usb_dev = to_usb_device(dev); struct usb_hcd *hcd = bus_to_hcd(usb_dev->bus); return sysfs_emit(buf, "%u\n", !!HCD_INTF_AUTHORIZED(hcd)); } /* * interface_authorized_default_store - store default authorization status * for USB interfaces * * note: interface_authorized_default is the default value * for initializing the authorized attribute of interfaces */ static ssize_t interface_authorized_default_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *usb_dev = to_usb_device(dev); struct usb_hcd *hcd = bus_to_hcd(usb_dev->bus); int rc = count; bool val; if (kstrtobool(buf, &val) != 0) return -EINVAL; if (val) set_bit(HCD_FLAG_INTF_AUTHORIZED, &hcd->flags); else clear_bit(HCD_FLAG_INTF_AUTHORIZED, &hcd->flags); return rc; } static DEVICE_ATTR_RW(interface_authorized_default); /* Group all the USB bus attributes */ static struct attribute *usb_bus_attrs[] = { &dev_attr_authorized_default.attr, &dev_attr_interface_authorized_default.attr, NULL, }; static const struct attribute_group usb_bus_attr_group = { .name = NULL, /* we want them in the same directory */ .attrs = usb_bus_attrs, }; static int add_default_authorized_attributes(struct device *dev) { int rc = 0; if (is_usb_device(dev)) rc = sysfs_create_group(&dev->kobj, &usb_bus_attr_group); return rc; } static void remove_default_authorized_attributes(struct device *dev) { if (is_usb_device(dev)) { sysfs_remove_group(&dev->kobj, &usb_bus_attr_group); } } int usb_create_sysfs_dev_files(struct usb_device *udev) { struct device *dev = &udev->dev; int retval; retval = add_persist_attributes(dev); if (retval) goto error; retval = add_power_attributes(dev); if (retval) goto error; if (is_root_hub(udev)) { retval = add_default_authorized_attributes(dev); if (retval) goto error; } return retval; error: usb_remove_sysfs_dev_files(udev); return retval; } void usb_remove_sysfs_dev_files(struct usb_device *udev) { struct device *dev = &udev->dev; if (is_root_hub(udev)) remove_default_authorized_attributes(dev); remove_power_attributes(dev); remove_persist_attributes(dev); } /* Interface Association Descriptor fields */ #def |