Total coverage: 266684 (17%)of 1573016
4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 /* * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved. * Copyright (c) 2016-2017, Dave Watson <davejwatson@fb.com>. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include <linux/list.h> #include <linux/rcupdate.h> #include <linux/spinlock.h> #include <net/inet_connection_sock.h> #include <net/tls.h> #include <net/tls_toe.h> #include "tls.h" static LIST_HEAD(device_list); static DEFINE_SPINLOCK(device_spinlock); static void tls_toe_sk_destruct(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tls_context *ctx = tls_get_ctx(sk); ctx->sk_destruct(sk); /* Free ctx */ rcu_assign_pointer(icsk->icsk_ulp_data, NULL); tls_ctx_free(sk, ctx); } int tls_toe_bypass(struct sock *sk) { struct tls_toe_device *dev; struct tls_context *ctx; int rc = 0; spin_lock_bh(&device_spinlock); list_for_each_entry(dev, &device_list, dev_list) { if (dev->feature && dev->feature(dev)) { ctx = tls_ctx_create(sk); if (!ctx) goto out; ctx->sk_destruct = sk->sk_destruct; sk->sk_destruct = tls_toe_sk_destruct; ctx->rx_conf = TLS_HW_RECORD; ctx->tx_conf = TLS_HW_RECORD; update_sk_prot(sk, ctx); rc = 1; break; } } out: spin_unlock_bh(&device_spinlock); return rc; } void tls_toe_unhash(struct sock *sk) { struct tls_context *ctx = tls_get_ctx(sk); struct tls_toe_device *dev; spin_lock_bh(&device_spinlock); list_for_each_entry(dev, &device_list, dev_list) { if (dev->unhash) { kref_get(&dev->kref); spin_unlock_bh(&device_spinlock); dev->unhash(dev, sk); kref_put(&dev->kref, dev->release); spin_lock_bh(&device_spinlock); } } spin_unlock_bh(&device_spinlock); ctx->sk_proto->unhash(sk); } int tls_toe_hash(struct sock *sk) { struct tls_context *ctx = tls_get_ctx(sk); struct tls_toe_device *dev; int err; err = ctx->sk_proto->hash(sk); spin_lock_bh(&device_spinlock); list_for_each_entry(dev, &device_list, dev_list) { if (dev->hash) { kref_get(&dev->kref); spin_unlock_bh(&device_spinlock); err |= dev->hash(dev, sk); kref_put(&dev->kref, dev->release); spin_lock_bh(&device_spinlock); } } spin_unlock_bh(&device_spinlock); if (err) tls_toe_unhash(sk); return err; } void tls_toe_register_device(struct tls_toe_device *device) { spin_lock_bh(&device_spinlock); list_add_tail(&device->dev_list, &device_list); spin_unlock_bh(&device_spinlock); } EXPORT_SYMBOL(tls_toe_register_device); void tls_toe_unregister_device(struct tls_toe_device *device) { spin_lock_bh(&device_spinlock); list_del(&device->dev_list); spin_unlock_bh(&device_spinlock); } EXPORT_SYMBOL(tls_toe_unregister_device);
2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 // SPDX-License-Identifier: GPL-2.0-only #include "netlink.h" #include "common.h" struct rss_req_info { struct ethnl_req_info base; u32 rss_context; }; struct rss_reply_data { struct ethnl_reply_data base; bool no_key_fields; u32 indir_size; u32 hkey_size; u32 hfunc; u32 input_xfrm; u32 *indir_table; u8 *hkey; }; #define RSS_REQINFO(__req_base) \ container_of(__req_base, struct rss_req_info, base) #define RSS_REPDATA(__reply_base) \ container_of(__reply_base, struct rss_reply_data, base) const struct nla_policy ethnl_rss_get_policy[] = { [ETHTOOL_A_RSS_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), [ETHTOOL_A_RSS_CONTEXT] = { .type = NLA_U32 }, [ETHTOOL_A_RSS_START_CONTEXT] = { .type = NLA_U32 }, }; static int rss_parse_request(struct ethnl_req_info *req_info, struct nlattr **tb, struct netlink_ext_ack *extack) { struct rss_req_info *request = RSS_REQINFO(req_info); if (tb[ETHTOOL_A_RSS_CONTEXT]) request->rss_context = nla_get_u32(tb[ETHTOOL_A_RSS_CONTEXT]); if (tb[ETHTOOL_A_RSS_START_CONTEXT]) { NL_SET_BAD_ATTR(extack, tb[ETHTOOL_A_RSS_START_CONTEXT]); return -EINVAL; } return 0; } static int rss_prepare_get(const struct rss_req_info *request, struct net_device *dev, struct rss_reply_data *data, const struct genl_info *info) { struct ethtool_rxfh_param rxfh = {}; const struct ethtool_ops *ops; u32 total_size, indir_bytes; u8 *rss_config; int ret; ops = dev->ethtool_ops; ret = ethnl_ops_begin(dev); if (ret < 0) return ret; data->indir_size = 0; data->hkey_size = 0; if (ops->get_rxfh_indir_size) data->indir_size = ops->get_rxfh_indir_size(dev); if (ops->get_rxfh_key_size) data->hkey_size = ops->get_rxfh_key_size(dev); indir_bytes = data->indir_size * sizeof(u32); total_size = indir_bytes + data->hkey_size; rss_config = kzalloc(total_size, GFP_KERNEL); if (!rss_config) { ret = -ENOMEM; goto out_ops; } if (data->indir_size) data->indir_table = (u32 *)rss_config; if (data->hkey_size) data->hkey = rss_config + indir_bytes; rxfh.indir_size = data->indir_size; rxfh.indir = data->indir_table; rxfh.key_size = data->hkey_size; rxfh.key = data->hkey; ret = ops->get_rxfh(dev, &rxfh); if (ret) goto out_ops; data->hfunc = rxfh.hfunc; data->input_xfrm = rxfh.input_xfrm; out_ops: ethnl_ops_complete(dev); return ret; } static int rss_prepare_ctx(const struct rss_req_info *request, struct net_device *dev, struct rss_reply_data *data, const struct genl_info *info) { struct ethtool_rxfh_context *ctx; u32 total_size, indir_bytes; u8 *rss_config; ctx = xa_load(&dev->ethtool->rss_ctx, request->rss_context); if (!ctx) return -ENOENT; data->indir_size = ctx->indir_size; data->hkey_size = ctx->key_size; data->hfunc = ctx->hfunc; data->input_xfrm = ctx->input_xfrm; indir_bytes = data->indir_size * sizeof(u32); total_size = indir_bytes + data->hkey_size; rss_config = kzalloc(total_size, GFP_KERNEL); if (!rss_config) return -ENOMEM; data->indir_table = (u32 *)rss_config; memcpy(data->indir_table, ethtool_rxfh_context_indir(ctx), indir_bytes); if (data->hkey_size) { data->hkey = rss_config + indir_bytes; memcpy(data->hkey, ethtool_rxfh_context_key(ctx), data->hkey_size); } return 0; } static int rss_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { struct rss_reply_data *data = RSS_REPDATA(reply_base); struct rss_req_info *request = RSS_REQINFO(req_base); struct net_device *dev = reply_base->dev; const struct ethtool_ops *ops; ops = dev->ethtool_ops; if (!ops->get_rxfh) return -EOPNOTSUPP; /* Some drivers don't handle rss_context */ if (request->rss_context) { if (!ops->cap_rss_ctx_supported && !ops->create_rxfh_context) return -EOPNOTSUPP; data->no_key_fields = !ops->rxfh_per_ctx_key; return rss_prepare_ctx(request, dev, data, info); } return rss_prepare_get(request, dev, data, info); } static int rss_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct rss_reply_data *data = RSS_REPDATA(reply_base); int len; len = nla_total_size(sizeof(u32)) + /* _RSS_CONTEXT */ nla_total_size(sizeof(u32)) + /* _RSS_HFUNC */ nla_total_size(sizeof(u32)) + /* _RSS_INPUT_XFRM */ nla_total_size(sizeof(u32) * data->indir_size) + /* _RSS_INDIR */ nla_total_size(data->hkey_size); /* _RSS_HKEY */ return len; } static int rss_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct rss_reply_data *data = RSS_REPDATA(reply_base); struct rss_req_info *request = RSS_REQINFO(req_base); if (request->rss_context && nla_put_u32(skb, ETHTOOL_A_RSS_CONTEXT, request->rss_context)) return -EMSGSIZE; if ((data->indir_size && nla_put(skb, ETHTOOL_A_RSS_INDIR, sizeof(u32) * data->indir_size, data->indir_table))) return -EMSGSIZE; if (data->no_key_fields) return 0; if ((data->hfunc && nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc)) || (data->input_xfrm && nla_put_u32(skb, ETHTOOL_A_RSS_INPUT_XFRM, data->input_xfrm)) || (data->hkey_size && nla_put(skb, ETHTOOL_A_RSS_HKEY, data->hkey_size, data->hkey))) return -EMSGSIZE; return 0; } static void rss_cleanup_data(struct ethnl_reply_data *reply_base) { const struct rss_reply_data *data = RSS_REPDATA(reply_base); kfree(data->indir_table); } struct rss_nl_dump_ctx { unsigned long ifindex; unsigned long ctx_idx; /* User wants to only dump contexts from given ifindex */ unsigned int match_ifindex; unsigned int start_ctx; }; static struct rss_nl_dump_ctx *rss_dump_ctx(struct netlink_callback *cb) { NL_ASSERT_CTX_FITS(struct rss_nl_dump_ctx); return (struct rss_nl_dump_ctx *)cb->ctx; } int ethnl_rss_dump_start(struct netlink_callback *cb) { const struct genl_info *info = genl_info_dump(cb); struct rss_nl_dump_ctx *ctx = rss_dump_ctx(cb); struct ethnl_req_info req_info = {}; struct nlattr **tb = info->attrs; int ret; /* Filtering by context not supported */ if (tb[ETHTOOL_A_RSS_CONTEXT]) { NL_SET_BAD_ATTR(info->extack, tb[ETHTOOL_A_RSS_CONTEXT]); return -EINVAL; } if (tb[ETHTOOL_A_RSS_START_CONTEXT]) { ctx->start_ctx = nla_get_u32(tb[ETHTOOL_A_RSS_START_CONTEXT]); ctx->ctx_idx = ctx->start_ctx; } ret = ethnl_parse_header_dev_get(&req_info, tb[ETHTOOL_A_RSS_HEADER], sock_net(cb->skb->sk), cb->extack, false); if (req_info.dev) { ctx->match_ifindex = req_info.dev->ifindex; ctx->ifindex = ctx->match_ifindex; ethnl_parse_header_dev_put(&req_info); req_info.dev = NULL; } return ret; } static int rss_dump_one_ctx(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, u32 rss_context) { const struct genl_info *info = genl_info_dump(cb); struct rss_reply_data data = {}; struct rss_req_info req = {}; void *ehdr; int ret; req.rss_context = rss_context; ehdr = ethnl_dump_put(skb, cb, ETHTOOL_MSG_RSS_GET_REPLY); if (!ehdr) return -EMSGSIZE; ret = ethnl_fill_reply_header(skb, dev, ETHTOOL_A_RSS_HEADER); if (ret < 0) goto err_cancel; /* Context 0 is not currently storred or cached in the XArray */ if (!rss_context) ret = rss_prepare_get(&req, dev, &data, info); else ret = rss_prepare_ctx(&req, dev, &data, info); if (ret) goto err_cancel; ret = rss_fill_reply(skb, &req.base, &data.base); if (ret) goto err_cleanup; genlmsg_end(skb, ehdr); rss_cleanup_data(&data.base); return 0; err_cleanup: rss_cleanup_data(&data.base); err_cancel: genlmsg_cancel(skb, ehdr); return ret; } static int rss_dump_one_dev(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev) { struct rss_nl_dump_ctx *ctx = rss_dump_ctx(cb); int ret; if (!dev->ethtool_ops->get_rxfh) return 0; if (!ctx->ctx_idx) { ret = rss_dump_one_ctx(skb, cb, dev, 0); if (ret) return ret; ctx->ctx_idx++; } for (; xa_find(&dev->ethtool->rss_ctx, &ctx->ctx_idx, ULONG_MAX, XA_PRESENT); ctx->ctx_idx++) { ret = rss_dump_one_ctx(skb, cb, dev, ctx->ctx_idx); if (ret) return ret; } ctx->ctx_idx = ctx->start_ctx; return 0; } int ethnl_rss_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { struct rss_nl_dump_ctx *ctx = rss_dump_ctx(cb); struct net *net = sock_net(skb->sk); struct net_device *dev; int ret = 0; rtnl_lock(); for_each_netdev_dump(net, dev, ctx->ifindex) { if (ctx->match_ifindex && ctx->match_ifindex != ctx->ifindex) break; ret = rss_dump_one_dev(skb, cb, dev); if (ret) break; } rtnl_unlock(); return ret; } const struct ethnl_request_ops ethnl_rss_request_ops = { .request_cmd = ETHTOOL_MSG_RSS_GET, .reply_cmd = ETHTOOL_MSG_RSS_GET_REPLY, .hdr_attr = ETHTOOL_A_RSS_HEADER, .req_info_size = sizeof(struct rss_req_info), .reply_data_size = sizeof(struct rss_reply_data), .parse_request = rss_parse_request, .prepare_data = rss_prepare_data, .reply_size = rss_reply_size, .fill_reply = rss_fill_reply, .cleanup_data = rss_cleanup_data, };
1 1 2 1 1 6 2 5 1 4 2 2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2008-2009 Atheros Communications Inc. */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/errno.h> #include <linux/firmware.h> #include <linux/usb.h> #include <linux/unaligned.h> #include <net/bluetooth/bluetooth.h> #define VERSION "1.0" #define ATH3K_FIRMWARE "ath3k-1.fw" #define ATH3K_DNLOAD 0x01 #define ATH3K_GETSTATE 0x05 #define ATH3K_SET_NORMAL_MODE 0x07 #define ATH3K_GETVERSION 0x09 #define USB_REG_SWITCH_VID_PID 0x0a #define ATH3K_MODE_MASK 0x3F #define ATH3K_NORMAL_MODE 0x0E #define ATH3K_PATCH_UPDATE 0x80 #define ATH3K_SYSCFG_UPDATE 0x40 #define ATH3K_XTAL_FREQ_26M 0x00 #define ATH3K_XTAL_FREQ_40M 0x01 #define ATH3K_XTAL_FREQ_19P2 0x02 #define ATH3K_NAME_LEN 0xFF struct ath3k_version { __le32 rom_version; __le32 build_version; __le32 ram_version; __u8 ref_clock; __u8 reserved[7]; } __packed; static const struct usb_device_id ath3k_table[] = { /* Atheros AR3011 */ { USB_DEVICE(0x0CF3, 0x3000) }, /* Atheros AR3011 with sflash firmware*/ { USB_DEVICE(0x0489, 0xE027) }, { USB_DEVICE(0x0489, 0xE03D) }, { USB_DEVICE(0x04F2, 0xAFF1) }, { USB_DEVICE(0x0930, 0x0215) }, { USB_DEVICE(0x0CF3, 0x3002) }, { USB_DEVICE(0x0CF3, 0xE019) }, { USB_DEVICE(0x13d3, 0x3304) }, /* Atheros AR9285 Malbec with sflash firmware */ { USB_DEVICE(0x03F0, 0x311D) }, /* Atheros AR3012 with sflash firmware*/ { USB_DEVICE(0x0489, 0xe04d) }, { USB_DEVICE(0x0489, 0xe04e) }, { USB_DEVICE(0x0489, 0xe057) }, { USB_DEVICE(0x0489, 0xe056) }, { USB_DEVICE(0x0489, 0xe05f) }, { USB_DEVICE(0x0489, 0xe076) }, { USB_DEVICE(0x0489, 0xe078) }, { USB_DEVICE(0x0489, 0xe095) }, { USB_DEVICE(0x04c5, 0x1330) }, { USB_DEVICE(0x04CA, 0x3004) }, { USB_DEVICE(0x04CA, 0x3005) }, { USB_DEVICE(0x04CA, 0x3006) }, { USB_DEVICE(0x04CA, 0x3007) }, { USB_DEVICE(0x04CA, 0x3008) }, { USB_DEVICE(0x04CA, 0x300b) }, { USB_DEVICE(0x04CA, 0x300d) }, { USB_DEVICE(0x04CA, 0x300f) }, { USB_DEVICE(0x04CA, 0x3010) }, { USB_DEVICE(0x04CA, 0x3014) }, { USB_DEVICE(0x04CA, 0x3018) }, { USB_DEVICE(0x0930, 0x0219) }, { USB_DEVICE(0x0930, 0x021c) }, { USB_DEVICE(0x0930, 0x0220) }, { USB_DEVICE(0x0930, 0x0227) }, { USB_DEVICE(0x0b05, 0x17d0) }, { USB_DEVICE(0x0CF3, 0x0036) }, { USB_DEVICE(0x0CF3, 0x3004) }, { USB_DEVICE(0x0CF3, 0x3008) }, { USB_DEVICE(0x0CF3, 0x311D) }, { USB_DEVICE(0x0CF3, 0x311E) }, { USB_DEVICE(0x0CF3, 0x311F) }, { USB_DEVICE(0x0cf3, 0x3121) }, { USB_DEVICE(0x0CF3, 0x817a) }, { USB_DEVICE(0x0CF3, 0x817b) }, { USB_DEVICE(0x0cf3, 0xe003) }, { USB_DEVICE(0x0CF3, 0xE004) }, { USB_DEVICE(0x0CF3, 0xE005) }, { USB_DEVICE(0x0CF3, 0xE006) }, { USB_DEVICE(0x13d3, 0x3362) }, { USB_DEVICE(0x13d3, 0x3375) }, { USB_DEVICE(0x13d3, 0x3393) }, { USB_DEVICE(0x13d3, 0x3395) }, { USB_DEVICE(0x13d3, 0x3402) }, { USB_DEVICE(0x13d3, 0x3408) }, { USB_DEVICE(0x13d3, 0x3423) }, { USB_DEVICE(0x13d3, 0x3432) }, { USB_DEVICE(0x13d3, 0x3472) }, { USB_DEVICE(0x13d3, 0x3474) }, { USB_DEVICE(0x13d3, 0x3487) }, { USB_DEVICE(0x13d3, 0x3490) }, /* Atheros AR5BBU12 with sflash firmware */ { USB_DEVICE(0x0489, 0xE02C) }, /* Atheros AR5BBU22 with sflash firmware */ { USB_DEVICE(0x0489, 0xE036) }, { USB_DEVICE(0x0489, 0xE03C) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, ath3k_table); #define BTUSB_ATH3012 0x80 /* This table is to load patch and sysconfig files * for AR3012 */ static const struct usb_device_id ath3k_blist_tbl[] = { /* Atheros AR3012 with sflash firmware*/ { USB_DEVICE(0x0489, 0xe04e), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe04d), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe056), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe057), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe05f), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe076), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe078), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe095), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04c5, 0x1330), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3005), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3006), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3007), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x300b), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x300d), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3014), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3018), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x021c), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0227), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0b05, 0x17d0), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0CF3, 0x0036), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311D), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311E), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311F), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3121), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0CF3, 0x817a), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0CF3, 0x817b), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe005), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe006), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe003), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3362), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3375), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3393), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3395), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3402), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3408), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3423), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3432), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3472), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3474), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3487), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3490), .driver_info = BTUSB_ATH3012 }, /* Atheros AR5BBU22 with sflash firmware */ { USB_DEVICE(0x0489, 0xE036), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xE03C), .driver_info = BTUSB_ATH3012 }, { } /* Terminating entry */ }; static inline void ath3k_log_failed_loading(int err, int len, int size, int count) { BT_ERR("Firmware loading err = %d, len = %d, size = %d, count = %d", err, len, size, count); } #define USB_REQ_DFU_DNLOAD 1 #define BULK_SIZE 4096 #define FW_HDR_SIZE 20 #define TIMEGAP_USEC_MIN 50 #define TIMEGAP_USEC_MAX 100 static int ath3k_load_firmware(struct usb_device *udev, const struct firmware *firmware) { u8 *send_buf; int len = 0; int err, pipe, size, sent = 0; int count = firmware->size; BT_DBG("udev %p", udev); send_buf = kmalloc(BULK_SIZE, GFP_KERNEL); if (!send_buf) { BT_ERR("Can't allocate memory chunk for firmware"); return -ENOMEM; } err = usb_control_msg_send(udev, 0, USB_REQ_DFU_DNLOAD, USB_TYPE_VENDOR, 0, 0, firmware->data, FW_HDR_SIZE, USB_CTRL_SET_TIMEOUT, GFP_KERNEL); if (err) { BT_ERR("Can't change to loading configuration err"); goto error; } sent += FW_HDR_SIZE; count -= FW_HDR_SIZE; pipe = usb_sndbulkpipe(udev, 0x02); while (count) { /* workaround the compatibility issue with xHCI controller*/ usleep_range(TIMEGAP_USEC_MIN, TIMEGAP_USEC_MAX); size = min_t(uint, count, BULK_SIZE); memcpy(send_buf, firmware->data + sent, size); err = usb_bulk_msg(udev, pipe, send_buf, size, &len, 3000); if (err || len != size) { ath3k_log_failed_loading(err, len, size, count); goto error; } sent += size; count -= size; } error: kfree(send_buf); return err; } static int ath3k_get_state(struct usb_device *udev, unsigned char *state) { return usb_control_msg_recv(udev, 0, ATH3K_GETSTATE, USB_TYPE_VENDOR | USB_DIR_IN, 0, 0, state, 1, USB_CTRL_SET_TIMEOUT, GFP_KERNEL); } static int ath3k_get_version(struct usb_device *udev, struct ath3k_version *version) { return usb_control_msg_recv(udev, 0, ATH3K_GETVERSION, USB_TYPE_VENDOR | USB_DIR_IN, 0, 0, version, sizeof(*version), USB_CTRL_SET_TIMEOUT, GFP_KERNEL); } static int ath3k_load_fwfile(struct usb_device *udev, const struct firmware *firmware) { u8 *send_buf; int len = 0; int err, pipe, size, count, sent = 0; int ret; count = firmware->size; send_buf = kmalloc(BULK_SIZE, GFP_KERNEL); if (!send_buf) { BT_ERR("Can't allocate memory chunk for firmware"); return -ENOMEM; } size = min_t(uint, count, FW_HDR_SIZE); ret = usb_control_msg_send(udev, 0, ATH3K_DNLOAD, USB_TYPE_VENDOR, 0, 0, firmware->data, size, USB_CTRL_SET_TIMEOUT, GFP_KERNEL); if (ret) { BT_ERR("Can't change to loading configuration err"); kfree(send_buf); return ret; } sent += size; count -= size; pipe = usb_sndbulkpipe(udev, 0x02); while (count) { /* workaround the compatibility issue with xHCI controller*/ usleep_range(TIMEGAP_USEC_MIN, TIMEGAP_USEC_MAX); size = min_t(uint, count, BULK_SIZE); memcpy(send_buf, firmware->data + sent, size); err = usb_bulk_msg(udev, pipe, send_buf, size, &len, 3000); if (err || len != size) { ath3k_log_failed_loading(err, len, size, count); kfree(send_buf); return err; } sent += size; count -= size; } kfree(send_buf); return 0; } static void ath3k_switch_pid(struct usb_device *udev) { usb_control_msg_send(udev, 0, USB_REG_SWITCH_VID_PID, USB_TYPE_VENDOR, 0, 0, NULL, 0, USB_CTRL_SET_TIMEOUT, GFP_KERNEL); } static int ath3k_set_normal_mode(struct usb_device *udev) { unsigned char fw_state; int ret; ret = ath3k_get_state(udev, &fw_state); if (ret) { BT_ERR("Can't get state to change to normal mode err"); return ret; } if ((fw_state & ATH3K_MODE_MASK) == ATH3K_NORMAL_MODE) { BT_DBG("firmware was already in normal mode"); return 0; } return usb_control_msg_send(udev, 0, ATH3K_SET_NORMAL_MODE, USB_TYPE_VENDOR, 0, 0, NULL, 0, USB_CTRL_SET_TIMEOUT, GFP_KERNEL); } static int ath3k_load_patch(struct usb_device *udev) { unsigned char fw_state; char filename[ATH3K_NAME_LEN]; const struct firmware *firmware; struct ath3k_version fw_version; __u32 pt_rom_version, pt_build_version; int ret; ret = ath3k_get_state(udev, &fw_state); if (ret) { BT_ERR("Can't get state to change to load ram patch err"); return ret; } if (fw_state & ATH3K_PATCH_UPDATE) { BT_DBG("Patch was already downloaded"); return 0; } ret = ath3k_get_version(udev, &fw_version); if (ret) { BT_ERR("Can't get version to change to load ram patch err"); return ret; } snprintf(filename, ATH3K_NAME_LEN, "ar3k/AthrBT_0x%08x.dfu", le32_to_cpu(fw_version.rom_version)); ret = request_firmware(&firmware, filename, &udev->dev); if (ret < 0) { BT_ERR("Patch file not found %s", filename); return ret; } pt_rom_version = get_unaligned_le32(firmware->data + firmware->size - 8); pt_build_version = get_unaligned_le32(firmware->data + firmware->size - 4); if (pt_rom_version != le32_to_cpu(fw_version.rom_version) || pt_build_version <= le32_to_cpu(fw_version.build_version)) { BT_ERR("Patch file version did not match with firmware"); release_firmware(firmware); return -EINVAL; } ret = ath3k_load_fwfile(udev, firmware); release_firmware(firmware); return ret; } static int ath3k_load_syscfg(struct usb_device *udev) { unsigned char fw_state; char filename[ATH3K_NAME_LEN]; const struct firmware *firmware; struct ath3k_version fw_version; int clk_value, ret; ret = ath3k_get_state(udev, &fw_state); if (ret) { BT_ERR("Can't get state to change to load configuration err"); return -EBUSY; } ret = ath3k_get_version(udev, &fw_version); if (ret) { BT_ERR("Can't get version to change to load ram patch err"); return ret; } switch (fw_version.ref_clock) { case ATH3K_XTAL_FREQ_26M: clk_value = 26; break; case ATH3K_XTAL_FREQ_40M: clk_value = 40; break; case ATH3K_XTAL_FREQ_19P2: clk_value = 19; break; default: clk_value = 0; break; } snprintf(filename, ATH3K_NAME_LEN, "ar3k/ramps_0x%08x_%d%s", le32_to_cpu(fw_version.rom_version), clk_value, ".dfu"); ret = request_firmware(&firmware, filename, &udev->dev); if (ret < 0) { BT_ERR("Configuration file not found %s", filename); return ret; } ret = ath3k_load_fwfile(udev, firmware); release_firmware(firmware); return ret; } static int ath3k_probe(struct usb_interface *intf, const struct usb_device_id *id) { const struct firmware *firmware; struct usb_device *udev = interface_to_usbdev(intf); int ret; BT_DBG("intf %p id %p", intf, id); if (intf->cur_altsetting->desc.bInterfaceNumber != 0) return -ENODEV; /* match device ID in ath3k blacklist table */ if (!id->driver_info) { const struct usb_device_id *match; match = usb_match_id(intf, ath3k_blist_tbl); if (match) id = match; } /* load patch and sysconfig files for AR3012 */ if (id->driver_info & BTUSB_ATH3012) { /* New firmware with patch and sysconfig files already loaded */ if (le16_to_cpu(udev->descriptor.bcdDevice) > 0x0001) return -ENODEV; ret = ath3k_load_patch(udev); if (ret < 0) { BT_ERR("Loading patch file failed"); return ret; } ret = ath3k_load_syscfg(udev); if (ret < 0) { BT_ERR("Loading sysconfig file failed"); return ret; } ret = ath3k_set_normal_mode(udev); if (ret) { BT_ERR("Set normal mode failed"); return ret; } ath3k_switch_pid(udev); return 0; } ret = request_firmware(&firmware, ATH3K_FIRMWARE, &udev->dev); if (ret < 0) { if (ret == -ENOENT) BT_ERR("Firmware file \"%s\" not found", ATH3K_FIRMWARE); else BT_ERR("Firmware file \"%s\" request failed (err=%d)", ATH3K_FIRMWARE, ret); return ret; } ret = ath3k_load_firmware(udev, firmware); release_firmware(firmware); return ret; } static void ath3k_disconnect(struct usb_interface *intf) { BT_DBG("%s intf %p", __func__, intf); } static struct usb_driver ath3k_driver = { .name = "ath3k", .probe = ath3k_probe, .disconnect = ath3k_disconnect, .id_table = ath3k_table, .disable_hub_initiated_lpm = 1, }; module_usb_driver(ath3k_driver); MODULE_AUTHOR("Atheros Communications"); MODULE_DESCRIPTION("Atheros AR30xx firmware driver"); MODULE_VERSION(VERSION); MODULE_LICENSE("GPL"); MODULE_FIRMWARE(ATH3K_FIRMWARE);
7 7 7 2 3 2 2 3 10 3 3 2 2 9 9 9 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 // SPDX-License-Identifier: GPL-2.0 /* * xfrm6_input.c: based on net/ipv4/xfrm4_input.c * * Authors: * Mitsuru KANDA @USAGI * Kazunori MIYAZAWA @USAGI * Kunihiro Ishiguro <kunihiro@ipinfusion.com> * YOSHIFUJI Hideaki @USAGI * IPv6 support */ #include <linux/module.h> #include <linux/string.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> #include <net/ipv6.h> #include <net/xfrm.h> #include <net/protocol.h> #include <net/gro.h> int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, struct ip6_tnl *t) { XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t; XFRM_SPI_SKB_CB(skb)->family = AF_INET6; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); return xfrm_input(skb, nexthdr, spi, 0); } EXPORT_SYMBOL(xfrm6_rcv_spi); static int xfrm6_transport_finish2(struct net *net, struct sock *sk, struct sk_buff *skb) { if (xfrm_trans_queue(skb, ip6_rcv_finish)) { kfree_skb(skb); return NET_RX_DROP; } return 0; } int xfrm6_transport_finish(struct sk_buff *skb, int async) { struct xfrm_offload *xo = xfrm_offload(skb); int nhlen = -skb_network_offset(skb); skb_network_header(skb)[IP6CB(skb)->nhoff] = XFRM_MODE_SKB_CB(skb)->protocol; #ifndef CONFIG_NETFILTER if (!async) return 1; #endif __skb_push(skb, nhlen); ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_postpush_rcsum(skb, skb_network_header(skb), nhlen); if (xo && (xo->flags & XFRM_GRO)) { /* The full l2 header needs to be preserved so that re-injecting the packet at l2 * works correctly in the presence of vlan tags. */ skb_mac_header_rebuild_full(skb, xo->orig_mac_len); skb_reset_network_header(skb); skb_reset_transport_header(skb); return 0; } NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, dev_net(skb->dev), NULL, skb, skb->dev, NULL, xfrm6_transport_finish2); return 0; } static int __xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull) { struct udp_sock *up = udp_sk(sk); struct udphdr *uh; struct ipv6hdr *ip6h; int len; int ip6hlen = sizeof(struct ipv6hdr); __u8 *udpdata; __be32 *udpdata32; u16 encap_type; encap_type = READ_ONCE(up->encap_type); /* if this is not encapsulated socket, then just return now */ if (!encap_type) return 1; /* If this is a paged skb, make sure we pull up * whatever data we need to look at. */ len = skb->len - sizeof(struct udphdr); if (!pskb_may_pull(skb, sizeof(struct udphdr) + min(len, 8))) return 1; /* Now we can get the pointers */ uh = udp_hdr(skb); udpdata = (__u8 *)uh + sizeof(struct udphdr); udpdata32 = (__be32 *)udpdata; switch (encap_type) { default: case UDP_ENCAP_ESPINUDP: /* Check if this is a keepalive packet. If so, eat it. */ if (len == 1 && udpdata[0] == 0xff) { return -EINVAL; } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) { /* ESP Packet without Non-ESP header */ len = sizeof(struct udphdr); } else /* Must be an IKE packet.. pass it through */ return 1; break; } /* At this point we are sure that this is an ESPinUDP packet, * so we need to remove 'len' bytes from the packet (the UDP * header and optional ESP marker bytes) and then modify the * protocol to ESP, and then call into the transform receiver. */ if (skb_unclone(skb, GFP_ATOMIC)) return -EINVAL; /* Now we can update and verify the packet length... */ ip6h = ipv6_hdr(skb); ip6h->payload_len = htons(ntohs(ip6h->payload_len) - len); if (skb->len < ip6hlen + len) { /* packet is too small!?! */ return -EINVAL; } /* pull the data buffer up to the ESP header and set the * transport header to point to ESP. Keep UDP on the stack * for later. */ if (pull) { __skb_pull(skb, len); skb_reset_transport_header(skb); } else { skb_set_transport_header(skb, len); } /* process ESP */ return 0; } /* If it's a keepalive packet, then just eat it. * If it's an encapsulated packet, then pass it to the * IPsec xfrm input. * Returns 0 if skb passed to xfrm or was dropped. * Returns >0 if skb should be passed to UDP. * Returns <0 if skb should be resubmitted (-ret is protocol) */ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) { int ret; if (skb->protocol == htons(ETH_P_IP)) return xfrm4_udp_encap_rcv(sk, skb); ret = __xfrm6_udp_encap_rcv(sk, skb, true); if (!ret) return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, udp_sk(sk)->encap_type); if (ret < 0) { kfree_skb(skb); return 0; } return ret; } struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, struct sk_buff *skb) { int offset = skb_gro_offset(skb); const struct net_offload *ops; struct sk_buff *pp = NULL; int ret; if (skb->protocol == htons(ETH_P_IP)) return xfrm4_gro_udp_encap_rcv(sk, head, skb); offset = offset - sizeof(struct udphdr); if (!pskb_pull(skb, offset)) return NULL; rcu_read_lock(); ops = rcu_dereference(inet6_offloads[IPPROTO_ESP]); if (!ops || !ops->callbacks.gro_receive) goto out; ret = __xfrm6_udp_encap_rcv(sk, skb, false); if (ret) goto out; skb_push(skb, offset); NAPI_GRO_CB(skb)->proto = IPPROTO_UDP; pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); rcu_read_unlock(); return pp; out: rcu_read_unlock(); skb_push(skb, offset); NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 1; return NULL; } int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t) { return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff], 0, t); } EXPORT_SYMBOL(xfrm6_rcv_tnl); int xfrm6_rcv(struct sk_buff *skb) { return xfrm6_rcv_tnl(skb, NULL); } EXPORT_SYMBOL(xfrm6_rcv); int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto) { struct net *net = dev_net(skb->dev); struct xfrm_state *x = NULL; struct sec_path *sp; int i = 0; sp = secpath_set(skb); if (!sp) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); goto drop; } if (1 + sp->len == XFRM_MAX_DEPTH) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); goto drop; } for (i = 0; i < 3; i++) { xfrm_address_t *dst, *src; switch (i) { case 0: dst = daddr; src = saddr; break; case 1: /* lookup state with wild-card source address */ dst = daddr; src = (xfrm_address_t *)&in6addr_any; break; default: /* lookup state with wild-card addresses */ dst = (xfrm_address_t *)&in6addr_any; src = (xfrm_address_t *)&in6addr_any; break; } x = xfrm_state_lookup_byaddr(net, skb->mark, dst, src, proto, AF_INET6); if (!x) continue; if (unlikely(x->dir && x->dir != XFRM_SA_DIR_IN)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEDIRERROR); xfrm_state_put(x); x = NULL; continue; } spin_lock(&x->lock); if ((!i || (x->props.flags & XFRM_STATE_WILDRECV)) && likely(x->km.state == XFRM_STATE_VALID) && !xfrm_state_check_expire(x)) { spin_unlock(&x->lock); if (x->type->input(x, skb) > 0) { /* found a valid state */ break; } } else spin_unlock(&x->lock); xfrm_state_put(x); x = NULL; } if (!x) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES); xfrm_audit_state_notfound_simple(skb, AF_INET6); goto drop; } sp->xvec[sp->len++] = x; spin_lock(&x->lock); x->curlft.bytes += skb->len; x->curlft.packets++; spin_unlock(&x->lock); return 1; drop: return -1; } EXPORT_SYMBOL(xfrm6_input_addr);
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Packet RX/TX history data structures and routines for TFRC-based protocols. * * Copyright (c) 2007 The University of Aberdeen, Scotland, UK * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand. * * This code has been developed by the University of Waikato WAND * research group. For further information please see https://www.wand.net.nz/ * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz * * This code also uses code from Lulea University, rereleased as GPL by its * authors: * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon * * Changes to meet Linux coding standards, to make it meet latest ccid3 draft * and to make it work as a loadable module in the DCCP stack written by * Arnaldo Carvalho de Melo <acme@conectiva.com.br>. * * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> */ #ifndef _DCCP_PKT_HIST_ #define _DCCP_PKT_HIST_ #include <linux/list.h> #include <linux/slab.h> #include "tfrc.h" /** * tfrc_tx_hist_entry - Simple singly-linked TX history list * @next: next oldest entry (LIFO order) * @seqno: sequence number of this entry * @stamp: send time of packet with sequence number @seqno */ struct tfrc_tx_hist_entry { struct tfrc_tx_hist_entry *next; u64 seqno; ktime_t stamp; }; static inline struct tfrc_tx_hist_entry * tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno) { while (head != NULL && head->seqno != seqno) head = head->next; return head; } int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno); void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp); /* Subtraction a-b modulo-16, respects circular wrap-around */ #define SUB16(a, b) (((a) + 16 - (b)) & 0xF) /* Number of packets to wait after a missing packet (RFC 4342, 6.1) */ #define TFRC_NDUPACK 3 /** * tfrc_rx_hist_entry - Store information about a single received packet * @tfrchrx_seqno: DCCP packet sequence number * @tfrchrx_ccval: window counter value of packet (RFC 4342, 8.1) * @tfrchrx_ndp: the NDP count (if any) of the packet * @tfrchrx_tstamp: actual receive time of packet */ struct tfrc_rx_hist_entry { u64 tfrchrx_seqno:48, tfrchrx_ccval:4, tfrchrx_type:4; u64 tfrchrx_ndp:48; ktime_t tfrchrx_tstamp; }; /** * tfrc_rx_hist - RX history structure for TFRC-based protocols * @ring: Packet history for RTT sampling and loss detection * @loss_count: Number of entries in circular history * @loss_start: Movable index (for loss detection) * @rtt_sample_prev: Used during RTT sampling, points to candidate entry */ struct tfrc_rx_hist { struct tfrc_rx_hist_entry *ring[TFRC_NDUPACK + 1]; u8 loss_count:2, loss_start:2; #define rtt_sample_prev loss_start }; /** * tfrc_rx_hist_index - index to reach n-th entry after loss_start */ static inline u8 tfrc_rx_hist_index(const struct tfrc_rx_hist *h, const u8 n) { return (h->loss_start + n) & TFRC_NDUPACK; } /** * tfrc_rx_hist_last_rcv - entry with highest-received-seqno so far */ static inline struct tfrc_rx_hist_entry * tfrc_rx_hist_last_rcv(const struct tfrc_rx_hist *h) { return h->ring[tfrc_rx_hist_index(h, h->loss_count)]; } /** * tfrc_rx_hist_entry - return the n-th history entry after loss_start */ static inline struct tfrc_rx_hist_entry * tfrc_rx_hist_entry(const struct tfrc_rx_hist *h, const u8 n) { return h->ring[tfrc_rx_hist_index(h, n)]; } /** * tfrc_rx_hist_loss_prev - entry with highest-received-seqno before loss was detected */ static inline struct tfrc_rx_hist_entry * tfrc_rx_hist_loss_prev(const struct tfrc_rx_hist *h) { return h->ring[h->loss_start]; } /* indicate whether previously a packet was detected missing */ static inline bool tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h) { return h->loss_count > 0; } void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, const struct sk_buff *skb, const u64 ndp); int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb); struct tfrc_loss_hist; int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, struct tfrc_loss_hist *lh, struct sk_buff *skb, const u64 ndp, u32 (*first_li)(struct sock *sk), struct sock *sk); u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb); int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h); void tfrc_rx_hist_purge(struct tfrc_rx_hist *h); #endif /* _DCCP_PKT_HIST_ */
11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SCHED_CPUTIME_H #define _LINUX_SCHED_CPUTIME_H #include <linux/sched/signal.h> /* * cputime accounting APIs: */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN extern bool task_cputime(struct task_struct *t, u64 *utime, u64 *stime); extern u64 task_gtime(struct task_struct *t); #else static inline bool task_cputime(struct task_struct *t, u64 *utime, u64 *stime) { *utime = t->utime; *stime = t->stime; return false; } static inline u64 task_gtime(struct task_struct *t) { return t->gtime; } #endif #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME static inline void task_cputime_scaled(struct task_struct *t, u64 *utimescaled, u64 *stimescaled) { *utimescaled = t->utimescaled; *stimescaled = t->stimescaled; } #else static inline void task_cputime_scaled(struct task_struct *t, u64 *utimescaled, u64 *stimescaled) { task_cputime(t, utimescaled, stimescaled); } #endif extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); extern void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, u64 *ut, u64 *st); /* * Thread group CPU time accounting. */ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); void thread_group_sample_cputime(struct task_struct *tsk, u64 *samples); /* * The following are functions that support scheduler-internal time accounting. * These functions are generally called at the timer tick. None of this depends * on CONFIG_SCHEDSTATS. */ /** * get_running_cputimer - return &tsk->signal->cputimer if cputimers are active * * @tsk: Pointer to target task. */ #ifdef CONFIG_POSIX_TIMERS static inline struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk) { struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; /* * Check whether posix CPU timers are active. If not the thread * group accounting is not active either. Lockless check. */ if (!READ_ONCE(tsk->signal->posix_cputimers.timers_active)) return NULL; /* * After we flush the task's sum_exec_runtime to sig->sum_sched_runtime * in __exit_signal(), we won't account to the signal struct further * cputime consumed by that task, even though the task can still be * ticking after __exit_signal(). * * In order to keep a consistent behaviour between thread group cputime * and thread group cputimer accounting, lets also ignore the cputime * elapsing after __exit_signal() in any thread group timer running. * * This makes sure that POSIX CPU clocks and timers are synchronized, so * that a POSIX CPU timer won't expire while the corresponding POSIX CPU * clock delta is behind the expiring timer value. */ if (unlikely(!tsk->sighand)) return NULL; return cputimer; } #else static inline struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk) { return NULL; } #endif /** * account_group_user_time - Maintain utime for a thread group. * * @tsk: Pointer to task structure. * @cputime: Time value by which to increment the utime field of the * thread_group_cputime structure. * * If thread group time is being maintained, get the structure for the * running CPU and update the utime field there. */ static inline void account_group_user_time(struct task_struct *tsk, u64 cputime) { struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); if (!cputimer) return; atomic64_add(cputime, &cputimer->cputime_atomic.utime); } /** * account_group_system_time - Maintain stime for a thread group. * * @tsk: Pointer to task structure. * @cputime: Time value by which to increment the stime field of the * thread_group_cputime structure. * * If thread group time is being maintained, get the structure for the * running CPU and update the stime field there. */ static inline void account_group_system_time(struct task_struct *tsk, u64 cputime) { struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); if (!cputimer) return; atomic64_add(cputime, &cputimer->cputime_atomic.stime); } /** * account_group_exec_runtime - Maintain exec runtime for a thread group. * * @tsk: Pointer to task structure. * @ns: Time value by which to increment the sum_exec_runtime field * of the thread_group_cputime structure. * * If thread group time is being maintained, get the structure for the * running CPU and update the sum_exec_runtime field there. */ static inline void account_group_exec_runtime(struct task_struct *tsk, unsigned long long ns) { struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); if (!cputimer) return; atomic64_add(ns, &cputimer->cputime_atomic.sum_exec_runtime); } static inline void prev_cputime_init(struct prev_cputime *prev) { #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE prev->utime = prev->stime = 0; raw_spin_lock_init(&prev->lock); #endif } extern unsigned long long task_sched_runtime(struct task_struct *task); #endif /* _LINUX_SCHED_CPUTIME_H */
42 10554 10567 26 54 109 108 26 1 48 50 50 25 36 4 4 4 4 4 4 4 4 4 4 4 4 4 6 6 6 5 5 4 1 5 5 5 5 26 20 26 26 29 29 26 26 26 16 10 10 10 17 18 57 57 56 57 57 56 18 18 18 18 18 18 70 55 70 10 10 10 10 10 7 10536 10537 10495 10536 10499 25 26 25 26 25 26 26 26 25 25 26 26 26 26 25 26 26 26 26 26 25 25 26 26 1 26 26 26 26 25 26 26 36 36 36 36 36 36 36 36 36 36 36 35 36 36 41 41 41 41 42 42 41 40 42 36 10 41 27 24 33 12 7 33 10 42 40 42 41 42 41 42 42 41 7 36 42 41 16 8 16 16 16 16 16 50 50 50 50 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 10513 10472 10513 10476 10512 10478 10469 10478 10482 10513 10475 10472 10473 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 // SPDX-License-Identifier: GPL-2.0+ /* * Base port operations for 8250/16550-type serial ports * * Based on drivers/char/serial.c, by Linus Torvalds, Theodore Ts'o. * Split from 8250_core.c, Copyright (C) 2001 Russell King. * * A note about mapbase / membase * * mapbase is the physical address of the IO port. * membase is an 'ioremapped' cookie. */ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/ioport.h> #include <linux/init.h> #include <linux/irq.h> #include <linux/console.h> #include <linux/gpio/consumer.h> #include <linux/sysrq.h> #include <linux/delay.h> #include <linux/platform_device.h> #include <linux/tty.h> #include <linux/ratelimit.h> #include <linux/tty_flip.h> #include <linux/serial.h> #include <linux/serial_8250.h> #include <linux/nmi.h> #include <linux/mutex.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/pm_runtime.h> #include <linux/ktime.h> #include <asm/io.h> #include <asm/irq.h> #include "8250.h" /* * Debugging. */ #if 0 #define DEBUG_AUTOCONF(fmt...) printk(fmt) #else #define DEBUG_AUTOCONF(fmt...) do { } while (0) #endif /* * Here we define the default xmit fifo size used for each type of UART. */ static const struct serial8250_config uart_config[] = { [PORT_UNKNOWN] = { .name = "unknown", .fifo_size = 1, .tx_loadsz = 1, }, [PORT_8250] = { .name = "8250", .fifo_size = 1, .tx_loadsz = 1, }, [PORT_16450] = { .name = "16450", .fifo_size = 1, .tx_loadsz = 1, }, [PORT_16550] = { .name = "16550", .fifo_size = 1, .tx_loadsz = 1, }, [PORT_16550A] = { .name = "16550A", .fifo_size = 16, .tx_loadsz = 16, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .rxtrig_bytes = {1, 4, 8, 14}, .flags = UART_CAP_FIFO, }, [PORT_CIRRUS] = { .name = "Cirrus", .fifo_size = 1, .tx_loadsz = 1, }, [PORT_16650] = { .name = "ST16650", .fifo_size = 1, .tx_loadsz = 1, .flags = UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP, }, [PORT_16650V2] = { .name = "ST16650V2", .fifo_size = 32, .tx_loadsz = 16, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 | UART_FCR_T_TRIG_00, .rxtrig_bytes = {8, 16, 24, 28}, .flags = UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP, }, [PORT_16750] = { .name = "TI16750", .fifo_size = 64, .tx_loadsz = 64, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10 | UART_FCR7_64BYTE, .rxtrig_bytes = {1, 16, 32, 56}, .flags = UART_CAP_FIFO | UART_CAP_SLEEP | UART_CAP_AFE, }, [PORT_STARTECH] = { .name = "Startech", .fifo_size = 1, .tx_loadsz = 1, }, [PORT_16C950] = { .name = "16C950/954", .fifo_size = 128, .tx_loadsz = 128, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01, .rxtrig_bytes = {16, 32, 112, 120}, /* UART_CAP_EFR breaks billionon CF bluetooth card. */ .flags = UART_CAP_FIFO | UART_CAP_SLEEP, }, [PORT_16654] = { .name = "ST16654", .fifo_size = 64, .tx_loadsz = 32, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 | UART_FCR_T_TRIG_10, .rxtrig_bytes = {8, 16, 56, 60}, .flags = UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP, }, [PORT_16850] = { .name = "XR16850", .fifo_size = 128, .tx_loadsz = 128, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .flags = UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP, }, [PORT_RSA] = { .name = "RSA", .fifo_size = 2048, .tx_loadsz = 2048, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_11, .flags = UART_CAP_FIFO, }, [PORT_NS16550A] = { .name = "NS16550A", .fifo_size = 16, .tx_loadsz = 16, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .flags = UART_CAP_FIFO | UART_NATSEMI, }, [PORT_XSCALE] = { .name = "XScale", .fifo_size = 32, .tx_loadsz = 32, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .flags = UART_CAP_FIFO | UART_CAP_UUE | UART_CAP_RTOIE, }, [PORT_OCTEON] = { .name = "OCTEON", .fifo_size = 64, .tx_loadsz = 64, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .flags = UART_CAP_FIFO, }, [PORT_U6_16550A] = { .name = "U6_16550A", .fifo_size = 64, .tx_loadsz = 64, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .flags = UART_CAP_FIFO | UART_CAP_AFE, }, [PORT_TEGRA] = { .name = "Tegra", .fifo_size = 32, .tx_loadsz = 8, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 | UART_FCR_T_TRIG_01, .rxtrig_bytes = {1, 4, 8, 14}, .flags = UART_CAP_FIFO | UART_CAP_RTOIE, }, [PORT_XR17D15X] = { .name = "XR17D15X", .fifo_size = 64, .tx_loadsz = 64, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .flags = UART_CAP_FIFO | UART_CAP_AFE | UART_CAP_EFR | UART_CAP_SLEEP, }, [PORT_XR17V35X] = { .name = "XR17V35X", .fifo_size = 256, .tx_loadsz = 256, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_11 | UART_FCR_T_TRIG_11, .flags = UART_CAP_FIFO | UART_CAP_AFE | UART_CAP_EFR | UART_CAP_SLEEP, }, [PORT_LPC3220] = { .name = "LPC3220", .fifo_size = 64, .tx_loadsz = 32, .fcr = UART_FCR_DMA_SELECT | UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_00 | UART_FCR_T_TRIG_00, .flags = UART_CAP_FIFO, }, [PORT_BRCM_TRUMANAGE] = { .name = "TruManage", .fifo_size = 1, .tx_loadsz = 1024, .flags = UART_CAP_HFIFO, }, [PORT_8250_CIR] = { .name = "CIR port" }, [PORT_ALTR_16550_F32] = { .name = "Altera 16550 FIFO32", .fifo_size = 32, .tx_loadsz = 32, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .rxtrig_bytes = {1, 8, 16, 30}, .flags = UART_CAP_FIFO | UART_CAP_AFE, }, [PORT_ALTR_16550_F64] = { .name = "Altera 16550 FIFO64", .fifo_size = 64, .tx_loadsz = 64, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .rxtrig_bytes = {1, 16, 32, 62}, .flags = UART_CAP_FIFO | UART_CAP_AFE, }, [PORT_ALTR_16550_F128] = { .name = "Altera 16550 FIFO128", .fifo_size = 128, .tx_loadsz = 128, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .rxtrig_bytes = {1, 32, 64, 126}, .flags = UART_CAP_FIFO | UART_CAP_AFE, }, /* * tx_loadsz is set to 63-bytes instead of 64-bytes to implement * workaround of errata A-008006 which states that tx_loadsz should * be configured less than Maximum supported fifo bytes. */ [PORT_16550A_FSL64] = { .name = "16550A_FSL64", .fifo_size = 64, .tx_loadsz = 63, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10 | UART_FCR7_64BYTE, .flags = UART_CAP_FIFO | UART_CAP_NOTEMT, }, [PORT_RT2880] = { .name = "Palmchip BK-3103", .fifo_size = 16, .tx_loadsz = 16, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .rxtrig_bytes = {1, 4, 8, 14}, .flags = UART_CAP_FIFO, }, [PORT_DA830] = { .name = "TI DA8xx/66AK2x", .fifo_size = 16, .tx_loadsz = 16, .fcr = UART_FCR_DMA_SELECT | UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .rxtrig_bytes = {1, 4, 8, 14}, .flags = UART_CAP_FIFO | UART_CAP_AFE, }, [PORT_MTK_BTIF] = { .name = "MediaTek BTIF", .fifo_size = 16, .tx_loadsz = 16, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT, .flags = UART_CAP_FIFO, }, [PORT_NPCM] = { .name = "Nuvoton 16550", .fifo_size = 16, .tx_loadsz = 16, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10 | UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT, .rxtrig_bytes = {1, 4, 8, 14}, .flags = UART_CAP_FIFO, }, [PORT_SUNIX] = { .name = "Sunix", .fifo_size = 128, .tx_loadsz = 128, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .rxtrig_bytes = {1, 32, 64, 112}, .flags = UART_CAP_FIFO | UART_CAP_SLEEP, }, [PORT_ASPEED_VUART] = { .name = "ASPEED VUART", .fifo_size = 16, .tx_loadsz = 16, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_00, .rxtrig_bytes = {1, 4, 8, 14}, .flags = UART_CAP_FIFO, }, [PORT_MCHP16550A] = { .name = "MCHP16550A", .fifo_size = 256, .tx_loadsz = 256, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01, .rxtrig_bytes = {2, 66, 130, 194}, .flags = UART_CAP_FIFO, }, [PORT_BCM7271] = { .name = "Broadcom BCM7271 UART", .fifo_size = 32, .tx_loadsz = 32, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01, .rxtrig_bytes = {1, 8, 16, 30}, .flags = UART_CAP_FIFO | UART_CAP_AFE, }, }; /* Uart divisor latch read */ static u32 default_serial_dl_read(struct uart_8250_port *up) { /* Assign these in pieces to truncate any bits above 7. */ unsigned char dll = serial_in(up, UART_DLL); unsigned char dlm = serial_in(up, UART_DLM); return dll | dlm << 8; } /* Uart divisor latch write */ static void default_serial_dl_write(struct uart_8250_port *up, u32 value) { serial_out(up, UART_DLL, value & 0xff); serial_out(up, UART_DLM, value >> 8 & 0xff); } #ifdef CONFIG_HAS_IOPORT static unsigned int hub6_serial_in(struct uart_port *p, int offset) { offset = offset << p->regshift; outb(p->hub6 - 1 + offset, p->iobase); return inb(p->iobase + 1); } static void hub6_serial_out(struct uart_port *p, int offset, int value) { offset = offset << p->regshift; outb(p->hub6 - 1 + offset, p->iobase); outb(value, p->iobase + 1); } #endif /* CONFIG_HAS_IOPORT */ static unsigned int mem_serial_in(struct uart_port *p, int offset) { offset = offset << p->regshift; return readb(p->membase + offset); } static void mem_serial_out(struct uart_port *p, int offset, int value) { offset = offset << p->regshift; writeb(value, p->membase + offset); } static void mem16_serial_out(struct uart_port *p, int offset, int value) { offset = offset << p->regshift; writew(value, p->membase + offset); } static unsigned int mem16_serial_in(struct uart_port *p, int offset) { offset = offset << p->regshift; return readw(p->membase + offset); } static void mem32_serial_out(struct uart_port *p, int offset, int value) { offset = offset << p->regshift; writel(value, p->membase + offset); } static unsigned int mem32_serial_in(struct uart_port *p, int offset) { offset = offset << p->regshift; return readl(p->membase + offset); } static void mem32be_serial_out(struct uart_port *p, int offset, int value) { offset = offset << p->regshift; iowrite32be(value, p->membase + offset); } static unsigned int mem32be_serial_in(struct uart_port *p, int offset) { offset = offset << p->regshift; return ioread32be(p->membase + offset); } #ifdef CONFIG_HAS_IOPORT static unsigned int io_serial_in(struct uart_port *p, int offset) { offset = offset << p->regshift; return inb(p->iobase + offset); } static void io_serial_out(struct uart_port *p, int offset, int value) { offset = offset << p->regshift; outb(value, p->iobase + offset); } #endif static unsigned int no_serial_in(struct uart_port *p, int offset) { return (unsigned int)-1; } static void no_serial_out(struct uart_port *p, int offset, int value) { } static int serial8250_default_handle_irq(struct uart_port *port); static void set_io_from_upio(struct uart_port *p) { struct uart_8250_port *up = up_to_u8250p(p); up->dl_read = default_serial_dl_read; up->dl_write = default_serial_dl_write; switch (p->iotype) { #ifdef CONFIG_HAS_IOPORT case UPIO_HUB6: p->serial_in = hub6_serial_in; p->serial_out = hub6_serial_out; break; #endif case UPIO_MEM: p->serial_in = mem_serial_in; p->serial_out = mem_serial_out; break; case UPIO_MEM16: p->serial_in = mem16_serial_in; p->serial_out = mem16_serial_out; break; case UPIO_MEM32: p->serial_in = mem32_serial_in; p->serial_out = mem32_serial_out; break; case UPIO_MEM32BE: p->serial_in = mem32be_serial_in; p->serial_out = mem32be_serial_out; break; #ifdef CONFIG_HAS_IOPORT case UPIO_PORT: p->serial_in = io_serial_in; p->serial_out = io_serial_out; break; #endif default: WARN(p->iotype != UPIO_PORT || p->iobase, "Unsupported UART type %x\n", p->iotype); p->serial_in = no_serial_in; p->serial_out = no_serial_out; } /* Remember loaded iotype */ up->cur_iotype = p->iotype; p->handle_irq = serial8250_default_handle_irq; } static void serial_port_out_sync(struct uart_port *p, int offset, int value) { switch (p->iotype) { case UPIO_MEM: case UPIO_MEM16: case UPIO_MEM32: case UPIO_MEM32BE: case UPIO_AU: p->serial_out(p, offset, value); p->serial_in(p, UART_LCR); /* safe, no side-effects */ break; default: p->serial_out(p, offset, value); } } /* * FIFO support. */ static void serial8250_clear_fifos(struct uart_8250_port *p) { if (p->capabilities & UART_CAP_FIFO) { serial_out(p, UART_FCR, UART_FCR_ENABLE_FIFO); serial_out(p, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT); serial_out(p, UART_FCR, 0); } } static enum hrtimer_restart serial8250_em485_handle_start_tx(struct hrtimer *t); static enum hrtimer_restart serial8250_em485_handle_stop_tx(struct hrtimer *t); void serial8250_clear_and_reinit_fifos(struct uart_8250_port *p) { serial8250_clear_fifos(p); serial_out(p, UART_FCR, p->fcr); } EXPORT_SYMBOL_GPL(serial8250_clear_and_reinit_fifos); void serial8250_rpm_get(struct uart_8250_port *p) { if (!(p->capabilities & UART_CAP_RPM)) return; pm_runtime_get_sync(p->port.dev); } EXPORT_SYMBOL_GPL(serial8250_rpm_get); void serial8250_rpm_put(struct uart_8250_port *p) { if (!(p->capabilities & UART_CAP_RPM)) return; pm_runtime_mark_last_busy(p->port.dev); pm_runtime_put_autosuspend(p->port.dev); } EXPORT_SYMBOL_GPL(serial8250_rpm_put); /** * serial8250_em485_init() - put uart_8250_port into rs485 emulating * @p: uart_8250_port port instance * * The function is used to start rs485 software emulating on the * &struct uart_8250_port* @p. Namely, RTS is switched before/after * transmission. The function is idempotent, so it is safe to call it * multiple times. * * The caller MUST enable interrupt on empty shift register before * calling serial8250_em485_init(). This interrupt is not a part of * 8250 standard, but implementation defined. * * The function is supposed to be called from .rs485_config callback * or from any other callback protected with p->port.lock spinlock. * * See also serial8250_em485_destroy() * * Return 0 - success, -errno - otherwise */ static int serial8250_em485_init(struct uart_8250_port *p) { /* Port locked to synchronize UART_IER access against the console. */ lockdep_assert_held_once(&p->port.lock); if (p->em485) goto deassert_rts; p->em485 = kmalloc(sizeof(struct uart_8250_em485), GFP_ATOMIC); if (!p->em485) return -ENOMEM; hrtimer_init(&p->em485->stop_tx_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hrtimer_init(&p->em485->start_tx_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); p->em485->stop_tx_timer.function = &serial8250_em485_handle_stop_tx; p->em485->start_tx_timer.function = &serial8250_em485_handle_start_tx; p->em485->port = p; p->em485->active_timer = NULL; p->em485->tx_stopped = true; deassert_rts: if (p->em485->tx_stopped) p->rs485_stop_tx(p, true); return 0; } /** * serial8250_em485_destroy() - put uart_8250_port into normal state * @p: uart_8250_port port instance * * The function is used to stop rs485 software emulating on the * &struct uart_8250_port* @p. The function is idempotent, so it is safe to * call it multiple times. * * The function is supposed to be called from .rs485_config callback * or from any other callback protected with p->port.lock spinlock. * * See also serial8250_em485_init() */ void serial8250_em485_destroy(struct uart_8250_port *p) { if (!p->em485) return; hrtimer_cancel(&p->em485->start_tx_timer); hrtimer_cancel(&p->em485->stop_tx_timer); kfree(p->em485); p->em485 = NULL; } EXPORT_SYMBOL_GPL(serial8250_em485_destroy); struct serial_rs485 serial8250_em485_supported = { .flags = SER_RS485_ENABLED | SER_RS485_RTS_ON_SEND | SER_RS485_RTS_AFTER_SEND | SER_RS485_TERMINATE_BUS | SER_RS485_RX_DURING_TX, .delay_rts_before_send = 1, .delay_rts_after_send = 1, }; EXPORT_SYMBOL_GPL(serial8250_em485_supported); /** * serial8250_em485_config() - generic ->rs485_config() callback * @port: uart port * @termios: termios structure * @rs485: rs485 settings * * Generic callback usable by 8250 uart drivers to activate rs485 settings * if the uart is incapable of driving RTS as a Transmit Enable signal in * hardware, relying on software emulation instead. */ int serial8250_em485_config(struct uart_port *port, struct ktermios *termios, struct serial_rs485 *rs485) { struct uart_8250_port *up = up_to_u8250p(port); /* * Both serial8250_em485_init() and serial8250_em485_destroy() * are idempotent. */ if (rs485->flags & SER_RS485_ENABLED) return serial8250_em485_init(up); serial8250_em485_destroy(up); return 0; } EXPORT_SYMBOL_GPL(serial8250_em485_config); /* * These two wrappers ensure that enable_runtime_pm_tx() can be called more than * once and disable_runtime_pm_tx() will still disable RPM because the fifo is * empty and the HW can idle again. */ void serial8250_rpm_get_tx(struct uart_8250_port *p) { unsigned char rpm_active; if (!(p->capabilities & UART_CAP_RPM)) return; rpm_active = xchg(&p->rpm_tx_active, 1); if (rpm_active) return; pm_runtime_get_sync(p->port.dev); } EXPORT_SYMBOL_GPL(serial8250_rpm_get_tx); void serial8250_rpm_put_tx(struct uart_8250_port *p) { unsigned char rpm_active; if (!(p->capabilities & UART_CAP_RPM)) return; rpm_active = xchg(&p->rpm_tx_active, 0); if (!rpm_active) return; pm_runtime_mark_last_busy(p->port.dev); pm_runtime_put_autosuspend(p->port.dev); } EXPORT_SYMBOL_GPL(serial8250_rpm_put_tx); /* * IER sleep support. UARTs which have EFRs need the "extended * capability" bit enabled. Note that on XR16C850s, we need to * reset LCR to write to IER. */ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep) { unsigned char lcr = 0, efr = 0; serial8250_rpm_get(p); if (p->capabilities & UART_CAP_SLEEP) { /* Synchronize UART_IER access against the console. */ uart_port_lock_irq(&p->port); if (p->capabilities & UART_CAP_EFR) { lcr = serial_in(p, UART_LCR); efr = serial_in(p, UART_EFR); serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B); serial_out(p, UART_EFR, UART_EFR_ECB); serial_out(p, UART_LCR, 0); } serial_out(p, UART_IER, sleep ? UART_IERX_SLEEP : 0); if (p->capabilities & UART_CAP_EFR) { serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B); serial_out(p, UART_EFR, efr); serial_out(p, UART_LCR, lcr); } uart_port_unlock_irq(&p->port); } serial8250_rpm_put(p); } static void serial8250_clear_IER(struct uart_8250_port *up) { if (up->capabilities & UART_CAP_UUE) serial_out(up, UART_IER, UART_IER_UUE); else serial_out(up, UART_IER, 0); } #ifdef CONFIG_SERIAL_8250_RSA /* * Attempts to turn on the RSA FIFO. Returns zero on failure. * We set the port uart clock rate if we succeed. */ static int __enable_rsa(struct uart_8250_port *up) { unsigned char mode; int result; mode = serial_in(up, UART_RSA_MSR); result = mode & UART_RSA_MSR_FIFO; if (!result) { serial_out(up, UART_RSA_MSR, mode | UART_RSA_MSR_FIFO); mode = serial_in(up, UART_RSA_MSR); result = mode & UART_RSA_MSR_FIFO; } if (result) up->port.uartclk = SERIAL_RSA_BAUD_BASE * 16; return result; } static void enable_rsa(struct uart_8250_port *up) { if (up->port.type == PORT_RSA) { if (up->port.uartclk != SERIAL_RSA_BAUD_BASE * 16) { uart_port_lock_irq(&up->port); __enable_rsa(up); uart_port_unlock_irq(&up->port); } if (up->port.uartclk == SERIAL_RSA_BAUD_BASE * 16) serial_out(up, UART_RSA_FRR, 0); } } /* * Attempts to turn off the RSA FIFO. Returns zero on failure. * It is unknown why interrupts were disabled in here. However, * the caller is expected to preserve this behaviour by grabbing * the spinlock before calling this function. */ static void disable_rsa(struct uart_8250_port *up) { unsigned char mode; int result; if (up->port.type == PORT_RSA && up->port.uartclk == SERIAL_RSA_BAUD_BASE * 16) { uart_port_lock_irq(&up->port); mode = serial_in(up, UART_RSA_MSR); result = !(mode & UART_RSA_MSR_FIFO); if (!result) { serial_out(up, UART_RSA_MSR, mode & ~UART_RSA_MSR_FIFO); mode = serial_in(up, UART_RSA_MSR); result = !(mode & UART_RSA_MSR_FIFO); } if (result) up->port.uartclk = SERIAL_RSA_BAUD_BASE_LO * 16; uart_port_unlock_irq(&up->port); } } #endif /* CONFIG_SERIAL_8250_RSA */ /* * This is a quickie test to see how big the FIFO is. * It doesn't work at all the time, more's the pity. */ static int size_fifo(struct uart_8250_port *up) { unsigned char old_fcr, old_mcr, old_lcr; u32 old_dl; int count; old_lcr = serial_in(up, UART_LCR); serial_out(up, UART_LCR, 0); old_fcr = serial_in(up, UART_FCR); old_mcr = serial8250_in_MCR(up); serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT); serial8250_out_MCR(up, UART_MCR_LOOP); serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); old_dl = serial_dl_read(up); serial_dl_write(up, 0x0001); serial_out(up, UART_LCR, UART_LCR_WLEN8); for (count = 0; count < 256; count++) serial_out(up, UART_TX, count); mdelay(20);/* FIXME - schedule_timeout */ for (count = 0; (serial_in(up, UART_LSR) & UART_LSR_DR) && (count < 256); count++) serial_in(up, UART_RX); serial_out(up, UART_FCR, old_fcr); serial8250_out_MCR(up, old_mcr); serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); serial_dl_write(up, old_dl); serial_out(up, UART_LCR, old_lcr); return count; } /* * Read UART ID using the divisor method - set DLL and DLM to zero * and the revision will be in DLL and device type in DLM. We * preserve the device state across this. */ static unsigned int autoconfig_read_divisor_id(struct uart_8250_port *p) { unsigned char old_lcr; unsigned int id, old_dl; old_lcr = serial_in(p, UART_LCR); serial_out(p, UART_LCR, UART_LCR_CONF_MODE_A); old_dl = serial_dl_read(p); serial_dl_write(p, 0); id = serial_dl_read(p); serial_dl_write(p, old_dl); serial_out(p, UART_LCR, old_lcr); return id; } /* * This is a helper routine to autodetect StarTech/Exar/Oxsemi UART's. * When this function is called we know it is at least a StarTech * 16650 V2, but it might be one of several StarTech UARTs, or one of * its clones. (We treat the broken original StarTech 16650 V1 as a * 16550, and why not? Startech doesn't seem to even acknowledge its * existence.) * * What evil have men's minds wrought... */ static void autoconfig_has_efr(struct uart_8250_port *up) { unsigned int id1, id2, id3, rev; /* * Everything with an EFR has SLEEP */ up->capabilities |= UART_CAP_EFR | UART_CAP_SLEEP; /* * First we check to see if it's an Oxford Semiconductor UART. * * If we have to do this here because some non-National * Semiconductor clone chips lock up if you try writing to the * LSR register (which serial_icr_read does) */ /* * Check for Oxford Semiconductor 16C950. * * EFR [4] must be set else this test fails. * * This shouldn't be necessary, but Mike Hudson (Exoray@isys.ca) * claims that it's needed for 952 dual UART's (which are not * recommended for new designs). */ up->acr = 0; serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); serial_out(up, UART_EFR, UART_EFR_ECB); serial_out(up, UART_LCR, 0x00); id1 = serial_icr_read(up, UART_ID1); id2 = serial_icr_read(up, UART_ID2); id3 = serial_icr_read(up, UART_ID3); rev = serial_icr_read(up, UART_REV); DEBUG_AUTOCONF("950id=%02x:%02x:%02x:%02x ", id1, id2, id3, rev); if (id1 == 0x16 && id2 == 0xC9 && (id3 == 0x50 || id3 == 0x52 || id3 == 0x54)) { up->port.type = PORT_16C950; /* * Enable work around for the Oxford Semiconductor 952 rev B * chip which causes it to seriously miscalculate baud rates * when DLL is 0. */ if (id3 == 0x52 && rev == 0x01) up->bugs |= UART_BUG_QUOT; return; } /* * We check for a XR16C850 by setting DLL and DLM to 0, and then * reading back DLL and DLM. The chip type depends on the DLM * value read back: * 0x10 - XR16C850 and the DLL contains the chip revision. * 0x12 - XR16C2850. * 0x14 - XR16C854. */ id1 = autoconfig_read_divisor_id(up); DEBUG_AUTOCONF("850id=%04x ", id1); id2 = id1 >> 8; if (id2 == 0x10 || id2 == 0x12 || id2 == 0x14) { up->port.type = PORT_16850; return; } /* * It wasn't an XR16C850. * * We distinguish between the '654 and the '650 by counting * how many bytes are in the FIFO. I'm using this for now, * since that's the technique that was sent to me in the * serial driver update, but I'm not convinced this works. * I've had problems doing this in the past. -TYT */ if (size_fifo(up) == 64) up->port.type = PORT_16654; else up->port.type = PORT_16650V2; } /* * We detected a chip without a FIFO. Only two fall into * this category - the original 8250 and the 16450. The * 16450 has a scratch register (accessible with LCR=0) */ static void autoconfig_8250(struct uart_8250_port *up) { unsigned char scratch, status1, status2; up->port.type = PORT_8250; scratch = serial_in(up, UART_SCR); serial_out(up, UART_SCR, 0xa5); status1 = serial_in(up, UART_SCR); serial_out(up, UART_SCR, 0x5a); status2 = serial_in(up, UART_SCR); serial_out(up, UART_SCR, scratch); if (status1 == 0xa5 && status2 == 0x5a) up->port.type = PORT_16450; } static int broken_efr(struct uart_8250_port *up) { /* * Exar ST16C2550 "A2" devices incorrectly detect as * having an EFR, and report an ID of 0x0201. See * http://linux.derkeiler.com/Mailing-Lists/Kernel/2004-11/4812.html */ if (autoconfig_read_divisor_id(up) == 0x0201 && size_fifo(up) == 16) return 1; return 0; } /* * We know that the chip has FIFOs. Does it have an EFR? The * EFR is located in the same register position as the IIR and * we know the top two bits of the IIR are currently set. The * EFR should contain zero. Try to read the EFR. */ static void autoconfig_16550a(struct uart_8250_port *up) { unsigned char status1, status2; unsigned int iersave; /* Port locked to synchronize UART_IER access against the console. */ lockdep_assert_held_once(&up->port.lock); up->port.type = PORT_16550A; up->capabilities |= UART_CAP_FIFO; if (!IS_ENABLED(CONFIG_SERIAL_8250_16550A_VARIANTS) && !(up->port.flags & UPF_FULL_PROBE)) return; /* * Check for presence of the EFR when DLAB is set. * Only ST16C650V1 UARTs pass this test. */ serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); if (serial_in(up, UART_EFR) == 0) { serial_out(up, UART_EFR, 0xA8); if (serial_in(up, UART_EFR) != 0) { DEBUG_AUTOCONF("EFRv1 "); up->port.type = PORT_16650; up->capabilities |= UART_CAP_EFR | UART_CAP_SLEEP; } else { serial_out(up, UART_LCR, 0); serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR7_64BYTE); status1 = serial_in(up, UART_IIR) & UART_IIR_FIFO_ENABLED_16750; serial_out(up, UART_FCR, 0); serial_out(up, UART_LCR, 0); if (status1 == UART_IIR_FIFO_ENABLED_16750) up->port.type = PORT_16550A_FSL64; else DEBUG_AUTOCONF("Motorola 8xxx DUART "); } serial_out(up, UART_EFR, 0); return; } /* * Maybe it requires 0xbf to be written to the LCR. * (other ST16C650V2 UARTs, TI16C752A, etc) */ serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); if (serial_in(up, UART_EFR) == 0 && !broken_efr(up)) { DEBUG_AUTOCONF("EFRv2 "); autoconfig_has_efr(up); return; } /* * Check for a National Semiconductor SuperIO chip. * Attempt to switch to bank 2, read the value of the LOOP bit * from EXCR1. Switch back to bank 0, change it in MCR. Then * switch back to bank 2, read it from EXCR1 again and check * it's changed. If so, set baud_base in EXCR2 to 921600. -- dwmw2 */ serial_out(up, UART_LCR, 0); status1 = serial8250_in_MCR(up); serial_out(up, UART_LCR, 0xE0); status2 = serial_in(up, 0x02); /* EXCR1 */ if (!((status2 ^ status1) & UART_MCR_LOOP)) { serial_out(up, UART_LCR, 0); serial8250_out_MCR(up, status1 ^ UART_MCR_LOOP); serial_out(up, UART_LCR, 0xE0); status2 = serial_in(up, 0x02); /* EXCR1 */ serial_out(up, UART_LCR, 0); serial8250_out_MCR(up, status1); if ((status2 ^ status1) & UART_MCR_LOOP) { unsigned short quot; serial_out(up, UART_LCR, 0xE0); quot = serial_dl_read(up); quot <<= 3; if (ns16550a_goto_highspeed(up)) serial_dl_write(up, quot); serial_out(up, UART_LCR, 0); up->port.uartclk = 921600*16; up->port.type = PORT_NS16550A; up->capabilities |= UART_NATSEMI; return; } } /* * No EFR. Try to detect a TI16750, which only sets bit 5 of * the IIR when 64 byte FIFO mode is enabled when DLAB is set. * Try setting it with and without DLAB set. Cheap clones * set bit 5 without DLAB set. */ serial_out(up, UART_LCR, 0); serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR7_64BYTE); status1 = serial_in(up, UART_IIR) & UART_IIR_FIFO_ENABLED_16750; serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO); serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR7_64BYTE); status2 = serial_in(up, UART_IIR) & UART_IIR_FIFO_ENABLED_16750; serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO); serial_out(up, UART_LCR, 0); DEBUG_AUTOCONF("iir1=%d iir2=%d ", status1, status2); if (status1 == UART_IIR_FIFO_ENABLED_16550A && status2 == UART_IIR_FIFO_ENABLED_16750) { up->port.type = PORT_16750; up->capabilities |= UART_CAP_AFE | UART_CAP_SLEEP; return; } /* * Try writing and reading the UART_IER_UUE bit (b6). * If it works, this is probably one of the Xscale platform's * internal UARTs. * We're going to explicitly set the UUE bit to 0 before * trying to write and read a 1 just to make sure it's not * already a 1 and maybe locked there before we even start. */ iersave = serial_in(up, UART_IER); serial_out(up, UART_IER, iersave & ~UART_IER_UUE); if (!(serial_in(up, UART_IER) & UART_IER_UUE)) { /* * OK it's in a known zero state, try writing and reading * without disturbing the current state of the other bits. */ serial_out(up, UART_IER, iersave | UART_IER_UUE); if (serial_in(up, UART_IER) & UART_IER_UUE) { /* * It's an Xscale. * We'll leave the UART_IER_UUE bit set to 1 (enabled). */ DEBUG_AUTOCONF("Xscale "); up->port.type = PORT_XSCALE; up->capabilities |= UART_CAP_UUE | UART_CAP_RTOIE; return; } } else { /* * If we got here we couldn't force the IER_UUE bit to 0. * Log it and continue. */ DEBUG_AUTOCONF("Couldn't force IER_UUE to 0 "); } serial_out(up, UART_IER, iersave); /* * We distinguish between 16550A and U6 16550A by counting * how many bytes are in the FIFO. */ if (up->port.type == PORT_16550A && size_fifo(up) == 64) { up->port.type = PORT_U6_16550A; up->capabilities |= UART_CAP_AFE; } } /* * This routine is called by rs_init() to initialize a specific serial * port. It determines what type of UART chip this serial port is * using: 8250, 16450, 16550, 16550A. The important question is * whether or not this UART is a 16550A or not, since this will * determine whether or not we can use its FIFO features or not. */ static void autoconfig(struct uart_8250_port *up) { unsigned char status1, scratch, scratch2, scratch3; unsigned char save_lcr, save_mcr; struct uart_port *port = &up->port; unsigned long flags; unsigned int old_capabilities; if (!port->iobase && !port->mapbase && !port->membase) return; DEBUG_AUTOCONF("%s: autoconf (0x%04lx, 0x%p): ", port->name, port->iobase, port->membase); /* * We really do need global IRQs disabled here - we're going to * be frobbing the chips IRQ enable register to see if it exists. * * Synchronize UART_IER access against the console. */ uart_port_lock_irqsave(port, &flags); up->capabilities = 0; up->bugs = 0; if (!(port->flags & UPF_BUGGY_UART)) { /* * Do a simple existence test first; if we fail this, * there's no point trying anything else. * * 0x80 is used as a nonsense port to prevent against * false positives due to ISA bus float. The * assumption is that 0x80 is a non-existent port; * which should be safe since include/asm/io.h also * makes this assumption. * * Note: this is safe as long as MCR bit 4 is clear * and the device is in "PC" mode. */ scratch = serial_in(up, UART_IER); serial_out(up, UART_IER, 0); #if defined(__i386__) && defined(CONFIG_HAS_IOPORT) outb(0xff, 0x080); #endif /* * Mask out IER[7:4] bits for test as some UARTs (e.g. TL * 16C754B) allow only to modify them if an EFR bit is set. */ scratch2 = serial_in(up, UART_IER) & UART_IER_ALL_INTR; serial_out(up, UART_IER, UART_IER_ALL_INTR); #if defined(__i386__) && defined(CONFIG_HAS_IOPORT) outb(0, 0x080); #endif scratch3 = serial_in(up, UART_IER) & UART_IER_ALL_INTR; serial_out(up, UART_IER, scratch); if (scratch2 != 0 || scratch3 != UART_IER_ALL_INTR) { /* * We failed; there's nothing here */ uart_port_unlock_irqrestore(port, flags); DEBUG_AUTOCONF("IER test failed (%02x, %02x) ", scratch2, scratch3); goto out; } } save_mcr = serial8250_in_MCR(up); save_lcr = serial_in(up, UART_LCR); /* * Check to see if a UART is really there. Certain broken * internal modems based on the Rockwell chipset fail this * test, because they apparently don't implement the loopback * test mode. So this test is skipped on the COM 1 through * COM 4 ports. This *should* be safe, since no board * manufacturer would be stupid enough to design a board * that conflicts with COM 1-4 --- we hope! */ if (!(port->flags & UPF_SKIP_TEST)) { serial8250_out_MCR(up, UART_MCR_LOOP | UART_MCR_OUT2 | UART_MCR_RTS); status1 = serial_in(up, UART_MSR) & UART_MSR_STATUS_BITS; serial8250_out_MCR(up, save_mcr); if (status1 != (UART_MSR_DCD | UART_MSR_CTS)) { uart_port_unlock_irqrestore(port, flags); DEBUG_AUTOCONF("LOOP test failed (%02x) ", status1); goto out; } } /* * We're pretty sure there's a port here. Lets find out what * type of port it is. The IIR top two bits allows us to find * out if it's 8250 or 16450, 16550, 16550A or later. This * determines what we test for next. * * We also initialise the EFR (if any) to zero for later. The * EFR occupies the same register location as the FCR and IIR. */ serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); serial_out(up, UART_EFR, 0); serial_out(up, UART_LCR, 0); serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO); switch (serial_in(up, UART_IIR) & UART_IIR_FIFO_ENABLED) { case UART_IIR_FIFO_ENABLED_8250: autoconfig_8250(up); break; case UART_IIR_FIFO_ENABLED_16550: port->type = PORT_16550; break; case UART_IIR_FIFO_ENABLED_16550A: autoconfig_16550a(up); break; default: port->type = PORT_UNKNOWN; break; } #ifdef CONFIG_SERIAL_8250_RSA /* * Only probe for RSA ports if we got the region. */ if (port->type == PORT_16550A && up->probe & UART_PROBE_RSA && __enable_rsa(up)) port->type = PORT_RSA; #endif serial_out(up, UART_LCR, save_lcr); port->fifosize = uart_config[up->port.type].fifo_size; old_capabilities = up->capabilities; up->capabilities = uart_config[port->type].flags; up->tx_loadsz = uart_config[port->type].tx_loadsz; if (port->type == PORT_UNKNOWN) goto out_unlock; /* * Reset the UART. */ #ifdef CONFIG_SERIAL_8250_RSA if (port->type == PORT_RSA) serial_out(up, UART_RSA_FRR, 0); #endif serial8250_out_MCR(up, save_mcr); serial8250_clear_fifos(up); serial_in(up, UART_RX); serial8250_clear_IER(up); out_unlock: uart_port_unlock_irqrestore(port, flags); /* * Check if the device is a Fintek F81216A */ if (port->type == PORT_16550A && port->iotype == UPIO_PORT) fintek_8250_probe(up); if (up->capabilities != old_capabilities) { dev_warn(port->dev, "detected caps %08x should be %08x\n", old_capabilities, up->capabilities); } out: DEBUG_AUTOCONF("iir=%d ", scratch); DEBUG_AUTOCONF("type=%s\n", uart_config[port->type].name); } static void autoconfig_irq(struct uart_8250_port *up) { struct uart_port *port = &up->port; unsigned char save_mcr, save_ier; unsigned char save_ICP = 0; unsigned int ICP = 0; unsigned long irqs; int irq; if (port->flags & UPF_FOURPORT) { ICP = (port->iobase & 0xfe0) | 0x1f; save_ICP = inb_p(ICP); outb_p(0x80, ICP); inb_p(ICP); } /* forget possible initially masked and pending IRQ */ probe_irq_off(probe_irq_on()); save_mcr = serial8250_in_MCR(up); /* Synchronize UART_IER access against the console. */ uart_port_lock_irq(port); save_ier = serial_in(up, UART_IER); uart_port_unlock_irq(port); serial8250_out_MCR(up, UART_MCR_OUT1 | UART_MCR_OUT2); irqs = probe_irq_on(); serial8250_out_MCR(up, 0); udelay(10); if (port->flags & UPF_FOURPORT) { serial8250_out_MCR(up, UART_MCR_DTR | UART_MCR_RTS); } else { serial8250_out_MCR(up, UART_MCR_DTR | UART_MCR_RTS | UART_MCR_OUT2); } /* Synchronize UART_IER access against the console. */ uart_port_lock_irq(port); serial_out(up, UART_IER, UART_IER_ALL_INTR); uart_port_unlock_irq(port); serial_in(up, UART_LSR); serial_in(up, UART_RX); serial_in(up, UART_IIR); serial_in(up, UART_MSR); serial_out(up, UART_TX, 0xFF); udelay(20); irq = probe_irq_off(irqs); serial8250_out_MCR(up, save_mcr); /* Synchronize UART_IER access against the console. */ uart_port_lock_irq(port); serial_out(up, UART_IER, save_ier); uart_port_unlock_irq(port); if (port->flags & UPF_FOURPORT) outb_p(save_ICP, ICP); port->irq = (irq > 0) ? irq : 0; } static void serial8250_stop_rx(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); /* Port locked to synchronize UART_IER access against the console. */ lockdep_assert_held_once(&port->lock); serial8250_rpm_get(up); up->ier &= ~(UART_IER_RLSI | UART_IER_RDI); serial_port_out(port, UART_IER, up->ier); serial8250_rpm_put(up); } /** * serial8250_em485_stop_tx() - generic ->rs485_stop_tx() callback * @p: uart 8250 port * @toggle_ier: true to allow enabling receive interrupts * * Generic callback usable by 8250 uart drivers to stop rs485 transmission. */ void serial8250_em485_stop_tx(struct uart_8250_port *p, bool toggle_ier) { unsigned char mcr = serial8250_in_MCR(p); /* Port locked to synchronize UART_IER access against the console. */ lockdep_assert_held_once(&p->port.lock); if (p->port.rs485.flags & SER_RS485_RTS_AFTER_SEND) mcr |= UART_MCR_RTS; else mcr &= ~UART_MCR_RTS; serial8250_out_MCR(p, mcr); /* * Empty the RX FIFO, we are not interested in anything * received during the half-duplex transmission. * Enable previously disabled RX interrupts. */ if (!(p->port.rs485.flags & SER_RS485_RX_DURING_TX)) { serial8250_clear_and_reinit_fifos(p); if (toggle_ier) { p->ier |= UART_IER_RLSI | UART_IER_RDI; serial_port_out(&p->port, UART_IER, p->ier); } } } EXPORT_SYMBOL_GPL(serial8250_em485_stop_tx); static enum hrtimer_restart serial8250_em485_handle_stop_tx(struct hrtimer *t) { struct uart_8250_em485 *em485 = container_of(t, struct uart_8250_em485, stop_tx_timer); struct uart_8250_port *p = em485->port; unsigned long flags; serial8250_rpm_get(p); uart_port_lock_irqsave(&p->port, &flags); if (em485->active_timer == &em485->stop_tx_timer) { p->rs485_stop_tx(p, true); em485->active_timer = NULL; em485->tx_stopped = true; } uart_port_unlock_irqrestore(&p->port, flags); serial8250_rpm_put(p); return HRTIMER_NORESTART; } static void start_hrtimer_ms(struct hrtimer *hrt, unsigned long msec) { hrtimer_start(hrt, ms_to_ktime(msec), HRTIMER_MODE_REL); } static void __stop_tx_rs485(struct uart_8250_port *p, u64 stop_delay) { struct uart_8250_em485 *em485 = p->em485; /* Port locked to synchronize UART_IER access against the console. */ lockdep_assert_held_once(&p->port.lock); stop_delay += (u64)p->port.rs485.delay_rts_after_send * NSEC_PER_MSEC; /* * rs485_stop_tx() is going to set RTS according to config * AND flush RX FIFO if required. */ if (stop_delay > 0) { em485->active_timer = &em485->stop_tx_timer; hrtimer_start(&em485->stop_tx_timer, ns_to_ktime(stop_delay), HRTIMER_MODE_REL); } else { p->rs485_stop_tx(p, true); em485->active_timer = NULL; em485->tx_stopped = true; } } static inline void __stop_tx(struct uart_8250_port *p) { struct uart_8250_em485 *em485 = p->em485; if (em485) { u16 lsr = serial_lsr_in(p); u64 stop_delay = 0; if (!(lsr & UART_LSR_THRE)) return; /* * To provide required timing and allow FIFO transfer, * __stop_tx_rs485() must be called only when both FIFO and * shift register are empty. The device driver should either * enable interrupt on TEMT or set UART_CAP_NOTEMT that will * enlarge stop_tx_timer by the tx time of one frame to cover * for emptying of the shift register. */ if (!(lsr & UART_LSR_TEMT)) { if (!(p->capabilities & UART_CAP_NOTEMT)) return; /* * RTS might get deasserted too early with the normal * frame timing formula. It seems to suggest THRE might * get asserted already during tx of the stop bit * rather than after it is fully sent. * Roughly estimate 1 extra bit here with / 7. */ stop_delay = p->port.frame_time + DIV_ROUND_UP(p->port.frame_time, 7); } __stop_tx_rs485(p, stop_delay); } if (serial8250_clear_THRI(p)) serial8250_rpm_put_tx(p); } static void serial8250_stop_tx(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); serial8250_rpm_get(up); __stop_tx(up); /* * We really want to stop the transmitter from sending. */ if (port->type == PORT_16C950) { up->acr |= UART_ACR_TXDIS; serial_icr_write(up, UART_ACR, up->acr); } serial8250_rpm_put(up); } static inline void __start_tx(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); if (up->dma && !up->dma->tx_dma(up)) return; if (serial8250_set_THRI(up)) { if (up->bugs & UART_BUG_TXEN) { u16 lsr = serial_lsr_in(up); if (lsr & UART_LSR_THRE) serial8250_tx_chars(up); } } /* * Re-enable the transmitter if we disabled it. */ if (port->type == PORT_16C950 && up->acr & UART_ACR_TXDIS) { up->acr &= ~UART_ACR_TXDIS; serial_icr_write(up, UART_ACR, up->acr); } } /** * serial8250_em485_start_tx() - generic ->rs485_start_tx() callback * @up: uart 8250 port * @toggle_ier: true to allow disabling receive interrupts * * Generic callback usable by 8250 uart drivers to start rs485 transmission. * Assumes that setting the RTS bit in the MCR register means RTS is high. * (Some chips use inverse semantics.) Further assumes that reception is * stoppable by disabling the UART_IER_RDI interrupt. (Some chips set the * UART_LSR_DR bit even when UART_IER_RDI is disabled, foiling this approach.) */ void serial8250_em485_start_tx(struct uart_8250_port *up, bool toggle_ier) { unsigned char mcr = serial8250_in_MCR(up); if (!(up->port.rs485.flags & SER_RS485_RX_DURING_TX) && toggle_ier) serial8250_stop_rx(&up->port); if (up->port.rs485.flags & SER_RS485_RTS_ON_SEND) mcr |= UART_MCR_RTS; else mcr &= ~UART_MCR_RTS; serial8250_out_MCR(up, mcr); } EXPORT_SYMBOL_GPL(serial8250_em485_start_tx); /* Returns false, if start_tx_timer was setup to defer TX start */ static bool start_tx_rs485(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); struct uart_8250_em485 *em485 = up->em485; /* * While serial8250_em485_handle_stop_tx() is a noop if * em485->active_timer != &em485->stop_tx_timer, it might happen that * the timer is still armed and triggers only after the current bunch of * chars is send and em485->active_timer == &em485->stop_tx_timer again. * So cancel the timer. There is still a theoretical race condition if * the timer is already running and only comes around to check for * em485->active_timer when &em485->stop_tx_timer is armed again. */ if (em485->active_timer == &em485->stop_tx_timer) hrtimer_try_to_cancel(&em485->stop_tx_timer); em485->active_timer = NULL; if (em485->tx_stopped) { em485->tx_stopped = false; up->rs485_start_tx(up, true); if (up->port.rs485.delay_rts_before_send > 0) { em485->active_timer = &em485->start_tx_timer; start_hrtimer_ms(&em485->start_tx_timer, up->port.rs485.delay_rts_before_send); return false; } } return true; } static enum hrtimer_restart serial8250_em485_handle_start_tx(struct hrtimer *t) { struct uart_8250_em485 *em485 = container_of(t, struct uart_8250_em485, start_tx_timer); struct uart_8250_port *p = em485->port; unsigned long flags; uart_port_lock_irqsave(&p->port, &flags); if (em485->active_timer == &em485->start_tx_timer) { __start_tx(&p->port); em485->active_timer = NULL; } uart_port_unlock_irqrestore(&p->port, flags); return HRTIMER_NORESTART; } static void serial8250_start_tx(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); struct uart_8250_em485 *em485 = up->em485; /* Port locked to synchronize UART_IER access against the console. */ lockdep_assert_held_once(&port->lock); if (!port->x_char && kfifo_is_empty(&port->state->port.xmit_fifo)) return; serial8250_rpm_get_tx(up); if (em485) { if ((em485->active_timer == &em485->start_tx_timer) || !start_tx_rs485(port)) return; } __start_tx(port); } static void serial8250_throttle(struct uart_port *port) { port->throttle(port); } static void serial8250_unthrottle(struct uart_port *port) { port->unthrottle(port); } static void serial8250_disable_ms(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); /* Port locked to synchronize UART_IER access against the console. */ lockdep_assert_held_once(&port->lock); /* no MSR capabilities */ if (up->bugs & UART_BUG_NOMSR) return; mctrl_gpio_disable_ms(up->gpios); up->ier &= ~UART_IER_MSI; serial_port_out(port, UART_IER, up->ier); } static void serial8250_enable_ms(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); /* Port locked to synchronize UART_IER access against the console. */ lockdep_assert_held_once(&port->lock); /* no MSR capabilities */ if (up->bugs & UART_BUG_NOMSR) return; mctrl_gpio_enable_ms(up->gpios); up->ier |= UART_IER_MSI; serial8250_rpm_get(up); serial_port_out(port, UART_IER, up->ier); serial8250_rpm_put(up); } void serial8250_read_char(struct uart_8250_port *up, u16 lsr) { struct uart_port *port = &up->port; u8 ch, flag = TTY_NORMAL; if (likely(lsr & UART_LSR_DR)) ch = serial_in(up, UART_RX); else /* * Intel 82571 has a Serial Over Lan device that will * set UART_LSR_BI without setting UART_LSR_DR when * it receives a break. To avoid reading from the * receive buffer without UART_LSR_DR bit set, we * just force the read character to be 0 */ ch = 0; port->icount.rx++; lsr |= up->lsr_saved_flags; up->lsr_saved_flags = 0; if (unlikely(lsr & UART_LSR_BRK_ERROR_BITS)) { if (lsr & UART_LSR_BI) { lsr &= ~(UART_LSR_FE | UART_LSR_PE); port->icount.brk++; /* * We do the SysRQ and SAK checking * here because otherwise the break * may get masked by ignore_status_mask * or read_status_mask. */ if (uart_handle_break(port)) return; } else if (lsr & UART_LSR_PE) port->icount.parity++; else if (lsr & UART_LSR_FE) port->icount.frame++; if (lsr & UART_LSR_OE) port->icount.overrun++; /* * Mask off conditions which should be ignored. */ lsr &= port->read_status_mask; if (lsr & UART_LSR_BI) { dev_dbg(port->dev, "handling break\n"); flag = TTY_BREAK; } else if (lsr & UART_LSR_PE) flag = TTY_PARITY; else if (lsr & UART_LSR_FE) flag = TTY_FRAME; } if (uart_prepare_sysrq_char(port, ch)) return; uart_insert_char(port, lsr, UART_LSR_OE, ch, flag); } EXPORT_SYMBOL_GPL(serial8250_read_char); /* * serial8250_rx_chars - Read characters. The first LSR value must be passed in. * * Returns LSR bits. The caller should rely only on non-Rx related LSR bits * (such as THRE) because the LSR value might come from an already consumed * character. */ u16 serial8250_rx_chars(struct uart_8250_port *up, u16 lsr) { struct uart_port *port = &up->port; int max_count = 256; do { serial8250_read_char(up, lsr); if (--max_count == 0) break; lsr = serial_in(up, UART_LSR); } while (lsr & (UART_LSR_DR | UART_LSR_BI)); tty_flip_buffer_push(&port->state->port); return lsr; } EXPORT_SYMBOL_GPL(serial8250_rx_chars); void serial8250_tx_chars(struct uart_8250_port *up) { struct uart_port *port = &up->port; struct tty_port *tport = &port->state->port; int count; if (port->x_char) { uart_xchar_out(port, UART_TX); return; } if (uart_tx_stopped(port)) { serial8250_stop_tx(port); return; } if (kfifo_is_empty(&tport->xmit_fifo)) { __stop_tx(up); return; } count = up->tx_loadsz; do { unsigned char c; if (!uart_fifo_get(port, &c)) break; serial_out(up, UART_TX, c); if (up->bugs & UART_BUG_TXRACE) { /* * The Aspeed BMC virtual UARTs have a bug where data * may get stuck in the BMC's Tx FIFO from bursts of * writes on the APB interface. * * Delay back-to-back writes by a read cycle to avoid * stalling the VUART. Read a register that won't have * side-effects and discard the result. */ serial_in(up, UART_SCR); } if ((up->capabilities & UART_CAP_HFIFO) && !uart_lsr_tx_empty(serial_in(up, UART_LSR))) break; /* The BCM2835 MINI UART THRE bit is really a not-full bit. */ if ((up->capabilities & UART_CAP_MINI) && !(serial_in(up, UART_LSR) & UART_LSR_THRE)) break; } while (--count > 0); if (kfifo_len(&tport->xmit_fifo) < WAKEUP_CHARS) uart_write_wakeup(port); /* * With RPM enabled, we have to wait until the FIFO is empty before the * HW can go idle. So we get here once again with empty FIFO and disable * the interrupt and RPM in __stop_tx() */ if (kfifo_is_empty(&tport->xmit_fifo) && !(up->capabilities & UART_CAP_RPM)) __stop_tx(up); } EXPORT_SYMBOL_GPL(serial8250_tx_chars); /* Caller holds uart port lock */ unsigned int serial8250_modem_status(struct uart_8250_port *up) { struct uart_port *port = &up->port; unsigned int status = serial_in(up, UART_MSR); status |= up->msr_saved_flags; up->msr_saved_flags = 0; if (status & UART_MSR_ANY_DELTA && up->ier & UART_IER_MSI && port->state != NULL) { if (status & UART_MSR_TERI) port->icount.rng++; if (status & UART_MSR_DDSR) port->icount.dsr++; if (status & UART_MSR_DDCD) uart_handle_dcd_change(port, status & UART_MSR_DCD); if (status & UART_MSR_DCTS) uart_handle_cts_change(port, status & UART_MSR_CTS); wake_up_interruptible(&port->state->port.delta_msr_wait); } return status; } EXPORT_SYMBOL_GPL(serial8250_modem_status); static bool handle_rx_dma(struct uart_8250_port *up, unsigned int iir) { switch (iir & 0x3f) { case UART_IIR_THRI: /* * Postpone DMA or not decision to IIR_RDI or IIR_RX_TIMEOUT * because it's impossible to do an informed decision about * that with IIR_THRI. * * This also fixes one known DMA Rx corruption issue where * DR is asserted but DMA Rx only gets a corrupted zero byte * (too early DR?). */ return false; case UART_IIR_RDI: if (!up->dma->rx_running) break; fallthrough; case UART_IIR_RLSI: case UART_IIR_RX_TIMEOUT: serial8250_rx_dma_flush(up); return true; } return up->dma->rx_dma(up); } /* * This handles the interrupt from one port. */ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) { struct uart_8250_port *up = up_to_u8250p(port); struct tty_port *tport = &port->state->port; bool skip_rx = false; unsigned long flags; u16 status; if (iir & UART_IIR_NO_INT) return 0; uart_port_lock_irqsave(port, &flags); status = serial_lsr_in(up); /* * If port is stopped and there are no error conditions in the * FIFO, then don't drain the FIFO, as this may lead to TTY buffer * overflow. Not servicing, RX FIFO would trigger auto HW flow * control when FIFO occupancy reaches preset threshold, thus * halting RX. This only works when auto HW flow control is * available. */ if (!(status & (UART_LSR_FIFOE | UART_LSR_BRK_ERROR_BITS)) && (port->status & (UPSTAT_AUTOCTS | UPSTAT_AUTORTS)) && !(up->ier & (UART_IER_RLSI | UART_IER_RDI))) skip_rx = true; if (status & (UART_LSR_DR | UART_LSR_BI) && !skip_rx) { struct irq_data *d; d = irq_get_irq_data(port->irq); if (d && irqd_is_wakeup_set(d)) pm_wakeup_event(tport->tty->dev, 0); if (!up->dma || handle_rx_dma(up, iir)) status = serial8250_rx_chars(up, status); } serial8250_modem_status(up); if ((status & UART_LSR_THRE) && (up->ier & UART_IER_THRI)) { if (!up->dma || up->dma->tx_err) serial8250_tx_chars(up); else if (!up->dma->tx_running) __stop_tx(up); } uart_unlock_and_check_sysrq_irqrestore(port, flags); return 1; } EXPORT_SYMBOL_GPL(serial8250_handle_irq); static int serial8250_default_handle_irq(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); unsigned int iir; int ret; serial8250_rpm_get(up); iir = serial_port_in(port, UART_IIR); ret = serial8250_handle_irq(port, iir); serial8250_rpm_put(up); return ret; } /* * Newer 16550 compatible parts such as the SC16C650 & Altera 16550 Soft IP * have a programmable TX threshold that triggers the THRE interrupt in * the IIR register. In this case, the THRE interrupt indicates the FIFO * has space available. Load it up with tx_loadsz bytes. */ static int serial8250_tx_threshold_handle_irq(struct uart_port *port) { unsigned long flags; unsigned int iir = serial_port_in(port, UART_IIR); /* TX Threshold IRQ triggered so load up FIFO */ if ((iir & UART_IIR_ID) == UART_IIR_THRI) { struct uart_8250_port *up = up_to_u8250p(port); uart_port_lock_irqsave(port, &flags); serial8250_tx_chars(up); uart_port_unlock_irqrestore(port, flags); } iir = serial_port_in(port, UART_IIR); return serial8250_handle_irq(port, iir); } static unsigned int serial8250_tx_empty(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); unsigned int result = 0; unsigned long flags; serial8250_rpm_get(up); uart_port_lock_irqsave(port, &flags); if (!serial8250_tx_dma_running(up) && uart_lsr_tx_empty(serial_lsr_in(up))) result = TIOCSER_TEMT; uart_port_unlock_irqrestore(port, flags); serial8250_rpm_put(up); return result; } unsigned int serial8250_do_get_mctrl(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); unsigned int status; unsigned int val; serial8250_rpm_get(up); status = serial8250_modem_status(up); serial8250_rpm_put(up); val = serial8250_MSR_to_TIOCM(status); if (up->gpios) return mctrl_gpio_get(up->gpios, &val); return val; } EXPORT_SYMBOL_GPL(serial8250_do_get_mctrl); static unsigned int serial8250_get_mctrl(struct uart_port *port) { if (port->get_mctrl) return port->get_mctrl(port); return serial8250_do_get_mctrl(port); } void serial8250_do_set_mctrl(struct uart_port *port, unsigned int mctrl) { struct uart_8250_port *up = up_to_u8250p(port); unsigned char mcr; mcr = serial8250_TIOCM_to_MCR(mctrl); mcr |= up->mcr; serial8250_out_MCR(up, mcr); } EXPORT_SYMBOL_GPL(serial8250_do_set_mctrl); static void serial8250_set_mctrl(struct uart_port *port, unsigned int mctrl) { if (port->rs485.flags & SER_RS485_ENABLED) return; if (port->set_mctrl) port->set_mctrl(port, mctrl); else serial8250_do_set_mctrl(port, mctrl); } static void serial8250_break_ctl(struct uart_port *port, int break_state) { struct uart_8250_port *up = up_to_u8250p(port); unsigned long flags; serial8250_rpm_get(up); uart_port_lock_irqsave(port, &flags); if (break_state == -1) up->lcr |= UART_LCR_SBC; else up->lcr &= ~UART_LCR_SBC; serial_port_out(port, UART_LCR, up->lcr); uart_port_unlock_irqrestore(port, flags); serial8250_rpm_put(up); } /* Returns true if @bits were set, false on timeout */ static bool wait_for_lsr(struct uart_8250_port *up, int bits) { unsigned int status, tmout; /* * Wait for a character to be sent. Fallback to a safe default * timeout value if @frame_time is not available. */ if (up->port.frame_time) tmout = up->port.frame_time * 2 / NSEC_PER_USEC; else tmout = 10000; for (;;) { status = serial_lsr_in(up); if ((status & bits) == bits) break; if (--tmout == 0) break; udelay(1); touch_nmi_watchdog(); } return (tmout != 0); } /* Wait for transmitter and holding register to empty with timeout */ static void wait_for_xmitr(struct uart_8250_port *up, int bits) { unsigned int tmout; wait_for_lsr(up, bits); /* Wait up to 1s for flow control if necessary */ if (up->port.flags & UPF_CONS_FLOW) { for (tmout = 1000000; tmout; tmout--) { unsigned int msr = serial_in(up, UART_MSR); up->msr_saved_flags |= msr & MSR_SAVE_FLAGS; if (msr & UART_MSR_CTS) break; udelay(1); touch_nmi_watchdog(); } } } #ifdef CONFIG_CONSOLE_POLL /* * Console polling routines for writing and reading from the uart while * in an interrupt or debug context. */ static int serial8250_get_poll_char(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); int status; u16 lsr; serial8250_rpm_get(up); lsr = serial_port_in(port, UART_LSR); if (!(lsr & UART_LSR_DR)) { status = NO_POLL_CHAR; goto out; } status = serial_port_in(port, UART_RX); out: serial8250_rpm_put(up); return status; } static void serial8250_put_poll_char(struct uart_port *port, unsigned char c) { unsigned int ier; struct uart_8250_port *up = up_to_u8250p(port); /* * Normally the port is locked to synchronize UART_IER access * against the console. However, this function is only used by * KDB/KGDB, where it may not be possible to acquire the port * lock because all other CPUs are quiesced. The quiescence * should allow safe lockless usage here. */ serial8250_rpm_get(up); /* * First save the IER then disable the interrupts */ ier = serial_port_in(port, UART_IER); serial8250_clear_IER(up); wait_for_xmitr(up, UART_LSR_BOTH_EMPTY); /* * Send the character out. */ serial_port_out(port, UART_TX, c); /* * Finally, wait for transmitter to become empty * and restore the IER */ wait_for_xmitr(up, UART_LSR_BOTH_EMPTY); serial_port_out(port, UART_IER, ier); serial8250_rpm_put(up); } #endif /* CONFIG_CONSOLE_POLL */ int serial8250_do_startup(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); unsigned long flags; unsigned char iir; int retval; u16 lsr; if (!port->fifosize) port->fifosize = uart_config[port->type].fifo_size; if (!up->tx_loadsz) up->tx_loadsz = uart_config[port->type].tx_loadsz; if (!up->capabilities) up->capabilities = uart_config[port->type].flags; up->mcr = 0; if (port->iotype != up->cur_iotype) set_io_from_upio(port); serial8250_rpm_get(up); if (port->type == PORT_16C950) { /* * Wake up and initialize UART * * Synchronize UART_IER access against the console. */ uart_port_lock_irqsave(port, &flags); up->acr = 0; serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); serial_port_out(port, UART_EFR, UART_EFR_ECB); serial_port_out(port, UART_IER, 0); serial_port_out(port, UART_LCR, 0); serial_icr_write(up, UART_CSR, 0); /* Reset the UART */ serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); serial_port_out(port, UART_EFR, UART_EFR_ECB); serial_port_out(port, UART_LCR, 0); uart_port_unlock_irqrestore(port, flags); } if (port->type == PORT_DA830) { /* * Reset the port * * Synchronize UART_IER access against the console. */ uart_port_lock_irqsave(port, &flags); serial_port_out(port, UART_IER, 0); serial_port_out(port, UART_DA830_PWREMU_MGMT, 0); uart_port_unlock_irqrestore(port, flags); mdelay(10); /* Enable Tx, Rx and free run mode */ serial_port_out(port, UART_DA830_PWREMU_MGMT, UART_DA830_PWREMU_MGMT_UTRST | UART_DA830_PWREMU_MGMT_URRST | UART_DA830_PWREMU_MGMT_FREE); } #ifdef CONFIG_SERIAL_8250_RSA /* * If this is an RSA port, see if we can kick it up to the * higher speed clock. */ enable_rsa(up); #endif /* * Clear the FIFO buffers and disable them. * (they will be reenabled in set_termios()) */ serial8250_clear_fifos(up); /* * Clear the interrupt registers. */ serial_port_in(port, UART_LSR); serial_port_in(port, UART_RX); serial_port_in(port, UART_IIR); serial_port_in(port, UART_MSR); /* * At this point, there's no way the LSR could still be 0xff; * if it is, then bail out, because there's likely no UART * here. */ if (!(port->flags & UPF_BUGGY_UART) && (serial_port_in(port, UART_LSR) == 0xff)) { dev_info_ratelimited(port->dev, "LSR safety check engaged!\n"); retval = -ENODEV; goto out; } /* * For a XR16C850, we need to set the trigger levels */ if (port->type == PORT_16850) { unsigned char fctr; serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); fctr = serial_in(up, UART_FCTR) & ~(UART_FCTR_RX|UART_FCTR_TX); serial_port_out(port, UART_FCTR, fctr | UART_FCTR_TRGD | UART_FCTR_RX); serial_port_out(port, UART_TRG, UART_TRG_96); serial_port_out(port, UART_FCTR, fctr | UART_FCTR_TRGD | UART_FCTR_TX); serial_port_out(port, UART_TRG, UART_TRG_96); serial_port_out(port, UART_LCR, 0); } /* * For the Altera 16550 variants, set TX threshold trigger level. */ if (((port->type == PORT_ALTR_16550_F32) || (port->type == PORT_ALTR_16550_F64) || (port->type == PORT_ALTR_16550_F128)) && (port->fifosize > 1)) { /* Bounds checking of TX threshold (valid 0 to fifosize-2) */ if ((up->tx_loadsz < 2) || (up->tx_loadsz > port->fifosize)) { dev_err(port->dev, "TX FIFO Threshold errors, skipping\n"); } else { serial_port_out(port, UART_ALTR_AFR, UART_ALTR_EN_TXFIFO_LW); serial_port_out(port, UART_ALTR_TX_LOW, port->fifosize - up->tx_loadsz); port->handle_irq = serial8250_tx_threshold_handle_irq; } } /* Check if we need to have shared IRQs */ if (port->irq && (up->port.flags & UPF_SHARE_IRQ)) up->port.irqflags |= IRQF_SHARED; retval = up->ops->setup_irq(up); if (retval) goto out; if (port->irq && !(up->port.flags & UPF_NO_THRE_TEST)) { unsigned char iir1; if (port->irqflags & IRQF_SHARED) disable_irq_nosync(port->irq); /* * Test for UARTs that do not reassert THRE when the * transmitter is idle and the interrupt has already * been cleared. Real 16550s should always reassert * this interrupt whenever the transmitter is idle and * the interrupt is enabled. Delays are necessary to * allow register changes to become visible. * * Synchronize UART_IER access against the console. */ uart_port_lock_irqsave(port, &flags); wait_for_xmitr(up, UART_LSR_THRE); serial_port_out_sync(port, UART_IER, UART_IER_THRI); udelay(1); /* allow THRE to set */ iir1 = serial_port_in(port, UART_IIR); serial_port_out(port, UART_IER, 0); serial_port_out_sync(port, UART_IER, UART_IER_THRI); udelay(1); /* allow a working UART time to re-assert THRE */ iir = serial_port_in(port, UART_IIR); serial_port_out(port, UART_IER, 0); uart_port_unlock_irqrestore(port, flags); if (port->irqflags & IRQF_SHARED) enable_irq(port->irq); /* * If the interrupt is not reasserted, or we otherwise * don't trust the iir, setup a timer to kick the UART * on a regular basis. */ if ((!(iir1 & UART_IIR_NO_INT) && (iir & UART_IIR_NO_INT)) || up->port.flags & UPF_BUG_THRE) { up->bugs |= UART_BUG_THRE; } } up->ops->setup_timer(up); /* * Now, initialize the UART */ serial_port_out(port, UART_LCR, UART_LCR_WLEN8); uart_port_lock_irqsave(port, &flags); if (up->port.flags & UPF_FOURPORT) { if (!up->port.irq) up->port.mctrl |= TIOCM_OUT1; } else /* * Most PC uarts need OUT2 raised to enable interrupts. */ if (port->irq) up->port.mctrl |= TIOCM_OUT2; serial8250_set_mctrl(port, port->mctrl); /* * Serial over Lan (SoL) hack: * Intel 8257x Gigabit ethernet chips have a 16550 emulation, to be * used for Serial Over Lan. Those chips take a longer time than a * normal serial device to signalize that a transmission data was * queued. Due to that, the above test generally fails. One solution * would be to delay the reading of iir. However, this is not * reliable, since the timeout is variable. So, let's just don't * test if we receive TX irq. This way, we'll never enable * UART_BUG_TXEN. */ if (up->port.quirks & UPQ_NO_TXEN_TEST) goto dont_test_tx_en; /* * Do a quick test to see if we receive an interrupt when we enable * the TX irq. */ serial_port_out(port, UART_IER, UART_IER_THRI); lsr = serial_port_in(port, UART_LSR); iir = serial_port_in(port, UART_IIR); serial_port_out(port, UART_IER, 0); if (lsr & UART_LSR_TEMT && iir & UART_IIR_NO_INT) { if (!(up->bugs & UART_BUG_TXEN)) { up->bugs |= UART_BUG_TXEN; dev_dbg(port->dev, "enabling bad tx status workarounds\n"); } } else { up->bugs &= ~UART_BUG_TXEN; } dont_test_tx_en: uart_port_unlock_irqrestore(port, flags); /* * Clear the interrupt registers again for luck, and clear the * saved flags to avoid getting false values from polling * routines or the previous session. */ serial_port_in(port, UART_LSR); serial_port_in(port, UART_RX); serial_port_in(port, UART_IIR); serial_port_in(port, UART_MSR); up->lsr_saved_flags = 0; up->msr_saved_flags = 0; /* * Request DMA channels for both RX and TX. */ if (up->dma) { const char *msg = NULL; if (uart_console(port)) msg = "forbid DMA for kernel console"; else if (serial8250_request_dma(up)) msg = "failed to request DMA"; if (msg) { dev_warn_ratelimited(port->dev, "%s\n", msg); up->dma = NULL; } } /* * Set the IER shadow for rx interrupts but defer actual interrupt * enable until after the FIFOs are enabled; otherwise, an already- * active sender can swamp the interrupt handler with "too much work". */ up->ier = UART_IER_RLSI | UART_IER_RDI; if (port->flags & UPF_FOURPORT) { unsigned int icp; /* * Enable interrupts on the AST Fourport board */ icp = (port->iobase & 0xfe0) | 0x01f; outb_p(0x80, icp); inb_p(icp); } retval = 0; out: serial8250_rpm_put(up); return retval; } EXPORT_SYMBOL_GPL(serial8250_do_startup); static int serial8250_startup(struct uart_port *port) { if (port->startup) return port->startup(port); return serial8250_do_startup(port); } void serial8250_do_shutdown(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); unsigned long flags; serial8250_rpm_get(up); /* * Disable interrupts from this port * * Synchronize UART_IER access against the console. */ uart_port_lock_irqsave(port, &flags); up->ier = 0; serial_port_out(port, UART_IER, 0); uart_port_unlock_irqrestore(port, flags); synchronize_irq(port->irq); if (up->dma) serial8250_release_dma(up); uart_port_lock_irqsave(port, &flags); if (port->flags & UPF_FOURPORT) { /* reset interrupts on the AST Fourport board */ inb((port->iobase & 0xfe0) | 0x1f); port->mctrl |= TIOCM_OUT1; } else port->mctrl &= ~TIOCM_OUT2; serial8250_set_mctrl(port, port->mctrl); uart_port_unlock_irqrestore(port, flags); /* * Disable break condition and FIFOs */ serial_port_out(port, UART_LCR, serial_port_in(port, UART_LCR) & ~UART_LCR_SBC); serial8250_clear_fifos(up); #ifdef CONFIG_SERIAL_8250_RSA /* * Reset the RSA board back to 115kbps compat mode. */ disable_rsa(up); #endif /* * Read data port to reset things, and then unlink from * the IRQ chain. */ serial_port_in(port, UART_RX); serial8250_rpm_put(up); up->ops->release_irq(up); } EXPORT_SYMBOL_GPL(serial8250_do_shutdown); static void serial8250_shutdown(struct uart_port *port) { if (port->shutdown) port->shutdown(port); else serial8250_do_shutdown(port); } static unsigned int serial8250_do_get_divisor(struct uart_port *port, unsigned int baud, unsigned int *frac) { upf_t magic_multiplier = port->flags & UPF_MAGIC_MULTIPLIER; struct uart_8250_port *up = up_to_u8250p(port); unsigned int quot; /* * Handle magic divisors for baud rates above baud_base on SMSC * Super I/O chips. We clamp custom rates from clk/6 and clk/12 * up to clk/4 (0x8001) and clk/8 (0x8002) respectively. These * magic divisors actually reprogram the baud rate generator's * reference clock derived from chips's 14.318MHz clock input. * * Documentation claims that with these magic divisors the base * frequencies of 7.3728MHz and 3.6864MHz are used respectively * for the extra baud rates of 460800bps and 230400bps rather * than the usual base frequency of 1.8462MHz. However empirical * evidence contradicts that. * * Instead bit 7 of the DLM register (bit 15 of the divisor) is * effectively used as a clock prescaler selection bit for the * base frequency of 7.3728MHz, always used. If set to 0, then * the base frequency is divided by 4 for use by the Baud Rate * Generator, for the usual arrangement where the value of 1 of * the divisor produces the baud rate of 115200bps. Conversely, * if set to 1 and high-speed operation has been enabled with the * Serial Port Mode Register in the Device Configuration Space, * then the base frequency is supplied directly to the Baud Rate * Generator, so for the divisor values of 0x8001, 0x8002, 0x8003, * 0x8004, etc. the respective baud rates produced are 460800bps, * 230400bps, 153600bps, 115200bps, etc. * * In all cases only low 15 bits of the divisor are used to divide * the baud base and therefore 32767 is the maximum divisor value * possible, even though documentation says that the programmable * Baud Rate Generator is capable of dividing the internal PLL * clock by any divisor from 1 to 65535. */ if (magic_multiplier && baud >= port->uartclk / 6) quot = 0x8001; else if (magic_multiplier && baud >= port->uartclk / 12) quot = 0x8002; else quot = uart_get_divisor(port, baud); /* * Oxford Semi 952 rev B workaround */ if (up->bugs & UART_BUG_QUOT && (quot & 0xff) == 0) quot++; return quot; } static unsigned int serial8250_get_divisor(struct uart_port *port, unsigned int baud, unsigned int *frac) { if (port->get_divisor) return port->get_divisor(port, baud, frac); return serial8250_do_get_divisor(port, baud, frac); } static unsigned char serial8250_compute_lcr(struct uart_8250_port *up, tcflag_t c_cflag) { unsigned char cval; cval = UART_LCR_WLEN(tty_get_char_size(c_cflag)); if (c_cflag & CSTOPB) cval |= UART_LCR_STOP; if (c_cflag & PARENB) cval |= UART_LCR_PARITY; if (!(c_cflag & PARODD)) cval |= UART_LCR_EPAR; if (c_cflag & CMSPAR) cval |= UART_LCR_SPAR; return cval; } void serial8250_do_set_divisor(struct uart_port *port, unsigned int baud, unsigned int quot) { struct uart_8250_port *up = up_to_u8250p(port); /* Workaround to enable 115200 baud on OMAP1510 internal ports */ if (is_omap1510_8250(up)) { if (baud == 115200) { quot = 1; serial_port_out(port, UART_OMAP_OSC_12M_SEL, 1); } else serial_port_out(port, UART_OMAP_OSC_12M_SEL, 0); } /* * For NatSemi, switch to bank 2 not bank 1, to avoid resetting EXCR2, * otherwise just set DLAB */ if (up->capabilities & UART_NATSEMI) serial_port_out(port, UART_LCR, 0xe0); else serial_port_out(port, UART_LCR, up->lcr | UART_LCR_DLAB); serial_dl_write(up, quot); } EXPORT_SYMBOL_GPL(serial8250_do_set_divisor); static void serial8250_set_divisor(struct uart_port *port, unsigned int baud, unsigned int quot, unsigned int quot_frac) { if (port->set_divisor) port->set_divisor(port, baud, quot, quot_frac); else serial8250_do_set_divisor(port, baud, quot); } static unsigned int serial8250_get_baud_rate(struct uart_port *port, struct ktermios *termios, const struct ktermios *old) { unsigned int tolerance = port->uartclk / 100; unsigned int min; unsigned int max; /* * Handle magic divisors for baud rates above baud_base on SMSC * Super I/O chips. Enable custom rates of clk/4 and clk/8, but * disable divisor values beyond 32767, which are unavailable. */ if (port->flags & UPF_MAGIC_MULTIPLIER) { min = port->uartclk / 16 / UART_DIV_MAX >> 1; max = (port->uartclk + tolerance) / 4; } else { min = port->uartclk / 16 / UART_DIV_MAX; max = (port->uartclk + tolerance) / 16; } /* * Ask the core to calculate the divisor for us. * Allow 1% tolerance at the upper limit so uart clks marginally * slower than nominal still match standard baud rates without * causing transmission errors. */ return uart_get_baud_rate(port, termios, old, min, max); } /* * Note in order to avoid the tty port mutex deadlock don't use the next method * within the uart port callbacks. Primarily it's supposed to be utilized to * handle a sudden reference clock rate change. */ void serial8250_update_uartclk(struct uart_port *port, unsigned int uartclk) { struct tty_port *tport = &port->state->port; struct tty_struct *tty; tty = tty_port_tty_get(tport); if (!tty) { mutex_lock(&tport->mutex); port->uartclk = uartclk; mutex_unlock(&tport->mutex); return; } down_write(&tty->termios_rwsem); mutex_lock(&tport->mutex); if (port->uartclk == uartclk) goto out_unlock; port->uartclk = uartclk; if (!tty_port_initialized(tport)) goto out_unlock; serial8250_do_set_termios(port, &tty->termios, NULL); out_unlock: mutex_unlock(&tport->mutex); up_write(&tty->termios_rwsem); tty_kref_put(tty); } EXPORT_SYMBOL_GPL(serial8250_update_uartclk); void serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, const struct ktermios *old) { struct uart_8250_port *up = up_to_u8250p(port); unsigned char cval; unsigned long flags; unsigned int baud, quot, frac = 0; if (up->capabilities & UART_CAP_MINI) { termios->c_cflag &= ~(CSTOPB | PARENB | PARODD | CMSPAR); if ((termios->c_cflag & CSIZE) == CS5 || (termios->c_cflag & CSIZE) == CS6) termios->c_cflag = (termios->c_cflag & ~CSIZE) | CS7; } cval = serial8250_compute_lcr(up, termios->c_cflag); baud = serial8250_get_baud_rate(port, termios, old); quot = serial8250_get_divisor(port, baud, &frac); /* * Ok, we're now changing the port state. Do it with * interrupts disabled. * * Synchronize UART_IER access against the console. */ serial8250_rpm_get(up); uart_port_lock_irqsave(port, &flags); up->lcr = cval; /* Save computed LCR */ if (up->capabilities & UART_CAP_FIFO && port->fifosize > 1) { if (baud < 2400 && !up->dma) { up->fcr &= ~UART_FCR_TRIGGER_MASK; up->fcr |= UART_FCR_TRIGGER_1; } } /* * MCR-based auto flow control. When AFE is enabled, RTS will be * deasserted when the receive FIFO contains more characters than * the trigger, or the MCR RTS bit is cleared. */ if (up->capabilities & UART_CAP_AFE) { up->mcr &= ~UART_MCR_AFE; if (termios->c_cflag & CRTSCTS) up->mcr |= UART_MCR_AFE; } /* * Update the per-port timeout. */ uart_update_timeout(port, termios->c_cflag, baud); /* * Specify which conditions may be considered for error * handling and the ignoring of characters. The actual * ignoring of characters only occurs if the bit is set * in @ignore_status_mask as well. */ port->read_status_mask = UART_LSR_OE | UART_LSR_DR; if (termios->c_iflag & INPCK) port->read_status_mask |= UART_LSR_FE | UART_LSR_PE; if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) port->read_status_mask |= UART_LSR_BI; /* * Characters to ignore */ port->ignore_status_mask = 0; if (termios->c_iflag & IGNPAR) port->ignore_status_mask |= UART_LSR_PE | UART_LSR_FE; if (termios->c_iflag & IGNBRK) { port->ignore_status_mask |= UART_LSR_BI; /* * If we're ignoring parity and break indicators, * ignore overruns too (for real raw support). */ if (termios->c_iflag & IGNPAR) port->ignore_status_mask |= UART_LSR_OE; } /* * ignore all characters if CREAD is not set */ if ((termios->c_cflag & CREAD) == 0) port->ignore_status_mask |= UART_LSR_DR; /* * CTS flow control flag and modem status interrupts */ up->ier &= ~UART_IER_MSI; if (!(up->bugs & UART_BUG_NOMSR) && UART_ENABLE_MS(&up->port, termios->c_cflag)) up->ier |= UART_IER_MSI; if (up->capabilities & UART_CAP_UUE) up->ier |= UART_IER_UUE; if (up->capabilities & UART_CAP_RTOIE) up->ier |= UART_IER_RTOIE; serial_port_out(port, UART_IER, up->ier); if (up->capabilities & UART_CAP_EFR) { unsigned char efr = 0; /* * TI16C752/Startech hardware flow control. FIXME: * - TI16C752 requires control thresholds to be set. * - UART_MCR_RTS is ineffective if auto-RTS mode is enabled. */ if (termios->c_cflag & CRTSCTS) efr |= UART_EFR_CTS; serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); if (port->flags & UPF_EXAR_EFR) serial_port_out(port, UART_XR_EFR, efr); else serial_port_out(port, UART_EFR, efr); } serial8250_set_divisor(port, baud, quot, frac); /* * LCR DLAB must be set to enable 64-byte FIFO mode. If the FCR * is written without DLAB set, this mode will be disabled. */ if (port->type == PORT_16750) serial_port_out(port, UART_FCR, up->fcr); serial_port_out(port, UART_LCR, up->lcr); /* reset DLAB */ if (port->type != PORT_16750) { /* emulated UARTs (Lucent Venus 167x) need two steps */ if (up->fcr & UART_FCR_ENABLE_FIFO) serial_port_out(port, UART_FCR, UART_FCR_ENABLE_FIFO); serial_port_out(port, UART_FCR, up->fcr); /* set fcr */ } serial8250_set_mctrl(port, port->mctrl); uart_port_unlock_irqrestore(port, flags); serial8250_rpm_put(up); /* Don't rewrite B0 */ if (tty_termios_baud_rate(termios)) tty_termios_encode_baud_rate(termios, baud, baud); } EXPORT_SYMBOL(serial8250_do_set_termios); static void serial8250_set_termios(struct uart_port *port, struct ktermios *termios, const struct ktermios *old) { if (port->set_termios) port->set_termios(port, termios, old); else serial8250_do_set_termios(port, termios, old); } void serial8250_do_set_ldisc(struct uart_port *port, struct ktermios *termios) { if (termios->c_line == N_PPS) { port->flags |= UPF_HARDPPS_CD; uart_port_lock_irq(port); serial8250_enable_ms(port); uart_port_unlock_irq(port); } else { port->flags &= ~UPF_HARDPPS_CD; if (!UART_ENABLE_MS(port, termios->c_cflag)) { uart_port_lock_irq(port); serial8250_disable_ms(port); uart_port_unlock_irq(port); } } } EXPORT_SYMBOL_GPL(serial8250_do_set_ldisc); static void serial8250_set_ldisc(struct uart_port *port, struct ktermios *termios) { if (port->set_ldisc) port->set_ldisc(port, termios); else serial8250_do_set_ldisc(port, termios); } void serial8250_do_pm(struct uart_port *port, unsigned int state, unsigned int oldstate) { struct uart_8250_port *p = up_to_u8250p(port); serial8250_set_sleep(p, state != 0); } EXPORT_SYMBOL(serial8250_do_pm); static void serial8250_pm(struct uart_port *port, unsigned int state, unsigned int oldstate) { if (port->pm) port->pm(port, state, oldstate); else serial8250_do_pm(port, state, oldstate); } static unsigned int serial8250_port_size(struct uart_8250_port *pt) { if (pt->port.mapsize) return pt->port.mapsize; if (is_omap1_8250(pt)) return 0x16 << pt->port.regshift; return 8 << pt->port.regshift; } /* * Resource handling. */ static int serial8250_request_std_resource(struct uart_8250_port *up) { unsigned int size = serial8250_port_size(up); struct uart_port *port = &up->port; int ret = 0; switch (port->iotype) { case UPIO_AU: case UPIO_TSI: case UPIO_MEM32: case UPIO_MEM32BE: case UPIO_MEM16: case UPIO_MEM: if (!port->mapbase) { ret = -EINVAL; break; } if (!request_mem_region(port->mapbase, size, "serial")) { ret = -EBUSY; break; } if (port->flags & UPF_IOREMAP) { port->membase = ioremap(port->mapbase, size); if (!port->membase) { release_mem_region(port->mapbase, size); ret = -ENOMEM; } } break; case UPIO_HUB6: case UPIO_PORT: if (!request_region(port->iobase, size, "serial")) ret = -EBUSY; break; } return ret; } static void serial8250_release_std_resource(struct uart_8250_port *up) { unsigned int size = serial8250_port_size(up); struct uart_port *port = &up->port; switch (port->iotype) { case UPIO_AU: case UPIO_TSI: case UPIO_MEM32: case UPIO_MEM32BE: case UPIO_MEM16: case UPIO_MEM: if (!port->mapbase) break; if (port->flags & UPF_IOREMAP) { iounmap(port->membase); port->membase = NULL; } release_mem_region(port->mapbase, size); break; case UPIO_HUB6: case UPIO_PORT: release_region(port->iobase, size); break; } } static void serial8250_release_port(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); serial8250_release_std_resource(up); } static int serial8250_request_port(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); return serial8250_request_std_resource(up); } static int fcr_get_rxtrig_bytes(struct uart_8250_port *up) { const struct serial8250_config *conf_type = &uart_config[up->port.type]; unsigned char bytes; bytes = conf_type->rxtrig_bytes[UART_FCR_R_TRIG_BITS(up->fcr)]; return bytes ? bytes : -EOPNOTSUPP; } static int bytes_to_fcr_rxtrig(struct uart_8250_port *up, unsigned char bytes) { const struct serial8250_config *conf_type = &uart_config[up->port.type]; int i; if (!conf_type->rxtrig_bytes[UART_FCR_R_TRIG_BITS(UART_FCR_R_TRIG_00)]) return -EOPNOTSUPP; for (i = 1; i < UART_FCR_R_TRIG_MAX_STATE; i++) { if (bytes < conf_type->rxtrig_bytes[i]) /* Use the nearest lower value */ return (--i) << UART_FCR_R_TRIG_SHIFT; } return UART_FCR_R_TRIG_11; } static int do_get_rxtrig(struct tty_port *port) { struct uart_state *state = container_of(port, struct uart_state, port); struct uart_port *uport = state->uart_port; struct uart_8250_port *up = up_to_u8250p(uport); if (!(up->capabilities & UART_CAP_FIFO) || uport->fifosize <= 1) return -EINVAL; return fcr_get_rxtrig_bytes(up); } static int do_serial8250_get_rxtrig(struct tty_port *port) { int rxtrig_bytes; mutex_lock(&port->mutex); rxtrig_bytes = do_get_rxtrig(port); mutex_unlock(&port->mutex); return rxtrig_bytes; } static ssize_t rx_trig_bytes_show(struct device *dev, struct device_attribute *attr, char *buf) { struct tty_port *port = dev_get_drvdata(dev); int rxtrig_bytes; rxtrig_bytes = do_serial8250_get_rxtrig(port); if (rxtrig_bytes < 0) return rxtrig_bytes; return sysfs_emit(buf, "%d\n", rxtrig_bytes); } static int do_set_rxtrig(struct tty_port *port, unsigned char bytes) { struct uart_state *state = container_of(port, struct uart_state, port); struct uart_port *uport = state->uart_port; struct uart_8250_port *up = up_to_u8250p(uport); int rxtrig; if (!(up->capabilities & UART_CAP_FIFO) || uport->fifosize <= 1) return -EINVAL; rxtrig = bytes_to_fcr_rxtrig(up, bytes); if (rxtrig < 0) return rxtrig; serial8250_clear_fifos(up); up->fcr &= ~UART_FCR_TRIGGER_MASK; up->fcr |= (unsigned char)rxtrig; serial_out(up, UART_FCR, up->fcr); return 0; } static int do_serial8250_set_rxtrig(struct tty_port *port, unsigned char bytes) { int ret; mutex_lock(&port->mutex); ret = do_set_rxtrig(port, bytes); mutex_unlock(&port->mutex); return ret; } static ssize_t rx_trig_bytes_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct tty_port *port = dev_get_drvdata(dev); unsigned char bytes; int ret; if (!count) return -EINVAL; ret = kstrtou8(buf, 10, &bytes); if (ret < 0) return ret; ret = do_serial8250_set_rxtrig(port, bytes); if (ret < 0) return ret; return count; } static DEVICE_ATTR_RW(rx_trig_bytes); static struct attribute *serial8250_dev_attrs[] = { &dev_attr_rx_trig_bytes.attr, NULL }; static struct attribute_group serial8250_dev_attr_group = { .attrs = serial8250_dev_attrs, }; static void register_dev_spec_attr_grp(struct uart_8250_port *up) { const struct serial8250_config *conf_type = &uart_config[up->port.type]; if (conf_type->rxtrig_bytes[0]) up->port.attr_group = &serial8250_dev_attr_group; } static void serial8250_config_port(struct uart_port *port, int flags) { struct uart_8250_port *up = up_to_u8250p(port); int ret; /* * Find the region that we can probe for. This in turn * tells us whether we can probe for the type of port. */ ret = serial8250_request_std_resource(up); if (ret < 0) return; if (port->iotype != up->cur_iotype) set_io_from_upio(port); if (flags & UART_CONFIG_TYPE) autoconfig(up); /* HW bugs may trigger IRQ while IIR == NO_INT */ if (port->type == PORT_TEGRA) up->bugs |= UART_BUG_NOMSR; if (port->type != PORT_UNKNOWN && flags & UART_CONFIG_IRQ) autoconfig_irq(up); if (port->type == PORT_UNKNOWN) serial8250_release_std_resource(up); register_dev_spec_attr_grp(up); up->fcr = uart_config[up->port.type].fcr; } static int serial8250_verify_port(struct uart_port *port, struct serial_struct *ser) { if (ser->irq >= irq_get_nr_irqs() || ser->irq < 0 || ser->baud_base < 9600 || ser->type < PORT_UNKNOWN || ser->type >= ARRAY_SIZE(uart_config) || ser->type == PORT_CIRRUS || ser->type == PORT_STARTECH) return -EINVAL; return 0; } static const char *serial8250_type(struct uart_port *port) { int type = port->type; if (type >= ARRAY_SIZE(uart_config)) type = 0; return uart_config[type].name; } static const struct uart_ops serial8250_pops = { .tx_empty = serial8250_tx_empty, .set_mctrl = serial8250_set_mctrl, .get_mctrl = serial8250_get_mctrl, .stop_tx = serial8250_stop_tx, .start_tx = serial8250_start_tx, .throttle = serial8250_throttle, .unthrottle = serial8250_unthrottle, .stop_rx = serial8250_stop_rx, .enable_ms = serial8250_enable_ms, .break_ctl = serial8250_break_ctl, .startup = serial8250_startup, .shutdown = serial8250_shutdown, .set_termios = serial8250_set_termios, .set_ldisc = serial8250_set_ldisc, .pm = serial8250_pm, .type = serial8250_type, .release_port = serial8250_release_port, .request_port = serial8250_request_port, .config_port = serial8250_config_port, .verify_port = serial8250_verify_port, #ifdef CONFIG_CONSOLE_POLL .poll_get_char = serial8250_get_poll_char, .poll_put_char = serial8250_put_poll_char, #endif }; void serial8250_init_port(struct uart_8250_port *up) { struct uart_port *port = &up->port; spin_lock_init(&port->lock); port->ctrl_id = 0; port->pm = NULL; port->ops = &serial8250_pops; port->has_sysrq = IS_ENABLED(CONFIG_SERIAL_8250_CONSOLE); up->cur_iotype = UPIO_UNKNOWN; } EXPORT_SYMBOL_GPL(serial8250_init_port); void serial8250_set_defaults(struct uart_8250_port *up) { struct uart_port *port = &up->port; if (up->port.flags & UPF_FIXED_TYPE) { unsigned int type = up->port.type; if (!up->port.fifosize) up->port.fifosize = uart_config[type].fifo_size; if (!up->tx_loadsz) up->tx_loadsz = uart_config[type].tx_loadsz; if (!up->capabilities) up->capabilities = uart_config[type].flags; } set_io_from_upio(port); /* default dma handlers */ if (up->dma) { if (!up->dma->tx_dma) up->dma->tx_dma = serial8250_tx_dma; if (!up->dma->rx_dma) up->dma->rx_dma = serial8250_rx_dma; } } EXPORT_SYMBOL_GPL(serial8250_set_defaults); #ifdef CONFIG_SERIAL_8250_CONSOLE static void serial8250_console_putchar(struct uart_port *port, unsigned char ch) { serial_port_out(port, UART_TX, ch); } static void serial8250_console_wait_putchar(struct uart_port *port, unsigned char ch) { struct uart_8250_port *up = up_to_u8250p(port); wait_for_xmitr(up, UART_LSR_THRE); serial8250_console_putchar(port, ch); } /* * Restore serial console when h/w power-off detected */ static void serial8250_console_restore(struct uart_8250_port *up) { struct uart_port *port = &up->port; struct ktermios termios; unsigned int baud, quot, frac = 0; termios.c_cflag = port->cons->cflag; termios.c_ispeed = port->cons->ispeed; termios.c_ospeed = port->cons->ospeed; if (port->state->port.tty && termios.c_cflag == 0) { termios.c_cflag = port->state->port.tty->termios.c_cflag; termios.c_ispeed = port->state->port.tty->termios.c_ispeed; termios.c_ospeed = port->state->port.tty->termios.c_ospeed; } baud = serial8250_get_baud_rate(port, &termios, NULL); quot = serial8250_get_divisor(port, baud, &frac); serial8250_set_divisor(port, baud, quot, frac); serial_port_out(port, UART_LCR, up->lcr); serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS); } static void fifo_wait_for_lsr(struct uart_8250_port *up, unsigned int count) { unsigned int i; for (i = 0; i < count; i++) { if (wait_for_lsr(up, UART_LSR_THRE)) return; } } /* * Print a string to the serial port using the device FIFO * * It sends fifosize bytes and then waits for the fifo * to get empty. */ static void serial8250_console_fifo_write(struct uart_8250_port *up, const char *s, unsigned int count) { const char *end = s + count; unsigned int fifosize = up->tx_loadsz; struct uart_port *port = &up->port; unsigned int tx_count = 0; bool cr_sent = false; unsigned int i; while (s != end) { /* Allow timeout for each byte of a possibly full FIFO */ fifo_wait_for_lsr(up, fifosize); for (i = 0; i < fifosize && s != end; ++i) { if (*s == '\n' && !cr_sent) { serial8250_console_putchar(port, '\r'); cr_sent = true; } else { serial8250_console_putchar(port, *s++); cr_sent = false; } } tx_count = i; } /* * Allow timeout for each byte written since the caller will only wait * for UART_LSR_BOTH_EMPTY using the timeout of a single character */ fifo_wait_for_lsr(up, tx_count); } /* * Print a string to the serial port trying not to disturb * any possible real use of the port... * * The console_lock must be held when we get here. * * Doing runtime PM is really a bad idea for the kernel console. * Thus, we assume the function is called when device is powered up. */ void serial8250_console_write(struct uart_8250_port *up, const char *s, unsigned int count) { struct uart_8250_em485 *em485 = up->em485; struct uart_port *port = &up->port; unsigned long flags; unsigned int ier, use_fifo; int locked = 1; touch_nmi_watchdog(); if (oops_in_progress) locked = uart_port_trylock_irqsave(port, &flags); else uart_port_lock_irqsave(port, &flags); /* * First save the IER then disable the interrupts */ ier = serial_port_in(port, UART_IER); serial8250_clear_IER(up); /* check scratch reg to see if port powered off during system sleep */ if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) { serial8250_console_restore(up); up->canary = 0; } if (em485) { if (em485->tx_stopped) up->rs485_start_tx(up, false); mdelay(port->rs485.delay_rts_before_send); } use_fifo = (up->capabilities & UART_CAP_FIFO) && /* * BCM283x requires to check the fifo * after each byte. */ !(up->capabilities & UART_CAP_MINI) && /* * tx_loadsz contains the transmit fifo size */ up->tx_loadsz > 1 && (up->fcr & UART_FCR_ENABLE_FIFO) && port->state && test_bit(TTY_PORT_INITIALIZED, &port->state->port.iflags) && /* * After we put a data in the fifo, the controller will send * it regardless of the CTS state. Therefore, only use fifo * if we don't use control flow. */ !(up->port.flags & UPF_CONS_FLOW); if (likely(use_fifo)) serial8250_console_fifo_write(up, s, count); else uart_console_write(port, s, count, serial8250_console_wait_putchar); /* * Finally, wait for transmitter to become empty * and restore the IER */ wait_for_xmitr(up, UART_LSR_BOTH_EMPTY); if (em485) { mdelay(port->rs485.delay_rts_after_send); if (em485->tx_stopped) up->rs485_stop_tx(up, false); } serial_port_out(port, UART_IER, ier); /* * The receive handling will happen properly because the * receive ready bit will still be set; it is not cleared * on read. However, modem control will not, we must * call it if we have saved something in the saved flags * while processing with interrupts off. */ if (up->msr_saved_flags) serial8250_modem_status(up); if (locked) uart_port_unlock_irqrestore(port, flags); } static unsigned int probe_baud(struct uart_port *port) { unsigned char lcr, dll, dlm; unsigned int quot; lcr = serial_port_in(port, UART_LCR); serial_port_out(port, UART_LCR, lcr | UART_LCR_DLAB); dll = serial_port_in(port, UART_DLL); dlm = serial_port_in(port, UART_DLM); serial_port_out(port, UART_LCR, lcr); quot = (dlm << 8) | dll; return (port->uartclk / 16) / quot; } int serial8250_console_setup(struct uart_port *port, char *options, bool probe) { int baud = 9600; int bits = 8; int parity = 'n'; int flow = 'n'; int ret; if (!port->iobase && !port->membase) return -ENODEV; if (options) uart_parse_options(options, &baud, &parity, &bits, &flow); else if (probe) baud = probe_baud(port); ret = uart_set_options(port, port->cons, baud, parity, bits, flow); if (ret) return ret; if (port->dev) pm_runtime_get_sync(port->dev); return 0; } int serial8250_console_exit(struct uart_port *port) { if (port->dev) pm_runtime_put_sync(port->dev); return 0; } #endif /* CONFIG_SERIAL_8250_CONSOLE */ MODULE_DESCRIPTION("Base port operations for 8250/16550-type serial ports"); MODULE_LICENSE("GPL");
2 1 1 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 // SPDX-License-Identifier: GPL-2.0-or-later /* * USB-to-WWAN Driver for Sierra Wireless modems * * Copyright (C) 2008, 2009, 2010 Paxton Smith, Matthew Safar, Rory Filer * <linux@sierrawireless.com> * * Portions of this based on the cdc_ether driver by David Brownell (2003-2005) * and Ole Andre Vadla Ravnas (ActiveSync) (2006). * * IMPORTANT DISCLAIMER: This driver is not commercially supported by * Sierra Wireless. Use at your own risk. */ #define DRIVER_VERSION "v.2.0" #define DRIVER_AUTHOR "Paxton Smith, Matthew Safar, Rory Filer" #define DRIVER_DESC "USB-to-WWAN Driver for Sierra Wireless modems" static const char driver_name[] = "sierra_net"; /* if defined debug messages enabled */ /*#define DEBUG*/ #include <linux/module.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> #include <linux/mii.h> #include <linux/sched.h> #include <linux/timer.h> #include <linux/usb.h> #include <linux/usb/cdc.h> #include <net/ip.h> #include <net/udp.h> #include <linux/unaligned.h> #include <linux/usb/usbnet.h> #define SWI_USB_REQUEST_GET_FW_ATTR 0x06 #define SWI_GET_FW_ATTR_MASK 0x08 /* atomic counter partially included in MAC address to make sure 2 devices * do not end up with the same MAC - concept breaks in case of > 255 ifaces */ static atomic_t iface_counter = ATOMIC_INIT(0); /* * SYNC Timer Delay definition used to set the expiry time */ #define SIERRA_NET_SYNCDELAY (2*HZ) /* Max. MTU supported. The modem buffers are limited to 1500 */ #define SIERRA_NET_MAX_SUPPORTED_MTU 1500 /* The SIERRA_NET_USBCTL_BUF_LEN defines a buffer size allocated for control * message reception ... and thus the max. received packet. * (May be the cause for parse_hip returning -EINVAL) */ #define SIERRA_NET_USBCTL_BUF_LEN 1024 /* Overriding the default usbnet rx_urb_size */ #define SIERRA_NET_RX_URB_SIZE (8 * 1024) /* Private data structure */ struct sierra_net_data { u16 link_up; /* air link up or down */ u8 tx_hdr_template[4]; /* part of HIP hdr for tx'd packets */ u8 sync_msg[4]; /* SYNC message */ u8 shdwn_msg[4]; /* Shutdown message */ /* Backpointer to the container */ struct usbnet *usbnet; u8 ifnum; /* interface number */ /* Bit masks, must be a power of 2 */ #define SIERRA_NET_EVENT_RESP_AVAIL 0x01 #define SIERRA_NET_TIMER_EXPIRY 0x02 unsigned long kevent_flags; struct work_struct sierra_net_kevent; struct timer_list sync_timer; /* For retrying SYNC sequence */ }; struct param { int is_present; union { void *ptr; u32 dword; u16 word; u8 byte; }; }; /* HIP message type */ #define SIERRA_NET_HIP_EXTENDEDID 0x7F #define SIERRA_NET_HIP_HSYNC_ID 0x60 /* Modem -> host */ #define SIERRA_NET_HIP_RESTART_ID 0x62 /* Modem -> host */ #define SIERRA_NET_HIP_MSYNC_ID 0x20 /* Host -> modem */ #define SIERRA_NET_HIP_SHUTD_ID 0x26 /* Host -> modem */ #define SIERRA_NET_HIP_EXT_IP_IN_ID 0x0202 #define SIERRA_NET_HIP_EXT_IP_OUT_ID 0x0002 /* 3G UMTS Link Sense Indication definitions */ #define SIERRA_NET_HIP_LSI_UMTSID 0x78 /* Reverse Channel Grant Indication HIP message */ #define SIERRA_NET_HIP_RCGI 0x64 /* LSI Protocol types */ #define SIERRA_NET_PROTOCOL_UMTS 0x01 #define SIERRA_NET_PROTOCOL_UMTS_DS 0x04 /* LSI Coverage */ #define SIERRA_NET_COVERAGE_NONE 0x00 #define SIERRA_NET_COVERAGE_NOPACKET 0x01 /* LSI Session */ #define SIERRA_NET_SESSION_IDLE 0x00 /* LSI Link types */ #define SIERRA_NET_AS_LINK_TYPE_IPV4 0x00 #define SIERRA_NET_AS_LINK_TYPE_IPV6 0x02 struct lsi_umts { u8 protocol; u8 unused1; __be16 length; /* eventually use a union for the rest - assume umts for now */ u8 coverage; u8 network_len; /* network name len */ u8 network[40]; /* network name (UCS2, bigendian) */ u8 session_state; u8 unused3[33]; } __packed; struct lsi_umts_single { struct lsi_umts lsi; u8 link_type; u8 pdp_addr_len; /* NW-supplied PDP address len */ u8 pdp_addr[16]; /* NW-supplied PDP address (bigendian)) */ u8 unused4[23]; u8 dns1_addr_len; /* NW-supplied 1st DNS address len (bigendian) */ u8 dns1_addr[16]; /* NW-supplied 1st DNS address */ u8 dns2_addr_len; /* NW-supplied 2nd DNS address len */ u8 dns2_addr[16]; /* NW-supplied 2nd DNS address (bigendian)*/ u8 wins1_addr_len; /* NW-supplied 1st Wins address len */ u8 wins1_addr[16]; /* NW-supplied 1st Wins address (bigendian)*/ u8 wins2_addr_len; /* NW-supplied 2nd Wins address len */ u8 wins2_addr[16]; /* NW-supplied 2nd Wins address (bigendian) */ u8 unused5[4]; u8 gw_addr_len; /* NW-supplied GW address len */ u8 gw_addr[16]; /* NW-supplied GW address (bigendian) */ u8 reserved[8]; } __packed; struct lsi_umts_dual { struct lsi_umts lsi; u8 pdp_addr4_len; /* NW-supplied PDP IPv4 address len */ u8 pdp_addr4[4]; /* NW-supplied PDP IPv4 address (bigendian)) */ u8 pdp_addr6_len; /* NW-supplied PDP IPv6 address len */ u8 pdp_addr6[16]; /* NW-supplied PDP IPv6 address (bigendian)) */ u8 unused4[23]; u8 dns1_addr4_len; /* NW-supplied 1st DNS v4 address len (bigendian) */ u8 dns1_addr4[4]; /* NW-supplied 1st DNS v4 address */ u8 dns1_addr6_len; /* NW-supplied 1st DNS v6 address len */ u8 dns1_addr6[16]; /* NW-supplied 1st DNS v6 address (bigendian)*/ u8 dns2_addr4_len; /* NW-supplied 2nd DNS v4 address len (bigendian) */ u8 dns2_addr4[4]; /* NW-supplied 2nd DNS v4 address */ u8 dns2_addr6_len; /* NW-supplied 2nd DNS v6 address len */ u8 dns2_addr6[16]; /* NW-supplied 2nd DNS v6 address (bigendian)*/ u8 unused5[68]; } __packed; #define SIERRA_NET_LSI_COMMON_LEN 4 #define SIERRA_NET_LSI_UMTS_LEN (sizeof(struct lsi_umts_single)) #define SIERRA_NET_LSI_UMTS_STATUS_LEN \ (SIERRA_NET_LSI_UMTS_LEN - SIERRA_NET_LSI_COMMON_LEN) #define SIERRA_NET_LSI_UMTS_DS_LEN (sizeof(struct lsi_umts_dual)) #define SIERRA_NET_LSI_UMTS_DS_STATUS_LEN \ (SIERRA_NET_LSI_UMTS_DS_LEN - SIERRA_NET_LSI_COMMON_LEN) /* Our own net device operations structure */ static const struct net_device_ops sierra_net_device_ops = { .ndo_open = usbnet_open, .ndo_stop = usbnet_stop, .ndo_start_xmit = usbnet_start_xmit, .ndo_tx_timeout = usbnet_tx_timeout, .ndo_change_mtu = usbnet_change_mtu, .ndo_get_stats64 = dev_get_tstats64, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; /* get private data associated with passed in usbnet device */ static inline struct sierra_net_data *sierra_net_get_private(struct usbnet *dev) { return (struct sierra_net_data *)dev->data[0]; } /* set private data associated with passed in usbnet device */ static inline void sierra_net_set_private(struct usbnet *dev, struct sierra_net_data *priv) { dev->data[0] = (unsigned long)priv; } /* is packet IPv4/IPv6 */ static inline int is_ip(struct sk_buff *skb) { return skb->protocol == cpu_to_be16(ETH_P_IP) || skb->protocol == cpu_to_be16(ETH_P_IPV6); } /* * check passed in packet and make sure that: * - it is linear (no scatter/gather) * - it is ethernet (mac_header properly set) */ static int check_ethip_packet(struct sk_buff *skb, struct usbnet *dev) { skb_reset_mac_header(skb); /* ethernet header */ if (skb_is_nonlinear(skb)) { netdev_err(dev->net, "Non linear buffer-dropping\n"); return 0; } if (!pskb_may_pull(skb, ETH_HLEN)) return 0; skb->protocol = eth_hdr(skb)->h_proto; return 1; } static const u8 *save16bit(struct param *p, const u8 *datap) { p->is_present = 1; p->word = get_unaligned_be16(datap); return datap + sizeof(p->word); } static const u8 *save8bit(struct param *p, const u8 *datap) { p->is_present = 1; p->byte = *datap; return datap + sizeof(p->byte); } /*----------------------------------------------------------------------------* * BEGIN HIP * *----------------------------------------------------------------------------*/ /* HIP header */ #define SIERRA_NET_HIP_HDR_LEN 4 /* Extended HIP header */ #define SIERRA_NET_HIP_EXT_HDR_LEN 6 struct hip_hdr { int hdrlen; struct param payload_len; struct param msgid; struct param msgspecific; struct param extmsgid; }; static int parse_hip(const u8 *buf, const u32 buflen, struct hip_hdr *hh) { const u8 *curp = buf; int padded; if (buflen < SIERRA_NET_HIP_HDR_LEN) return -EPROTO; curp = save16bit(&hh->payload_len, curp); curp = save8bit(&hh->msgid, curp); curp = save8bit(&hh->msgspecific, curp); padded = hh->msgid.byte & 0x80; hh->msgid.byte &= 0x7F; /* 7 bits */ hh->extmsgid.is_present = (hh->msgid.byte == SIERRA_NET_HIP_EXTENDEDID); if (hh->extmsgid.is_present) { if (buflen < SIERRA_NET_HIP_EXT_HDR_LEN) return -EPROTO; hh->payload_len.word &= 0x3FFF; /* 14 bits */ curp = save16bit(&hh->extmsgid, curp); hh->extmsgid.word &= 0x03FF; /* 10 bits */ hh->hdrlen = SIERRA_NET_HIP_EXT_HDR_LEN; } else { hh->payload_len.word &= 0x07FF; /* 11 bits */ hh->hdrlen = SIERRA_NET_HIP_HDR_LEN; } if (padded) { hh->hdrlen++; hh->payload_len.word--; } /* if real packet shorter than the claimed length */ if (buflen < (hh->hdrlen + hh->payload_len.word)) return -EINVAL; return 0; } static void build_hip(u8 *buf, const u16 payloadlen, struct sierra_net_data *priv) { /* the following doesn't have the full functionality. We * currently build only one kind of header, so it is faster this way */ put_unaligned_be16(payloadlen, buf); memcpy(buf+2, priv->tx_hdr_template, sizeof(priv->tx_hdr_template)); } /*----------------------------------------------------------------------------* * END HIP * *----------------------------------------------------------------------------*/ static int sierra_net_send_cmd(struct usbnet *dev, u8 *cmd, int cmdlen, const char * cmd_name) { struct sierra_net_data *priv = sierra_net_get_private(dev); int status; status = usbnet_write_cmd(dev, USB_CDC_SEND_ENCAPSULATED_COMMAND, USB_DIR_OUT|USB_TYPE_CLASS|USB_RECIP_INTERFACE, 0, priv->ifnum, cmd, cmdlen); if (status != cmdlen && status != -ENODEV) netdev_err(dev->net, "Submit %s failed %d\n", cmd_name, status); return status; } static int sierra_net_send_sync(struct usbnet *dev) { int status; struct sierra_net_data *priv = sierra_net_get_private(dev); dev_dbg(&dev->udev->dev, "%s", __func__); status = sierra_net_send_cmd(dev, priv->sync_msg, sizeof(priv->sync_msg), "SYNC"); return status; } static void sierra_net_set_ctx_index(struct sierra_net_data *priv, u8 ctx_ix) { dev_dbg(&(priv->usbnet->udev->dev), "%s %d", __func__, ctx_ix); priv->tx_hdr_template[0] = 0x3F; priv->tx_hdr_template[1] = ctx_ix; *((__be16 *)&priv->tx_hdr_template[2]) = cpu_to_be16(SIERRA_NET_HIP_EXT_IP_OUT_ID); } static int sierra_net_parse_lsi(struct usbnet *dev, char *data, int datalen) { struct lsi_umts *lsi = (struct lsi_umts *)data; u32 expected_length; if (datalen < sizeof(struct lsi_umts_single)) { netdev_err(dev->net, "%s: Data length %d, exp >= %zu\n", __func__, datalen, sizeof(struct lsi_umts_single)); return -1; } /* Validate the session state */ if (lsi->session_state == SIERRA_NET_SESSION_IDLE) { netdev_err(dev->net, "Session idle, 0x%02x\n", lsi->session_state); return 0; } /* Validate the protocol - only support UMTS for now */ if (lsi->protocol == SIERRA_NET_PROTOCOL_UMTS) { struct lsi_umts_single *single = (struct lsi_umts_single *)lsi; /* Validate the link type */ if (single->link_type != SIERRA_NET_AS_LINK_TYPE_IPV4 && single->link_type != SIERRA_NET_AS_LINK_TYPE_IPV6) { netdev_err(dev->net, "Link type unsupported: 0x%02x\n", single->link_type); return -1; } expected_length = SIERRA_NET_LSI_UMTS_STATUS_LEN; } else if (lsi->protocol == SIERRA_NET_PROTOCOL_UMTS_DS) { expected_length = SIERRA_NET_LSI_UMTS_DS_STATUS_LEN; } else { netdev_err(dev->net, "Protocol unsupported, 0x%02x\n", lsi->protocol); return -1; } if (be16_to_cpu(lsi->length) != expected_length) { netdev_err(dev->net, "%s: LSI_UMTS_STATUS_LEN %d, exp %u\n", __func__, be16_to_cpu(lsi->length), expected_length); return -1; } /* Validate the coverage */ if (lsi->coverage == SIERRA_NET_COVERAGE_NONE || lsi->coverage == SIERRA_NET_COVERAGE_NOPACKET) { netdev_err(dev->net, "No coverage, 0x%02x\n", lsi->coverage); return 0; } /* Set link_sense true */ return 1; } static void sierra_net_handle_lsi(struct usbnet *dev, char *data, struct hip_hdr *hh) { struct sierra_net_data *priv = sierra_net_get_private(dev); int link_up; link_up = sierra_net_parse_lsi(dev, data + hh->hdrlen, hh->payload_len.word); if (link_up < 0) { netdev_err(dev->net, "Invalid LSI\n"); return; } if (link_up) { sierra_net_set_ctx_index(priv, hh->msgspecific.byte); priv->link_up = 1; } else { priv->link_up = 0; } usbnet_link_change(dev, link_up, 0); } static void sierra_net_dosync(struct usbnet *dev) { int status; struct sierra_net_data *priv = sierra_net_get_private(dev); dev_dbg(&dev->udev->dev, "%s", __func__); /* The SIERRA_NET_HIP_MSYNC_ID command appears to request that the * firmware restart itself. After restarting, the modem will respond * with the SIERRA_NET_HIP_RESTART_ID indication. The driver continues * sending MSYNC commands every few seconds until it receives the * RESTART event from the firmware */ /* tell modem we are ready */ status = sierra_net_send_sync(dev); if (status < 0) netdev_err(dev->net, "Send SYNC failed, status %d\n", status); status = sierra_net_send_sync(dev); if (status < 0) netdev_err(dev->net, "Send SYNC failed, status %d\n", status); /* Now, start a timer and make sure we get the Restart Indication */ priv->sync_timer.expires = jiffies + SIERRA_NET_SYNCDELAY; add_timer(&priv->sync_timer); } static void sierra_net_kevent(struct work_struct *work) { struct sierra_net_data *priv = container_of(work, struct sierra_net_data, sierra_net_kevent); struct usbnet *dev = priv->usbnet; int len; int err; u8 *buf; u8 ifnum; if (test_bit(SIERRA_NET_EVENT_RESP_AVAIL, &priv->kevent_flags)) { clear_bit(SIERRA_NET_EVENT_RESP_AVAIL, &priv->kevent_flags); /* Query the modem for the LSI message */ buf = kzalloc(SIERRA_NET_USBCTL_BUF_LEN, GFP_KERNEL); if (!buf) return; ifnum = priv->ifnum; len = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0), USB_CDC_GET_ENCAPSULATED_RESPONSE, USB_DIR_IN|USB_TYPE_CLASS|USB_RECIP_INTERFACE, 0, ifnum, buf, SIERRA_NET_USBCTL_BUF_LEN, USB_CTRL_SET_TIMEOUT); if (len < 0) { netdev_err(dev->net, "usb_control_msg failed, status %d\n", len); } else { struct hip_hdr hh; dev_dbg(&dev->udev->dev, "%s: Received status message," " %04x bytes", __func__, len); err = parse_hip(buf, len, &hh); if (err) { netdev_err(dev->net, "%s: Bad packet," " parse result %d\n", __func__, err); kfree(buf); return; } /* Validate packet length */ if (len != hh.hdrlen + hh.payload_len.word) { netdev_err(dev->net, "%s: Bad packet, received" " %d, expected %d\n", __func__, len, hh.hdrlen + hh.payload_len.word); kfree(buf); return; } /* Switch on received message types */ switch (hh.msgid.byte) { case SIERRA_NET_HIP_LSI_UMTSID: dev_dbg(&dev->udev->dev, "LSI for ctx:%d", hh.msgspecific.byte); sierra_net_handle_lsi(dev, buf, &hh); break; case SIERRA_NET_HIP_RESTART_ID: dev_dbg(&dev->udev->dev, "Restart reported: %d," " stopping sync timer", hh.msgspecific.byte); /* Got sync resp - stop timer & clear mask */ del_timer_sync(&priv->sync_timer); clear_bit(SIERRA_NET_TIMER_EXPIRY, &priv->kevent_flags); break; case SIERRA_NET_HIP_HSYNC_ID: dev_dbg(&dev->udev->dev, "SYNC received"); err = sierra_net_send_sync(dev); if (err < 0) netdev_err(dev->net, "Send SYNC failed %d\n", err); break; case SIERRA_NET_HIP_EXTENDEDID: netdev_err(dev->net, "Unrecognized HIP msg, " "extmsgid 0x%04x\n", hh.extmsgid.word); break; case SIERRA_NET_HIP_RCGI: /* Ignored */ break; default: netdev_err(dev->net, "Unrecognized HIP msg, " "msgid 0x%02x\n", hh.msgid.byte); break; } } kfree(buf); } /* The sync timer bit might be set */ if (test_bit(SIERRA_NET_TIMER_EXPIRY, &priv->kevent_flags)) { clear_bit(SIERRA_NET_TIMER_EXPIRY, &priv->kevent_flags); dev_dbg(&dev->udev->dev, "Deferred sync timer expiry"); sierra_net_dosync(priv->usbnet); } if (priv->kevent_flags) dev_dbg(&dev->udev->dev, "sierra_net_kevent done, " "kevent_flags = 0x%lx", priv->kevent_flags); } static void sierra_net_defer_kevent(struct usbnet *dev, int work) { struct sierra_net_data *priv = sierra_net_get_private(dev); set_bit(work, &priv->kevent_flags); schedule_work(&priv->sierra_net_kevent); } /* * Sync Retransmit Timer Handler. On expiry, kick the work queue */ static void sierra_sync_timer(struct timer_list *t) { struct sierra_net_data *priv = from_timer(priv, t, sync_timer); struct usbnet *dev = priv->usbnet; dev_dbg(&dev->udev->dev, "%s", __func__); /* Kick the tasklet */ sierra_net_defer_kevent(dev, SIERRA_NET_TIMER_EXPIRY); } static void sierra_net_status(struct usbnet *dev, struct urb *urb) { struct usb_cdc_notification *event; dev_dbg(&dev->udev->dev, "%s", __func__); if (urb->actual_length < sizeof *event) return; /* Add cases to handle other standard notifications. */ event = urb->transfer_buffer; switch (event->bNotificationType) { case USB_CDC_NOTIFY_NETWORK_CONNECTION: case USB_CDC_NOTIFY_SPEED_CHANGE: /* USB 305 sends those */ break; case USB_CDC_NOTIFY_RESPONSE_AVAILABLE: sierra_net_defer_kevent(dev, SIERRA_NET_EVENT_RESP_AVAIL); break; default: netdev_err(dev->net, ": unexpected notification %02x!\n", event->bNotificationType); break; } } static void sierra_net_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *info) { /* Inherit standard device info */ usbnet_get_drvinfo(net, info); strscpy(info->driver, driver_name, sizeof(info->driver)); strscpy(info->version, DRIVER_VERSION, sizeof(info->version)); } static u32 sierra_net_get_link(struct net_device *net) { struct usbnet *dev = netdev_priv(net); /* Report link is down whenever the interface is down */ return sierra_net_get_private(dev)->link_up && netif_running(net); } static const struct ethtool_ops sierra_net_ethtool_ops = { .get_drvinfo = sierra_net_get_drvinfo, .get_link = sierra_net_get_link, .get_msglevel = usbnet_get_msglevel, .set_msglevel = usbnet_set_msglevel, .nway_reset = usbnet_nway_reset, .get_link_ksettings = usbnet_get_link_ksettings_mii, .set_link_ksettings = usbnet_set_link_ksettings_mii, }; static int sierra_net_get_fw_attr(struct usbnet *dev, u16 *datap) { int result = 0; __le16 attrdata; result = usbnet_read_cmd(dev, /* _u8 vendor specific request */ SWI_USB_REQUEST_GET_FW_ATTR, USB_DIR_IN | USB_TYPE_VENDOR, /* __u8 request type */ 0x0000, /* __u16 value not used */ 0x0000, /* __u16 index not used */ &attrdata, /* char *data */ sizeof(attrdata) /* __u16 size */ ); if (result < 0) return -EIO; *datap = le16_to_cpu(attrdata); return result; } /* * collects the bulk endpoints, the status endpoint. */ static int sierra_net_bind(struct usbnet *dev, struct usb_interface *intf) { u8 ifacenum; u8 numendpoints; u16 fwattr = 0; int status; struct sierra_net_data *priv; static const u8 sync_tmplate[sizeof(priv->sync_msg)] = { 0x00, 0x00, SIERRA_NET_HIP_MSYNC_ID, 0x00}; static const u8 shdwn_tmplate[sizeof(priv->shdwn_msg)] = { 0x00, 0x00, SIERRA_NET_HIP_SHUTD_ID, 0x00}; u8 mod[2]; dev_dbg(&dev->udev->dev, "%s", __func__); ifacenum = intf->cur_altsetting->desc.bInterfaceNumber; numendpoints = intf->cur_altsetting->desc.bNumEndpoints; /* We have three endpoints, bulk in and out, and a status */ if (numendpoints != 3) { dev_err(&dev->udev->dev, "Expected 3 endpoints, found: %d", numendpoints); return -ENODEV; } /* Status endpoint set in usbnet_get_endpoints() */ dev->status = NULL; status = usbnet_get_endpoints(dev, intf); if (status < 0) { dev_err(&dev->udev->dev, "Error in usbnet_get_endpoints (%d)", status); return -ENODEV; } /* Initialize sierra private data */ priv = kzalloc(sizeof *priv, GFP_KERNEL); if (!priv) return -ENOMEM; priv->usbnet = dev; priv->ifnum = ifacenum; dev->net->netdev_ops = &sierra_net_device_ops; /* change MAC addr to include, ifacenum, and to be unique */ mod[0] = atomic_inc_return(&iface_counter); mod[1] = ifacenum; dev_addr_mod(dev->net, ETH_ALEN - 2, mod, 2); /* prepare shutdown message template */ memcpy(priv->shdwn_msg, shdwn_tmplate, sizeof(priv->shdwn_msg)); /* set context index initially to 0 - prepares tx hdr template */ sierra_net_set_ctx_index(priv, 0); /* prepare sync message template */ memcpy(priv->sync_msg, sync_tmplate, sizeof(priv->sync_msg)); /* decrease the rx_urb_size and max_tx_size to 4k on USB 1.1 */ dev->rx_urb_size = SIERRA_NET_RX_URB_SIZE; if (dev->udev->speed != USB_SPEED_HIGH) dev->rx_urb_size = min_t(size_t, 4096, SIERRA_NET_RX_URB_SIZE); dev->net->hard_header_len += SIERRA_NET_HIP_EXT_HDR_LEN; dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len; dev->net->max_mtu = SIERRA_NET_MAX_SUPPORTED_MTU; /* Set up the netdev */ dev->net->flags |= IFF_NOARP; dev->net->ethtool_ops = &sierra_net_ethtool_ops; netif_carrier_off(dev->net); sierra_net_set_private(dev, priv); priv->kevent_flags = 0; /* Use the shared workqueue */ INIT_WORK(&priv->sierra_net_kevent, sierra_net_kevent); /* Only need to do this once */ timer_setup(&priv->sync_timer, sierra_sync_timer, 0); /* verify fw attributes */ status = sierra_net_get_fw_attr(dev, &fwattr); dev_dbg(&dev->udev->dev, "Fw attr: %x\n", fwattr); /* test whether firmware supports DHCP */ if (!(status == sizeof(fwattr) && (fwattr & SWI_GET_FW_ATTR_MASK))) { /* found incompatible firmware version */ dev_err(&dev->udev->dev, "Incompatible driver and firmware" " versions\n"); kfree(priv); return -ENODEV; } return 0; } static void sierra_net_unbind(struct usbnet *dev, struct usb_interface *intf) { int status; struct sierra_net_data *priv = sierra_net_get_private(dev); dev_dbg(&dev->udev->dev, "%s", __func__); /* kill the timer and work */ timer_shutdown_sync(&priv->sync_timer); cancel_work_sync(&priv->sierra_net_kevent); /* tell modem we are going away */ status = sierra_net_send_cmd(dev, priv->shdwn_msg, sizeof(priv->shdwn_msg), "Shutdown"); if (status < 0) netdev_err(dev->net, "usb_control_msg failed, status %d\n", status); usbnet_status_stop(dev); sierra_net_set_private(dev, NULL); kfree(priv); } static struct sk_buff *sierra_net_skb_clone(struct usbnet *dev, struct sk_buff *skb, int len) { struct sk_buff *new_skb; /* clone skb */ new_skb = skb_clone(skb, GFP_ATOMIC); /* remove len bytes from original */ skb_pull(skb, len); /* trim next packet to it's length */ if (new_skb) { skb_trim(new_skb, len); } else { if (netif_msg_rx_err(dev)) netdev_err(dev->net, "failed to get skb\n"); dev->net->stats.rx_dropped++; } return new_skb; } /* ---------------------------- Receive data path ----------------------*/ static int sierra_net_rx_fixup(struct usbnet *dev, struct sk_buff *skb) { int err; struct hip_hdr hh; struct sk_buff *new_skb; dev_dbg(&dev->udev->dev, "%s", __func__); /* could contain multiple packets */ while (likely(skb->len)) { err = parse_hip(skb->data, skb->len, &hh); if (err) { if (netif_msg_rx_err(dev)) netdev_err(dev->net, "Invalid HIP header %d\n", err); /* dev->net->stats.rx_errors incremented by caller */ dev->net->stats.rx_length_errors++; return 0; } /* Validate Extended HIP header */ if (!hh.extmsgid.is_present || hh.extmsgid.word != SIERRA_NET_HIP_EXT_IP_IN_ID) { if (netif_msg_rx_err(dev)) netdev_err(dev->net, "HIP/ETH: Invalid pkt\n"); dev->net->stats.rx_frame_errors++; /* dev->net->stats.rx_errors incremented by caller */ return 0; } skb_pull(skb, hh.hdrlen); /* We are going to accept this packet, prepare it. * In case protocol is IPv6, keep it, otherwise force IPv4. */ skb_reset_mac_header(skb); if (eth_hdr(skb)->h_proto != cpu_to_be16(ETH_P_IPV6)) eth_hdr(skb)->h_proto = cpu_to_be16(ETH_P_IP); eth_zero_addr(eth_hdr(skb)->h_source); memcpy(eth_hdr(skb)->h_dest, dev->net->dev_addr, ETH_ALEN); /* Last packet in batch handled by usbnet */ if (hh.payload_len.word == skb->len) return 1; new_skb = sierra_net_skb_clone(dev, skb, hh.payload_len.word); if (new_skb) usbnet_skb_return(dev, new_skb); } /* while */ return 0; } /* ---------------------------- Transmit data path ----------------------*/ static struct sk_buff *sierra_net_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) { struct sierra_net_data *priv = sierra_net_get_private(dev); u16 len; bool need_tail; BUILD_BUG_ON(sizeof_field(struct usbnet, data) < sizeof(struct cdc_state)); dev_dbg(&dev->udev->dev, "%s", __func__); if (priv->link_up && check_ethip_packet(skb, dev) && is_ip(skb)) { /* enough head room as is? */ if (SIERRA_NET_HIP_EXT_HDR_LEN <= skb_headroom(skb)) { /* Save the Eth/IP length and set up HIP hdr */ len = skb->len; skb_push(skb, SIERRA_NET_HIP_EXT_HDR_LEN); /* Handle ZLP issue */ need_tail = ((len + SIERRA_NET_HIP_EXT_HDR_LEN) % dev->maxpacket == 0); if (need_tail) { if (unlikely(skb_tailroom(skb) == 0)) { netdev_err(dev->net, "tx_fixup:" "no room for packet\n"); dev_kfree_skb_any(skb); return NULL; } else { skb->data[skb->len] = 0; __skb_put(skb, 1); len = len + 1; } } build_hip(skb->data, len, priv); return skb; } else { /* * compensate in the future if necessary */ netdev_err(dev->net, "tx_fixup: no room for HIP\n"); } /* headroom */ } if (!priv->link_up) dev->net->stats.tx_carrier_errors++; /* tx_dropped incremented by usbnet */ /* filter the packet out, release it */ dev_kfree_skb_any(skb); return NULL; } static const struct driver_info sierra_net_info_direct_ip = { .description = "Sierra Wireless USB-to-WWAN Modem", .flags = FLAG_WWAN | FLAG_SEND_ZLP, .bind = sierra_net_bind, .unbind = sierra_net_unbind, .status = sierra_net_status, .rx_fixup = sierra_net_rx_fixup, .tx_fixup = sierra_net_tx_fixup, }; static int sierra_net_probe(struct usb_interface *udev, const struct usb_device_id *prod) { int ret; ret = usbnet_probe(udev, prod); if (ret == 0) { struct usbnet *dev = usb_get_intfdata(udev); ret = usbnet_status_start(dev, GFP_KERNEL); if (ret == 0) { /* Interrupt URB now set up; initiate sync sequence */ sierra_net_dosync(dev); } } return ret; } #define DIRECT_IP_DEVICE(vend, prod) \ {USB_DEVICE_INTERFACE_NUMBER(vend, prod, 7), \ .driver_info = (unsigned long)&sierra_net_info_direct_ip}, \ {USB_DEVICE_INTERFACE_NUMBER(vend, prod, 10), \ .driver_info = (unsigned long)&sierra_net_info_direct_ip}, \ {USB_DEVICE_INTERFACE_NUMBER(vend, prod, 11), \ .driver_info = (unsigned long)&sierra_net_info_direct_ip} static const struct usb_device_id products[] = { DIRECT_IP_DEVICE(0x1199, 0x68A3), /* Sierra Wireless USB-to-WWAN modem */ DIRECT_IP_DEVICE(0x0F3D, 0x68A3), /* AT&T Direct IP modem */ DIRECT_IP_DEVICE(0x1199, 0x68AA), /* Sierra Wireless Direct IP LTE modem */ DIRECT_IP_DEVICE(0x0F3D, 0x68AA), /* AT&T Direct IP LTE modem */ {}, /* last item */ }; MODULE_DEVICE_TABLE(usb, products); /* We are based on usbnet, so let it handle the USB driver specifics */ static struct usb_driver sierra_net_driver = { .name = "sierra_net", .id_table = products, .probe = sierra_net_probe, .disconnect = usbnet_disconnect, .suspend = usbnet_suspend, .resume = usbnet_resume, .no_dynamic_id = 1, .disable_hub_initiated_lpm = 1, }; module_usb_driver(sierra_net_driver); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_VERSION(DRIVER_VERSION); MODULE_LICENSE("GPL");
918 907 81 1639 9 8 8 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_DST_METADATA_H #define __NET_DST_METADATA_H 1 #include <linux/skbuff.h> #include <net/ip_tunnels.h> #include <net/macsec.h> #include <net/dst.h> enum metadata_type { METADATA_IP_TUNNEL, METADATA_HW_PORT_MUX, METADATA_MACSEC, METADATA_XFRM, }; struct hw_port_info { struct net_device *lower_dev; u32 port_id; }; struct macsec_info { sci_t sci; }; struct xfrm_md_info { u32 if_id; int link; struct dst_entry *dst_orig; }; struct metadata_dst { struct dst_entry dst; enum metadata_type type; union { struct ip_tunnel_info tun_info; struct hw_port_info port_info; struct macsec_info macsec_info; struct xfrm_md_info xfrm_info; } u; }; static inline struct metadata_dst *skb_metadata_dst(const struct sk_buff *skb) { struct metadata_dst *md_dst = (struct metadata_dst *) skb_dst(skb); if (md_dst && md_dst->dst.flags & DST_METADATA) return md_dst; return NULL; } static inline struct ip_tunnel_info * skb_tunnel_info(const struct sk_buff *skb) { struct metadata_dst *md_dst = skb_metadata_dst(skb); struct dst_entry *dst; if (md_dst && md_dst->type == METADATA_IP_TUNNEL) return &md_dst->u.tun_info; dst = skb_dst(skb); if (dst && dst->lwtstate && (dst->lwtstate->type == LWTUNNEL_ENCAP_IP || dst->lwtstate->type == LWTUNNEL_ENCAP_IP6)) return lwt_tun_info(dst->lwtstate); return NULL; } static inline struct xfrm_md_info *lwt_xfrm_info(struct lwtunnel_state *lwt) { return (struct xfrm_md_info *)lwt->data; } static inline struct xfrm_md_info *skb_xfrm_md_info(const struct sk_buff *skb) { struct metadata_dst *md_dst = skb_metadata_dst(skb); struct dst_entry *dst; if (md_dst && md_dst->type == METADATA_XFRM) return &md_dst->u.xfrm_info; dst = skb_dst(skb); if (dst && dst->lwtstate && dst->lwtstate->type == LWTUNNEL_ENCAP_XFRM) return lwt_xfrm_info(dst->lwtstate); return NULL; } static inline bool skb_valid_dst(const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); return dst && !(dst->flags & DST_METADATA); } static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a, const struct sk_buff *skb_b) { const struct metadata_dst *a, *b; if (!(skb_a->_skb_refdst | skb_b->_skb_refdst)) return 0; a = (const struct metadata_dst *) skb_dst(skb_a); b = (const struct metadata_dst *) skb_dst(skb_b); if (!a != !b || a->type != b->type) return 1; switch (a->type) { case METADATA_HW_PORT_MUX: return memcmp(&a->u.port_info, &b->u.port_info, sizeof(a->u.port_info)); case METADATA_IP_TUNNEL: return memcmp(&a->u.tun_info, &b->u.tun_info, sizeof(a->u.tun_info) + a->u.tun_info.options_len); case METADATA_MACSEC: return memcmp(&a->u.macsec_info, &b->u.macsec_info, sizeof(a->u.macsec_info)); case METADATA_XFRM: return memcmp(&a->u.xfrm_info, &b->u.xfrm_info, sizeof(a->u.xfrm_info)); default: return 1; } } void metadata_dst_free(struct metadata_dst *); struct metadata_dst *metadata_dst_alloc(u8 optslen, enum metadata_type type, gfp_t flags); void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst); struct metadata_dst __percpu * metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags); static inline struct metadata_dst *tun_rx_dst(int md_size) { struct metadata_dst *tun_dst; tun_dst = metadata_dst_alloc(md_size, METADATA_IP_TUNNEL, GFP_ATOMIC); if (!tun_dst) return NULL; tun_dst->u.tun_info.options_len = 0; tun_dst->u.tun_info.mode = 0; return tun_dst; } static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb) { struct metadata_dst *md_dst = skb_metadata_dst(skb); int md_size; struct metadata_dst *new_md; if (!md_dst || md_dst->type != METADATA_IP_TUNNEL) return ERR_PTR(-EINVAL); md_size = md_dst->u.tun_info.options_len; new_md = metadata_dst_alloc(md_size, METADATA_IP_TUNNEL, GFP_ATOMIC); if (!new_md) return ERR_PTR(-ENOMEM); unsafe_memcpy(&new_md->u.tun_info, &md_dst->u.tun_info, sizeof(struct ip_tunnel_info) + md_size, /* metadata_dst_alloc() reserves room (md_size bytes) for * options right after the ip_tunnel_info struct. */); #ifdef CONFIG_DST_CACHE /* Unclone the dst cache if there is one */ if (new_md->u.tun_info.dst_cache.cache) { int ret; ret = dst_cache_init(&new_md->u.tun_info.dst_cache, GFP_ATOMIC); if (ret) { metadata_dst_free(new_md); return ERR_PTR(ret); } } #endif skb_dst_drop(skb); skb_dst_set(skb, &new_md->dst); return new_md; } static inline struct ip_tunnel_info *skb_tunnel_info_unclone(struct sk_buff *skb) { struct metadata_dst *dst; dst = tun_dst_unclone(skb); if (IS_ERR(dst)) return NULL; return &dst->u.tun_info; } static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr, __be32 daddr, __u8 tos, __u8 ttl, __be16 tp_dst, const unsigned long *flags, __be64 tunnel_id, int md_size) { struct metadata_dst *tun_dst; tun_dst = tun_rx_dst(md_size); if (!tun_dst) return NULL; ip_tunnel_key_init(&tun_dst->u.tun_info.key, saddr, daddr, tos, ttl, 0, 0, tp_dst, tunnel_id, flags); return tun_dst; } static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, const unsigned long *flags, __be64 tunnel_id, int md_size) { const struct iphdr *iph = ip_hdr(skb); return __ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl, 0, flags, tunnel_id, md_size); } static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *saddr, const struct in6_addr *daddr, __u8 tos, __u8 ttl, __be16 tp_dst, __be32 label, const unsigned long *flags, __be64 tunnel_id, int md_size) { struct metadata_dst *tun_dst; struct ip_tunnel_info *info; tun_dst = tun_rx_dst(md_size); if (!tun_dst) return NULL; info = &tun_dst->u.tun_info; info->mode = IP_TUNNEL_INFO_IPV6; ip_tunnel_flags_copy(info->key.tun_flags, flags); info->key.tun_id = tunnel_id; info->key.tp_src = 0; info->key.tp_dst = tp_dst; info->key.u.ipv6.src = *saddr; info->key.u.ipv6.dst = *daddr; info->key.tos = tos; info->key.ttl = ttl; info->key.label = label; return tun_dst; } static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, const unsigned long *flags, __be64 tunnel_id, int md_size) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); return __ipv6_tun_set_dst(&ip6h->saddr, &ip6h->daddr, ipv6_get_dsfield(ip6h), ip6h->hop_limit, 0, ip6_flowlabel(ip6h), flags, tunnel_id, md_size); } #endif /* __NET_DST_METADATA_H */
10 14 9 8 5 14 79 1655 1653 31 8 5 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 // SPDX-License-Identifier: GPL-2.0-only #include <linux/etherdevice.h> #include <linux/if_macvlan.h> #include <linux/if_tap.h> #include <linux/if_vlan.h> #include <linux/interrupt.h> #include <linux/nsproxy.h> #include <linux/compat.h> #include <linux/if_tun.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/cache.h> #include <linux/sched/signal.h> #include <linux/types.h> #include <linux/slab.h> #include <linux/wait.h> #include <linux/cdev.h> #include <linux/idr.h> #include <linux/fs.h> #include <linux/uio.h> #include <net/net_namespace.h> #include <net/rtnetlink.h> #include <net/sock.h> #include <linux/virtio_net.h> #include <linux/skb_array.h> struct macvtap_dev { struct macvlan_dev vlan; struct tap_dev tap; }; /* * Variables for dealing with macvtaps device numbers. */ static dev_t macvtap_major; static const void *macvtap_net_namespace(const struct device *d) { const struct net_device *dev = to_net_dev(d->parent); return dev_net(dev); } static struct class macvtap_class = { .name = "macvtap", .ns_type = &net_ns_type_operations, .namespace = macvtap_net_namespace, }; static struct cdev macvtap_cdev; #define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ NETIF_F_TSO6) static void macvtap_count_tx_dropped(struct tap_dev *tap) { struct macvtap_dev *vlantap = container_of(tap, struct macvtap_dev, tap); struct macvlan_dev *vlan = &vlantap->vlan; this_cpu_inc(vlan->pcpu_stats->tx_dropped); } static void macvtap_count_rx_dropped(struct tap_dev *tap) { struct macvtap_dev *vlantap = container_of(tap, struct macvtap_dev, tap); struct macvlan_dev *vlan = &vlantap->vlan; macvlan_count_rx(vlan, 0, 0, 0); } static void macvtap_update_features(struct tap_dev *tap, netdev_features_t features) { struct macvtap_dev *vlantap = container_of(tap, struct macvtap_dev, tap); struct macvlan_dev *vlan = &vlantap->vlan; vlan->set_features = features; netdev_update_features(vlan->dev); } static int macvtap_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct macvtap_dev *vlantap = netdev_priv(dev); int err; INIT_LIST_HEAD(&vlantap->tap.queue_list); /* Since macvlan supports all offloads by default, make * tap support all offloads also. */ vlantap->tap.tap_features = TUN_OFFLOADS; /* Register callbacks for rx/tx drops accounting and updating * net_device features */ vlantap->tap.count_tx_dropped = macvtap_count_tx_dropped; vlantap->tap.count_rx_dropped = macvtap_count_rx_dropped; vlantap->tap.update_features = macvtap_update_features; err = netdev_rx_handler_register(dev, tap_handle_frame, &vlantap->tap); if (err) return err; /* Don't put anything that may fail after macvlan_common_newlink * because we can't undo what it does. */ err = macvlan_common_newlink(src_net, dev, tb, data, extack); if (err) { netdev_rx_handler_unregister(dev); return err; } vlantap->tap.dev = vlantap->vlan.dev; return 0; } static void macvtap_dellink(struct net_device *dev, struct list_head *head) { struct macvtap_dev *vlantap = netdev_priv(dev); netdev_rx_handler_unregister(dev); tap_del_queues(&vlantap->tap); macvlan_dellink(dev, head); } static void macvtap_setup(struct net_device *dev) { macvlan_common_setup(dev); dev->tx_queue_len = TUN_READQ_SIZE; } static struct net *macvtap_link_net(const struct net_device *dev) { return dev_net(macvlan_dev_real_dev(dev)); } static struct rtnl_link_ops macvtap_link_ops __read_mostly = { .kind = "macvtap", .setup = macvtap_setup, .newlink = macvtap_newlink, .dellink = macvtap_dellink, .get_link_net = macvtap_link_net, .priv_size = sizeof(struct macvtap_dev), }; static int macvtap_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct macvtap_dev *vlantap; struct device *classdev; dev_t devt; int err; char tap_name[IFNAMSIZ]; if (dev->rtnl_link_ops != &macvtap_link_ops) return NOTIFY_DONE; snprintf(tap_name, IFNAMSIZ, "tap%d", dev->ifindex); vlantap = netdev_priv(dev); switch (event) { case NETDEV_REGISTER: /* Create the device node here after the network device has * been registered but before register_netdevice has * finished running. */ err = tap_get_minor(macvtap_major, &vlantap->tap); if (err) return notifier_from_errno(err); devt = MKDEV(MAJOR(macvtap_major), vlantap->tap.minor); classdev = device_create(&macvtap_class, &dev->dev, devt, dev, "%s", tap_name); if (IS_ERR(classdev)) { tap_free_minor(macvtap_major, &vlantap->tap); return notifier_from_errno(PTR_ERR(classdev)); } err = sysfs_create_link(&dev->dev.kobj, &classdev->kobj, tap_name); if (err) return notifier_from_errno(err); break; case NETDEV_UNREGISTER: /* vlan->minor == 0 if NETDEV_REGISTER above failed */ if (vlantap->tap.minor == 0) break; sysfs_remove_link(&dev->dev.kobj, tap_name); devt = MKDEV(MAJOR(macvtap_major), vlantap->tap.minor); device_destroy(&macvtap_class, devt); tap_free_minor(macvtap_major, &vlantap->tap); break; case NETDEV_CHANGE_TX_QUEUE_LEN: if (tap_queue_resize(&vlantap->tap)) return NOTIFY_BAD; break; } return NOTIFY_DONE; } static struct notifier_block macvtap_notifier_block __read_mostly = { .notifier_call = macvtap_device_event, }; static int __init macvtap_init(void) { int err; err = tap_create_cdev(&macvtap_cdev, &macvtap_major, "macvtap", THIS_MODULE); if (err) goto out1; err = class_register(&macvtap_class); if (err) goto out2; err = register_netdevice_notifier(&macvtap_notifier_block); if (err) goto out3; err = macvlan_link_register(&macvtap_link_ops); if (err) goto out4; return 0; out4: unregister_netdevice_notifier(&macvtap_notifier_block); out3: class_unregister(&macvtap_class); out2: tap_destroy_cdev(macvtap_major, &macvtap_cdev); out1: return err; } module_init(macvtap_init); static void __exit macvtap_exit(void) { rtnl_link_unregister(&macvtap_link_ops); unregister_netdevice_notifier(&macvtap_notifier_block); class_unregister(&macvtap_class); tap_destroy_cdev(macvtap_major, &macvtap_cdev); } module_exit(macvtap_exit); MODULE_ALIAS_RTNL_LINK("macvtap"); MODULE_DESCRIPTION("MAC-VLAN based tap driver"); MODULE_AUTHOR("Arnd Bergmann <arnd@arndb.de>"); MODULE_LICENSE("GPL");
6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2018 Intel Corporation. All rights reserved. */ #undef TRACE_SYSTEM #define TRACE_SYSTEM ib_mad #if !defined(_TRACE_IB_MAD_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_IB_MAD_H #include <linux/tracepoint.h> #include <rdma/ib_mad.h> #ifdef CONFIG_TRACEPOINTS struct trace_event_raw_ib_mad_send_template; static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr, struct ib_mad_qp_info *qp_info, struct trace_event_raw_ib_mad_send_template *entry); #endif DECLARE_EVENT_CLASS(ib_mad_send_template, TP_PROTO(struct ib_mad_send_wr_private *wr, struct ib_mad_qp_info *qp_info), TP_ARGS(wr, qp_info), TP_STRUCT__entry( __field(u8, base_version) __field(u8, mgmt_class) __field(u8, class_version) __field(u8, port_num) __field(u32, qp_num) __field(u8, method) __field(u8, sl) __field(u16, attr_id) __field(u32, attr_mod) __field(u64, wrtid) __field(u64, tid) __field(u16, status) __field(u16, class_specific) __field(u32, length) __field(u32, dlid) __field(u32, rqpn) __field(u32, rqkey) __field(u32, dev_index) __field(void *, agent_priv) __field(unsigned long, timeout) __field(int, retries_left) __field(int, max_retries) __field(int, retry) ), TP_fast_assign( __entry->dev_index = wr->mad_agent_priv->agent.device->index; __entry->port_num = wr->mad_agent_priv->agent.port_num; __entry->qp_num = wr->mad_agent_priv->qp_info->qp->qp_num; __entry->agent_priv = wr->mad_agent_priv; __entry->wrtid = wr->tid; __entry->max_retries = wr->max_retries; __entry->retries_left = wr->retries_left; __entry->retry = wr->retry; __entry->timeout = wr->timeout; __entry->length = wr->send_buf.hdr_len + wr->send_buf.data_len; __entry->base_version = ((struct ib_mad_hdr *)wr->send_buf.mad)->base_version; __entry->mgmt_class = ((struct ib_mad_hdr *)wr->send_buf.mad)->mgmt_class; __entry->class_version = ((struct ib_mad_hdr *)wr->send_buf.mad)->class_version; __entry->method = ((struct ib_mad_hdr *)wr->send_buf.mad)->method; __entry->status = ((struct ib_mad_hdr *)wr->send_buf.mad)->status; __entry->class_specific = ((struct ib_mad_hdr *)wr->send_buf.mad)->class_specific; __entry->tid = ((struct ib_mad_hdr *)wr->send_buf.mad)->tid; __entry->attr_id = ((struct ib_mad_hdr *)wr->send_buf.mad)->attr_id; __entry->attr_mod = ((struct ib_mad_hdr *)wr->send_buf.mad)->attr_mod; create_mad_addr_info(wr, qp_info, __entry); ), TP_printk("%d:%d QP%d agent %p: " \ "wrtid 0x%llx; %d/%d retries(%d); timeout %lu length %d : " \ "hdr : base_ver 0x%x class 0x%x class_ver 0x%x " \ "method 0x%x status 0x%x class_specific 0x%x tid 0x%llx " \ "attr_id 0x%x attr_mod 0x%x => dlid 0x%08x sl %d "\ "rpqn 0x%x rqpkey 0x%x", __entry->dev_index, __entry->port_num, __entry->qp_num, __entry->agent_priv, be64_to_cpu(__entry->wrtid), __entry->retries_left, __entry->max_retries, __entry->retry, __entry->timeout, __entry->length, __entry->base_version, __entry->mgmt_class, __entry->class_version, __entry->method, be16_to_cpu(__entry->status), be16_to_cpu(__entry->class_specific), be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id), be32_to_cpu(__entry->attr_mod), be32_to_cpu(__entry->dlid), __entry->sl, __entry->rqpn, __entry->rqkey ) ); DEFINE_EVENT(ib_mad_send_template, ib_mad_error_handler, TP_PROTO(struct ib_mad_send_wr_private *wr, struct ib_mad_qp_info *qp_info), TP_ARGS(wr, qp_info)); DEFINE_EVENT(ib_mad_send_template, ib_mad_ib_send_mad, TP_PROTO(struct ib_mad_send_wr_private *wr, struct ib_mad_qp_info *qp_info), TP_ARGS(wr, qp_info)); DEFINE_EVENT(ib_mad_send_template, ib_mad_send_done_resend, TP_PROTO(struct ib_mad_send_wr_private *wr, struct ib_mad_qp_info *qp_info), TP_ARGS(wr, qp_info)); TRACE_EVENT(ib_mad_send_done_handler, TP_PROTO(struct ib_mad_send_wr_private *wr, struct ib_wc *wc), TP_ARGS(wr, wc), TP_STRUCT__entry( __field(u8, port_num) __field(u8, base_version) __field(u8, mgmt_class) __field(u8, class_version) __field(u32, qp_num) __field(u64, wrtid) __field(u16, status) __field(u16, wc_status) __field(u32, length) __field(void *, agent_priv) __field(unsigned long, timeout) __field(u32, dev_index) __field(int, retries_left) __field(int, max_retries) __field(int, retry) __field(u8, method) ), TP_fast_assign( __entry->dev_index = wr->mad_agent_priv->agent.device->index; __entry->port_num = wr->mad_agent_priv->agent.port_num; __entry->qp_num = wr->mad_agent_priv->qp_info->qp->qp_num; __entry->agent_priv = wr->mad_agent_priv; __entry->wrtid = wr->tid; __entry->max_retries = wr->max_retries; __entry->retries_left = wr->retries_left; __entry->retry = wr->retry; __entry->timeout = wr->timeout; __entry->base_version = ((struct ib_mad_hdr *)wr->send_buf.mad)->base_version; __entry->mgmt_class = ((struct ib_mad_hdr *)wr->send_buf.mad)->mgmt_class; __entry->class_version = ((struct ib_mad_hdr *)wr->send_buf.mad)->class_version; __entry->method = ((struct ib_mad_hdr *)wr->send_buf.mad)->method; __entry->status = ((struct ib_mad_hdr *)wr->send_buf.mad)->status; __entry->wc_status = wc->status; __entry->length = wc->byte_len; ), TP_printk("%d:%d QP%d : SEND WC Status %d : agent %p: " \ "wrtid 0x%llx %d/%d retries(%d) timeout %lu length %d: " \ "hdr : base_ver 0x%x class 0x%x class_ver 0x%x " \ "method 0x%x status 0x%x", __entry->dev_index, __entry->port_num, __entry->qp_num, __entry->wc_status, __entry->agent_priv, be64_to_cpu(__entry->wrtid), __entry->retries_left, __entry->max_retries, __entry->retry, __entry->timeout, __entry->length, __entry->base_version, __entry->mgmt_class, __entry->class_version, __entry->method, be16_to_cpu(__entry->status) ) ); TRACE_EVENT(ib_mad_recv_done_handler, TP_PROTO(struct ib_mad_qp_info *qp_info, struct ib_wc *wc, struct ib_mad_hdr *mad_hdr), TP_ARGS(qp_info, wc, mad_hdr), TP_STRUCT__entry( __field(u8, base_version) __field(u8, mgmt_class) __field(u8, class_version) __field(u8, port_num) __field(u32, qp_num) __field(u16, status) __field(u16, class_specific) __field(u32, length) __field(u64, tid) __field(u8, method) __field(u8, sl) __field(u16, attr_id) __field(u32, attr_mod) __field(u16, src_qp) __field(u16, wc_status) __field(u32, slid) __field(u32, dev_index) ), TP_fast_assign( __entry->dev_index = qp_info->port_priv->device->index; __entry->port_num = qp_info->port_priv->port_num; __entry->qp_num = qp_info->qp->qp_num; __entry->length = wc->byte_len; __entry->base_version = mad_hdr->base_version; __entry->mgmt_class = mad_hdr->mgmt_class; __entry->class_version = mad_hdr->class_version; __entry->method = mad_hdr->method; __entry->status = mad_hdr->status; __entry->class_specific = mad_hdr->class_specific; __entry->tid = mad_hdr->tid; __entry->attr_id = mad_hdr->attr_id; __entry->attr_mod = mad_hdr->attr_mod; __entry->slid = wc->slid; __entry->src_qp = wc->src_qp; __entry->sl = wc->sl; __entry->wc_status = wc->status; ), TP_printk("%d:%d QP%d : RECV WC Status %d : length %d : hdr : " \ "base_ver 0x%02x class 0x%02x class_ver 0x%02x " \ "method 0x%02x status 0x%04x class_specific 0x%04x " \ "tid 0x%016llx attr_id 0x%04x attr_mod 0x%08x " \ "slid 0x%08x src QP%d, sl %d", __entry->dev_index, __entry->port_num, __entry->qp_num, __entry->wc_status, __entry->length, __entry->base_version, __entry->mgmt_class, __entry->class_version, __entry->method, be16_to_cpu(__entry->status), be16_to_cpu(__entry->class_specific), be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id), be32_to_cpu(__entry->attr_mod), __entry->slid, __entry->src_qp, __entry->sl ) ); DECLARE_EVENT_CLASS(ib_mad_agent_template, TP_PROTO(struct ib_mad_agent_private *agent), TP_ARGS(agent), TP_STRUCT__entry( __field(u32, dev_index) __field(u32, hi_tid) __field(u8, port_num) __field(u8, mgmt_class) __field(u8, mgmt_class_version) ), TP_fast_assign( __entry->dev_index = agent->agent.device->index; __entry->port_num = agent->agent.port_num; __entry->hi_tid = agent->agent.hi_tid; if (agent->reg_req) { __entry->mgmt_class = agent->reg_req->mgmt_class; __entry->mgmt_class_version = agent->reg_req->mgmt_class_version; } else { __entry->mgmt_class = 0; __entry->mgmt_class_version = 0; } ), TP_printk("%d:%d mad agent : hi_tid 0x%08x class 0x%02x class_ver 0x%02x", __entry->dev_index, __entry->port_num, __entry->hi_tid, __entry->mgmt_class, __entry->mgmt_class_version ) ); DEFINE_EVENT(ib_mad_agent_template, ib_mad_recv_done_agent, TP_PROTO(struct ib_mad_agent_private *agent), TP_ARGS(agent)); DEFINE_EVENT(ib_mad_agent_template, ib_mad_send_done_agent, TP_PROTO(struct ib_mad_agent_private *agent), TP_ARGS(agent)); DEFINE_EVENT(ib_mad_agent_template, ib_mad_create_agent, TP_PROTO(struct ib_mad_agent_private *agent), TP_ARGS(agent)); DEFINE_EVENT(ib_mad_agent_template, ib_mad_unregister_agent, TP_PROTO(struct ib_mad_agent_private *agent), TP_ARGS(agent)); DECLARE_EVENT_CLASS(ib_mad_opa_smi_template, TP_PROTO(struct opa_smp *smp), TP_ARGS(smp), TP_STRUCT__entry( __field(u64, mkey) __field(u32, dr_slid) __field(u32, dr_dlid) __field(u8, hop_ptr) __field(u8, hop_cnt) __array(u8, initial_path, OPA_SMP_MAX_PATH_HOPS) __array(u8, return_path, OPA_SMP_MAX_PATH_HOPS) ), TP_fast_assign( __entry->hop_ptr = smp->hop_ptr; __entry->hop_cnt = smp->hop_cnt; __entry->mkey = smp->mkey; __entry->dr_slid = smp->route.dr.dr_slid; __entry->dr_dlid = smp->route.dr.dr_dlid; memcpy(__entry->initial_path, smp->route.dr.initial_path, OPA_SMP_MAX_PATH_HOPS); memcpy(__entry->return_path, smp->route.dr.return_path, OPA_SMP_MAX_PATH_HOPS); ), TP_printk("OPA SMP: hop_ptr %d hop_cnt %d " \ "mkey 0x%016llx dr_slid 0x%08x dr_dlid 0x%08x " \ "initial_path %*ph return_path %*ph ", __entry->hop_ptr, __entry->hop_cnt, be64_to_cpu(__entry->mkey), be32_to_cpu(__entry->dr_slid), be32_to_cpu(__entry->dr_dlid), OPA_SMP_MAX_PATH_HOPS, __entry->initial_path, OPA_SMP_MAX_PATH_HOPS, __entry->return_path ) ); DEFINE_EVENT(ib_mad_opa_smi_template, ib_mad_handle_opa_smi, TP_PROTO(struct opa_smp *smp), TP_ARGS(smp)); DEFINE_EVENT(ib_mad_opa_smi_template, ib_mad_handle_out_opa_smi, TP_PROTO(struct opa_smp *smp), TP_ARGS(smp)); DECLARE_EVENT_CLASS(ib_mad_opa_ib_template, TP_PROTO(struct ib_smp *smp), TP_ARGS(smp), TP_STRUCT__entry( __field(u64, mkey) __field(u32, dr_slid) __field(u32, dr_dlid) __field(u8, hop_ptr) __field(u8, hop_cnt) __array(u8, initial_path, IB_SMP_MAX_PATH_HOPS) __array(u8, return_path, IB_SMP_MAX_PATH_HOPS) ), TP_fast_assign( __entry->hop_ptr = smp->hop_ptr; __entry->hop_cnt = smp->hop_cnt; __entry->mkey = smp->mkey; __entry->dr_slid = smp->dr_slid; __entry->dr_dlid = smp->dr_dlid; memcpy(__entry->initial_path, smp->initial_path, IB_SMP_MAX_PATH_HOPS); memcpy(__entry->return_path, smp->return_path, IB_SMP_MAX_PATH_HOPS); ), TP_printk("OPA SMP: hop_ptr %d hop_cnt %d " \ "mkey 0x%016llx dr_slid 0x%04x dr_dlid 0x%04x " \ "initial_path %*ph return_path %*ph ", __entry->hop_ptr, __entry->hop_cnt, be64_to_cpu(__entry->mkey), be16_to_cpu(__entry->dr_slid), be16_to_cpu(__entry->dr_dlid), IB_SMP_MAX_PATH_HOPS, __entry->initial_path, IB_SMP_MAX_PATH_HOPS, __entry->return_path ) ); DEFINE_EVENT(ib_mad_opa_ib_template, ib_mad_handle_ib_smi, TP_PROTO(struct ib_smp *smp), TP_ARGS(smp)); DEFINE_EVENT(ib_mad_opa_ib_template, ib_mad_handle_out_ib_smi, TP_PROTO(struct ib_smp *smp), TP_ARGS(smp)); #endif /* _TRACE_IB_MAD_H */ #include <trace/define_trace.h>
16 16 16 16 16 16 16 16 16 9 9 9 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 // SPDX-License-Identifier: GPL-2.0-or-later /* PKCS#7 parser * * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #define pr_fmt(fmt) "PKCS7: "fmt #include <linux/kernel.h> #include <linux/module.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/err.h> #include <linux/oid_registry.h> #include <crypto/public_key.h> #include "pkcs7_parser.h" #include "pkcs7.asn1.h" MODULE_DESCRIPTION("PKCS#7 parser"); MODULE_AUTHOR("Red Hat, Inc."); MODULE_LICENSE("GPL"); struct pkcs7_parse_context { struct pkcs7_message *msg; /* Message being constructed */ struct pkcs7_signed_info *sinfo; /* SignedInfo being constructed */ struct pkcs7_signed_info **ppsinfo; struct x509_certificate *certs; /* Certificate cache */ struct x509_certificate **ppcerts; unsigned long data; /* Start of data */ enum OID last_oid; /* Last OID encountered */ unsigned x509_index; unsigned sinfo_index; const void *raw_serial; unsigned raw_serial_size; unsigned raw_issuer_size; const void *raw_issuer; const void *raw_skid; unsigned raw_skid_size; bool expect_skid; }; /* * Free a signed information block. */ static void pkcs7_free_signed_info(struct pkcs7_signed_info *sinfo) { if (sinfo) { public_key_signature_free(sinfo->sig); kfree(sinfo); } } /** * pkcs7_free_message - Free a PKCS#7 message * @pkcs7: The PKCS#7 message to free */ void pkcs7_free_message(struct pkcs7_message *pkcs7) { struct x509_certificate *cert; struct pkcs7_signed_info *sinfo; if (pkcs7) { while (pkcs7->certs) { cert = pkcs7->certs; pkcs7->certs = cert->next; x509_free_certificate(cert); } while (pkcs7->crl) { cert = pkcs7->crl; pkcs7->crl = cert->next; x509_free_certificate(cert); } while (pkcs7->signed_infos) { sinfo = pkcs7->signed_infos; pkcs7->signed_infos = sinfo->next; pkcs7_free_signed_info(sinfo); } kfree(pkcs7); } } EXPORT_SYMBOL_GPL(pkcs7_free_message); /* * Check authenticatedAttributes are provided or not provided consistently. */ static int pkcs7_check_authattrs(struct pkcs7_message *msg) { struct pkcs7_signed_info *sinfo; bool want = false; sinfo = msg->signed_infos; if (!sinfo) goto inconsistent; if (sinfo->authattrs) { want = true; msg->have_authattrs = true; } for (sinfo = sinfo->next; sinfo; sinfo = sinfo->next) if (!!sinfo->authattrs != want) goto inconsistent; return 0; inconsistent: pr_warn("Inconsistently supplied authAttrs\n"); return -EINVAL; } /** * pkcs7_parse_message - Parse a PKCS#7 message * @data: The raw binary ASN.1 encoded message to be parsed * @datalen: The size of the encoded message */ struct pkcs7_message *pkcs7_parse_message(const void *data, size_t datalen) { struct pkcs7_parse_context *ctx; struct pkcs7_message *msg = ERR_PTR(-ENOMEM); int ret; ctx = kzalloc(sizeof(struct pkcs7_parse_context), GFP_KERNEL); if (!ctx) goto out_no_ctx; ctx->msg = kzalloc(sizeof(struct pkcs7_message), GFP_KERNEL); if (!ctx->msg) goto out_no_msg; ctx->sinfo = kzalloc(sizeof(struct pkcs7_signed_info), GFP_KERNEL); if (!ctx->sinfo) goto out_no_sinfo; ctx->sinfo->sig = kzalloc(sizeof(struct public_key_signature), GFP_KERNEL); if (!ctx->sinfo->sig) goto out_no_sig; ctx->data = (unsigned long)data; ctx->ppcerts = &ctx->certs; ctx->ppsinfo = &ctx->msg->signed_infos; /* Attempt to decode the signature */ ret = asn1_ber_decoder(&pkcs7_decoder, ctx, data, datalen); if (ret < 0) { msg = ERR_PTR(ret); goto out; } ret = pkcs7_check_authattrs(ctx->msg); if (ret < 0) { msg = ERR_PTR(ret); goto out; } msg = ctx->msg; ctx->msg = NULL; out: while (ctx->certs) { struct x509_certificate *cert = ctx->certs; ctx->certs = cert->next; x509_free_certificate(cert); } out_no_sig: pkcs7_free_signed_info(ctx->sinfo); out_no_sinfo: pkcs7_free_message(ctx->msg); out_no_msg: kfree(ctx); out_no_ctx: return msg; } EXPORT_SYMBOL_GPL(pkcs7_parse_message); /** * pkcs7_get_content_data - Get access to the PKCS#7 content * @pkcs7: The preparsed PKCS#7 message to access * @_data: Place to return a pointer to the data * @_data_len: Place to return the data length * @_headerlen: Size of ASN.1 header not included in _data * * Get access to the data content of the PKCS#7 message. The size of the * header of the ASN.1 object that contains it is also provided and can be used * to adjust *_data and *_data_len to get the entire object. * * Returns -ENODATA if the data object was missing from the message. */ int pkcs7_get_content_data(const struct pkcs7_message *pkcs7, const void **_data, size_t *_data_len, size_t *_headerlen) { if (!pkcs7->data) return -ENODATA; *_data = pkcs7->data; *_data_len = pkcs7->data_len; if (_headerlen) *_headerlen = pkcs7->data_hdrlen; return 0; } EXPORT_SYMBOL_GPL(pkcs7_get_content_data); /* * Note an OID when we find one for later processing when we know how * to interpret it. */ int pkcs7_note_OID(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; ctx->last_oid = look_up_OID(value, vlen); if (ctx->last_oid == OID__NR) { char buffer[50]; sprint_oid(value, vlen, buffer, sizeof(buffer)); printk("PKCS7: Unknown OID: [%lu] %s\n", (unsigned long)value - ctx->data, buffer); } return 0; } /* * Note the digest algorithm for the signature. */ int pkcs7_sig_note_digest_algo(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; switch (ctx->last_oid) { case OID_sha1: ctx->sinfo->sig->hash_algo = "sha1"; break; case OID_sha256: ctx->sinfo->sig->hash_algo = "sha256"; break; case OID_sha384: ctx->sinfo->sig->hash_algo = "sha384"; break; case OID_sha512: ctx->sinfo->sig->hash_algo = "sha512"; break; case OID_sha224: ctx->sinfo->sig->hash_algo = "sha224"; break; case OID_sm3: ctx->sinfo->sig->hash_algo = "sm3"; break; case OID_gost2012Digest256: ctx->sinfo->sig->hash_algo = "streebog256"; break; case OID_gost2012Digest512: ctx->sinfo->sig->hash_algo = "streebog512"; break; case OID_sha3_256: ctx->sinfo->sig->hash_algo = "sha3-256"; break; case OID_sha3_384: ctx->sinfo->sig->hash_algo = "sha3-384"; break; case OID_sha3_512: ctx->sinfo->sig->hash_algo = "sha3-512"; break; default: printk("Unsupported digest algo: %u\n", ctx->last_oid); return -ENOPKG; } return 0; } /* * Note the public key algorithm for the signature. */ int pkcs7_sig_note_pkey_algo(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; switch (ctx->last_oid) { case OID_rsaEncryption: ctx->sinfo->sig->pkey_algo = "rsa"; ctx->sinfo->sig->encoding = "pkcs1"; break; case OID_id_ecdsa_with_sha1: case OID_id_ecdsa_with_sha224: case OID_id_ecdsa_with_sha256: case OID_id_ecdsa_with_sha384: case OID_id_ecdsa_with_sha512: case OID_id_ecdsa_with_sha3_256: case OID_id_ecdsa_with_sha3_384: case OID_id_ecdsa_with_sha3_512: ctx->sinfo->sig->pkey_algo = "ecdsa"; ctx->sinfo->sig->encoding = "x962"; break; case OID_gost2012PKey256: case OID_gost2012PKey512: ctx->sinfo->sig->pkey_algo = "ecrdsa"; ctx->sinfo->sig->encoding = "raw"; break; default: printk("Unsupported pkey algo: %u\n", ctx->last_oid); return -ENOPKG; } return 0; } /* * We only support signed data [RFC2315 sec 9]. */ int pkcs7_check_content_type(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; if (ctx->last_oid != OID_signed_data) { pr_warn("Only support pkcs7_signedData type\n"); return -EINVAL; } return 0; } /* * Note the SignedData version */ int pkcs7_note_signeddata_version(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; unsigned version; if (vlen != 1) goto unsupported; ctx->msg->version = version = *(const u8 *)value; switch (version) { case 1: /* PKCS#7 SignedData [RFC2315 sec 9.1] * CMS ver 1 SignedData [RFC5652 sec 5.1] */ break; case 3: /* CMS ver 3 SignedData [RFC2315 sec 5.1] */ break; default: goto unsupported; } return 0; unsupported: pr_warn("Unsupported SignedData version\n"); return -EINVAL; } /* * Note the SignerInfo version */ int pkcs7_note_signerinfo_version(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; unsigned version; if (vlen != 1) goto unsupported; version = *(const u8 *)value; switch (version) { case 1: /* PKCS#7 SignerInfo [RFC2315 sec 9.2] * CMS ver 1 SignerInfo [RFC5652 sec 5.3] */ if (ctx->msg->version != 1) goto version_mismatch; ctx->expect_skid = false; break; case 3: /* CMS ver 3 SignerInfo [RFC2315 sec 5.3] */ if (ctx->msg->version == 1) goto version_mismatch; ctx->expect_skid = true; break; default: goto unsupported; } return 0; unsupported: pr_warn("Unsupported SignerInfo version\n"); return -EINVAL; version_mismatch: pr_warn("SignedData-SignerInfo version mismatch\n"); return -EBADMSG; } /* * Extract a certificate and store it in the context. */ int pkcs7_extract_cert(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; struct x509_certificate *x509; if (tag != ((ASN1_UNIV << 6) | ASN1_CONS_BIT | ASN1_SEQ)) { pr_debug("Cert began with tag %02x at %lu\n", tag, (unsigned long)ctx - ctx->data); return -EBADMSG; } /* We have to correct for the header so that the X.509 parser can start * from the beginning. Note that since X.509 stipulates DER, there * probably shouldn't be an EOC trailer - but it is in PKCS#7 (which * stipulates BER). */ value -= hdrlen; vlen += hdrlen; if (((u8*)value)[1] == 0x80) vlen += 2; /* Indefinite length - there should be an EOC */ x509 = x509_cert_parse(value, vlen); if (IS_ERR(x509)) return PTR_ERR(x509); x509->index = ++ctx->x509_index; pr_debug("Got cert %u for %s\n", x509->index, x509->subject); pr_debug("- fingerprint %*phN\n", x509->id->len, x509->id->data); *ctx->ppcerts = x509; ctx->ppcerts = &x509->next; return 0; } /* * Save the certificate list */ int pkcs7_note_certificate_list(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; pr_devel("Got cert list (%02x)\n", tag); *ctx->ppcerts = ctx->msg->certs; ctx->msg->certs = ctx->certs; ctx->certs = NULL; ctx->ppcerts = &ctx->certs; return 0; } /* * Note the content type. */ int pkcs7_note_content(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; if (ctx->last_oid != OID_data && ctx->last_oid != OID_msIndirectData) { pr_warn("Unsupported data type %d\n", ctx->last_oid); return -EINVAL; } ctx->msg->data_type = ctx->last_oid; return 0; } /* * Extract the data from the message and store that and its content type OID in * the context. */ int pkcs7_note_data(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; pr_debug("Got data\n"); ctx->msg->data = value; ctx->msg->data_len = vlen; ctx->msg->data_hdrlen = hdrlen; return 0; } /* * Parse authenticated attributes. */ int pkcs7_sig_note_authenticated_attr(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; struct pkcs7_signed_info *sinfo = ctx->sinfo; enum OID content_type; pr_devel("AuthAttr: %02x %zu [%*ph]\n", tag, vlen, (unsigned)vlen, value); switch (ctx->last_oid) { case OID_contentType: if (__test_and_set_bit(sinfo_has_content_type, &sinfo->aa_set)) goto repeated; content_type = look_up_OID(value, vlen); if (content_type != ctx->msg->data_type) { pr_warn("Mismatch between global data type (%d) and sinfo %u (%d)\n", ctx->msg->data_type, sinfo->index, content_type); return -EBADMSG; } return 0; case OID_signingTime: if (__test_and_set_bit(sinfo_has_signing_time, &sinfo->aa_set)) goto repeated; /* Should we check that the signing time is consistent * with the signer's X.509 cert? */ return x509_decode_time(&sinfo->signing_time, hdrlen, tag, value, vlen); case OID_messageDigest: if (__test_and_set_bit(sinfo_has_message_digest, &sinfo->aa_set)) goto repeated; if (tag != ASN1_OTS) return -EBADMSG; sinfo->msgdigest = value; sinfo->msgdigest_len = vlen; return 0; case OID_smimeCapabilites: if (__test_and_set_bit(sinfo_has_smime_caps, &sinfo->aa_set)) goto repeated; if (ctx->msg->data_type != OID_msIndirectData) { pr_warn("S/MIME Caps only allowed with Authenticode\n"); return -EKEYREJECTED; } return 0; /* Microsoft SpOpusInfo seems to be contain cont[0] 16-bit BE * char URLs and cont[1] 8-bit char URLs. * * Microsoft StatementType seems to contain a list of OIDs that * are also used as extendedKeyUsage types in X.509 certs. */ case OID_msSpOpusInfo: if (__test_and_set_bit(sinfo_has_ms_opus_info, &sinfo->aa_set)) goto repeated; goto authenticode_check; case OID_msStatementType: if (__test_and_set_bit(sinfo_has_ms_statement_type, &sinfo->aa_set)) goto repeated; authenticode_check: if (ctx->msg->data_type != OID_msIndirectData) { pr_warn("Authenticode AuthAttrs only allowed with Authenticode\n"); return -EKEYREJECTED; } /* I'm not sure how to validate these */ return 0; default: return 0; } repeated: /* We permit max one item per AuthenticatedAttribute and no repeats */ pr_warn("Repeated/multivalue AuthAttrs not permitted\n"); return -EKEYREJECTED; } /* * Note the set of auth attributes for digestion purposes [RFC2315 sec 9.3] */ int pkcs7_sig_note_set_of_authattrs(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; struct pkcs7_signed_info *sinfo = ctx->sinfo; if (!test_bit(sinfo_has_content_type, &sinfo->aa_set) || !test_bit(sinfo_has_message_digest, &sinfo->aa_set)) { pr_warn("Missing required AuthAttr\n"); return -EBADMSG; } if (ctx->msg->data_type != OID_msIndirectData && test_bit(sinfo_has_ms_opus_info, &sinfo->aa_set)) { pr_warn("Unexpected Authenticode AuthAttr\n"); return -EBADMSG; } /* We need to switch the 'CONT 0' to a 'SET OF' when we digest */ sinfo->authattrs = value - (hdrlen - 1); sinfo->authattrs_len = vlen + (hdrlen - 1); return 0; } /* * Note the issuing certificate serial number */ int pkcs7_sig_note_serial(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; ctx->raw_serial = value; ctx->raw_serial_size = vlen; return 0; } /* * Note the issuer's name */ int pkcs7_sig_note_issuer(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; ctx->raw_issuer = value; ctx->raw_issuer_size = vlen; return 0; } /* * Note the issuing cert's subjectKeyIdentifier */ int pkcs7_sig_note_skid(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; pr_devel("SKID: %02x %zu [%*ph]\n", tag, vlen, (unsigned)vlen, value); ctx->raw_skid = value; ctx->raw_skid_size = vlen; return 0; } /* * Note the signature data */ int pkcs7_sig_note_signature(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; ctx->sinfo->sig->s = kmemdup(value, vlen, GFP_KERNEL); if (!ctx->sinfo->sig->s) return -ENOMEM; ctx->sinfo->sig->s_size = vlen; return 0; } /* * Note a signature information block */ int pkcs7_note_signed_info(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; struct pkcs7_signed_info *sinfo = ctx->sinfo; struct asymmetric_key_id *kid; if (ctx->msg->data_type == OID_msIndirectData && !sinfo->authattrs) { pr_warn("Authenticode requires AuthAttrs\n"); return -EBADMSG; } /* Generate cert issuer + serial number key ID */ if (!ctx->expect_skid) { kid = asymmetric_key_generate_id(ctx->raw_serial, ctx->raw_serial_size, ctx->raw_issuer, ctx->raw_issuer_size); } else { kid = asymmetric_key_generate_id(ctx->raw_skid, ctx->raw_skid_size, "", 0); } if (IS_ERR(kid)) return PTR_ERR(kid); pr_devel("SINFO KID: %u [%*phN]\n", kid->len, kid->len, kid->data); sinfo->sig->auth_ids[0] = kid; sinfo->index = ++ctx->sinfo_index; *ctx->ppsinfo = sinfo; ctx->ppsinfo = &sinfo->next; ctx->sinfo = kzalloc(sizeof(struct pkcs7_signed_info), GFP_KERNEL); if (!ctx->sinfo) return -ENOMEM; ctx->sinfo->sig = kzalloc(sizeof(struct public_key_signature), GFP_KERNEL); if (!ctx->sinfo->sig) return -ENOMEM; return 0; }
8 8 7 7 1 14 5 9 7 7 14 9 5 5 6 1 1 1 2 2 6 2 1 1 2 2 1 1 2 1 1 2 6 6 4 2 6 6 9 6 3 5 1 1 3 2 3 3 3 1 3 2 1 1 2 2 3 3 1 3 1 2 3 3 1 2 2 2 2 6 6 6 6 6 8 1 7 26 7 22 26 9 17 1 12 6 10 10 10 9 2 1 1 1 1 1 1 1 26 5 1 5 19 2 1 1 2 6 11 1 8 1 1 1 14 13 4 1 2 11 10 8 1 1 3 3 7 1 6 3 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 // SPDX-License-Identifier: GPL-2.0-only /* * cec-adap.c - HDMI Consumer Electronics Control framework - CEC adapter * * Copyright 2016 Cisco Systems, Inc. and/or its affiliates. All rights reserved. */ #include <linux/errno.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/kmod.h> #include <linux/ktime.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/types.h> #include <drm/drm_connector.h> #include <drm/drm_device.h> #include <drm/drm_edid.h> #include <drm/drm_file.h> #include "cec-priv.h" static void cec_fill_msg_report_features(struct cec_adapter *adap, struct cec_msg *msg, unsigned int la_idx); static int cec_log_addr2idx(const struct cec_adapter *adap, u8 log_addr) { int i; for (i = 0; i < adap->log_addrs.num_log_addrs; i++) if (adap->log_addrs.log_addr[i] == log_addr) return i; return -1; } static unsigned int cec_log_addr2dev(const struct cec_adapter *adap, u8 log_addr) { int i = cec_log_addr2idx(adap, log_addr); return adap->log_addrs.primary_device_type[i < 0 ? 0 : i]; } u16 cec_get_edid_phys_addr(const u8 *edid, unsigned int size, unsigned int *offset) { unsigned int loc = cec_get_edid_spa_location(edid, size); if (offset) *offset = loc; if (loc == 0) return CEC_PHYS_ADDR_INVALID; return (edid[loc] << 8) | edid[loc + 1]; } EXPORT_SYMBOL_GPL(cec_get_edid_phys_addr); void cec_fill_conn_info_from_drm(struct cec_connector_info *conn_info, const struct drm_connector *connector) { memset(conn_info, 0, sizeof(*conn_info)); conn_info->type = CEC_CONNECTOR_TYPE_DRM; conn_info->drm.card_no = connector->dev->primary->index; conn_info->drm.connector_id = connector->base.id; } EXPORT_SYMBOL_GPL(cec_fill_conn_info_from_drm); /* * Queue a new event for this filehandle. If ts == 0, then set it * to the current time. * * We keep a queue of at most max_event events where max_event differs * per event. If the queue becomes full, then drop the oldest event and * keep track of how many events we've dropped. */ void cec_queue_event_fh(struct cec_fh *fh, const struct cec_event *new_ev, u64 ts) { static const u16 max_events[CEC_NUM_EVENTS] = { 1, 1, 800, 800, 8, 8, 8, 8 }; struct cec_event_entry *entry; unsigned int ev_idx = new_ev->event - 1; if (WARN_ON(ev_idx >= ARRAY_SIZE(fh->events))) return; if (ts == 0) ts = ktime_get_ns(); mutex_lock(&fh->lock); if (ev_idx < CEC_NUM_CORE_EVENTS) entry = &fh->core_events[ev_idx]; else entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (entry) { if (new_ev->event == CEC_EVENT_LOST_MSGS && fh->queued_events[ev_idx]) { entry->ev.lost_msgs.lost_msgs += new_ev->lost_msgs.lost_msgs; goto unlock; } entry->ev = *new_ev; entry->ev.ts = ts; if (fh->queued_events[ev_idx] < max_events[ev_idx]) { /* Add new msg at the end of the queue */ list_add_tail(&entry->list, &fh->events[ev_idx]); fh->queued_events[ev_idx]++; fh->total_queued_events++; goto unlock; } if (ev_idx >= CEC_NUM_CORE_EVENTS) { list_add_tail(&entry->list, &fh->events[ev_idx]); /* drop the oldest event */ entry = list_first_entry(&fh->events[ev_idx], struct cec_event_entry, list); list_del(&entry->list); kfree(entry); } } /* Mark that events were lost */ entry = list_first_entry_or_null(&fh->events[ev_idx], struct cec_event_entry, list); if (entry) entry->ev.flags |= CEC_EVENT_FL_DROPPED_EVENTS; unlock: mutex_unlock(&fh->lock); wake_up_interruptible(&fh->wait); } /* Queue a new event for all open filehandles. */ static void cec_queue_event(struct cec_adapter *adap, const struct cec_event *ev) { u64 ts = ktime_get_ns(); struct cec_fh *fh; mutex_lock(&adap->devnode.lock_fhs); list_for_each_entry(fh, &adap->devnode.fhs, list) cec_queue_event_fh(fh, ev, ts); mutex_unlock(&adap->devnode.lock_fhs); } /* Notify userspace that the CEC pin changed state at the given time. */ void cec_queue_pin_cec_event(struct cec_adapter *adap, bool is_high, bool dropped_events, ktime_t ts) { struct cec_event ev = { .event = is_high ? CEC_EVENT_PIN_CEC_HIGH : CEC_EVENT_PIN_CEC_LOW, .flags = dropped_events ? CEC_EVENT_FL_DROPPED_EVENTS : 0, }; struct cec_fh *fh; mutex_lock(&adap->devnode.lock_fhs); list_for_each_entry(fh, &adap->devnode.fhs, list) { if (fh->mode_follower == CEC_MODE_MONITOR_PIN) cec_queue_event_fh(fh, &ev, ktime_to_ns(ts)); } mutex_unlock(&adap->devnode.lock_fhs); } EXPORT_SYMBOL_GPL(cec_queue_pin_cec_event); /* Notify userspace that the HPD pin changed state at the given time. */ void cec_queue_pin_hpd_event(struct cec_adapter *adap, bool is_high, ktime_t ts) { struct cec_event ev = { .event = is_high ? CEC_EVENT_PIN_HPD_HIGH : CEC_EVENT_PIN_HPD_LOW, }; struct cec_fh *fh; mutex_lock(&adap->devnode.lock_fhs); list_for_each_entry(fh, &adap->devnode.fhs, list) cec_queue_event_fh(fh, &ev, ktime_to_ns(ts)); mutex_unlock(&adap->devnode.lock_fhs); } EXPORT_SYMBOL_GPL(cec_queue_pin_hpd_event); /* Notify userspace that the 5V pin changed state at the given time. */ void cec_queue_pin_5v_event(struct cec_adapter *adap, bool is_high, ktime_t ts) { struct cec_event ev = { .event = is_high ? CEC_EVENT_PIN_5V_HIGH : CEC_EVENT_PIN_5V_LOW, }; struct cec_fh *fh; mutex_lock(&adap->devnode.lock_fhs); list_for_each_entry(fh, &adap->devnode.fhs, list) cec_queue_event_fh(fh, &ev, ktime_to_ns(ts)); mutex_unlock(&adap->devnode.lock_fhs); } EXPORT_SYMBOL_GPL(cec_queue_pin_5v_event); /* * Queue a new message for this filehandle. * * We keep a queue of at most CEC_MAX_MSG_RX_QUEUE_SZ messages. If the * queue becomes full, then drop the oldest message and keep track * of how many messages we've dropped. */ static void cec_queue_msg_fh(struct cec_fh *fh, const struct cec_msg *msg) { static const struct cec_event ev_lost_msgs = { .event = CEC_EVENT_LOST_MSGS, .flags = 0, { .lost_msgs = { 1 }, }, }; struct cec_msg_entry *entry; mutex_lock(&fh->lock); entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (entry) { entry->msg = *msg; /* Add new msg at the end of the queue */ list_add_tail(&entry->list, &fh->msgs); if (fh->queued_msgs < CEC_MAX_MSG_RX_QUEUE_SZ) { /* All is fine if there is enough room */ fh->queued_msgs++; mutex_unlock(&fh->lock); wake_up_interruptible(&fh->wait); return; } /* * if the message queue is full, then drop the oldest one and * send a lost message event. */ entry = list_first_entry(&fh->msgs, struct cec_msg_entry, list); list_del(&entry->list); kfree(entry); } mutex_unlock(&fh->lock); /* * We lost a message, either because kmalloc failed or the queue * was full. */ cec_queue_event_fh(fh, &ev_lost_msgs, ktime_get_ns()); } /* * Queue the message for those filehandles that are in monitor mode. * If valid_la is true (this message is for us or was sent by us), * then pass it on to any monitoring filehandle. If this message * isn't for us or from us, then only give it to filehandles that * are in MONITOR_ALL mode. * * This can only happen if the CEC_CAP_MONITOR_ALL capability is * set and the CEC adapter was placed in 'monitor all' mode. */ static void cec_queue_msg_monitor(struct cec_adapter *adap, const struct cec_msg *msg, bool valid_la) { struct cec_fh *fh; u32 monitor_mode = valid_la ? CEC_MODE_MONITOR : CEC_MODE_MONITOR_ALL; mutex_lock(&adap->devnode.lock_fhs); list_for_each_entry(fh, &adap->devnode.fhs, list) { if (fh->mode_follower >= monitor_mode) cec_queue_msg_fh(fh, msg); } mutex_unlock(&adap->devnode.lock_fhs); } /* * Queue the message for follower filehandles. */ static void cec_queue_msg_followers(struct cec_adapter *adap, const struct cec_msg *msg) { struct cec_fh *fh; mutex_lock(&adap->devnode.lock_fhs); list_for_each_entry(fh, &adap->devnode.fhs, list) { if (fh->mode_follower == CEC_MODE_FOLLOWER) cec_queue_msg_fh(fh, msg); } mutex_unlock(&adap->devnode.lock_fhs); } /* Notify userspace of an adapter state change. */ static void cec_post_state_event(struct cec_adapter *adap) { struct cec_event ev = { .event = CEC_EVENT_STATE_CHANGE, }; ev.state_change.phys_addr = adap->phys_addr; ev.state_change.log_addr_mask = adap->log_addrs.log_addr_mask; ev.state_change.have_conn_info = adap->conn_info.type != CEC_CONNECTOR_TYPE_NO_CONNECTOR; cec_queue_event(adap, &ev); } /* * A CEC transmit (and a possible wait for reply) completed. * If this was in blocking mode, then complete it, otherwise * queue the message for userspace to dequeue later. * * This function is called with adap->lock held. */ static void cec_data_completed(struct cec_data *data) { /* * Delete this transmit from the filehandle's xfer_list since * we're done with it. * * Note that if the filehandle is closed before this transmit * finished, then the release() function will set data->fh to NULL. * Without that we would be referring to a closed filehandle. */ if (data->fh) list_del_init(&data->xfer_list); if (data->blocking) { /* * Someone is blocking so mark the message as completed * and call complete. */ data->completed = true; complete(&data->c); } else { /* * No blocking, so just queue the message if needed and * free the memory. */ if (data->fh) cec_queue_msg_fh(data->fh, &data->msg); kfree(data); } } /* * A pending CEC transmit needs to be cancelled, either because the CEC * adapter is disabled or the transmit takes an impossibly long time to * finish, or the reply timed out. * * This function is called with adap->lock held. */ static void cec_data_cancel(struct cec_data *data, u8 tx_status, u8 rx_status) { struct cec_adapter *adap = data->adap; /* * It's either the current transmit, or it is a pending * transmit. Take the appropriate action to clear it. */ if (adap->transmitting == data) { adap->transmitting = NULL; } else { list_del_init(&data->list); if (!(data->msg.tx_status & CEC_TX_STATUS_OK)) if (!WARN_ON(!adap->transmit_queue_sz)) adap->transmit_queue_sz--; } if (data->msg.tx_status & CEC_TX_STATUS_OK) { data->msg.rx_ts = ktime_get_ns(); data->msg.rx_status = rx_status; if (!data->blocking) data->msg.tx_status = 0; } else { data->msg.tx_ts = ktime_get_ns(); data->msg.tx_status |= tx_status | CEC_TX_STATUS_MAX_RETRIES; data->msg.tx_error_cnt++; data->attempts = 0; if (!data->blocking) data->msg.rx_status = 0; } /* Queue transmitted message for monitoring purposes */ cec_queue_msg_monitor(adap, &data->msg, 1); if (!data->blocking && data->msg.sequence) /* Allow drivers to react to a canceled transmit */ call_void_op(adap, adap_nb_transmit_canceled, &data->msg); cec_data_completed(data); } /* * Flush all pending transmits and cancel any pending timeout work. * * This function is called with adap->lock held. */ static void cec_flush(struct cec_adapter *adap) { struct cec_data *data, *n; /* * If the adapter is disabled, or we're asked to stop, * then cancel any pending transmits. */ while (!list_empty(&adap->transmit_queue)) { data = list_first_entry(&adap->transmit_queue, struct cec_data, list); cec_data_cancel(data, CEC_TX_STATUS_ABORTED, 0); } if (adap->transmitting) adap->transmit_in_progress_aborted = true; /* Cancel the pending timeout work. */ list_for_each_entry_safe(data, n, &adap->wait_queue, list) { if (cancel_delayed_work(&data->work)) cec_data_cancel(data, CEC_TX_STATUS_OK, CEC_RX_STATUS_ABORTED); /* * If cancel_delayed_work returned false, then * the cec_wait_timeout function is running, * which will call cec_data_completed. So no * need to do anything special in that case. */ } /* * If something went wrong and this counter isn't what it should * be, then this will reset it back to 0. Warn if it is not 0, * since it indicates a bug, either in this framework or in a * CEC driver. */ if (WARN_ON(adap->transmit_queue_sz)) adap->transmit_queue_sz = 0; } /* * Main CEC state machine * * Wait until the thread should be stopped, or we are not transmitting and * a new transmit message is queued up, in which case we start transmitting * that message. When the adapter finished transmitting the message it will * call cec_transmit_done(). * * If the adapter is disabled, then remove all queued messages instead. * * If the current transmit times out, then cancel that transmit. */ int cec_thread_func(void *_adap) { struct cec_adapter *adap = _adap; for (;;) { unsigned int signal_free_time; struct cec_data *data; bool timeout = false; u8 attempts; if (adap->transmit_in_progress) { int err; /* * We are transmitting a message, so add a timeout * to prevent the state machine to get stuck waiting * for this message to finalize and add a check to * see if the adapter is disabled in which case the * transmit should be canceled. */ err = wait_event_interruptible_timeout(adap->kthread_waitq, (adap->needs_hpd && (!adap->is_configured && !adap->is_configuring)) || kthread_should_stop() || (!adap->transmit_in_progress && !list_empty(&adap->transmit_queue)), msecs_to_jiffies(adap->xfer_timeout_ms)); timeout = err == 0; } else { /* Otherwise we just wait for something to happen. */ wait_event_interruptible(adap->kthread_waitq, kthread_should_stop() || (!adap->transmit_in_progress && !list_empty(&adap->transmit_queue))); } mutex_lock(&adap->lock); if ((adap->needs_hpd && (!adap->is_configured && !adap->is_configuring)) || kthread_should_stop()) { cec_flush(adap); goto unlock; } if (adap->transmit_in_progress && adap->transmit_in_progress_aborted) { if (adap->transmitting) cec_data_cancel(adap->transmitting, CEC_TX_STATUS_ABORTED, 0); adap->transmit_in_progress = false; adap->transmit_in_progress_aborted = false; goto unlock; } if (adap->transmit_in_progress && timeout) { /* * If we timeout, then log that. Normally this does * not happen and it is an indication of a faulty CEC * adapter driver, or the CEC bus is in some weird * state. On rare occasions it can happen if there is * so much traffic on the bus that the adapter was * unable to transmit for xfer_timeout_ms (2.1s by * default). */ if (adap->transmitting) { pr_warn("cec-%s: message %*ph timed out\n", adap->name, adap->transmitting->msg.len, adap->transmitting->msg.msg); /* Just give up on this. */ cec_data_cancel(adap->transmitting, CEC_TX_STATUS_TIMEOUT, 0); } else { pr_warn("cec-%s: transmit timed out\n", adap->name); } adap->transmit_in_progress = false; adap->tx_timeout_cnt++; goto unlock; } /* * If we are still transmitting, or there is nothing new to * transmit, then just continue waiting. */ if (adap->transmit_in_progress || list_empty(&adap->transmit_queue)) goto unlock; /* Get a new message to transmit */ data = list_first_entry(&adap->transmit_queue, struct cec_data, list); list_del_init(&data->list); if (!WARN_ON(!data->adap->transmit_queue_sz)) adap->transmit_queue_sz--; /* Make this the current transmitting message */ adap->transmitting = data; /* * Suggested number of attempts as per the CEC 2.0 spec: * 4 attempts is the default, except for 'secondary poll * messages', i.e. poll messages not sent during the adapter * configuration phase when it allocates logical addresses. */ if (data->msg.len == 1 && adap->is_configured) attempts = 2; else attempts = 4; /* Set the suggested signal free time */ if (data->attempts) { /* should be >= 3 data bit periods for a retry */ signal_free_time = CEC_SIGNAL_FREE_TIME_RETRY; } else if (adap->last_initiator != cec_msg_initiator(&data->msg)) { /* should be >= 5 data bit periods for new initiator */ signal_free_time = CEC_SIGNAL_FREE_TIME_NEW_INITIATOR; adap->last_initiator = cec_msg_initiator(&data->msg); } else { /* * should be >= 7 data bit periods for sending another * frame immediately after another. */ signal_free_time = CEC_SIGNAL_FREE_TIME_NEXT_XFER; } if (data->attempts == 0) data->attempts = attempts; adap->transmit_in_progress_aborted = false; /* Tell the adapter to transmit, cancel on error */ if (call_op(adap, adap_transmit, data->attempts, signal_free_time, &data->msg)) cec_data_cancel(data, CEC_TX_STATUS_ABORTED, 0); else adap->transmit_in_progress = true; unlock: mutex_unlock(&adap->lock); if (kthread_should_stop()) break; } return 0; } /* * Called by the CEC adapter if a transmit finished. */ void cec_transmit_done_ts(struct cec_adapter *adap, u8 status, u8 arb_lost_cnt, u8 nack_cnt, u8 low_drive_cnt, u8 error_cnt, ktime_t ts) { struct cec_data *data; struct cec_msg *msg; unsigned int attempts_made = arb_lost_cnt + nack_cnt + low_drive_cnt + error_cnt; bool done = status & (CEC_TX_STATUS_MAX_RETRIES | CEC_TX_STATUS_OK); bool aborted = adap->transmit_in_progress_aborted; dprintk(2, "%s: status 0x%02x\n", __func__, status); if (attempts_made < 1) attempts_made = 1; mutex_lock(&adap->lock); data = adap->transmitting; if (!data) { /* * This might happen if a transmit was issued and the cable is * unplugged while the transmit is ongoing. Ignore this * transmit in that case. */ if (!adap->transmit_in_progress) dprintk(1, "%s was called without an ongoing transmit!\n", __func__); adap->transmit_in_progress = false; goto wake_thread; } adap->transmit_in_progress = false; adap->transmit_in_progress_aborted = false; msg = &data->msg; /* Drivers must fill in the status! */ WARN_ON(status == 0); msg->tx_ts = ktime_to_ns(ts); msg->tx_status |= status; msg->tx_arb_lost_cnt += arb_lost_cnt; msg->tx_nack_cnt += nack_cnt; msg->tx_low_drive_cnt += low_drive_cnt; msg->tx_error_cnt += error_cnt; adap->tx_arb_lost_cnt += arb_lost_cnt; adap->tx_low_drive_cnt += low_drive_cnt; adap->tx_error_cnt += error_cnt; /* * Low Drive transmission errors should really not happen for * well-behaved CEC devices and proper HDMI cables. * * Ditto for the 'Error' status. * * For the first few times that this happens, log this. * Stop logging after that, since that will not add any more * useful information and instead it will just flood the kernel log. */ if (done && adap->tx_low_drive_log_cnt < 8 && msg->tx_low_drive_cnt) { adap->tx_low_drive_log_cnt++; dprintk(0, "low drive counter: %u (seq %u: %*ph)\n", msg->tx_low_drive_cnt, msg->sequence, msg->len, msg->msg); } if (done && adap->tx_error_log_cnt < 8 && msg->tx_error_cnt) { adap->tx_error_log_cnt++; dprintk(0, "error counter: %u (seq %u: %*ph)\n", msg->tx_error_cnt, msg->sequence, msg->len, msg->msg); } /* Mark that we're done with this transmit */ adap->transmitting = NULL; /* * If there are still retry attempts left and there was an error and * the hardware didn't signal that it retried itself (by setting * CEC_TX_STATUS_MAX_RETRIES), then we will retry ourselves. */ if (!aborted && data->attempts > attempts_made && !done) { /* Retry this message */ data->attempts -= attempts_made; if (msg->timeout) dprintk(2, "retransmit: %*ph (attempts: %d, wait for %*ph)\n", msg->len, msg->msg, data->attempts, data->match_len, data->match_reply); else dprintk(2, "retransmit: %*ph (attempts: %d)\n", msg->len, msg->msg, data->attempts); /* Add the message in front of the transmit queue */ list_add(&data->list, &adap->transmit_queue); adap->transmit_queue_sz++; goto wake_thread; } if (aborted && !done) status |= CEC_TX_STATUS_ABORTED; data->attempts = 0; /* Always set CEC_TX_STATUS_MAX_RETRIES on error */ if (!(status & CEC_TX_STATUS_OK)) msg->tx_status |= CEC_TX_STATUS_MAX_RETRIES; /* Queue transmitted message for monitoring purposes */ cec_queue_msg_monitor(adap, msg, 1); if ((status & CEC_TX_STATUS_OK) && adap->is_configured && msg->timeout) { /* * Queue the message into the wait queue if we want to wait * for a reply. */ list_add_tail(&data->list, &adap->wait_queue); schedule_delayed_work(&data->work, msecs_to_jiffies(msg->timeout)); } else { /* Otherwise we're done */ cec_data_completed(data); } wake_thread: /* * Wake up the main thread to see if another message is ready * for transmitting or to retry the current message. */ wake_up_interruptible(&adap->kthread_waitq); mutex_unlock(&adap->lock); } EXPORT_SYMBOL_GPL(cec_transmit_done_ts); void cec_transmit_attempt_done_ts(struct cec_adapter *adap, u8 status, ktime_t ts) { switch (status & ~CEC_TX_STATUS_MAX_RETRIES) { case CEC_TX_STATUS_OK: cec_transmit_done_ts(adap, status, 0, 0, 0, 0, ts); return; case CEC_TX_STATUS_ARB_LOST: cec_transmit_done_ts(adap, status, 1, 0, 0, 0, ts); return; case CEC_TX_STATUS_NACK: cec_transmit_done_ts(adap, status, 0, 1, 0, 0, ts); return; case CEC_TX_STATUS_LOW_DRIVE: cec_transmit_done_ts(adap, status, 0, 0, 1, 0, ts); return; case CEC_TX_STATUS_ERROR: cec_transmit_done_ts(adap, status, 0, 0, 0, 1, ts); return; default: /* Should never happen */ WARN(1, "cec-%s: invalid status 0x%02x\n", adap->name, status); return; } } EXPORT_SYMBOL_GPL(cec_transmit_attempt_done_ts); /* * Called when waiting for a reply times out. */ static void cec_wait_timeout(struct work_struct *work) { struct cec_data *data = container_of(work, struct cec_data, work.work); struct cec_adapter *adap = data->adap; mutex_lock(&adap->lock); /* * Sanity check in case the timeout and the arrival of the message * happened at the same time. */ if (list_empty(&data->list)) goto unlock; /* Mark the message as timed out */ list_del_init(&data->list); cec_data_cancel(data, CEC_TX_STATUS_OK, CEC_RX_STATUS_TIMEOUT); unlock: mutex_unlock(&adap->lock); } /* * Transmit a message. The fh argument may be NULL if the transmit is not * associated with a specific filehandle. * * This function is called with adap->lock held. */ int cec_transmit_msg_fh(struct cec_adapter *adap, struct cec_msg *msg, struct cec_fh *fh, bool block) { struct cec_data *data; bool is_raw = msg_is_raw(msg); bool reply_vendor_id = (msg->flags & CEC_MSG_FL_REPLY_VENDOR_ID) && msg->len > 1 && msg->msg[1] == CEC_MSG_VENDOR_COMMAND_WITH_ID; int err; if (adap->devnode.unregistered) return -ENODEV; msg->rx_ts = 0; msg->tx_ts = 0; msg->rx_status = 0; msg->tx_status = 0; msg->tx_arb_lost_cnt = 0; msg->tx_nack_cnt = 0; msg->tx_low_drive_cnt = 0; msg->tx_error_cnt = 0; msg->sequence = 0; msg->flags &= CEC_MSG_FL_REPLY_TO_FOLLOWERS | CEC_MSG_FL_RAW | (reply_vendor_id ? CEC_MSG_FL_REPLY_VENDOR_ID : 0); if ((reply_vendor_id || msg->reply) && msg->timeout == 0) { /* Make sure the timeout isn't 0. */ msg->timeout = 1000; } if (!msg->timeout) msg->flags &= ~CEC_MSG_FL_REPLY_TO_FOLLOWERS; /* Sanity checks */ if (msg->len == 0 || msg->len > CEC_MAX_MSG_SIZE) { dprintk(1, "%s: invalid length %d\n", __func__, msg->len); return -EINVAL; } if (reply_vendor_id && msg->len < 6) { dprintk(1, "%s: <Vendor Command With ID> message too short\n", __func__); return -EINVAL; } memset(msg->msg + msg->len, 0, sizeof(msg->msg) - msg->len); if (msg->timeout) dprintk(2, "%s: %*ph (wait for 0x%02x%s)\n", __func__, msg->len, msg->msg, msg->reply, !block ? ", nb" : ""); else dprintk(2, "%s: %*ph%s\n", __func__, msg->len, msg->msg, !block ? " (nb)" : ""); if (msg->timeout && msg->len == 1) { dprintk(1, "%s: can't reply to poll msg\n", __func__); return -EINVAL; } if (is_raw) { if (!capable(CAP_SYS_RAWIO)) return -EPERM; } else { /* A CDC-Only device can only send CDC messages */ if ((adap->log_addrs.flags & CEC_LOG_ADDRS_FL_CDC_ONLY) && (msg->len == 1 || msg->msg[1] != CEC_MSG_CDC_MESSAGE)) { dprintk(1, "%s: not a CDC message\n", __func__); return -EINVAL; } if (msg->len >= 4 && msg->msg[1] == CEC_MSG_CDC_MESSAGE) { msg->msg[2] = adap->phys_addr >> 8; msg->msg[3] = adap->phys_addr & 0xff; } if (msg->len == 1) { if (cec_msg_destination(msg) == 0xf) { dprintk(1, "%s: invalid poll message\n", __func__); return -EINVAL; } if (cec_has_log_addr(adap, cec_msg_destination(msg))) { /* * If the destination is a logical address our * adapter has already claimed, then just NACK * this. It depends on the hardware what it will * do with a POLL to itself (some OK this), so * it is just as easy to handle it here so the * behavior will be consistent. */ msg->tx_ts = ktime_get_ns(); msg->tx_status = CEC_TX_STATUS_NACK | CEC_TX_STATUS_MAX_RETRIES; msg->tx_nack_cnt = 1; msg->sequence = ++adap->sequence; if (!msg->sequence) msg->sequence = ++adap->sequence; return 0; } } if (msg->len > 1 && !cec_msg_is_broadcast(msg) && cec_has_log_addr(adap, cec_msg_destination(msg))) { dprintk(1, "%s: destination is the adapter itself\n", __func__); return -EINVAL; } if (msg->len > 1 && adap->is_configured && !cec_has_log_addr(adap, cec_msg_initiator(msg))) { dprintk(1, "%s: initiator has unknown logical address %d\n", __func__, cec_msg_initiator(msg)); return -EINVAL; } /* * Special case: allow Ping and IMAGE/TEXT_VIEW_ON to be * transmitted to a TV, even if the adapter is unconfigured. * This makes it possible to detect or wake up displays that * pull down the HPD when in standby. */ if (!adap->is_configured && !adap->is_configuring && (msg->len > 2 || cec_msg_destination(msg) != CEC_LOG_ADDR_TV || (msg->len == 2 && msg->msg[1] != CEC_MSG_IMAGE_VIEW_ON && msg->msg[1] != CEC_MSG_TEXT_VIEW_ON))) { dprintk(1, "%s: adapter is unconfigured\n", __func__); return -ENONET; } } if (!adap->is_configured && !adap->is_configuring) { if (adap->needs_hpd) { dprintk(1, "%s: adapter is unconfigured and needs HPD\n", __func__); return -ENONET; } if (reply_vendor_id || msg->reply) { dprintk(1, "%s: adapter is unconfigured so reply is not supported\n", __func__); return -EINVAL; } } if (adap->transmit_queue_sz >= CEC_MAX_MSG_TX_QUEUE_SZ) { dprintk(2, "%s: transmit queue full\n", __func__); return -EBUSY; } data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; msg->sequence = ++adap->sequence; if (!msg->sequence) msg->sequence = ++adap->sequence; data->msg = *msg; data->fh = fh; data->adap = adap; data->blocking = block; if (reply_vendor_id) { memcpy(data->match_reply, msg->msg + 1, 4); data->match_reply[4] = msg->reply; data->match_len = 5; } else if (msg->timeout) { data->match_reply[0] = msg->reply; data->match_len = 1; } init_completion(&data->c); INIT_DELAYED_WORK(&data->work, cec_wait_timeout); if (fh) list_add_tail(&data->xfer_list, &fh->xfer_list); else INIT_LIST_HEAD(&data->xfer_list); list_add_tail(&data->list, &adap->transmit_queue); adap->transmit_queue_sz++; if (!adap->transmitting) wake_up_interruptible(&adap->kthread_waitq); /* All done if we don't need to block waiting for completion */ if (!block) return 0; /* * Release the lock and wait, retake the lock afterwards. */ mutex_unlock(&adap->lock); err = wait_for_completion_killable(&data->c); cancel_delayed_work_sync(&data->work); mutex_lock(&adap->lock); if (err) adap->transmit_in_progress_aborted = true; /* Cancel the transmit if it was interrupted */ if (!data->completed) { if (data->msg.tx_status & CEC_TX_STATUS_OK) cec_data_cancel(data, CEC_TX_STATUS_OK, CEC_RX_STATUS_ABORTED); else cec_data_cancel(data, CEC_TX_STATUS_ABORTED, 0); } /* The transmit completed (possibly with an error) */ *msg = data->msg; if (WARN_ON(!list_empty(&data->list))) list_del(&data->list); if (WARN_ON(!list_empty(&data->xfer_list))) list_del(&data->xfer_list); kfree(data); return 0; } /* Helper function to be used by drivers and this framework. */ int cec_transmit_msg(struct cec_adapter *adap, struct cec_msg *msg, bool block) { int ret; mutex_lock(&adap->lock); ret = cec_transmit_msg_fh(adap, msg, NULL, block); mutex_unlock(&adap->lock); return ret; } EXPORT_SYMBOL_GPL(cec_transmit_msg); /* * I don't like forward references but without this the low-level * cec_received_msg() function would come after a bunch of high-level * CEC protocol handling functions. That was very confusing. */ static int cec_receive_notify(struct cec_adapter *adap, struct cec_msg *msg, bool is_reply); #define DIRECTED 0x80 #define BCAST1_4 0x40 #define BCAST2_0 0x20 /* broadcast only allowed for >= 2.0 */ #define BCAST (BCAST1_4 | BCAST2_0) #define BOTH (BCAST | DIRECTED) /* * Specify minimum length and whether the message is directed, broadcast * or both. Messages that do not match the criteria are ignored as per * the CEC specification. */ static const u8 cec_msg_size[256] = { [CEC_MSG_ACTIVE_SOURCE] = 4 | BCAST, [CEC_MSG_IMAGE_VIEW_ON] = 2 | DIRECTED, [CEC_MSG_TEXT_VIEW_ON] = 2 | DIRECTED, [CEC_MSG_INACTIVE_SOURCE] = 4 | DIRECTED, [CEC_MSG_REQUEST_ACTIVE_SOURCE] = 2 | BCAST, [CEC_MSG_ROUTING_CHANGE] = 6 | BCAST, [CEC_MSG_ROUTING_INFORMATION] = 4 | BCAST, [CEC_MSG_SET_STREAM_PATH] = 4 | BCAST, [CEC_MSG_STANDBY] = 2 | BOTH, [CEC_MSG_RECORD_OFF] = 2 | DIRECTED, [CEC_MSG_RECORD_ON] = 3 | DIRECTED, [CEC_MSG_RECORD_STATUS] = 3 | DIRECTED, [CEC_MSG_RECORD_TV_SCREEN] = 2 | DIRECTED, [CEC_MSG_CLEAR_ANALOGUE_TIMER] = 13 | DIRECTED, [CEC_MSG_CLEAR_DIGITAL_TIMER] = 16 | DIRECTED, [CEC_MSG_CLEAR_EXT_TIMER] = 13 | DIRECTED, [CEC_MSG_SET_ANALOGUE_TIMER] = 13 | DIRECTED, [CEC_MSG_SET_DIGITAL_TIMER] = 16 | DIRECTED, [CEC_MSG_SET_EXT_TIMER] = 13 | DIRECTED, [CEC_MSG_SET_TIMER_PROGRAM_TITLE] = 2 | DIRECTED, [CEC_MSG_TIMER_CLEARED_STATUS] = 3 | DIRECTED, [CEC_MSG_TIMER_STATUS] = 3 | DIRECTED, [CEC_MSG_CEC_VERSION] = 3 | DIRECTED, [CEC_MSG_GET_CEC_VERSION] = 2 | DIRECTED, [CEC_MSG_GIVE_PHYSICAL_ADDR] = 2 | DIRECTED, [CEC_MSG_GET_MENU_LANGUAGE] = 2 | DIRECTED, [CEC_MSG_REPORT_PHYSICAL_ADDR] = 5 | BCAST, [CEC_MSG_SET_MENU_LANGUAGE] = 5 | BCAST, [CEC_MSG_REPORT_FEATURES] = 6 | BCAST, [CEC_MSG_GIVE_FEATURES] = 2 | DIRECTED, [CEC_MSG_DECK_CONTROL] = 3 | DIRECTED, [CEC_MSG_DECK_STATUS] = 3 | DIRECTED, [CEC_MSG_GIVE_DECK_STATUS] = 3 | DIRECTED, [CEC_MSG_PLAY] = 3 | DIRECTED, [CEC_MSG_GIVE_TUNER_DEVICE_STATUS] = 3 | DIRECTED, [CEC_MSG_SELECT_ANALOGUE_SERVICE] = 6 | DIRECTED, [CEC_MSG_SELECT_DIGITAL_SERVICE] = 9 | DIRECTED, [CEC_MSG_TUNER_DEVICE_STATUS] = 7 | DIRECTED, [CEC_MSG_TUNER_STEP_DECREMENT] = 2 | DIRECTED, [CEC_MSG_TUNER_STEP_INCREMENT] = 2 | DIRECTED, [CEC_MSG_DEVICE_VENDOR_ID] = 5 | BCAST, [CEC_MSG_GIVE_DEVICE_VENDOR_ID] = 2 | DIRECTED, [CEC_MSG_VENDOR_COMMAND] = 2 | DIRECTED, [CEC_MSG_VENDOR_COMMAND_WITH_ID] = 5 | BOTH, [CEC_MSG_VENDOR_REMOTE_BUTTON_DOWN] = 2 | BOTH, [CEC_MSG_VENDOR_REMOTE_BUTTON_UP] = 2 | BOTH, [CEC_MSG_SET_OSD_STRING] = 3 | DIRECTED, [CEC_MSG_GIVE_OSD_NAME] = 2 | DIRECTED, [CEC_MSG_SET_OSD_NAME] = 2 | DIRECTED, [CEC_MSG_MENU_REQUEST] = 3 | DIRECTED, [CEC_MSG_MENU_STATUS] = 3 | DIRECTED, [CEC_MSG_USER_CONTROL_PRESSED] = 3 | DIRECTED, [CEC_MSG_USER_CONTROL_RELEASED] = 2 | DIRECTED, [CEC_MSG_GIVE_DEVICE_POWER_STATUS] = 2 | DIRECTED, [CEC_MSG_REPORT_POWER_STATUS] = 3 | DIRECTED | BCAST2_0, [CEC_MSG_FEATURE_ABORT] = 4 | DIRECTED, [CEC_MSG_ABORT] = 2 | DIRECTED, [CEC_MSG_GIVE_AUDIO_STATUS] = 2 | DIRECTED, [CEC_MSG_GIVE_SYSTEM_AUDIO_MODE_STATUS] = 2 | DIRECTED, [CEC_MSG_REPORT_AUDIO_STATUS] = 3 | DIRECTED, [CEC_MSG_REPORT_SHORT_AUDIO_DESCRIPTOR] = 2 | DIRECTED, [CEC_MSG_REQUEST_SHORT_AUDIO_DESCRIPTOR] = 2 | DIRECTED, [CEC_MSG_SET_SYSTEM_AUDIO_MODE] = 3 | BOTH, [CEC_MSG_SET_AUDIO_VOLUME_LEVEL] = 3 | DIRECTED, [CEC_MSG_SYSTEM_AUDIO_MODE_REQUEST] = 2 | DIRECTED, [CEC_MSG_SYSTEM_AUDIO_MODE_STATUS] = 3 | DIRECTED, [CEC_MSG_SET_AUDIO_RATE] = 3 | DIRECTED, [CEC_MSG_INITIATE_ARC] = 2 | DIRECTED, [CEC_MSG_REPORT_ARC_INITIATED] = 2 | DIRECTED, [CEC_MSG_REPORT_ARC_TERMINATED] = 2 | DIRECTED, [CEC_MSG_REQUEST_ARC_INITIATION] = 2 | DIRECTED, [CEC_MSG_REQUEST_ARC_TERMINATION] = 2 | DIRECTED, [CEC_MSG_TERMINATE_ARC] = 2 | DIRECTED, [CEC_MSG_REQUEST_CURRENT_LATENCY] = 4 | BCAST, [CEC_MSG_REPORT_CURRENT_LATENCY] = 6 | BCAST, [CEC_MSG_CDC_MESSAGE] = 2 | BCAST, }; /* Called by the CEC adapter if a message is received */ void cec_received_msg_ts(struct cec_adapter *adap, struct cec_msg *msg, ktime_t ts) { struct cec_data *data; u8 msg_init = cec_msg_initiator(msg); u8 msg_dest = cec_msg_destination(msg); u8 cmd = msg->msg[1]; bool is_reply = false; bool valid_la = true; bool monitor_valid_la = true; u8 min_len = 0; if (WARN_ON(!msg->len || msg->len > CEC_MAX_MSG_SIZE)) return; if (adap->devnode.unregistered) return; /* * Some CEC adapters will receive the messages that they transmitted. * This test filters out those messages by checking if we are the * initiator, and just returning in that case. * * Note that this won't work if this is an Unregistered device. * * It is bad practice if the hardware receives the message that it * transmitted and luckily most CEC adapters behave correctly in this * respect. */ if (msg_init != CEC_LOG_ADDR_UNREGISTERED && cec_has_log_addr(adap, msg_init)) return; msg->rx_ts = ktime_to_ns(ts); msg->rx_status = CEC_RX_STATUS_OK; msg->sequence = msg->reply = msg->timeout = 0; msg->tx_status = 0; msg->tx_ts = 0; msg->tx_arb_lost_cnt = 0; msg->tx_nack_cnt = 0; msg->tx_low_drive_cnt = 0; msg->tx_error_cnt = 0; msg->flags = 0; memset(msg->msg + msg->len, 0, sizeof(msg->msg) - msg->len); mutex_lock(&adap->lock); dprintk(2, "%s: %*ph\n", __func__, msg->len, msg->msg); if (!adap->transmit_in_progress) adap->last_initiator = 0xff; /* Check if this message was for us (directed or broadcast). */ if (!cec_msg_is_broadcast(msg)) { valid_la = cec_has_log_addr(adap, msg_dest); monitor_valid_la = valid_la; } /* * Check if the length is not too short or if the message is a * broadcast message where a directed message was expected or * vice versa. If so, then the message has to be ignored (according * to section CEC 7.3 and CEC 12.2). */ if (valid_la && msg->len > 1 && cec_msg_size[cmd]) { u8 dir_fl = cec_msg_size[cmd] & BOTH; min_len = cec_msg_size[cmd] & 0x1f; if (msg->len < min_len) valid_la = false; else if (!cec_msg_is_broadcast(msg) && !(dir_fl & DIRECTED)) valid_la = false; else if (cec_msg_is_broadcast(msg) && !(dir_fl & BCAST)) valid_la = false; else if (cec_msg_is_broadcast(msg) && adap->log_addrs.cec_version < CEC_OP_CEC_VERSION_2_0 && !(dir_fl & BCAST1_4)) valid_la = false; } if (valid_la && min_len) { /* These messages have special length requirements */ switch (cmd) { case CEC_MSG_RECORD_ON: switch (msg->msg[2]) { case CEC_OP_RECORD_SRC_OWN: break; case CEC_OP_RECORD_SRC_DIGITAL: if (msg->len < 10) valid_la = false; break; case CEC_OP_RECORD_SRC_ANALOG: if (msg->len < 7) valid_la = false; break; case CEC_OP_RECORD_SRC_EXT_PLUG: if (msg->len < 4) valid_la = false; break; case CEC_OP_RECORD_SRC_EXT_PHYS_ADDR: if (msg->len < 5) valid_la = false; break; } break; } } /* It's a valid message and not a poll or CDC message */ if (valid_la && msg->len > 1 && cmd != CEC_MSG_CDC_MESSAGE) { bool abort = cmd == CEC_MSG_FEATURE_ABORT; /* The aborted command is in msg[2] */ if (abort) cmd = msg->msg[2]; /* * Walk over all transmitted messages that are waiting for a * reply. */ list_for_each_entry(data, &adap->wait_queue, list) { struct cec_msg *dst = &data->msg; /* * The *only* CEC message that has two possible replies * is CEC_MSG_INITIATE_ARC. * In this case allow either of the two replies. */ if (!abort && dst->msg[1] == CEC_MSG_INITIATE_ARC && (cmd == CEC_MSG_REPORT_ARC_INITIATED || cmd == CEC_MSG_REPORT_ARC_TERMINATED) && (data->match_reply[0] == CEC_MSG_REPORT_ARC_INITIATED || data->match_reply[0] == CEC_MSG_REPORT_ARC_TERMINATED)) { dst->reply = cmd; data->match_reply[0] = cmd; } /* Does the command match? */ if ((abort && cmd != dst->msg[1]) || (!abort && memcmp(data->match_reply, msg->msg + 1, data->match_len))) continue; /* Does the addressing match? */ if (msg_init != cec_msg_destination(dst) && !cec_msg_is_broadcast(dst)) continue; /* We got a reply */ memcpy(dst->msg, msg->msg, msg->len); dst->len = msg->len; dst->rx_ts = msg->rx_ts; dst->rx_status = msg->rx_status; if (abort) dst->rx_status |= CEC_RX_STATUS_FEATURE_ABORT; msg->flags = dst->flags; msg->sequence = dst->sequence; /* Remove it from the wait_queue */ list_del_init(&data->list); /* Cancel the pending timeout work */ if (!cancel_delayed_work(&data->work)) { mutex_unlock(&adap->lock); cancel_delayed_work_sync(&data->work); mutex_lock(&adap->lock); } /* * Mark this as a reply, provided someone is still * waiting for the answer. */ if (data->fh) is_reply = true; cec_data_completed(data); break; } } mutex_unlock(&adap->lock); /* Pass the message on to any monitoring filehandles */ cec_queue_msg_monitor(adap, msg, monitor_valid_la); /* We're done if it is not for us or a poll message */ if (!valid_la || msg->len <= 1) return; if (adap->log_addrs.log_addr_mask == 0) return; /* * Process the message on the protocol level. If is_reply is true, * then cec_receive_notify() won't pass on the reply to the listener(s) * since that was already done by cec_data_completed() above. */ cec_receive_notify(adap, msg, is_reply); } EXPORT_SYMBOL_GPL(cec_received_msg_ts); /* Logical Address Handling */ /* * Attempt to claim a specific logical address. * * This function is called with adap->lock held. */ static int cec_config_log_addr(struct cec_adapter *adap, unsigned int idx, unsigned int log_addr) { struct cec_log_addrs *las = &adap->log_addrs; struct cec_msg msg = { }; const unsigned int max_retries = 2; unsigned int i; int err; if (cec_has_log_addr(adap, log_addr)) return 0; /* Send poll message */ msg.len = 1; msg.msg[0] = (log_addr << 4) | log_addr; for (i = 0; i < max_retries; i++) { err = cec_transmit_msg_fh(adap, &msg, NULL, true); /* * While trying to poll the physical address was reset * and the adapter was unconfigured, so bail out. */ if (adap->phys_addr == CEC_PHYS_ADDR_INVALID) return -EINTR; /* Also bail out if the PA changed while configuring. */ if (adap->must_reconfigure) return -EINTR; if (err) return err; /* * The message was aborted or timed out due to a disconnect or * unconfigure, just bail out. */ if (msg.tx_status & (CEC_TX_STATUS_ABORTED | CEC_TX_STATUS_TIMEOUT)) return -EINTR; if (msg.tx_status & CEC_TX_STATUS_OK) return 0; if (msg.tx_status & CEC_TX_STATUS_NACK) break; /* * Retry up to max_retries times if the message was neither * OKed or NACKed. This can happen due to e.g. a Lost * Arbitration condition. */ } /* * If we are unable to get an OK or a NACK after max_retries attempts * (and note that each attempt already consists of four polls), then * we assume that something is really weird and that it is not a * good idea to try and claim this logical address. */ if (i == max_retries) { dprintk(0, "polling for LA %u failed with tx_status=0x%04x\n", log_addr, msg.tx_status); return 0; } /* * Message not acknowledged, so this logical * address is free to use. */ err = call_op(adap, adap_log_addr, log_addr); if (err) return err; las->log_addr[idx] = log_addr; las->log_addr_mask |= 1 << log_addr; return 1; } /* * Unconfigure the adapter: clear all logical addresses and send * the state changed event. * * This function is called with adap->lock held. */ static void cec_adap_unconfigure(struct cec_adapter *adap) { if (!adap->needs_hpd || adap->phys_addr != CEC_PHYS_ADDR_INVALID) WARN_ON(call_op(adap, adap_log_addr, CEC_LOG_ADDR_INVALID)); adap->log_addrs.log_addr_mask = 0; adap->is_configured = false; cec_flush(adap); wake_up_interruptible(&adap->kthread_waitq); cec_post_state_event(adap); call_void_op(adap, adap_unconfigured); } /* * Attempt to claim the required logical addresses. */ static int cec_config_thread_func(void *arg) { /* The various LAs for each type of device */ static const u8 tv_log_addrs[] = { CEC_LOG_ADDR_TV, CEC_LOG_ADDR_SPECIFIC, CEC_LOG_ADDR_INVALID }; static const u8 record_log_addrs[] = { CEC_LOG_ADDR_RECORD_1, CEC_LOG_ADDR_RECORD_2, CEC_LOG_ADDR_RECORD_3, CEC_LOG_ADDR_BACKUP_1, CEC_LOG_ADDR_BACKUP_2, CEC_LOG_ADDR_INVALID }; static const u8 tuner_log_addrs[] = { CEC_LOG_ADDR_TUNER_1, CEC_LOG_ADDR_TUNER_2, CEC_LOG_ADDR_TUNER_3, CEC_LOG_ADDR_TUNER_4, CEC_LOG_ADDR_BACKUP_1, CEC_LOG_ADDR_BACKUP_2, CEC_LOG_ADDR_INVALID }; static const u8 playback_log_addrs[] = { CEC_LOG_ADDR_PLAYBACK_1, CEC_LOG_ADDR_PLAYBACK_2, CEC_LOG_ADDR_PLAYBACK_3, CEC_LOG_ADDR_BACKUP_1, CEC_LOG_ADDR_BACKUP_2, CEC_LOG_ADDR_INVALID }; static const u8 audiosystem_log_addrs[] = { CEC_LOG_ADDR_AUDIOSYSTEM, CEC_LOG_ADDR_INVALID }; static const u8 specific_use_log_addrs[] = { CEC_LOG_ADDR_SPECIFIC, CEC_LOG_ADDR_BACKUP_1, CEC_LOG_ADDR_BACKUP_2, CEC_LOG_ADDR_INVALID }; static const u8 *type2addrs[6] = { [CEC_LOG_ADDR_TYPE_TV] = tv_log_addrs, [CEC_LOG_ADDR_TYPE_RECORD] = record_log_addrs, [CEC_LOG_ADDR_TYPE_TUNER] = tuner_log_addrs, [CEC_LOG_ADDR_TYPE_PLAYBACK] = playback_log_addrs, [CEC_LOG_ADDR_TYPE_AUDIOSYSTEM] = audiosystem_log_addrs, [CEC_LOG_ADDR_TYPE_SPECIFIC] = specific_use_log_addrs, }; static const u16 type2mask[] = { [CEC_LOG_ADDR_TYPE_TV] = CEC_LOG_ADDR_MASK_TV, [CEC_LOG_ADDR_TYPE_RECORD] = CEC_LOG_ADDR_MASK_RECORD, [CEC_LOG_ADDR_TYPE_TUNER] = CEC_LOG_ADDR_MASK_TUNER, [CEC_LOG_ADDR_TYPE_PLAYBACK] = CEC_LOG_ADDR_MASK_PLAYBACK, [CEC_LOG_ADDR_TYPE_AUDIOSYSTEM] = CEC_LOG_ADDR_MASK_AUDIOSYSTEM, [CEC_LOG_ADDR_TYPE_SPECIFIC] = CEC_LOG_ADDR_MASK_SPECIFIC, }; struct cec_adapter *adap = arg; struct cec_log_addrs *las = &adap->log_addrs; int err; int i, j; mutex_lock(&adap->lock); dprintk(1, "physical address: %x.%x.%x.%x, claim %d logical addresses\n", cec_phys_addr_exp(adap->phys_addr), las->num_log_addrs); las->log_addr_mask = 0; if (las->log_addr_type[0] == CEC_LOG_ADDR_TYPE_UNREGISTERED) goto configured; reconfigure: for (i = 0; i < las->num_log_addrs; i++) { unsigned int type = las->log_addr_type[i]; const u8 *la_list; u8 last_la; /* * The TV functionality can only map to physical address 0. * For any other address, try the Specific functionality * instead as per the spec. */ if (adap->phys_addr && type == CEC_LOG_ADDR_TYPE_TV) type = CEC_LOG_ADDR_TYPE_SPECIFIC; la_list = type2addrs[type]; last_la = las->log_addr[i]; las->log_addr[i] = CEC_LOG_ADDR_INVALID; if (last_la == CEC_LOG_ADDR_INVALID || last_la == CEC_LOG_ADDR_UNREGISTERED || !((1 << last_la) & type2mask[type])) last_la = la_list[0]; err = cec_config_log_addr(adap, i, last_la); if (adap->must_reconfigure) { adap->must_reconfigure = false; las->log_addr_mask = 0; goto reconfigure; } if (err > 0) /* Reused last LA */ continue; if (err < 0) goto unconfigure; for (j = 0; la_list[j] != CEC_LOG_ADDR_INVALID; j++) { /* Tried this one already, skip it */ if (la_list[j] == last_la) continue; /* The backup addresses are CEC 2.0 specific */ if ((la_list[j] == CEC_LOG_ADDR_BACKUP_1 || la_list[j] == CEC_LOG_ADDR_BACKUP_2) && las->cec_version < CEC_OP_CEC_VERSION_2_0) continue; err = cec_config_log_addr(adap, i, la_list[j]); if (err == 0) /* LA is in use */ continue; if (err < 0) goto unconfigure; /* Done, claimed an LA */ break; } if (la_list[j] == CEC_LOG_ADDR_INVALID) dprintk(1, "could not claim LA %d\n", i); } if (adap->log_addrs.log_addr_mask == 0 && !(las->flags & CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK)) goto unconfigure; configured: if (adap->log_addrs.log_addr_mask == 0) { /* Fall back to unregistered */ las->log_addr[0] = CEC_LOG_ADDR_UNREGISTERED; las->log_addr_mask = 1 << las->log_addr[0]; for (i = 1; i < las->num_log_addrs; i++) las->log_addr[i] = CEC_LOG_ADDR_INVALID; } for (i = las->num_log_addrs; i < CEC_MAX_LOG_ADDRS; i++) las->log_addr[i] = CEC_LOG_ADDR_INVALID; adap->is_configured = true; adap->is_configuring = false; adap->must_reconfigure = false; cec_post_state_event(adap); /* * Now post the Report Features and Report Physical Address broadcast * messages. Note that these are non-blocking transmits, meaning that * they are just queued up and once adap->lock is unlocked the main * thread will kick in and start transmitting these. * * If after this function is done (but before one or more of these * messages are actually transmitted) the CEC adapter is unconfigured, * then any remaining messages will be dropped by the main thread. */ for (i = 0; i < las->num_log_addrs; i++) { struct cec_msg msg = {}; if (las->log_addr[i] == CEC_LOG_ADDR_INVALID || (las->flags & CEC_LOG_ADDRS_FL_CDC_ONLY)) continue; msg.msg[0] = (las->log_addr[i] << 4) | 0x0f; /* Report Features must come first according to CEC 2.0 */ if (las->log_addr[i] != CEC_LOG_ADDR_UNREGISTERED && adap->log_addrs.cec_version >= CEC_OP_CEC_VERSION_2_0) { cec_fill_msg_report_features(adap, &msg, i); cec_transmit_msg_fh(adap, &msg, NULL, false); } /* Report Physical Address */ cec_msg_report_physical_addr(&msg, adap->phys_addr, las->primary_device_type[i]); dprintk(1, "config: la %d pa %x.%x.%x.%x\n", las->log_addr[i], cec_phys_addr_exp(adap->phys_addr)); cec_transmit_msg_fh(adap, &msg, NULL, false); /* Report Vendor ID */ if (adap->log_addrs.vendor_id != CEC_VENDOR_ID_NONE) { cec_msg_device_vendor_id(&msg, adap->log_addrs.vendor_id); cec_transmit_msg_fh(adap, &msg, NULL, false); } } adap->kthread_config = NULL; complete(&adap->config_completion); mutex_unlock(&adap->lock); call_void_op(adap, configured); return 0; unconfigure: for (i = 0; i < las->num_log_addrs; i++) las->log_addr[i] = CEC_LOG_ADDR_INVALID; cec_adap_unconfigure(adap); adap->is_configuring = false; adap->must_reconfigure = false; adap->kthread_config = NULL; complete(&adap->config_completion); mutex_unlock(&adap->lock); return 0; } /* * Called from either __cec_s_phys_addr or __cec_s_log_addrs to claim the * logical addresses. * * This function is called with adap->lock held. */ static void cec_claim_log_addrs(struct cec_adapter *adap, bool block) { if (WARN_ON(adap->is_claiming_log_addrs || adap->is_configuring || adap->is_configured)) return; adap->is_claiming_log_addrs = true; init_completion(&adap->config_completion); /* Ready to kick off the thread */ adap->is_configuring = true; adap->kthread_config = kthread_run(cec_config_thread_func, adap, "ceccfg-%s", adap->name); if (IS_ERR(adap->kthread_config)) { adap->kthread_config = NULL; adap->is_configuring = false; } else if (block) { mutex_unlock(&adap->lock); wait_for_completion(&adap->config_completion); mutex_lock(&adap->lock); } adap->is_claiming_log_addrs = false; } /* * Helper function to enable/disable the CEC adapter. * * This function is called with adap->lock held. */ int cec_adap_enable(struct cec_adapter *adap) { bool enable; int ret = 0; enable = adap->monitor_all_cnt || adap->monitor_pin_cnt || adap->log_addrs.num_log_addrs; if (adap->needs_hpd) enable = enable && adap->phys_addr != CEC_PHYS_ADDR_INVALID; if (adap->devnode.unregistered) enable = false; if (enable == adap->is_enabled) return 0; /* serialize adap_enable */ mutex_lock(&adap->devnode.lock); if (enable) { adap->last_initiator = 0xff; adap->transmit_in_progress = false; adap->tx_low_drive_log_cnt = 0; adap->tx_error_log_cnt = 0; ret = adap->ops->adap_enable(adap, true); if (!ret) { /* * Enable monitor-all/pin modes if needed. We warn, but * continue if this fails as this is not a critical error. */ if (adap->monitor_all_cnt) WARN_ON(call_op(adap, adap_monitor_all_enable, true)); if (adap->monitor_pin_cnt) WARN_ON(call_op(adap, adap_monitor_pin_enable, true)); } } else { /* Disable monitor-all/pin modes if needed (needs_hpd == 1) */ if (adap->monitor_all_cnt) WARN_ON(call_op(adap, adap_monitor_all_enable, false)); if (adap->monitor_pin_cnt) WARN_ON(call_op(adap, adap_monitor_pin_enable, false)); WARN_ON(adap->ops->adap_enable(adap, false)); adap->last_initiator = 0xff; adap->transmit_in_progress = false; adap->transmit_in_progress_aborted = false; if (adap->transmitting) cec_data_cancel(adap->transmitting, CEC_TX_STATUS_ABORTED, 0); } if (!ret) adap->is_enabled = enable; wake_up_interruptible(&adap->kthread_waitq); mutex_unlock(&adap->devnode.lock); return ret; } /* Set a new physical address and send an event notifying userspace of this. * * This function is called with adap->lock held. */ void __cec_s_phys_addr(struct cec_adapter *adap, u16 phys_addr, bool block) { bool becomes_invalid = phys_addr == CEC_PHYS_ADDR_INVALID; bool is_invalid = adap->phys_addr == CEC_PHYS_ADDR_INVALID; if (phys_addr == adap->phys_addr) return; if (!becomes_invalid && adap->devnode.unregistered) return; dprintk(1, "new physical address %x.%x.%x.%x\n", cec_phys_addr_exp(phys_addr)); if (becomes_invalid || !is_invalid) { adap->phys_addr = CEC_PHYS_ADDR_INVALID; cec_post_state_event(adap); cec_adap_unconfigure(adap); if (becomes_invalid) { cec_adap_enable(adap); return; } } adap->phys_addr = phys_addr; if (is_invalid) cec_adap_enable(adap); cec_post_state_event(adap); if (!adap->log_addrs.num_log_addrs) return; if (adap->is_configuring) adap->must_reconfigure = true; else cec_claim_log_addrs(adap, block); } void cec_s_phys_addr(struct cec_adapter *adap, u16 phys_addr, bool block) { if (IS_ERR_OR_NULL(adap)) return; mutex_lock(&adap->lock); __cec_s_phys_addr(adap, phys_addr, block); mutex_unlock(&adap->lock); } EXPORT_SYMBOL_GPL(cec_s_phys_addr); /* * Note: In the drm subsystem, prefer calling (if possible): * * cec_s_phys_addr(adap, connector->display_info.source_physical_address, false); */ void cec_s_phys_addr_from_edid(struct cec_adapter *adap, const struct edid *edid) { u16 pa = CEC_PHYS_ADDR_INVALID; if (edid && edid->extensions) pa = cec_get_edid_phys_addr((const u8 *)edid, EDID_LENGTH * (edid->extensions + 1), NULL); cec_s_phys_addr(adap, pa, false); } EXPORT_SYMBOL_GPL(cec_s_phys_addr_from_edid); void cec_s_conn_info(struct cec_adapter *adap, const struct cec_connector_info *conn_info) { if (IS_ERR_OR_NULL(adap)) return; if (!(adap->capabilities & CEC_CAP_CONNECTOR_INFO)) return; mutex_lock(&adap->lock); if (conn_info) adap->conn_info = *conn_info; else memset(&adap->conn_info, 0, sizeof(adap->conn_info)); cec_post_state_event(adap); mutex_unlock(&adap->lock); } EXPORT_SYMBOL_GPL(cec_s_conn_info); /* * Called from either the ioctl or a driver to set the logical addresses. * * This function is called with adap->lock held. */ int __cec_s_log_addrs(struct cec_adapter *adap, struct cec_log_addrs *log_addrs, bool block) { u16 type_mask = 0; int err; int i; if (adap->devnode.unregistered) return -ENODEV; if (!log_addrs || log_addrs->num_log_addrs == 0) { if (!adap->log_addrs.num_log_addrs) return 0; if (adap->is_configuring || adap->is_configured) cec_adap_unconfigure(adap); adap->log_addrs.num_log_addrs = 0; for (i = 0; i < CEC_MAX_LOG_ADDRS; i++) adap->log_addrs.log_addr[i] = CEC_LOG_ADDR_INVALID; adap->log_addrs.osd_name[0] = '\0'; adap->log_addrs.vendor_id = CEC_VENDOR_ID_NONE; adap->log_addrs.cec_version = CEC_OP_CEC_VERSION_2_0; cec_adap_enable(adap); return 0; } if (log_addrs->flags & CEC_LOG_ADDRS_FL_CDC_ONLY) { /* * Sanitize log_addrs fields if a CDC-Only device is * requested. */ log_addrs->num_log_addrs = 1; log_addrs->osd_name[0] = '\0'; log_addrs->vendor_id = CEC_VENDOR_ID_NONE; log_addrs->log_addr_type[0] = CEC_LOG_ADDR_TYPE_UNREGISTERED; /* * This is just an internal convention since a CDC-Only device * doesn't have to be a switch. But switches already use * unregistered, so it makes some kind of sense to pick this * as the primary device. Since a CDC-Only device never sends * any 'normal' CEC messages this primary device type is never * sent over the CEC bus. */ log_addrs->primary_device_type[0] = CEC_OP_PRIM_DEVTYPE_SWITCH; log_addrs->all_device_types[0] = 0; log_addrs->features[0][0] = 0; log_addrs->features[0][1] = 0; } /* Ensure the osd name is 0-terminated */ log_addrs->osd_name[sizeof(log_addrs->osd_name) - 1] = '\0'; /* Sanity checks */ if (log_addrs->num_log_addrs > adap->available_log_addrs) { dprintk(1, "num_log_addrs > %d\n", adap->available_log_addrs); return -EINVAL; } /* * Vendor ID is a 24 bit number, so check if the value is * within the correct range. */ if (log_addrs->vendor_id != CEC_VENDOR_ID_NONE && (log_addrs->vendor_id & 0xff000000) != 0) { dprintk(1, "invalid vendor ID\n"); return -EINVAL; } if (log_addrs->cec_version != CEC_OP_CEC_VERSION_1_4 && log_addrs->cec_version != CEC_OP_CEC_VERSION_2_0) { dprintk(1, "invalid CEC version\n"); return -EINVAL; } if (log_addrs->num_log_addrs > 1) for (i = 0; i < log_addrs->num_log_addrs; i++) if (log_addrs->log_addr_type[i] == CEC_LOG_ADDR_TYPE_UNREGISTERED) { dprintk(1, "num_log_addrs > 1 can't be combined with unregistered LA\n"); return -EINVAL; } for (i = 0; i < log_addrs->num_log_addrs; i++) { const u8 feature_sz = ARRAY_SIZE(log_addrs->features[0]); u8 *features = log_addrs->features[i]; bool op_is_dev_features = false; unsigned int j; log_addrs->log_addr[i] = CEC_LOG_ADDR_INVALID; if (log_addrs->log_addr_type[i] > CEC_LOG_ADDR_TYPE_UNREGISTERED) { dprintk(1, "unknown logical address type\n"); return -EINVAL; } if (type_mask & (1 << log_addrs->log_addr_type[i])) { dprintk(1, "duplicate logical address type\n"); return -EINVAL; } type_mask |= 1 << log_addrs->log_addr_type[i]; if ((type_mask & (1 << CEC_LOG_ADDR_TYPE_RECORD)) && (type_mask & (1 << CEC_LOG_ADDR_TYPE_PLAYBACK))) { /* Record already contains the playback functionality */ dprintk(1, "invalid record + playback combination\n"); return -EINVAL; } if (log_addrs->primary_device_type[i] > CEC_OP_PRIM_DEVTYPE_PROCESSOR) { dprintk(1, "unknown primary device type\n"); return -EINVAL; } if (log_addrs->primary_device_type[i] == 2) { dprintk(1, "invalid primary device type\n"); return -EINVAL; } for (j = 0; j < feature_sz; j++) { if ((features[j] & 0x80) == 0) { if (op_is_dev_features) break; op_is_dev_features = true; } } if (!op_is_dev_features || j == feature_sz) { dprintk(1, "malformed features\n"); return -EINVAL; } /* Zero unused part of the feature array */ memset(features + j + 1, 0, feature_sz - j - 1); } if (log_addrs->cec_version >= CEC_OP_CEC_VERSION_2_0) { if (log_addrs->num_log_addrs > 2) { dprintk(1, "CEC 2.0 allows no more than 2 logical addresses\n"); return -EINVAL; } if (log_addrs->num_log_addrs == 2) { if (!(type_mask & ((1 << CEC_LOG_ADDR_TYPE_AUDIOSYSTEM) | (1 << CEC_LOG_ADDR_TYPE_TV)))) { dprintk(1, "two LAs is only allowed for audiosystem and TV\n"); return -EINVAL; } if (!(type_mask & ((1 << CEC_LOG_ADDR_TYPE_PLAYBACK) | (1 << CEC_LOG_ADDR_TYPE_RECORD)))) { dprintk(1, "an audiosystem/TV can only be combined with record or playback\n"); return -EINVAL; } } } /* Zero unused LAs */ for (i = log_addrs->num_log_addrs; i < CEC_MAX_LOG_ADDRS; i++) { log_addrs->primary_device_type[i] = 0; log_addrs->log_addr_type[i] = 0; log_addrs->all_device_types[i] = 0; memset(log_addrs->features[i], 0, sizeof(log_addrs->features[i])); } log_addrs->log_addr_mask = adap->log_addrs.log_addr_mask; adap->log_addrs = *log_addrs; err = cec_adap_enable(adap); if (!err && adap->phys_addr != CEC_PHYS_ADDR_INVALID) cec_claim_log_addrs(adap, block); return err; } int cec_s_log_addrs(struct cec_adapter *adap, struct cec_log_addrs *log_addrs, bool block) { int err; mutex_lock(&adap->lock); err = __cec_s_log_addrs(adap, log_addrs, block); mutex_unlock(&adap->lock); return err; } EXPORT_SYMBOL_GPL(cec_s_log_addrs); /* High-level core CEC message handling */ /* Fill in the Report Features message */ static void cec_fill_msg_report_features(struct cec_adapter *adap, struct cec_msg *msg, unsigned int la_idx) { const struct cec_log_addrs *las = &adap->log_addrs; const u8 *features = las->features[la_idx]; bool op_is_dev_features = false; unsigned int idx; /* Report Features */ msg->msg[0] = (las->log_addr[la_idx] << 4) | 0x0f; msg->len = 4; msg->msg[1] = CEC_MSG_REPORT_FEATURES; msg->msg[2] = adap->log_addrs.cec_version; msg->msg[3] = las->all_device_types[la_idx]; /* Write RC Profiles first, then Device Features */ for (idx = 0; idx < ARRAY_SIZE(las->features[0]); idx++) { msg->msg[msg->len++] = features[idx]; if ((features[idx] & CEC_OP_FEAT_EXT) == 0) { if (op_is_dev_features) break; op_is_dev_features = true; } } } /* Transmit the Feature Abort message */ static int cec_feature_abort_reason(struct cec_adapter *adap, struct cec_msg *msg, u8 reason) { struct cec_msg tx_msg = { }; /* * Don't reply with CEC_MSG_FEATURE_ABORT to a CEC_MSG_FEATURE_ABORT * message! */ if (msg->msg[1] == CEC_MSG_FEATURE_ABORT) return 0; /* Don't Feature Abort messages from 'Unregistered' */ if (cec_msg_initiator(msg) == CEC_LOG_ADDR_UNREGISTERED) return 0; cec_msg_set_reply_to(&tx_msg, msg); cec_msg_feature_abort(&tx_msg, msg->msg[1], reason); return cec_transmit_msg(adap, &tx_msg, false); } static int cec_feature_abort(struct cec_adapter *adap, struct cec_msg *msg) { return cec_feature_abort_reason(adap, msg, CEC_OP_ABORT_UNRECOGNIZED_OP); } static int cec_feature_refused(struct cec_adapter *adap, struct cec_msg *msg) { return cec_feature_abort_reason(adap, msg, CEC_OP_ABORT_REFUSED); } /* * Called when a CEC message is received. This function will do any * necessary core processing. The is_reply bool is true if this message * is a reply to an earlier transmit. * * The message is either a broadcast message or a valid directed message. */ static int cec_receive_notify(struct cec_adapter *adap, struct cec_msg *msg, bool is_reply) { bool is_broadcast = cec_msg_is_broadcast(msg); u8 dest_laddr = cec_msg_destination(msg); u8 init_laddr = cec_msg_initiator(msg); u8 devtype = cec_log_addr2dev(adap, dest_laddr); int la_idx = cec_log_addr2idx(adap, dest_laddr); bool from_unregistered = init_laddr == 0xf; struct cec_msg tx_cec_msg = { }; dprintk(2, "%s: %*ph\n", __func__, msg->len, msg->msg); /* If this is a CDC-Only device, then ignore any non-CDC messages */ if (cec_is_cdc_only(&adap->log_addrs) && msg->msg[1] != CEC_MSG_CDC_MESSAGE) return 0; /* Allow drivers to process the message first */ if (adap->ops->received && !adap->devnode.unregistered && adap->ops->received(adap, msg) != -ENOMSG) return 0; /* * REPORT_PHYSICAL_ADDR, CEC_MSG_USER_CONTROL_PRESSED and * CEC_MSG_USER_CONTROL_RELEASED messages always have to be * handled by the CEC core, even if the passthrough mode is on. * The others are just ignored if passthrough mode is on. */ switch (msg->msg[1]) { case CEC_MSG_GET_CEC_VERSION: case CEC_MSG_ABORT: case CEC_MSG_GIVE_DEVICE_POWER_STATUS: case CEC_MSG_GIVE_OSD_NAME: /* * These messages reply with a directed message, so ignore if * the initiator is Unregistered. */ if (!adap->passthrough && from_unregistered) return 0; fallthrough; case CEC_MSG_GIVE_DEVICE_VENDOR_ID: case CEC_MSG_GIVE_FEATURES: case CEC_MSG_GIVE_PHYSICAL_ADDR: /* * Skip processing these messages if the passthrough mode * is on. */ if (adap->passthrough) goto skip_processing; /* Ignore if addressing is wrong */ if (is_broadcast) return 0; break; case CEC_MSG_USER_CONTROL_PRESSED: case CEC_MSG_USER_CONTROL_RELEASED: /* Wrong addressing mode: don't process */ if (is_broadcast || from_unregistered) goto skip_processing; break; case CEC_MSG_REPORT_PHYSICAL_ADDR: /* * This message is always processed, regardless of the * passthrough setting. * * Exception: don't process if wrong addressing mode. */ if (!is_broadcast) goto skip_processing; break; default: break; } cec_msg_set_reply_to(&tx_cec_msg, msg); switch (msg->msg[1]) { /* The following messages are processed but still passed through */ case CEC_MSG_REPORT_PHYSICAL_ADDR: { u16 pa = (msg->msg[2] << 8) | msg->msg[3]; dprintk(1, "reported physical address %x.%x.%x.%x for logical address %d\n", cec_phys_addr_exp(pa), init_laddr); break; } case CEC_MSG_USER_CONTROL_PRESSED: if (!(adap->capabilities & CEC_CAP_RC) || !(adap->log_addrs.flags & CEC_LOG_ADDRS_FL_ALLOW_RC_PASSTHRU)) break; #ifdef CONFIG_MEDIA_CEC_RC switch (msg->msg[2]) { /* * Play function, this message can have variable length * depending on the specific play function that is used. */ case CEC_OP_UI_CMD_PLAY_FUNCTION: if (msg->len == 2) rc_keydown(adap->rc, RC_PROTO_CEC, msg->msg[2], 0); else rc_keydown(adap->rc, RC_PROTO_CEC, msg->msg[2] << 8 | msg->msg[3], 0); break; /* * Other function messages that are not handled. * Currently the RC framework does not allow to supply an * additional parameter to a keypress. These "keys" contain * other information such as channel number, an input number * etc. * For the time being these messages are not processed by the * framework and are simply forwarded to the user space. */ case CEC_OP_UI_CMD_SELECT_BROADCAST_TYPE: case CEC_OP_UI_CMD_SELECT_SOUND_PRESENTATION: case CEC_OP_UI_CMD_TUNE_FUNCTION: case CEC_OP_UI_CMD_SELECT_MEDIA_FUNCTION: case CEC_OP_UI_CMD_SELECT_AV_INPUT_FUNCTION: case CEC_OP_UI_CMD_SELECT_AUDIO_INPUT_FUNCTION: break; default: rc_keydown(adap->rc, RC_PROTO_CEC, msg->msg[2], 0); break; } #endif break; case CEC_MSG_USER_CONTROL_RELEASED: if (!(adap->capabilities & CEC_CAP_RC) || !(adap->log_addrs.flags & CEC_LOG_ADDRS_FL_ALLOW_RC_PASSTHRU)) break; #ifdef CONFIG_MEDIA_CEC_RC rc_keyup(adap->rc); #endif break; /* * The remaining messages are only processed if the passthrough mode * is off. */ case CEC_MSG_GET_CEC_VERSION: cec_msg_cec_version(&tx_cec_msg, adap->log_addrs.cec_version); return cec_transmit_msg(adap, &tx_cec_msg, false); case CEC_MSG_GIVE_PHYSICAL_ADDR: /* Do nothing for CEC switches using addr 15 */ if (devtype == CEC_OP_PRIM_DEVTYPE_SWITCH && dest_laddr == 15) return 0; cec_msg_report_physical_addr(&tx_cec_msg, adap->phys_addr, devtype); return cec_transmit_msg(adap, &tx_cec_msg, false); case CEC_MSG_GIVE_DEVICE_VENDOR_ID: if (adap->log_addrs.vendor_id == CEC_VENDOR_ID_NONE) return cec_feature_abort(adap, msg); cec_msg_device_vendor_id(&tx_cec_msg, adap->log_addrs.vendor_id); return cec_transmit_msg(adap, &tx_cec_msg, false); case CEC_MSG_ABORT: /* Do nothing for CEC switches */ if (devtype == CEC_OP_PRIM_DEVTYPE_SWITCH) return 0; return cec_feature_refused(adap, msg); case CEC_MSG_GIVE_OSD_NAME: { if (adap->log_addrs.osd_name[0] == 0) return cec_feature_abort(adap, msg); cec_msg_set_osd_name(&tx_cec_msg, adap->log_addrs.osd_name); return cec_transmit_msg(adap, &tx_cec_msg, false); } case CEC_MSG_GIVE_FEATURES: if (adap->log_addrs.cec_version < CEC_OP_CEC_VERSION_2_0) return cec_feature_abort(adap, msg); cec_fill_msg_report_features(adap, &tx_cec_msg, la_idx); return cec_transmit_msg(adap, &tx_cec_msg, false); default: /* * Unprocessed messages are aborted if userspace isn't doing * any processing either. */ if (!is_broadcast && !is_reply && !adap->follower_cnt && !adap->cec_follower && msg->msg[1] != CEC_MSG_FEATURE_ABORT) return cec_feature_abort(adap, msg); break; } skip_processing: /* If this was a reply, then we're done, unless otherwise specified */ if (is_reply && !(msg->flags & CEC_MSG_FL_REPLY_TO_FOLLOWERS)) return 0; /* * Send to the exclusive follower if there is one, otherwise send * to all followers. */ if (adap->cec_follower) cec_queue_msg_fh(adap->cec_follower, msg); else cec_queue_msg_followers(adap, msg); return 0; } /* * Helper functions to keep track of the 'monitor all' use count. * * These functions are called with adap->lock held. */ int cec_monitor_all_cnt_inc(struct cec_adapter *adap) { int ret; if (adap->monitor_all_cnt++) return 0; ret = cec_adap_enable(adap); if (ret) adap->monitor_all_cnt--; return ret; } void cec_monitor_all_cnt_dec(struct cec_adapter *adap) { if (WARN_ON(!adap->monitor_all_cnt)) return; if (--adap->monitor_all_cnt) return; WARN_ON(call_op(adap, adap_monitor_all_enable, false)); cec_adap_enable(adap); } /* * Helper functions to keep track of the 'monitor pin' use count. * * These functions are called with adap->lock held. */ int cec_monitor_pin_cnt_inc(struct cec_adapter *adap) { int ret; if (adap->monitor_pin_cnt++) return 0; ret = cec_adap_enable(adap); if (ret) adap->monitor_pin_cnt--; return ret; } void cec_monitor_pin_cnt_dec(struct cec_adapter *adap) { if (WARN_ON(!adap->monitor_pin_cnt)) return; if (--adap->monitor_pin_cnt) return; WARN_ON(call_op(adap, adap_monitor_pin_enable, false)); cec_adap_enable(adap); } #ifdef CONFIG_DEBUG_FS /* * Log the current state of the CEC adapter. * Very useful for debugging. */ int cec_adap_status(struct seq_file *file, void *priv) { struct cec_adapter *adap = dev_get_drvdata(file->private); struct cec_data *data; mutex_lock(&adap->lock); seq_printf(file, "enabled: %d\n", adap->is_enabled); seq_printf(file, "configured: %d\n", adap->is_configured); seq_printf(file, "configuring: %d\n", adap->is_configuring); seq_printf(file, "phys_addr: %x.%x.%x.%x\n", cec_phys_addr_exp(adap->phys_addr)); seq_printf(file, "number of LAs: %d\n", adap->log_addrs.num_log_addrs); seq_printf(file, "LA mask: 0x%04x\n", adap->log_addrs.log_addr_mask); if (adap->cec_follower) seq_printf(file, "has CEC follower%s\n", adap->passthrough ? " (in passthrough mode)" : ""); if (adap->cec_initiator) seq_puts(file, "has CEC initiator\n"); if (adap->monitor_all_cnt) seq_printf(file, "file handles in Monitor All mode: %u\n", adap->monitor_all_cnt); if (adap->monitor_pin_cnt) seq_printf(file, "file handles in Monitor Pin mode: %u\n", adap->monitor_pin_cnt); if (adap->tx_timeout_cnt) { seq_printf(file, "transmit timeout count: %u\n", adap->tx_timeout_cnt); adap->tx_timeout_cnt = 0; } if (adap->tx_low_drive_cnt) { seq_printf(file, "transmit low drive count: %u\n", adap->tx_low_drive_cnt); adap->tx_low_drive_cnt = 0; } if (adap->tx_arb_lost_cnt) { seq_printf(file, "transmit arbitration lost count: %u\n", adap->tx_arb_lost_cnt); adap->tx_arb_lost_cnt = 0; } if (adap->tx_error_cnt) { seq_printf(file, "transmit error count: %u\n", adap->tx_error_cnt); adap->tx_error_cnt = 0; } data = adap->transmitting; if (data) seq_printf(file, "transmitting message: %*ph (reply: %*ph, timeout: %ums)\n", data->msg.len, data->msg.msg, data->match_len, data->match_reply, data->msg.timeout); seq_printf(file, "pending transmits: %u\n", adap->transmit_queue_sz); list_for_each_entry(data, &adap->transmit_queue, list) { seq_printf(file, "queued tx message: %*ph (reply: %*ph, timeout: %ums)\n", data->msg.len, data->msg.msg, data->match_len, data->match_reply, data->msg.timeout); } list_for_each_entry(data, &adap->wait_queue, list) { seq_printf(file, "message waiting for reply: %*ph (reply: %*ph, timeout: %ums)\n", data->msg.len, data->msg.msg, data->match_len, data->match_reply, data->msg.timeout); } call_void_op(adap, adap_status, file); mutex_unlock(&adap->lock); return 0; } #endif
1 2 2 1 5 2 3 4 1 5 2 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 // SPDX-License-Identifier: GPL-2.0-or-later /* * HID driver for some cypress "special" devices * * Copyright (c) 1999 Andreas Gal * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2006-2007 Jiri Kosina * Copyright (c) 2008 Jiri Slaby */ /* */ #include <linux/device.h> #include <linux/hid.h> #include <linux/input.h> #include <linux/module.h> #include "hid-ids.h" #define CP_RDESC_SWAPPED_MIN_MAX 0x01 #define CP_2WHEEL_MOUSE_HACK 0x02 #define CP_2WHEEL_MOUSE_HACK_ON 0x04 #define VA_INVAL_LOGICAL_BOUNDARY 0x08 /* * Some USB barcode readers from cypress have usage min and usage max in * the wrong order */ static __u8 *cp_rdesc_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { unsigned int i; if (*rsize < 4) return rdesc; for (i = 0; i < *rsize - 4; i++) if (rdesc[i] == 0x29 && rdesc[i + 2] == 0x19) { rdesc[i] = 0x19; rdesc[i + 2] = 0x29; swap(rdesc[i + 3], rdesc[i + 1]); } return rdesc; } static __u8 *va_logical_boundary_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { /* * Varmilo VA104M (with VID Cypress and device ID 07B1) incorrectly * reports Logical Minimum of its Consumer Control device as 572 * (0x02 0x3c). Fix this by setting its Logical Minimum to zero. */ if (*rsize == 25 && rdesc[0] == 0x05 && rdesc[1] == 0x0c && rdesc[2] == 0x09 && rdesc[3] == 0x01 && rdesc[6] == 0x19 && rdesc[7] == 0x00 && rdesc[11] == 0x16 && rdesc[12] == 0x3c && rdesc[13] == 0x02) { hid_info(hdev, "fixing up varmilo VA104M consumer control report descriptor\n"); rdesc[12] = 0x00; rdesc[13] = 0x00; } return rdesc; } static const __u8 *cp_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { unsigned long quirks = (unsigned long)hid_get_drvdata(hdev); if (quirks & CP_RDESC_SWAPPED_MIN_MAX) rdesc = cp_rdesc_fixup(hdev, rdesc, rsize); if (quirks & VA_INVAL_LOGICAL_BOUNDARY) rdesc = va_logical_boundary_fixup(hdev, rdesc, rsize); return rdesc; } static int cp_input_mapped(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { unsigned long quirks = (unsigned long)hid_get_drvdata(hdev); if (!(quirks & CP_2WHEEL_MOUSE_HACK)) return 0; if (usage->type == EV_REL && usage->code == REL_WHEEL) set_bit(REL_HWHEEL, *bit); if (usage->hid == 0x00090005) return -1; return 0; } static int cp_event(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, __s32 value) { unsigned long quirks = (unsigned long)hid_get_drvdata(hdev); if (!(hdev->claimed & HID_CLAIMED_INPUT) || !field->hidinput || !usage->type || !(quirks & CP_2WHEEL_MOUSE_HACK)) return 0; if (usage->hid == 0x00090005) { if (value) quirks |= CP_2WHEEL_MOUSE_HACK_ON; else quirks &= ~CP_2WHEEL_MOUSE_HACK_ON; hid_set_drvdata(hdev, (void *)quirks); return 1; } if (usage->code == REL_WHEEL && (quirks & CP_2WHEEL_MOUSE_HACK_ON)) { struct input_dev *input = field->hidinput->input; input_event(input, usage->type, REL_HWHEEL, value); return 1; } return 0; } static int cp_probe(struct hid_device *hdev, const struct hid_device_id *id) { unsigned long quirks = id->driver_data; int ret; hid_set_drvdata(hdev, (void *)quirks); ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); goto err_free; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (ret) { hid_err(hdev, "hw start failed\n"); goto err_free; } return 0; err_free: return ret; } static const struct hid_device_id cp_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_1), .driver_data = CP_RDESC_SWAPPED_MIN_MAX }, { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_2), .driver_data = CP_RDESC_SWAPPED_MIN_MAX }, { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_3), .driver_data = CP_RDESC_SWAPPED_MIN_MAX }, { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_4), .driver_data = CP_RDESC_SWAPPED_MIN_MAX }, { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_MOUSE), .driver_data = CP_2WHEEL_MOUSE_HACK }, { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_VARMILO_VA104M_07B1), .driver_data = VA_INVAL_LOGICAL_BOUNDARY }, { } }; MODULE_DEVICE_TABLE(hid, cp_devices); static struct hid_driver cp_driver = { .name = "cypress", .id_table = cp_devices, .report_fixup = cp_report_fixup, .input_mapped = cp_input_mapped, .event = cp_event, .probe = cp_probe, }; module_hid_driver(cp_driver); MODULE_DESCRIPTION("HID driver for some cypress \"special\" devices"); MODULE_LICENSE("GPL");
150 148 80 143 78 84 142 1 81 81 86 84 2 1 150 101 101 88 88 87 100 100 100 100 4 2 15 82 150 2 85 85 1 85 83 67 85 86 112 113 113 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 // SPDX-License-Identifier: GPL-2.0-or-later /* * Squashfs - a compressed read only filesystem for Linux * * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 * Phillip Lougher <phillip@squashfs.org.uk> * * cache.c */ /* * Blocks in Squashfs are compressed. To avoid repeatedly decompressing * recently accessed data Squashfs uses two small metadata and fragment caches. * * This file implements a generic cache implementation used for both caches, * plus functions layered ontop of the generic cache implementation to * access the metadata and fragment caches. * * To avoid out of memory and fragmentation issues with vmalloc the cache * uses sequences of kmalloced PAGE_SIZE buffers. * * It should be noted that the cache is not used for file datablocks, these * are decompressed and cached in the page-cache in the normal way. The * cache is only used to temporarily cache fragment and metadata blocks * which have been read as as a result of a metadata (i.e. inode or * directory) or fragment access. Because metadata and fragments are packed * together into blocks (to gain greater compression) the read of a particular * piece of metadata or fragment will retrieve other metadata/fragments which * have been packed with it, these because of locality-of-reference may be read * in the near future. Temporarily caching them ensures they are available for * near future access without requiring an additional read and decompress. */ #include <linux/fs.h> #include <linux/vfs.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/sched.h> #include <linux/spinlock.h> #include <linux/wait.h> #include <linux/pagemap.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" #include "squashfs.h" #include "page_actor.h" /* * Look-up block in cache, and increment usage count. If not in cache, read * and decompress it from disk. */ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb, struct squashfs_cache *cache, u64 block, int length) { int i, n; struct squashfs_cache_entry *entry; spin_lock(&cache->lock); while (1) { for (i = cache->curr_blk, n = 0; n < cache->entries; n++) { if (cache->entry[i].block == block) { cache->curr_blk = i; break; } i = (i + 1) % cache->entries; } if (n == cache->entries) { /* * Block not in cache, if all cache entries are used * go to sleep waiting for one to become available. */ if (cache->unused == 0) { cache->num_waiters++; spin_unlock(&cache->lock); wait_event(cache->wait_queue, cache->unused); spin_lock(&cache->lock); cache->num_waiters--; continue; } /* * At least one unused cache entry. A simple * round-robin strategy is used to choose the entry to * be evicted from the cache. */ i = cache->next_blk; for (n = 0; n < cache->entries; n++) { if (cache->entry[i].refcount == 0) break; i = (i + 1) % cache->entries; } cache->next_blk = (i + 1) % cache->entries; entry = &cache->entry[i]; /* * Initialise chosen cache entry, and fill it in from * disk. */ cache->unused--; entry->block = block; entry->refcount = 1; entry->pending = 1; entry->num_waiters = 0; entry->error = 0; spin_unlock(&cache->lock); entry->length = squashfs_read_data(sb, block, length, &entry->next_index, entry->actor); spin_lock(&cache->lock); if (entry->length < 0) entry->error = entry->length; entry->pending = 0; /* * While filling this entry one or more other processes * have looked it up in the cache, and have slept * waiting for it to become available. */ if (entry->num_waiters) { spin_unlock(&cache->lock); wake_up_all(&entry->wait_queue); } else spin_unlock(&cache->lock); goto out; } /* * Block already in cache. Increment refcount so it doesn't * get reused until we're finished with it, if it was * previously unused there's one less cache entry available * for reuse. */ entry = &cache->entry[i]; if (entry->refcount == 0) cache->unused--; entry->refcount++; /* * If the entry is currently being filled in by another process * go to sleep waiting for it to become available. */ if (entry->pending) { entry->num_waiters++; spin_unlock(&cache->lock); wait_event(entry->wait_queue, !entry->pending); } else spin_unlock(&cache->lock); goto out; } out: TRACE("Got %s %d, start block %lld, refcount %d, error %d\n", cache->name, i, entry->block, entry->refcount, entry->error); if (entry->error) ERROR("Unable to read %s cache entry [%llx]\n", cache->name, block); return entry; } /* * Release cache entry, once usage count is zero it can be reused. */ void squashfs_cache_put(struct squashfs_cache_entry *entry) { struct squashfs_cache *cache = entry->cache; spin_lock(&cache->lock); entry->refcount--; if (entry->refcount == 0) { cache->unused++; /* * If there's any processes waiting for a block to become * available, wake one up. */ if (cache->num_waiters) { spin_unlock(&cache->lock); wake_up(&cache->wait_queue); return; } } spin_unlock(&cache->lock); } /* * Delete cache reclaiming all kmalloced buffers. */ void squashfs_cache_delete(struct squashfs_cache *cache) { int i, j; if (cache == NULL) return; for (i = 0; i < cache->entries; i++) { if (cache->entry[i].data) { for (j = 0; j < cache->pages; j++) kfree(cache->entry[i].data[j]); kfree(cache->entry[i].data); } kfree(cache->entry[i].actor); } kfree(cache->entry); kfree(cache); } /* * Initialise cache allocating the specified number of entries, each of * size block_size. To avoid vmalloc fragmentation issues each entry * is allocated as a sequence of kmalloced PAGE_SIZE buffers. */ struct squashfs_cache *squashfs_cache_init(char *name, int entries, int block_size) { int i, j; struct squashfs_cache *cache; if (entries == 0) return NULL; cache = kzalloc(sizeof(*cache), GFP_KERNEL); if (cache == NULL) { ERROR("Failed to allocate %s cache\n", name); return ERR_PTR(-ENOMEM); } cache->entry = kcalloc(entries, sizeof(*(cache->entry)), GFP_KERNEL); if (cache->entry == NULL) { ERROR("Failed to allocate %s cache\n", name); goto cleanup; } cache->curr_blk = 0; cache->next_blk = 0; cache->unused = entries; cache->entries = entries; cache->block_size = block_size; cache->pages = block_size >> PAGE_SHIFT; cache->pages = cache->pages ? cache->pages : 1; cache->name = name; cache->num_waiters = 0; spin_lock_init(&cache->lock); init_waitqueue_head(&cache->wait_queue); for (i = 0; i < entries; i++) { struct squashfs_cache_entry *entry = &cache->entry[i]; init_waitqueue_head(&cache->entry[i].wait_queue); entry->cache = cache; entry->block = SQUASHFS_INVALID_BLK; entry->data = kcalloc(cache->pages, sizeof(void *), GFP_KERNEL); if (entry->data == NULL) { ERROR("Failed to allocate %s cache entry\n", name); goto cleanup; } for (j = 0; j < cache->pages; j++) { entry->data[j] = kmalloc(PAGE_SIZE, GFP_KERNEL); if (entry->data[j] == NULL) { ERROR("Failed to allocate %s buffer\n", name); goto cleanup; } } entry->actor = squashfs_page_actor_init(entry->data, cache->pages, 0); if (entry->actor == NULL) { ERROR("Failed to allocate %s cache entry\n", name); goto cleanup; } } return cache; cleanup: squashfs_cache_delete(cache); return ERR_PTR(-ENOMEM); } /* * Copy up to length bytes from cache entry to buffer starting at offset bytes * into the cache entry. If there's not length bytes then copy the number of * bytes available. In all cases return the number of bytes copied. */ int squashfs_copy_data(void *buffer, struct squashfs_cache_entry *entry, int offset, int length) { int remaining = length; if (length == 0) return 0; else if (buffer == NULL) return min(length, entry->length - offset); while (offset < entry->length) { void *buff = entry->data[offset / PAGE_SIZE] + (offset % PAGE_SIZE); int bytes = min_t(int, entry->length - offset, PAGE_SIZE - (offset % PAGE_SIZE)); if (bytes >= remaining) { memcpy(buffer, buff, remaining); remaining = 0; break; } memcpy(buffer, buff, bytes); buffer += bytes; remaining -= bytes; offset += bytes; } return length - remaining; } /* * Read length bytes from metadata position <block, offset> (block is the * start of the compressed block on disk, and offset is the offset into * the block once decompressed). Data is packed into consecutive blocks, * and length bytes may require reading more than one block. */ int squashfs_read_metadata(struct super_block *sb, void *buffer, u64 *block, int *offset, int length) { struct squashfs_sb_info *msblk = sb->s_fs_info; int bytes, res = length; struct squashfs_cache_entry *entry; TRACE("Entered squashfs_read_metadata [%llx:%x]\n", *block, *offset); if (unlikely(length < 0)) return -EIO; while (length) { entry = squashfs_cache_get(sb, msblk->block_cache, *block, 0); if (entry->error) { res = entry->error; goto error; } else if (*offset >= entry->length) { res = -EIO; goto error; } bytes = squashfs_copy_data(buffer, entry, *offset, length); if (buffer) buffer += bytes; length -= bytes; *offset += bytes; if (*offset == entry->length) { *block = entry->next_index; *offset = 0; } squashfs_cache_put(entry); } return res; error: squashfs_cache_put(entry); return res; } /* * Look-up in the fragmment cache the fragment located at <start_block> in the * filesystem. If necessary read and decompress it from disk. */ struct squashfs_cache_entry *squashfs_get_fragment(struct super_block *sb, u64 start_block, int length) { struct squashfs_sb_info *msblk = sb->s_fs_info; return squashfs_cache_get(sb, msblk->fragment_cache, start_block, length); } /* * Read and decompress the datablock located at <start_block> in the * filesystem. The cache is used here to avoid duplicating locking and * read/decompress code. */ struct squashfs_cache_entry *squashfs_get_datablock(struct super_block *sb, u64 start_block, int length) { struct squashfs_sb_info *msblk = sb->s_fs_info; return squashfs_cache_get(sb, msblk->read_page, start_block, length); } /* * Read a filesystem table (uncompressed sequence of bytes) from disk */ void *squashfs_read_table(struct super_block *sb, u64 block, int length) { int pages = (length + PAGE_SIZE - 1) >> PAGE_SHIFT; int i, res; void *table, *buffer, **data; struct squashfs_page_actor *actor; table = buffer = kmalloc(length, GFP_KERNEL); if (table == NULL) return ERR_PTR(-ENOMEM); data = kcalloc(pages, sizeof(void *), GFP_KERNEL); if (data == NULL) { res = -ENOMEM; goto failed; } actor = squashfs_page_actor_init(data, pages, length); if (actor == NULL) { res = -ENOMEM; goto failed2; } for (i = 0; i < pages; i++, buffer += PAGE_SIZE) data[i] = buffer; res = squashfs_read_data(sb, block, length | SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, actor); kfree(data); kfree(actor); if (res < 0) goto failed; return table; failed2: kfree(data); failed: kfree(table); return ERR_PTR(res); }
2 2 2 1 1 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 // SPDX-License-Identifier: GPL-2.0-or-later /* * Virtual PTP 1588 clock for use with KVM guests * * Copyright (C) 2017 Red Hat Inc. */ #include <linux/device.h> #include <linux/err.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/ptp_kvm.h> #include <uapi/linux/kvm_para.h> #include <asm/kvm_para.h> #include <uapi/asm/kvm_para.h> #include <linux/ptp_clock_kernel.h> struct kvm_ptp_clock { struct ptp_clock *ptp_clock; struct ptp_clock_info caps; }; static DEFINE_SPINLOCK(kvm_ptp_lock); static int ptp_kvm_get_time_fn(ktime_t *device_time, struct system_counterval_t *system_counter, void *ctx) { enum clocksource_ids cs_id; struct timespec64 tspec; u64 cycle; int ret; spin_lock(&kvm_ptp_lock); preempt_disable_notrace(); ret = kvm_arch_ptp_get_crosststamp(&cycle, &tspec, &cs_id); if (ret) { spin_unlock(&kvm_ptp_lock); preempt_enable_notrace(); return ret; } preempt_enable_notrace(); system_counter->cycles = cycle; system_counter->cs_id = cs_id; *device_time = timespec64_to_ktime(tspec); spin_unlock(&kvm_ptp_lock); return 0; } static int ptp_kvm_getcrosststamp(struct ptp_clock_info *ptp, struct system_device_crosststamp *xtstamp) { return get_device_system_crosststamp(ptp_kvm_get_time_fn, NULL, NULL, xtstamp); } /* * PTP clock operations */ static int ptp_kvm_adjfine(struct ptp_clock_info *ptp, long delta) { return -EOPNOTSUPP; } static int ptp_kvm_adjtime(struct ptp_clock_info *ptp, s64 delta) { return -EOPNOTSUPP; } static int ptp_kvm_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts) { return -EOPNOTSUPP; } static int ptp_kvm_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) { long ret; struct timespec64 tspec; spin_lock(&kvm_ptp_lock); ret = kvm_arch_ptp_get_clock(&tspec); if (ret) { spin_unlock(&kvm_ptp_lock); return ret; } spin_unlock(&kvm_ptp_lock); memcpy(ts, &tspec, sizeof(struct timespec64)); return 0; } static int ptp_kvm_enable(struct ptp_clock_info *ptp, struct ptp_clock_request *rq, int on) { return -EOPNOTSUPP; } static const struct ptp_clock_info ptp_kvm_caps = { .owner = THIS_MODULE, .name = "KVM virtual PTP", .max_adj = 0, .n_ext_ts = 0, .n_pins = 0, .pps = 0, .adjfine = ptp_kvm_adjfine, .adjtime = ptp_kvm_adjtime, .gettime64 = ptp_kvm_gettime, .settime64 = ptp_kvm_settime, .enable = ptp_kvm_enable, .getcrosststamp = ptp_kvm_getcrosststamp, }; /* module operations */ static struct kvm_ptp_clock kvm_ptp_clock; static void __exit ptp_kvm_exit(void) { ptp_clock_unregister(kvm_ptp_clock.ptp_clock); kvm_arch_ptp_exit(); } static int __init ptp_kvm_init(void) { long ret; ret = kvm_arch_ptp_init(); if (ret) { if (ret != -EOPNOTSUPP) pr_err("fail to initialize ptp_kvm"); return ret; } kvm_ptp_clock.caps = ptp_kvm_caps; kvm_ptp_clock.ptp_clock = ptp_clock_register(&kvm_ptp_clock.caps, NULL); return PTR_ERR_OR_ZERO(kvm_ptp_clock.ptp_clock); } module_init(ptp_kvm_init); module_exit(ptp_kvm_exit); MODULE_AUTHOR("Marcelo Tosatti <mtosatti@redhat.com>"); MODULE_DESCRIPTION("PTP clock using KVMCLOCK"); MODULE_LICENSE("GPL");
4 1 44 45 54 54 4 4 4 4 4 4 4 5 4 1 1 1 53 53 5 25 25 25 25 25 54 54 54 54 53 56 56 2 2 4 4 1 1 1 1 45 45 45 45 44 1 1 25 25 25 25 25 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 // SPDX-License-Identifier: LGPL-2.1+ /* Copyright (C) 2022 Kent Overstreet */ #include <linux/bitmap.h> #include <linux/err.h> #include <linux/export.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/string_helpers.h> #include "printbuf.h" static inline unsigned __printbuf_linelen(struct printbuf *buf, unsigned pos) { return pos - buf->last_newline; } static inline unsigned printbuf_linelen(struct printbuf *buf) { return __printbuf_linelen(buf, buf->pos); } /* * Returns spaces from start of line, if set, or 0 if unset: */ static inline unsigned cur_tabstop(struct printbuf *buf) { return buf->cur_tabstop < buf->nr_tabstops ? buf->_tabstops[buf->cur_tabstop] : 0; } int bch2_printbuf_make_room(struct printbuf *out, unsigned extra) { /* Reserved space for terminating nul: */ extra += 1; if (out->pos + extra <= out->size) return 0; if (!out->heap_allocated) { out->overflow = true; return 0; } unsigned new_size = roundup_pow_of_two(out->size + extra); /* Sanity check... */ if (new_size > PAGE_SIZE << MAX_PAGE_ORDER) { out->allocation_failure = true; out->overflow = true; return -ENOMEM; } /* * Note: output buffer must be freeable with kfree(), it's not required * that the user use printbuf_exit(). */ char *buf = krealloc(out->buf, new_size, !out->atomic ? GFP_KERNEL : GFP_NOWAIT); if (!buf) { out->allocation_failure = true; out->overflow = true; return -ENOMEM; } out->buf = buf; out->size = new_size; return 0; } static void printbuf_advance_pos(struct printbuf *out, unsigned len) { out->pos += min(len, printbuf_remaining(out)); } static void printbuf_insert_spaces(struct printbuf *out, unsigned pos, unsigned nr) { unsigned move = out->pos - pos; bch2_printbuf_make_room(out, nr); if (pos + nr < out->size) memmove(out->buf + pos + nr, out->buf + pos, min(move, out->size - 1 - pos - nr)); if (pos < out->size) memset(out->buf + pos, ' ', min(nr, out->size - pos)); printbuf_advance_pos(out, nr); printbuf_nul_terminate_reserved(out); } static void __printbuf_do_indent(struct printbuf *out, unsigned pos) { while (true) { int pad; unsigned len = out->pos - pos; char *p = out->buf + pos; char *n = memscan(p, '\n', len); if (cur_tabstop(out)) { n = min(n, (char *) memscan(p, '\r', len)); n = min(n, (char *) memscan(p, '\t', len)); } pos = n - out->buf; if (pos == out->pos) break; switch (*n) { case '\n': pos++; out->last_newline = pos; printbuf_insert_spaces(out, pos, out->indent); pos = min(pos + out->indent, out->pos); out->last_field = pos; out->cur_tabstop = 0; break; case '\r': memmove(n, n + 1, out->pos - pos); --out->pos; pad = (int) cur_tabstop(out) - (int) __printbuf_linelen(out, pos); if (pad > 0) { printbuf_insert_spaces(out, out->last_field, pad); pos += pad; } out->last_field = pos; out->cur_tabstop++; break; case '\t': pad = (int) cur_tabstop(out) - (int) __printbuf_linelen(out, pos) - 1; if (pad > 0) { *n = ' '; printbuf_insert_spaces(out, pos, pad - 1); pos += pad; } else { memmove(n, n + 1, out->pos - pos); --out->pos; } out->last_field = pos; out->cur_tabstop++; break; } } } static inline void printbuf_do_indent(struct printbuf *out, unsigned pos) { if (out->has_indent_or_tabstops && !out->suppress_indent_tabstop_handling) __printbuf_do_indent(out, pos); } void bch2_prt_vprintf(struct printbuf *out, const char *fmt, va_list args) { int len; do { va_list args2; va_copy(args2, args); len = vsnprintf(out->buf + out->pos, printbuf_remaining_size(out), fmt, args2); va_end(args2); } while (len > printbuf_remaining(out) && !bch2_printbuf_make_room(out, len)); unsigned indent_pos = out->pos; printbuf_advance_pos(out, len); printbuf_do_indent(out, indent_pos); } void bch2_prt_printf(struct printbuf *out, const char *fmt, ...) { va_list args; int len; do { va_start(args, fmt); len = vsnprintf(out->buf + out->pos, printbuf_remaining_size(out), fmt, args); va_end(args); } while (len > printbuf_remaining(out) && !bch2_printbuf_make_room(out, len)); unsigned indent_pos = out->pos; printbuf_advance_pos(out, len); printbuf_do_indent(out, indent_pos); } /** * bch2_printbuf_str() - returns printbuf's buf as a C string, guaranteed to be * null terminated * @buf: printbuf to terminate * Returns: Printbuf contents, as a nul terminated C string */ const char *bch2_printbuf_str(const struct printbuf *buf) { /* * If we've written to a printbuf then it's guaranteed to be a null * terminated string - but if we haven't, then we might not have * allocated a buffer at all: */ return buf->pos ? buf->buf : ""; } /** * bch2_printbuf_exit() - exit a printbuf, freeing memory it owns and poisoning it * against accidental use. * @buf: printbuf to exit */ void bch2_printbuf_exit(struct printbuf *buf) { if (buf->heap_allocated) { kfree(buf->buf); buf->buf = ERR_PTR(-EINTR); /* poison value */ } } void bch2_printbuf_tabstops_reset(struct printbuf *buf) { buf->nr_tabstops = 0; } void bch2_printbuf_tabstop_pop(struct printbuf *buf) { if (buf->nr_tabstops) --buf->nr_tabstops; } /* * bch2_printbuf_tabstop_set() - add a tabstop, n spaces from the previous tabstop * * @buf: printbuf to control * @spaces: number of spaces from previous tabpstop * * In the future this function may allocate memory if setting more than * PRINTBUF_INLINE_TABSTOPS or setting tabstops more than 255 spaces from start * of line. */ int bch2_printbuf_tabstop_push(struct printbuf *buf, unsigned spaces) { unsigned prev_tabstop = buf->nr_tabstops ? buf->_tabstops[buf->nr_tabstops - 1] : 0; if (WARN_ON(buf->nr_tabstops >= ARRAY_SIZE(buf->_tabstops))) return -EINVAL; buf->_tabstops[buf->nr_tabstops++] = prev_tabstop + spaces; buf->has_indent_or_tabstops = true; return 0; } /** * bch2_printbuf_indent_add() - add to the current indent level * * @buf: printbuf to control * @spaces: number of spaces to add to the current indent level * * Subsequent lines, and the current line if the output position is at the start * of the current line, will be indented by @spaces more spaces. */ void bch2_printbuf_indent_add(struct printbuf *buf, unsigned spaces) { if (WARN_ON_ONCE(buf->indent + spaces < buf->indent)) spaces = 0; buf->indent += spaces; prt_chars(buf, ' ', spaces); buf->has_indent_or_tabstops = true; } /** * bch2_printbuf_indent_sub() - subtract from the current indent level * * @buf: printbuf to control * @spaces: number of spaces to subtract from the current indent level * * Subsequent lines, and the current line if the output position is at the start * of the current line, will be indented by @spaces less spaces. */ void bch2_printbuf_indent_sub(struct printbuf *buf, unsigned spaces) { if (WARN_ON_ONCE(spaces > buf->indent)) spaces = buf->indent; if (buf->last_newline + buf->indent == buf->pos) { buf->pos -= spaces; printbuf_nul_terminate(buf); } buf->indent -= spaces; if (!buf->indent && !buf->nr_tabstops) buf->has_indent_or_tabstops = false; } void bch2_prt_newline(struct printbuf *buf) { bch2_printbuf_make_room(buf, 1 + buf->indent); __prt_char_reserved(buf, '\n'); buf->last_newline = buf->pos; __prt_chars_reserved(buf, ' ', buf->indent); printbuf_nul_terminate_reserved(buf); buf->last_field = buf->pos; buf->cur_tabstop = 0; } void bch2_printbuf_strip_trailing_newline(struct printbuf *out) { for (int p = out->pos - 1; p >= 0; --p) { if (out->buf[p] == '\n') { out->pos = p; break; } if (out->buf[p] != ' ') break; } printbuf_nul_terminate_reserved(out); } static void __prt_tab(struct printbuf *out) { int spaces = max_t(int, 0, cur_tabstop(out) - printbuf_linelen(out)); prt_chars(out, ' ', spaces); out->last_field = out->pos; out->cur_tabstop++; } /** * bch2_prt_tab() - Advance printbuf to the next tabstop * @out: printbuf to control * * Advance output to the next tabstop by printing spaces. */ void bch2_prt_tab(struct printbuf *out) { if (WARN_ON(!cur_tabstop(out))) return; __prt_tab(out); } static void __prt_tab_rjust(struct printbuf *buf) { int pad = (int) cur_tabstop(buf) - (int) printbuf_linelen(buf); if (pad > 0) printbuf_insert_spaces(buf, buf->last_field, pad); buf->last_field = buf->pos; buf->cur_tabstop++; } /** * bch2_prt_tab_rjust - Advance printbuf to the next tabstop, right justifying * previous output * * @buf: printbuf to control * * Advance output to the next tabstop by inserting spaces immediately after the * previous tabstop, right justifying previously outputted text. */ void bch2_prt_tab_rjust(struct printbuf *buf) { if (WARN_ON(!cur_tabstop(buf))) return; __prt_tab_rjust(buf); } /** * bch2_prt_bytes_indented() - Print an array of chars, handling embedded control characters * * @out: output printbuf * @str: string to print * @count: number of bytes to print * * The following contol characters are handled as so: * \n: prt_newline newline that obeys current indent level * \t: prt_tab advance to next tabstop * \r: prt_tab_rjust advance to next tabstop, with right justification */ void bch2_prt_bytes_indented(struct printbuf *out, const char *str, unsigned count) { unsigned indent_pos = out->pos; prt_bytes(out, str, count); printbuf_do_indent(out, indent_pos); } /** * bch2_prt_human_readable_u64() - Print out a u64 in human readable units * @out: output printbuf * @v: integer to print * * Units of 2^10 (default) or 10^3 are controlled via @out->si_units */ void bch2_prt_human_readable_u64(struct printbuf *out, u64 v) { bch2_printbuf_make_room(out, 10); unsigned len = string_get_size(v, 1, !out->si_units, out->buf + out->pos, printbuf_remaining_size(out)); printbuf_advance_pos(out, len); } /** * bch2_prt_human_readable_s64() - Print out a s64 in human readable units * @out: output printbuf * @v: integer to print * * Units of 2^10 (default) or 10^3 are controlled via @out->si_units */ void bch2_prt_human_readable_s64(struct printbuf *out, s64 v) { if (v < 0) prt_char(out, '-'); bch2_prt_human_readable_u64(out, abs(v)); } /** * bch2_prt_units_u64() - Print out a u64 according to printbuf unit options * @out: output printbuf * @v: integer to print * * Units are either raw (default), or human reabable units (controlled via * @buf->human_readable_units) */ void bch2_prt_units_u64(struct printbuf *out, u64 v) { if (out->human_readable_units) bch2_prt_human_readable_u64(out, v); else bch2_prt_printf(out, "%llu", v); } /** * bch2_prt_units_s64() - Print out a s64 according to printbuf unit options * @out: output printbuf * @v: integer to print * * Units are either raw (default), or human reabable units (controlled via * @buf->human_readable_units) */ void bch2_prt_units_s64(struct printbuf *out, s64 v) { if (v < 0) prt_char(out, '-'); bch2_prt_units_u64(out, abs(v)); } void bch2_prt_string_option(struct printbuf *out, const char * const list[], size_t selected) { for (size_t i = 0; list[i]; i++) bch2_prt_printf(out, i == selected ? "[%s] " : "%s ", list[i]); } void bch2_prt_bitflags(struct printbuf *out, const char * const list[], u64 flags) { unsigned bit, nr = 0; bool first = true; while (list[nr]) nr++; while (flags && (bit = __ffs64(flags)) < nr) { if (!first) bch2_prt_printf(out, ","); first = false; bch2_prt_printf(out, "%s", list[bit]); flags ^= BIT_ULL(bit); } } void bch2_prt_bitflags_vector(struct printbuf *out, const char * const list[], unsigned long *v, unsigned nr) { bool first = true; unsigned i; for (i = 0; i < nr; i++) if (!list[i]) { nr = i - 1; break; } for_each_set_bit(i, v, nr) { if (!first) bch2_prt_printf(out, ","); first = false; bch2_prt_printf(out, "%s", list[i]); } }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_IPCOMP_H #define _NET_IPCOMP_H #include <linux/skbuff.h> #include <linux/types.h> #define IPCOMP_SCRATCH_SIZE 65400 struct crypto_comp; struct ip_comp_hdr; struct ipcomp_data { u16 threshold; struct crypto_comp * __percpu *tfms; }; struct ip_comp_hdr; struct sk_buff; struct xfrm_state; int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb); int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb); void ipcomp_destroy(struct xfrm_state *x); int ipcomp_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack); static inline struct ip_comp_hdr *ip_comp_hdr(const struct sk_buff *skb) { return (struct ip_comp_hdr *)skb_transport_header(skb); } #endif
2337 2355 824 341 209 369 307 370 369 307 370 307 369 370 370 303 307 307 262 13 251 101 77 26 17 6 16 1 1 16 51 50 3849 3843 3850 3853 22 40 4198 4200 76 75 958 17 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 // SPDX-License-Identifier: GPL-2.0 #include <linux/mm.h> #include <linux/gfp.h> #include <linux/hugetlb.h> #include <asm/pgalloc.h> #include <asm/tlb.h> #include <asm/fixmap.h> #include <asm/mtrr.h> #ifdef CONFIG_DYNAMIC_PHYSICAL_MASK phys_addr_t physical_mask __ro_after_init = (1ULL << __PHYSICAL_MASK_SHIFT) - 1; EXPORT_SYMBOL(physical_mask); #endif #ifdef CONFIG_HIGHPTE #define PGTABLE_HIGHMEM __GFP_HIGHMEM #else #define PGTABLE_HIGHMEM 0 #endif #ifndef CONFIG_PARAVIRT #ifndef CONFIG_PT_RECLAIM static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table) { struct ptdesc *ptdesc = (struct ptdesc *)table; pagetable_dtor(ptdesc); tlb_remove_page(tlb, ptdesc_page(ptdesc)); } #else static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table) { tlb_remove_table(tlb, table); } #endif /* !CONFIG_PT_RECLAIM */ #endif /* !CONFIG_PARAVIRT */ gfp_t __userpte_alloc_gfp = GFP_PGTABLE_USER | PGTABLE_HIGHMEM; pgtable_t pte_alloc_one(struct mm_struct *mm) { return __pte_alloc_one(mm, __userpte_alloc_gfp); } static int __init setup_userpte(char *arg) { if (!arg) return -EINVAL; /* * "userpte=nohigh" disables allocation of user pagetables in * high memory. */ if (strcmp(arg, "nohigh") == 0) __userpte_alloc_gfp &= ~__GFP_HIGHMEM; else return -EINVAL; return 0; } early_param("userpte", setup_userpte); void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) { paravirt_release_pte(page_to_pfn(pte)); paravirt_tlb_remove_table(tlb, page_ptdesc(pte)); } #if CONFIG_PGTABLE_LEVELS > 2 void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) { paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT); /* * NOTE! For PAE, any changes to the top page-directory-pointer-table * entries need a full cr3 reload to flush. */ #ifdef CONFIG_X86_PAE tlb->need_flush_all = 1; #endif paravirt_tlb_remove_table(tlb, virt_to_ptdesc(pmd)); } #if CONFIG_PGTABLE_LEVELS > 3 void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) { paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); paravirt_tlb_remove_table(tlb, virt_to_ptdesc(pud)); } #if CONFIG_PGTABLE_LEVELS > 4 void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d) { paravirt_release_p4d(__pa(p4d) >> PAGE_SHIFT); paravirt_tlb_remove_table(tlb, virt_to_ptdesc(p4d)); } #endif /* CONFIG_PGTABLE_LEVELS > 4 */ #endif /* CONFIG_PGTABLE_LEVELS > 3 */ #endif /* CONFIG_PGTABLE_LEVELS > 2 */ static inline void pgd_list_add(pgd_t *pgd) { struct ptdesc *ptdesc = virt_to_ptdesc(pgd); list_add(&ptdesc->pt_list, &pgd_list); } static inline void pgd_list_del(pgd_t *pgd) { struct ptdesc *ptdesc = virt_to_ptdesc(pgd); list_del(&ptdesc->pt_list); } #define UNSHARED_PTRS_PER_PGD \ (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) #define MAX_UNSHARED_PTRS_PER_PGD \ MAX_T(size_t, KERNEL_PGD_BOUNDARY, PTRS_PER_PGD) static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm) { virt_to_ptdesc(pgd)->pt_mm = mm; } struct mm_struct *pgd_page_get_mm(struct page *page) { return page_ptdesc(page)->pt_mm; } static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd) { /* If the pgd points to a shared pagetable level (either the ptes in non-PAE, or shared PMD in PAE), then just copy the references from swapper_pg_dir. */ if (CONFIG_PGTABLE_LEVELS == 2 || (CONFIG_PGTABLE_LEVELS == 3 && SHARED_KERNEL_PMD) || CONFIG_PGTABLE_LEVELS >= 4) { clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY, swapper_pg_dir + KERNEL_PGD_BOUNDARY, KERNEL_PGD_PTRS); } /* list required to sync kernel mapping updates */ if (!SHARED_KERNEL_PMD) { pgd_set_mm(pgd, mm); pgd_list_add(pgd); } } static void pgd_dtor(pgd_t *pgd) { if (SHARED_KERNEL_PMD) return; spin_lock(&pgd_lock); pgd_list_del(pgd); spin_unlock(&pgd_lock); } /* * List of all pgd's needed for non-PAE so it can invalidate entries * in both cached and uncached pgd's; not needed for PAE since the * kernel pmd is shared. If PAE were not to share the pmd a similar * tactic would be needed. This is essentially codepath-based locking * against pageattr.c; it is the unique case in which a valid change * of kernel pagetables can't be lazily synchronized by vmalloc faults. * vmalloc faults work because attached pagetables are never freed. * -- nyc */ #ifdef CONFIG_X86_PAE /* * In PAE mode, we need to do a cr3 reload (=tlb flush) when * updating the top-level pagetable entries to guarantee the * processor notices the update. Since this is expensive, and * all 4 top-level entries are used almost immediately in a * new process's life, we just pre-populate them here. * * Also, if we're in a paravirt environment where the kernel pmd is * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate * and initialize the kernel pmds here. */ #define PREALLOCATED_PMDS UNSHARED_PTRS_PER_PGD #define MAX_PREALLOCATED_PMDS MAX_UNSHARED_PTRS_PER_PGD /* * We allocate separate PMDs for the kernel part of the user page-table * when PTI is enabled. We need them to map the per-process LDT into the * user-space page-table. */ #define PREALLOCATED_USER_PMDS (boot_cpu_has(X86_FEATURE_PTI) ? \ KERNEL_PGD_PTRS : 0) #define MAX_PREALLOCATED_USER_PMDS KERNEL_PGD_PTRS void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) { paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT); /* Note: almost everything apart from _PAGE_PRESENT is reserved at the pmd (PDPT) level. */ set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT)); /* * According to Intel App note "TLBs, Paging-Structure Caches, * and Their Invalidation", April 2007, document 317080-001, * section 8.1: in PAE mode we explicitly have to flush the * TLB via cr3 if the top-level pgd is changed... */ flush_tlb_mm(mm); } #else /* !CONFIG_X86_PAE */ /* No need to prepopulate any pagetable entries in non-PAE modes. */ #define PREALLOCATED_PMDS 0 #define MAX_PREALLOCATED_PMDS 0 #define PREALLOCATED_USER_PMDS 0 #define MAX_PREALLOCATED_USER_PMDS 0 #endif /* CONFIG_X86_PAE */ static void free_pmds(struct mm_struct *mm, pmd_t *pmds[], int count) { int i; struct ptdesc *ptdesc; for (i = 0; i < count; i++) if (pmds[i]) { ptdesc = virt_to_ptdesc(pmds[i]); pagetable_dtor(ptdesc); pagetable_free(ptdesc); mm_dec_nr_pmds(mm); } } static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[], int count) { int i; bool failed = false; gfp_t gfp = GFP_PGTABLE_USER; if (mm == &init_mm) gfp &= ~__GFP_ACCOUNT; gfp &= ~__GFP_HIGHMEM; for (i = 0; i < count; i++) { pmd_t *pmd = NULL; struct ptdesc *ptdesc = pagetable_alloc(gfp, 0); if (!ptdesc) failed = true; if (ptdesc && !pagetable_pmd_ctor(ptdesc)) { pagetable_free(ptdesc); ptdesc = NULL; failed = true; } if (ptdesc) { mm_inc_nr_pmds(mm); pmd = ptdesc_address(ptdesc); } pmds[i] = pmd; } if (failed) { free_pmds(mm, pmds, count); return -ENOMEM; } return 0; } /* * Mop up any pmd pages which may still be attached to the pgd. * Normally they will be freed by munmap/exit_mmap, but any pmd we * preallocate which never got a corresponding vma will need to be * freed manually. */ static void mop_up_one_pmd(struct mm_struct *mm, pgd_t *pgdp) { pgd_t pgd = *pgdp; if (pgd_val(pgd) != 0) { pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd); pgd_clear(pgdp); paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT); pmd_free(mm, pmd); mm_dec_nr_pmds(mm); } } static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) { int i; for (i = 0; i < PREALLOCATED_PMDS; i++) mop_up_one_pmd(mm, &pgdp[i]); #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION if (!boot_cpu_has(X86_FEATURE_PTI)) return; pgdp = kernel_to_user_pgdp(pgdp); for (i = 0; i < PREALLOCATED_USER_PMDS; i++) mop_up_one_pmd(mm, &pgdp[i + KERNEL_PGD_BOUNDARY]); #endif } static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[]) { p4d_t *p4d; pud_t *pud; int i; p4d = p4d_offset(pgd, 0); pud = pud_offset(p4d, 0); for (i = 0; i < PREALLOCATED_PMDS; i++, pud++) { pmd_t *pmd = pmds[i]; if (i >= KERNEL_PGD_BOUNDARY) memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]), sizeof(pmd_t) * PTRS_PER_PMD); pud_populate(mm, pud, pmd); } } #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION static void pgd_prepopulate_user_pmd(struct mm_struct *mm, pgd_t *k_pgd, pmd_t *pmds[]) { pgd_t *s_pgd = kernel_to_user_pgdp(swapper_pg_dir); pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd); p4d_t *u_p4d; pud_t *u_pud; int i; u_p4d = p4d_offset(u_pgd, 0); u_pud = pud_offset(u_p4d, 0); s_pgd += KERNEL_PGD_BOUNDARY; u_pud += KERNEL_PGD_BOUNDARY; for (i = 0; i < PREALLOCATED_USER_PMDS; i++, u_pud++, s_pgd++) { pmd_t *pmd = pmds[i]; memcpy(pmd, (pmd_t *)pgd_page_vaddr(*s_pgd), sizeof(pmd_t) * PTRS_PER_PMD); pud_populate(mm, u_pud, pmd); } } #else static void pgd_prepopulate_user_pmd(struct mm_struct *mm, pgd_t *k_pgd, pmd_t *pmds[]) { } #endif /* * Xen paravirt assumes pgd table should be in one page. 64 bit kernel also * assumes that pgd should be in one page. * * But kernel with PAE paging that is not running as a Xen domain * only needs to allocate 32 bytes for pgd instead of one page. */ #ifdef CONFIG_X86_PAE #include <linux/slab.h> #define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) #define PGD_ALIGN 32 static struct kmem_cache *pgd_cache; void __init pgtable_cache_init(void) { /* * When PAE kernel is running as a Xen domain, it does not use * shared kernel pmd. And this requires a whole page for pgd. */ if (!SHARED_KERNEL_PMD) return; /* * when PAE kernel is not running as a Xen domain, it uses * shared kernel pmd. Shared kernel pmd does not require a whole * page for pgd. We are able to just allocate a 32-byte for pgd. * During boot time, we create a 32-byte slab for pgd table allocation. */ pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN, SLAB_PANIC, NULL); } static inline pgd_t *_pgd_alloc(struct mm_struct *mm) { /* * If no SHARED_KERNEL_PMD, PAE kernel is running as a Xen domain. * We allocate one page for pgd. */ if (!SHARED_KERNEL_PMD) return __pgd_alloc(mm, PGD_ALLOCATION_ORDER); /* * Now PAE kernel is not running as a Xen domain. We can allocate * a 32-byte slab for pgd to save memory space. */ return kmem_cache_alloc(pgd_cache, GFP_PGTABLE_USER); } static inline void _pgd_free(struct mm_struct *mm, pgd_t *pgd) { if (!SHARED_KERNEL_PMD) __pgd_free(mm, pgd); else kmem_cache_free(pgd_cache, pgd); } #else static inline pgd_t *_pgd_alloc(struct mm_struct *mm) { return __pgd_alloc(mm, PGD_ALLOCATION_ORDER); } static inline void _pgd_free(struct mm_struct *mm, pgd_t *pgd) { __pgd_free(mm, pgd); } #endif /* CONFIG_X86_PAE */ pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *pgd; pmd_t *u_pmds[MAX_PREALLOCATED_USER_PMDS]; pmd_t *pmds[MAX_PREALLOCATED_PMDS]; pgd = _pgd_alloc(mm); if (pgd == NULL) goto out; mm->pgd = pgd; if (sizeof(pmds) != 0 && preallocate_pmds(mm, pmds, PREALLOCATED_PMDS) != 0) goto out_free_pgd; if (sizeof(u_pmds) != 0 && preallocate_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS) != 0) goto out_free_pmds; if (paravirt_pgd_alloc(mm) != 0) goto out_free_user_pmds; /* * Make sure that pre-populating the pmds is atomic with * respect to anything walking the pgd_list, so that they * never see a partially populated pgd. */ spin_lock(&pgd_lock); pgd_ctor(mm, pgd); if (sizeof(pmds) != 0) pgd_prepopulate_pmd(mm, pgd, pmds); if (sizeof(u_pmds) != 0) pgd_prepopulate_user_pmd(mm, pgd, u_pmds); spin_unlock(&pgd_lock); return pgd; out_free_user_pmds: if (sizeof(u_pmds) != 0) free_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS); out_free_pmds: if (sizeof(pmds) != 0) free_pmds(mm, pmds, PREALLOCATED_PMDS); out_free_pgd: _pgd_free(mm, pgd); out: return NULL; } void pgd_free(struct mm_struct *mm, pgd_t *pgd) { pgd_mop_up_pmds(mm, pgd); pgd_dtor(pgd); paravirt_pgd_free(mm, pgd); _pgd_free(mm, pgd); } /* * Used to set accessed or dirty bits in the page table entries * on other architectures. On x86, the accessed and dirty bits * are tracked by hardware. However, do_wp_page calls this function * to also make the pte writeable at the same time the dirty bit is * set. In that case we do actually need to write the PTE. */ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, pte_t entry, int dirty) { int changed = !pte_same(*ptep, entry); if (changed && dirty) set_pte(ptep, entry); return changed; } #ifdef CONFIG_TRANSPARENT_HUGEPAGE int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t entry, int dirty) { int changed = !pmd_same(*pmdp, entry); VM_BUG_ON(address & ~HPAGE_PMD_MASK); if (changed && dirty) { set_pmd(pmdp, entry); /* * We had a write-protection fault here and changed the pmd * to to more permissive. No need to flush the TLB for that, * #PF is architecturally guaranteed to do that and in the * worst-case we'll generate a spurious fault. */ } return changed; } int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pud_t *pudp, pud_t entry, int dirty) { int changed = !pud_same(*pudp, entry); VM_BUG_ON(address & ~HPAGE_PUD_MASK); if (changed && dirty) { set_pud(pudp, entry); /* * We had a write-protection fault here and changed the pud * to to more permissive. No need to flush the TLB for that, * #PF is architecturally guaranteed to do that and in the * worst-case we'll generate a spurious fault. */ } return changed; } #endif int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { int ret = 0; if (pte_young(*ptep)) ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, (unsigned long *) &ptep->pte); return ret; } #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { int ret = 0; if (pmd_young(*pmdp)) ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, (unsigned long *)pmdp); return ret; } #endif #ifdef CONFIG_TRANSPARENT_HUGEPAGE int pudp_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pud_t *pudp) { int ret = 0; if (pud_young(*pudp)) ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, (unsigned long *)pudp); return ret; } #endif int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { /* * On x86 CPUs, clearing the accessed bit without a TLB flush * doesn't cause data corruption. [ It could cause incorrect * page aging and the (mistaken) reclaim of hot pages, but the * chance of that should be relatively low. ] * * So as a performance optimization don't flush the TLB when * clearing the accessed bit, it will eventually be flushed by * a context switch or a VM operation anyway. [ In the rare * event of it not getting flushed for a long time the delay * shouldn't really matter because there's no real memory * pressure for swapout to react to. ] */ return ptep_test_and_clear_young(vma, address, ptep); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { int young; VM_BUG_ON(address & ~HPAGE_PMD_MASK); young = pmdp_test_and_clear_young(vma, address, pmdp); if (young) flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return young; } pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { VM_WARN_ON_ONCE(!pmd_present(*pmdp)); /* * No flush is necessary. Once an invalid PTE is established, the PTE's * access and dirty bits cannot be updated. */ return pmdp_establish(vma, address, pmdp, pmd_mkinvalid(*pmdp)); } #endif #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address, pud_t *pudp) { VM_WARN_ON_ONCE(!pud_present(*pudp)); pud_t old = pudp_establish(vma, address, pudp, pud_mkinvalid(*pudp)); flush_pud_tlb_range(vma, address, address + HPAGE_PUD_SIZE); return old; } #endif /** * reserve_top_address - reserves a hole in the top of kernel address space * @reserve - size of hole to reserve * * Can be used to relocate the fixmap area and poke a hole in the top * of kernel address space to make room for a hypervisor. */ void __init reserve_top_address(unsigned long reserve) { #ifdef CONFIG_X86_32 BUG_ON(fixmaps_set > 0); __FIXADDR_TOP = round_down(-reserve, 1 << PMD_SHIFT) - PAGE_SIZE; printk(KERN_INFO "Reserving virtual address space above 0x%08lx (rounded to 0x%08lx)\n", -reserve, __FIXADDR_TOP + PAGE_SIZE); #endif } int fixmaps_set; void __native_set_fixmap(enum fixed_addresses idx, pte_t pte) { unsigned long address = __fix_to_virt(idx); #ifdef CONFIG_X86_64 /* * Ensure that the static initial page tables are covering the * fixmap completely. */ BUILD_BUG_ON(__end_of_permanent_fixed_addresses > (FIXMAP_PMD_NUM * PTRS_PER_PTE)); #endif if (idx >= __end_of_fixed_addresses) { BUG(); return; } set_pte_vaddr(address, pte); fixmaps_set++; } void native_set_fixmap(unsigned /* enum fixed_addresses */ idx, phys_addr_t phys, pgprot_t flags) { /* Sanitize 'prot' against any unsupported bits: */ pgprot_val(flags) &= __default_kernel_pte_mask; __native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags)); } #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP #ifdef CONFIG_X86_5LEVEL /** * p4d_set_huge - setup kernel P4D mapping * * No 512GB pages yet -- always return 0 */ int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot) { return 0; } /** * p4d_clear_huge - clear kernel P4D mapping when it is set * * No 512GB pages yet -- always return 0 */ void p4d_clear_huge(p4d_t *p4d) { } #endif /** * pud_set_huge - setup kernel PUD mapping * * MTRRs can override PAT memory types with 4KiB granularity. Therefore, this * function sets up a huge page only if the complete range has the same MTRR * caching mode. * * Callers should try to decrease page size (1GB -> 2MB -> 4K) if the bigger * page mapping attempt fails. * * Returns 1 on success and 0 on failure. */ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) { u8 uniform; mtrr_type_lookup(addr, addr + PUD_SIZE, &uniform); if (!uniform) return 0; /* Bail out if we are we on a populated non-leaf entry: */ if (pud_present(*pud) && !pud_leaf(*pud)) return 0; set_pte((pte_t *)pud, pfn_pte( (u64)addr >> PAGE_SHIFT, __pgprot(protval_4k_2_large(pgprot_val(prot)) | _PAGE_PSE))); return 1; } /** * pmd_set_huge - setup kernel PMD mapping * * See text over pud_set_huge() above. * * Returns 1 on success and 0 on failure. */ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) { u8 uniform; mtrr_type_lookup(addr, addr + PMD_SIZE, &uniform); if (!uniform) { pr_warn_once("%s: Cannot satisfy [mem %#010llx-%#010llx] with a huge-page mapping due to MTRR override.\n", __func__, addr, addr + PMD_SIZE); return 0; } /* Bail out if we are we on a populated non-leaf entry: */ if (pmd_present(*pmd) && !pmd_leaf(*pmd)) return 0; set_pte((pte_t *)pmd, pfn_pte( (u64)addr >> PAGE_SHIFT, __pgprot(protval_4k_2_large(pgprot_val(prot)) | _PAGE_PSE))); return 1; } /** * pud_clear_huge - clear kernel PUD mapping when it is set * * Returns 1 on success and 0 on failure (no PUD map is found). */ int pud_clear_huge(pud_t *pud) { if (pud_leaf(*pud)) { pud_clear(pud); return 1; } return 0; } /** * pmd_clear_huge - clear kernel PMD mapping when it is set * * Returns 1 on success and 0 on failure (no PMD map is found). */ int pmd_clear_huge(pmd_t *pmd) { if (pmd_leaf(*pmd)) { pmd_clear(pmd); return 1; } return 0; } #ifdef CONFIG_X86_64 /** * pud_free_pmd_page - Clear pud entry and free pmd page. * @pud: Pointer to a PUD. * @addr: Virtual address associated with pud. * * Context: The pud range has been unmapped and TLB purged. * Return: 1 if clearing the entry succeeded. 0 otherwise. * * NOTE: Callers must allow a single page allocation. */ int pud_free_pmd_page(pud_t *pud, unsigned long addr) { pmd_t *pmd, *pmd_sv; pte_t *pte; int i; pmd = pud_pgtable(*pud); pmd_sv = (pmd_t *)__get_free_page(GFP_KERNEL); if (!pmd_sv) return 0; for (i = 0; i < PTRS_PER_PMD; i++) { pmd_sv[i] = pmd[i]; if (!pmd_none(pmd[i])) pmd_clear(&pmd[i]); } pud_clear(pud); /* INVLPG to clear all paging-structure caches */ flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1); for (i = 0; i < PTRS_PER_PMD; i++) { if (!pmd_none(pmd_sv[i])) { pte = (pte_t *)pmd_page_vaddr(pmd_sv[i]); free_page((unsigned long)pte); } } free_page((unsigned long)pmd_sv); pagetable_dtor(virt_to_ptdesc(pmd)); free_page((unsigned long)pmd); return 1; } /** * pmd_free_pte_page - Clear pmd entry and free pte page. * @pmd: Pointer to a PMD. * @addr: Virtual address associated with pmd. * * Context: The pmd range has been unmapped and TLB purged. * Return: 1 if clearing the entry succeeded. 0 otherwise. */ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) { pte_t *pte; pte = (pte_t *)pmd_page_vaddr(*pmd); pmd_clear(pmd); /* INVLPG to clear all paging-structure caches */ flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1); free_page((unsigned long)pte); return 1; } #else /* !CONFIG_X86_64 */ /* * Disable free page handling on x86-PAE. This assures that ioremap() * does not update sync'd pmd entries. See vmalloc_sync_one(). */ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) { return pmd_none(*pmd); } #endif /* CONFIG_X86_64 */ #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma) { if (vma->vm_flags & VM_SHADOW_STACK) return pte_mkwrite_shstk(pte); pte = pte_mkwrite_novma(pte); return pte_clear_saveddirty(pte); } pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) { if (vma->vm_flags & VM_SHADOW_STACK) return pmd_mkwrite_shstk(pmd); pmd = pmd_mkwrite_novma(pmd); return pmd_clear_saveddirty(pmd); } void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte) { /* * Hardware before shadow stack can (rarely) set Dirty=1 * on a Write=0 PTE. So the below condition * only indicates a software bug when shadow stack is * supported by the HW. This checking is covered in * pte_shstk(). */ VM_WARN_ON_ONCE(!(vma->vm_flags & VM_SHADOW_STACK) && pte_shstk(pte)); } void arch_check_zapped_pmd(struct vm_area_struct *vma, pmd_t pmd) { /* See note in arch_check_zapped_pte() */ VM_WARN_ON_ONCE(!(vma->vm_flags & VM_SHADOW_STACK) && pmd_shstk(pmd)); } void arch_check_zapped_pud(struct vm_area_struct *vma, pud_t pud) { /* See note in arch_check_zapped_pte() */ VM_WARN_ON_ONCE(!(vma->vm_flags & VM_SHADOW_STACK) && pud_shstk(pud)); }
8 8 8 8 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2012 Red Hat */ #include <linux/module.h> #include <drm/clients/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_shmem.h> #include <drm/drm_file.h> #include <drm/drm_gem_shmem_helper.h> #include <drm/drm_managed.h> #include <drm/drm_modeset_helper.h> #include <drm/drm_ioctl.h> #include <drm/drm_probe_helper.h> #include <drm/drm_print.h> #include "udl_drv.h" static int udl_usb_suspend(struct usb_interface *interface, pm_message_t message) { struct drm_device *dev = usb_get_intfdata(interface); int ret; ret = drm_mode_config_helper_suspend(dev); if (ret) return ret; udl_sync_pending_urbs(dev); return 0; } static int udl_usb_resume(struct usb_interface *interface) { struct drm_device *dev = usb_get_intfdata(interface); return drm_mode_config_helper_resume(dev); } static int udl_usb_reset_resume(struct usb_interface *interface) { struct drm_device *dev = usb_get_intfdata(interface); struct udl_device *udl = to_udl(dev); udl_select_std_channel(udl); return drm_mode_config_helper_resume(dev); } /* * FIXME: Dma-buf sharing requires DMA support by the importing device. * This function is a workaround to make USB devices work as well. * See todo.rst for how to fix the issue in the dma-buf framework. */ static struct drm_gem_object *udl_driver_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf) { struct udl_device *udl = to_udl(dev); if (!udl->dmadev) return ERR_PTR(-ENODEV); return drm_gem_prime_import_dev(dev, dma_buf, udl->dmadev); } DEFINE_DRM_GEM_FOPS(udl_driver_fops); static const struct drm_driver driver = { .driver_features = DRIVER_ATOMIC | DRIVER_GEM | DRIVER_MODESET, /* GEM hooks */ .fops = &udl_driver_fops, DRM_GEM_SHMEM_DRIVER_OPS, .gem_prime_import = udl_driver_gem_prime_import, DRM_FBDEV_SHMEM_DRIVER_OPS, .name = DRIVER_NAME, .desc = DRIVER_DESC, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, .patchlevel = DRIVER_PATCHLEVEL, }; static struct udl_device *udl_driver_create(struct usb_interface *interface) { struct udl_device *udl; int r; udl = devm_drm_dev_alloc(&interface->dev, &driver, struct udl_device, drm); if (IS_ERR(udl)) return udl; r = udl_init(udl); if (r) return ERR_PTR(r); usb_set_intfdata(interface, udl); return udl; } static int udl_usb_probe(struct usb_interface *interface, const struct usb_device_id *id) { int r; struct udl_device *udl; udl = udl_driver_create(interface); if (IS_ERR(udl)) return PTR_ERR(udl); r = drm_dev_register(&udl->drm, 0); if (r) return r; DRM_INFO("Initialized udl on minor %d\n", udl->drm.primary->index); drm_client_setup(&udl->drm, NULL); return 0; } static void udl_usb_disconnect(struct usb_interface *interface) { struct drm_device *dev = usb_get_intfdata(interface); drm_kms_helper_poll_fini(dev); udl_drop_usb(dev); drm_dev_unplug(dev); } /* * There are many DisplayLink-based graphics products, all with unique PIDs. * So we match on DisplayLink's VID + Vendor-Defined Interface Class (0xff) * We also require a match on SubClass (0x00) and Protocol (0x00), * which is compatible with all known USB 2.0 era graphics chips and firmware, * but allows DisplayLink to increment those for any future incompatible chips */ static const struct usb_device_id id_table[] = { {.idVendor = 0x17e9, .bInterfaceClass = 0xff, .bInterfaceSubClass = 0x00, .bInterfaceProtocol = 0x00, .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_INT_CLASS | USB_DEVICE_ID_MATCH_INT_SUBCLASS | USB_DEVICE_ID_MATCH_INT_PROTOCOL,}, {}, }; MODULE_DEVICE_TABLE(usb, id_table); static struct usb_driver udl_driver = { .name = "udl", .probe = udl_usb_probe, .disconnect = udl_usb_disconnect, .suspend = udl_usb_suspend, .resume = udl_usb_resume, .reset_resume = udl_usb_reset_resume, .id_table = id_table, }; module_usb_driver(udl_driver); MODULE_DESCRIPTION("KMS driver for the USB displaylink video adapters"); MODULE_LICENSE("GPL");
2 2 2 2 1 2 1 2 2 2 2 2 2 3 3 3 3 2 2 2 1 1 1 21 2 21 21 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved. */ #include <linux/skbuff.h> #include <linux/if_ether.h> #include <linux/netdevice.h> #include <linux/spinlock.h> #include <linux/ethtool.h> #include <linux/etherdevice.h> #include <linux/if_bonding.h> #include <linux/pkt_sched.h> #include <net/net_namespace.h> #include <net/bonding.h> #include <net/bond_3ad.h> #include <net/netlink.h> /* General definitions */ #define AD_SHORT_TIMEOUT 1 #define AD_LONG_TIMEOUT 0 #define AD_STANDBY 0x2 #define AD_MAX_TX_IN_SECOND 3 #define AD_COLLECTOR_MAX_DELAY 0 /* Timer definitions (43.4.4 in the 802.3ad standard) */ #define AD_FAST_PERIODIC_TIME 1 #define AD_SLOW_PERIODIC_TIME 30 #define AD_SHORT_TIMEOUT_TIME (3*AD_FAST_PERIODIC_TIME) #define AD_LONG_TIMEOUT_TIME (3*AD_SLOW_PERIODIC_TIME) #define AD_CHURN_DETECTION_TIME 60 #define AD_AGGREGATE_WAIT_TIME 2 /* Port Variables definitions used by the State Machines (43.4.7 in the * 802.3ad standard) */ #define AD_PORT_BEGIN 0x1 #define AD_PORT_LACP_ENABLED 0x2 #define AD_PORT_ACTOR_CHURN 0x4 #define AD_PORT_PARTNER_CHURN 0x8 #define AD_PORT_READY 0x10 #define AD_PORT_READY_N 0x20 #define AD_PORT_MATCHED 0x40 #define AD_PORT_STANDBY 0x80 #define AD_PORT_SELECTED 0x100 #define AD_PORT_MOVED 0x200 #define AD_PORT_CHURNED (AD_PORT_ACTOR_CHURN | AD_PORT_PARTNER_CHURN) /* Port Key definitions * key is determined according to the link speed, duplex and * user key (which is yet not supported) * -------------------------------------------------------------- * Port key | User key (10 bits) | Speed (5 bits) | Duplex| * -------------------------------------------------------------- * |15 6|5 1|0 */ #define AD_DUPLEX_KEY_MASKS 0x1 #define AD_SPEED_KEY_MASKS 0x3E #define AD_USER_KEY_MASKS 0xFFC0 enum ad_link_speed_type { AD_LINK_SPEED_1MBPS = 1, AD_LINK_SPEED_10MBPS, AD_LINK_SPEED_100MBPS, AD_LINK_SPEED_1000MBPS, AD_LINK_SPEED_2500MBPS, AD_LINK_SPEED_5000MBPS, AD_LINK_SPEED_10000MBPS, AD_LINK_SPEED_14000MBPS, AD_LINK_SPEED_20000MBPS, AD_LINK_SPEED_25000MBPS, AD_LINK_SPEED_40000MBPS, AD_LINK_SPEED_50000MBPS, AD_LINK_SPEED_56000MBPS, AD_LINK_SPEED_100000MBPS, AD_LINK_SPEED_200000MBPS, AD_LINK_SPEED_400000MBPS, AD_LINK_SPEED_800000MBPS, }; /* compare MAC addresses */ #define MAC_ADDRESS_EQUAL(A, B) \ ether_addr_equal_64bits((const u8 *)A, (const u8 *)B) static const u16 ad_ticks_per_sec = 1000 / AD_TIMER_INTERVAL; static const int ad_delta_in_ticks = (AD_TIMER_INTERVAL * HZ) / 1000; const u8 lacpdu_mcast_addr[ETH_ALEN + 2] __long_aligned = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x02 }; /* ================= main 802.3ad protocol functions ================== */ static int ad_lacpdu_send(struct port *port); static int ad_marker_send(struct port *port, struct bond_marker *marker); static void ad_mux_machine(struct port *port, bool *update_slave_arr); static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); static void ad_tx_machine(struct port *port); static void ad_periodic_machine(struct port *port, struct bond_params *bond_params); static void ad_port_selection_logic(struct port *port, bool *update_slave_arr); static void ad_agg_selection_logic(struct aggregator *aggregator, bool *update_slave_arr); static void ad_clear_agg(struct aggregator *aggregator); static void ad_initialize_agg(struct aggregator *aggregator); static void ad_initialize_port(struct port *port, int lacp_fast); static void ad_enable_collecting(struct port *port); static void ad_disable_distributing(struct port *port, bool *update_slave_arr); static void ad_enable_collecting_distributing(struct port *port, bool *update_slave_arr); static void ad_disable_collecting_distributing(struct port *port, bool *update_slave_arr); static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port); static void ad_marker_response_received(struct bond_marker *marker, struct port *port); static void ad_update_actor_keys(struct port *port, bool reset); /* ================= api to bonding and kernel code ================== */ /** * __get_bond_by_port - get the port's bonding struct * @port: the port we're looking at * * Return @port's bonding struct, or %NULL if it can't be found. */ static inline struct bonding *__get_bond_by_port(struct port *port) { if (port->slave == NULL) return NULL; return bond_get_bond_by_slave(port->slave); } /** * __get_first_agg - get the first aggregator in the bond * @port: the port we're looking at * * Return the aggregator of the first slave in @bond, or %NULL if it can't be * found. * The caller must hold RCU or RTNL lock. */ static inline struct aggregator *__get_first_agg(struct port *port) { struct bonding *bond = __get_bond_by_port(port); struct slave *first_slave; struct aggregator *agg; /* If there's no bond for this port, or bond has no slaves */ if (bond == NULL) return NULL; rcu_read_lock(); first_slave = bond_first_slave_rcu(bond); agg = first_slave ? &(SLAVE_AD_INFO(first_slave)->aggregator) : NULL; rcu_read_unlock(); return agg; } /** * __agg_has_partner - see if we have a partner * @agg: the agregator we're looking at * * Return nonzero if aggregator has a partner (denoted by a non-zero ether * address for the partner). Return 0 if not. */ static inline int __agg_has_partner(struct aggregator *agg) { return !is_zero_ether_addr(agg->partner_system.mac_addr_value); } /** * __disable_distributing_port - disable the port's slave for distributing. * Port will still be able to collect. * @port: the port we're looking at * * This will disable only distributing on the port's slave. */ static void __disable_distributing_port(struct port *port) { bond_set_slave_tx_disabled_flags(port->slave, BOND_SLAVE_NOTIFY_LATER); } /** * __enable_collecting_port - enable the port's slave for collecting, * if it's up * @port: the port we're looking at * * This will enable only collecting on the port's slave. */ static void __enable_collecting_port(struct port *port) { struct slave *slave = port->slave; if (slave->link == BOND_LINK_UP && bond_slave_is_up(slave)) bond_set_slave_rx_enabled_flags(slave, BOND_SLAVE_NOTIFY_LATER); } /** * __disable_port - disable the port's slave * @port: the port we're looking at * * This will disable both collecting and distributing on the port's slave. */ static inline void __disable_port(struct port *port) { bond_set_slave_inactive_flags(port->slave, BOND_SLAVE_NOTIFY_LATER); } /** * __enable_port - enable the port's slave, if it's up * @port: the port we're looking at * * This will enable both collecting and distributing on the port's slave. */ static inline void __enable_port(struct port *port) { struct slave *slave = port->slave; if ((slave->link == BOND_LINK_UP) && bond_slave_is_up(slave)) bond_set_slave_active_flags(slave, BOND_SLAVE_NOTIFY_LATER); } /** * __port_move_to_attached_state - check if port should transition back to attached * state. * @port: the port we're looking at */ static bool __port_move_to_attached_state(struct port *port) { if (!(port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY) || !(port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) || !(port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION)) port->sm_mux_state = AD_MUX_ATTACHED; return port->sm_mux_state == AD_MUX_ATTACHED; } /** * __port_is_collecting_distributing - check if the port's slave is in the * combined collecting/distributing state * @port: the port we're looking at */ static int __port_is_collecting_distributing(struct port *port) { return bond_is_active_slave(port->slave); } /** * __get_agg_selection_mode - get the aggregator selection mode * @port: the port we're looking at * * Get the aggregator selection mode. Can be %STABLE, %BANDWIDTH or %COUNT. */ static inline u32 __get_agg_selection_mode(struct port *port) { struct bonding *bond = __get_bond_by_port(port); if (bond == NULL) return BOND_AD_STABLE; return bond->params.ad_select; } /** * __check_agg_selection_timer - check if the selection timer has expired * @port: the port we're looking at */ static inline int __check_agg_selection_timer(struct port *port) { struct bonding *bond = __get_bond_by_port(port); if (bond == NULL) return 0; return atomic_read(&BOND_AD_INFO(bond).agg_select_timer) ? 1 : 0; } /** * __get_link_speed - get a port's speed * @port: the port we're looking at * * Return @port's speed in 802.3ad enum format. i.e. one of: * 0, * %AD_LINK_SPEED_10MBPS, * %AD_LINK_SPEED_100MBPS, * %AD_LINK_SPEED_1000MBPS, * %AD_LINK_SPEED_2500MBPS, * %AD_LINK_SPEED_5000MBPS, * %AD_LINK_SPEED_10000MBPS * %AD_LINK_SPEED_14000MBPS, * %AD_LINK_SPEED_20000MBPS * %AD_LINK_SPEED_25000MBPS * %AD_LINK_SPEED_40000MBPS * %AD_LINK_SPEED_50000MBPS * %AD_LINK_SPEED_56000MBPS * %AD_LINK_SPEED_100000MBPS * %AD_LINK_SPEED_200000MBPS * %AD_LINK_SPEED_400000MBPS * %AD_LINK_SPEED_800000MBPS */ static u16 __get_link_speed(struct port *port) { struct slave *slave = port->slave; u16 speed; /* this if covers only a special case: when the configuration starts * with link down, it sets the speed to 0. * This is done in spite of the fact that the e100 driver reports 0 * to be compatible with MVT in the future. */ if (slave->link != BOND_LINK_UP) speed = 0; else { switch (slave->speed) { case SPEED_10: speed = AD_LINK_SPEED_10MBPS; break; case SPEED_100: speed = AD_LINK_SPEED_100MBPS; break; case SPEED_1000: speed = AD_LINK_SPEED_1000MBPS; break; case SPEED_2500: speed = AD_LINK_SPEED_2500MBPS; break; case SPEED_5000: speed = AD_LINK_SPEED_5000MBPS; break; case SPEED_10000: speed = AD_LINK_SPEED_10000MBPS; break; case SPEED_14000: speed = AD_LINK_SPEED_14000MBPS; break; case SPEED_20000: speed = AD_LINK_SPEED_20000MBPS; break; case SPEED_25000: speed = AD_LINK_SPEED_25000MBPS; break; case SPEED_40000: speed = AD_LINK_SPEED_40000MBPS; break; case SPEED_50000: speed = AD_LINK_SPEED_50000MBPS; break; case SPEED_56000: speed = AD_LINK_SPEED_56000MBPS; break; case SPEED_100000: speed = AD_LINK_SPEED_100000MBPS; break; case SPEED_200000: speed = AD_LINK_SPEED_200000MBPS; break; case SPEED_400000: speed = AD_LINK_SPEED_400000MBPS; break; case SPEED_800000: speed = AD_LINK_SPEED_800000MBPS; break; default: /* unknown speed value from ethtool. shouldn't happen */ if (slave->speed != SPEED_UNKNOWN) pr_err_once("%s: (slave %s): unknown ethtool speed (%d) for port %d (set it to 0)\n", slave->bond->dev->name, slave->dev->name, slave->speed, port->actor_port_number); speed = 0; break; } } slave_dbg(slave->bond->dev, slave->dev, "Port %d Received link speed %d update from adapter\n", port->actor_port_number, speed); return speed; } /** * __get_duplex - get a port's duplex * @port: the port we're looking at * * Return @port's duplex in 802.3ad bitmask format. i.e.: * 0x01 if in full duplex * 0x00 otherwise */ static u8 __get_duplex(struct port *port) { struct slave *slave = port->slave; u8 retval = 0x0; /* handling a special case: when the configuration starts with * link down, it sets the duplex to 0. */ if (slave->link == BOND_LINK_UP) { switch (slave->duplex) { case DUPLEX_FULL: retval = 0x1; slave_dbg(slave->bond->dev, slave->dev, "Port %d Received status full duplex update from adapter\n", port->actor_port_number); break; case DUPLEX_HALF: default: retval = 0x0; slave_dbg(slave->bond->dev, slave->dev, "Port %d Received status NOT full duplex update from adapter\n", port->actor_port_number); break; } } return retval; } static void __ad_actor_update_port(struct port *port) { const struct bonding *bond = bond_get_bond_by_slave(port->slave); port->actor_system = BOND_AD_INFO(bond).system.sys_mac_addr; port->actor_system_priority = BOND_AD_INFO(bond).system.sys_priority; } /* Conversions */ /** * __ad_timer_to_ticks - convert a given timer type to AD module ticks * @timer_type: which timer to operate * @par: timer parameter. see below * * If @timer_type is %current_while_timer, @par indicates long/short timer. * If @timer_type is %periodic_timer, @par is one of %FAST_PERIODIC_TIME, * %SLOW_PERIODIC_TIME. */ static u16 __ad_timer_to_ticks(u16 timer_type, u16 par) { u16 retval = 0; /* to silence the compiler */ switch (timer_type) { case AD_CURRENT_WHILE_TIMER: /* for rx machine usage */ if (par) retval = (AD_SHORT_TIMEOUT_TIME*ad_ticks_per_sec); else retval = (AD_LONG_TIMEOUT_TIME*ad_ticks_per_sec); break; case AD_ACTOR_CHURN_TIMER: /* for local churn machine */ retval = (AD_CHURN_DETECTION_TIME*ad_ticks_per_sec); break; case AD_PERIODIC_TIMER: /* for periodic machine */ retval = (par*ad_ticks_per_sec); /* long timeout */ break; case AD_PARTNER_CHURN_TIMER: /* for remote churn machine */ retval = (AD_CHURN_DETECTION_TIME*ad_ticks_per_sec); break; case AD_WAIT_WHILE_TIMER: /* for selection machine */ retval = (AD_AGGREGATE_WAIT_TIME*ad_ticks_per_sec); break; } return retval; } /* ================= ad_rx_machine helper functions ================== */ /** * __choose_matched - update a port's matched variable from a received lacpdu * @lacpdu: the lacpdu we've received * @port: the port we're looking at * * Update the value of the matched variable, using parameter values from a * newly received lacpdu. Parameter values for the partner carried in the * received PDU are compared with the corresponding operational parameter * values for the actor. Matched is set to TRUE if all of these parameters * match and the PDU parameter partner_state.aggregation has the same value as * actor_oper_port_state.aggregation and lacp will actively maintain the link * in the aggregation. Matched is also set to TRUE if the value of * actor_state.aggregation in the received PDU is set to FALSE, i.e., indicates * an individual link and lacp will actively maintain the link. Otherwise, * matched is set to FALSE. LACP is considered to be actively maintaining the * link if either the PDU's actor_state.lacp_activity variable is TRUE or both * the actor's actor_oper_port_state.lacp_activity and the PDU's * partner_state.lacp_activity variables are TRUE. * * Note: the AD_PORT_MATCHED "variable" is not specified by 802.3ad; it is * used here to implement the language from 802.3ad 43.4.9 that requires * recordPDU to "match" the LACPDU parameters to the stored values. */ static void __choose_matched(struct lacpdu *lacpdu, struct port *port) { /* check if all parameters are alike * or this is individual link(aggregation == FALSE) * then update the state machine Matched variable. */ if (((ntohs(lacpdu->partner_port) == port->actor_port_number) && (ntohs(lacpdu->partner_port_priority) == port->actor_port_priority) && MAC_ADDRESS_EQUAL(&(lacpdu->partner_system), &(port->actor_system)) && (ntohs(lacpdu->partner_system_priority) == port->actor_system_priority) && (ntohs(lacpdu->partner_key) == port->actor_oper_port_key) && ((lacpdu->partner_state & LACP_STATE_AGGREGATION) == (port->actor_oper_port_state & LACP_STATE_AGGREGATION))) || ((lacpdu->actor_state & LACP_STATE_AGGREGATION) == 0) ) { port->sm_vars |= AD_PORT_MATCHED; } else { port->sm_vars &= ~AD_PORT_MATCHED; } } /** * __record_pdu - record parameters from a received lacpdu * @lacpdu: the lacpdu we've received * @port: the port we're looking at * * Record the parameter values for the Actor carried in a received lacpdu as * the current partner operational parameter values and sets * actor_oper_port_state.defaulted to FALSE. */ static void __record_pdu(struct lacpdu *lacpdu, struct port *port) { if (lacpdu && port) { struct port_params *partner = &port->partner_oper; __choose_matched(lacpdu, port); /* record the new parameter values for the partner * operational */ partner->port_number = ntohs(lacpdu->actor_port); partner->port_priority = ntohs(lacpdu->actor_port_priority); partner->system = lacpdu->actor_system; partner->system_priority = ntohs(lacpdu->actor_system_priority); partner->key = ntohs(lacpdu->actor_key); partner->port_state = lacpdu->actor_state; /* set actor_oper_port_state.defaulted to FALSE */ port->actor_oper_port_state &= ~LACP_STATE_DEFAULTED; /* set the partner sync. to on if the partner is sync, * and the port is matched */ if ((port->sm_vars & AD_PORT_MATCHED) && (lacpdu->actor_state & LACP_STATE_SYNCHRONIZATION)) { partner->port_state |= LACP_STATE_SYNCHRONIZATION; slave_dbg(port->slave->bond->dev, port->slave->dev, "partner sync=1\n"); } else { partner->port_state &= ~LACP_STATE_SYNCHRONIZATION; slave_dbg(port->slave->bond->dev, port->slave->dev, "partner sync=0\n"); } } } /** * __record_default - record default parameters * @port: the port we're looking at * * This function records the default parameter values for the partner carried * in the Partner Admin parameters as the current partner operational parameter * values and sets actor_oper_port_state.defaulted to TRUE. */ static void __record_default(struct port *port) { if (port) { /* record the partner admin parameters */ memcpy(&port->partner_oper, &port->partner_admin, sizeof(struct port_params)); /* set actor_oper_port_state.defaulted to true */ port->actor_oper_port_state |= LACP_STATE_DEFAULTED; } } /** * __update_selected - update a port's Selected variable from a received lacpdu * @lacpdu: the lacpdu we've received * @port: the port we're looking at * * Update the value of the selected variable, using parameter values from a * newly received lacpdu. The parameter values for the Actor carried in the * received PDU are compared with the corresponding operational parameter * values for the ports partner. If one or more of the comparisons shows that * the value(s) received in the PDU differ from the current operational values, * then selected is set to FALSE and actor_oper_port_state.synchronization is * set to out_of_sync. Otherwise, selected remains unchanged. */ static void __update_selected(struct lacpdu *lacpdu, struct port *port) { if (lacpdu && port) { const struct port_params *partner = &port->partner_oper; /* check if any parameter is different then * update the state machine selected variable. */ if (ntohs(lacpdu->actor_port) != partner->port_number || ntohs(lacpdu->actor_port_priority) != partner->port_priority || !MAC_ADDRESS_EQUAL(&lacpdu->actor_system, &partner->system) || ntohs(lacpdu->actor_system_priority) != partner->system_priority || ntohs(lacpdu->actor_key) != partner->key || (lacpdu->actor_state & LACP_STATE_AGGREGATION) != (partner->port_state & LACP_STATE_AGGREGATION)) { port->sm_vars &= ~AD_PORT_SELECTED; } } } /** * __update_default_selected - update a port's Selected variable from Partner * @port: the port we're looking at * * This function updates the value of the selected variable, using the partner * administrative parameter values. The administrative values are compared with * the corresponding operational parameter values for the partner. If one or * more of the comparisons shows that the administrative value(s) differ from * the current operational values, then Selected is set to FALSE and * actor_oper_port_state.synchronization is set to OUT_OF_SYNC. Otherwise, * Selected remains unchanged. */ static void __update_default_selected(struct port *port) { if (port) { const struct port_params *admin = &port->partner_admin; const struct port_params *oper = &port->partner_oper; /* check if any parameter is different then * update the state machine selected variable. */ if (admin->port_number != oper->port_number || admin->port_priority != oper->port_priority || !MAC_ADDRESS_EQUAL(&admin->system, &oper->system) || admin->system_priority != oper->system_priority || admin->key != oper->key || (admin->port_state & LACP_STATE_AGGREGATION) != (oper->port_state & LACP_STATE_AGGREGATION)) { port->sm_vars &= ~AD_PORT_SELECTED; } } } /** * __update_ntt - update a port's ntt variable from a received lacpdu * @lacpdu: the lacpdu we've received * @port: the port we're looking at * * Updates the value of the ntt variable, using parameter values from a newly * received lacpdu. The parameter values for the partner carried in the * received PDU are compared with the corresponding operational parameter * values for the Actor. If one or more of the comparisons shows that the * value(s) received in the PDU differ from the current operational values, * then ntt is set to TRUE. Otherwise, ntt remains unchanged. */ static void __update_ntt(struct lacpdu *lacpdu, struct port *port) { /* validate lacpdu and port */ if (lacpdu && port) { /* check if any parameter is different then * update the port->ntt. */ if ((ntohs(lacpdu->partner_port) != port->actor_port_number) || (ntohs(lacpdu->partner_port_priority) != port->actor_port_priority) || !MAC_ADDRESS_EQUAL(&(lacpdu->partner_system), &(port->actor_system)) || (ntohs(lacpdu->partner_system_priority) != port->actor_system_priority) || (ntohs(lacpdu->partner_key) != port->actor_oper_port_key) || ((lacpdu->partner_state & LACP_STATE_LACP_ACTIVITY) != (port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY)) || ((lacpdu->partner_state & LACP_STATE_LACP_TIMEOUT) != (port->actor_oper_port_state & LACP_STATE_LACP_TIMEOUT)) || ((lacpdu->partner_state & LACP_STATE_SYNCHRONIZATION) != (port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION)) || ((lacpdu->partner_state & LACP_STATE_AGGREGATION) != (port->actor_oper_port_state & LACP_STATE_AGGREGATION)) ) { port->ntt = true; } } } /** * __agg_ports_are_ready - check if all ports in an aggregator are ready * @aggregator: the aggregator we're looking at * */ static int __agg_ports_are_ready(struct aggregator *aggregator) { struct port *port; int retval = 1; if (aggregator) { /* scan all ports in this aggregator to verfy if they are * all ready. */ for (port = aggregator->lag_ports; port; port = port->next_port_in_aggregator) { if (!(port->sm_vars & AD_PORT_READY_N)) { retval = 0; break; } } } return retval; } /** * __set_agg_ports_ready - set value of Ready bit in all ports of an aggregator * @aggregator: the aggregator we're looking at * @val: Should the ports' ready bit be set on or off * */ static void __set_agg_ports_ready(struct aggregator *aggregator, int val) { struct port *port; for (port = aggregator->lag_ports; port; port = port->next_port_in_aggregator) { if (val) port->sm_vars |= AD_PORT_READY; else port->sm_vars &= ~AD_PORT_READY; } } static int __agg_active_ports(struct aggregator *agg) { struct port *port; int active = 0; for (port = agg->lag_ports; port; port = port->next_port_in_aggregator) { if (port->is_enabled) active++; } return active; } /** * __get_agg_bandwidth - get the total bandwidth of an aggregator * @aggregator: the aggregator we're looking at * */ static u32 __get_agg_bandwidth(struct aggregator *aggregator) { int nports = __agg_active_ports(aggregator); u32 bandwidth = 0; if (nports) { switch (__get_link_speed(aggregator->lag_ports)) { case AD_LINK_SPEED_1MBPS: bandwidth = nports; break; case AD_LINK_SPEED_10MBPS: bandwidth = nports * 10; break; case AD_LINK_SPEED_100MBPS: bandwidth = nports * 100; break; case AD_LINK_SPEED_1000MBPS: bandwidth = nports * 1000; break; case AD_LINK_SPEED_2500MBPS: bandwidth = nports * 2500; break; case AD_LINK_SPEED_5000MBPS: bandwidth = nports * 5000; break; case AD_LINK_SPEED_10000MBPS: bandwidth = nports * 10000; break; case AD_LINK_SPEED_14000MBPS: bandwidth = nports * 14000; break; case AD_LINK_SPEED_20000MBPS: bandwidth = nports * 20000; break; case AD_LINK_SPEED_25000MBPS: bandwidth = nports * 25000; break; case AD_LINK_SPEED_40000MBPS: bandwidth = nports * 40000; break; case AD_LINK_SPEED_50000MBPS: bandwidth = nports * 50000; break; case AD_LINK_SPEED_56000MBPS: bandwidth = nports * 56000; break; case AD_LINK_SPEED_100000MBPS: bandwidth = nports * 100000; break; case AD_LINK_SPEED_200000MBPS: bandwidth = nports * 200000; break; case AD_LINK_SPEED_400000MBPS: bandwidth = nports * 400000; break; case AD_LINK_SPEED_800000MBPS: bandwidth = nports * 800000; break; default: bandwidth = 0; /* to silence the compiler */ } } return bandwidth; } /** * __get_active_agg - get the current active aggregator * @aggregator: the aggregator we're looking at * * Caller must hold RCU lock. */ static struct aggregator *__get_active_agg(struct aggregator *aggregator) { struct bonding *bond = aggregator->slave->bond; struct list_head *iter; struct slave *slave; bond_for_each_slave_rcu(bond, slave, iter) if (SLAVE_AD_INFO(slave)->aggregator.is_active) return &(SLAVE_AD_INFO(slave)->aggregator); return NULL; } /** * __update_lacpdu_from_port - update a port's lacpdu fields * @port: the port we're looking at */ static inline void __update_lacpdu_from_port(struct port *port) { struct lacpdu *lacpdu = &port->lacpdu; const struct port_params *partner = &port->partner_oper; /* update current actual Actor parameters * lacpdu->subtype initialized * lacpdu->version_number initialized * lacpdu->tlv_type_actor_info initialized * lacpdu->actor_information_length initialized */ lacpdu->actor_system_priority = htons(port->actor_system_priority); lacpdu->actor_system = port->actor_system; lacpdu->actor_key = htons(port->actor_oper_port_key); lacpdu->actor_port_priority = htons(port->actor_port_priority); lacpdu->actor_port = htons(port->actor_port_number); lacpdu->actor_state = port->actor_oper_port_state; slave_dbg(port->slave->bond->dev, port->slave->dev, "update lacpdu: actor port state %x\n", port->actor_oper_port_state); /* lacpdu->reserved_3_1 initialized * lacpdu->tlv_type_partner_info initialized * lacpdu->partner_information_length initialized */ lacpdu->partner_system_priority = htons(partner->system_priority); lacpdu->partner_system = partner->system; lacpdu->partner_key = htons(partner->key); lacpdu->partner_port_priority = htons(partner->port_priority); lacpdu->partner_port = htons(partner->port_number); lacpdu->partner_state = partner->port_state; /* lacpdu->reserved_3_2 initialized * lacpdu->tlv_type_collector_info initialized * lacpdu->collector_information_length initialized * collector_max_delay initialized * reserved_12[12] initialized * tlv_type_terminator initialized * terminator_length initialized * reserved_50[50] initialized */ } /* ================= main 802.3ad protocol code ========================= */ /** * ad_lacpdu_send - send out a lacpdu packet on a given port * @port: the port we're looking at * * Returns: 0 on success * < 0 on error */ static int ad_lacpdu_send(struct port *port) { struct slave *slave = port->slave; struct sk_buff *skb; struct lacpdu_header *lacpdu_header; int length = sizeof(struct lacpdu_header); skb = dev_alloc_skb(length); if (!skb) return -ENOMEM; atomic64_inc(&SLAVE_AD_INFO(slave)->stats.lacpdu_tx); atomic64_inc(&BOND_AD_INFO(slave->bond).stats.lacpdu_tx); skb->dev = slave->dev; skb_reset_mac_header(skb); skb->network_header = skb->mac_header + ETH_HLEN; skb->protocol = PKT_TYPE_LACPDU; skb->priority = TC_PRIO_CONTROL; lacpdu_header = skb_put(skb, length); ether_addr_copy(lacpdu_header->hdr.h_dest, lacpdu_mcast_addr); /* Note: source address is set to be the member's PERMANENT address, * because we use it to identify loopback lacpdus in receive. */ ether_addr_copy(lacpdu_header->hdr.h_source, slave->perm_hwaddr); lacpdu_header->hdr.h_proto = PKT_TYPE_LACPDU; lacpdu_header->lacpdu = port->lacpdu; dev_queue_xmit(skb); return 0; } /** * ad_marker_send - send marker information/response on a given port * @port: the port we're looking at * @marker: marker data to send * * Returns: 0 on success * < 0 on error */ static int ad_marker_send(struct port *port, struct bond_marker *marker) { struct slave *slave = port->slave; struct sk_buff *skb; struct bond_marker_header *marker_header; int length = sizeof(struct bond_marker_header); skb = dev_alloc_skb(length + 16); if (!skb) return -ENOMEM; switch (marker->tlv_type) { case AD_MARKER_INFORMATION_SUBTYPE: atomic64_inc(&SLAVE_AD_INFO(slave)->stats.marker_tx); atomic64_inc(&BOND_AD_INFO(slave->bond).stats.marker_tx); break; case AD_MARKER_RESPONSE_SUBTYPE: atomic64_inc(&SLAVE_AD_INFO(slave)->stats.marker_resp_tx); atomic64_inc(&BOND_AD_INFO(slave->bond).stats.marker_resp_tx); break; } skb_reserve(skb, 16); skb->dev = slave->dev; skb_reset_mac_header(skb); skb->network_header = skb->mac_header + ETH_HLEN; skb->protocol = PKT_TYPE_LACPDU; marker_header = skb_put(skb, length); ether_addr_copy(marker_header->hdr.h_dest, lacpdu_mcast_addr); /* Note: source address is set to be the member's PERMANENT address, * because we use it to identify loopback MARKERs in receive. */ ether_addr_copy(marker_header->hdr.h_source, slave->perm_hwaddr); marker_header->hdr.h_proto = PKT_TYPE_LACPDU; marker_header->marker = *marker; dev_queue_xmit(skb); return 0; } /** * ad_mux_machine - handle a port's mux state machine * @port: the port we're looking at * @update_slave_arr: Does slave array need update? */ static void ad_mux_machine(struct port *port, bool *update_slave_arr) { struct bonding *bond = __get_bond_by_port(port); mux_states_t last_state; /* keep current State Machine state to compare later if it was * changed */ last_state = port->sm_mux_state; if (port->sm_vars & AD_PORT_BEGIN) { port->sm_mux_state = AD_MUX_DETACHED; } else { switch (port->sm_mux_state) { case AD_MUX_DETACHED: if ((port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY)) /* if SELECTED or STANDBY */ port->sm_mux_state = AD_MUX_WAITING; break; case AD_MUX_WAITING: /* if SELECTED == FALSE return to DETACH state */ if (!(port->sm_vars & AD_PORT_SELECTED)) { port->sm_vars &= ~AD_PORT_READY_N; /* in order to withhold the Selection Logic to * check all ports READY_N value every callback * cycle to update ready variable, we check * READY_N and update READY here */ __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); port->sm_mux_state = AD_MUX_DETACHED; break; } /* check if the wait_while_timer expired */ if (port->sm_mux_timer_counter && !(--port->sm_mux_timer_counter)) port->sm_vars |= AD_PORT_READY_N; /* in order to withhold the selection logic to check * all ports READY_N value every callback cycle to * update ready variable, we check READY_N and update * READY here */ __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); /* if the wait_while_timer expired, and the port is * in READY state, move to ATTACHED state */ if ((port->sm_vars & AD_PORT_READY) && !port->sm_mux_timer_counter) port->sm_mux_state = AD_MUX_ATTACHED; break; case AD_MUX_ATTACHED: /* check also if agg_select_timer expired (so the * edable port will take place only after this timer) */ if ((port->sm_vars & AD_PORT_SELECTED) && (port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) && !__check_agg_selection_timer(port)) { if (port->aggregator->is_active) { int state = AD_MUX_COLLECTING_DISTRIBUTING; if (!bond->params.coupled_control) state = AD_MUX_COLLECTING; port->sm_mux_state = state; } } else if (!(port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY)) { /* if UNSELECTED or STANDBY */ port->sm_vars &= ~AD_PORT_READY_N; /* in order to withhold the selection logic to * check all ports READY_N value every callback * cycle to update ready variable, we check * READY_N and update READY here */ __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); port->sm_mux_state = AD_MUX_DETACHED; } else if (port->aggregator->is_active) { port->actor_oper_port_state |= LACP_STATE_SYNCHRONIZATION; } break; case AD_MUX_COLLECTING_DISTRIBUTING: if (!__port_move_to_attached_state(port)) { /* if port state hasn't changed make * sure that a collecting distributing * port in an active aggregator is enabled */ if (port->aggregator->is_active && !__port_is_collecting_distributing(port)) { __enable_port(port); *update_slave_arr = true; } } break; case AD_MUX_COLLECTING: if (!__port_move_to_attached_state(port)) { if ((port->sm_vars & AD_PORT_SELECTED) && (port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) && (port->partner_oper.port_state & LACP_STATE_COLLECTING)) { port->sm_mux_state = AD_MUX_DISTRIBUTING; } else { /* If port state hasn't changed, make sure that a collecting * port is enabled for an active aggregator. */ struct slave *slave = port->slave; if (port->aggregator->is_active && bond_is_slave_rx_disabled(slave)) { ad_enable_collecting(port); *update_slave_arr = true; } } } break; case AD_MUX_DISTRIBUTING: if (!(port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY) || !(port->partner_oper.port_state & LACP_STATE_COLLECTING) || !(port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) || !(port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION)) { port->sm_mux_state = AD_MUX_COLLECTING; } else { /* if port state hasn't changed make * sure that a collecting distributing * port in an active aggregator is enabled */ if (port->aggregator && port->aggregator->is_active && !__port_is_collecting_distributing(port)) { __enable_port(port); *update_slave_arr = true; } } break; default: break; } } /* check if the state machine was changed */ if (port->sm_mux_state != last_state) { slave_dbg(port->slave->bond->dev, port->slave->dev, "Mux Machine: Port=%d, Last State=%d, Curr State=%d\n", port->actor_port_number, last_state, port->sm_mux_state); switch (port->sm_mux_state) { case AD_MUX_DETACHED: port->actor_oper_port_state &= ~LACP_STATE_SYNCHRONIZATION; ad_disable_collecting_distributing(port, update_slave_arr); port->actor_oper_port_state &= ~LACP_STATE_COLLECTING; port->actor_oper_port_state &= ~LACP_STATE_DISTRIBUTING; port->ntt = true; break; case AD_MUX_WAITING: port->sm_mux_timer_counter = __ad_timer_to_ticks(AD_WAIT_WHILE_TIMER, 0); break; case AD_MUX_ATTACHED: if (port->aggregator->is_active) port->actor_oper_port_state |= LACP_STATE_SYNCHRONIZATION; else port->actor_oper_port_state &= ~LACP_STATE_SYNCHRONIZATION; port->actor_oper_port_state &= ~LACP_STATE_COLLECTING; port->actor_oper_port_state &= ~LACP_STATE_DISTRIBUTING; ad_disable_collecting_distributing(port, update_slave_arr); port->ntt = true; break; case AD_MUX_COLLECTING_DISTRIBUTING: port->actor_oper_port_state |= LACP_STATE_COLLECTING; port->actor_oper_port_state |= LACP_STATE_DISTRIBUTING; port->actor_oper_port_state |= LACP_STATE_SYNCHRONIZATION; ad_enable_collecting_distributing(port, update_slave_arr); port->ntt = true; break; case AD_MUX_COLLECTING: port->actor_oper_port_state |= LACP_STATE_COLLECTING; port->actor_oper_port_state &= ~LACP_STATE_DISTRIBUTING; port->actor_oper_port_state |= LACP_STATE_SYNCHRONIZATION; ad_enable_collecting(port); ad_disable_distributing(port, update_slave_arr); port->ntt = true; break; case AD_MUX_DISTRIBUTING: port->actor_oper_port_state |= LACP_STATE_DISTRIBUTING; port->actor_oper_port_state |= LACP_STATE_SYNCHRONIZATION; ad_enable_collecting_distributing(port, update_slave_arr); break; default: break; } } } /** * ad_rx_machine - handle a port's rx State Machine * @lacpdu: the lacpdu we've received * @port: the port we're looking at * * If lacpdu arrived, stop previous timer (if exists) and set the next state as * CURRENT. If timer expired set the state machine in the proper state. * In other cases, this function checks if we need to switch to other state. */ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port) { rx_states_t last_state; /* keep current State Machine state to compare later if it was * changed */ last_state = port->sm_rx_state; if (lacpdu) { atomic64_inc(&SLAVE_AD_INFO(port->slave)->stats.lacpdu_rx); atomic64_inc(&BOND_AD_INFO(port->slave->bond).stats.lacpdu_rx); } /* check if state machine should change state */ /* first, check if port was reinitialized */ if (port->sm_vars & AD_PORT_BEGIN) { port->sm_rx_state = AD_RX_INITIALIZE; port->sm_vars |= AD_PORT_CHURNED; /* check if port is not enabled */ } else if (!(port->sm_vars & AD_PORT_BEGIN) && !port->is_enabled) port->sm_rx_state = AD_RX_PORT_DISABLED; /* check if new lacpdu arrived */ else if (lacpdu && ((port->sm_rx_state == AD_RX_EXPIRED) || (port->sm_rx_state == AD_RX_DEFAULTED) || (port->sm_rx_state == AD_RX_CURRENT))) { if (port->sm_rx_state != AD_RX_CURRENT) port->sm_vars |= AD_PORT_CHURNED; port->sm_rx_timer_counter = 0; port->sm_rx_state = AD_RX_CURRENT; } else { /* if timer is on, and if it is expired */ if (port->sm_rx_timer_counter && !(--port->sm_rx_timer_counter)) { switch (port->sm_rx_state) { case AD_RX_EXPIRED: port->sm_rx_state = AD_RX_DEFAULTED; break; case AD_RX_CURRENT: port->sm_rx_state = AD_RX_EXPIRED; break; default: break; } } else { /* if no lacpdu arrived and no timer is on */ switch (port->sm_rx_state) { case AD_RX_PORT_DISABLED: if (port->is_enabled && (port->sm_vars & AD_PORT_LACP_ENABLED)) port->sm_rx_state = AD_RX_EXPIRED; else if (port->is_enabled && ((port->sm_vars & AD_PORT_LACP_ENABLED) == 0)) port->sm_rx_state = AD_RX_LACP_DISABLED; break; default: break; } } } /* check if the State machine was changed or new lacpdu arrived */ if ((port->sm_rx_state != last_state) || (lacpdu)) { slave_dbg(port->slave->bond->dev, port->slave->dev, "Rx Machine: Port=%d, Last State=%d, Curr State=%d\n", port->actor_port_number, last_state, port->sm_rx_state); switch (port->sm_rx_state) { case AD_RX_INITIALIZE: if (!(port->actor_oper_port_key & AD_DUPLEX_KEY_MASKS)) port->sm_vars &= ~AD_PORT_LACP_ENABLED; else port->sm_vars |= AD_PORT_LACP_ENABLED; port->sm_vars &= ~AD_PORT_SELECTED; __record_default(port); port->actor_oper_port_state &= ~LACP_STATE_EXPIRED; port->sm_rx_state = AD_RX_PORT_DISABLED; fallthrough; case AD_RX_PORT_DISABLED: port->sm_vars &= ~AD_PORT_MATCHED; break; case AD_RX_LACP_DISABLED: port->sm_vars &= ~AD_PORT_SELECTED; __record_default(port); port->partner_oper.port_state &= ~LACP_STATE_AGGREGATION; port->sm_vars |= AD_PORT_MATCHED; port->actor_oper_port_state &= ~LACP_STATE_EXPIRED; break; case AD_RX_EXPIRED: /* Reset of the Synchronization flag (Standard 43.4.12) * This reset cause to disable this port in the * COLLECTING_DISTRIBUTING state of the mux machine in * case of EXPIRED even if LINK_DOWN didn't arrive for * the port. */ port->partner_oper.port_state &= ~LACP_STATE_SYNCHRONIZATION; port->sm_vars &= ~AD_PORT_MATCHED; port->partner_oper.port_state |= LACP_STATE_LACP_TIMEOUT; port->partner_oper.port_state |= LACP_STATE_LACP_ACTIVITY; port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(AD_SHORT_TIMEOUT)); port->actor_oper_port_state |= LACP_STATE_EXPIRED; port->sm_vars |= AD_PORT_CHURNED; break; case AD_RX_DEFAULTED: __update_default_selected(port); __record_default(port); port->sm_vars |= AD_PORT_MATCHED; port->actor_oper_port_state &= ~LACP_STATE_EXPIRED; break; case AD_RX_CURRENT: /* detect loopback situation */ if (MAC_ADDRESS_EQUAL(&(lacpdu->actor_system), &(port->actor_system))) { slave_err(port->slave->bond->dev, port->slave->dev, "An illegal loopback occurred on slave\n" "Check the configuration to verify that all adapters are connected to 802.3ad compliant switch ports\n"); return; } __update_selected(lacpdu, port); __update_ntt(lacpdu, port); __record_pdu(lacpdu, port); port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(port->actor_oper_port_state & LACP_STATE_LACP_TIMEOUT)); port->actor_oper_port_state &= ~LACP_STATE_EXPIRED; break; default: break; } } } /** * ad_churn_machine - handle port churn's state machine * @port: the port we're looking at * */ static void ad_churn_machine(struct port *port) { if (port->sm_vars & AD_PORT_CHURNED) { port->sm_vars &= ~AD_PORT_CHURNED; port->sm_churn_actor_state = AD_CHURN_MONITOR; port->sm_churn_partner_state = AD_CHURN_MONITOR; port->sm_churn_actor_timer_counter = __ad_timer_to_ticks(AD_ACTOR_CHURN_TIMER, 0); port->sm_churn_partner_timer_counter = __ad_timer_to_ticks(AD_PARTNER_CHURN_TIMER, 0); return; } if (port->sm_churn_actor_timer_counter && !(--port->sm_churn_actor_timer_counter) && port->sm_churn_actor_state == AD_CHURN_MONITOR) { if (port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION) { port->sm_churn_actor_state = AD_NO_CHURN; } else { port->churn_actor_count++; port->sm_churn_actor_state = AD_CHURN; } } if (port->sm_churn_partner_timer_counter && !(--port->sm_churn_partner_timer_counter) && port->sm_churn_partner_state == AD_CHURN_MONITOR) { if (port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) { port->sm_churn_partner_state = AD_NO_CHURN; } else { port->churn_partner_count++; port->sm_churn_partner_state = AD_CHURN; } } } /** * ad_tx_machine - handle a port's tx state machine * @port: the port we're looking at */ static void ad_tx_machine(struct port *port) { /* check if tx timer expired, to verify that we do not send more than * 3 packets per second */ if (port->sm_tx_timer_counter && !(--port->sm_tx_timer_counter)) { /* check if there is something to send */ if (port->ntt && (port->sm_vars & AD_PORT_LACP_ENABLED)) { __update_lacpdu_from_port(port); if (ad_lacpdu_send(port) >= 0) { slave_dbg(port->slave->bond->dev, port->slave->dev, "Sent LACPDU on port %d\n", port->actor_port_number); /* mark ntt as false, so it will not be sent * again until demanded */ port->ntt = false; } } /* restart tx timer(to verify that we will not exceed * AD_MAX_TX_IN_SECOND */ port->sm_tx_timer_counter = ad_ticks_per_sec/AD_MAX_TX_IN_SECOND; } } /** * ad_periodic_machine - handle a port's periodic state machine * @port: the port we're looking at * @bond_params: bond parameters we will use * * Turn ntt flag on priodically to perform periodic transmission of lacpdu's. */ static void ad_periodic_machine(struct port *port, struct bond_params *bond_params) { periodic_states_t last_state; /* keep current state machine state to compare later if it was changed */ last_state = port->sm_periodic_state; /* check if port was reinitialized */ if (((port->sm_vars & AD_PORT_BEGIN) || !(port->sm_vars & AD_PORT_LACP_ENABLED) || !port->is_enabled) || (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY)) || !bond_params->lacp_active) { port->sm_periodic_state = AD_NO_PERIODIC; } /* check if state machine should change state */ else if (port->sm_periodic_timer_counter) { /* check if periodic state machine expired */ if (!(--port->sm_periodic_timer_counter)) { /* if expired then do tx */ port->sm_periodic_state = AD_PERIODIC_TX; } else { /* If not expired, check if there is some new timeout * parameter from the partner state */ switch (port->sm_periodic_state) { case AD_FAST_PERIODIC: if (!(port->partner_oper.port_state & LACP_STATE_LACP_TIMEOUT)) port->sm_periodic_state = AD_SLOW_PERIODIC; break; case AD_SLOW_PERIODIC: if ((port->partner_oper.port_state & LACP_STATE_LACP_TIMEOUT)) { port->sm_periodic_timer_counter = 0; port->sm_periodic_state = AD_PERIODIC_TX; } break; default: break; } } } else { switch (port->sm_periodic_state) { case AD_NO_PERIODIC: port->sm_periodic_state = AD_FAST_PERIODIC; break; case AD_PERIODIC_TX: if (!(port->partner_oper.port_state & LACP_STATE_LACP_TIMEOUT)) port->sm_periodic_state = AD_SLOW_PERIODIC; else port->sm_periodic_state = AD_FAST_PERIODIC; break; default: break; } } /* check if the state machine was changed */ if (port->sm_periodic_state != last_state) { slave_dbg(port->slave->bond->dev, port->slave->dev, "Periodic Machine: Port=%d, Last State=%d, Curr State=%d\n", port->actor_port_number, last_state, port->sm_periodic_state); switch (port->sm_periodic_state) { case AD_NO_PERIODIC: port->sm_periodic_timer_counter = 0; break; case AD_FAST_PERIODIC: /* decrement 1 tick we lost in the PERIODIC_TX cycle */ port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_FAST_PERIODIC_TIME))-1; break; case AD_SLOW_PERIODIC: /* decrement 1 tick we lost in the PERIODIC_TX cycle */ port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_SLOW_PERIODIC_TIME))-1; break; case AD_PERIODIC_TX: port->ntt = true; break; default: break; } } } /** * ad_port_selection_logic - select aggregation groups * @port: the port we're looking at * @update_slave_arr: Does slave array need update? * * Select aggregation groups, and assign each port for it's aggregetor. The * selection logic is called in the inititalization (after all the handshkes), * and after every lacpdu receive (if selected is off). */ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr) { struct aggregator *aggregator, *free_aggregator = NULL, *temp_aggregator; struct port *last_port = NULL, *curr_port; struct list_head *iter; struct bonding *bond; struct slave *slave; int found = 0; /* if the port is already Selected, do nothing */ if (port->sm_vars & AD_PORT_SELECTED) return; bond = __get_bond_by_port(port); /* if the port is connected to other aggregator, detach it */ if (port->aggregator) { /* detach the port from its former aggregator */ temp_aggregator = port->aggregator; for (curr_port = temp_aggregator->lag_ports; curr_port; last_port = curr_port, curr_port = curr_port->next_port_in_aggregator) { if (curr_port == port) { temp_aggregator->num_of_ports--; /* if it is the first port attached to the * aggregator */ if (!last_port) { temp_aggregator->lag_ports = port->next_port_in_aggregator; } else { /* not the first port attached to the * aggregator */ last_port->next_port_in_aggregator = port->next_port_in_aggregator; } /* clear the port's relations to this * aggregator */ port->aggregator = NULL; port->next_port_in_aggregator = NULL; port->actor_port_aggregator_identifier = 0; slave_dbg(bond->dev, port->slave->dev, "Port %d left LAG %d\n", port->actor_port_number, temp_aggregator->aggregator_identifier); /* if the aggregator is empty, clear its * parameters, and set it ready to be attached */ if (!temp_aggregator->lag_ports) ad_clear_agg(temp_aggregator); break; } } if (!curr_port) { /* meaning: the port was related to an aggregator * but was not on the aggregator port list */ net_warn_ratelimited("%s: (slave %s): Warning: Port %d was related to aggregator %d but was not on its port list\n", port->slave->bond->dev->name, port->slave->dev->name, port->actor_port_number, port->aggregator->aggregator_identifier); } } /* search on all aggregators for a suitable aggregator for this port */ bond_for_each_slave(bond, slave, iter) { aggregator = &(SLAVE_AD_INFO(slave)->aggregator); /* keep a free aggregator for later use(if needed) */ if (!aggregator->lag_ports) { if (!free_aggregator) free_aggregator = aggregator; continue; } /* check if current aggregator suits us */ if (((aggregator->actor_oper_aggregator_key == port->actor_oper_port_key) && /* if all parameters match AND */ MAC_ADDRESS_EQUAL(&(aggregator->partner_system), &(port->partner_oper.system)) && (aggregator->partner_system_priority == port->partner_oper.system_priority) && (aggregator->partner_oper_aggregator_key == port->partner_oper.key) ) && ((__agg_has_partner(aggregator) && /* partner answers */ !aggregator->is_individual) /* but is not individual OR */ ) ) { /* attach to the founded aggregator */ port->aggregator = aggregator; port->actor_port_aggregator_identifier = port->aggregator->aggregator_identifier; port->next_port_in_aggregator = aggregator->lag_ports; port->aggregator->num_of_ports++; aggregator->lag_ports = port; slave_dbg(bond->dev, slave->dev, "Port %d joined LAG %d (existing LAG)\n", port->actor_port_number, port->aggregator->aggregator_identifier); /* mark this port as selected */ port->sm_vars |= AD_PORT_SELECTED; found = 1; break; } } /* the port couldn't find an aggregator - attach it to a new * aggregator */ if (!found) { if (free_aggregator) { /* assign port a new aggregator */ port->aggregator = free_aggregator; port->actor_port_aggregator_identifier = port->aggregator->aggregator_identifier; /* update the new aggregator's parameters * if port was responsed from the end-user */ if (port->actor_oper_port_key & AD_DUPLEX_KEY_MASKS) /* if port is full duplex */ port->aggregator->is_individual = false; else port->aggregator->is_individual = true; port->aggregator->actor_admin_aggregator_key = port->actor_admin_port_key; port->aggregator->actor_oper_aggregator_key = port->actor_oper_port_key; port->aggregator->partner_system = port->partner_oper.system; port->aggregator->partner_system_priority = port->partner_oper.system_priority; port->aggregator->partner_oper_aggregator_key = port->partner_oper.key; port->aggregator->receive_state = 1; port->aggregator->transmit_state = 1; port->aggregator->lag_ports = port; port->aggregator->num_of_ports++; /* mark this port as selected */ port->sm_vars |= AD_PORT_SELECTED; slave_dbg(bond->dev, port->slave->dev, "Port %d joined LAG %d (new LAG)\n", port->actor_port_number, port->aggregator->aggregator_identifier); } else { slave_err(bond->dev, port->slave->dev, "Port %d did not find a suitable aggregator\n", port->actor_port_number); return; } } /* if all aggregator's ports are READY_N == TRUE, set ready=TRUE * in all aggregator's ports, else set ready=FALSE in all * aggregator's ports */ __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); aggregator = __get_first_agg(port); ad_agg_selection_logic(aggregator, update_slave_arr); if (!port->aggregator->is_active) port->actor_oper_port_state &= ~LACP_STATE_SYNCHRONIZATION; } /* Decide if "agg" is a better choice for the new active aggregator that * the current best, according to the ad_select policy. */ static struct aggregator *ad_agg_selection_test(struct aggregator *best, struct aggregator *curr) { /* 0. If no best, select current. * * 1. If the current agg is not individual, and the best is * individual, select current. * * 2. If current agg is individual and the best is not, keep best. * * 3. Therefore, current and best are both individual or both not * individual, so: * * 3a. If current agg partner replied, and best agg partner did not, * select current. * * 3b. If current agg partner did not reply and best agg partner * did reply, keep best. * * 4. Therefore, current and best both have partner replies or * both do not, so perform selection policy: * * BOND_AD_COUNT: Select by count of ports. If count is equal, * select by bandwidth. * * BOND_AD_STABLE, BOND_AD_BANDWIDTH: Select by bandwidth. */ if (!best) return curr; if (!curr->is_individual && best->is_individual) return curr; if (curr->is_individual && !best->is_individual) return best; if (__agg_has_partner(curr) && !__agg_has_partner(best)) return curr; if (!__agg_has_partner(curr) && __agg_has_partner(best)) return best; switch (__get_agg_selection_mode(curr->lag_ports)) { case BOND_AD_COUNT: if (__agg_active_ports(curr) > __agg_active_ports(best)) return curr; if (__agg_active_ports(curr) < __agg_active_ports(best)) return best; fallthrough; case BOND_AD_STABLE: case BOND_AD_BANDWIDTH: if (__get_agg_bandwidth(curr) > __get_agg_bandwidth(best)) return curr; break; default: net_warn_ratelimited("%s: (slave %s): Impossible agg select mode %d\n", curr->slave->bond->dev->name, curr->slave->dev->name, __get_agg_selection_mode(curr->lag_ports)); break; } return best; } static int agg_device_up(const struct aggregator *agg) { struct port *port = agg->lag_ports; if (!port) return 0; for (port = agg->lag_ports; port; port = port->next_port_in_aggregator) { if (netif_running(port->slave->dev) && netif_carrier_ok(port->slave->dev)) return 1; } return 0; } /** * ad_agg_selection_logic - select an aggregation group for a team * @agg: the aggregator we're looking at * @update_slave_arr: Does slave array need update? * * It is assumed that only one aggregator may be selected for a team. * * The logic of this function is to select the aggregator according to * the ad_select policy: * * BOND_AD_STABLE: select the aggregator with the most ports attached to * it, and to reselect the active aggregator only if the previous * aggregator has no more ports related to it. * * BOND_AD_BANDWIDTH: select the aggregator with the highest total * bandwidth, and reselect whenever a link state change takes place or the * set of slaves in the bond changes. * * BOND_AD_COUNT: select the aggregator with largest number of ports * (slaves), and reselect whenever a link state change takes place or the * set of slaves in the bond changes. * * FIXME: this function MUST be called with the first agg in the bond, or * __get_active_agg() won't work correctly. This function should be better * called with the bond itself, and retrieve the first agg from it. */ static void ad_agg_selection_logic(struct aggregator *agg, bool *update_slave_arr) { struct aggregator *best, *active, *origin; struct bonding *bond = agg->slave->bond; struct list_head *iter; struct slave *slave; struct port *port; rcu_read_lock(); origin = agg; active = __get_active_agg(agg); best = (active && agg_device_up(active)) ? active : NULL; bond_for_each_slave_rcu(bond, slave, iter) { agg = &(SLAVE_AD_INFO(slave)->aggregator); agg->is_active = 0; if (__agg_active_ports(agg) && agg_device_up(agg)) best = ad_agg_selection_test(best, agg); } if (best && __get_agg_selection_mode(best->lag_ports) == BOND_AD_STABLE) { /* For the STABLE policy, don't replace the old active * aggregator if it's still active (it has an answering * partner) or if both the best and active don't have an * answering partner. */ if (active && active->lag_ports && __agg_active_ports(active) && (__agg_has_partner(active) || (!__agg_has_partner(active) && !__agg_has_partner(best)))) { if (!(!active->actor_oper_aggregator_key && best->actor_oper_aggregator_key)) { best = NULL; active->is_active = 1; } } } if (best && (best == active)) { best = NULL; active->is_active = 1; } /* if there is new best aggregator, activate it */ if (best) { netdev_dbg(bond->dev, "(slave %s): best Agg=%d; P=%d; a k=%d; p k=%d; Ind=%d; Act=%d\n", best->slave ? best->slave->dev->name : "NULL", best->aggregator_identifier, best->num_of_ports, best->actor_oper_aggregator_key, best->partner_oper_aggregator_key, best->is_individual, best->is_active); netdev_dbg(bond->dev, "(slave %s): best ports %p slave %p\n", best->slave ? best->slave->dev->name : "NULL", best->lag_ports, best->slave); bond_for_each_slave_rcu(bond, slave, iter) { agg = &(SLAVE_AD_INFO(slave)->aggregator); slave_dbg(bond->dev, slave->dev, "Agg=%d; P=%d; a k=%d; p k=%d; Ind=%d; Act=%d\n", agg->aggregator_identifier, agg->num_of_ports, agg->actor_oper_aggregator_key, agg->partner_oper_aggregator_key, agg->is_individual, agg->is_active); } /* check if any partner replies */ if (best->is_individual) net_warn_ratelimited("%s: Warning: No 802.3ad response from the link partner for any adapters in the bond\n", bond->dev->name); best->is_active = 1; netdev_dbg(bond->dev, "(slave %s): LAG %d chosen as the active LAG\n", best->slave ? best->slave->dev->name : "NULL", best->aggregator_identifier); netdev_dbg(bond->dev, "(slave %s): Agg=%d; P=%d; a k=%d; p k=%d; Ind=%d; Act=%d\n", best->slave ? best->slave->dev->name : "NULL", best->aggregator_identifier, best->num_of_ports, best->actor_oper_aggregator_key, best->partner_oper_aggregator_key, best->is_individual, best->is_active); /* disable the ports that were related to the former * active_aggregator */ if (active) { for (port = active->lag_ports; port; port = port->next_port_in_aggregator) { __disable_port(port); } } /* Slave array needs update. */ *update_slave_arr = true; } /* if the selected aggregator is of join individuals * (partner_system is NULL), enable their ports */ active = __get_active_agg(origin); if (active) { if (!__agg_has_partner(active)) { for (port = active->lag_ports; port; port = port->next_port_in_aggregator) { __enable_port(port); } *update_slave_arr = true; } } rcu_read_unlock(); bond_3ad_set_carrier(bond); } /** * ad_clear_agg - clear a given aggregator's parameters * @aggregator: the aggregator we're looking at */ static void ad_clear_agg(struct aggregator *aggregator) { if (aggregator) { aggregator->is_individual = false; aggregator->actor_admin_aggregator_key = 0; aggregator->actor_oper_aggregator_key = 0; eth_zero_addr(aggregator->partner_system.mac_addr_value); aggregator->partner_system_priority = 0; aggregator->partner_oper_aggregator_key = 0; aggregator->receive_state = 0; aggregator->transmit_state = 0; aggregator->lag_ports = NULL; aggregator->is_active = 0; aggregator->num_of_ports = 0; pr_debug("%s: LAG %d was cleared\n", aggregator->slave ? aggregator->slave->dev->name : "NULL", aggregator->aggregator_identifier); } } /** * ad_initialize_agg - initialize a given aggregator's parameters * @aggregator: the aggregator we're looking at */ static void ad_initialize_agg(struct aggregator *aggregator) { if (aggregator) { ad_clear_agg(aggregator); eth_zero_addr(aggregator->aggregator_mac_address.mac_addr_value); aggregator->aggregator_identifier = 0; aggregator->slave = NULL; } } /** * ad_initialize_port - initialize a given port's parameters * @port: the port we're looking at * @lacp_fast: boolean. whether fast periodic should be used */ static void ad_initialize_port(struct port *port, int lacp_fast) { static const struct port_params tmpl = { .system_priority = 0xffff, .key = 1, .port_number = 1, .port_priority = 0xff, .port_state = 1, }; static const struct lacpdu lacpdu = { .subtype = 0x01, .version_number = 0x01, .tlv_type_actor_info = 0x01, .actor_information_length = 0x14, .tlv_type_partner_info = 0x02, .partner_information_length = 0x14, .tlv_type_collector_info = 0x03, .collector_information_length = 0x10, .collector_max_delay = htons(AD_COLLECTOR_MAX_DELAY), }; if (port) { port->actor_port_priority = 0xff; port->actor_port_aggregator_identifier = 0; port->ntt = false; port->actor_admin_port_state = LACP_STATE_AGGREGATION | LACP_STATE_LACP_ACTIVITY; port->actor_oper_port_state = LACP_STATE_AGGREGATION | LACP_STATE_LACP_ACTIVITY; if (lacp_fast) port->actor_oper_port_state |= LACP_STATE_LACP_TIMEOUT; memcpy(&port->partner_admin, &tmpl, sizeof(tmpl)); memcpy(&port->partner_oper, &tmpl, sizeof(tmpl)); port->is_enabled = true; /* private parameters */ port->sm_vars = AD_PORT_BEGIN | AD_PORT_LACP_ENABLED; port->sm_rx_state = 0; port->sm_rx_timer_counter = 0; port->sm_periodic_state = 0; port->sm_periodic_timer_counter = 0; port->sm_mux_state = 0; port->sm_mux_timer_counter = 0; port->sm_tx_state = 0; port->aggregator = NULL; port->next_port_in_aggregator = NULL; port->transaction_id = 0; port->sm_churn_actor_timer_counter = 0; port->sm_churn_actor_state = 0; port->churn_actor_count = 0; port->sm_churn_partner_timer_counter = 0; port->sm_churn_partner_state = 0; port->churn_partner_count = 0; memcpy(&port->lacpdu, &lacpdu, sizeof(lacpdu)); } } /** * ad_enable_collecting - enable a port's receive * @port: the port we're looking at * * Enable @port if it's in an active aggregator */ static void ad_enable_collecting(struct port *port) { if (port->aggregator->is_active) { struct slave *slave = port->slave; slave_dbg(slave->bond->dev, slave->dev, "Enabling collecting on port %d (LAG %d)\n", port->actor_port_number, port->aggregator->aggregator_identifier); __enable_collecting_port(port); } } /** * ad_disable_distributing - disable a port's transmit * @port: the port we're looking at * @update_slave_arr: Does slave array need update? */ static void ad_disable_distributing(struct port *port, bool *update_slave_arr) { if (port->aggregator && __agg_has_partner(port->aggregator)) { slave_dbg(port->slave->bond->dev, port->slave->dev, "Disabling distributing on port %d (LAG %d)\n", port->actor_port_number, port->aggregator->aggregator_identifier); __disable_distributing_port(port); /* Slave array needs an update */ *update_slave_arr = true; } } /** * ad_enable_collecting_distributing - enable a port's transmit/receive * @port: the port we're looking at * @update_slave_arr: Does slave array need update? * * Enable @port if it's in an active aggregator */ static void ad_enable_collecting_distributing(struct port *port, bool *update_slave_arr) { if (port->aggregator->is_active) { slave_dbg(port->slave->bond->dev, port->slave->dev, "Enabling port %d (LAG %d)\n", port->actor_port_number, port->aggregator->aggregator_identifier); __enable_port(port); /* Slave array needs update */ *update_slave_arr = true; } } /** * ad_disable_collecting_distributing - disable a port's transmit/receive * @port: the port we're looking at * @update_slave_arr: Does slave array need update? */ static void ad_disable_collecting_distributing(struct port *port, bool *update_slave_arr) { if (port->aggregator && __agg_has_partner(port->aggregator)) { slave_dbg(port->slave->bond->dev, port->slave->dev, "Disabling port %d (LAG %d)\n", port->actor_port_number, port->aggregator->aggregator_identifier); __disable_port(port); /* Slave array needs an update */ *update_slave_arr = true; } } /** * ad_marker_info_received - handle receive of a Marker information frame * @marker_info: Marker info received * @port: the port we're looking at */ static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port) { struct bond_marker marker; atomic64_inc(&SLAVE_AD_INFO(port->slave)->stats.marker_rx); atomic64_inc(&BOND_AD_INFO(port->slave->bond).stats.marker_rx); /* copy the received marker data to the response marker */ memcpy(&marker, marker_info, sizeof(struct bond_marker)); /* change the marker subtype to marker response */ marker.tlv_type = AD_MARKER_RESPONSE_SUBTYPE; /* send the marker response */ if (ad_marker_send(port, &marker) >= 0) slave_dbg(port->slave->bond->dev, port->slave->dev, "Sent Marker Response on port %d\n", port->actor_port_number); } /** * ad_marker_response_received - handle receive of a marker response frame * @marker: marker PDU received * @port: the port we're looking at * * This function does nothing since we decided not to implement send and handle * response for marker PDU's, in this stage, but only to respond to marker * information. */ static void ad_marker_response_received(struct bond_marker *marker, struct port *port) { atomic64_inc(&SLAVE_AD_INFO(port->slave)->stats.marker_resp_rx); atomic64_inc(&BOND_AD_INFO(port->slave->bond).stats.marker_resp_rx); /* DO NOTHING, SINCE WE DECIDED NOT TO IMPLEMENT THIS FEATURE FOR NOW */ } /* ========= AD exported functions to the main bonding code ========= */ /* Check aggregators status in team every T seconds */ #define AD_AGGREGATOR_SELECTION_TIMER 8 /** * bond_3ad_initiate_agg_selection - initate aggregator selection * @bond: bonding struct * @timeout: timeout value to set * * Set the aggregation selection timer, to initiate an agg selection in * the very near future. Called during first initialization, and during * any down to up transitions of the bond. */ void bond_3ad_initiate_agg_selection(struct bonding *bond, int timeout) { atomic_set(&BOND_AD_INFO(bond).agg_select_timer, timeout); } /** * bond_3ad_initialize - initialize a bond's 802.3ad parameters and structures * @bond: bonding struct to work on * * Can be called only after the mac address of the bond is set. */ void bond_3ad_initialize(struct bonding *bond) { BOND_AD_INFO(bond).aggregator_identifier = 0; BOND_AD_INFO(bond).system.sys_priority = bond->params.ad_actor_sys_prio; if (is_zero_ether_addr(bond->params.ad_actor_system)) BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->dev->dev_addr); else BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->params.ad_actor_system); bond_3ad_initiate_agg_selection(bond, AD_AGGREGATOR_SELECTION_TIMER * ad_ticks_per_sec); } /** * bond_3ad_bind_slave - initialize a slave's port * @slave: slave struct to work on * * Returns: 0 on success * < 0 on error */ void bond_3ad_bind_slave(struct slave *slave) { struct bonding *bond = bond_get_bond_by_slave(slave); struct port *port; struct aggregator *aggregator; /* check that the slave has not been initialized yet. */ if (SLAVE_AD_INFO(slave)->port.slave != slave) { /* port initialization */ port = &(SLAVE_AD_INFO(slave)->port); ad_initialize_port(port, bond->params.lacp_fast); port->slave = slave; port->actor_port_number = SLAVE_AD_INFO(slave)->id; /* key is determined according to the link speed, duplex and * user key */ port->actor_admin_port_key = bond->params.ad_user_port_key << 6; ad_update_actor_keys(port, false); /* actor system is the bond's system */ __ad_actor_update_port(port); /* tx timer(to verify that no more than MAX_TX_IN_SECOND * lacpdu's are sent in one second) */ port->sm_tx_timer_counter = ad_ticks_per_sec/AD_MAX_TX_IN_SECOND; __disable_port(port); /* aggregator initialization */ aggregator = &(SLAVE_AD_INFO(slave)->aggregator); ad_initialize_agg(aggregator); aggregator->aggregator_mac_address = *((struct mac_addr *)bond->dev->dev_addr); aggregator->aggregator_identifier = ++BOND_AD_INFO(bond).aggregator_identifier; aggregator->slave = slave; aggregator->is_active = 0; aggregator->num_of_ports = 0; } } /** * bond_3ad_unbind_slave - deinitialize a slave's port * @slave: slave struct to work on * * Search for the aggregator that is related to this port, remove the * aggregator and assign another aggregator for other port related to it * (if any), and remove the port. */ void bond_3ad_unbind_slave(struct slave *slave) { struct port *port, *prev_port, *temp_port; struct aggregator *aggregator, *new_aggregator, *temp_aggregator; int select_new_active_agg = 0; struct bonding *bond = slave->bond; struct slave *slave_iter; struct list_head *iter; bool dummy_slave_update; /* Ignore this value as caller updates array */ /* Sync against bond_3ad_state_machine_handler() */ spin_lock_bh(&bond->mode_lock); aggregator = &(SLAVE_AD_INFO(slave)->aggregator); port = &(SLAVE_AD_INFO(slave)->port); /* if slave is null, the whole port is not initialized */ if (!port->slave) { slave_warn(bond->dev, slave->dev, "Trying to unbind an uninitialized port\n"); goto out; } slave_dbg(bond->dev, slave->dev, "Unbinding Link Aggregation Group %d\n", aggregator->aggregator_identifier); /* Tell the partner that this port is not suitable for aggregation */ port->actor_oper_port_state &= ~LACP_STATE_SYNCHRONIZATION; port->actor_oper_port_state &= ~LACP_STATE_COLLECTING; port->actor_oper_port_state &= ~LACP_STATE_DISTRIBUTING; port->actor_oper_port_state &= ~LACP_STATE_AGGREGATION; __update_lacpdu_from_port(port); ad_lacpdu_send(port); /* check if this aggregator is occupied */ if (aggregator->lag_ports) { /* check if there are other ports related to this aggregator * except the port related to this slave(thats ensure us that * there is a reason to search for new aggregator, and that we * will find one */ if ((aggregator->lag_ports != port) || (aggregator->lag_ports->next_port_in_aggregator)) { /* find new aggregator for the related port(s) */ bond_for_each_slave(bond, slave_iter, iter) { new_aggregator = &(SLAVE_AD_INFO(slave_iter)->aggregator); /* if the new aggregator is empty, or it is * connected to our port only */ if (!new_aggregator->lag_ports || ((new_aggregator->lag_ports == port) && !new_aggregator->lag_ports->next_port_in_aggregator)) break; } if (!slave_iter) new_aggregator = NULL; /* if new aggregator found, copy the aggregator's * parameters and connect the related lag_ports to the * new aggregator */ if ((new_aggregator) && ((!new_aggregator->lag_ports) || ((new_aggregator->lag_ports == port) && !new_aggregator->lag_ports->next_port_in_aggregator))) { slave_dbg(bond->dev, slave->dev, "Some port(s) related to LAG %d - replacing with LAG %d\n", aggregator->aggregator_identifier, new_aggregator->aggregator_identifier); if ((new_aggregator->lag_ports == port) && new_aggregator->is_active) { slave_info(bond->dev, slave->dev, "Removing an active aggregator\n"); select_new_active_agg = 1; } new_aggregator->is_individual = aggregator->is_individual; new_aggregator->actor_admin_aggregator_key = aggregator->actor_admin_aggregator_key; new_aggregator->actor_oper_aggregator_key = aggregator->actor_oper_aggregator_key; new_aggregator->partner_system = aggregator->partner_system; new_aggregator->partner_system_priority = aggregator->partner_system_priority; new_aggregator->partner_oper_aggregator_key = aggregator->partner_oper_aggregator_key; new_aggregator->receive_state = aggregator->receive_state; new_aggregator->transmit_state = aggregator->transmit_state; new_aggregator->lag_ports = aggregator->lag_ports; new_aggregator->is_active = aggregator->is_active; new_aggregator->num_of_ports = aggregator->num_of_ports; /* update the information that is written on * the ports about the aggregator */ for (temp_port = aggregator->lag_ports; temp_port; temp_port = temp_port->next_port_in_aggregator) { temp_port->aggregator = new_aggregator; temp_port->actor_port_aggregator_identifier = new_aggregator->aggregator_identifier; } ad_clear_agg(aggregator); if (select_new_active_agg) ad_agg_selection_logic(__get_first_agg(port), &dummy_slave_update); } else { slave_warn(bond->dev, slave->dev, "unbinding aggregator, and could not find a new aggregator for its ports\n"); } } else { /* in case that the only port related to this * aggregator is the one we want to remove */ select_new_active_agg = aggregator->is_active; ad_clear_agg(aggregator); if (select_new_active_agg) { slave_info(bond->dev, slave->dev, "Removing an active aggregator\n"); /* select new active aggregator */ temp_aggregator = __get_first_agg(port); if (temp_aggregator) ad_agg_selection_logic(temp_aggregator, &dummy_slave_update); } } } slave_dbg(bond->dev, slave->dev, "Unbinding port %d\n", port->actor_port_number); /* find the aggregator that this port is connected to */ bond_for_each_slave(bond, slave_iter, iter) { temp_aggregator = &(SLAVE_AD_INFO(slave_iter)->aggregator); prev_port = NULL; /* search the port in the aggregator's related ports */ for (temp_port = temp_aggregator->lag_ports; temp_port; prev_port = temp_port, temp_port = temp_port->next_port_in_aggregator) { if (temp_port == port) { /* the aggregator found - detach the port from * this aggregator */ if (prev_port) prev_port->next_port_in_aggregator = temp_port->next_port_in_aggregator; else temp_aggregator->lag_ports = temp_port->next_port_in_aggregator; temp_aggregator->num_of_ports--; if (__agg_active_ports(temp_aggregator) == 0) { select_new_active_agg = temp_aggregator->is_active; if (temp_aggregator->num_of_ports == 0) ad_clear_agg(temp_aggregator); if (select_new_active_agg) { slave_info(bond->dev, slave->dev, "Removing an active aggregator\n"); /* select new active aggregator */ ad_agg_selection_logic(__get_first_agg(port), &dummy_slave_update); } } break; } } } port->slave = NULL; out: spin_unlock_bh(&bond->mode_lock); } /** * bond_3ad_update_ad_actor_settings - reflect change of actor settings to ports * @bond: bonding struct to work on * * If an ad_actor setting gets changed we need to update the individual port * settings so the bond device will use the new values when it gets upped. */ void bond_3ad_update_ad_actor_settings(struct bonding *bond) { struct list_head *iter; struct slave *slave; ASSERT_RTNL(); BOND_AD_INFO(bond).system.sys_priority = bond->params.ad_actor_sys_prio; if (is_zero_ether_addr(bond->params.ad_actor_system)) BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->dev->dev_addr); else BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->params.ad_actor_system); spin_lock_bh(&bond->mode_lock); bond_for_each_slave(bond, slave, iter) { struct port *port = &(SLAVE_AD_INFO(slave))->port; __ad_actor_update_port(port); port->ntt = true; } spin_unlock_bh(&bond->mode_lock); } /** * bond_agg_timer_advance - advance agg_select_timer * @bond: bonding structure * * Return true when agg_select_timer reaches 0. */ static bool bond_agg_timer_advance(struct bonding *bond) { int val, nval; while (1) { val = atomic_read(&BOND_AD_INFO(bond).agg_select_timer); if (!val) return false; nval = val - 1; if (atomic_cmpxchg(&BOND_AD_INFO(bond).agg_select_timer, val, nval) == val) break; } return nval == 0; } /** * bond_3ad_state_machine_handler - handle state machines timeout * @work: work context to fetch bonding struct to work on from * * The state machine handling concept in this module is to check every tick * which state machine should operate any function. The execution order is * round robin, so when we have an interaction between state machines, the * reply of one to each other might be delayed until next tick. * * This function also complete the initialization when the agg_select_timer * times out, and it selects an aggregator for the ports that are yet not * related to any aggregator, and selects the active aggregator for a bond. */ void bond_3ad_state_machine_handler(struct work_struct *work) { struct bonding *bond = container_of(work, struct bonding, ad_work.work); struct aggregator *aggregator; struct list_head *iter; struct slave *slave; struct port *port; bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER; bool update_slave_arr = false; /* Lock to protect data accessed by all (e.g., port->sm_vars) and * against running with bond_3ad_unbind_slave. ad_rx_machine may run * concurrently due to incoming LACPDU as well. */ spin_lock_bh(&bond->mode_lock); rcu_read_lock(); /* check if there are any slaves */ if (!bond_has_slaves(bond)) goto re_arm; if (bond_agg_timer_advance(bond)) { slave = bond_first_slave_rcu(bond); port = slave ? &(SLAVE_AD_INFO(slave)->port) : NULL; /* select the active aggregator for the bond */ if (port) { if (!port->slave) { net_warn_ratelimited("%s: Warning: bond's first port is uninitialized\n", bond->dev->name); goto re_arm; } aggregator = __get_first_agg(port); ad_agg_selection_logic(aggregator, &update_slave_arr); } bond_3ad_set_carrier(bond); } /* for each port run the state machines */ bond_for_each_slave_rcu(bond, slave, iter) { port = &(SLAVE_AD_INFO(slave)->port); if (!port->slave) { net_warn_ratelimited("%s: Warning: Found an uninitialized port\n", bond->dev->name); goto re_arm; } ad_rx_machine(NULL, port); ad_periodic_machine(port, &bond->params); ad_port_selection_logic(port, &update_slave_arr); ad_mux_machine(port, &update_slave_arr); ad_tx_machine(port); ad_churn_machine(port); /* turn off the BEGIN bit, since we already handled it */ if (port->sm_vars & AD_PORT_BEGIN) port->sm_vars &= ~AD_PORT_BEGIN; } re_arm: bond_for_each_slave_rcu(bond, slave, iter) { if (slave->should_notify) { should_notify_rtnl = BOND_SLAVE_NOTIFY_NOW; break; } } rcu_read_unlock(); spin_unlock_bh(&bond->mode_lock); if (update_slave_arr) bond_slave_arr_work_rearm(bond, 0); if (should_notify_rtnl && rtnl_trylock()) { bond_slave_state_notify(bond); rtnl_unlock(); } queue_delayed_work(bond->wq, &bond->ad_work, ad_delta_in_ticks); } /** * bond_3ad_rx_indication - handle a received frame * @lacpdu: received lacpdu * @slave: slave struct to work on * * It is assumed that frames that were sent on this NIC don't returned as new * received frames (loopback). Since only the payload is given to this * function, it check for loopback. */ static int bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave) { struct bonding *bond = slave->bond; int ret = RX_HANDLER_ANOTHER; struct bond_marker *marker; struct port *port; atomic64_t *stat; port = &(SLAVE_AD_INFO(slave)->port); if (!port->slave) { net_warn_ratelimited("%s: Warning: port of slave %s is uninitialized\n", slave->dev->name, slave->bond->dev->name); return ret; } switch (lacpdu->subtype) { case AD_TYPE_LACPDU: ret = RX_HANDLER_CONSUMED; slave_dbg(slave->bond->dev, slave->dev, "Received LACPDU on port %d\n", port->actor_port_number); /* Protect against concurrent state machines */ spin_lock(&slave->bond->mode_lock); ad_rx_machine(lacpdu, port); spin_unlock(&slave->bond->mode_lock); break; case AD_TYPE_MARKER: ret = RX_HANDLER_CONSUMED; /* No need to convert fields to Little Endian since we * don't use the marker's fields. */ marker = (struct bond_marker *)lacpdu; switch (marker->tlv_type) { case AD_MARKER_INFORMATION_SUBTYPE: slave_dbg(slave->bond->dev, slave->dev, "Received Marker Information on port %d\n", port->actor_port_number); ad_marker_info_received(marker, port); break; case AD_MARKER_RESPONSE_SUBTYPE: slave_dbg(slave->bond->dev, slave->dev, "Received Marker Response on port %d\n", port->actor_port_number); ad_marker_response_received(marker, port); break; default: slave_dbg(slave->bond->dev, slave->dev, "Received an unknown Marker subtype on port %d\n", port->actor_port_number); stat = &SLAVE_AD_INFO(slave)->stats.marker_unknown_rx; atomic64_inc(stat); stat = &BOND_AD_INFO(bond).stats.marker_unknown_rx; atomic64_inc(stat); } break; default: atomic64_inc(&SLAVE_AD_INFO(slave)->stats.lacpdu_unknown_rx); atomic64_inc(&BOND_AD_INFO(bond).stats.lacpdu_unknown_rx); } return ret; } /** * ad_update_actor_keys - Update the oper / admin keys for a port based on * its current speed and duplex settings. * * @port: the port we'are looking at * @reset: Boolean to just reset the speed and the duplex part of the key * * The logic to change the oper / admin keys is: * (a) A full duplex port can participate in LACP with partner. * (b) When the speed is changed, LACP need to be reinitiated. */ static void ad_update_actor_keys(struct port *port, bool reset) { u8 duplex = 0; u16 ospeed = 0, speed = 0; u16 old_oper_key = port->actor_oper_port_key; port->actor_admin_port_key &= ~(AD_SPEED_KEY_MASKS|AD_DUPLEX_KEY_MASKS); if (!reset) { speed = __get_link_speed(port); ospeed = (old_oper_key & AD_SPEED_KEY_MASKS) >> 1; duplex = __get_duplex(port); port->actor_admin_port_key |= (speed << 1) | duplex; } port->actor_oper_port_key = port->actor_admin_port_key; if (old_oper_key != port->actor_oper_port_key) { /* Only 'duplex' port participates in LACP */ if (duplex) port->sm_vars |= AD_PORT_LACP_ENABLED; else port->sm_vars &= ~AD_PORT_LACP_ENABLED; if (!reset) { if (!speed) { slave_err(port->slave->bond->dev, port->slave->dev, "speed changed to 0 on port %d\n", port->actor_port_number); } else if (duplex && ospeed != speed) { /* Speed change restarts LACP state-machine */ port->sm_vars |= AD_PORT_BEGIN; } } } } /** * bond_3ad_adapter_speed_duplex_changed - handle a slave's speed / duplex * change indication * * @slave: slave struct to work on * * Handle reselection of aggregator (if needed) for this port. */ void bond_3ad_adapter_speed_duplex_changed(struct slave *slave) { struct port *port; port = &(SLAVE_AD_INFO(slave)->port); /* if slave is null, the whole port is not initialized */ if (!port->slave) { slave_warn(slave->bond->dev, slave->dev, "speed/duplex changed for uninitialized port\n"); return; } spin_lock_bh(&slave->bond->mode_lock); ad_update_actor_keys(port, false); spin_unlock_bh(&slave->bond->mode_lock); slave_dbg(slave->bond->dev, slave->dev, "Port %d changed speed/duplex\n", port->actor_port_number); } /** * bond_3ad_handle_link_change - handle a slave's link status change indication * @slave: slave struct to work on * @link: whether the link is now up or down * * Handle reselection of aggregator (if needed) for this port. */ void bond_3ad_handle_link_change(struct slave *slave, char link) { struct aggregator *agg; struct port *port; bool dummy; port = &(SLAVE_AD_INFO(slave)->port); /* if slave is null, the whole port is not initialized */ if (!port->slave) { slave_warn(slave->bond->dev, slave->dev, "link status changed for uninitialized port\n"); return; } spin_lock_bh(&slave->bond->mode_lock); /* on link down we are zeroing duplex and speed since * some of the adaptors(ce1000.lan) report full duplex/speed * instead of N/A(duplex) / 0(speed). * * on link up we are forcing recheck on the duplex and speed since * some of he adaptors(ce1000.lan) report. */ if (link == BOND_LINK_UP) { port->is_enabled = true; ad_update_actor_keys(port, false); } else { /* link has failed */ port->is_enabled = false; ad_update_actor_keys(port, true); } agg = __get_first_agg(port); ad_agg_selection_logic(agg, &dummy); spin_unlock_bh(&slave->bond->mode_lock); slave_dbg(slave->bond->dev, slave->dev, "Port %d changed link status to %s\n", port->actor_port_number, link == BOND_LINK_UP ? "UP" : "DOWN"); /* RTNL is held and mode_lock is released so it's safe * to update slave_array here. */ bond_update_slave_arr(slave->bond, NULL); } /** * bond_3ad_set_carrier - set link state for bonding master * @bond: bonding structure * * if we have an active aggregator, we're up, if not, we're down. * Presumes that we cannot have an active aggregator if there are * no slaves with link up. * * This behavior complies with IEEE 802.3 section 43.3.9. * * Called by bond_set_carrier(). Return zero if carrier state does not * change, nonzero if it does. */ int bond_3ad_set_carrier(struct bonding *bond) { struct aggregator *active; struct slave *first_slave; int ret = 1; rcu_read_lock(); first_slave = bond_first_slave_rcu(bond); if (!first_slave) { ret = 0; goto out; } active = __get_active_agg(&(SLAVE_AD_INFO(first_slave)->aggregator)); if (active) { /* are enough slaves available to consider link up? */ if (__agg_active_ports(active) < bond->params.min_links) { if (netif_carrier_ok(bond->dev)) { netif_carrier_off(bond->dev); goto out; } } else if (!netif_carrier_ok(bond->dev)) { netif_carrier_on(bond->dev); goto out; } } else if (netif_carrier_ok(bond->dev)) { netif_carrier_off(bond->dev); } out: rcu_read_unlock(); return ret; } /** * __bond_3ad_get_active_agg_info - get information of the active aggregator * @bond: bonding struct to work on * @ad_info: ad_info struct to fill with the bond's info * * Returns: 0 on success * < 0 on error */ int __bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info) { struct aggregator *aggregator = NULL; struct list_head *iter; struct slave *slave; struct port *port; bond_for_each_slave_rcu(bond, slave, iter) { port = &(SLAVE_AD_INFO(slave)->port); if (port->aggregator && port->aggregator->is_active) { aggregator = port->aggregator; break; } } if (!aggregator) return -1; ad_info->aggregator_id = aggregator->aggregator_identifier; ad_info->ports = __agg_active_ports(aggregator); ad_info->actor_key = aggregator->actor_oper_aggregator_key; ad_info->partner_key = aggregator->partner_oper_aggregator_key; ether_addr_copy(ad_info->partner_system, aggregator->partner_system.mac_addr_value); return 0; } int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info) { int ret; rcu_read_lock(); ret = __bond_3ad_get_active_agg_info(bond, ad_info); rcu_read_unlock(); return ret; } int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave) { struct lacpdu *lacpdu, _lacpdu; if (skb->protocol != PKT_TYPE_LACPDU) return RX_HANDLER_ANOTHER; if (!MAC_ADDRESS_EQUAL(eth_hdr(skb)->h_dest, lacpdu_mcast_addr)) return RX_HANDLER_ANOTHER; lacpdu = skb_header_pointer(skb, 0, sizeof(_lacpdu), &_lacpdu); if (!lacpdu) { atomic64_inc(&SLAVE_AD_INFO(slave)->stats.lacpdu_illegal_rx); atomic64_inc(&BOND_AD_INFO(bond).stats.lacpdu_illegal_rx); return RX_HANDLER_ANOTHER; } return bond_3ad_rx_indication(lacpdu, slave); } /** * bond_3ad_update_lacp_rate - change the lacp rate * @bond: bonding struct * * When modify lacp_rate parameter via sysfs, * update actor_oper_port_state of each port. * * Hold bond->mode_lock, * so we can modify port->actor_oper_port_state, * no matter bond is up or down. */ void bond_3ad_update_lacp_rate(struct bonding *bond) { struct port *port = NULL; struct list_head *iter; struct slave *slave; int lacp_fast; lacp_fast = bond->params.lacp_fast; spin_lock_bh(&bond->mode_lock); bond_for_each_slave(bond, slave, iter) { port = &(SLAVE_AD_INFO(slave)->port); if (lacp_fast) port->actor_oper_port_state |= LACP_STATE_LACP_TIMEOUT; else port->actor_oper_port_state &= ~LACP_STATE_LACP_TIMEOUT; } spin_unlock_bh(&bond->mode_lock); } size_t bond_3ad_stats_size(void) { return nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_LACPDU_RX */ nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_LACPDU_TX */ nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_LACPDU_UNKNOWN_RX */ nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_LACPDU_ILLEGAL_RX */ nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_MARKER_RX */ nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_MARKER_TX */ nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_MARKER_RESP_RX */ nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_MARKER_RESP_TX */ nla_total_size_64bit(sizeof(u64)); /* BOND_3AD_STAT_MARKER_UNKNOWN_RX */ } int bond_3ad_stats_fill(struct sk_buff *skb, struct bond_3ad_stats *stats) { u64 val; val = atomic64_read(&stats->lacpdu_rx); if (nla_put_u64_64bit(skb, BOND_3AD_STAT_LACPDU_RX, val, BOND_3AD_STAT_PAD)) return -EMSGSIZE; val = atomic64_read(&stats->lacpdu_tx); if (nla_put_u64_64bit(skb, BOND_3AD_STAT_LACPDU_TX, val, BOND_3AD_STAT_PAD)) return -EMSGSIZE; val = atomic64_read(&stats->lacpdu_unknown_rx); if (nla_put_u64_64bit(skb, BOND_3AD_STAT_LACPDU_UNKNOWN_RX, val, BOND_3AD_STAT_PAD)) return -EMSGSIZE; val = atomic64_read(&stats->lacpdu_illegal_rx); if (nla_put_u64_64bit(skb, BOND_3AD_STAT_LACPDU_ILLEGAL_RX, val, BOND_3AD_STAT_PAD)) return -EMSGSIZE; val = atomic64_read(&stats->marker_rx); if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_RX, val, BOND_3AD_STAT_PAD)) return -EMSGSIZE; val = atomic64_read(&stats->marker_tx); if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_TX, val, BOND_3AD_STAT_PAD)) return -EMSGSIZE; val = atomic64_read(&stats->marker_resp_rx); if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_RESP_RX, val, BOND_3AD_STAT_PAD)) return -EMSGSIZE; val = atomic64_read(&stats->marker_resp_tx); if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_RESP_TX, val, BOND_3AD_STAT_PAD)) return -EMSGSIZE; val = atomic64_read(&stats->marker_unknown_rx); if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_UNKNOWN_RX, val, BOND_3AD_STAT_PAD)) return -EMSGSIZE; return 0; }
2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 1 2 2 3 1 1 1 2 1 3 3 1 1 8 7 1 7 1 10 2 8 8 2 1 1 11 9 2 2 9 1 8 8 8 8 1 8 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 // SPDX-License-Identifier: GPL-2.0 /* * cdc-wdm.c * * This driver supports USB CDC WCM Device Management. * * Copyright (c) 2007-2009 Oliver Neukum * * Some code taken from cdc-acm.c * * Released under the GPLv2. * * Many thanks to Carl Nordbeck */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/ioctl.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/uaccess.h> #include <linux/bitops.h> #include <linux/poll.h> #include <linux/skbuff.h> #include <linux/usb.h> #include <linux/usb/cdc.h> #include <linux/wwan.h> #include <asm/byteorder.h> #include <linux/unaligned.h> #include <linux/usb/cdc-wdm.h> #define DRIVER_AUTHOR "Oliver Neukum" #define DRIVER_DESC "USB Abstract Control Model driver for USB WCM Device Management" static const struct usb_device_id wdm_ids[] = { { .match_flags = USB_DEVICE_ID_MATCH_INT_CLASS | USB_DEVICE_ID_MATCH_INT_SUBCLASS, .bInterfaceClass = USB_CLASS_COMM, .bInterfaceSubClass = USB_CDC_SUBCLASS_DMM }, { } }; MODULE_DEVICE_TABLE (usb, wdm_ids); #define WDM_MINOR_BASE 176 #define WDM_IN_USE 1 #define WDM_DISCONNECTING 2 #define WDM_RESULT 3 #define WDM_READ 4 #define WDM_INT_STALL 5 #define WDM_POLL_RUNNING 6 #define WDM_RESPONDING 7 #define WDM_SUSPENDING 8 #define WDM_RESETTING 9 #define WDM_OVERFLOW 10 #define WDM_WWAN_IN_USE 11 #define WDM_MAX 16 /* we cannot wait forever at flush() */ #define WDM_FLUSH_TIMEOUT (30 * HZ) /* CDC-WMC r1.1 requires wMaxCommand to be "at least 256 decimal (0x100)" */ #define WDM_DEFAULT_BUFSIZE 256 static DEFINE_MUTEX(wdm_mutex); static DEFINE_SPINLOCK(wdm_device_list_lock); static LIST_HEAD(wdm_device_list); /* --- method tables --- */ struct wdm_device { u8 *inbuf; /* buffer for response */ u8 *outbuf; /* buffer for command */ u8 *sbuf; /* buffer for status */ u8 *ubuf; /* buffer for copy to user space */ struct urb *command; struct urb *response; struct urb *validity; struct usb_interface *intf; struct usb_ctrlrequest *orq; struct usb_ctrlrequest *irq; spinlock_t iuspin; unsigned long flags; u16 bufsize; u16 wMaxCommand; u16 wMaxPacketSize; __le16 inum; int reslength; int length; int read; int count; dma_addr_t shandle; dma_addr_t ihandle; struct mutex wlock; struct mutex rlock; wait_queue_head_t wait; struct work_struct rxwork; struct work_struct service_outs_intr; int werr; int rerr; int resp_count; struct list_head device_list; int (*manage_power)(struct usb_interface *, int); enum wwan_port_type wwanp_type; struct wwan_port *wwanp; }; static struct usb_driver wdm_driver; /* return intfdata if we own the interface, else look up intf in the list */ static struct wdm_device *wdm_find_device(struct usb_interface *intf) { struct wdm_device *desc; spin_lock(&wdm_device_list_lock); list_for_each_entry(desc, &wdm_device_list, device_list) if (desc->intf == intf) goto found; desc = NULL; found: spin_unlock(&wdm_device_list_lock); return desc; } static struct wdm_device *wdm_find_device_by_minor(int minor) { struct wdm_device *desc; spin_lock(&wdm_device_list_lock); list_for_each_entry(desc, &wdm_device_list, device_list) if (desc->intf->minor == minor) goto found; desc = NULL; found: spin_unlock(&wdm_device_list_lock); return desc; } /* --- callbacks --- */ static void wdm_out_callback(struct urb *urb) { struct wdm_device *desc; unsigned long flags; desc = urb->context; spin_lock_irqsave(&desc->iuspin, flags); desc->werr = urb->status; spin_unlock_irqrestore(&desc->iuspin, flags); kfree(desc->outbuf); desc->outbuf = NULL; clear_bit(WDM_IN_USE, &desc->flags); wake_up_all(&desc->wait); } static void wdm_wwan_rx(struct wdm_device *desc, int length); static void wdm_in_callback(struct urb *urb) { unsigned long flags; struct wdm_device *desc = urb->context; int status = urb->status; int length = urb->actual_length; spin_lock_irqsave(&desc->iuspin, flags); clear_bit(WDM_RESPONDING, &desc->flags); if (status) { switch (status) { case -ENOENT: dev_dbg(&desc->intf->dev, "nonzero urb status received: -ENOENT\n"); goto skip_error; case -ECONNRESET: dev_dbg(&desc->intf->dev, "nonzero urb status received: -ECONNRESET\n"); goto skip_error; case -ESHUTDOWN: dev_dbg(&desc->intf->dev, "nonzero urb status received: -ESHUTDOWN\n"); goto skip_error; case -EPIPE: dev_err(&desc->intf->dev, "nonzero urb status received: -EPIPE\n"); break; default: dev_err(&desc->intf->dev, "Unexpected error %d\n", status); break; } } if (test_bit(WDM_WWAN_IN_USE, &desc->flags)) { wdm_wwan_rx(desc, length); goto out; } /* * only set a new error if there is no previous error. * Errors are only cleared during read/open * Avoid propagating -EPIPE (stall) to userspace since it is * better handled as an empty read */ if (desc->rerr == 0 && status != -EPIPE) desc->rerr = status; if (length + desc->length > desc->wMaxCommand) { /* The buffer would overflow */ set_bit(WDM_OVERFLOW, &desc->flags); } else { /* we may already be in overflow */ if (!test_bit(WDM_OVERFLOW, &desc->flags)) { memmove(desc->ubuf + desc->length, desc->inbuf, length); desc->length += length; desc->reslength = length; } } skip_error: if (desc->rerr) { /* * Since there was an error, userspace may decide to not read * any data after poll'ing. * We should respond to further attempts from the device to send * data, so that we can get unstuck. */ schedule_work(&desc->service_outs_intr); } else { set_bit(WDM_READ, &desc->flags); wake_up(&desc->wait); } out: spin_unlock_irqrestore(&desc->iuspin, flags); } static void wdm_int_callback(struct urb *urb) { unsigned long flags; int rv = 0; int responding; int status = urb->status; struct wdm_device *desc; struct usb_cdc_notification *dr; desc = urb->context; dr = (struct usb_cdc_notification *)desc->sbuf; if (status) { switch (status) { case -ESHUTDOWN: case -ENOENT: case -ECONNRESET: return; /* unplug */ case -EPIPE: set_bit(WDM_INT_STALL, &desc->flags); dev_err(&desc->intf->dev, "Stall on int endpoint\n"); goto sw; /* halt is cleared in work */ default: dev_err_ratelimited(&desc->intf->dev, "nonzero urb status received: %d\n", status); break; } } if (urb->actual_length < sizeof(struct usb_cdc_notification)) { dev_err_ratelimited(&desc->intf->dev, "wdm_int_callback - %d bytes\n", urb->actual_length); goto exit; } switch (dr->bNotificationType) { case USB_CDC_NOTIFY_RESPONSE_AVAILABLE: dev_dbg(&desc->intf->dev, "NOTIFY_RESPONSE_AVAILABLE received: index %d len %d\n", le16_to_cpu(dr->wIndex), le16_to_cpu(dr->wLength)); break; case USB_CDC_NOTIFY_NETWORK_CONNECTION: dev_dbg(&desc->intf->dev, "NOTIFY_NETWORK_CONNECTION %s network\n", dr->wValue ? "connected to" : "disconnected from"); goto exit; case USB_CDC_NOTIFY_SPEED_CHANGE: dev_dbg(&desc->intf->dev, "SPEED_CHANGE received (len %u)\n", urb->actual_length); goto exit; default: clear_bit(WDM_POLL_RUNNING, &desc->flags); dev_err(&desc->intf->dev, "unknown notification %d received: index %d len %d\n", dr->bNotificationType, le16_to_cpu(dr->wIndex), le16_to_cpu(dr->wLength)); goto exit; } spin_lock_irqsave(&desc->iuspin, flags); responding = test_and_set_bit(WDM_RESPONDING, &desc->flags); if (!desc->resp_count++ && !responding && !test_bit(WDM_DISCONNECTING, &desc->flags) && !test_bit(WDM_SUSPENDING, &desc->flags)) { rv = usb_submit_urb(desc->response, GFP_ATOMIC); dev_dbg(&desc->intf->dev, "submit response URB %d\n", rv); } spin_unlock_irqrestore(&desc->iuspin, flags); if (rv < 0) { clear_bit(WDM_RESPONDING, &desc->flags); if (rv == -EPERM) return; if (rv == -ENOMEM) { sw: rv = schedule_work(&desc->rxwork); if (rv) dev_err(&desc->intf->dev, "Cannot schedule work\n"); } } exit: rv = usb_submit_urb(urb, GFP_ATOMIC); if (rv) dev_err(&desc->intf->dev, "%s - usb_submit_urb failed with result %d\n", __func__, rv); } static void poison_urbs(struct wdm_device *desc) { /* the order here is essential */ usb_poison_urb(desc->command); usb_poison_urb(desc->validity); usb_poison_urb(desc->response); } static void unpoison_urbs(struct wdm_device *desc) { /* * the order here is not essential * it is symmetrical just to be nice */ usb_unpoison_urb(desc->response); usb_unpoison_urb(desc->validity); usb_unpoison_urb(desc->command); } static void free_urbs(struct wdm_device *desc) { usb_free_urb(desc->validity); usb_free_urb(desc->response); usb_free_urb(desc->command); } static void cleanup(struct wdm_device *desc) { kfree(desc->sbuf); kfree(desc->inbuf); kfree(desc->orq); kfree(desc->irq); kfree(desc->ubuf); free_urbs(desc); kfree(desc); } static ssize_t wdm_write (struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { u8 *buf; int rv = -EMSGSIZE, r, we; struct wdm_device *desc = file->private_data; struct usb_ctrlrequest *req; if (count > desc->wMaxCommand) count = desc->wMaxCommand; spin_lock_irq(&desc->iuspin); we = desc->werr; desc->werr = 0; spin_unlock_irq(&desc->iuspin); if (we < 0) return usb_translate_errors(we); buf = memdup_user(buffer, count); if (IS_ERR(buf)) return PTR_ERR(buf); /* concurrent writes and disconnect */ r = mutex_lock_interruptible(&desc->wlock); rv = -ERESTARTSYS; if (r) goto out_free_mem; if (test_bit(WDM_DISCONNECTING, &desc->flags)) { rv = -ENODEV; goto out_free_mem_lock; } r = usb_autopm_get_interface(desc->intf); if (r < 0) { rv = usb_translate_errors(r); goto out_free_mem_lock; } if (!(file->f_flags & O_NONBLOCK)) r = wait_event_interruptible(desc->wait, !test_bit(WDM_IN_USE, &desc->flags)); else if (test_bit(WDM_IN_USE, &desc->flags)) r = -EAGAIN; if (test_bit(WDM_RESETTING, &desc->flags)) r = -EIO; if (test_bit(WDM_DISCONNECTING, &desc->flags)) r = -ENODEV; if (r < 0) { rv = r; goto out_free_mem_pm; } req = desc->orq; usb_fill_control_urb( desc->command, interface_to_usbdev(desc->intf), /* using common endpoint 0 */ usb_sndctrlpipe(interface_to_usbdev(desc->intf), 0), (unsigned char *)req, buf, count, wdm_out_callback, desc ); req->bRequestType = (USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE); req->bRequest = USB_CDC_SEND_ENCAPSULATED_COMMAND; req->wValue = 0; req->wIndex = desc->inum; /* already converted */ req->wLength = cpu_to_le16(count); set_bit(WDM_IN_USE, &desc->flags); desc->outbuf = buf; rv = usb_submit_urb(desc->command, GFP_KERNEL); if (rv < 0) { desc->outbuf = NULL; clear_bit(WDM_IN_USE, &desc->flags); wake_up_all(&desc->wait); /* for wdm_wait_for_response() */ dev_err(&desc->intf->dev, "Tx URB error: %d\n", rv); rv = usb_translate_errors(rv); goto out_free_mem_pm; } else { dev_dbg(&desc->intf->dev, "Tx URB has been submitted index=%d\n", le16_to_cpu(req->wIndex)); } usb_autopm_put_interface(desc->intf); mutex_unlock(&desc->wlock); return count; out_free_mem_pm: usb_autopm_put_interface(desc->intf); out_free_mem_lock: mutex_unlock(&desc->wlock); out_free_mem: kfree(buf); return rv; } /* * Submit the read urb if resp_count is non-zero. * * Called with desc->iuspin locked */ static int service_outstanding_interrupt(struct wdm_device *desc) { int rv = 0; /* submit read urb only if the device is waiting for it */ if (!desc->resp_count || !--desc->resp_count) goto out; if (test_bit(WDM_DISCONNECTING, &desc->flags)) { rv = -ENODEV; goto out; } if (test_bit(WDM_RESETTING, &desc->flags)) { rv = -EIO; goto out; } set_bit(WDM_RESPONDING, &desc->flags); spin_unlock_irq(&desc->iuspin); rv = usb_submit_urb(desc->response, GFP_KERNEL); spin_lock_irq(&desc->iuspin); if (rv) { if (!test_bit(WDM_DISCONNECTING, &desc->flags)) dev_err(&desc->intf->dev, "usb_submit_urb failed with result %d\n", rv); /* make sure the next notification trigger a submit */ clear_bit(WDM_RESPONDING, &desc->flags); desc->resp_count = 0; } out: return rv; } static ssize_t wdm_read (struct file *file, char __user *buffer, size_t count, loff_t *ppos) { int rv, cntr; int i = 0; struct wdm_device *desc = file->private_data; rv = mutex_lock_interruptible(&desc->rlock); /*concurrent reads */ if (rv < 0) return -ERESTARTSYS; cntr = READ_ONCE(desc->length); if (cntr == 0) { desc->read = 0; retry: if (test_bit(WDM_DISCONNECTING, &desc->flags)) { rv = -ENODEV; goto err; } if (test_bit(WDM_OVERFLOW, &desc->flags)) { clear_bit(WDM_OVERFLOW, &desc->flags); rv = -ENOBUFS; goto err; } i++; if (file->f_flags & O_NONBLOCK) { if (!test_bit(WDM_READ, &desc->flags)) { rv = -EAGAIN; goto err; } rv = 0; } else { rv = wait_event_interruptible(desc->wait, test_bit(WDM_READ, &desc->flags)); } /* may have happened while we slept */ if (test_bit(WDM_DISCONNECTING, &desc->flags)) { rv = -ENODEV; goto err; } if (test_bit(WDM_RESETTING, &desc->flags)) { rv = -EIO; goto err; } usb_mark_last_busy(interface_to_usbdev(desc->intf)); if (rv < 0) { rv = -ERESTARTSYS; goto err; } spin_lock_irq(&desc->iuspin); if (desc->rerr) { /* read completed, error happened */ rv = usb_translate_errors(desc->rerr); desc->rerr = 0; spin_unlock_irq(&desc->iuspin); goto err; } /* * recheck whether we've lost the race * against the completion handler */ if (!test_bit(WDM_READ, &desc->flags)) { /* lost race */ spin_unlock_irq(&desc->iuspin); goto retry; } if (!desc->reslength) { /* zero length read */ dev_dbg(&desc->intf->dev, "zero length - clearing WDM_READ\n"); clear_bit(WDM_READ, &desc->flags); rv = service_outstanding_interrupt(desc); spin_unlock_irq(&desc->iuspin); if (rv < 0) goto err; goto retry; } cntr = desc->length; spin_unlock_irq(&desc->iuspin); } if (cntr > count) cntr = count; rv = copy_to_user(buffer, desc->ubuf, cntr); if (rv > 0) { rv = -EFAULT; goto err; } spin_lock_irq(&desc->iuspin); for (i = 0; i < desc->length - cntr; i++) desc->ubuf[i] = desc->ubuf[i + cntr]; desc->length -= cntr; /* in case we had outstanding data */ if (!desc->length) { clear_bit(WDM_READ, &desc->flags); service_outstanding_interrupt(desc); } spin_unlock_irq(&desc->iuspin); rv = cntr; err: mutex_unlock(&desc->rlock); return rv; } static int wdm_wait_for_response(struct file *file, long timeout) { struct wdm_device *desc = file->private_data; long rv; /* Use long here because (int) MAX_SCHEDULE_TIMEOUT < 0. */ /* * Needs both flags. We cannot do with one because resetting it would * cause a race with write() yet we need to signal a disconnect. */ rv = wait_event_interruptible_timeout(desc->wait, !test_bit(WDM_IN_USE, &desc->flags) || test_bit(WDM_DISCONNECTING, &desc->flags), timeout); /* * To report the correct error. This is best effort. * We are inevitably racing with the hardware. */ if (test_bit(WDM_DISCONNECTING, &desc->flags)) return -ENODEV; if (!rv) return -EIO; if (rv < 0) return -EINTR; spin_lock_irq(&desc->iuspin); rv = desc->werr; desc->werr = 0; spin_unlock_irq(&desc->iuspin); return usb_translate_errors(rv); } /* * You need to send a signal when you react to malicious or defective hardware. * Also, don't abort when fsync() returned -EINVAL, for older kernels which do * not implement wdm_flush() will return -EINVAL. */ static int wdm_fsync(struct file *file, loff_t start, loff_t end, int datasync) { return wdm_wait_for_response(file, MAX_SCHEDULE_TIMEOUT); } /* * Same with wdm_fsync(), except it uses finite timeout in order to react to * malicious or defective hardware which ceased communication after close() was * implicitly called due to process termination. */ static int wdm_flush(struct file *file, fl_owner_t id) { return wdm_wait_for_response(file, WDM_FLUSH_TIMEOUT); } static __poll_t wdm_poll(struct file *file, struct poll_table_struct *wait) { struct wdm_device *desc = file->private_data; unsigned long flags; __poll_t mask = 0; spin_lock_irqsave(&desc->iuspin, flags); if (test_bit(WDM_DISCONNECTING, &desc->flags)) { mask = EPOLLHUP | EPOLLERR; spin_unlock_irqrestore(&desc->iuspin, flags); goto desc_out; } if (test_bit(WDM_READ, &desc->flags)) mask = EPOLLIN | EPOLLRDNORM; if (desc->rerr || desc->werr) mask |= EPOLLERR; if (!test_bit(WDM_IN_USE, &desc->flags)) mask |= EPOLLOUT | EPOLLWRNORM; spin_unlock_irqrestore(&desc->iuspin, flags); poll_wait(file, &desc->wait, wait); desc_out: return mask; } static int wdm_open(struct inode *inode, struct file *file) { int minor = iminor(inode); int rv = -ENODEV; struct usb_interface *intf; struct wdm_device *desc; mutex_lock(&wdm_mutex); desc = wdm_find_device_by_minor(minor); if (!desc) goto out; intf = desc->intf; if (test_bit(WDM_DISCONNECTING, &desc->flags)) goto out; file->private_data = desc; if (test_bit(WDM_WWAN_IN_USE, &desc->flags)) { rv = -EBUSY; goto out; } rv = usb_autopm_get_interface(desc->intf); if (rv < 0) { dev_err(&desc->intf->dev, "Error autopm - %d\n", rv); goto out; } /* using write lock to protect desc->count */ mutex_lock(&desc->wlock); if (!desc->count++) { desc->werr = 0; desc->rerr = 0; rv = usb_submit_urb(desc->validity, GFP_KERNEL); if (rv < 0) { desc->count--; dev_err(&desc->intf->dev, "Error submitting int urb - %d\n", rv); rv = usb_translate_errors(rv); } } else { rv = 0; } mutex_unlock(&desc->wlock); if (desc->count == 1) desc->manage_power(intf, 1); usb_autopm_put_interface(desc->intf); out: mutex_unlock(&wdm_mutex); return rv; } static int wdm_release(struct inode *inode, struct file *file) { struct wdm_device *desc = file->private_data; mutex_lock(&wdm_mutex); /* using write lock to protect desc->count */ mutex_lock(&desc->wlock); desc->count--; mutex_unlock(&desc->wlock); if (!desc->count) { if (!test_bit(WDM_DISCONNECTING, &desc->flags)) { dev_dbg(&desc->intf->dev, "wdm_release: cleanup\n"); poison_urbs(desc); spin_lock_irq(&desc->iuspin); desc->resp_count = 0; clear_bit(WDM_RESPONDING, &desc->flags); spin_unlock_irq(&desc->iuspin); desc->manage_power(desc->intf, 0); unpoison_urbs(desc); } else { /* must avoid dev_printk here as desc->intf is invalid */ pr_debug(KBUILD_MODNAME " %s: device gone - cleaning up\n", __func__); cleanup(desc); } } mutex_unlock(&wdm_mutex); return 0; } static long wdm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct wdm_device *desc = file->private_data; int rv = 0; switch (cmd) { case IOCTL_WDM_MAX_COMMAND: if (copy_to_user((void __user *)arg, &desc->wMaxCommand, sizeof(desc->wMaxCommand))) rv = -EFAULT; break; default: rv = -ENOTTY; } return rv; } static const struct file_operations wdm_fops = { .owner = THIS_MODULE, .read = wdm_read, .write = wdm_write, .fsync = wdm_fsync, .open = wdm_open, .flush = wdm_flush, .release = wdm_release, .poll = wdm_poll, .unlocked_ioctl = wdm_ioctl, .compat_ioctl = compat_ptr_ioctl, .llseek = noop_llseek, }; static struct usb_class_driver wdm_class = { .name = "cdc-wdm%d", .fops = &wdm_fops, .minor_base = WDM_MINOR_BASE, }; /* --- WWAN framework integration --- */ #ifdef CONFIG_WWAN static int wdm_wwan_port_start(struct wwan_port *port) { struct wdm_device *desc = wwan_port_get_drvdata(port); /* The interface is both exposed via the WWAN framework and as a * legacy usbmisc chardev. If chardev is already open, just fail * to prevent concurrent usage. Otherwise, switch to WWAN mode. */ mutex_lock(&wdm_mutex); if (desc->count) { mutex_unlock(&wdm_mutex); return -EBUSY; } set_bit(WDM_WWAN_IN_USE, &desc->flags); mutex_unlock(&wdm_mutex); desc->manage_power(desc->intf, 1); /* tx is allowed */ wwan_port_txon(port); /* Start getting events */ return usb_submit_urb(desc->validity, GFP_KERNEL); } static void wdm_wwan_port_stop(struct wwan_port *port) { struct wdm_device *desc = wwan_port_get_drvdata(port); /* Stop all transfers and disable WWAN mode */ poison_urbs(desc); desc->manage_power(desc->intf, 0); clear_bit(WDM_READ, &desc->flags); clear_bit(WDM_WWAN_IN_USE, &desc->flags); unpoison_urbs(desc); } static void wdm_wwan_port_tx_complete(struct urb *urb) { struct sk_buff *skb = urb->context; struct wdm_device *desc = skb_shinfo(skb)->destructor_arg; usb_autopm_put_interface(desc->intf); wwan_port_txon(desc->wwanp); kfree_skb(skb); } static int wdm_wwan_port_tx(struct wwan_port *port, struct sk_buff *skb) { struct wdm_device *desc = wwan_port_get_drvdata(port); struct usb_interface *intf = desc->intf; struct usb_ctrlrequest *req = desc->orq; int rv; rv = usb_autopm_get_interface(intf); if (rv) return rv; usb_fill_control_urb( desc->command, interface_to_usbdev(intf), usb_sndctrlpipe(interface_to_usbdev(intf), 0), (unsigned char *)req, skb->data, skb->len, wdm_wwan_port_tx_complete, skb ); req->bRequestType = (USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE); req->bRequest = USB_CDC_SEND_ENCAPSULATED_COMMAND; req->wValue = 0; req->wIndex = desc->inum; req->wLength = cpu_to_le16(skb->len); skb_shinfo(skb)->destructor_arg = desc; rv = usb_submit_urb(desc->command, GFP_KERNEL); if (rv) usb_autopm_put_interface(intf); else /* One transfer at a time, stop TX until URB completion */ wwan_port_txoff(port); return rv; } static const struct wwan_port_ops wdm_wwan_port_ops = { .start = wdm_wwan_port_start, .stop = wdm_wwan_port_stop, .tx = wdm_wwan_port_tx, }; static void wdm_wwan_init(struct wdm_device *desc) { struct usb_interface *intf = desc->intf; struct wwan_port *port; /* Only register to WWAN core if protocol/type is known */ if (desc->wwanp_type == WWAN_PORT_UNKNOWN) { dev_info(&intf->dev, "Unknown control protocol\n"); return; } port = wwan_create_port(&intf->dev, desc->wwanp_type, &wdm_wwan_port_ops, NULL, desc); if (IS_ERR(port)) { dev_err(&intf->dev, "%s: Unable to create WWAN port\n", dev_name(intf->usb_dev)); return; } desc->wwanp = port; } static void wdm_wwan_deinit(struct wdm_device *desc) { if (!desc->wwanp) return; wwan_remove_port(desc->wwanp); desc->wwanp = NULL; } static void wdm_wwan_rx(struct wdm_device *desc, int length) { struct wwan_port *port = desc->wwanp; struct sk_buff *skb; /* Forward data to WWAN port */ skb = alloc_skb(length, GFP_ATOMIC); if (!skb) return; skb_put_data(skb, desc->inbuf, length); wwan_port_rx(port, skb); /* inbuf has been copied, it is safe to check for outstanding data */ schedule_work(&desc->service_outs_intr); } #else /* CONFIG_WWAN */ static void wdm_wwan_init(struct wdm_device *desc) {} static void wdm_wwan_deinit(struct wdm_device *desc) {} static void wdm_wwan_rx(struct wdm_device *desc, int length) {} #endif /* CONFIG_WWAN */ /* --- error handling --- */ static void wdm_rxwork(struct work_struct *work) { struct wdm_device *desc = container_of(work, struct wdm_device, rxwork); unsigned long flags; int rv = 0; int responding; spin_lock_irqsave(&desc->iuspin, flags); if (test_bit(WDM_DISCONNECTING, &desc->flags)) { spin_unlock_irqrestore(&desc->iuspin, flags); } else { responding = test_and_set_bit(WDM_RESPONDING, &desc->flags); spin_unlock_irqrestore(&desc->iuspin, flags); if (!responding) rv = usb_submit_urb(desc->response, GFP_KERNEL); if (rv < 0 && rv != -EPERM) { spin_lock_irqsave(&desc->iuspin, flags); clear_bit(WDM_RESPONDING, &desc->flags); if (!test_bit(WDM_DISCONNECTING, &desc->flags)) schedule_work(&desc->rxwork); spin_unlock_irqrestore(&desc->iuspin, flags); } } } static void service_interrupt_work(struct work_struct *work) { struct wdm_device *desc; desc = container_of(work, struct wdm_device, service_outs_intr); spin_lock_irq(&desc->iuspin); service_outstanding_interrupt(desc); if (!desc->resp_count) { set_bit(WDM_READ, &desc->flags); wake_up(&desc->wait); } spin_unlock_irq(&desc->iuspin); } /* --- hotplug --- */ static int wdm_create(struct usb_interface *intf, struct usb_endpoint_descriptor *ep, u16 bufsize, enum wwan_port_type type, int (*manage_power)(struct usb_interface *, int)) { int rv = -ENOMEM; struct wdm_device *desc; desc = kzalloc(sizeof(struct wdm_device), GFP_KERNEL); if (!desc) goto out; INIT_LIST_HEAD(&desc->device_list); mutex_init(&desc->rlock); mutex_init(&desc->wlock); spin_lock_init(&desc->iuspin); init_waitqueue_head(&desc->wait); desc->wMaxCommand = bufsize; /* this will be expanded and needed in hardware endianness */ desc->inum = cpu_to_le16((u16)intf->cur_altsetting->desc.bInterfaceNumber); desc->intf = intf; desc->wwanp_type = type; INIT_WORK(&desc->rxwork, wdm_rxwork); INIT_WORK(&desc->service_outs_intr, service_interrupt_work); if (!usb_endpoint_is_int_in(ep)) { rv = -EINVAL; goto err; } desc->wMaxPacketSize = usb_endpoint_maxp(ep); desc->orq = kmalloc(sizeof(struct usb_ctrlrequest), GFP_KERNEL); if (!desc->orq) goto err; desc->irq = kmalloc(sizeof(struct usb_ctrlrequest), GFP_KERNEL); if (!desc->irq) goto err; desc->validity = usb_alloc_urb(0, GFP_KERNEL); if (!desc->validity) goto err; desc->response = usb_alloc_urb(0, GFP_KERNEL); if (!desc->response) goto err; desc->command = usb_alloc_urb(0, GFP_KERNEL); if (!desc->command) goto err; desc->ubuf = kmalloc(desc->wMaxCommand, GFP_KERNEL); if (!desc->ubuf) goto err; desc->sbuf = kmalloc(desc->wMaxPacketSize, GFP_KERNEL); if (!desc->sbuf) goto err; desc->inbuf = kmalloc(desc->wMaxCommand, GFP_KERNEL); if (!desc->inbuf) goto err; usb_fill_int_urb( desc->validity, interface_to_usbdev(intf), usb_rcvintpipe(interface_to_usbdev(intf), ep->bEndpointAddress), desc->sbuf, desc->wMaxPacketSize, wdm_int_callback, desc, ep->bInterval ); desc->irq->bRequestType = (USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE); desc->irq->bRequest = USB_CDC_GET_ENCAPSULATED_RESPONSE; desc->irq->wValue = 0; desc->irq->wIndex = desc->inum; /* already converted */ desc->irq->wLength = cpu_to_le16(desc->wMaxCommand); usb_fill_control_urb( desc->response, interface_to_usbdev(intf), /* using common endpoint 0 */ usb_rcvctrlpipe(interface_to_usbdev(desc->intf), 0), (unsigned char *)desc->irq, desc->inbuf, desc->wMaxCommand, wdm_in_callback, desc ); desc->manage_power = manage_power; spin_lock(&wdm_device_list_lock); list_add(&desc->device_list, &wdm_device_list); spin_unlock(&wdm_device_list_lock); rv = usb_register_dev(intf, &wdm_class); if (rv < 0) goto err; else dev_info(&intf->dev, "%s: USB WDM device\n", dev_name(intf->usb_dev)); wdm_wwan_init(desc); out: return rv; err: spin_lock(&wdm_device_list_lock); list_del(&desc->device_list); spin_unlock(&wdm_device_list_lock); cleanup(desc); return rv; } static int wdm_manage_power(struct usb_interface *intf, int on) { /* need autopm_get/put here to ensure the usbcore sees the new value */ int rv = usb_autopm_get_interface(intf); intf->needs_remote_wakeup = on; if (!rv) usb_autopm_put_interface(intf); return 0; } static int wdm_probe(struct usb_interface *intf, const struct usb_device_id *id) { int rv = -EINVAL; struct usb_host_interface *iface; struct usb_endpoint_descriptor *ep; struct usb_cdc_parsed_header hdr; u8 *buffer = intf->altsetting->extra; int buflen = intf->altsetting->extralen; u16 maxcom = WDM_DEFAULT_BUFSIZE; if (!buffer) goto err; cdc_parse_cdc_header(&hdr, intf, buffer, buflen); if (hdr.usb_cdc_dmm_desc) maxcom = le16_to_cpu(hdr.usb_cdc_dmm_desc->wMaxCommand); iface = intf->cur_altsetting; if (iface->desc.bNumEndpoints != 1) goto err; ep = &iface->endpoint[0].desc; rv = wdm_create(intf, ep, maxcom, WWAN_PORT_UNKNOWN, &wdm_manage_power); err: return rv; } /** * usb_cdc_wdm_register - register a WDM subdriver * @intf: usb interface the subdriver will associate with * @ep: interrupt endpoint to monitor for notifications * @bufsize: maximum message size to support for read/write * @type: Type/protocol of the transported data (MBIM, QMI...) * @manage_power: call-back invoked during open and release to * manage the device's power * Create WDM usb class character device and associate it with intf * without binding, allowing another driver to manage the interface. * * The subdriver will manage the given interrupt endpoint exclusively * and will issue control requests referring to the given intf. It * will otherwise avoid interferring, and in particular not do * usb_set_intfdata/usb_get_intfdata on intf. * * The return value is a pointer to the subdriver's struct usb_driver. * The registering driver is responsible for calling this subdriver's * disconnect, suspend, resume, pre_reset and post_reset methods from * its own. */ struct usb_driver *usb_cdc_wdm_register(struct usb_interface *intf, struct usb_endpoint_descriptor *ep, int bufsize, enum wwan_port_type type, int (*manage_power)(struct usb_interface *, int)) { int rv; rv = wdm_create(intf, ep, bufsize, type, manage_power); if (rv < 0) goto err; return &wdm_driver; err: return ERR_PTR(rv); } EXPORT_SYMBOL(usb_cdc_wdm_register); static void wdm_disconnect(struct usb_interface *intf) { struct wdm_device *desc; unsigned long flags; usb_deregister_dev(intf, &wdm_class); desc = wdm_find_device(intf); mutex_lock(&wdm_mutex); wdm_wwan_deinit(desc); /* the spinlock makes sure no new urbs are generated in the callbacks */ spin_lock_irqsave(&desc->iuspin, flags); set_bit(WDM_DISCONNECTING, &desc->flags); set_bit(WDM_READ, &desc->flags); spin_unlock_irqrestore(&desc->iuspin, flags); wake_up_all(&desc->wait); mutex_lock(&desc->rlock); mutex_lock(&desc->wlock); poison_urbs(desc); cancel_work_sync(&desc->rxwork); cancel_work_sync(&desc->service_outs_intr); mutex_unlock(&desc->wlock); mutex_unlock(&desc->rlock); /* the desc->intf pointer used as list key is now invalid */ spin_lock(&wdm_device_list_lock); list_del(&desc->device_list); spin_unlock(&wdm_device_list_lock); if (!desc->count) cleanup(desc); else dev_dbg(&intf->dev, "%d open files - postponing cleanup\n", desc->count); mutex_unlock(&wdm_mutex); } #ifdef CONFIG_PM static int wdm_suspend(struct usb_interface *intf, pm_message_t message) { struct wdm_device *desc = wdm_find_device(intf); int rv = 0; dev_dbg(&desc->intf->dev, "wdm%d_suspend\n", intf->minor); /* if this is an autosuspend the caller does the locking */ if (!PMSG_IS_AUTO(message)) { mutex_lock(&desc->rlock); mutex_lock(&desc->wlock); } spin_lock_irq(&desc->iuspin); if (PMSG_IS_AUTO(message) && (test_bit(WDM_IN_USE, &desc->flags) || test_bit(WDM_RESPONDING, &desc->flags))) { spin_unlock_irq(&desc->iuspin); rv = -EBUSY; } else { set_bit(WDM_SUSPENDING, &desc->flags); spin_unlock_irq(&desc->iuspin); /* callback submits work - order is essential */ poison_urbs(desc); cancel_work_sync(&desc->rxwork); cancel_work_sync(&desc->service_outs_intr); unpoison_urbs(desc); } if (!PMSG_IS_AUTO(message)) { mutex_unlock(&desc->wlock); mutex_unlock(&desc->rlock); } return rv; } #endif static int recover_from_urb_loss(struct wdm_device *desc) { int rv = 0; if (desc->count) { rv = usb_submit_urb(desc->validity, GFP_NOIO); if (rv < 0) dev_err(&desc->intf->dev, "Error resume submitting int urb - %d\n", rv); } return rv; } #ifdef CONFIG_PM static int wdm_resume(struct usb_interface *intf) { struct wdm_device *desc = wdm_find_device(intf); int rv; dev_dbg(&desc->intf->dev, "wdm%d_resume\n", intf->minor); clear_bit(WDM_SUSPENDING, &desc->flags); rv = recover_from_urb_loss(desc); return rv; } #endif static int wdm_pre_reset(struct usb_interface *intf) { struct wdm_device *desc = wdm_find_device(intf); /* * we notify everybody using poll of * an exceptional situation * must be done before recovery lest a spontaneous * message from the device is lost */ spin_lock_irq(&desc->iuspin); set_bit(WDM_RESETTING, &desc->flags); /* inform read/write */ set_bit(WDM_READ, &desc->flags); /* unblock read */ clear_bit(WDM_IN_USE, &desc->flags); /* unblock write */ desc->rerr = -EINTR; spin_unlock_irq(&desc->iuspin); wake_up_all(&desc->wait); mutex_lock(&desc->rlock); mutex_lock(&desc->wlock); poison_urbs(desc); cancel_work_sync(&desc->rxwork); cancel_work_sync(&desc->service_outs_intr); return 0; } static int wdm_post_reset(struct usb_interface *intf) { struct wdm_device *desc = wdm_find_device(intf); int rv; unpoison_urbs(desc); clear_bit(WDM_OVERFLOW, &desc->flags); clear_bit(WDM_RESETTING, &desc->flags); rv = recover_from_urb_loss(desc); mutex_unlock(&desc->wlock); mutex_unlock(&desc->rlock); return rv; } static struct usb_driver wdm_driver = { .name = "cdc_wdm", .probe = wdm_probe, .disconnect = wdm_disconnect, #ifdef CONFIG_PM .suspend = wdm_suspend, .resume = wdm_resume, .reset_resume = wdm_resume, #endif .pre_reset = wdm_pre_reset, .post_reset = wdm_post_reset, .id_table = wdm_ids, .supports_autosuspend = 1, .disable_hub_initiated_lpm = 1, }; module_usb_driver(wdm_driver); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL");
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 /* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef __DSA_TAG_H #define __DSA_TAG_H #include <linux/if_vlan.h> #include <linux/list.h> #include <linux/types.h> #include <net/dsa.h> #include "port.h" #include "user.h" struct dsa_tag_driver { const struct dsa_device_ops *ops; struct list_head list; struct module *owner; }; extern struct packet_type dsa_pack_type; const struct dsa_device_ops *dsa_tag_driver_get_by_id(int tag_protocol); const struct dsa_device_ops *dsa_tag_driver_get_by_name(const char *name); void dsa_tag_driver_put(const struct dsa_device_ops *ops); const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops); static inline int dsa_tag_protocol_overhead(const struct dsa_device_ops *ops) { return ops->needed_headroom + ops->needed_tailroom; } static inline struct net_device *dsa_conduit_find_user(struct net_device *dev, int device, int port) { struct dsa_port *cpu_dp = dev->dsa_ptr; struct dsa_switch_tree *dst = cpu_dp->dst; struct dsa_port *dp; list_for_each_entry(dp, &dst->ports, list) if (dp->ds->index == device && dp->index == port && dp->type == DSA_PORT_TYPE_USER) return dp->user; return NULL; } /** * dsa_software_untag_vlan_aware_bridge: Software untagging for VLAN-aware bridge * @skb: Pointer to received socket buffer (packet) * @br: Pointer to bridge upper interface of ingress port * @vid: Parsed VID from packet * * The bridge can process tagged packets. Software like STP/PTP may not. The * bridge can also process untagged packets, to the same effect as if they were * tagged with the PVID of the ingress port. So packets tagged with the PVID of * the bridge port must be software-untagged, to support both use cases. */ static inline void dsa_software_untag_vlan_aware_bridge(struct sk_buff *skb, struct net_device *br, u16 vid) { u16 pvid, proto; int err; err = br_vlan_get_proto(br, &proto); if (err) return; err = br_vlan_get_pvid_rcu(skb->dev, &pvid); if (err) return; if (vid == pvid && skb->vlan_proto == htons(proto)) __vlan_hwaccel_clear_tag(skb); } /** * dsa_software_untag_vlan_unaware_bridge: Software untagging for VLAN-unaware bridge * @skb: Pointer to received socket buffer (packet) * @br: Pointer to bridge upper interface of ingress port * @vid: Parsed VID from packet * * The bridge ignores all VLAN tags. Software like STP/PTP may not (it may run * on the plain port, or on a VLAN upper interface). Maybe packets are coming * to software as tagged with a driver-defined VID which is NOT equal to the * PVID of the bridge port (since the bridge is VLAN-unaware, its configuration * should NOT be committed to hardware). DSA needs a method for this private * VID to be communicated by software to it, and if packets are tagged with it, * software-untag them. Note: the private VID may be different per bridge, to * support the FDB isolation use case. * * FIXME: this is currently implemented based on the broken assumption that * the "private VID" used by the driver in VLAN-unaware mode is equal to the * bridge PVID. It should not be, except for a coincidence; the bridge PVID is * irrelevant to the data path in the VLAN-unaware mode. Thus, the VID that * this function removes is wrong. * * All users of ds->untag_bridge_pvid should fix their drivers, if necessary, * to make the two independent. Only then, if there still remains a need to * strip the private VID from packets, then a new ds->ops->get_private_vid() * API shall be introduced to communicate to DSA what this VID is, which needs * to be stripped here. */ static inline void dsa_software_untag_vlan_unaware_bridge(struct sk_buff *skb, struct net_device *br, u16 vid) { struct net_device *upper_dev; u16 pvid, proto; int err; err = br_vlan_get_proto(br, &proto); if (err) return; err = br_vlan_get_pvid_rcu(skb->dev, &pvid); if (err) return; if (vid != pvid || skb->vlan_proto != htons(proto)) return; /* The sad part about attempting to untag from DSA is that we * don't know, unless we check, if the skb will end up in * the bridge's data path - br_allowed_ingress() - or not. * For example, there might be an 8021q upper for the * default_pvid of the bridge, which will steal VLAN-tagged traffic * from the bridge's data path. This is a configuration that DSA * supports because vlan_filtering is 0. In that case, we should * definitely keep the tag, to make sure it keeps working. */ upper_dev = __vlan_find_dev_deep_rcu(br, htons(proto), vid); if (!upper_dev) __vlan_hwaccel_clear_tag(skb); } /** * dsa_software_vlan_untag: Software VLAN untagging in DSA receive path * @skb: Pointer to socket buffer (packet) * * Receive path method for switches which send some packets as VLAN-tagged * towards the CPU port (generally from VLAN-aware bridge ports) even when the * packet was not tagged on the wire. Called when ds->untag_bridge_pvid * (legacy) or ds->untag_vlan_aware_bridge_pvid is set to true. * * As a side effect of this method, any VLAN tag from the skb head is moved * to hwaccel. */ static inline struct sk_buff *dsa_software_vlan_untag(struct sk_buff *skb) { struct dsa_port *dp = dsa_user_to_port(skb->dev); struct net_device *br = dsa_port_bridge_dev_get(dp); u16 vid, proto; int err; /* software untagging for standalone ports not yet necessary */ if (!br) return skb; err = br_vlan_get_proto(br, &proto); if (err) return skb; /* Move VLAN tag from data to hwaccel */ if (!skb_vlan_tag_present(skb) && skb->protocol == htons(proto)) { skb = skb_vlan_untag(skb); if (!skb) return NULL; } if (!skb_vlan_tag_present(skb)) return skb; vid = skb_vlan_tag_get_id(skb); if (br_vlan_enabled(br)) { if (dp->ds->untag_vlan_aware_bridge_pvid) dsa_software_untag_vlan_aware_bridge(skb, br, vid); } else { if (dp->ds->untag_bridge_pvid) dsa_software_untag_vlan_unaware_bridge(skb, br, vid); } return skb; } /* For switches without hardware support for DSA tagging to be able * to support termination through the bridge. */ static inline struct net_device * dsa_find_designated_bridge_port_by_vid(struct net_device *conduit, u16 vid) { struct dsa_port *cpu_dp = conduit->dsa_ptr; struct dsa_switch_tree *dst = cpu_dp->dst; struct bridge_vlan_info vinfo; struct net_device *user; struct dsa_port *dp; int err; list_for_each_entry(dp, &dst->ports, list) { if (dp->type != DSA_PORT_TYPE_USER) continue; if (!dp->bridge) continue; if (dp->stp_state != BR_STATE_LEARNING && dp->stp_state != BR_STATE_FORWARDING) continue; /* Since the bridge might learn this packet, keep the CPU port * affinity with the port that will be used for the reply on * xmit. */ if (dp->cpu_dp != cpu_dp) continue; user = dp->user; err = br_vlan_get_info_rcu(user, vid, &vinfo); if (err) continue; return user; } return NULL; } /* If the ingress port offloads the bridge, we mark the frame as autonomously * forwarded by hardware, so the software bridge doesn't forward in twice, back * to us, because we already did. However, if we're in fallback mode and we do * software bridging, we are not offloading it, therefore the dp->bridge * pointer is not populated, and flooding needs to be done by software (we are * effectively operating in standalone ports mode). */ static inline void dsa_default_offload_fwd_mark(struct sk_buff *skb) { struct dsa_port *dp = dsa_user_to_port(skb->dev); skb->offload_fwd_mark = !!(dp->bridge); } /* Helper for removing DSA header tags from packets in the RX path. * Must not be called before skb_pull(len). * skb->data * | * v * | | | | | | | | | | | | | | | | | | | * +-----------------------+-----------------------+---------------+-------+ * | Destination MAC | Source MAC | DSA header | EType | * +-----------------------+-----------------------+---------------+-------+ * | | * <----- len -----> <----- len -----> * | * >>>>>>> v * >>>>>>> | | | | | | | | | | | | | | | * >>>>>>> +-----------------------+-----------------------+-------+ * >>>>>>> | Destination MAC | Source MAC | EType | * +-----------------------+-----------------------+-------+ * ^ * | * skb->data */ static inline void dsa_strip_etype_header(struct sk_buff *skb, int len) { memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - len, 2 * ETH_ALEN); } /* Helper for creating space for DSA header tags in TX path packets. * Must not be called before skb_push(len). * * Before: * * <<<<<<< | | | | | | | | | | | | | | | * ^ <<<<<<< +-----------------------+-----------------------+-------+ * | <<<<<<< | Destination MAC | Source MAC | EType | * | +-----------------------+-----------------------+-------+ * <----- len -----> * | * | * skb->data * * After: * * | | | | | | | | | | | | | | | | | | | * +-----------------------+-----------------------+---------------+-------+ * | Destination MAC | Source MAC | DSA header | EType | * +-----------------------+-----------------------+---------------+-------+ * ^ | | * | <----- len -----> * skb->data */ static inline void dsa_alloc_etype_header(struct sk_buff *skb, int len) { memmove(skb->data, skb->data + len, 2 * ETH_ALEN); } /* On RX, eth_type_trans() on the DSA conduit pulls ETH_HLEN bytes starting from * skb_mac_header(skb), which leaves skb->data pointing at the first byte after * what the DSA conduit perceives as the EtherType (the beginning of the L3 * protocol). Since DSA EtherType header taggers treat the EtherType as part of * the DSA tag itself, and the EtherType is 2 bytes in length, the DSA header * is located 2 bytes behind skb->data. Note that EtherType in this context * means the first 2 bytes of the DSA header, not the encapsulated EtherType * that will become visible after the DSA header is stripped. */ static inline void *dsa_etype_header_pos_rx(struct sk_buff *skb) { return skb->data - 2; } /* On TX, skb->data points to the MAC header, which means that EtherType * header taggers start exactly where the EtherType is (the EtherType is * treated as part of the DSA header). */ static inline void *dsa_etype_header_pos_tx(struct sk_buff *skb) { return skb->data + 2 * ETH_ALEN; } /* Create 2 modaliases per tagging protocol, one to auto-load the module * given the ID reported by get_tag_protocol(), and the other by name. */ #define DSA_TAG_DRIVER_ALIAS "dsa_tag:" #define MODULE_ALIAS_DSA_TAG_DRIVER(__proto, __name) \ MODULE_ALIAS(DSA_TAG_DRIVER_ALIAS __name); \ MODULE_ALIAS(DSA_TAG_DRIVER_ALIAS "id-" \ __stringify(__proto##_VALUE)) void dsa_tag_drivers_register(struct dsa_tag_driver *dsa_tag_driver_array[], unsigned int count, struct module *owner); void dsa_tag_drivers_unregister(struct dsa_tag_driver *dsa_tag_driver_array[], unsigned int count); #define dsa_tag_driver_module_drivers(__dsa_tag_drivers_array, __count) \ static int __init dsa_tag_driver_module_init(void) \ { \ dsa_tag_drivers_register(__dsa_tag_drivers_array, __count, \ THIS_MODULE); \ return 0; \ } \ module_init(dsa_tag_driver_module_init); \ \ static void __exit dsa_tag_driver_module_exit(void) \ { \ dsa_tag_drivers_unregister(__dsa_tag_drivers_array, __count); \ } \ module_exit(dsa_tag_driver_module_exit) /** * module_dsa_tag_drivers() - Helper macro for registering DSA tag * drivers * @__ops_array: Array of tag driver structures * * Helper macro for DSA tag drivers which do not do anything special * in module init/exit. Each module may only use this macro once, and * calling it replaces module_init() and module_exit(). */ #define module_dsa_tag_drivers(__ops_array) \ dsa_tag_driver_module_drivers(__ops_array, ARRAY_SIZE(__ops_array)) #define DSA_TAG_DRIVER_NAME(__ops) dsa_tag_driver ## _ ## __ops /* Create a static structure we can build a linked list of dsa_tag * drivers */ #define DSA_TAG_DRIVER(__ops) \ static struct dsa_tag_driver DSA_TAG_DRIVER_NAME(__ops) = { \ .ops = &__ops, \ } /** * module_dsa_tag_driver() - Helper macro for registering a single DSA tag * driver * @__ops: Single tag driver structures * * Helper macro for DSA tag drivers which do not do anything special * in module init/exit. Each module may only use this macro once, and * calling it replaces module_init() and module_exit(). */ #define module_dsa_tag_driver(__ops) \ DSA_TAG_DRIVER(__ops); \ \ static struct dsa_tag_driver *dsa_tag_driver_array[] = { \ &DSA_TAG_DRIVER_NAME(__ops) \ }; \ module_dsa_tag_drivers(dsa_tag_driver_array) #endif
653 370 311 651 359 310 45 46 45 95 95 86 72 22 22 260 260 2 31 50 6 44 28 34 10 34 1 2 10 33 13 17 6 25 1 22 19 20 30 8 10 28 43 79 9 4 2 55 9 7 2 3 36 35 27 56 56 7 33 5 7 10 31 31 60 46 15 60 53 24 1 8 7 8 417 371 192 178 386 373 192 179 387 48 84 66 51 45 72 45 68 85 16 4 12 6 10 5 2 1 2 5 454 454 452 452 1 454 453 48 38 10 48 10 42 38 10 44 5 37 31 25 6 31 32 1 1 29 1 32 3 6 23 20 11 43 43 39 39 19 20 18 17 12 4 12 10 10 35 18 2 5 12 5 2 2 16 3 12 10 2 2 6 2 3 2 10 2 11 23 1 3 3 1 10 9 4 15 1 13 9 21 16 7 3 4 14 738 592 341 26 1 24 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/pipe.c * * Copyright (C) 1991, 1992, 1999 Linus Torvalds */ #include <linux/mm.h> #include <linux/file.h> #include <linux/poll.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/init.h> #include <linux/fs.h> #include <linux/log2.h> #include <linux/mount.h> #include <linux/pseudo_fs.h> #include <linux/magic.h> #include <linux/pipe_fs_i.h> #include <linux/uio.h> #include <linux/highmem.h> #include <linux/pagemap.h> #include <linux/audit.h> #include <linux/syscalls.h> #include <linux/fcntl.h> #include <linux/memcontrol.h> #include <linux/watch_queue.h> #include <linux/sysctl.h> #include <linux/uaccess.h> #include <asm/ioctls.h> #include "internal.h" /* * New pipe buffers will be restricted to this size while the user is exceeding * their pipe buffer quota. The general pipe use case needs at least two * buffers: one for data yet to be read, and one for new data. If this is less * than two, then a write to a non-empty pipe may block even if the pipe is not * full. This can occur with GNU make jobserver or similar uses of pipes as * semaphores: multiple processes may be waiting to write tokens back to the * pipe before reading tokens: https://lore.kernel.org/lkml/1628086770.5rn8p04n6j.none@localhost/. * * Users can reduce their pipe buffers with F_SETPIPE_SZ below this at their * own risk, namely: pipe writes to non-full pipes may block until the pipe is * emptied. */ #define PIPE_MIN_DEF_BUFFERS 2 /* * The max size that a non-root user is allowed to grow the pipe. Can * be set by root in /proc/sys/fs/pipe-max-size */ static unsigned int pipe_max_size = 1048576; /* Maximum allocatable pages per user. Hard limit is unset by default, soft * matches default values. */ static unsigned long pipe_user_pages_hard; static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR; /* * We use head and tail indices that aren't masked off, except at the point of * dereference, but rather they're allowed to wrap naturally. This means there * isn't a dead spot in the buffer, but the ring has to be a power of two and * <= 2^31. * -- David Howells 2019-09-23. * * Reads with count = 0 should always return 0. * -- Julian Bradfield 1999-06-07. * * FIFOs and Pipes now generate SIGIO for both readers and writers. * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16 * * pipe_read & write cleanup * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09 */ #define cmp_int(l, r) ((l > r) - (l < r)) #ifdef CONFIG_PROVE_LOCKING static int pipe_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b) { return cmp_int((unsigned long) a, (unsigned long) b); } #endif void pipe_lock(struct pipe_inode_info *pipe) { if (pipe->files) mutex_lock(&pipe->mutex); } EXPORT_SYMBOL(pipe_lock); void pipe_unlock(struct pipe_inode_info *pipe) { if (pipe->files) mutex_unlock(&pipe->mutex); } EXPORT_SYMBOL(pipe_unlock); void pipe_double_lock(struct pipe_inode_info *pipe1, struct pipe_inode_info *pipe2) { BUG_ON(pipe1 == pipe2); if (pipe1 > pipe2) swap(pipe1, pipe2); pipe_lock(pipe1); pipe_lock(pipe2); } static void anon_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { struct page *page = buf->page; /* * If nobody else uses this page, and we don't already have a * temporary page, let's keep track of it as a one-deep * allocation cache. (Otherwise just release our reference to it) */ if (page_count(page) == 1 && !pipe->tmp_page) pipe->tmp_page = page; else put_page(page); } static bool anon_pipe_buf_try_steal(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { struct page *page = buf->page; if (page_count(page) != 1) return false; memcg_kmem_uncharge_page(page, 0); __SetPageLocked(page); return true; } /** * generic_pipe_buf_try_steal - attempt to take ownership of a &pipe_buffer * @pipe: the pipe that the buffer belongs to * @buf: the buffer to attempt to steal * * Description: * This function attempts to steal the &struct page attached to * @buf. If successful, this function returns 0 and returns with * the page locked. The caller may then reuse the page for whatever * he wishes; the typical use is insertion into a different file * page cache. */ bool generic_pipe_buf_try_steal(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { struct page *page = buf->page; /* * A reference of one is golden, that means that the owner of this * page is the only one holding a reference to it. lock the page * and return OK. */ if (page_count(page) == 1) { lock_page(page); return true; } return false; } EXPORT_SYMBOL(generic_pipe_buf_try_steal); /** * generic_pipe_buf_get - get a reference to a &struct pipe_buffer * @pipe: the pipe that the buffer belongs to * @buf: the buffer to get a reference to * * Description: * This function grabs an extra reference to @buf. It's used in * the tee() system call, when we duplicate the buffers in one * pipe into another. */ bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { return try_get_page(buf->page); } EXPORT_SYMBOL(generic_pipe_buf_get); /** * generic_pipe_buf_release - put a reference to a &struct pipe_buffer * @pipe: the pipe that the buffer belongs to * @buf: the buffer to put a reference to * * Description: * This function releases a reference to @buf. */ void generic_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { put_page(buf->page); } EXPORT_SYMBOL(generic_pipe_buf_release); static const struct pipe_buf_operations anon_pipe_buf_ops = { .release = anon_pipe_buf_release, .try_steal = anon_pipe_buf_try_steal, .get = generic_pipe_buf_get, }; /* Done while waiting without holding the pipe lock - thus the READ_ONCE() */ static inline bool pipe_readable(const struct pipe_inode_info *pipe) { unsigned int head = READ_ONCE(pipe->head); unsigned int tail = READ_ONCE(pipe->tail); unsigned int writers = READ_ONCE(pipe->writers); return !pipe_empty(head, tail) || !writers; } static inline unsigned int pipe_update_tail(struct pipe_inode_info *pipe, struct pipe_buffer *buf, unsigned int tail) { pipe_buf_release(pipe, buf); /* * If the pipe has a watch_queue, we need additional protection * by the spinlock because notifications get posted with only * this spinlock, no mutex */ if (pipe_has_watch_queue(pipe)) { spin_lock_irq(&pipe->rd_wait.lock); #ifdef CONFIG_WATCH_QUEUE if (buf->flags & PIPE_BUF_FLAG_LOSS) pipe->note_loss = true; #endif pipe->tail = ++tail; spin_unlock_irq(&pipe->rd_wait.lock); return tail; } /* * Without a watch_queue, we can simply increment the tail * without the spinlock - the mutex is enough. */ pipe->tail = ++tail; return tail; } static ssize_t pipe_read(struct kiocb *iocb, struct iov_iter *to) { size_t total_len = iov_iter_count(to); struct file *filp = iocb->ki_filp; struct pipe_inode_info *pipe = filp->private_data; bool wake_writer = false, wake_next_reader = false; ssize_t ret; /* Null read succeeds. */ if (unlikely(total_len == 0)) return 0; ret = 0; mutex_lock(&pipe->mutex); /* * We only wake up writers if the pipe was full when we started reading * and it is no longer full after reading to avoid unnecessary wakeups. * * But when we do wake up writers, we do so using a sync wakeup * (WF_SYNC), because we want them to get going and generate more * data for us. */ for (;;) { /* Read ->head with a barrier vs post_one_notification() */ unsigned int head = smp_load_acquire(&pipe->head); unsigned int tail = pipe->tail; unsigned int mask = pipe->ring_size - 1; #ifdef CONFIG_WATCH_QUEUE if (pipe->note_loss) { struct watch_notification n; if (total_len < 8) { if (ret == 0) ret = -ENOBUFS; break; } n.type = WATCH_TYPE_META; n.subtype = WATCH_META_LOSS_NOTIFICATION; n.info = watch_sizeof(n); if (copy_to_iter(&n, sizeof(n), to) != sizeof(n)) { if (ret == 0) ret = -EFAULT; break; } ret += sizeof(n); total_len -= sizeof(n); pipe->note_loss = false; } #endif if (!pipe_empty(head, tail)) { struct pipe_buffer *buf = &pipe->bufs[tail & mask]; size_t chars = buf->len; size_t written; int error; if (chars > total_len) { if (buf->flags & PIPE_BUF_FLAG_WHOLE) { if (ret == 0) ret = -ENOBUFS; break; } chars = total_len; } error = pipe_buf_confirm(pipe, buf); if (error) { if (!ret) ret = error; break; } written = copy_page_to_iter(buf->page, buf->offset, chars, to); if (unlikely(written < chars)) { if (!ret) ret = -EFAULT; break; } ret += chars; buf->offset += chars; buf->len -= chars; /* Was it a packet buffer? Clean up and exit */ if (buf->flags & PIPE_BUF_FLAG_PACKET) { total_len = chars; buf->len = 0; } if (!buf->len) { wake_writer |= pipe_full(head, tail, pipe->max_usage); tail = pipe_update_tail(pipe, buf, tail); } total_len -= chars; if (!total_len) break; /* common path: read succeeded */ if (!pipe_empty(head, tail)) /* More to do? */ continue; } if (!pipe->writers) break; if (ret) break; if ((filp->f_flags & O_NONBLOCK) || (iocb->ki_flags & IOCB_NOWAIT)) { ret = -EAGAIN; break; } mutex_unlock(&pipe->mutex); /* * We only get here if we didn't actually read anything. * * However, we could have seen (and removed) a zero-sized * pipe buffer, and might have made space in the buffers * that way. * * You can't make zero-sized pipe buffers by doing an empty * write (not even in packet mode), but they can happen if * the writer gets an EFAULT when trying to fill a buffer * that already got allocated and inserted in the buffer * array. * * So we still need to wake up any pending writers in the * _very_ unlikely case that the pipe was full, but we got * no data. */ if (unlikely(wake_writer)) wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); /* * But because we didn't read anything, at this point we can * just return directly with -ERESTARTSYS if we're interrupted, * since we've done any required wakeups and there's no need * to mark anything accessed. And we've dropped the lock. */ if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0) return -ERESTARTSYS; wake_writer = false; wake_next_reader = true; mutex_lock(&pipe->mutex); } if (pipe_empty(pipe->head, pipe->tail)) wake_next_reader = false; mutex_unlock(&pipe->mutex); if (wake_writer) wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); if (wake_next_reader) wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); if (ret > 0) file_accessed(filp); return ret; } static inline int is_packetized(struct file *file) { return (file->f_flags & O_DIRECT) != 0; } /* Done while waiting without holding the pipe lock - thus the READ_ONCE() */ static inline bool pipe_writable(const struct pipe_inode_info *pipe) { unsigned int head = READ_ONCE(pipe->head); unsigned int tail = READ_ONCE(pipe->tail); unsigned int max_usage = READ_ONCE(pipe->max_usage); return !pipe_full(head, tail, max_usage) || !READ_ONCE(pipe->readers); } static ssize_t pipe_write(struct kiocb *iocb, struct iov_iter *from) { struct file *filp = iocb->ki_filp; struct pipe_inode_info *pipe = filp->private_data; unsigned int head; ssize_t ret = 0; size_t total_len = iov_iter_count(from); ssize_t chars; bool was_empty = false; bool wake_next_writer = false; /* * Reject writing to watch queue pipes before the point where we lock * the pipe. * Otherwise, lockdep would be unhappy if the caller already has another * pipe locked. * If we had to support locking a normal pipe and a notification pipe at * the same time, we could set up lockdep annotations for that, but * since we don't actually need that, it's simpler to just bail here. */ if (pipe_has_watch_queue(pipe)) return -EXDEV; /* Null write succeeds. */ if (unlikely(total_len == 0)) return 0; mutex_lock(&pipe->mutex); if (!pipe->readers) { send_sig(SIGPIPE, current, 0); ret = -EPIPE; goto out; } /* * If it wasn't empty we try to merge new data into * the last buffer. * * That naturally merges small writes, but it also * page-aligns the rest of the writes for large writes * spanning multiple pages. */ head = pipe->head; was_empty = pipe_empty(head, pipe->tail); chars = total_len & (PAGE_SIZE-1); if (chars && !was_empty) { unsigned int mask = pipe->ring_size - 1; struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask]; int offset = buf->offset + buf->len; if ((buf->flags & PIPE_BUF_FLAG_CAN_MERGE) && offset + chars <= PAGE_SIZE) { ret = pipe_buf_confirm(pipe, buf); if (ret) goto out; ret = copy_page_from_iter(buf->page, offset, chars, from); if (unlikely(ret < chars)) { ret = -EFAULT; goto out; } buf->len += ret; if (!iov_iter_count(from)) goto out; } } for (;;) { if (!pipe->readers) { send_sig(SIGPIPE, current, 0); if (!ret) ret = -EPIPE; break; } head = pipe->head; if (!pipe_full(head, pipe->tail, pipe->max_usage)) { unsigned int mask = pipe->ring_size - 1; struct pipe_buffer *buf; struct page *page = pipe->tmp_page; int copied; if (!page) { page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT); if (unlikely(!page)) { ret = ret ? : -ENOMEM; break; } pipe->tmp_page = page; } /* Allocate a slot in the ring in advance and attach an * empty buffer. If we fault or otherwise fail to use * it, either the reader will consume it or it'll still * be there for the next write. */ pipe->head = head + 1; /* Insert it into the buffer array */ buf = &pipe->bufs[head & mask]; buf->page = page; buf->ops = &anon_pipe_buf_ops; buf->offset = 0; buf->len = 0; if (is_packetized(filp)) buf->flags = PIPE_BUF_FLAG_PACKET; else buf->flags = PIPE_BUF_FLAG_CAN_MERGE; pipe->tmp_page = NULL; copied = copy_page_from_iter(page, 0, PAGE_SIZE, from); if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) { if (!ret) ret = -EFAULT; break; } ret += copied; buf->len = copied; if (!iov_iter_count(from)) break; } if (!pipe_full(head, pipe->tail, pipe->max_usage)) continue; /* Wait for buffer space to become available. */ if ((filp->f_flags & O_NONBLOCK) || (iocb->ki_flags & IOCB_NOWAIT)) { if (!ret) ret = -EAGAIN; break; } if (signal_pending(current)) { if (!ret) ret = -ERESTARTSYS; break; } /* * We're going to release the pipe lock and wait for more * space. We wake up any readers if necessary, and then * after waiting we need to re-check whether the pipe * become empty while we dropped the lock. */ mutex_unlock(&pipe->mutex); if (was_empty) wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe)); mutex_lock(&pipe->mutex); was_empty = pipe_empty(pipe->head, pipe->tail); wake_next_writer = true; } out: if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) wake_next_writer = false; mutex_unlock(&pipe->mutex); /* * If we do do a wakeup event, we do a 'sync' wakeup, because we * want the reader to start processing things asap, rather than * leave the data pending. * * This is particularly important for small writes, because of * how (for example) the GNU make jobserver uses small writes to * wake up pending jobs * * Epoll nonsensically wants a wakeup whether the pipe * was already empty or not. */ if (was_empty || pipe->poll_usage) wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); if (wake_next_writer) wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) { int err = file_update_time(filp); if (err) ret = err; sb_end_write(file_inode(filp)->i_sb); } return ret; } static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct pipe_inode_info *pipe = filp->private_data; unsigned int count, head, tail, mask; switch (cmd) { case FIONREAD: mutex_lock(&pipe->mutex); count = 0; head = pipe->head; tail = pipe->tail; mask = pipe->ring_size - 1; while (tail != head) { count += pipe->bufs[tail & mask].len; tail++; } mutex_unlock(&pipe->mutex); return put_user(count, (int __user *)arg); #ifdef CONFIG_WATCH_QUEUE case IOC_WATCH_QUEUE_SET_SIZE: { int ret; mutex_lock(&pipe->mutex); ret = watch_queue_set_size(pipe, arg); mutex_unlock(&pipe->mutex); return ret; } case IOC_WATCH_QUEUE_SET_FILTER: return watch_queue_set_filter( pipe, (struct watch_notification_filter __user *)arg); #endif default: return -ENOIOCTLCMD; } } /* No kernel lock held - fine */ static __poll_t pipe_poll(struct file *filp, poll_table *wait) { __poll_t mask; struct pipe_inode_info *pipe = filp->private_data; unsigned int head, tail; /* Epoll has some historical nasty semantics, this enables them */ WRITE_ONCE(pipe->poll_usage, true); /* * Reading pipe state only -- no need for acquiring the semaphore. * * But because this is racy, the code has to add the * entry to the poll table _first_ .. */ if (filp->f_mode & FMODE_READ) poll_wait(filp, &pipe->rd_wait, wait); if (filp->f_mode & FMODE_WRITE) poll_wait(filp, &pipe->wr_wait, wait); /* * .. and only then can you do the racy tests. That way, * if something changes and you got it wrong, the poll * table entry will wake you up and fix it. */ head = READ_ONCE(pipe->head); tail = READ_ONCE(pipe->tail); mask = 0; if (filp->f_mode & FMODE_READ) { if (!pipe_empty(head, tail)) mask |= EPOLLIN | EPOLLRDNORM; if (!pipe->writers && filp->f_pipe != pipe->w_counter) mask |= EPOLLHUP; } if (filp->f_mode & FMODE_WRITE) { if (!pipe_full(head, tail, pipe->max_usage)) mask |= EPOLLOUT | EPOLLWRNORM; /* * Most Unices do not set EPOLLERR for FIFOs but on Linux they * behave exactly like pipes for poll(). */ if (!pipe->readers) mask |= EPOLLERR; } return mask; } static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe) { int kill = 0; spin_lock(&inode->i_lock); if (!--pipe->files) { inode->i_pipe = NULL; kill = 1; } spin_unlock(&inode->i_lock); if (kill) free_pipe_info(pipe); } static int pipe_release(struct inode *inode, struct file *file) { struct pipe_inode_info *pipe = file->private_data; mutex_lock(&pipe->mutex); if (file->f_mode & FMODE_READ) pipe->readers--; if (file->f_mode & FMODE_WRITE) pipe->writers--; /* Was that the last reader or writer, but not the other side? */ if (!pipe->readers != !pipe->writers) { wake_up_interruptible_all(&pipe->rd_wait); wake_up_interruptible_all(&pipe->wr_wait); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } mutex_unlock(&pipe->mutex); put_pipe_info(inode, pipe); return 0; } static int pipe_fasync(int fd, struct file *filp, int on) { struct pipe_inode_info *pipe = filp->private_data; int retval = 0; mutex_lock(&pipe->mutex); if (filp->f_mode & FMODE_READ) retval = fasync_helper(fd, filp, on, &pipe->fasync_readers); if ((filp->f_mode & FMODE_WRITE) && retval >= 0) { retval = fasync_helper(fd, filp, on, &pipe->fasync_writers); if (retval < 0 && (filp->f_mode & FMODE_READ)) /* this can happen only if on == T */ fasync_helper(-1, filp, 0, &pipe->fasync_readers); } mutex_unlock(&pipe->mutex); return retval; } unsigned long account_pipe_buffers(struct user_struct *user, unsigned long old, unsigned long new) { return atomic_long_add_return(new - old, &user->pipe_bufs); } bool too_many_pipe_buffers_soft(unsigned long user_bufs) { unsigned long soft_limit = READ_ONCE(pipe_user_pages_soft); return soft_limit && user_bufs > soft_limit; } bool too_many_pipe_buffers_hard(unsigned long user_bufs) { unsigned long hard_limit = READ_ONCE(pipe_user_pages_hard); return hard_limit && user_bufs > hard_limit; } bool pipe_is_unprivileged_user(void) { return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); } struct pipe_inode_info *alloc_pipe_info(void) { struct pipe_inode_info *pipe; unsigned long pipe_bufs = PIPE_DEF_BUFFERS; struct user_struct *user = get_current_user(); unsigned long user_bufs; unsigned int max_size = READ_ONCE(pipe_max_size); pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL_ACCOUNT); if (pipe == NULL) goto out_free_uid; if (pipe_bufs * PAGE_SIZE > max_size && !capable(CAP_SYS_RESOURCE)) pipe_bufs = max_size >> PAGE_SHIFT; user_bufs = account_pipe_buffers(user, 0, pipe_bufs); if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) { user_bufs = account_pipe_buffers(user, pipe_bufs, PIPE_MIN_DEF_BUFFERS); pipe_bufs = PIPE_MIN_DEF_BUFFERS; } if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user()) goto out_revert_acct; pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer), GFP_KERNEL_ACCOUNT); if (pipe->bufs) { init_waitqueue_head(&pipe->rd_wait); init_waitqueue_head(&pipe->wr_wait); pipe->r_counter = pipe->w_counter = 1; pipe->max_usage = pipe_bufs; pipe->ring_size = pipe_bufs; pipe->nr_accounted = pipe_bufs; pipe->user = user; mutex_init(&pipe->mutex); lock_set_cmp_fn(&pipe->mutex, pipe_lock_cmp_fn, NULL); return pipe; } out_revert_acct: (void) account_pipe_buffers(user, pipe_bufs, 0); kfree(pipe); out_free_uid: free_uid(user); return NULL; } void free_pipe_info(struct pipe_inode_info *pipe) { unsigned int i; #ifdef CONFIG_WATCH_QUEUE if (pipe->watch_queue) watch_queue_clear(pipe->watch_queue); #endif (void) account_pipe_buffers(pipe->user, pipe->nr_accounted, 0); free_uid(pipe->user); for (i = 0; i < pipe->ring_size; i++) { struct pipe_buffer *buf = pipe->bufs + i; if (buf->ops) pipe_buf_release(pipe, buf); } #ifdef CONFIG_WATCH_QUEUE if (pipe->watch_queue) put_watch_queue(pipe->watch_queue); #endif if (pipe->tmp_page) __free_page(pipe->tmp_page); kfree(pipe->bufs); kfree(pipe); } static struct vfsmount *pipe_mnt __ro_after_init; /* * pipefs_dname() is called from d_path(). */ static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen) { return dynamic_dname(buffer, buflen, "pipe:[%lu]", d_inode(dentry)->i_ino); } static const struct dentry_operations pipefs_dentry_operations = { .d_dname = pipefs_dname, }; static struct inode * get_pipe_inode(void) { struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb); struct pipe_inode_info *pipe; if (!inode) goto fail_inode; inode->i_ino = get_next_ino(); pipe = alloc_pipe_info(); if (!pipe) goto fail_iput; inode->i_pipe = pipe; pipe->files = 2; pipe->readers = pipe->writers = 1; inode->i_fop = &pipefifo_fops; /* * Mark the inode dirty from the very beginning, * that way it will never be moved to the dirty * list because "mark_inode_dirty()" will think * that it already _is_ on the dirty list. */ inode->i_state = I_DIRTY; inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); simple_inode_init_ts(inode); return inode; fail_iput: iput(inode); fail_inode: return NULL; } int create_pipe_files(struct file **res, int flags) { struct inode *inode = get_pipe_inode(); struct file *f; int error; if (!inode) return -ENFILE; if (flags & O_NOTIFICATION_PIPE) { error = watch_queue_init(inode->i_pipe); if (error) { free_pipe_info(inode->i_pipe); iput(inode); return error; } } f = alloc_file_pseudo(inode, pipe_mnt, "", O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)), &pipefifo_fops); if (IS_ERR(f)) { free_pipe_info(inode->i_pipe); iput(inode); return PTR_ERR(f); } f->private_data = inode->i_pipe; f->f_pipe = 0; res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK), &pipefifo_fops); if (IS_ERR(res[0])) { put_pipe_info(inode, inode->i_pipe); fput(f); return PTR_ERR(res[0]); } res[0]->private_data = inode->i_pipe; res[0]->f_pipe = 0; res[1] = f; stream_open(inode, res[0]); stream_open(inode, res[1]); return 0; } static int __do_pipe_flags(int *fd, struct file **files, int flags) { int error; int fdw, fdr; if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT | O_NOTIFICATION_PIPE)) return -EINVAL; error = create_pipe_files(files, flags); if (error) return error; error = get_unused_fd_flags(flags); if (error < 0) goto err_read_pipe; fdr = error; error = get_unused_fd_flags(flags); if (error < 0) goto err_fdr; fdw = error; audit_fd_pair(fdr, fdw); fd[0] = fdr; fd[1] = fdw; /* pipe groks IOCB_NOWAIT */ files[0]->f_mode |= FMODE_NOWAIT; files[1]->f_mode |= FMODE_NOWAIT; return 0; err_fdr: put_unused_fd(fdr); err_read_pipe: fput(files[0]); fput(files[1]); return error; } int do_pipe_flags(int *fd, int flags) { struct file *files[2]; int error = __do_pipe_flags(fd, files, flags); if (!error) { fd_install(fd[0], files[0]); fd_install(fd[1], files[1]); } return error; } /* * sys_pipe() is the normal C calling standard for creating * a pipe. It's not the way Unix traditionally does this, though. */ static int do_pipe2(int __user *fildes, int flags) { struct file *files[2]; int fd[2]; int error; error = __do_pipe_flags(fd, files, flags); if (!error) { if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) { fput(files[0]); fput(files[1]); put_unused_fd(fd[0]); put_unused_fd(fd[1]); error = -EFAULT; } else { fd_install(fd[0], files[0]); fd_install(fd[1], files[1]); } } return error; } SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags) { return do_pipe2(fildes, flags); } SYSCALL_DEFINE1(pipe, int __user *, fildes) { return do_pipe2(fildes, 0); } /* * This is the stupid "wait for pipe to be readable or writable" * model. * * See pipe_read/write() for the proper kind of exclusive wait, * but that requires that we wake up any other readers/writers * if we then do not end up reading everything (ie the whole * "wake_next_reader/writer" logic in pipe_read/write()). */ void pipe_wait_readable(struct pipe_inode_info *pipe) { pipe_unlock(pipe); wait_event_interruptible(pipe->rd_wait, pipe_readable(pipe)); pipe_lock(pipe); } void pipe_wait_writable(struct pipe_inode_info *pipe) { pipe_unlock(pipe); wait_event_interruptible(pipe->wr_wait, pipe_writable(pipe)); pipe_lock(pipe); } /* * This depends on both the wait (here) and the wakeup (wake_up_partner) * holding the pipe lock, so "*cnt" is stable and we know a wakeup cannot * race with the count check and waitqueue prep. * * Normally in order to avoid races, you'd do the prepare_to_wait() first, * then check the condition you're waiting for, and only then sleep. But * because of the pipe lock, we can check the condition before being on * the wait queue. * * We use the 'rd_wait' waitqueue for pipe partner waiting. */ static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt) { DEFINE_WAIT(rdwait); int cur = *cnt; while (cur == *cnt) { prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE); pipe_unlock(pipe); schedule(); finish_wait(&pipe->rd_wait, &rdwait); pipe_lock(pipe); if (signal_pending(current)) break; } return cur == *cnt ? -ERESTARTSYS : 0; } static void wake_up_partner(struct pipe_inode_info *pipe) { wake_up_interruptible_all(&pipe->rd_wait); } static int fifo_open(struct inode *inode, struct file *filp) { struct pipe_inode_info *pipe; bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC; int ret; filp->f_pipe = 0; spin_lock(&inode->i_lock); if (inode->i_pipe) { pipe = inode->i_pipe; pipe->files++; spin_unlock(&inode->i_lock); } else { spin_unlock(&inode->i_lock); pipe = alloc_pipe_info(); if (!pipe) return -ENOMEM; pipe->files = 1; spin_lock(&inode->i_lock); if (unlikely(inode->i_pipe)) { inode->i_pipe->files++; spin_unlock(&inode->i_lock); free_pipe_info(pipe); pipe = inode->i_pipe; } else { inode->i_pipe = pipe; spin_unlock(&inode->i_lock); } } filp->private_data = pipe; /* OK, we have a pipe and it's pinned down */ mutex_lock(&pipe->mutex); /* We can only do regular read/write on fifos */ stream_open(inode, filp); switch (filp->f_mode & (FMODE_READ | FMODE_WRITE)) { case FMODE_READ: /* * O_RDONLY * POSIX.1 says that O_NONBLOCK means return with the FIFO * opened, even when there is no process writing the FIFO. */ pipe->r_counter++; if (pipe->readers++ == 0) wake_up_partner(pipe); if (!is_pipe && !pipe->writers) { if ((filp->f_flags & O_NONBLOCK)) { /* suppress EPOLLHUP until we have * seen a writer */ filp->f_pipe = pipe->w_counter; } else { if (wait_for_partner(pipe, &pipe->w_counter)) goto err_rd; } } break; case FMODE_WRITE: /* * O_WRONLY * POSIX.1 says that O_NONBLOCK means return -1 with * errno=ENXIO when there is no process reading the FIFO. */ ret = -ENXIO; if (!is_pipe && (filp->f_flags & O_NONBLOCK) && !pipe->readers) goto err; pipe->w_counter++; if (!pipe->writers++) wake_up_partner(pipe); if (!is_pipe && !pipe->readers) { if (wait_for_partner(pipe, &pipe->r_counter)) goto err_wr; } break; case FMODE_READ | FMODE_WRITE: /* * O_RDWR * POSIX.1 leaves this case "undefined" when O_NONBLOCK is set. * This implementation will NEVER block on a O_RDWR open, since * the process can at least talk to itself. */ pipe->readers++; pipe->writers++; pipe->r_counter++; pipe->w_counter++; if (pipe->readers == 1 || pipe->writers == 1) wake_up_partner(pipe); break; default: ret = -EINVAL; goto err; } /* Ok! */ mutex_unlock(&pipe->mutex); return 0; err_rd: if (!--pipe->readers) wake_up_interruptible(&pipe->wr_wait); ret = -ERESTARTSYS; goto err; err_wr: if (!--pipe->writers) wake_up_interruptible_all(&pipe->rd_wait); ret = -ERESTARTSYS; goto err; err: mutex_unlock(&pipe->mutex); put_pipe_info(inode, pipe); return ret; } const struct file_operations pipefifo_fops = { .open = fifo_open, .read_iter = pipe_read, .write_iter = pipe_write, .poll = pipe_poll, .unlocked_ioctl = pipe_ioctl, .release = pipe_release, .fasync = pipe_fasync, .splice_write = iter_file_splice_write, }; /* * Currently we rely on the pipe array holding a power-of-2 number * of pages. Returns 0 on error. */ unsigned int round_pipe_size(unsigned int size) { if (size > (1U << 31)) return 0; /* Minimum pipe size, as required by POSIX */ if (size < PAGE_SIZE) return PAGE_SIZE; return roundup_pow_of_two(size); } /* * Resize the pipe ring to a number of slots. * * Note the pipe can be reduced in capacity, but only if the current * occupancy doesn't exceed nr_slots; if it does, EBUSY will be * returned instead. */ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots) { struct pipe_buffer *bufs; unsigned int head, tail, mask, n; bufs = kcalloc(nr_slots, sizeof(*bufs), GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (unlikely(!bufs)) return -ENOMEM; spin_lock_irq(&pipe->rd_wait.lock); mask = pipe->ring_size - 1; head = pipe->head; tail = pipe->tail; n = pipe_occupancy(head, tail); if (nr_slots < n) { spin_unlock_irq(&pipe->rd_wait.lock); kfree(bufs); return -EBUSY; } /* * The pipe array wraps around, so just start the new one at zero * and adjust the indices. */ if (n > 0) { unsigned int h = head & mask; unsigned int t = tail & mask; if (h > t) { memcpy(bufs, pipe->bufs + t, n * sizeof(struct pipe_buffer)); } else { unsigned int tsize = pipe->ring_size - t; if (h > 0) memcpy(bufs + tsize, pipe->bufs, h * sizeof(struct pipe_buffer)); memcpy(bufs, pipe->bufs + t, tsize * sizeof(struct pipe_buffer)); } } head = n; tail = 0; kfree(pipe->bufs); pipe->bufs = bufs; pipe->ring_size = nr_slots; if (pipe->max_usage > nr_slots) pipe->max_usage = nr_slots; pipe->tail = tail; pipe->head = head; if (!pipe_has_watch_queue(pipe)) { pipe->max_usage = nr_slots; pipe->nr_accounted = nr_slots; } spin_unlock_irq(&pipe->rd_wait.lock); /* This might have made more room for writers */ wake_up_interruptible(&pipe->wr_wait); return 0; } /* * Allocate a new array of pipe buffers and copy the info over. Returns the * pipe size if successful, or return -ERROR on error. */ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned int arg) { unsigned long user_bufs; unsigned int nr_slots, size; long ret = 0; if (pipe_has_watch_queue(pipe)) return -EBUSY; size = round_pipe_size(arg); nr_slots = size >> PAGE_SHIFT; if (!nr_slots) return -EINVAL; /* * If trying to increase the pipe capacity, check that an * unprivileged user is not trying to exceed various limits * (soft limit check here, hard limit check just below). * Decreasing the pipe capacity is always permitted, even * if the user is currently over a limit. */ if (nr_slots > pipe->max_usage && size > pipe_max_size && !capable(CAP_SYS_RESOURCE)) return -EPERM; user_bufs = account_pipe_buffers(pipe->user, pipe->nr_accounted, nr_slots); if (nr_slots > pipe->max_usage && (too_many_pipe_buffers_hard(user_bufs) || too_many_pipe_buffers_soft(user_bufs)) && pipe_is_unprivileged_user()) { ret = -EPERM; goto out_revert_acct; } ret = pipe_resize_ring(pipe, nr_slots); if (ret < 0) goto out_revert_acct; return pipe->max_usage * PAGE_SIZE; out_revert_acct: (void) account_pipe_buffers(pipe->user, nr_slots, pipe->nr_accounted); return ret; } /* * Note that i_pipe and i_cdev share the same location, so checking ->i_pipe is * not enough to verify that this is a pipe. */ struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice) { struct pipe_inode_info *pipe = file->private_data; if (file->f_op != &pipefifo_fops || !pipe) return NULL; if (for_splice && pipe_has_watch_queue(pipe)) return NULL; return pipe; } long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg) { struct pipe_inode_info *pipe; long ret; pipe = get_pipe_info(file, false); if (!pipe) return -EBADF; mutex_lock(&pipe->mutex); switch (cmd) { case F_SETPIPE_SZ: ret = pipe_set_size(pipe, arg); break; case F_GETPIPE_SZ: ret = pipe->max_usage * PAGE_SIZE; break; default: ret = -EINVAL; break; } mutex_unlock(&pipe->mutex); return ret; } static const struct super_operations pipefs_ops = { .destroy_inode = free_inode_nonrcu, .statfs = simple_statfs, }; /* * pipefs should _never_ be mounted by userland - too much of security hassle, * no real gain from having the whole file system mounted. So we don't need * any operations on the root directory. However, we need a non-trivial * d_name - pipe: will go nicely and kill the special-casing in procfs. */ static int pipefs_init_fs_context(struct fs_context *fc) { struct pseudo_fs_context *ctx = init_pseudo(fc, PIPEFS_MAGIC); if (!ctx) return -ENOMEM; ctx->ops = &pipefs_ops; ctx->dops = &pipefs_dentry_operations; return 0; } static struct file_system_type pipe_fs_type = { .name = "pipefs", .init_fs_context = pipefs_init_fs_context, .kill_sb = kill_anon_super, }; #ifdef CONFIG_SYSCTL static int do_proc_dopipe_max_size_conv(unsigned long *lvalp, unsigned int *valp, int write, void *data) { if (write) { unsigned int val; val = round_pipe_size(*lvalp); if (val == 0) return -EINVAL; *valp = val; } else { unsigned int val = *valp; *lvalp = (unsigned long) val; } return 0; } static int proc_dopipe_max_size(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { return do_proc_douintvec(table, write, buffer, lenp, ppos, do_proc_dopipe_max_size_conv, NULL); } static struct ctl_table fs_pipe_sysctls[] = { { .procname = "pipe-max-size", .data = &pipe_max_size, .maxlen = sizeof(pipe_max_size), .mode = 0644, .proc_handler = proc_dopipe_max_size, }, { .procname = "pipe-user-pages-hard", .data = &pipe_user_pages_hard, .maxlen = sizeof(pipe_user_pages_hard), .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, { .procname = "pipe-user-pages-soft", .data = &pipe_user_pages_soft, .maxlen = sizeof(pipe_user_pages_soft), .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, }; #endif static int __init init_pipe_fs(void) { int err = register_filesystem(&pipe_fs_type); if (!err) { pipe_mnt = kern_mount(&pipe_fs_type); if (IS_ERR(pipe_mnt)) { err = PTR_ERR(pipe_mnt); unregister_filesystem(&pipe_fs_type); } } #ifdef CONFIG_SYSCTL register_sysctl_init("fs", fs_pipe_sysctls); #endif return err; } fs_initcall(init_pipe_fs);
155 99 157 158 158 158 157 6 6 6 6 6 6 3 3 3 3 3 3 3 8 1 11 1 1 1 1 1 1 1 1 1 1 1 55 38 56 55 4 3 3 1 2 2 1 1 1 1 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 // SPDX-License-Identifier: GPL-2.0+ /* * Buffer/page management specific to NILFS * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * * Written by Ryusuke Konishi and Seiji Kihara. */ #include <linux/pagemap.h> #include <linux/writeback.h> #include <linux/swap.h> #include <linux/bitops.h> #include <linux/page-flags.h> #include <linux/list.h> #include <linux/highmem.h> #include <linux/pagevec.h> #include <linux/gfp.h> #include "nilfs.h" #include "page.h" #include "mdt.h" #define NILFS_BUFFER_INHERENT_BITS \ (BIT(BH_Uptodate) | BIT(BH_Mapped) | BIT(BH_NILFS_Node) | \ BIT(BH_NILFS_Volatile) | BIT(BH_NILFS_Checked)) static struct buffer_head *__nilfs_get_folio_block(struct folio *folio, unsigned long block, pgoff_t index, int blkbits, unsigned long b_state) { unsigned long first_block; struct buffer_head *bh = folio_buffers(folio); if (!bh) bh = create_empty_buffers(folio, 1 << blkbits, b_state); first_block = (unsigned long)index << (PAGE_SHIFT - blkbits); bh = get_nth_bh(bh, block - first_block); wait_on_buffer(bh); return bh; } struct buffer_head *nilfs_grab_buffer(struct inode *inode, struct address_space *mapping, unsigned long blkoff, unsigned long b_state) { int blkbits = inode->i_blkbits; pgoff_t index = blkoff >> (PAGE_SHIFT - blkbits); struct folio *folio; struct buffer_head *bh; folio = filemap_grab_folio(mapping, index); if (IS_ERR(folio)) return NULL; bh = __nilfs_get_folio_block(folio, blkoff, index, blkbits, b_state); if (unlikely(!bh)) { folio_unlock(folio); folio_put(folio); return NULL; } bh->b_bdev = inode->i_sb->s_bdev; return bh; } /** * nilfs_forget_buffer - discard dirty state * @bh: buffer head of the buffer to be discarded */ void nilfs_forget_buffer(struct buffer_head *bh) { struct folio *folio = bh->b_folio; const unsigned long clear_bits = (BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) | BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) | BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected) | BIT(BH_Delay)); lock_buffer(bh); set_mask_bits(&bh->b_state, clear_bits, 0); if (nilfs_folio_buffers_clean(folio)) __nilfs_clear_folio_dirty(folio); bh->b_blocknr = -1; folio_clear_uptodate(folio); folio_clear_mappedtodisk(folio); unlock_buffer(bh); brelse(bh); } /** * nilfs_copy_buffer -- copy buffer data and flags * @dbh: destination buffer * @sbh: source buffer */ void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh) { void *saddr, *daddr; unsigned long bits; struct folio *sfolio = sbh->b_folio, *dfolio = dbh->b_folio; struct buffer_head *bh; saddr = kmap_local_folio(sfolio, bh_offset(sbh)); daddr = kmap_local_folio(dfolio, bh_offset(dbh)); memcpy(daddr, saddr, sbh->b_size); kunmap_local(daddr); kunmap_local(saddr); dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS; dbh->b_blocknr = sbh->b_blocknr; dbh->b_bdev = sbh->b_bdev; bh = dbh; bits = sbh->b_state & (BIT(BH_Uptodate) | BIT(BH_Mapped)); while ((bh = bh->b_this_page) != dbh) { lock_buffer(bh); bits &= bh->b_state; unlock_buffer(bh); } if (bits & BIT(BH_Uptodate)) folio_mark_uptodate(dfolio); else folio_clear_uptodate(dfolio); if (bits & BIT(BH_Mapped)) folio_set_mappedtodisk(dfolio); else folio_clear_mappedtodisk(dfolio); } /** * nilfs_folio_buffers_clean - Check if a folio has dirty buffers or not. * @folio: Folio to be checked. * * Return: false if the folio has dirty buffers, true otherwise. */ bool nilfs_folio_buffers_clean(struct folio *folio) { struct buffer_head *bh, *head; bh = head = folio_buffers(folio); do { if (buffer_dirty(bh)) return false; bh = bh->b_this_page; } while (bh != head); return true; } void nilfs_folio_bug(struct folio *folio) { struct buffer_head *bh, *head; struct address_space *m; unsigned long ino; if (unlikely(!folio)) { printk(KERN_CRIT "NILFS_FOLIO_BUG(NULL)\n"); return; } m = folio->mapping; ino = m ? m->host->i_ino : 0; printk(KERN_CRIT "NILFS_FOLIO_BUG(%p): cnt=%d index#=%llu flags=0x%lx " "mapping=%p ino=%lu\n", folio, folio_ref_count(folio), (unsigned long long)folio->index, folio->flags, m, ino); head = folio_buffers(folio); if (head) { int i = 0; bh = head; do { printk(KERN_CRIT " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n", i++, bh, atomic_read(&bh->b_count), (unsigned long long)bh->b_blocknr, bh->b_state); bh = bh->b_this_page; } while (bh != head); } } /** * nilfs_copy_folio -- copy the folio with buffers * @dst: destination folio * @src: source folio * @copy_dirty: flag whether to copy dirty states on the folio's buffer heads. * * This function is for both data folios and btnode folios. The dirty flag * should be treated by caller. The folio must not be under i/o. * Both src and dst folio must be locked */ static void nilfs_copy_folio(struct folio *dst, struct folio *src, bool copy_dirty) { struct buffer_head *dbh, *dbufs, *sbh; unsigned long mask = NILFS_BUFFER_INHERENT_BITS; BUG_ON(folio_test_writeback(dst)); sbh = folio_buffers(src); dbh = folio_buffers(dst); if (!dbh) dbh = create_empty_buffers(dst, sbh->b_size, 0); if (copy_dirty) mask |= BIT(BH_Dirty); dbufs = dbh; do { lock_buffer(sbh); lock_buffer(dbh); dbh->b_state = sbh->b_state & mask; dbh->b_blocknr = sbh->b_blocknr; dbh->b_bdev = sbh->b_bdev; sbh = sbh->b_this_page; dbh = dbh->b_this_page; } while (dbh != dbufs); folio_copy(dst, src); if (folio_test_uptodate(src) && !folio_test_uptodate(dst)) folio_mark_uptodate(dst); else if (!folio_test_uptodate(src) && folio_test_uptodate(dst)) folio_clear_uptodate(dst); if (folio_test_mappedtodisk(src) && !folio_test_mappedtodisk(dst)) folio_set_mappedtodisk(dst); else if (!folio_test_mappedtodisk(src) && folio_test_mappedtodisk(dst)) folio_clear_mappedtodisk(dst); do { unlock_buffer(sbh); unlock_buffer(dbh); sbh = sbh->b_this_page; dbh = dbh->b_this_page; } while (dbh != dbufs); } int nilfs_copy_dirty_pages(struct address_space *dmap, struct address_space *smap) { struct folio_batch fbatch; unsigned int i; pgoff_t index = 0; int err = 0; folio_batch_init(&fbatch); repeat: if (!filemap_get_folios_tag(smap, &index, (pgoff_t)-1, PAGECACHE_TAG_DIRTY, &fbatch)) return 0; for (i = 0; i < folio_batch_count(&fbatch); i++) { struct folio *folio = fbatch.folios[i], *dfolio; folio_lock(folio); if (unlikely(!folio_test_dirty(folio))) NILFS_FOLIO_BUG(folio, "inconsistent dirty state"); dfolio = filemap_grab_folio(dmap, folio->index); if (IS_ERR(dfolio)) { /* No empty page is added to the page cache */ folio_unlock(folio); err = PTR_ERR(dfolio); break; } if (unlikely(!folio_buffers(folio))) NILFS_FOLIO_BUG(folio, "found empty page in dat page cache"); nilfs_copy_folio(dfolio, folio, true); filemap_dirty_folio(folio_mapping(dfolio), dfolio); folio_unlock(dfolio); folio_put(dfolio); folio_unlock(folio); } folio_batch_release(&fbatch); cond_resched(); if (likely(!err)) goto repeat; return err; } /** * nilfs_copy_back_pages -- copy back pages to original cache from shadow cache * @dmap: destination page cache * @smap: source page cache * * No pages must be added to the cache during this process. * This must be ensured by the caller. */ void nilfs_copy_back_pages(struct address_space *dmap, struct address_space *smap) { struct folio_batch fbatch; unsigned int i, n; pgoff_t start = 0; folio_batch_init(&fbatch); repeat: n = filemap_get_folios(smap, &start, ~0UL, &fbatch); if (!n) return; for (i = 0; i < folio_batch_count(&fbatch); i++) { struct folio *folio = fbatch.folios[i], *dfolio; pgoff_t index = folio->index; folio_lock(folio); dfolio = filemap_lock_folio(dmap, index); if (!IS_ERR(dfolio)) { /* overwrite existing folio in the destination cache */ WARN_ON(folio_test_dirty(dfolio)); nilfs_copy_folio(dfolio, folio, false); folio_unlock(dfolio); folio_put(dfolio); /* Do we not need to remove folio from smap here? */ } else { struct folio *f; /* move the folio to the destination cache */ xa_lock_irq(&smap->i_pages); f = __xa_erase(&smap->i_pages, index); WARN_ON(folio != f); smap->nrpages--; xa_unlock_irq(&smap->i_pages); xa_lock_irq(&dmap->i_pages); f = __xa_store(&dmap->i_pages, index, folio, GFP_NOFS); if (unlikely(f)) { /* Probably -ENOMEM */ folio->mapping = NULL; folio_put(folio); } else { folio->mapping = dmap; dmap->nrpages++; if (folio_test_dirty(folio)) __xa_set_mark(&dmap->i_pages, index, PAGECACHE_TAG_DIRTY); } xa_unlock_irq(&dmap->i_pages); } folio_unlock(folio); } folio_batch_release(&fbatch); cond_resched(); goto repeat; } /** * nilfs_clear_dirty_pages - discard dirty pages in address space * @mapping: address space with dirty pages for discarding */ void nilfs_clear_dirty_pages(struct address_space *mapping) { struct folio_batch fbatch; unsigned int i; pgoff_t index = 0; folio_batch_init(&fbatch); while (filemap_get_folios_tag(mapping, &index, (pgoff_t)-1, PAGECACHE_TAG_DIRTY, &fbatch)) { for (i = 0; i < folio_batch_count(&fbatch); i++) { struct folio *folio = fbatch.folios[i]; folio_lock(folio); /* * This folio may have been removed from the address * space by truncation or invalidation when the lock * was acquired. Skip processing in that case. */ if (likely(folio->mapping == mapping)) nilfs_clear_folio_dirty(folio); folio_unlock(folio); } folio_batch_release(&fbatch); cond_resched(); } } /** * nilfs_clear_folio_dirty - discard dirty folio * @folio: dirty folio that will be discarded * * nilfs_clear_folio_dirty() clears working states including dirty state for * the folio and its buffers. If the folio has buffers, clear only if it is * confirmed that none of the buffer heads are busy (none have valid * references and none are locked). */ void nilfs_clear_folio_dirty(struct folio *folio) { struct buffer_head *bh, *head; BUG_ON(!folio_test_locked(folio)); head = folio_buffers(folio); if (head) { const unsigned long clear_bits = (BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) | BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) | BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected) | BIT(BH_Delay)); bool busy, invalidated = false; recheck_buffers: busy = false; bh = head; do { if (atomic_read(&bh->b_count) | buffer_locked(bh)) { busy = true; break; } } while (bh = bh->b_this_page, bh != head); if (busy) { if (invalidated) return; invalidate_bh_lrus(); invalidated = true; goto recheck_buffers; } bh = head; do { lock_buffer(bh); set_mask_bits(&bh->b_state, clear_bits, 0); unlock_buffer(bh); } while (bh = bh->b_this_page, bh != head); } folio_clear_uptodate(folio); folio_clear_mappedtodisk(folio); folio_clear_checked(folio); __nilfs_clear_folio_dirty(folio); } unsigned int nilfs_page_count_clean_buffers(struct folio *folio, unsigned int from, unsigned int to) { unsigned int block_start, block_end; struct buffer_head *bh, *head; unsigned int nc = 0; for (bh = head = folio_buffers(folio), block_start = 0; bh != head || !block_start; block_start = block_end, bh = bh->b_this_page) { block_end = block_start + bh->b_size; if (block_end > from && block_start < to && !buffer_dirty(bh)) nc++; } return nc; } /* * NILFS2 needs clear_page_dirty() in the following two cases: * * 1) For B-tree node pages and data pages of DAT file, NILFS2 clears dirty * flag of pages when it copies back pages from shadow cache to the * original cache. * * 2) Some B-tree operations like insertion or deletion may dispose buffers * in dirty state, and this needs to cancel the dirty state of their pages. */ void __nilfs_clear_folio_dirty(struct folio *folio) { struct address_space *mapping = folio->mapping; if (mapping) { xa_lock_irq(&mapping->i_pages); if (folio_test_dirty(folio)) { __xa_clear_mark(&mapping->i_pages, folio->index, PAGECACHE_TAG_DIRTY); xa_unlock_irq(&mapping->i_pages); folio_clear_dirty_for_io(folio); return; } xa_unlock_irq(&mapping->i_pages); return; } folio_clear_dirty(folio); } /** * nilfs_find_uncommitted_extent - find extent of uncommitted data * @inode: inode * @start_blk: start block offset (in) * @blkoff: start offset of the found extent (out) * * This function searches an extent of buffers marked "delayed" which * starts from a block offset equal to or larger than @start_blk. If * such an extent was found, this will store the start offset in * @blkoff and return its length in blocks. * * Return: Length in blocks of found extent, 0 otherwise. */ unsigned long nilfs_find_uncommitted_extent(struct inode *inode, sector_t start_blk, sector_t *blkoff) { unsigned int i, nr_folios; pgoff_t index; unsigned long length = 0; struct folio_batch fbatch; struct folio *folio; if (inode->i_mapping->nrpages == 0) return 0; index = start_blk >> (PAGE_SHIFT - inode->i_blkbits); folio_batch_init(&fbatch); repeat: nr_folios = filemap_get_folios_contig(inode->i_mapping, &index, ULONG_MAX, &fbatch); if (nr_folios == 0) return length; i = 0; do { folio = fbatch.folios[i]; folio_lock(folio); if (folio_buffers(folio)) { struct buffer_head *bh, *head; sector_t b; b = folio->index << (PAGE_SHIFT - inode->i_blkbits); bh = head = folio_buffers(folio); do { if (b < start_blk) continue; if (buffer_delay(bh)) { if (length == 0) *blkoff = b; length++; } else if (length > 0) { goto out_locked; } } while (++b, bh = bh->b_this_page, bh != head); } else { if (length > 0) goto out_locked; } folio_unlock(folio); } while (++i < nr_folios); folio_batch_release(&fbatch); cond_resched(); goto repeat; out_locked: folio_unlock(folio); folio_batch_release(&fbatch); return length; }
81 64 1 1 2 2 841 1 5 7 7 5 5 164 165 16 16 16 16 871 879 10 22 23 393 392 4242 4244 2 1 1 4241 4241 4210 64 168 64 25 64 4162 4160 4166 4149 9 9 9 4165 9 9 4151 4164 4162 113 4165 113 4151 9 4236 4234 4244 4248 4110 64 4042 1025 1025 16 3546 2031 40 28 4156 4 4246 4238 72 3609 2066 4248 4150 4159 1 34 64 4146 787 96 1602 4966 2430 4878 4943 4945 4947 2 2491 1598 4936 103 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 1994, Karl Keyte: Added support for disk statistics * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> * - July2000 * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001 */ /* * This handles all read/write requests to block devices */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/bio.h> #include <linux/blkdev.h> #include <linux/blk-pm.h> #include <linux/blk-integrity.h> #include <linux/highmem.h> #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/kernel_stat.h> #include <linux/string.h> #include <linux/init.h> #include <linux/completion.h> #include <linux/slab.h> #include <linux/swap.h> #include <linux/writeback.h> #include <linux/task_io_accounting_ops.h> #include <linux/fault-inject.h> #include <linux/list_sort.h> #include <linux/delay.h> #include <linux/ratelimit.h> #include <linux/pm_runtime.h> #include <linux/t10-pi.h> #include <linux/debugfs.h> #include <linux/bpf.h> #include <linux/part_stat.h> #include <linux/sched/sysctl.h> #include <linux/blk-crypto.h> #define CREATE_TRACE_POINTS #include <trace/events/block.h> #include "blk.h" #include "blk-mq-sched.h" #include "blk-pm.h" #include "blk-cgroup.h" #include "blk-throttle.h" #include "blk-ioprio.h" struct dentry *blk_debugfs_root; EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); EXPORT_TRACEPOINT_SYMBOL_GPL(block_split); EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_insert); static DEFINE_IDA(blk_queue_ida); /* * For queue allocation */ static struct kmem_cache *blk_requestq_cachep; /* * Controlling structure to kblockd */ static struct workqueue_struct *kblockd_workqueue; /** * blk_queue_flag_set - atomically set a queue flag * @flag: flag to be set * @q: request queue */ void blk_queue_flag_set(unsigned int flag, struct request_queue *q) { set_bit(flag, &q->queue_flags); } EXPORT_SYMBOL(blk_queue_flag_set); /** * blk_queue_flag_clear - atomically clear a queue flag * @flag: flag to be cleared * @q: request queue */ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q) { clear_bit(flag, &q->queue_flags); } EXPORT_SYMBOL(blk_queue_flag_clear); #define REQ_OP_NAME(name) [REQ_OP_##name] = #name static const char *const blk_op_name[] = { REQ_OP_NAME(READ), REQ_OP_NAME(WRITE), REQ_OP_NAME(FLUSH), REQ_OP_NAME(DISCARD), REQ_OP_NAME(SECURE_ERASE), REQ_OP_NAME(ZONE_RESET), REQ_OP_NAME(ZONE_RESET_ALL), REQ_OP_NAME(ZONE_OPEN), REQ_OP_NAME(ZONE_CLOSE), REQ_OP_NAME(ZONE_FINISH), REQ_OP_NAME(ZONE_APPEND), REQ_OP_NAME(WRITE_ZEROES), REQ_OP_NAME(DRV_IN), REQ_OP_NAME(DRV_OUT), }; #undef REQ_OP_NAME /** * blk_op_str - Return string XXX in the REQ_OP_XXX. * @op: REQ_OP_XXX. * * Description: Centralize block layer function to convert REQ_OP_XXX into * string format. Useful in the debugging and tracing bio or request. For * invalid REQ_OP_XXX it returns string "UNKNOWN". */ inline const char *blk_op_str(enum req_op op) { const char *op_str = "UNKNOWN"; if (op < ARRAY_SIZE(blk_op_name) && blk_op_name[op]) op_str = blk_op_name[op]; return op_str; } EXPORT_SYMBOL_GPL(blk_op_str); static const struct { int errno; const char *name; } blk_errors[] = { [BLK_STS_OK] = { 0, "" }, [BLK_STS_NOTSUPP] = { -EOPNOTSUPP, "operation not supported" }, [BLK_STS_TIMEOUT] = { -ETIMEDOUT, "timeout" }, [BLK_STS_NOSPC] = { -ENOSPC, "critical space allocation" }, [BLK_STS_TRANSPORT] = { -ENOLINK, "recoverable transport" }, [BLK_STS_TARGET] = { -EREMOTEIO, "critical target" }, [BLK_STS_RESV_CONFLICT] = { -EBADE, "reservation conflict" }, [BLK_STS_MEDIUM] = { -ENODATA, "critical medium" }, [BLK_STS_PROTECTION] = { -EILSEQ, "protection" }, [BLK_STS_RESOURCE] = { -ENOMEM, "kernel resource" }, [BLK_STS_DEV_RESOURCE] = { -EBUSY, "device resource" }, [BLK_STS_AGAIN] = { -EAGAIN, "nonblocking retry" }, [BLK_STS_OFFLINE] = { -ENODEV, "device offline" }, /* device mapper special case, should not leak out: */ [BLK_STS_DM_REQUEUE] = { -EREMCHG, "dm internal retry" }, /* zone device specific errors */ [BLK_STS_ZONE_OPEN_RESOURCE] = { -ETOOMANYREFS, "open zones exceeded" }, [BLK_STS_ZONE_ACTIVE_RESOURCE] = { -EOVERFLOW, "active zones exceeded" }, /* Command duration limit device-side timeout */ [BLK_STS_DURATION_LIMIT] = { -ETIME, "duration limit exceeded" }, [BLK_STS_INVAL] = { -EINVAL, "invalid" }, /* everything else not covered above: */ [BLK_STS_IOERR] = { -EIO, "I/O" }, }; blk_status_t errno_to_blk_status(int errno) { int i; for (i = 0; i < ARRAY_SIZE(blk_errors); i++) { if (blk_errors[i].errno == errno) return (__force blk_status_t)i; } return BLK_STS_IOERR; } EXPORT_SYMBOL_GPL(errno_to_blk_status); int blk_status_to_errno(blk_status_t status) { int idx = (__force int)status; if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors))) return -EIO; return blk_errors[idx].errno; } EXPORT_SYMBOL_GPL(blk_status_to_errno); const char *blk_status_to_str(blk_status_t status) { int idx = (__force int)status; if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors))) return "<null>"; return blk_errors[idx].name; } EXPORT_SYMBOL_GPL(blk_status_to_str); /** * blk_sync_queue - cancel any pending callbacks on a queue * @q: the queue * * Description: * The block layer may perform asynchronous callback activity * on a queue, such as calling the unplug function after a timeout. * A block device may call blk_sync_queue to ensure that any * such activity is cancelled, thus allowing it to release resources * that the callbacks might use. The caller must already have made sure * that its ->submit_bio will not re-add plugging prior to calling * this function. * * This function does not cancel any asynchronous activity arising * out of elevator or throttling code. That would require elevator_exit() * and blkcg_exit_queue() to be called with queue lock initialized. * */ void blk_sync_queue(struct request_queue *q) { del_timer_sync(&q->timeout); cancel_work_sync(&q->timeout_work); } EXPORT_SYMBOL(blk_sync_queue); /** * blk_set_pm_only - increment pm_only counter * @q: request queue pointer */ void blk_set_pm_only(struct request_queue *q) { atomic_inc(&q->pm_only); } EXPORT_SYMBOL_GPL(blk_set_pm_only); void blk_clear_pm_only(struct request_queue *q) { int pm_only; pm_only = atomic_dec_return(&q->pm_only); WARN_ON_ONCE(pm_only < 0); if (pm_only == 0) wake_up_all(&q->mq_freeze_wq); } EXPORT_SYMBOL_GPL(blk_clear_pm_only); static void blk_free_queue_rcu(struct rcu_head *rcu_head) { struct request_queue *q = container_of(rcu_head, struct request_queue, rcu_head); percpu_ref_exit(&q->q_usage_counter); kmem_cache_free(blk_requestq_cachep, q); } static void blk_free_queue(struct request_queue *q) { blk_free_queue_stats(q->stats); if (queue_is_mq(q)) blk_mq_release(q); ida_free(&blk_queue_ida, q->id); lockdep_unregister_key(&q->io_lock_cls_key); lockdep_unregister_key(&q->q_lock_cls_key); call_rcu(&q->rcu_head, blk_free_queue_rcu); } /** * blk_put_queue - decrement the request_queue refcount * @q: the request_queue structure to decrement the refcount for * * Decrements the refcount of the request_queue and free it when the refcount * reaches 0. */ void blk_put_queue(struct request_queue *q) { if (refcount_dec_and_test(&q->refs)) blk_free_queue(q); } EXPORT_SYMBOL(blk_put_queue); bool blk_queue_start_drain(struct request_queue *q) { /* * When queue DYING flag is set, we need to block new req * entering queue, so we call blk_freeze_queue_start() to * prevent I/O from crossing blk_queue_enter(). */ bool freeze = __blk_freeze_queue_start(q, current); if (queue_is_mq(q)) blk_mq_wake_waiters(q); /* Make blk_queue_enter() reexamine the DYING flag. */ wake_up_all(&q->mq_freeze_wq); return freeze; } /** * blk_queue_enter() - try to increase q->q_usage_counter * @q: request queue pointer * @flags: BLK_MQ_REQ_NOWAIT and/or BLK_MQ_REQ_PM */ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) { const bool pm = flags & BLK_MQ_REQ_PM; while (!blk_try_enter_queue(q, pm)) { if (flags & BLK_MQ_REQ_NOWAIT) return -EAGAIN; /* * read pair of barrier in blk_freeze_queue_start(), we need to * order reading __PERCPU_REF_DEAD flag of .q_usage_counter and * reading .mq_freeze_depth or queue dying flag, otherwise the * following wait may never return if the two reads are * reordered. */ smp_rmb(); wait_event(q->mq_freeze_wq, (!q->mq_freeze_depth && blk_pm_resume_queue(pm, q)) || blk_queue_dying(q)); if (blk_queue_dying(q)) return -ENODEV; } rwsem_acquire_read(&q->q_lockdep_map, 0, 0, _RET_IP_); rwsem_release(&q->q_lockdep_map, _RET_IP_); return 0; } int __bio_queue_enter(struct request_queue *q, struct bio *bio) { while (!blk_try_enter_queue(q, false)) { struct gendisk *disk = bio->bi_bdev->bd_disk; if (bio->bi_opf & REQ_NOWAIT) { if (test_bit(GD_DEAD, &disk->state)) goto dead; bio_wouldblock_error(bio); return -EAGAIN; } /* * read pair of barrier in blk_freeze_queue_start(), we need to * order reading __PERCPU_REF_DEAD flag of .q_usage_counter and * reading .mq_freeze_depth or queue dying flag, otherwise the * following wait may never return if the two reads are * reordered. */ smp_rmb(); wait_event(q->mq_freeze_wq, (!q->mq_freeze_depth && blk_pm_resume_queue(false, q)) || test_bit(GD_DEAD, &disk->state)); if (test_bit(GD_DEAD, &disk->state)) goto dead; } rwsem_acquire_read(&q->io_lockdep_map, 0, 0, _RET_IP_); rwsem_release(&q->io_lockdep_map, _RET_IP_); return 0; dead: bio_io_error(bio); return -ENODEV; } void blk_queue_exit(struct request_queue *q) { percpu_ref_put(&q->q_usage_counter); } static void blk_queue_usage_counter_release(struct percpu_ref *ref) { struct request_queue *q = container_of(ref, struct request_queue, q_usage_counter); wake_up_all(&q->mq_freeze_wq); } static void blk_rq_timed_out_timer(struct timer_list *t) { struct request_queue *q = from_timer(q, t, timeout); kblockd_schedule_work(&q->timeout_work); } static void blk_timeout_work(struct work_struct *work) { } struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id) { struct request_queue *q; int error; q = kmem_cache_alloc_node(blk_requestq_cachep, GFP_KERNEL | __GFP_ZERO, node_id); if (!q) return ERR_PTR(-ENOMEM); q->last_merge = NULL; q->id = ida_alloc(&blk_queue_ida, GFP_KERNEL); if (q->id < 0) { error = q->id; goto fail_q; } q->stats = blk_alloc_queue_stats(); if (!q->stats) { error = -ENOMEM; goto fail_id; } error = blk_set_default_limits(lim); if (error) goto fail_stats; q->limits = *lim; q->node = node_id; atomic_set(&q->nr_active_requests_shared_tags, 0); timer_setup(&q->timeout, blk_rq_timed_out_timer, 0); INIT_WORK(&q->timeout_work, blk_timeout_work); INIT_LIST_HEAD(&q->icq_list); refcount_set(&q->refs, 1); mutex_init(&q->debugfs_mutex); mutex_init(&q->sysfs_lock); mutex_init(&q->sysfs_dir_lock); mutex_init(&q->limits_lock); mutex_init(&q->rq_qos_mutex); spin_lock_init(&q->queue_lock); init_waitqueue_head(&q->mq_freeze_wq); mutex_init(&q->mq_freeze_lock); blkg_init_queue(q); /* * Init percpu_ref in atomic mode so that it's faster to shutdown. * See blk_register_queue() for details. */ error = percpu_ref_init(&q->q_usage_counter, blk_queue_usage_counter_release, PERCPU_REF_INIT_ATOMIC, GFP_KERNEL); if (error) goto fail_stats; lockdep_register_key(&q->io_lock_cls_key); lockdep_register_key(&q->q_lock_cls_key); lockdep_init_map(&q->io_lockdep_map, "&q->q_usage_counter(io)", &q->io_lock_cls_key, 0); lockdep_init_map(&q->q_lockdep_map, "&q->q_usage_counter(queue)", &q->q_lock_cls_key, 0); q->nr_requests = BLKDEV_DEFAULT_RQ; return q; fail_stats: blk_free_queue_stats(q->stats); fail_id: ida_free(&blk_queue_ida, q->id); fail_q: kmem_cache_free(blk_requestq_cachep, q); return ERR_PTR(error); } /** * blk_get_queue - increment the request_queue refcount * @q: the request_queue structure to increment the refcount for * * Increment the refcount of the request_queue kobject. * * Context: Any context. */ bool blk_get_queue(struct request_queue *q) { if (unlikely(blk_queue_dying(q))) return false; refcount_inc(&q->refs); return true; } EXPORT_SYMBOL(blk_get_queue); #ifdef CONFIG_FAIL_MAKE_REQUEST static DECLARE_FAULT_ATTR(fail_make_request); static int __init setup_fail_make_request(char *str) { return setup_fault_attr(&fail_make_request, str); } __setup("fail_make_request=", setup_fail_make_request); bool should_fail_request(struct block_device *part, unsigned int bytes) { return bdev_test_flag(part, BD_MAKE_IT_FAIL) && should_fail(&fail_make_request, bytes); } static int __init fail_make_request_debugfs(void) { struct dentry *dir = fault_create_debugfs_attr("fail_make_request", NULL, &fail_make_request); return PTR_ERR_OR_ZERO(dir); } late_initcall(fail_make_request_debugfs); #endif /* CONFIG_FAIL_MAKE_REQUEST */ static inline void bio_check_ro(struct bio *bio) { if (op_is_write(bio_op(bio)) && bdev_read_only(bio->bi_bdev)) { if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) return; if (bdev_test_flag(bio->bi_bdev, BD_RO_WARNED)) return; bdev_set_flag(bio->bi_bdev, BD_RO_WARNED); /* * Use ioctl to set underlying disk of raid/dm to read-only * will trigger this. */ pr_warn("Trying to write to read-only block-device %pg\n", bio->bi_bdev); } } static noinline int should_fail_bio(struct bio *bio) { if (should_fail_request(bdev_whole(bio->bi_bdev), bio->bi_iter.bi_size)) return -EIO; return 0; } ALLOW_ERROR_INJECTION(should_fail_bio, ERRNO); /* * Check whether this bio extends beyond the end of the device or partition. * This may well happen - the kernel calls bread() without checking the size of * the device, e.g., when mounting a file system. */ static inline int bio_check_eod(struct bio *bio) { sector_t maxsector = bdev_nr_sectors(bio->bi_bdev); unsigned int nr_sectors = bio_sectors(bio); if (nr_sectors && (nr_sectors > maxsector || bio->bi_iter.bi_sector > maxsector - nr_sectors)) { pr_info_ratelimited("%s: attempt to access beyond end of device\n" "%pg: rw=%d, sector=%llu, nr_sectors = %u limit=%llu\n", current->comm, bio->bi_bdev, bio->bi_opf, bio->bi_iter.bi_sector, nr_sectors, maxsector); return -EIO; } return 0; } /* * Remap block n of partition p to block n+start(p) of the disk. */ static int blk_partition_remap(struct bio *bio) { struct block_device *p = bio->bi_bdev; if (unlikely(should_fail_request(p, bio->bi_iter.bi_size))) return -EIO; if (bio_sectors(bio)) { bio->bi_iter.bi_sector += p->bd_start_sect; trace_block_bio_remap(bio, p->bd_dev, bio->bi_iter.bi_sector - p->bd_start_sect); } bio_set_flag(bio, BIO_REMAPPED); return 0; } /* * Check write append to a zoned block device. */ static inline blk_status_t blk_check_zone_append(struct request_queue *q, struct bio *bio) { int nr_sectors = bio_sectors(bio); /* Only applicable to zoned block devices */ if (!bdev_is_zoned(bio->bi_bdev)) return BLK_STS_NOTSUPP; /* The bio sector must point to the start of a sequential zone */ if (!bdev_is_zone_start(bio->bi_bdev, bio->bi_iter.bi_sector)) return BLK_STS_IOERR; /* * Not allowed to cross zone boundaries. Otherwise, the BIO will be * split and could result in non-contiguous sectors being written in * different zones. */ if (nr_sectors > q->limits.chunk_sectors) return BLK_STS_IOERR; /* Make sure the BIO is small enough and will not get split */ if (nr_sectors > q->limits.max_zone_append_sectors) return BLK_STS_IOERR; bio->bi_opf |= REQ_NOMERGE; return BLK_STS_OK; } static void __submit_bio(struct bio *bio) { /* If plug is not used, add new plug here to cache nsecs time. */ struct blk_plug plug; if (unlikely(!blk_crypto_bio_prep(&bio))) return; blk_start_plug(&plug); if (!bdev_test_flag(bio->bi_bdev, BD_HAS_SUBMIT_BIO)) { blk_mq_submit_bio(bio); } else if (likely(bio_queue_enter(bio) == 0)) { struct gendisk *disk = bio->bi_bdev->bd_disk; if ((bio->bi_opf & REQ_POLLED) && !(disk->queue->limits.features & BLK_FEAT_POLL)) { bio->bi_status = BLK_STS_NOTSUPP; bio_endio(bio); } else { disk->fops->submit_bio(bio); } blk_queue_exit(disk->queue); } blk_finish_plug(&plug); } /* * The loop in this function may be a bit non-obvious, and so deserves some * explanation: * * - Before entering the loop, bio->bi_next is NULL (as all callers ensure * that), so we have a list with a single bio. * - We pretend that we have just taken it off a longer list, so we assign * bio_list to a pointer to the bio_list_on_stack, thus initialising the * bio_list of new bios to be added. ->submit_bio() may indeed add some more * bios through a recursive call to submit_bio_noacct. If it did, we find a * non-NULL value in bio_list and re-enter the loop from the top. * - In this case we really did just take the bio of the top of the list (no * pretending) and so remove it from bio_list, and call into ->submit_bio() * again. * * bio_list_on_stack[0] contains bios submitted by the current ->submit_bio. * bio_list_on_stack[1] contains bios that were submitted before the current * ->submit_bio, but that haven't been processed yet. */ static void __submit_bio_noacct(struct bio *bio) { struct bio_list bio_list_on_stack[2]; BUG_ON(bio->bi_next); bio_list_init(&bio_list_on_stack[0]); current->bio_list = bio_list_on_stack; do { struct request_queue *q = bdev_get_queue(bio->bi_bdev); struct bio_list lower, same; /* * Create a fresh bio_list for all subordinate requests. */ bio_list_on_stack[1] = bio_list_on_stack[0]; bio_list_init(&bio_list_on_stack[0]); __submit_bio(bio); /* * Sort new bios into those for a lower level and those for the * same level. */ bio_list_init(&lower); bio_list_init(&same); while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL) if (q == bdev_get_queue(bio->bi_bdev)) bio_list_add(&same, bio); else bio_list_add(&lower, bio); /* * Now assemble so we handle the lowest level first. */ bio_list_merge(&bio_list_on_stack[0], &lower); bio_list_merge(&bio_list_on_stack[0], &same); bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]); } while ((bio = bio_list_pop(&bio_list_on_stack[0]))); current->bio_list = NULL; } static void __submit_bio_noacct_mq(struct bio *bio) { struct bio_list bio_list[2] = { }; current->bio_list = bio_list; do { __submit_bio(bio); } while ((bio = bio_list_pop(&bio_list[0]))); current->bio_list = NULL; } void submit_bio_noacct_nocheck(struct bio *bio) { blk_cgroup_bio_start(bio); blkcg_bio_issue_init(bio); if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) { trace_block_bio_queue(bio); /* * Now that enqueuing has been traced, we need to trace * completion as well. */ bio_set_flag(bio, BIO_TRACE_COMPLETION); } /* * We only want one ->submit_bio to be active at a time, else stack * usage with stacked devices could be a problem. Use current->bio_list * to collect a list of requests submited by a ->submit_bio method while * it is active, and then process them after it returned. */ if (current->bio_list) bio_list_add(&current->bio_list[0], bio); else if (!bdev_test_flag(bio->bi_bdev, BD_HAS_SUBMIT_BIO)) __submit_bio_noacct_mq(bio); else __submit_bio_noacct(bio); } static blk_status_t blk_validate_atomic_write_op_size(struct request_queue *q, struct bio *bio) { if (bio->bi_iter.bi_size > queue_atomic_write_unit_max_bytes(q)) return BLK_STS_INVAL; if (bio->bi_iter.bi_size % queue_atomic_write_unit_min_bytes(q)) return BLK_STS_INVAL; return BLK_STS_OK; } /** * submit_bio_noacct - re-submit a bio to the block device layer for I/O * @bio: The bio describing the location in memory and on the device. * * This is a version of submit_bio() that shall only be used for I/O that is * resubmitted to lower level drivers by stacking block drivers. All file * systems and other upper level users of the block layer should use * submit_bio() instead. */ void submit_bio_noacct(struct bio *bio) { struct block_device *bdev = bio->bi_bdev; struct request_queue *q = bdev_get_queue(bdev); blk_status_t status = BLK_STS_IOERR; might_sleep(); /* * For a REQ_NOWAIT based request, return -EOPNOTSUPP * if queue does not support NOWAIT. */ if ((bio->bi_opf & REQ_NOWAIT) && !bdev_nowait(bdev)) goto not_supported; if (should_fail_bio(bio)) goto end_io; bio_check_ro(bio); if (!bio_flagged(bio, BIO_REMAPPED)) { if (unlikely(bio_check_eod(bio))) goto end_io; if (bdev_is_partition(bdev) && unlikely(blk_partition_remap(bio))) goto end_io; } /* * Filter flush bio's early so that bio based drivers without flush * support don't have to worry about them. */ if (op_is_flush(bio->bi_opf)) { if (WARN_ON_ONCE(bio_op(bio) != REQ_OP_WRITE && bio_op(bio) != REQ_OP_ZONE_APPEND)) goto end_io; if (!bdev_write_cache(bdev)) { bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA); if (!bio_sectors(bio)) { status = BLK_STS_OK; goto end_io; } } } switch (bio_op(bio)) { case REQ_OP_READ: break; case REQ_OP_WRITE: if (bio->bi_opf & REQ_ATOMIC) { status = blk_validate_atomic_write_op_size(q, bio); if (status != BLK_STS_OK) goto end_io; } break; case REQ_OP_FLUSH: /* * REQ_OP_FLUSH can't be submitted through bios, it is only * synthetized in struct request by the flush state machine. */ goto not_supported; case REQ_OP_DISCARD: if (!bdev_max_discard_sectors(bdev)) goto not_supported; break; case REQ_OP_SECURE_ERASE: if (!bdev_max_secure_erase_sectors(bdev)) goto not_supported; break; case REQ_OP_ZONE_APPEND: status = blk_check_zone_append(q, bio); if (status != BLK_STS_OK) goto end_io; break; case REQ_OP_WRITE_ZEROES: if (!q->limits.max_write_zeroes_sectors) goto not_supported; break; case REQ_OP_ZONE_RESET: case REQ_OP_ZONE_OPEN: case REQ_OP_ZONE_CLOSE: case REQ_OP_ZONE_FINISH: case REQ_OP_ZONE_RESET_ALL: if (!bdev_is_zoned(bio->bi_bdev)) goto not_supported; break; case REQ_OP_DRV_IN: case REQ_OP_DRV_OUT: /* * Driver private operations are only used with passthrough * requests. */ fallthrough; default: goto not_supported; } if (blk_throtl_bio(bio)) return; submit_bio_noacct_nocheck(bio); return; not_supported: status = BLK_STS_NOTSUPP; end_io: bio->bi_status = status; bio_endio(bio); } EXPORT_SYMBOL(submit_bio_noacct); static void bio_set_ioprio(struct bio *bio) { /* Nobody set ioprio so far? Initialize it based on task's nice value */ if (IOPRIO_PRIO_CLASS(bio->bi_ioprio) == IOPRIO_CLASS_NONE) bio->bi_ioprio = get_current_ioprio(); blkcg_set_ioprio(bio); } /** * submit_bio - submit a bio to the block device layer for I/O * @bio: The &struct bio which describes the I/O * * submit_bio() is used to submit I/O requests to block devices. It is passed a * fully set up &struct bio that describes the I/O that needs to be done. The * bio will be send to the device described by the bi_bdev field. * * The success/failure status of the request, along with notification of * completion, is delivered asynchronously through the ->bi_end_io() callback * in @bio. The bio must NOT be touched by the caller until ->bi_end_io() has * been called. */ void submit_bio(struct bio *bio) { if (bio_op(bio) == REQ_OP_READ) { task_io_account_read(bio->bi_iter.bi_size); count_vm_events(PGPGIN, bio_sectors(bio)); } else if (bio_op(bio) == REQ_OP_WRITE) { count_vm_events(PGPGOUT, bio_sectors(bio)); } bio_set_ioprio(bio); submit_bio_noacct(bio); } EXPORT_SYMBOL(submit_bio); /** * bio_poll - poll for BIO completions * @bio: bio to poll for * @iob: batches of IO * @flags: BLK_POLL_* flags that control the behavior * * Poll for completions on queue associated with the bio. Returns number of * completed entries found. * * Note: the caller must either be the context that submitted @bio, or * be in a RCU critical section to prevent freeing of @bio. */ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags) { blk_qc_t cookie = READ_ONCE(bio->bi_cookie); struct block_device *bdev; struct request_queue *q; int ret = 0; bdev = READ_ONCE(bio->bi_bdev); if (!bdev) return 0; q = bdev_get_queue(bdev); if (cookie == BLK_QC_T_NONE) return 0; blk_flush_plug(current->plug, false); /* * We need to be able to enter a frozen queue, similar to how * timeouts also need to do that. If that is blocked, then we can * have pending IO when a queue freeze is started, and then the * wait for the freeze to finish will wait for polled requests to * timeout as the poller is preventer from entering the queue and * completing them. As long as we prevent new IO from being queued, * that should be all that matters. */ if (!percpu_ref_tryget(&q->q_usage_counter)) return 0; if (queue_is_mq(q)) { ret = blk_mq_poll(q, cookie, iob, flags); } else { struct gendisk *disk = q->disk; if ((q->limits.features & BLK_FEAT_POLL) && disk && disk->fops->poll_bio) ret = disk->fops->poll_bio(bio, iob, flags); } blk_queue_exit(q); return ret; } EXPORT_SYMBOL_GPL(bio_poll); /* * Helper to implement file_operations.iopoll. Requires the bio to be stored * in iocb->private, and cleared before freeing the bio. */ int iocb_bio_iopoll(struct kiocb *kiocb, struct io_comp_batch *iob, unsigned int flags) { struct bio *bio; int ret = 0; /* * Note: the bio cache only uses SLAB_TYPESAFE_BY_RCU, so bio can * point to a freshly allocated bio at this point. If that happens * we have a few cases to consider: * * 1) the bio is beeing initialized and bi_bdev is NULL. We can just * simply nothing in this case * 2) the bio points to a not poll enabled device. bio_poll will catch * this and return 0 * 3) the bio points to a poll capable device, including but not * limited to the one that the original bio pointed to. In this * case we will call into the actual poll method and poll for I/O, * even if we don't need to, but it won't cause harm either. * * For cases 2) and 3) above the RCU grace period ensures that bi_bdev * is still allocated. Because partitions hold a reference to the whole * device bdev and thus disk, the disk is also still valid. Grabbing * a reference to the queue in bio_poll() ensures the hctxs and requests * are still valid as well. */ rcu_read_lock(); bio = READ_ONCE(kiocb->private); if (bio) ret = bio_poll(bio, iob, flags); rcu_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(iocb_bio_iopoll); void update_io_ticks(struct block_device *part, unsigned long now, bool end) { unsigned long stamp; again: stamp = READ_ONCE(part->bd_stamp); if (unlikely(time_after(now, stamp)) && likely(try_cmpxchg(&part->bd_stamp, &stamp, now)) && (end || part_in_flight(part))) __part_stat_add(part, io_ticks, now - stamp); if (bdev_is_partition(part)) { part = bdev_whole(part); goto again; } } unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op, unsigned long start_time) { part_stat_lock(); update_io_ticks(bdev, start_time, false); part_stat_local_inc(bdev, in_flight[op_is_write(op)]); part_stat_unlock(); return start_time; } EXPORT_SYMBOL(bdev_start_io_acct); /** * bio_start_io_acct - start I/O accounting for bio based drivers * @bio: bio to start account for * * Returns the start time that should be passed back to bio_end_io_acct(). */ unsigned long bio_start_io_acct(struct bio *bio) { return bdev_start_io_acct(bio->bi_bdev, bio_op(bio), jiffies); } EXPORT_SYMBOL_GPL(bio_start_io_acct); void bdev_end_io_acct(struct block_device *bdev, enum req_op op, unsigned int sectors, unsigned long start_time) { const int sgrp = op_stat_group(op); unsigned long now = READ_ONCE(jiffies); unsigned long duration = now - start_time; part_stat_lock(); update_io_ticks(bdev, now, true); part_stat_inc(bdev, ios[sgrp]); part_stat_add(bdev, sectors[sgrp], sectors); part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration)); part_stat_local_dec(bdev, in_flight[op_is_write(op)]); part_stat_unlock(); } EXPORT_SYMBOL(bdev_end_io_acct); void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, struct block_device *orig_bdev) { bdev_end_io_acct(orig_bdev, bio_op(bio), bio_sectors(bio), start_time); } EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped); /** * blk_lld_busy - Check if underlying low-level drivers of a device are busy * @q : the queue of the device being checked * * Description: * Check if underlying low-level drivers of a device are busy. * If the drivers want to export their busy state, they must set own * exporting function using blk_queue_lld_busy() first. * * Basically, this function is used only by request stacking drivers * to stop dispatching requests to underlying devices when underlying * devices are busy. This behavior helps more I/O merging on the queue * of the request stacking driver and prevents I/O throughput regression * on burst I/O load. * * Return: * 0 - Not busy (The request stacking driver should dispatch request) * 1 - Busy (The request stacking driver should stop dispatching request) */ int blk_lld_busy(struct request_queue *q) { if (queue_is_mq(q) && q->mq_ops->busy) return q->mq_ops->busy(q); return 0; } EXPORT_SYMBOL_GPL(blk_lld_busy); int kblockd_schedule_work(struct work_struct *work) { return queue_work(kblockd_workqueue, work); } EXPORT_SYMBOL(kblockd_schedule_work); int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay) { return mod_delayed_work_on(cpu, kblockd_workqueue, dwork, delay); } EXPORT_SYMBOL(kblockd_mod_delayed_work_on); void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios) { struct task_struct *tsk = current; /* * If this is a nested plug, don't actually assign it. */ if (tsk->plug) return; plug->cur_ktime = 0; rq_list_init(&plug->mq_list); rq_list_init(&plug->cached_rqs); plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT); plug->rq_count = 0; plug->multiple_queues = false; plug->has_elevator = false; INIT_LIST_HEAD(&plug->cb_list); /* * Store ordering should not be needed here, since a potential * preempt will imply a full memory barrier */ tsk->plug = plug; } /** * blk_start_plug - initialize blk_plug and track it inside the task_struct * @plug: The &struct blk_plug that needs to be initialized * * Description: * blk_start_plug() indicates to the block layer an intent by the caller * to submit multiple I/O requests in a batch. The block layer may use * this hint to defer submitting I/Os from the caller until blk_finish_plug() * is called. However, the block layer may choose to submit requests * before a call to blk_finish_plug() if the number of queued I/Os * exceeds %BLK_MAX_REQUEST_COUNT, or if the size of the I/O is larger than * %BLK_PLUG_FLUSH_SIZE. The queued I/Os may also be submitted early if * the task schedules (see below). * * Tracking blk_plug inside the task_struct will help with auto-flushing the * pending I/O should the task end up blocking between blk_start_plug() and * blk_finish_plug(). This is important from a performance perspective, but * also ensures that we don't deadlock. For instance, if the task is blocking * for a memory allocation, memory reclaim could end up wanting to free a * page belonging to that request that is currently residing in our private * plug. By flushing the pending I/O when the process goes to sleep, we avoid * this kind of deadlock. */ void blk_start_plug(struct blk_plug *plug) { blk_start_plug_nr_ios(plug, 1); } EXPORT_SYMBOL(blk_start_plug); static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule) { LIST_HEAD(callbacks); while (!list_empty(&plug->cb_list)) { list_splice_init(&plug->cb_list, &callbacks); while (!list_empty(&callbacks)) { struct blk_plug_cb *cb = list_first_entry(&callbacks, struct blk_plug_cb, list); list_del(&cb->list); cb->callback(cb, from_schedule); } } } struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, void *data, int size) { struct blk_plug *plug = current->plug; struct blk_plug_cb *cb; if (!plug) return NULL; list_for_each_entry(cb, &plug->cb_list, list) if (cb->callback == unplug && cb->data == data) return cb; /* Not currently on the callback list */ BUG_ON(size < sizeof(*cb)); cb = kzalloc(size, GFP_ATOMIC); if (cb) { cb->data = data; cb->callback = unplug; list_add(&cb->list, &plug->cb_list); } return cb; } EXPORT_SYMBOL(blk_check_plugged); void __blk_flush_plug(struct blk_plug *plug, bool from_schedule) { if (!list_empty(&plug->cb_list)) flush_plug_callbacks(plug, from_schedule); blk_mq_flush_plug_list(plug, from_schedule); /* * Unconditionally flush out cached requests, even if the unplug * event came from schedule. Since we know hold references to the * queue for cached requests, we don't want a blocked task holding * up a queue freeze/quiesce event. */ if (unlikely(!rq_list_empty(&plug->cached_rqs))) blk_mq_free_plug_rqs(plug); plug->cur_ktime = 0; current->flags &= ~PF_BLOCK_TS; } /** * blk_finish_plug - mark the end of a batch of submitted I/O * @plug: The &struct blk_plug passed to blk_start_plug() * * Description: * Indicate that a batch of I/O submissions is complete. This function * must be paired with an initial call to blk_start_plug(). The intent * is to allow the block layer to optimize I/O submission. See the * documentation for blk_start_plug() for more information. */ void blk_finish_plug(struct blk_plug *plug) { if (plug == current->plug) { __blk_flush_plug(plug, false); current->plug = NULL; } } EXPORT_SYMBOL(blk_finish_plug); void blk_io_schedule(void) { /* Prevent hang_check timer from firing at us during very long I/O */ unsigned long timeout = sysctl_hung_task_timeout_secs * HZ / 2; if (timeout) io_schedule_timeout(timeout); else io_schedule(); } EXPORT_SYMBOL_GPL(blk_io_schedule); int __init blk_dev_init(void) { BUILD_BUG_ON((__force u32)REQ_OP_LAST >= (1 << REQ_OP_BITS)); BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 * sizeof_field(struct request, cmd_flags)); BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 * sizeof_field(struct bio, bi_opf)); /* used for unplugging and affects IO latency/throughput - HIGHPRI */ kblockd_workqueue = alloc_workqueue("kblockd", WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); if (!kblockd_workqueue) panic("Failed to create kblockd\n"); blk_requestq_cachep = KMEM_CACHE(request_queue, SLAB_PANIC); blk_debugfs_root = debugfs_create_dir("block", NULL); return 0; }
16 16 16 1 16 16 7 6 1 6 1 35 9 29 29 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 // SPDX-License-Identifier: GPL-2.0+ /* * linux/fs/jbd2/revoke.c * * Written by Stephen C. Tweedie <sct@redhat.com>, 2000 * * Copyright 2000 Red Hat corp --- All Rights Reserved * * Journal revoke routines for the generic filesystem journaling code; * part of the ext2fs journaling system. * * Revoke is the mechanism used to prevent old log records for deleted * metadata from being replayed on top of newer data using the same * blocks. The revoke mechanism is used in two separate places: * * + Commit: during commit we write the entire list of the current * transaction's revoked blocks to the journal * * + Recovery: during recovery we record the transaction ID of all * revoked blocks. If there are multiple revoke records in the log * for a single block, only the last one counts, and if there is a log * entry for a block beyond the last revoke, then that log entry still * gets replayed. * * We can get interactions between revokes and new log data within a * single transaction: * * Block is revoked and then journaled: * The desired end result is the journaling of the new block, so we * cancel the revoke before the transaction commits. * * Block is journaled and then revoked: * The revoke must take precedence over the write of the block, so we * need either to cancel the journal entry or to write the revoke * later in the log than the log block. In this case, we choose the * latter: journaling a block cancels any revoke record for that block * in the current transaction, so any revoke for that block in the * transaction must have happened after the block was journaled and so * the revoke must take precedence. * * Block is revoked and then written as data: * The data write is allowed to succeed, but the revoke is _not_ * cancelled. We still need to prevent old log records from * overwriting the new data. We don't even need to clear the revoke * bit here. * * We cache revoke status of a buffer in the current transaction in b_states * bits. As the name says, revokevalid flag indicates that the cached revoke * status of a buffer is valid and we can rely on the cached status. * * Revoke information on buffers is a tri-state value: * * RevokeValid clear: no cached revoke status, need to look it up * RevokeValid set, Revoked clear: * buffer has not been revoked, and cancel_revoke * need do nothing. * RevokeValid set, Revoked set: * buffer has been revoked. * * Locking rules: * We keep two hash tables of revoke records. One hashtable belongs to the * running transaction (is pointed to by journal->j_revoke), the other one * belongs to the committing transaction. Accesses to the second hash table * happen only from the kjournald and no other thread touches this table. Also * journal_switch_revoke_table() which switches which hashtable belongs to the * running and which to the committing transaction is called only from * kjournald. Therefore we need no locks when accessing the hashtable belonging * to the committing transaction. * * All users operating on the hash table belonging to the running transaction * have a handle to the transaction. Therefore they are safe from kjournald * switching hash tables under them. For operations on the lists of entries in * the hash table j_revoke_lock is used. * * Finally, also replay code uses the hash tables but at this moment no one else * can touch them (filesystem isn't mounted yet) and hence no locking is * needed. */ #ifndef __KERNEL__ #include "jfs_user.h" #else #include <linux/time.h> #include <linux/fs.h> #include <linux/jbd2.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/list.h> #include <linux/init.h> #include <linux/bio.h> #include <linux/log2.h> #include <linux/hash.h> #endif static struct kmem_cache *jbd2_revoke_record_cache; static struct kmem_cache *jbd2_revoke_table_cache; /* Each revoke record represents one single revoked block. During journal replay, this involves recording the transaction ID of the last transaction to revoke this block. */ struct jbd2_revoke_record_s { struct list_head hash; tid_t sequence; /* Used for recovery only */ unsigned long long blocknr; }; /* The revoke table is just a simple hash table of revoke records. */ struct jbd2_revoke_table_s { /* It is conceivable that we might want a larger hash table * for recovery. Must be a power of two. */ int hash_size; int hash_shift; struct list_head *hash_table; }; #ifdef __KERNEL__ static void write_one_revoke_record(transaction_t *, struct list_head *, struct buffer_head **, int *, struct jbd2_revoke_record_s *); static void flush_descriptor(journal_t *, struct buffer_head *, int); #endif /* Utility functions to maintain the revoke table */ static inline int hash(journal_t *journal, unsigned long long block) { return hash_64(block, journal->j_revoke->hash_shift); } static int insert_revoke_hash(journal_t *journal, unsigned long long blocknr, tid_t seq) { struct list_head *hash_list; struct jbd2_revoke_record_s *record; gfp_t gfp_mask = GFP_NOFS; if (journal_oom_retry) gfp_mask |= __GFP_NOFAIL; record = kmem_cache_alloc(jbd2_revoke_record_cache, gfp_mask); if (!record) return -ENOMEM; record->sequence = seq; record->blocknr = blocknr; hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)]; spin_lock(&journal->j_revoke_lock); list_add(&record->hash, hash_list); spin_unlock(&journal->j_revoke_lock); return 0; } /* Find a revoke record in the journal's hash table. */ static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal, unsigned long long blocknr) { struct list_head *hash_list; struct jbd2_revoke_record_s *record; hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)]; spin_lock(&journal->j_revoke_lock); record = (struct jbd2_revoke_record_s *) hash_list->next; while (&(record->hash) != hash_list) { if (record->blocknr == blocknr) { spin_unlock(&journal->j_revoke_lock); return record; } record = (struct jbd2_revoke_record_s *) record->hash.next; } spin_unlock(&journal->j_revoke_lock); return NULL; } void jbd2_journal_destroy_revoke_record_cache(void) { kmem_cache_destroy(jbd2_revoke_record_cache); jbd2_revoke_record_cache = NULL; } void jbd2_journal_destroy_revoke_table_cache(void) { kmem_cache_destroy(jbd2_revoke_table_cache); jbd2_revoke_table_cache = NULL; } int __init jbd2_journal_init_revoke_record_cache(void) { J_ASSERT(!jbd2_revoke_record_cache); jbd2_revoke_record_cache = KMEM_CACHE(jbd2_revoke_record_s, SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY); if (!jbd2_revoke_record_cache) { pr_emerg("JBD2: failed to create revoke_record cache\n"); return -ENOMEM; } return 0; } int __init jbd2_journal_init_revoke_table_cache(void) { J_ASSERT(!jbd2_revoke_table_cache); jbd2_revoke_table_cache = KMEM_CACHE(jbd2_revoke_table_s, SLAB_TEMPORARY); if (!jbd2_revoke_table_cache) { pr_emerg("JBD2: failed to create revoke_table cache\n"); return -ENOMEM; } return 0; } static struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size) { int shift = 0; int tmp = hash_size; struct jbd2_revoke_table_s *table; table = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); if (!table) goto out; while((tmp >>= 1UL) != 0UL) shift++; table->hash_size = hash_size; table->hash_shift = shift; table->hash_table = kmalloc_array(hash_size, sizeof(struct list_head), GFP_KERNEL); if (!table->hash_table) { kmem_cache_free(jbd2_revoke_table_cache, table); table = NULL; goto out; } for (tmp = 0; tmp < hash_size; tmp++) INIT_LIST_HEAD(&table->hash_table[tmp]); out: return table; } static void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table) { int i; struct list_head *hash_list; for (i = 0; i < table->hash_size; i++) { hash_list = &table->hash_table[i]; J_ASSERT(list_empty(hash_list)); } kfree(table->hash_table); kmem_cache_free(jbd2_revoke_table_cache, table); } /* Initialise the revoke table for a given journal to a given size. */ int jbd2_journal_init_revoke(journal_t *journal, int hash_size) { J_ASSERT(journal->j_revoke_table[0] == NULL); J_ASSERT(is_power_of_2(hash_size)); journal->j_revoke_table[0] = jbd2_journal_init_revoke_table(hash_size); if (!journal->j_revoke_table[0]) goto fail0; journal->j_revoke_table[1] = jbd2_journal_init_revoke_table(hash_size); if (!journal->j_revoke_table[1]) goto fail1; journal->j_revoke = journal->j_revoke_table[1]; spin_lock_init(&journal->j_revoke_lock); return 0; fail1: jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]); journal->j_revoke_table[0] = NULL; fail0: return -ENOMEM; } /* Destroy a journal's revoke table. The table must already be empty! */ void jbd2_journal_destroy_revoke(journal_t *journal) { journal->j_revoke = NULL; if (journal->j_revoke_table[0]) jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]); if (journal->j_revoke_table[1]) jbd2_journal_destroy_revoke_table(journal->j_revoke_table[1]); } #ifdef __KERNEL__ /* * jbd2_journal_revoke: revoke a given buffer_head from the journal. This * prevents the block from being replayed during recovery if we take a * crash after this current transaction commits. Any subsequent * metadata writes of the buffer in this transaction cancel the * revoke. * * Note that this call may block --- it is up to the caller to make * sure that there are no further calls to journal_write_metadata * before the revoke is complete. In ext3, this implies calling the * revoke before clearing the block bitmap when we are deleting * metadata. * * Revoke performs a jbd2_journal_forget on any buffer_head passed in as a * parameter, but does _not_ forget the buffer_head if the bh was only * found implicitly. * * bh_in may not be a journalled buffer - it may have come off * the hash tables without an attached journal_head. * * If bh_in is non-zero, jbd2_journal_revoke() will decrement its b_count * by one. */ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr, struct buffer_head *bh_in) { struct buffer_head *bh = NULL; journal_t *journal; struct block_device *bdev; int err; might_sleep(); if (bh_in) BUFFER_TRACE(bh_in, "enter"); journal = handle->h_transaction->t_journal; if (!jbd2_journal_set_features(journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)){ J_ASSERT (!"Cannot set revoke feature!"); return -EINVAL; } bdev = journal->j_fs_dev; bh = bh_in; if (!bh) { bh = __find_get_block(bdev, blocknr, journal->j_blocksize); if (bh) BUFFER_TRACE(bh, "found on hash"); } #ifdef JBD2_EXPENSIVE_CHECKING else { struct buffer_head *bh2; /* If there is a different buffer_head lying around in * memory anywhere... */ bh2 = __find_get_block(bdev, blocknr, journal->j_blocksize); if (bh2) { /* ... and it has RevokeValid status... */ if (bh2 != bh && buffer_revokevalid(bh2)) /* ...then it better be revoked too, * since it's illegal to create a revoke * record against a buffer_head which is * not marked revoked --- that would * risk missing a subsequent revoke * cancel. */ J_ASSERT_BH(bh2, buffer_revoked(bh2)); put_bh(bh2); } } #endif if (WARN_ON_ONCE(handle->h_revoke_credits <= 0)) { if (!bh_in) brelse(bh); return -EIO; } /* We really ought not ever to revoke twice in a row without first having the revoke cancelled: it's illegal to free a block twice without allocating it in between! */ if (bh) { if (!J_EXPECT_BH(bh, !buffer_revoked(bh), "inconsistent data on disk")) { if (!bh_in) brelse(bh); return -EIO; } set_buffer_revoked(bh); set_buffer_revokevalid(bh); if (bh_in) { BUFFER_TRACE(bh_in, "call jbd2_journal_forget"); jbd2_journal_forget(handle, bh_in); } else { BUFFER_TRACE(bh, "call brelse"); __brelse(bh); } } handle->h_revoke_credits--; jbd2_debug(2, "insert revoke for block %llu, bh_in=%p\n",blocknr, bh_in); err = insert_revoke_hash(journal, blocknr, handle->h_transaction->t_tid); BUFFER_TRACE(bh_in, "exit"); return err; } /* * Cancel an outstanding revoke. For use only internally by the * journaling code (called from jbd2_journal_get_write_access). * * We trust buffer_revoked() on the buffer if the buffer is already * being journaled: if there is no revoke pending on the buffer, then we * don't do anything here. * * This would break if it were possible for a buffer to be revoked and * discarded, and then reallocated within the same transaction. In such * a case we would have lost the revoked bit, but when we arrived here * the second time we would still have a pending revoke to cancel. So, * do not trust the Revoked bit on buffers unless RevokeValid is also * set. */ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) { struct jbd2_revoke_record_s *record; journal_t *journal = handle->h_transaction->t_journal; int need_cancel; int did_revoke = 0; /* akpm: debug */ struct buffer_head *bh = jh2bh(jh); jbd2_debug(4, "journal_head %p, cancelling revoke\n", jh); /* Is the existing Revoke bit valid? If so, we trust it, and * only perform the full cancel if the revoke bit is set. If * not, we can't trust the revoke bit, and we need to do the * full search for a revoke record. */ if (test_set_buffer_revokevalid(bh)) { need_cancel = test_clear_buffer_revoked(bh); } else { need_cancel = 1; clear_buffer_revoked(bh); } if (need_cancel) { record = find_revoke_record(journal, bh->b_blocknr); if (record) { jbd2_debug(4, "cancelled existing revoke on " "blocknr %llu\n", (unsigned long long)bh->b_blocknr); spin_lock(&journal->j_revoke_lock); list_del(&record->hash); spin_unlock(&journal->j_revoke_lock); kmem_cache_free(jbd2_revoke_record_cache, record); did_revoke = 1; } } #ifdef JBD2_EXPENSIVE_CHECKING /* There better not be one left behind by now! */ record = find_revoke_record(journal, bh->b_blocknr); J_ASSERT_JH(jh, record == NULL); #endif /* Finally, have we just cleared revoke on an unhashed * buffer_head? If so, we'd better make sure we clear the * revoked status on any hashed alias too, otherwise the revoke * state machine will get very upset later on. */ if (need_cancel) { struct buffer_head *bh2; bh2 = __find_get_block(bh->b_bdev, bh->b_blocknr, bh->b_size); if (bh2) { if (bh2 != bh) clear_buffer_revoked(bh2); __brelse(bh2); } } return did_revoke; } /* * journal_clear_revoked_flag clears revoked flag of buffers in * revoke table to reflect there is no revoked buffers in the next * transaction which is going to be started. */ void jbd2_clear_buffer_revoked_flags(journal_t *journal) { struct jbd2_revoke_table_s *revoke = journal->j_revoke; int i = 0; for (i = 0; i < revoke->hash_size; i++) { struct list_head *hash_list; struct list_head *list_entry; hash_list = &revoke->hash_table[i]; list_for_each(list_entry, hash_list) { struct jbd2_revoke_record_s *record; struct buffer_head *bh; record = (struct jbd2_revoke_record_s *)list_entry; bh = __find_get_block(journal->j_fs_dev, record->blocknr, journal->j_blocksize); if (bh) { clear_buffer_revoked(bh); __brelse(bh); } } } } /* journal_switch_revoke table select j_revoke for next transaction * we do not want to suspend any processing until all revokes are * written -bzzz */ void jbd2_journal_switch_revoke_table(journal_t *journal) { int i; if (journal->j_revoke == journal->j_revoke_table[0]) journal->j_revoke = journal->j_revoke_table[1]; else journal->j_revoke = journal->j_revoke_table[0]; for (i = 0; i < journal->j_revoke->hash_size; i++) INIT_LIST_HEAD(&journal->j_revoke->hash_table[i]); } /* * Write revoke records to the journal for all entries in the current * revoke hash, deleting the entries as we go. */ void jbd2_journal_write_revoke_records(transaction_t *transaction, struct list_head *log_bufs) { journal_t *journal = transaction->t_journal; struct buffer_head *descriptor; struct jbd2_revoke_record_s *record; struct jbd2_revoke_table_s *revoke; struct list_head *hash_list; int i, offset, count; descriptor = NULL; offset = 0; count = 0; /* select revoke table for committing transaction */ revoke = journal->j_revoke == journal->j_revoke_table[0] ? journal->j_revoke_table[1] : journal->j_revoke_table[0]; for (i = 0; i < revoke->hash_size; i++) { hash_list = &revoke->hash_table[i]; while (!list_empty(hash_list)) { record = (struct jbd2_revoke_record_s *) hash_list->next; write_one_revoke_record(transaction, log_bufs, &descriptor, &offset, record); count++; list_del(&record->hash); kmem_cache_free(jbd2_revoke_record_cache, record); } } if (descriptor) flush_descriptor(journal, descriptor, offset); jbd2_debug(1, "Wrote %d revoke records\n", count); } /* * Write out one revoke record. We need to create a new descriptor * block if the old one is full or if we have not already created one. */ static void write_one_revoke_record(transaction_t *transaction, struct list_head *log_bufs, struct buffer_head **descriptorp, int *offsetp, struct jbd2_revoke_record_s *record) { journal_t *journal = transaction->t_journal; int csum_size = 0; struct buffer_head *descriptor; int sz, offset; /* If we are already aborting, this all becomes a noop. We still need to go round the loop in jbd2_journal_write_revoke_records in order to free all of the revoke records: only the IO to the journal is omitted. */ if (is_journal_aborted(journal)) return; descriptor = *descriptorp; offset = *offsetp; /* Do we need to leave space at the end for a checksum? */ if (jbd2_journal_has_csum_v2or3(journal)) csum_size = sizeof(struct jbd2_journal_block_tail); if (jbd2_has_feature_64bit(journal)) sz = 8; else sz = 4; /* Make sure we have a descriptor with space left for the record */ if (descriptor) { if (offset + sz > journal->j_blocksize - csum_size) { flush_descriptor(journal, descriptor, offset); descriptor = NULL; } } if (!descriptor) { descriptor = jbd2_journal_get_descriptor_buffer(transaction, JBD2_REVOKE_BLOCK); if (!descriptor) return; /* Record it so that we can wait for IO completion later */ BUFFER_TRACE(descriptor, "file in log_bufs"); jbd2_file_log_bh(log_bufs, descriptor); offset = sizeof(jbd2_journal_revoke_header_t); *descriptorp = descriptor; } if (jbd2_has_feature_64bit(journal)) * ((__be64 *)(&descriptor->b_data[offset])) = cpu_to_be64(record->blocknr); else * ((__be32 *)(&descriptor->b_data[offset])) = cpu_to_be32(record->blocknr); offset += sz; *offsetp = offset; } /* * Flush a revoke descriptor out to the journal. If we are aborting, * this is a noop; otherwise we are generating a buffer which needs to * be waited for during commit, so it has to go onto the appropriate * journal buffer list. */ static void flush_descriptor(journal_t *journal, struct buffer_head *descriptor, int offset) { jbd2_journal_revoke_header_t *header; if (is_journal_aborted(journal)) return; header = (jbd2_journal_revoke_header_t *)descriptor->b_data; header->r_count = cpu_to_be32(offset); jbd2_descriptor_block_csum_set(journal, descriptor); set_buffer_jwrite(descriptor); BUFFER_TRACE(descriptor, "write"); set_buffer_dirty(descriptor); write_dirty_buffer(descriptor, JBD2_JOURNAL_REQ_FLAGS); } #endif /* * Revoke support for recovery. * * Recovery needs to be able to: * * record all revoke records, including the tid of the latest instance * of each revoke in the journal * * check whether a given block in a given transaction should be replayed * (ie. has not been revoked by a revoke record in that or a subsequent * transaction) * * empty the revoke table after recovery. */ /* * First, setting revoke records. We create a new revoke record for * every block ever revoked in the log as we scan it for recovery, and * we update the existing records if we find multiple revokes for a * single block. */ int jbd2_journal_set_revoke(journal_t *journal, unsigned long long blocknr, tid_t sequence) { struct jbd2_revoke_record_s *record; record = find_revoke_record(journal, blocknr); if (record) { /* If we have multiple occurrences, only record the * latest sequence number in the hashed record */ if (tid_gt(sequence, record->sequence)) record->sequence = sequence; return 0; } return insert_revoke_hash(journal, blocknr, sequence); } /* * Test revoke records. For a given block referenced in the log, has * that block been revoked? A revoke record with a given transaction * sequence number revokes all blocks in that transaction and earlier * ones, but later transactions still need replayed. */ int jbd2_journal_test_revoke(journal_t *journal, unsigned long long blocknr, tid_t sequence) { struct jbd2_revoke_record_s *record; record = find_revoke_record(journal, blocknr); if (!record) return 0; if (tid_gt(sequence, record->sequence)) return 0; return 1; } /* * Finally, once recovery is over, we need to clear the revoke table so * that it can be reused by the running filesystem. */ void jbd2_journal_clear_revoke(journal_t *journal) { int i; struct list_head *hash_list; struct jbd2_revoke_record_s *record; struct jbd2_revoke_table_s *revoke; revoke = journal->j_revoke; for (i = 0; i < revoke->hash_size; i++) { hash_list = &revoke->hash_table[i]; while (!list_empty(hash_list)) { record = (struct jbd2_revoke_record_s*) hash_list->next; list_del(&record->hash); kmem_cache_free(jbd2_revoke_record_cache, record); } } }
3 3 44 45 45 45 45 29 45 45 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2022-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_bit.h" #include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_btree.h" #include "xfs_alloc_btree.h" #include "xfs_rmap_btree.h" #include "xfs_alloc.h" #include "xfs_ialloc.h" #include "xfs_rmap.h" #include "xfs_ag.h" #include "xfs_ag_resv.h" #include "xfs_health.h" #include "xfs_error.h" #include "xfs_bmap.h" #include "xfs_defer.h" #include "xfs_log_format.h" #include "xfs_trans.h" #include "xfs_trace.h" #include "xfs_inode.h" #include "xfs_icache.h" #include "xfs_buf_item.h" #include "xfs_rtgroup.h" #include "xfs_rtbitmap.h" #include "xfs_metafile.h" #include "xfs_metadir.h" #include "xfs_rtrmap_btree.h" #include "xfs_rtrefcount_btree.h" /* Find the first usable fsblock in this rtgroup. */ static inline uint32_t xfs_rtgroup_min_block( struct xfs_mount *mp, xfs_rgnumber_t rgno) { if (xfs_has_rtsb(mp) && rgno == 0) return mp->m_sb.sb_rextsize; return 0; } /* Precompute this group's geometry */ void xfs_rtgroup_calc_geometry( struct xfs_mount *mp, struct xfs_rtgroup *rtg, xfs_rgnumber_t rgno, xfs_rgnumber_t rgcount, xfs_rtbxlen_t rextents) { rtg->rtg_extents = __xfs_rtgroup_extents(mp, rgno, rgcount, rextents); rtg_group(rtg)->xg_block_count = rtg->rtg_extents * mp->m_sb.sb_rextsize; rtg_group(rtg)->xg_min_gbno = xfs_rtgroup_min_block(mp, rgno); } int xfs_rtgroup_alloc( struct xfs_mount *mp, xfs_rgnumber_t rgno, xfs_rgnumber_t rgcount, xfs_rtbxlen_t rextents) { struct xfs_rtgroup *rtg; int error; rtg = kzalloc(sizeof(struct xfs_rtgroup), GFP_KERNEL); if (!rtg) return -ENOMEM; xfs_rtgroup_calc_geometry(mp, rtg, rgno, rgcount, rextents); error = xfs_group_insert(mp, rtg_group(rtg), rgno, XG_TYPE_RTG); if (error) goto out_free_rtg; return 0; out_free_rtg: kfree(rtg); return error; } void xfs_rtgroup_free( struct xfs_mount *mp, xfs_rgnumber_t rgno) { xfs_group_free(mp, rgno, XG_TYPE_RTG, NULL); } /* Free a range of incore rtgroup objects. */ void xfs_free_rtgroups( struct xfs_mount *mp, xfs_rgnumber_t first_rgno, xfs_rgnumber_t end_rgno) { xfs_rgnumber_t rgno; for (rgno = first_rgno; rgno < end_rgno; rgno++) xfs_rtgroup_free(mp, rgno); } /* Initialize some range of incore rtgroup objects. */ int xfs_initialize_rtgroups( struct xfs_mount *mp, xfs_rgnumber_t first_rgno, xfs_rgnumber_t end_rgno, xfs_rtbxlen_t rextents) { xfs_rgnumber_t index; int error; if (first_rgno >= end_rgno) return 0; for (index = first_rgno; index < end_rgno; index++) { error = xfs_rtgroup_alloc(mp, index, end_rgno, rextents); if (error) goto out_unwind_new_rtgs; } return 0; out_unwind_new_rtgs: xfs_free_rtgroups(mp, first_rgno, index); return error; } /* Compute the number of rt extents in this realtime group. */ xfs_rtxnum_t __xfs_rtgroup_extents( struct xfs_mount *mp, xfs_rgnumber_t rgno, xfs_rgnumber_t rgcount, xfs_rtbxlen_t rextents) { ASSERT(rgno < rgcount); if (rgno == rgcount - 1) return rextents - ((xfs_rtxnum_t)rgno * mp->m_sb.sb_rgextents); ASSERT(xfs_has_rtgroups(mp)); return mp->m_sb.sb_rgextents; } xfs_rtxnum_t xfs_rtgroup_extents( struct xfs_mount *mp, xfs_rgnumber_t rgno) { return __xfs_rtgroup_extents(mp, rgno, mp->m_sb.sb_rgcount, mp->m_sb.sb_rextents); } /* * Update the rt extent count of the previous tail rtgroup if it changed during * recovery (i.e. recovery of a growfs). */ int xfs_update_last_rtgroup_size( struct xfs_mount *mp, xfs_rgnumber_t prev_rgcount) { struct xfs_rtgroup *rtg; ASSERT(prev_rgcount > 0); rtg = xfs_rtgroup_grab(mp, prev_rgcount - 1); if (!rtg) return -EFSCORRUPTED; rtg->rtg_extents = __xfs_rtgroup_extents(mp, prev_rgcount - 1, mp->m_sb.sb_rgcount, mp->m_sb.sb_rextents); rtg_group(rtg)->xg_block_count = rtg->rtg_extents * mp->m_sb.sb_rextsize; xfs_rtgroup_rele(rtg); return 0; } /* Lock metadata inodes associated with this rt group. */ void xfs_rtgroup_lock( struct xfs_rtgroup *rtg, unsigned int rtglock_flags) { ASSERT(!(rtglock_flags & ~XFS_RTGLOCK_ALL_FLAGS)); ASSERT(!(rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) || !(rtglock_flags & XFS_RTGLOCK_BITMAP)); if (rtglock_flags & XFS_RTGLOCK_BITMAP) { /* * Lock both realtime free space metadata inodes for a freespace * update. */ xfs_ilock(rtg_bitmap(rtg), XFS_ILOCK_EXCL); xfs_ilock(rtg_summary(rtg), XFS_ILOCK_EXCL); } else if (rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) { xfs_ilock(rtg_bitmap(rtg), XFS_ILOCK_SHARED); } if ((rtglock_flags & XFS_RTGLOCK_RMAP) && rtg_rmap(rtg)) xfs_ilock(rtg_rmap(rtg), XFS_ILOCK_EXCL); if ((rtglock_flags & XFS_RTGLOCK_REFCOUNT) && rtg_refcount(rtg)) xfs_ilock(rtg_refcount(rtg), XFS_ILOCK_EXCL); } /* Unlock metadata inodes associated with this rt group. */ void xfs_rtgroup_unlock( struct xfs_rtgroup *rtg, unsigned int rtglock_flags) { ASSERT(!(rtglock_flags & ~XFS_RTGLOCK_ALL_FLAGS)); ASSERT(!(rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) || !(rtglock_flags & XFS_RTGLOCK_BITMAP)); if ((rtglock_flags & XFS_RTGLOCK_REFCOUNT) && rtg_refcount(rtg)) xfs_iunlock(rtg_refcount(rtg), XFS_ILOCK_EXCL); if ((rtglock_flags & XFS_RTGLOCK_RMAP) && rtg_rmap(rtg)) xfs_iunlock(rtg_rmap(rtg), XFS_ILOCK_EXCL); if (rtglock_flags & XFS_RTGLOCK_BITMAP) { xfs_iunlock(rtg_summary(rtg), XFS_ILOCK_EXCL); xfs_iunlock(rtg_bitmap(rtg), XFS_ILOCK_EXCL); } else if (rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) { xfs_iunlock(rtg_bitmap(rtg), XFS_ILOCK_SHARED); } } /* * Join realtime group metadata inodes to the transaction. The ILOCKs will be * released on transaction commit. */ void xfs_rtgroup_trans_join( struct xfs_trans *tp, struct xfs_rtgroup *rtg, unsigned int rtglock_flags) { ASSERT(!(rtglock_flags & ~XFS_RTGLOCK_ALL_FLAGS)); ASSERT(!(rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED)); if (rtglock_flags & XFS_RTGLOCK_BITMAP) { xfs_trans_ijoin(tp, rtg_bitmap(rtg), XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, rtg_summary(rtg), XFS_ILOCK_EXCL); } if ((rtglock_flags & XFS_RTGLOCK_RMAP) && rtg_rmap(rtg)) xfs_trans_ijoin(tp, rtg_rmap(rtg), XFS_ILOCK_EXCL); if ((rtglock_flags & XFS_RTGLOCK_REFCOUNT) && rtg_refcount(rtg)) xfs_trans_ijoin(tp, rtg_refcount(rtg), XFS_ILOCK_EXCL); } /* Retrieve rt group geometry. */ int xfs_rtgroup_get_geometry( struct xfs_rtgroup *rtg, struct xfs_rtgroup_geometry *rgeo) { /* Fill out form. */ memset(rgeo, 0, sizeof(*rgeo)); rgeo->rg_number = rtg_rgno(rtg); rgeo->rg_length = rtg_group(rtg)->xg_block_count; xfs_rtgroup_geom_health(rtg, rgeo); return 0; } #ifdef CONFIG_PROVE_LOCKING static struct lock_class_key xfs_rtginode_lock_class; static int xfs_rtginode_ilock_cmp_fn( const struct lockdep_map *m1, const struct lockdep_map *m2) { const struct xfs_inode *ip1 = container_of(m1, struct xfs_inode, i_lock.dep_map); const struct xfs_inode *ip2 = container_of(m2, struct xfs_inode, i_lock.dep_map); if (ip1->i_projid < ip2->i_projid) return -1; if (ip1->i_projid > ip2->i_projid) return 1; return 0; } static inline void xfs_rtginode_ilock_print_fn( const struct lockdep_map *m) { const struct xfs_inode *ip = container_of(m, struct xfs_inode, i_lock.dep_map); printk(KERN_CONT " rgno=%u metatype=%s", ip->i_projid, xfs_metafile_type_str(ip->i_metatype)); } /* * Most of the time each of the RTG inode locks are only taken one at a time. * But when committing deferred ops, more than one of a kind can be taken. * However, deferred rt ops will be committed in rgno order so there is no * potential for deadlocks. The code here is needed to tell lockdep about this * order. */ static inline void xfs_rtginode_lockdep_setup( struct xfs_inode *ip, xfs_rgnumber_t rgno, enum xfs_rtg_inodes type) { lockdep_set_class_and_subclass(&ip->i_lock, &xfs_rtginode_lock_class, type); lock_set_cmp_fn(&ip->i_lock, xfs_rtginode_ilock_cmp_fn, xfs_rtginode_ilock_print_fn); } #else #define xfs_rtginode_lockdep_setup(ip, rgno, type) do { } while (0) #endif /* CONFIG_PROVE_LOCKING */ struct xfs_rtginode_ops { const char *name; /* short name */ enum xfs_metafile_type metafile_type; unsigned int sick; /* rtgroup sickness flag */ unsigned int fmt_mask; /* all valid data fork formats */ /* Does the fs have this feature? */ bool (*enabled)(const struct xfs_mount *mp); /* Create this rtgroup metadata inode and initialize it. */ int (*create)(struct xfs_rtgroup *rtg, struct xfs_inode *ip, struct xfs_trans *tp, bool init); }; static const struct xfs_rtginode_ops xfs_rtginode_ops[XFS_RTGI_MAX] = { [XFS_RTGI_BITMAP] = { .name = "bitmap", .metafile_type = XFS_METAFILE_RTBITMAP, .sick = XFS_SICK_RG_BITMAP, .fmt_mask = (1U << XFS_DINODE_FMT_EXTENTS) | (1U << XFS_DINODE_FMT_BTREE), .create = xfs_rtbitmap_create, }, [XFS_RTGI_SUMMARY] = { .name = "summary", .metafile_type = XFS_METAFILE_RTSUMMARY, .sick = XFS_SICK_RG_SUMMARY, .fmt_mask = (1U << XFS_DINODE_FMT_EXTENTS) | (1U << XFS_DINODE_FMT_BTREE), .create = xfs_rtsummary_create, }, [XFS_RTGI_RMAP] = { .name = "rmap", .metafile_type = XFS_METAFILE_RTRMAP, .sick = XFS_SICK_RG_RMAPBT, .fmt_mask = 1U << XFS_DINODE_FMT_META_BTREE, /* * growfs must create the rtrmap inodes before adding a * realtime volume to the filesystem, so we cannot use the * rtrmapbt predicate here. */ .enabled = xfs_has_rmapbt, .create = xfs_rtrmapbt_create, }, [XFS_RTGI_REFCOUNT] = { .name = "refcount", .metafile_type = XFS_METAFILE_RTREFCOUNT, .sick = XFS_SICK_RG_REFCNTBT, .fmt_mask = 1U << XFS_DINODE_FMT_META_BTREE, /* same comment about growfs and rmap inodes applies here */ .enabled = xfs_has_reflink, .create = xfs_rtrefcountbt_create, }, }; /* Return the shortname of this rtgroup inode. */ const char * xfs_rtginode_name( enum xfs_rtg_inodes type) { return xfs_rtginode_ops[type].name; } /* Return the metafile type of this rtgroup inode. */ enum xfs_metafile_type xfs_rtginode_metafile_type( enum xfs_rtg_inodes type) { return xfs_rtginode_ops[type].metafile_type; } /* Should this rtgroup inode be present? */ bool xfs_rtginode_enabled( struct xfs_rtgroup *rtg, enum xfs_rtg_inodes type) { const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type]; if (!ops->enabled) return true; return ops->enabled(rtg_mount(rtg)); } /* Mark an rtgroup inode sick */ void xfs_rtginode_mark_sick( struct xfs_rtgroup *rtg, enum xfs_rtg_inodes type) { const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type]; xfs_group_mark_sick(rtg_group(rtg), ops->sick); } /* Load and existing rtgroup inode into the rtgroup structure. */ int xfs_rtginode_load( struct xfs_rtgroup *rtg, enum xfs_rtg_inodes type, struct xfs_trans *tp) { struct xfs_mount *mp = tp->t_mountp; struct xfs_inode *ip; const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type]; int error; if (!xfs_rtginode_enabled(rtg, type)) return 0; if (!xfs_has_rtgroups(mp)) { xfs_ino_t ino; switch (type) { case XFS_RTGI_BITMAP: ino = mp->m_sb.sb_rbmino; break; case XFS_RTGI_SUMMARY: ino = mp->m_sb.sb_rsumino; break; default: /* None of the other types exist on !rtgroups */ return 0; } error = xfs_trans_metafile_iget(tp, ino, ops->metafile_type, &ip); } else { const char *path; if (!mp->m_rtdirip) { xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR); return -EFSCORRUPTED; } path = xfs_rtginode_path(rtg_rgno(rtg), type); if (!path) return -ENOMEM; error = xfs_metadir_load(tp, mp->m_rtdirip, path, ops->metafile_type, &ip); kfree(path); } if (error) { if (xfs_metadata_is_sick(error)) xfs_rtginode_mark_sick(rtg, type); return error; } if (XFS_IS_CORRUPT(mp, !((1U << ip->i_df.if_format) & ops->fmt_mask))) { xfs_irele(ip); xfs_rtginode_mark_sick(rtg, type); return -EFSCORRUPTED; } if (XFS_IS_CORRUPT(mp, ip->i_projid != rtg_rgno(rtg))) { xfs_irele(ip); xfs_rtginode_mark_sick(rtg, type); return -EFSCORRUPTED; } xfs_rtginode_lockdep_setup(ip, rtg_rgno(rtg), type); rtg->rtg_inodes[type] = ip; return 0; } /* Release an rtgroup metadata inode. */ void xfs_rtginode_irele( struct xfs_inode **ipp) { if (*ipp) xfs_irele(*ipp); *ipp = NULL; } /* Add a metadata inode for a realtime rmap btree. */ int xfs_rtginode_create( struct xfs_rtgroup *rtg, enum xfs_rtg_inodes type, bool init) { const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type]; struct xfs_mount *mp = rtg_mount(rtg); struct xfs_metadir_update upd = { .dp = mp->m_rtdirip, .metafile_type = ops->metafile_type, }; int error; if (!xfs_rtginode_enabled(rtg, type)) return 0; if (!mp->m_rtdirip) { xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR); return -EFSCORRUPTED; } upd.path = xfs_rtginode_path(rtg_rgno(rtg), type); if (!upd.path) return -ENOMEM; error = xfs_metadir_start_create(&upd); if (error) goto out_path; error = xfs_metadir_create(&upd, S_IFREG); if (error) goto out_cancel; xfs_rtginode_lockdep_setup(upd.ip, rtg_rgno(rtg), type); upd.ip->i_projid = rtg_rgno(rtg); error = ops->create(rtg, upd.ip, upd.tp, init); if (error) goto out_cancel; error = xfs_metadir_commit(&upd); if (error) goto out_path; kfree(upd.path); xfs_finish_inode_setup(upd.ip); rtg->rtg_inodes[type] = upd.ip; return 0; out_cancel: xfs_metadir_cancel(&upd, error); /* Have to finish setting up the inode to ensure it's deleted. */ if (upd.ip) { xfs_finish_inode_setup(upd.ip); xfs_irele(upd.ip); } out_path: kfree(upd.path); return error; } /* Create the parent directory for all rtgroup inodes and load it. */ int xfs_rtginode_mkdir_parent( struct xfs_mount *mp) { if (!mp->m_metadirip) { xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR); return -EFSCORRUPTED; } return xfs_metadir_mkdir(mp->m_metadirip, "rtgroups", &mp->m_rtdirip); } /* Load the parent directory of all rtgroup inodes. */ int xfs_rtginode_load_parent( struct xfs_trans *tp) { struct xfs_mount *mp = tp->t_mountp; if (!mp->m_metadirip) { xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR); return -EFSCORRUPTED; } return xfs_metadir_load(tp, mp->m_metadirip, "rtgroups", XFS_METAFILE_DIR, &mp->m_rtdirip); } /* Check superblock fields for a read or a write. */ static xfs_failaddr_t xfs_rtsb_verify_common( struct xfs_buf *bp) { struct xfs_rtsb *rsb = bp->b_addr; if (!xfs_verify_magic(bp, rsb->rsb_magicnum)) return __this_address; if (rsb->rsb_pad) return __this_address; /* Everything to the end of the fs block must be zero */ if (memchr_inv(rsb + 1, 0, BBTOB(bp->b_length) - sizeof(*rsb))) return __this_address; return NULL; } /* Check superblock fields for a read or revalidation. */ static inline xfs_failaddr_t xfs_rtsb_verify_all( struct xfs_buf *bp) { struct xfs_rtsb *rsb = bp->b_addr; struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; fa = xfs_rtsb_verify_common(bp); if (fa) return fa; if (memcmp(&rsb->rsb_fname, &mp->m_sb.sb_fname, XFSLABEL_MAX)) return __this_address; if (!uuid_equal(&rsb->rsb_uuid, &mp->m_sb.sb_uuid)) return __this_address; if (!uuid_equal(&rsb->rsb_meta_uuid, &mp->m_sb.sb_meta_uuid)) return __this_address; return NULL; } static void xfs_rtsb_read_verify( struct xfs_buf *bp) { xfs_failaddr_t fa; if (!xfs_buf_verify_cksum(bp, XFS_RTSB_CRC_OFF)) { xfs_verifier_error(bp, -EFSBADCRC, __this_address); return; } fa = xfs_rtsb_verify_all(bp); if (fa) xfs_verifier_error(bp, -EFSCORRUPTED, fa); } static void xfs_rtsb_write_verify( struct xfs_buf *bp) { xfs_failaddr_t fa; fa = xfs_rtsb_verify_common(bp); if (fa) { xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } xfs_buf_update_cksum(bp, XFS_RTSB_CRC_OFF); } const struct xfs_buf_ops xfs_rtsb_buf_ops = { .name = "xfs_rtsb", .magic = { 0, cpu_to_be32(XFS_RTSB_MAGIC) }, .verify_read = xfs_rtsb_read_verify, .verify_write = xfs_rtsb_write_verify, .verify_struct = xfs_rtsb_verify_all, }; /* Update a realtime superblock from the primary fs super */ void xfs_update_rtsb( struct xfs_buf *rtsb_bp, const struct xfs_buf *sb_bp) { const struct xfs_dsb *dsb = sb_bp->b_addr; struct xfs_rtsb *rsb = rtsb_bp->b_addr; const uuid_t *meta_uuid; rsb->rsb_magicnum = cpu_to_be32(XFS_RTSB_MAGIC); rsb->rsb_pad = 0; memcpy(&rsb->rsb_fname, &dsb->sb_fname, XFSLABEL_MAX); memcpy(&rsb->rsb_uuid, &dsb->sb_uuid, sizeof(rsb->rsb_uuid)); /* * The metadata uuid is the fs uuid if the metauuid feature is not * enabled. */ if (dsb->sb_features_incompat & cpu_to_be32(XFS_SB_FEAT_INCOMPAT_META_UUID)) meta_uuid = &dsb->sb_meta_uuid; else meta_uuid = &dsb->sb_uuid; memcpy(&rsb->rsb_meta_uuid, meta_uuid, sizeof(rsb->rsb_meta_uuid)); } /* * Update the realtime superblock from a filesystem superblock and log it to * the given transaction. */ struct xfs_buf * xfs_log_rtsb( struct xfs_trans *tp, const struct xfs_buf *sb_bp) { struct xfs_buf *rtsb_bp; if (!xfs_has_rtsb(tp->t_mountp)) return NULL; rtsb_bp = xfs_trans_getrtsb(tp); if (!rtsb_bp) { /* * It's possible for the rtgroups feature to be enabled but * there is no incore rt superblock buffer if the rt geometry * was specified at mkfs time but the rt section has not yet * been attached. In this case, rblocks must be zero. */ ASSERT(tp->t_mountp->m_sb.sb_rblocks == 0); return NULL; } xfs_update_rtsb(rtsb_bp, sb_bp); xfs_trans_ordered_buf(tp, rtsb_bp); return rtsb_bp; }
43 43 51 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright(c) 2004-2005 Intel Corporation. All rights reserved. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> #include <linux/module.h> #include <linux/device.h> #include <linux/sched/signal.h> #include <linux/fs.h> #include <linux/types.h> #include <linux/string.h> #include <linux/netdevice.h> #include <linux/inetdevice.h> #include <linux/in.h> #include <linux/sysfs.h> #include <linux/ctype.h> #include <linux/inet.h> #include <linux/rtnetlink.h> #include <linux/etherdevice.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <linux/nsproxy.h> #include <net/bonding.h> #define to_bond(cd) ((struct bonding *)(netdev_priv(to_net_dev(cd)))) /* "show" function for the bond_masters attribute. * The class parameter is ignored. */ static ssize_t bonding_show_bonds(const struct class *cls, const struct class_attribute *attr, char *buf) { const struct bond_net *bn = container_of_const(attr, struct bond_net, class_attr_bonding_masters); struct bonding *bond; int res = 0; rcu_read_lock(); list_for_each_entry_rcu(bond, &bn->dev_list, bond_list) { if (res > (PAGE_SIZE - IFNAMSIZ)) { /* not enough space for another interface name */ if ((PAGE_SIZE - res) > 10) res = PAGE_SIZE - 10; res += sysfs_emit_at(buf, res, "++more++ "); break; } res += sysfs_emit_at(buf, res, "%s ", bond->dev->name); } if (res) buf[res-1] = '\n'; /* eat the leftover space */ rcu_read_unlock(); return res; } static struct net_device *bond_get_by_name(const struct bond_net *bn, const char *ifname) { struct bonding *bond; list_for_each_entry(bond, &bn->dev_list, bond_list) { if (strncmp(bond->dev->name, ifname, IFNAMSIZ) == 0) return bond->dev; } return NULL; } /* "store" function for the bond_masters attribute. This is what * creates and deletes entire bonds. * * The class parameter is ignored. */ static ssize_t bonding_store_bonds(const struct class *cls, const struct class_attribute *attr, const char *buffer, size_t count) { const struct bond_net *bn = container_of_const(attr, struct bond_net, class_attr_bonding_masters); char command[IFNAMSIZ + 1] = {0, }; char *ifname; int rv, res = count; sscanf(buffer, "%16s", command); /* IFNAMSIZ*/ ifname = command + 1; if ((strlen(command) <= 1) || !dev_valid_name(ifname)) goto err_no_cmd; if (command[0] == '+') { pr_info("%s is being created...\n", ifname); rv = bond_create(bn->net, ifname); if (rv) { if (rv == -EEXIST) pr_info("%s already exists\n", ifname); else pr_info("%s creation failed\n", ifname); res = rv; } } else if (command[0] == '-') { struct net_device *bond_dev; rtnl_lock(); bond_dev = bond_get_by_name(bn, ifname); if (bond_dev) { pr_info("%s is being deleted...\n", ifname); unregister_netdevice(bond_dev); } else { pr_err("unable to delete non-existent %s\n", ifname); res = -ENODEV; } rtnl_unlock(); } else goto err_no_cmd; /* Always return either count or an error. If you return 0, you'll * get called forever, which is bad. */ return res; err_no_cmd: pr_err("no command found in bonding_masters - use +ifname or -ifname\n"); return -EPERM; } /* class attribute for bond_masters file. This ends up in /sys/class/net */ static const struct class_attribute class_attr_bonding_masters = { .attr = { .name = "bonding_masters", .mode = 0644, }, .show = bonding_show_bonds, .store = bonding_store_bonds, }; /* Generic "store" method for bonding sysfs option setting */ static ssize_t bonding_sysfs_store_option(struct device *d, struct device_attribute *attr, const char *buffer, size_t count) { struct bonding *bond = to_bond(d); const struct bond_option *opt; char *buffer_clone; int ret; opt = bond_opt_get_by_name(attr->attr.name); if (WARN_ON(!opt)) return -ENOENT; buffer_clone = kstrndup(buffer, count, GFP_KERNEL); if (!buffer_clone) return -ENOMEM; ret = bond_opt_tryset_rtnl(bond, opt->id, buffer_clone); if (!ret) ret = count; kfree(buffer_clone); return ret; } /* Show the slaves in the current bond. */ static ssize_t bonding_show_slaves(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); struct list_head *iter; struct slave *slave; int res = 0; rcu_read_lock(); bond_for_each_slave_rcu(bond, slave, iter) { if (res > (PAGE_SIZE - IFNAMSIZ)) { /* not enough space for another interface name */ if ((PAGE_SIZE - res) > 10) res = PAGE_SIZE - 10; res += sysfs_emit_at(buf, res, "++more++ "); break; } res += sysfs_emit_at(buf, res, "%s ", slave->dev->name); } rcu_read_unlock(); if (res) buf[res-1] = '\n'; /* eat the leftover space */ return res; } static DEVICE_ATTR(slaves, 0644, bonding_show_slaves, bonding_sysfs_store_option); /* Show the bonding mode. */ static ssize_t bonding_show_mode(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); const struct bond_opt_value *val; val = bond_opt_get_val(BOND_OPT_MODE, BOND_MODE(bond)); return sysfs_emit(buf, "%s %d\n", val->string, BOND_MODE(bond)); } static DEVICE_ATTR(mode, 0644, bonding_show_mode, bonding_sysfs_store_option); /* Show the bonding transmit hash method. */ static ssize_t bonding_show_xmit_hash(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); const struct bond_opt_value *val; val = bond_opt_get_val(BOND_OPT_XMIT_HASH, bond->params.xmit_policy); return sysfs_emit(buf, "%s %d\n", val->string, bond->params.xmit_policy); } static DEVICE_ATTR(xmit_hash_policy, 0644, bonding_show_xmit_hash, bonding_sysfs_store_option); /* Show arp_validate. */ static ssize_t bonding_show_arp_validate(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); const struct bond_opt_value *val; val = bond_opt_get_val(BOND_OPT_ARP_VALIDATE, bond->params.arp_validate); return sysfs_emit(buf, "%s %d\n", val->string, bond->params.arp_validate); } static DEVICE_ATTR(arp_validate, 0644, bonding_show_arp_validate, bonding_sysfs_store_option); /* Show arp_all_targets. */ static ssize_t bonding_show_arp_all_targets(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); const struct bond_opt_value *val; val = bond_opt_get_val(BOND_OPT_ARP_ALL_TARGETS, bond->params.arp_all_targets); return sysfs_emit(buf, "%s %d\n", val->string, bond->params.arp_all_targets); } static DEVICE_ATTR(arp_all_targets, 0644, bonding_show_arp_all_targets, bonding_sysfs_store_option); /* Show fail_over_mac. */ static ssize_t bonding_show_fail_over_mac(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); const struct bond_opt_value *val; val = bond_opt_get_val(BOND_OPT_FAIL_OVER_MAC, bond->params.fail_over_mac); return sysfs_emit(buf, "%s %d\n", val->string, bond->params.fail_over_mac); } static DEVICE_ATTR(fail_over_mac, 0644, bonding_show_fail_over_mac, bonding_sysfs_store_option); /* Show the arp timer interval. */ static ssize_t bonding_show_arp_interval(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); return sysfs_emit(buf, "%d\n", bond->params.arp_interval); } static DEVICE_ATTR(arp_interval, 0644, bonding_show_arp_interval, bonding_sysfs_store_option); /* Show the arp targets. */ static ssize_t bonding_show_arp_targets(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); int i, res = 0; for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) { if (bond->params.arp_targets[i]) res += sysfs_emit_at(buf, res, "%pI4 ", &bond->params.arp_targets[i]); } if (res) buf[res-1] = '\n'; /* eat the leftover space */ return res; } static DEVICE_ATTR(arp_ip_target, 0644, bonding_show_arp_targets, bonding_sysfs_store_option); /* Show the arp missed max. */ static ssize_t bonding_show_missed_max(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); return sysfs_emit(buf, "%u\n", bond->params.missed_max); } static DEVICE_ATTR(arp_missed_max, 0644, bonding_show_missed_max, bonding_sysfs_store_option); /* Show the up and down delays. */ static ssize_t bonding_show_downdelay(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); return sysfs_emit(buf, "%d\n", bond->params.downdelay * bond->params.miimon); } static DEVICE_ATTR(downdelay, 0644, bonding_show_downdelay, bonding_sysfs_store_option); static ssize_t bonding_show_updelay(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); return sysfs_emit(buf, "%d\n", bond->params.updelay * bond->params.miimon); } static DEVICE_ATTR(updelay, 0644, bonding_show_updelay, bonding_sysfs_store_option); static ssize_t bonding_show_peer_notif_delay(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); return sysfs_emit(buf, "%d\n", bond->params.peer_notif_delay * bond->params.miimon); } static DEVICE_ATTR(peer_notif_delay, 0644, bonding_show_peer_notif_delay, bonding_sysfs_store_option); /* Show the LACP activity and interval. */ static ssize_t bonding_show_lacp_active(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); const struct bond_opt_value *val; val = bond_opt_get_val(BOND_OPT_LACP_ACTIVE, bond->params.lacp_active); return sysfs_emit(buf, "%s %d\n", val->string, bond->params.lacp_active); } static DEVICE_ATTR(lacp_active, 0644, bonding_show_lacp_active, bonding_sysfs_store_option); static ssize_t bonding_show_lacp_rate(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); const struct bond_opt_value *val; val = bond_opt_get_val(BOND_OPT_LACP_RATE, bond->params.lacp_fast); return sysfs_emit(buf, "%s %d\n", val->string, bond->params.lacp_fast); } static DEVICE_ATTR(lacp_rate, 0644, bonding_show_lacp_rate, bonding_sysfs_store_option); static ssize_t bonding_show_min_links(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); return sysfs_emit(buf, "%u\n", bond->params.min_links); } static DEVICE_ATTR(min_links, 0644, bonding_show_min_links, bonding_sysfs_store_option); static ssize_t bonding_show_ad_select(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); const struct bond_opt_value *val; val = bond_opt_get_val(BOND_OPT_AD_SELECT, bond->params.ad_select); return sysfs_emit(buf, "%s %d\n", val->string, bond->params.ad_select); } static DEVICE_ATTR(ad_select, 0644, bonding_show_ad_select, bonding_sysfs_store_option); /* Show the number of peer notifications to send after a failover event. */ static ssize_t bonding_show_num_peer_notif(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); return sysfs_emit(buf, "%d\n", bond->params.num_peer_notif); } static DEVICE_ATTR(num_grat_arp, 0644, bonding_show_num_peer_notif, bonding_sysfs_store_option); static DEVICE_ATTR(num_unsol_na, 0644, bonding_show_num_peer_notif, bonding_sysfs_store_option); /* Show the MII monitor interval. */ static ssize_t bonding_show_miimon(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); return sysfs_emit(buf, "%d\n", bond->params.miimon); } static DEVICE_ATTR(miimon, 0644, bonding_show_miimon, bonding_sysfs_store_option); /* Show the primary slave. */ static ssize_t bonding_show_primary(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); struct slave *primary; int count = 0; rcu_read_lock(); primary = rcu_dereference(bond->primary_slave); if (primary) count = sysfs_emit(buf, "%s\n", primary->dev->name); rcu_read_unlock(); return count; } static DEVICE_ATTR(primary, 0644, bonding_show_primary, bonding_sysfs_store_option); /* Show the primary_reselect flag. */ static ssize_t bonding_show_primary_reselect(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); const struct bond_opt_value *val; val = bond_opt_get_val(BOND_OPT_PRIMARY_RESELECT, bond->params.primary_reselect); return sysfs_emit(buf, "%s %d\n", val->string, bond->params.primary_reselect); } static DEVICE_ATTR(primary_reselect, 0644, bonding_show_primary_reselect, bonding_sysfs_store_option); /* Show the use_carrier flag. */ static ssize_t bonding_show_carrier(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); return sysfs_emit(buf, "%d\n", bond->params.use_carrier); } static DEVICE_ATTR(use_carrier, 0644, bonding_show_carrier, bonding_sysfs_store_option); /* Show currently active_slave. */ static ssize_t bonding_show_active_slave(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); struct net_device *slave_dev; int count = 0; rcu_read_lock(); slave_dev = bond_option_active_slave_get_rcu(bond); if (slave_dev) count = sysfs_emit(buf, "%s\n", slave_dev->name); rcu_read_unlock(); return count; } static DEVICE_ATTR(active_slave, 0644, bonding_show_active_slave, bonding_sysfs_store_option); /* Show link status of the bond interface. */ static ssize_t bonding_show_mii_status(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); bool active = netif_carrier_ok(bond->dev); return sysfs_emit(buf, "%s\n", active ? "up" : "down"); } static DEVICE_ATTR(mii_status, 0444, bonding_show_mii_status, NULL); /* Show current 802.3ad aggregator ID. */ static ssize_t bonding_show_ad_aggregator(struct device *d, struct device_attribute *attr, char *buf) { int count = 0; struct bonding *bond = to_bond(d); if (BOND_MODE(bond) == BOND_MODE_8023AD) { struct ad_info ad_info; count = sysfs_emit(buf, "%d\n", bond_3ad_get_active_agg_info(bond, &ad_info) ? 0 : ad_info.aggregator_id); } return count; } static DEVICE_ATTR(ad_aggregator, 0444, bonding_show_ad_aggregator, NULL); /* Show number of active 802.3ad ports. */ static ssize_t bonding_show_ad_num_ports(struct device *d, struct device_attribute *attr, char *buf) { int count = 0; struct bonding *bond = to_bond(d); if (BOND_MODE(bond) == BOND_MODE_8023AD) { struct ad_info ad_info; count = sysfs_emit(buf, "%d\n", bond_3ad_get_active_agg_info(bond, &ad_info) ? 0 : ad_info.ports); } return count; } static DEVICE_ATTR(ad_num_ports, 0444, bonding_show_ad_num_ports, NULL); /* Show current 802.3ad actor key. */ static ssize_t bonding_show_ad_actor_key(struct device *d, struct device_attribute *attr, char *buf) { int count = 0; struct bonding *bond = to_bond(d); if (BOND_MODE(bond) == BOND_MODE_8023AD && capable(CAP_NET_ADMIN)) { struct ad_info ad_info; count = sysfs_emit(buf, "%d\n", bond_3ad_get_active_agg_info(bond, &ad_info) ? 0 : ad_info.actor_key); } return count; } static DEVICE_ATTR(ad_actor_key, 0444, bonding_show_ad_actor_key, NULL); /* Show current 802.3ad partner key. */ static ssize_t bonding_show_ad_partner_key(struct device *d, struct device_attribute *attr, char *buf) { int count = 0; struct bonding *bond = to_bond(d); if (BOND_MODE(bond) == BOND_MODE_8023AD && capable(CAP_NET_ADMIN)) { struct ad_info ad_info; count = sysfs_emit(buf, "%d\n", bond_3ad_get_active_agg_info(bond, &ad_info) ? 0 : ad_info.partner_key); } return count; } static DEVICE_ATTR(ad_partner_key, 0444, bonding_show_ad_partner_key, NULL); /* Show current 802.3ad partner mac. */ static ssize_t bonding_show_ad_partner_mac(struct device *d, struct device_attribute *attr, char *buf) { int count = 0; struct bonding *bond = to_bond(d); if (BOND_MODE(bond) == BOND_MODE_8023AD && capable(CAP_NET_ADMIN)) { struct ad_info ad_info; if (!bond_3ad_get_active_agg_info(bond, &ad_info)) count = sysfs_emit(buf, "%pM\n", ad_info.partner_system); } return count; } static DEVICE_ATTR(ad_partner_mac, 0444, bonding_show_ad_partner_mac, NULL); /* Show the queue_ids of the slaves in the current bond. */ static ssize_t bonding_show_queue_id(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); struct list_head *iter; struct slave *slave; int res = 0; rcu_read_lock(); bond_for_each_slave_rcu(bond, slave, iter) { if (res > (PAGE_SIZE - IFNAMSIZ - 6)) { /* not enough space for another interface_name:queue_id pair */ if ((PAGE_SIZE - res) > 10) res = PAGE_SIZE - 10; res += sysfs_emit_at(buf, res, "++more++ "); break; } res += sysfs_emit_at(buf, res, "%s:%d ", slave->dev->name, READ_ONCE(slave->queue_id)); } if (res) buf[res-1] = '\n'; /* eat the leftover space */ rcu_read_unlock(); return res; } static DEVICE_ATTR(queue_id, 0644, bonding_show_queue_id, bonding_sysfs_store_option); /* Show the all_slaves_active flag. */ static ssize_t bonding_show_slaves_active(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); return sysfs_emit(buf, "%d\n", bond->params.all_slaves_active); } static DEVICE_ATTR(all_slaves_active, 0644, bonding_show_slaves_active, bonding_sysfs_store_option); /* Show the number of IGMP membership reports to send on link failure */ static ssize_t bonding_show_resend_igmp(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); return sysfs_emit(buf, "%d\n", bond->params.resend_igmp); } static DEVICE_ATTR(resend_igmp, 0644, bonding_show_resend_igmp, bonding_sysfs_store_option); static ssize_t bonding_show_lp_interval(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); return sysfs_emit(buf, "%d\n", bond->params.lp_interval); } static DEVICE_ATTR(lp_interval, 0644, bonding_show_lp_interval, bonding_sysfs_store_option); static ssize_t bonding_show_tlb_dynamic_lb(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); return sysfs_emit(buf, "%d\n", bond->params.tlb_dynamic_lb); } static DEVICE_ATTR(tlb_dynamic_lb, 0644, bonding_show_tlb_dynamic_lb, bonding_sysfs_store_option); static ssize_t bonding_show_packets_per_slave(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); unsigned int packets_per_slave = bond->params.packets_per_slave; return sysfs_emit(buf, "%u\n", packets_per_slave); } static DEVICE_ATTR(packets_per_slave, 0644, bonding_show_packets_per_slave, bonding_sysfs_store_option); static ssize_t bonding_show_ad_actor_sys_prio(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); if (BOND_MODE(bond) == BOND_MODE_8023AD && capable(CAP_NET_ADMIN)) return sysfs_emit(buf, "%hu\n", bond->params.ad_actor_sys_prio); return 0; } static DEVICE_ATTR(ad_actor_sys_prio, 0644, bonding_show_ad_actor_sys_prio, bonding_sysfs_store_option); static ssize_t bonding_show_ad_actor_system(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); if (BOND_MODE(bond) == BOND_MODE_8023AD && capable(CAP_NET_ADMIN)) return sysfs_emit(buf, "%pM\n", bond->params.ad_actor_system); return 0; } static DEVICE_ATTR(ad_actor_system, 0644, bonding_show_ad_actor_system, bonding_sysfs_store_option); static ssize_t bonding_show_ad_user_port_key(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); if (BOND_MODE(bond) == BOND_MODE_8023AD && capable(CAP_NET_ADMIN)) return sysfs_emit(buf, "%hu\n", bond->params.ad_user_port_key); return 0; } static DEVICE_ATTR(ad_user_port_key, 0644, bonding_show_ad_user_port_key, bonding_sysfs_store_option); static struct attribute *per_bond_attrs[] = { &dev_attr_slaves.attr, &dev_attr_mode.attr, &dev_attr_fail_over_mac.attr, &dev_attr_arp_validate.attr, &dev_attr_arp_all_targets.attr, &dev_attr_arp_interval.attr, &dev_attr_arp_ip_target.attr, &dev_attr_downdelay.attr, &dev_attr_updelay.attr, &dev_attr_peer_notif_delay.attr, &dev_attr_lacp_active.attr, &dev_attr_lacp_rate.attr, &dev_attr_ad_select.attr, &dev_attr_xmit_hash_policy.attr, &dev_attr_num_grat_arp.attr, &dev_attr_num_unsol_na.attr, &dev_attr_miimon.attr, &dev_attr_primary.attr, &dev_attr_primary_reselect.attr, &dev_attr_use_carrier.attr, &dev_attr_active_slave.attr, &dev_attr_mii_status.attr, &dev_attr_ad_aggregator.attr, &dev_attr_ad_num_ports.attr, &dev_attr_ad_actor_key.attr, &dev_attr_ad_partner_key.attr, &dev_attr_ad_partner_mac.attr, &dev_attr_queue_id.attr, &dev_attr_all_slaves_active.attr, &dev_attr_resend_igmp.attr, &dev_attr_min_links.attr, &dev_attr_lp_interval.attr, &dev_attr_packets_per_slave.attr, &dev_attr_tlb_dynamic_lb.attr, &dev_attr_ad_actor_sys_prio.attr, &dev_attr_ad_actor_system.attr, &dev_attr_ad_user_port_key.attr, &dev_attr_arp_missed_max.attr, NULL, }; static const struct attribute_group bonding_group = { .name = "bonding", .attrs = per_bond_attrs, }; /* Initialize sysfs. This sets up the bonding_masters file in * /sys/class/net. */ int __net_init bond_create_sysfs(struct bond_net *bn) { int ret; bn->class_attr_bonding_masters = class_attr_bonding_masters; sysfs_attr_init(&bn->class_attr_bonding_masters.attr); ret = netdev_class_create_file_ns(&bn->class_attr_bonding_masters, bn->net); /* Permit multiple loads of the module by ignoring failures to * create the bonding_masters sysfs file. Bonding devices * created by second or subsequent loads of the module will * not be listed in, or controllable by, bonding_masters, but * will have the usual "bonding" sysfs directory. * * This is done to preserve backwards compatibility for * initscripts/sysconfig, which load bonding multiple times to * configure multiple bonding devices. */ if (ret == -EEXIST) { /* Is someone being kinky and naming a device bonding_master? */ if (netdev_name_in_use(bn->net, class_attr_bonding_masters.attr.name)) pr_err("network device named %s already exists in sysfs\n", class_attr_bonding_masters.attr.name); ret = 0; } return ret; } /* Remove /sys/class/net/bonding_masters. */ void __net_exit bond_destroy_sysfs(struct bond_net *bn) { netdev_class_remove_file_ns(&bn->class_attr_bonding_masters, bn->net); } /* Initialize sysfs for each bond. This sets up and registers * the 'bondctl' directory for each individual bond under /sys/class/net. */ void bond_prepare_sysfs_group(struct bonding *bond) { bond->dev->sysfs_groups[0] = &bonding_group; }
1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 // SPDX-License-Identifier: GPL-2.0-or-later /* * kinect sensor device camera, gspca driver * * Copyright (C) 2011 Antonio Ospite <ospite@studenti.unina.it> * * Based on the OpenKinect project and libfreenect * http://openkinect.org/wiki/Init_Analysis * * Special thanks to Steven Toth and kernellabs.com for sponsoring a Kinect * sensor device which I tested the driver on. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define MODULE_NAME "kinect" #include "gspca.h" #define CTRL_TIMEOUT 500 MODULE_AUTHOR("Antonio Ospite <ospite@studenti.unina.it>"); MODULE_DESCRIPTION("GSPCA/Kinect Sensor Device USB Camera Driver"); MODULE_LICENSE("GPL"); static bool depth_mode; struct pkt_hdr { uint8_t magic[2]; uint8_t pad; uint8_t flag; uint8_t unk1; uint8_t seq; uint8_t unk2; uint8_t unk3; uint32_t timestamp; }; struct cam_hdr { uint8_t magic[2]; __le16 len; __le16 cmd; __le16 tag; }; /* specific webcam descriptor */ struct sd { struct gspca_dev gspca_dev; /* !! must be the first item */ uint16_t cam_tag; /* a sequence number for packets */ uint8_t stream_flag; /* to identify different stream types */ uint8_t obuf[0x400]; /* output buffer for control commands */ uint8_t ibuf[0x200]; /* input buffer for control commands */ }; #define MODE_640x480 0x0001 #define MODE_640x488 0x0002 #define MODE_1280x1024 0x0004 #define FORMAT_BAYER 0x0010 #define FORMAT_UYVY 0x0020 #define FORMAT_Y10B 0x0040 #define FPS_HIGH 0x0100 static const struct v4l2_pix_format depth_camera_mode[] = { {640, 480, V4L2_PIX_FMT_Y10BPACK, V4L2_FIELD_NONE, .bytesperline = 640 * 10 / 8, .sizeimage = 640 * 480 * 10 / 8, .colorspace = V4L2_COLORSPACE_SRGB, .priv = MODE_640x488 | FORMAT_Y10B}, }; static const struct v4l2_pix_format video_camera_mode[] = { {640, 480, V4L2_PIX_FMT_SGRBG8, V4L2_FIELD_NONE, .bytesperline = 640, .sizeimage = 640 * 480, .colorspace = V4L2_COLORSPACE_SRGB, .priv = MODE_640x480 | FORMAT_BAYER | FPS_HIGH}, {640, 480, V4L2_PIX_FMT_UYVY, V4L2_FIELD_NONE, .bytesperline = 640 * 2, .sizeimage = 640 * 480 * 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = MODE_640x480 | FORMAT_UYVY}, {1280, 1024, V4L2_PIX_FMT_SGRBG8, V4L2_FIELD_NONE, .bytesperline = 1280, .sizeimage = 1280 * 1024, .colorspace = V4L2_COLORSPACE_SRGB, .priv = MODE_1280x1024 | FORMAT_BAYER}, {640, 488, V4L2_PIX_FMT_Y10BPACK, V4L2_FIELD_NONE, .bytesperline = 640 * 10 / 8, .sizeimage = 640 * 488 * 10 / 8, .colorspace = V4L2_COLORSPACE_SRGB, .priv = MODE_640x488 | FORMAT_Y10B | FPS_HIGH}, {1280, 1024, V4L2_PIX_FMT_Y10BPACK, V4L2_FIELD_NONE, .bytesperline = 1280 * 10 / 8, .sizeimage = 1280 * 1024 * 10 / 8, .colorspace = V4L2_COLORSPACE_SRGB, .priv = MODE_1280x1024 | FORMAT_Y10B}, }; static int kinect_write(struct usb_device *udev, uint8_t *data, uint16_t wLength) { return usb_control_msg(udev, usb_sndctrlpipe(udev, 0), 0x00, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, 0, data, wLength, CTRL_TIMEOUT); } static int kinect_read(struct usb_device *udev, uint8_t *data, uint16_t wLength) { return usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), 0x00, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, 0, data, wLength, CTRL_TIMEOUT); } static int send_cmd(struct gspca_dev *gspca_dev, uint16_t cmd, void *cmdbuf, unsigned int cmd_len, void *replybuf, unsigned int reply_len) { struct sd *sd = (struct sd *) gspca_dev; struct usb_device *udev = gspca_dev->dev; int res, actual_len; uint8_t *obuf = sd->obuf; uint8_t *ibuf = sd->ibuf; struct cam_hdr *chdr = (void *)obuf; struct cam_hdr *rhdr = (void *)ibuf; if (cmd_len & 1 || cmd_len > (0x400 - sizeof(*chdr))) { pr_err("send_cmd: Invalid command length (0x%x)\n", cmd_len); return -1; } chdr->magic[0] = 0x47; chdr->magic[1] = 0x4d; chdr->cmd = cpu_to_le16(cmd); chdr->tag = cpu_to_le16(sd->cam_tag); chdr->len = cpu_to_le16(cmd_len / 2); memcpy(obuf+sizeof(*chdr), cmdbuf, cmd_len); res = kinect_write(udev, obuf, cmd_len + sizeof(*chdr)); gspca_dbg(gspca_dev, D_USBO, "Control cmd=%04x tag=%04x len=%04x: %d\n", cmd, sd->cam_tag, cmd_len, res); if (res < 0) { pr_err("send_cmd: Output control transfer failed (%d)\n", res); return res; } do { actual_len = kinect_read(udev, ibuf, 0x200); } while (actual_len == 0); gspca_dbg(gspca_dev, D_USBO, "Control reply: %d\n", actual_len); if (actual_len < (int)sizeof(*rhdr)) { pr_err("send_cmd: Input control transfer failed (%d)\n", actual_len); return actual_len < 0 ? actual_len : -EREMOTEIO; } actual_len -= sizeof(*rhdr); if (rhdr->magic[0] != 0x52 || rhdr->magic[1] != 0x42) { pr_err("send_cmd: Bad magic %02x %02x\n", rhdr->magic[0], rhdr->magic[1]); return -1; } if (rhdr->cmd != chdr->cmd) { pr_err("send_cmd: Bad cmd %02x != %02x\n", rhdr->cmd, chdr->cmd); return -1; } if (rhdr->tag != chdr->tag) { pr_err("send_cmd: Bad tag %04x != %04x\n", rhdr->tag, chdr->tag); return -1; } if (le16_to_cpu(rhdr->len) != (actual_len/2)) { pr_err("send_cmd: Bad len %04x != %04x\n", le16_to_cpu(rhdr->len), (int)(actual_len/2)); return -1; } if (actual_len > reply_len) { pr_warn("send_cmd: Data buffer is %d bytes long, but got %d bytes\n", reply_len, actual_len); memcpy(replybuf, ibuf+sizeof(*rhdr), reply_len); } else { memcpy(replybuf, ibuf+sizeof(*rhdr), actual_len); } sd->cam_tag++; return actual_len; } static int write_register(struct gspca_dev *gspca_dev, uint16_t reg, uint16_t data) { uint16_t reply[2]; __le16 cmd[2]; int res; cmd[0] = cpu_to_le16(reg); cmd[1] = cpu_to_le16(data); gspca_dbg(gspca_dev, D_USBO, "Write Reg 0x%04x <= 0x%02x\n", reg, data); res = send_cmd(gspca_dev, 0x03, cmd, 4, reply, 4); if (res < 0) return res; if (res != 2) { pr_warn("send_cmd returned %d [%04x %04x], 0000 expected\n", res, reply[0], reply[1]); } return 0; } /* this function is called at probe time */ static int sd_config_video(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct sd *sd = (struct sd *) gspca_dev; struct cam *cam; sd->cam_tag = 0; sd->stream_flag = 0x80; cam = &gspca_dev->cam; cam->cam_mode = video_camera_mode; cam->nmodes = ARRAY_SIZE(video_camera_mode); gspca_dev->xfer_ep = 0x81; #if 0 /* Setting those values is not needed for video stream */ cam->npkt = 15; gspca_dev->pkt_size = 960 * 2; #endif return 0; } static int sd_config_depth(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct sd *sd = (struct sd *) gspca_dev; struct cam *cam; sd->cam_tag = 0; sd->stream_flag = 0x70; cam = &gspca_dev->cam; cam->cam_mode = depth_camera_mode; cam->nmodes = ARRAY_SIZE(depth_camera_mode); gspca_dev->xfer_ep = 0x82; return 0; } /* this function is called at probe and resume time */ static int sd_init(struct gspca_dev *gspca_dev) { gspca_dbg(gspca_dev, D_PROBE, "Kinect Camera device.\n"); return 0; } static int sd_start_video(struct gspca_dev *gspca_dev) { int mode; uint8_t fmt_reg, fmt_val; uint8_t res_reg, res_val; uint8_t fps_reg, fps_val; uint8_t mode_val; mode = gspca_dev->cam.cam_mode[gspca_dev->curr_mode].priv; if (mode & FORMAT_Y10B) { fmt_reg = 0x19; res_reg = 0x1a; fps_reg = 0x1b; mode_val = 0x03; } else { fmt_reg = 0x0c; res_reg = 0x0d; fps_reg = 0x0e; mode_val = 0x01; } /* format */ if (mode & FORMAT_UYVY) fmt_val = 0x05; else fmt_val = 0x00; if (mode & MODE_1280x1024) res_val = 0x02; else res_val = 0x01; if (mode & FPS_HIGH) fps_val = 0x1e; else fps_val = 0x0f; /* turn off IR-reset function */ write_register(gspca_dev, 0x105, 0x00); /* Reset video stream */ write_register(gspca_dev, 0x05, 0x00); /* Due to some ridiculous condition in the firmware, we have to start * and stop the depth stream before the camera will hand us 1280x1024 * IR. This is a stupid workaround, but we've yet to find a better * solution. * * Thanks to Drew Fisher for figuring this out. */ if (mode & (FORMAT_Y10B | MODE_1280x1024)) { write_register(gspca_dev, 0x13, 0x01); write_register(gspca_dev, 0x14, 0x1e); write_register(gspca_dev, 0x06, 0x02); write_register(gspca_dev, 0x06, 0x00); } write_register(gspca_dev, fmt_reg, fmt_val); write_register(gspca_dev, res_reg, res_val); write_register(gspca_dev, fps_reg, fps_val); /* Start video stream */ write_register(gspca_dev, 0x05, mode_val); /* disable Hflip */ write_register(gspca_dev, 0x47, 0x00); return 0; } static int sd_start_depth(struct gspca_dev *gspca_dev) { /* turn off IR-reset function */ write_register(gspca_dev, 0x105, 0x00); /* reset depth stream */ write_register(gspca_dev, 0x06, 0x00); /* Depth Stream Format 0x03: 11 bit stream | 0x02: 10 bit */ write_register(gspca_dev, 0x12, 0x02); /* Depth Stream Resolution 1: standard (640x480) */ write_register(gspca_dev, 0x13, 0x01); /* Depth Framerate / 0x1e (30): 30 fps */ write_register(gspca_dev, 0x14, 0x1e); /* Depth Stream Control / 2: Open Depth Stream */ write_register(gspca_dev, 0x06, 0x02); /* disable depth hflip / LSB = 0: Smoothing Disabled */ write_register(gspca_dev, 0x17, 0x00); return 0; } static void sd_stopN_video(struct gspca_dev *gspca_dev) { /* reset video stream */ write_register(gspca_dev, 0x05, 0x00); } static void sd_stopN_depth(struct gspca_dev *gspca_dev) { /* reset depth stream */ write_register(gspca_dev, 0x06, 0x00); } static void sd_pkt_scan(struct gspca_dev *gspca_dev, u8 *__data, int len) { struct sd *sd = (struct sd *) gspca_dev; struct pkt_hdr *hdr = (void *)__data; uint8_t *data = __data + sizeof(*hdr); int datalen = len - sizeof(*hdr); uint8_t sof = sd->stream_flag | 1; uint8_t mof = sd->stream_flag | 2; uint8_t eof = sd->stream_flag | 5; if (len < 12) return; if (hdr->magic[0] != 'R' || hdr->magic[1] != 'B') { pr_warn("[Stream %02x] Invalid magic %02x%02x\n", sd->stream_flag, hdr->magic[0], hdr->magic[1]); return; } if (hdr->flag == sof) gspca_frame_add(gspca_dev, FIRST_PACKET, data, datalen); else if (hdr->flag == mof) gspca_frame_add(gspca_dev, INTER_PACKET, data, datalen); else if (hdr->flag == eof) gspca_frame_add(gspca_dev, LAST_PACKET, data, datalen); else pr_warn("Packet type not recognized...\n"); } /* sub-driver description */ static const struct sd_desc sd_desc_video = { .name = MODULE_NAME, .config = sd_config_video, .init = sd_init, .start = sd_start_video, .stopN = sd_stopN_video, .pkt_scan = sd_pkt_scan, /* .get_streamparm = sd_get_streamparm, .set_streamparm = sd_set_streamparm, */ }; static const struct sd_desc sd_desc_depth = { .name = MODULE_NAME, .config = sd_config_depth, .init = sd_init, .start = sd_start_depth, .stopN = sd_stopN_depth, .pkt_scan = sd_pkt_scan, /* .get_streamparm = sd_get_streamparm, .set_streamparm = sd_set_streamparm, */ }; /* -- module initialisation -- */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x045e, 0x02ae)}, {USB_DEVICE(0x045e, 0x02bf)}, {} }; MODULE_DEVICE_TABLE(usb, device_table); /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { if (depth_mode) return gspca_dev_probe(intf, id, &sd_desc_depth, sizeof(struct sd), THIS_MODULE); else return gspca_dev_probe(intf, id, &sd_desc_video, sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver); module_param(depth_mode, bool, 0644); MODULE_PARM_DESC(depth_mode, "0=video 1=depth");
31 31 31 30 30 30 31 30 30 31 31 30 31 31 30 31 31 30 31 30 30 31 30 31 30 30 30 31 30 31 31 31 31 30 31 31 31 25 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _BCACHEFS_FORMAT_H #define _BCACHEFS_FORMAT_H /* * bcachefs on disk data structures * * OVERVIEW: * * There are three main types of on disk data structures in bcachefs (this is * reduced from 5 in bcache) * * - superblock * - journal * - btree * * The btree is the primary structure; most metadata exists as keys in the * various btrees. There are only a small number of btrees, they're not * sharded - we have one btree for extents, another for inodes, et cetera. * * SUPERBLOCK: * * The superblock contains the location of the journal, the list of devices in * the filesystem, and in general any metadata we need in order to decide * whether we can start a filesystem or prior to reading the journal/btree * roots. * * The superblock is extensible, and most of the contents of the superblock are * in variable length, type tagged fields; see struct bch_sb_field. * * Backup superblocks do not reside in a fixed location; also, superblocks do * not have a fixed size. To locate backup superblocks we have struct * bch_sb_layout; we store a copy of this inside every superblock, and also * before the first superblock. * * JOURNAL: * * The journal primarily records btree updates in the order they occurred; * journal replay consists of just iterating over all the keys in the open * journal entries and re-inserting them into the btrees. * * The journal also contains entry types for the btree roots, and blacklisted * journal sequence numbers (see journal_seq_blacklist.c). * * BTREE: * * bcachefs btrees are copy on write b+ trees, where nodes are big (typically * 128k-256k) and log structured. We use struct btree_node for writing the first * entry in a given node (offset 0), and struct btree_node_entry for all * subsequent writes. * * After the header, btree node entries contain a list of keys in sorted order. * Values are stored inline with the keys; since values are variable length (and * keys effectively are variable length too, due to packing) we can't do random * access without building up additional in memory tables in the btree node read * path. * * BTREE KEYS (struct bkey): * * The various btrees share a common format for the key - so as to avoid * switching in fastpath lookup/comparison code - but define their own * structures for the key values. * * The size of a key/value pair is stored as a u8 in units of u64s, so the max * size is just under 2k. The common part also contains a type tag for the * value, and a format field indicating whether the key is packed or not (and * also meant to allow adding new key fields in the future, if desired). * * bkeys, when stored within a btree node, may also be packed. In that case, the * bkey_format in that node is used to unpack it. Packed bkeys mean that we can * be generous with field sizes in the common part of the key format (64 bit * inode number, 64 bit offset, 96 bit version field, etc.) for negligible cost. */ #include <asm/types.h> #include <asm/byteorder.h> #include <linux/kernel.h> #include <linux/uuid.h> #include <uapi/linux/magic.h> #include "vstructs.h" #ifdef __KERNEL__ typedef uuid_t __uuid_t; #endif #define BITMASK(name, type, field, offset, end) \ static const __maybe_unused unsigned name##_OFFSET = offset; \ static const __maybe_unused unsigned name##_BITS = (end - offset); \ \ static inline __u64 name(const type *k) \ { \ return (k->field >> offset) & ~(~0ULL << (end - offset)); \ } \ \ static inline void SET_##name(type *k, __u64 v) \ { \ k->field &= ~(~(~0ULL << (end - offset)) << offset); \ k->field |= (v & ~(~0ULL << (end - offset))) << offset; \ } #define LE_BITMASK(_bits, name, type, field, offset, end) \ static const __maybe_unused unsigned name##_OFFSET = offset; \ static const __maybe_unused unsigned name##_BITS = (end - offset); \ static const __maybe_unused __u##_bits name##_MAX = (1ULL << (end - offset)) - 1;\ \ static inline __u64 name(const type *k) \ { \ return (__le##_bits##_to_cpu(k->field) >> offset) & \ ~(~0ULL << (end - offset)); \ } \ \ static inline void SET_##name(type *k, __u64 v) \ { \ __u##_bits new = __le##_bits##_to_cpu(k->field); \ \ new &= ~(~(~0ULL << (end - offset)) << offset); \ new |= (v & ~(~0ULL << (end - offset))) << offset; \ k->field = __cpu_to_le##_bits(new); \ } #define LE16_BITMASK(n, t, f, o, e) LE_BITMASK(16, n, t, f, o, e) #define LE32_BITMASK(n, t, f, o, e) LE_BITMASK(32, n, t, f, o, e) #define LE64_BITMASK(n, t, f, o, e) LE_BITMASK(64, n, t, f, o, e) struct bkey_format { __u8 key_u64s; __u8 nr_fields; /* One unused slot for now: */ __u8 bits_per_field[6]; __le64 field_offset[6]; }; /* Btree keys - all units are in sectors */ struct bpos { /* * Word order matches machine byte order - btree code treats a bpos as a * single large integer, for search/comparison purposes * * Note that wherever a bpos is embedded in another on disk data * structure, it has to be byte swabbed when reading in metadata that * wasn't written in native endian order: */ #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ __u32 snapshot; __u64 offset; __u64 inode; #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ __u64 inode; __u64 offset; /* Points to end of extent - sectors */ __u32 snapshot; #else #error edit for your odd byteorder. #endif } __packed #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ __aligned(4) #endif ; #define KEY_INODE_MAX ((__u64)~0ULL) #define KEY_OFFSET_MAX ((__u64)~0ULL) #define KEY_SNAPSHOT_MAX ((__u32)~0U) #define KEY_SIZE_MAX ((__u32)~0U) static inline struct bpos SPOS(__u64 inode, __u64 offset, __u32 snapshot) { return (struct bpos) { .inode = inode, .offset = offset, .snapshot = snapshot, }; } #define POS_MIN SPOS(0, 0, 0) #define POS_MAX SPOS(KEY_INODE_MAX, KEY_OFFSET_MAX, 0) #define SPOS_MAX SPOS(KEY_INODE_MAX, KEY_OFFSET_MAX, KEY_SNAPSHOT_MAX) #define POS(_inode, _offset) SPOS(_inode, _offset, 0) /* Empty placeholder struct, for container_of() */ struct bch_val { __u64 __nothing[0]; }; struct bversion { #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ __u64 lo; __u32 hi; #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ __u32 hi; __u64 lo; #endif } __packed #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ __aligned(4) #endif ; struct bkey { /* Size of combined key and value, in u64s */ __u8 u64s; /* Format of key (0 for format local to btree node) */ #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 format:7, needs_whiteout:1; #elif defined (__BIG_ENDIAN_BITFIELD) __u8 needs_whiteout:1, format:7; #else #error edit for your odd byteorder. #endif /* Type of the value */ __u8 type; #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ __u8 pad[1]; struct bversion bversion; __u32 size; /* extent size, in sectors */ struct bpos p; #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ struct bpos p; __u32 size; /* extent size, in sectors */ struct bversion bversion; __u8 pad[1]; #endif } __packed #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ /* * The big-endian version of bkey can't be compiled by rustc with the "aligned" * attr since it doesn't allow types to have both "packed" and "aligned" attrs. * So for Rust compatibility, don't include this. It can be included in the LE * version because the "packed" attr is redundant in that case. * * History: (quoting Kent) * * Specifically, when i was designing bkey, I wanted the header to be no * bigger than necessary so that bkey_packed could use the rest. That means that * decently offten extent keys will fit into only 8 bytes, instead of spilling over * to 16. * * But packed_bkey treats the part after the header - the packed section - * as a single multi word, variable length integer. And bkey, the unpacked * version, is just a special case version of a bkey_packed; all the packed * bkey code will work on keys in any packed format, the in-memory * representation of an unpacked key also is just one type of packed key... * * So that constrains the key part of a bkig endian bkey to start right * after the header. * * If we ever do a bkey_v2 and need to expand the hedaer by another byte for * some reason - that will clean up this wart. */ __aligned(8) #endif ; struct bkey_packed { __u64 _data[0]; /* Size of combined key and value, in u64s */ __u8 u64s; /* Format of key (0 for format local to btree node) */ /* * XXX: next incompat on disk format change, switch format and * needs_whiteout - bkey_packed() will be cheaper if format is the high * bits of the bitfield */ #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 format:7, needs_whiteout:1; #elif defined (__BIG_ENDIAN_BITFIELD) __u8 needs_whiteout:1, format:7; #endif /* Type of the value */ __u8 type; __u8 key_start[0]; /* * We copy bkeys with struct assignment in various places, and while * that shouldn't be done with packed bkeys we can't disallow it in C, * and it's legal to cast a bkey to a bkey_packed - so padding it out * to the same size as struct bkey should hopefully be safest. */ __u8 pad[sizeof(struct bkey) - 3]; } __packed __aligned(8); typedef struct { __le64 lo; __le64 hi; } bch_le128; #define BKEY_U64s (sizeof(struct bkey) / sizeof(__u64)) #define BKEY_U64s_MAX U8_MAX #define BKEY_VAL_U64s_MAX (BKEY_U64s_MAX - BKEY_U64s) #define KEY_PACKED_BITS_START 24 #define KEY_FORMAT_LOCAL_BTREE 0 #define KEY_FORMAT_CURRENT 1 enum bch_bkey_fields { BKEY_FIELD_INODE, BKEY_FIELD_OFFSET, BKEY_FIELD_SNAPSHOT, BKEY_FIELD_SIZE, BKEY_FIELD_VERSION_HI, BKEY_FIELD_VERSION_LO, BKEY_NR_FIELDS, }; #define bkey_format_field(name, field) \ [BKEY_FIELD_##name] = (sizeof(((struct bkey *) NULL)->field) * 8) #define BKEY_FORMAT_CURRENT \ ((struct bkey_format) { \ .key_u64s = BKEY_U64s, \ .nr_fields = BKEY_NR_FIELDS, \ .bits_per_field = { \ bkey_format_field(INODE, p.inode), \ bkey_format_field(OFFSET, p.offset), \ bkey_format_field(SNAPSHOT, p.snapshot), \ bkey_format_field(SIZE, size), \ bkey_format_field(VERSION_HI, bversion.hi), \ bkey_format_field(VERSION_LO, bversion.lo), \ }, \ }) /* bkey with inline value */ struct bkey_i { __u64 _data[0]; struct bkey k; struct bch_val v; }; #define POS_KEY(_pos) \ ((struct bkey) { \ .u64s = BKEY_U64s, \ .format = KEY_FORMAT_CURRENT, \ .p = _pos, \ }) #define KEY(_inode, _offset, _size) \ ((struct bkey) { \ .u64s = BKEY_U64s, \ .format = KEY_FORMAT_CURRENT, \ .p = POS(_inode, _offset), \ .size = _size, \ }) static inline void bkey_init(struct bkey *k) { *k = KEY(0, 0, 0); } #define bkey_bytes(_k) ((_k)->u64s * sizeof(__u64)) #define __BKEY_PADDED(key, pad) \ struct bkey_i key; __u64 key ## _pad[pad] /* * - DELETED keys are used internally to mark keys that should be ignored but * override keys in composition order. Their version number is ignored. * * - DISCARDED keys indicate that the data is all 0s because it has been * discarded. DISCARDs may have a version; if the version is nonzero the key * will be persistent, otherwise the key will be dropped whenever the btree * node is rewritten (like DELETED keys). * * - ERROR: any read of the data returns a read error, as the data was lost due * to a failing device. Like DISCARDED keys, they can be removed (overridden) * by new writes or cluster-wide GC. Node repair can also overwrite them with * the same or a more recent version number, but not with an older version * number. * * - WHITEOUT: for hash table btrees */ #define BCH_BKEY_TYPES() \ x(deleted, 0) \ x(whiteout, 1) \ x(error, 2) \ x(cookie, 3) \ x(hash_whiteout, 4) \ x(btree_ptr, 5) \ x(extent, 6) \ x(reservation, 7) \ x(inode, 8) \ x(inode_generation, 9) \ x(dirent, 10) \ x(xattr, 11) \ x(alloc, 12) \ x(quota, 13) \ x(stripe, 14) \ x(reflink_p, 15) \ x(reflink_v, 16) \ x(inline_data, 17) \ x(btree_ptr_v2, 18) \ x(indirect_inline_data, 19) \ x(alloc_v2, 20) \ x(subvolume, 21) \ x(snapshot, 22) \ x(inode_v2, 23) \ x(alloc_v3, 24) \ x(set, 25) \ x(lru, 26) \ x(alloc_v4, 27) \ x(backpointer, 28) \ x(inode_v3, 29) \ x(bucket_gens, 30) \ x(snapshot_tree, 31) \ x(logged_op_truncate, 32) \ x(logged_op_finsert, 33) \ x(accounting, 34) \ x(inode_alloc_cursor, 35) enum bch_bkey_type { #define x(name, nr) KEY_TYPE_##name = nr, BCH_BKEY_TYPES() #undef x KEY_TYPE_MAX, }; struct bch_deleted { struct bch_val v; }; struct bch_whiteout { struct bch_val v; }; struct bch_error { struct bch_val v; }; struct bch_cookie { struct bch_val v; __le64 cookie; }; struct bch_hash_whiteout { struct bch_val v; }; struct bch_set { struct bch_val v; }; /* 128 bits, sufficient for cryptographic MACs: */ struct bch_csum { __le64 lo; __le64 hi; } __packed __aligned(8); struct bch_backpointer { struct bch_val v; __u8 btree_id; __u8 level; __u8 data_type; __u8 bucket_gen; __u32 pad; __u32 bucket_len; struct bpos pos; } __packed __aligned(8); /* Optional/variable size superblock sections: */ struct bch_sb_field { __u64 _data[0]; __le32 u64s; __le32 type; }; #define BCH_SB_FIELDS() \ x(journal, 0) \ x(members_v1, 1) \ x(crypt, 2) \ x(replicas_v0, 3) \ x(quota, 4) \ x(disk_groups, 5) \ x(clean, 6) \ x(replicas, 7) \ x(journal_seq_blacklist, 8) \ x(journal_v2, 9) \ x(counters, 10) \ x(members_v2, 11) \ x(errors, 12) \ x(ext, 13) \ x(downgrade, 14) #include "alloc_background_format.h" #include "dirent_format.h" #include "disk_accounting_format.h" #include "disk_groups_format.h" #include "extents_format.h" #include "ec_format.h" #include "inode_format.h" #include "journal_seq_blacklist_format.h" #include "logged_ops_format.h" #include "lru_format.h" #include "quota_format.h" #include "reflink_format.h" #include "replicas_format.h" #include "snapshot_format.h" #include "subvolume_format.h" #include "sb-counters_format.h" #include "sb-downgrade_format.h" #include "sb-errors_format.h" #include "sb-members_format.h" #include "xattr_format.h" enum bch_sb_field_type { #define x(f, nr) BCH_SB_FIELD_##f = nr, BCH_SB_FIELDS() #undef x BCH_SB_FIELD_NR }; /* * Most superblock fields are replicated in all device's superblocks - a few are * not: */ #define BCH_SINGLE_DEVICE_SB_FIELDS \ ((1U << BCH_SB_FIELD_journal)| \ (1U << BCH_SB_FIELD_journal_v2)) /* BCH_SB_FIELD_journal: */ struct bch_sb_field_journal { struct bch_sb_field field; __le64 buckets[]; }; struct bch_sb_field_journal_v2 { struct bch_sb_field field; struct bch_sb_field_journal_v2_entry { __le64 start; __le64 nr; } d[]; }; /* BCH_SB_FIELD_crypt: */ struct nonce { __le32 d[4]; }; struct bch_key { __le64 key[4]; }; #define BCH_KEY_MAGIC \ (((__u64) 'b' << 0)|((__u64) 'c' << 8)| \ ((__u64) 'h' << 16)|((__u64) '*' << 24)| \ ((__u64) '*' << 32)|((__u64) 'k' << 40)| \ ((__u64) 'e' << 48)|((__u64) 'y' << 56)) struct bch_encrypted_key { __le64 magic; struct bch_key key; }; /* * If this field is present in the superblock, it stores an encryption key which * is used encrypt all other data/metadata. The key will normally be encrypted * with the key userspace provides, but if encryption has been turned off we'll * just store the master key unencrypted in the superblock so we can access the * previously encrypted data. */ struct bch_sb_field_crypt { struct bch_sb_field field; __le64 flags; __le64 kdf_flags; struct bch_encrypted_key key; }; LE64_BITMASK(BCH_CRYPT_KDF_TYPE, struct bch_sb_field_crypt, flags, 0, 4); enum bch_kdf_types { BCH_KDF_SCRYPT = 0, BCH_KDF_NR = 1, }; /* stored as base 2 log of scrypt params: */ LE64_BITMASK(BCH_KDF_SCRYPT_N, struct bch_sb_field_crypt, kdf_flags, 0, 16); LE64_BITMASK(BCH_KDF_SCRYPT_R, struct bch_sb_field_crypt, kdf_flags, 16, 32); LE64_BITMASK(BCH_KDF_SCRYPT_P, struct bch_sb_field_crypt, kdf_flags, 32, 48); /* * On clean shutdown, store btree roots and current journal sequence number in * the superblock: */ struct jset_entry { __le16 u64s; __u8 btree_id; __u8 level; __u8 type; /* designates what this jset holds */ __u8 pad[3]; struct bkey_i start[0]; __u64 _data[]; }; struct bch_sb_field_clean { struct bch_sb_field field; __le32 flags; __le16 _read_clock; /* no longer used */ __le16 _write_clock; __le64 journal_seq; struct jset_entry start[0]; __u64 _data[]; }; struct bch_sb_field_ext { struct bch_sb_field field; __le64 recovery_passes_required[2]; __le64 errors_silent[8]; __le64 btrees_lost_data; }; /* Superblock: */ /* * New versioning scheme: * One common version number for all on disk data structures - superblock, btree * nodes, journal entries */ #define BCH_VERSION_MAJOR(_v) ((__u16) ((_v) >> 10)) #define BCH_VERSION_MINOR(_v) ((__u16) ((_v) & ~(~0U << 10))) #define BCH_VERSION(_major, _minor) (((_major) << 10)|(_minor) << 0) /* * field 1: version name * field 2: BCH_VERSION(major, minor) * field 3: recovery passess required on upgrade */ #define BCH_METADATA_VERSIONS() \ x(bkey_renumber, BCH_VERSION(0, 10)) \ x(inode_btree_change, BCH_VERSION(0, 11)) \ x(snapshot, BCH_VERSION(0, 12)) \ x(inode_backpointers, BCH_VERSION(0, 13)) \ x(btree_ptr_sectors_written, BCH_VERSION(0, 14)) \ x(snapshot_2, BCH_VERSION(0, 15)) \ x(reflink_p_fix, BCH_VERSION(0, 16)) \ x(subvol_dirent, BCH_VERSION(0, 17)) \ x(inode_v2, BCH_VERSION(0, 18)) \ x(freespace, BCH_VERSION(0, 19)) \ x(alloc_v4, BCH_VERSION(0, 20)) \ x(new_data_types, BCH_VERSION(0, 21)) \ x(backpointers, BCH_VERSION(0, 22)) \ x(inode_v3, BCH_VERSION(0, 23)) \ x(unwritten_extents, BCH_VERSION(0, 24)) \ x(bucket_gens, BCH_VERSION(0, 25)) \ x(lru_v2, BCH_VERSION(0, 26)) \ x(fragmentation_lru, BCH_VERSION(0, 27)) \ x(no_bps_in_alloc_keys, BCH_VERSION(0, 28)) \ x(snapshot_trees, BCH_VERSION(0, 29)) \ x(major_minor, BCH_VERSION(1, 0)) \ x(snapshot_skiplists, BCH_VERSION(1, 1)) \ x(deleted_inodes, BCH_VERSION(1, 2)) \ x(rebalance_work, BCH_VERSION(1, 3)) \ x(member_seq, BCH_VERSION(1, 4)) \ x(subvolume_fs_parent, BCH_VERSION(1, 5)) \ x(btree_subvolume_children, BCH_VERSION(1, 6)) \ x(mi_btree_bitmap, BCH_VERSION(1, 7)) \ x(bucket_stripe_sectors, BCH_VERSION(1, 8)) \ x(disk_accounting_v2, BCH_VERSION(1, 9)) \ x(disk_accounting_v3, BCH_VERSION(1, 10)) \ x(disk_accounting_inum, BCH_VERSION(1, 11)) \ x(rebalance_work_acct_fix, BCH_VERSION(1, 12)) \ x(inode_has_child_snapshots, BCH_VERSION(1, 13)) \ x(backpointer_bucket_gen, BCH_VERSION(1, 14)) \ x(disk_accounting_big_endian, BCH_VERSION(1, 15)) \ x(reflink_p_may_update_opts, BCH_VERSION(1, 16)) \ x(inode_depth, BCH_VERSION(1, 17)) \ x(persistent_inode_cursors, BCH_VERSION(1, 18)) \ x(autofix_errors, BCH_VERSION(1, 19)) \ x(directory_size, BCH_VERSION(1, 20)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, #define x(t, n) bcachefs_metadata_version_##t = n, BCH_METADATA_VERSIONS() #undef x bcachefs_metadata_version_max }; static const __maybe_unused unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_rebalance_work; #define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1) #define BCH_SB_SECTOR 8 #define BCH_SB_LAYOUT_SIZE_BITS_MAX 16 /* 32 MB */ struct bch_sb_layout { __uuid_t magic; /* bcachefs superblock UUID */ __u8 layout_type; __u8 sb_max_size_bits; /* base 2 of 512 byte sectors */ __u8 nr_superblocks; __u8 pad[5]; __le64 sb_offset[61]; } __packed __aligned(8); #define BCH_SB_LAYOUT_SECTOR 7 /* * @offset - sector where this sb was written * @version - on disk format version * @version_min - Oldest metadata version this filesystem contains; so we can * safely drop compatibility code and refuse to mount filesystems * we'd need it for * @magic - identifies as a bcachefs superblock (BCHFS_MAGIC) * @seq - incremented each time superblock is written * @uuid - used for generating various magic numbers and identifying * member devices, never changes * @user_uuid - user visible UUID, may be changed * @label - filesystem label * @seq - identifies most recent superblock, incremented each time * superblock is written * @features - enabled incompatible features */ struct bch_sb { struct bch_csum csum; __le16 version; __le16 version_min; __le16 pad[2]; __uuid_t magic; __uuid_t uuid; __uuid_t user_uuid; __u8 label[BCH_SB_LABEL_SIZE]; __le64 offset; __le64 seq; __le16 block_size; __u8 dev_idx; __u8 nr_devices; __le32 u64s; __le64 time_base_lo; __le32 time_base_hi; __le32 time_precision; __le64 flags[7]; __le64 write_time; __le64 features[2]; __le64 compat[2]; struct bch_sb_layout layout; struct bch_sb_field start[0]; __le64 _data[]; } __packed __aligned(8); /* * Flags: * BCH_SB_INITALIZED - set on first mount * BCH_SB_CLEAN - did we shut down cleanly? Just a hint, doesn't affect * behaviour of mount/recovery path: * BCH_SB_INODE_32BIT - limit inode numbers to 32 bits * BCH_SB_128_BIT_MACS - 128 bit macs instead of 80 * BCH_SB_ENCRYPTION_TYPE - if nonzero encryption is enabled; overrides * DATA/META_CSUM_TYPE. Also indicates encryption * algorithm in use, if/when we get more than one */ LE16_BITMASK(BCH_SB_BLOCK_SIZE, struct bch_sb, block_size, 0, 16); LE64_BITMASK(BCH_SB_INITIALIZED, struct bch_sb, flags[0], 0, 1); LE64_BITMASK(BCH_SB_CLEAN, struct bch_sb, flags[0], 1, 2); LE64_BITMASK(BCH_SB_CSUM_TYPE, struct bch_sb, flags[0], 2, 8); LE64_BITMASK(BCH_SB_ERROR_ACTION, struct bch_sb, flags[0], 8, 12); LE64_BITMASK(BCH_SB_BTREE_NODE_SIZE, struct bch_sb, flags[0], 12, 28); LE64_BITMASK(BCH_SB_GC_RESERVE, struct bch_sb, flags[0], 28, 33); LE64_BITMASK(BCH_SB_ROOT_RESERVE, struct bch_sb, flags[0], 33, 40); LE64_BITMASK(BCH_SB_META_CSUM_TYPE, struct bch_sb, flags[0], 40, 44); LE64_BITMASK(BCH_SB_DATA_CSUM_TYPE, struct bch_sb, flags[0], 44, 48); LE64_BITMASK(BCH_SB_META_REPLICAS_WANT, struct bch_sb, flags[0], 48, 52); LE64_BITMASK(BCH_SB_DATA_REPLICAS_WANT, struct bch_sb, flags[0], 52, 56); LE64_BITMASK(BCH_SB_POSIX_ACL, struct bch_sb, flags[0], 56, 57); LE64_BITMASK(BCH_SB_USRQUOTA, struct bch_sb, flags[0], 57, 58); LE64_BITMASK(BCH_SB_GRPQUOTA, struct bch_sb, flags[0], 58, 59); LE64_BITMASK(BCH_SB_PRJQUOTA, struct bch_sb, flags[0], 59, 60); LE64_BITMASK(BCH_SB_HAS_ERRORS, struct bch_sb, flags[0], 60, 61); LE64_BITMASK(BCH_SB_HAS_TOPOLOGY_ERRORS,struct bch_sb, flags[0], 61, 62); LE64_BITMASK(BCH_SB_BIG_ENDIAN, struct bch_sb, flags[0], 62, 63); LE64_BITMASK(BCH_SB_PROMOTE_WHOLE_EXTENTS, struct bch_sb, flags[0], 63, 64); LE64_BITMASK(BCH_SB_STR_HASH_TYPE, struct bch_sb, flags[1], 0, 4); LE64_BITMASK(BCH_SB_COMPRESSION_TYPE_LO,struct bch_sb, flags[1], 4, 8); LE64_BITMASK(BCH_SB_INODE_32BIT, struct bch_sb, flags[1], 8, 9); LE64_BITMASK(BCH_SB_128_BIT_MACS, struct bch_sb, flags[1], 9, 10); LE64_BITMASK(BCH_SB_ENCRYPTION_TYPE, struct bch_sb, flags[1], 10, 14); /* * Max size of an extent that may require bouncing to read or write * (checksummed, compressed): 64k */ LE64_BITMASK(BCH_SB_ENCODED_EXTENT_MAX_BITS, struct bch_sb, flags[1], 14, 20); LE64_BITMASK(BCH_SB_META_REPLICAS_REQ, struct bch_sb, flags[1], 20, 24); LE64_BITMASK(BCH_SB_DATA_REPLICAS_REQ, struct bch_sb, flags[1], 24, 28); LE64_BITMASK(BCH_SB_PROMOTE_TARGET, struct bch_sb, flags[1], 28, 40); LE64_BITMASK(BCH_SB_FOREGROUND_TARGET, struct bch_sb, flags[1], 40, 52); LE64_BITMASK(BCH_SB_BACKGROUND_TARGET, struct bch_sb, flags[1], 52, 64); LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO, struct bch_sb, flags[2], 0, 4); LE64_BITMASK(BCH_SB_GC_RESERVE_BYTES, struct bch_sb, flags[2], 4, 64); LE64_BITMASK(BCH_SB_ERASURE_CODE, struct bch_sb, flags[3], 0, 16); LE64_BITMASK(BCH_SB_METADATA_TARGET, struct bch_sb, flags[3], 16, 28); LE64_BITMASK(BCH_SB_SHARD_INUMS, struct bch_sb, flags[3], 28, 29); LE64_BITMASK(BCH_SB_INODES_USE_KEY_CACHE,struct bch_sb, flags[3], 29, 30); LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DELAY,struct bch_sb, flags[3], 30, 62); LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DISABLED,struct bch_sb, flags[3], 62, 63); LE64_BITMASK(BCH_SB_JOURNAL_RECLAIM_DELAY,struct bch_sb, flags[4], 0, 32); LE64_BITMASK(BCH_SB_JOURNAL_TRANSACTION_NAMES,struct bch_sb, flags[4], 32, 33); LE64_BITMASK(BCH_SB_NOCOW, struct bch_sb, flags[4], 33, 34); LE64_BITMASK(BCH_SB_WRITE_BUFFER_SIZE, struct bch_sb, flags[4], 34, 54); LE64_BITMASK(BCH_SB_VERSION_UPGRADE, struct bch_sb, flags[4], 54, 56); LE64_BITMASK(BCH_SB_COMPRESSION_TYPE_HI,struct bch_sb, flags[4], 56, 60); LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI, struct bch_sb, flags[4], 60, 64); LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE, struct bch_sb, flags[5], 0, 16); LE64_BITMASK(BCH_SB_ALLOCATOR_STUCK_TIMEOUT, struct bch_sb, flags[5], 16, 32); LE64_BITMASK(BCH_SB_VERSION_INCOMPAT, struct bch_sb, flags[5], 32, 48); LE64_BITMASK(BCH_SB_VERSION_INCOMPAT_ALLOWED, struct bch_sb, flags[5], 48, 64); LE64_BITMASK(BCH_SB_SHARD_INUMS_NBITS, struct bch_sb, flags[6], 0, 4); static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb) { return BCH_SB_COMPRESSION_TYPE_LO(sb) | (BCH_SB_COMPRESSION_TYPE_HI(sb) << 4); } static inline void SET_BCH_SB_COMPRESSION_TYPE(struct bch_sb *sb, __u64 v) { SET_BCH_SB_COMPRESSION_TYPE_LO(sb, v); SET_BCH_SB_COMPRESSION_TYPE_HI(sb, v >> 4); } static inline __u64 BCH_SB_BACKGROUND_COMPRESSION_TYPE(const struct bch_sb *sb) { return BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO(sb) | (BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI(sb) << 4); } static inline void SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(struct bch_sb *sb, __u64 v) { SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO(sb, v); SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI(sb, v >> 4); } /* * Features: * * journal_seq_blacklist_v3: gates BCH_SB_FIELD_journal_seq_blacklist * reflink: gates KEY_TYPE_reflink * inline_data: gates KEY_TYPE_inline_data * new_siphash: gates BCH_STR_HASH_siphash * new_extent_overwrite: gates BTREE_NODE_NEW_EXTENT_OVERWRITE */ #define BCH_SB_FEATURES() \ x(lz4, 0) \ x(gzip, 1) \ x(zstd, 2) \ x(atomic_nlink, 3) \ x(ec, 4) \ x(journal_seq_blacklist_v3, 5) \ x(reflink, 6) \ x(new_siphash, 7) \ x(inline_data, 8) \ x(new_extent_overwrite, 9) \ x(incompressible, 10) \ x(btree_ptr_v2, 11) \ x(extents_above_btree_updates, 12) \ x(btree_updates_journalled, 13) \ x(reflink_inline_data, 14) \ x(new_varint, 15) \ x(journal_no_flush, 16) \ x(alloc_v2, 17) \ x(extents_across_btree_nodes, 18) \ x(incompat_version_field, 19) #define BCH_SB_FEATURES_ALWAYS \ (BIT_ULL(BCH_FEATURE_new_extent_overwrite)| \ BIT_ULL(BCH_FEATURE_extents_above_btree_updates)|\ BIT_ULL(BCH_FEATURE_btree_updates_journalled)|\ BIT_ULL(BCH_FEATURE_alloc_v2)|\ BIT_ULL(BCH_FEATURE_extents_across_btree_nodes)) #define BCH_SB_FEATURES_ALL \ (BCH_SB_FEATURES_ALWAYS| \ BIT_ULL(BCH_FEATURE_new_siphash)| \ BIT_ULL(BCH_FEATURE_btree_ptr_v2)| \ BIT_ULL(BCH_FEATURE_new_varint)| \ BIT_ULL(BCH_FEATURE_journal_no_flush)) enum bch_sb_feature { #define x(f, n) BCH_FEATURE_##f, BCH_SB_FEATURES() #undef x BCH_FEATURE_NR, }; #define BCH_SB_COMPAT() \ x(alloc_info, 0) \ x(alloc_metadata, 1) \ x(extents_above_btree_updates_done, 2) \ x(bformat_overflow_done, 3) enum bch_sb_compat { #define x(f, n) BCH_COMPAT_##f, BCH_SB_COMPAT() #undef x BCH_COMPAT_NR, }; /* options: */ #define BCH_VERSION_UPGRADE_OPTS() \ x(compatible, 0) \ x(incompatible, 1) \ x(none, 2) enum bch_version_upgrade_opts { #define x(t, n) BCH_VERSION_UPGRADE_##t = n, BCH_VERSION_UPGRADE_OPTS() #undef x }; #define BCH_REPLICAS_MAX 4U #define BCH_BKEY_PTRS_MAX 16U #define BCH_ERROR_ACTIONS() \ x(continue, 0) \ x(fix_safe, 1) \ x(panic, 2) \ x(ro, 3) enum bch_error_actions { #define x(t, n) BCH_ON_ERROR_##t = n, BCH_ERROR_ACTIONS() #undef x BCH_ON_ERROR_NR }; #define BCH_STR_HASH_TYPES() \ x(crc32c, 0) \ x(crc64, 1) \ x(siphash_old, 2) \ x(siphash, 3) enum bch_str_hash_type { #define x(t, n) BCH_STR_HASH_##t = n, BCH_STR_HASH_TYPES() #undef x BCH_STR_HASH_NR }; #define BCH_STR_HASH_OPTS() \ x(crc32c, 0) \ x(crc64, 1) \ x(siphash, 2) enum bch_str_hash_opts { #define x(t, n) BCH_STR_HASH_OPT_##t = n, BCH_STR_HASH_OPTS() #undef x BCH_STR_HASH_OPT_NR }; #define BCH_CSUM_TYPES() \ x(none, 0) \ x(crc32c_nonzero, 1) \ x(crc64_nonzero, 2) \ x(chacha20_poly1305_80, 3) \ x(chacha20_poly1305_128, 4) \ x(crc32c, 5) \ x(crc64, 6) \ x(xxhash, 7) enum bch_csum_type { #define x(t, n) BCH_CSUM_##t = n, BCH_CSUM_TYPES() #undef x BCH_CSUM_NR }; static const __maybe_unused unsigned bch_crc_bytes[] = { [BCH_CSUM_none] = 0, [BCH_CSUM_crc32c_nonzero] = 4, [BCH_CSUM_crc32c] = 4, [BCH_CSUM_crc64_nonzero] = 8, [BCH_CSUM_crc64] = 8, [BCH_CSUM_xxhash] = 8, [BCH_CSUM_chacha20_poly1305_80] = 10, [BCH_CSUM_chacha20_poly1305_128] = 16, }; static inline _Bool bch2_csum_type_is_encryption(enum bch_csum_type type) { switch (type) { case BCH_CSUM_chacha20_poly1305_80: case BCH_CSUM_chacha20_poly1305_128: return true; default: return false; } } #define BCH_CSUM_OPTS() \ x(none, 0) \ x(crc32c, 1) \ x(crc64, 2) \ x(xxhash, 3) enum bch_csum_opt { #define x(t, n) BCH_CSUM_OPT_##t = n, BCH_CSUM_OPTS() #undef x BCH_CSUM_OPT_NR }; #define BCH_COMPRESSION_TYPES() \ x(none, 0) \ x(lz4_old, 1) \ x(gzip, 2) \ x(lz4, 3) \ x(zstd, 4) \ x(incompressible, 5) enum bch_compression_type { #define x(t, n) BCH_COMPRESSION_TYPE_##t = n, BCH_COMPRESSION_TYPES() #undef x BCH_COMPRESSION_TYPE_NR }; #define BCH_COMPRESSION_OPTS() \ x(none, 0) \ x(lz4, 1) \ x(gzip, 2) \ x(zstd, 3) enum bch_compression_opts { #define x(t, n) BCH_COMPRESSION_OPT_##t = n, BCH_COMPRESSION_OPTS() #undef x BCH_COMPRESSION_OPT_NR }; /* * Magic numbers * * The various other data structures have their own magic numbers, which are * xored with the first part of the cache set's UUID */ #define BCACHE_MAGIC \ UUID_INIT(0xc68573f6, 0x4e1a, 0x45ca, \ 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81) #define BCHFS_MAGIC \ UUID_INIT(0xc68573f6, 0x66ce, 0x90a9, \ 0xd9, 0x6a, 0x60, 0xcf, 0x80, 0x3d, 0xf7, 0xef) #define BCACHEFS_STATFS_MAGIC BCACHEFS_SUPER_MAGIC #define JSET_MAGIC __cpu_to_le64(0x245235c1a3625032ULL) #define BSET_MAGIC __cpu_to_le64(0x90135c78b99e07f5ULL) static inline __le64 __bch2_sb_magic(struct bch_sb *sb) { __le64 ret; memcpy(&ret, &sb->uuid, sizeof(ret)); return ret; } static inline __u64 __jset_magic(struct bch_sb *sb) { return __le64_to_cpu(__bch2_sb_magic(sb) ^ JSET_MAGIC); } static inline __u64 __bset_magic(struct bch_sb *sb) { return __le64_to_cpu(__bch2_sb_magic(sb) ^ BSET_MAGIC); } /* Journal */ #define JSET_KEYS_U64s (sizeof(struct jset_entry) / sizeof(__u64)) #define BCH_JSET_ENTRY_TYPES() \ x(btree_keys, 0) \ x(btree_root, 1) \ x(prio_ptrs, 2) \ x(blacklist, 3) \ x(blacklist_v2, 4) \ x(usage, 5) \ x(data_usage, 6) \ x(clock, 7) \ x(dev_usage, 8) \ x(log, 9) \ x(overwrite, 10) \ x(write_buffer_keys, 11) \ x(datetime, 12) enum bch_jset_entry_type { #define x(f, nr) BCH_JSET_ENTRY_##f = nr, BCH_JSET_ENTRY_TYPES() #undef x BCH_JSET_ENTRY_NR }; static inline bool jset_entry_is_key(struct jset_entry *e) { switch (e->type) { case BCH_JSET_ENTRY_btree_keys: case BCH_JSET_ENTRY_btree_root: case BCH_JSET_ENTRY_write_buffer_keys: return true; } return false; } /* * Journal sequence numbers can be blacklisted: bsets record the max sequence * number of all the journal entries they contain updates for, so that on * recovery we can ignore those bsets that contain index updates newer that what * made it into the journal. * * This means that we can't reuse that journal_seq - we have to skip it, and * then record that we skipped it so that the next time we crash and recover we * don't think there was a missing journal entry. */ struct jset_entry_blacklist { struct jset_entry entry; __le64 seq; }; struct jset_entry_blacklist_v2 { struct jset_entry entry; __le64 start; __le64 end; }; #define BCH_FS_USAGE_TYPES() \ x(reserved, 0) \ x(inodes, 1) \ x(key_version, 2) enum bch_fs_usage_type { #define x(f, nr) BCH_FS_USAGE_##f = nr, BCH_FS_USAGE_TYPES() #undef x BCH_FS_USAGE_NR }; struct jset_entry_usage { struct jset_entry entry; __le64 v; } __packed; struct jset_entry_data_usage { struct jset_entry entry; __le64 v; struct bch_replicas_entry_v1 r; } __packed; struct jset_entry_clock { struct jset_entry entry; __u8 rw; __u8 pad[7]; __le64 time; } __packed; struct jset_entry_dev_usage_type { __le64 buckets; __le64 sectors; __le64 fragmented; } __packed; struct jset_entry_dev_usage { struct jset_entry entry; __le32 dev; __u32 pad; __le64 _buckets_ec; /* No longer used */ __le64 _buckets_unavailable; /* No longer used */ struct jset_entry_dev_usage_type d[]; }; static inline unsigned jset_entry_dev_usage_nr_types(struct jset_entry_dev_usage *u) { return (vstruct_bytes(&u->entry) - sizeof(struct jset_entry_dev_usage)) / sizeof(struct jset_entry_dev_usage_type); } struct jset_entry_log { struct jset_entry entry; u8 d[]; } __packed __aligned(8); static inline unsigned jset_entry_log_msg_bytes(struct jset_entry_log *l) { unsigned b = vstruct_bytes(&l->entry) - offsetof(struct jset_entry_log, d); while (b && !l->d[b - 1]) --b; return b; } struct jset_entry_datetime { struct jset_entry entry; __le64 seconds; } __packed __aligned(8); /* * On disk format for a journal entry: * seq is monotonically increasing; every journal entry has its own unique * sequence number. * * last_seq is the oldest journal entry that still has keys the btree hasn't * flushed to disk yet. * * version is for on disk format changes. */ struct jset { struct bch_csum csum; __le64 magic; __le64 seq; __le32 version; __le32 flags; __le32 u64s; /* size of d[] in u64s */ __u8 encrypted_start[0]; __le16 _read_clock; /* no longer used */ __le16 _write_clock; /* Sequence number of oldest dirty journal entry */ __le64 last_seq; struct jset_entry start[0]; __u64 _data[]; } __packed __aligned(8); LE32_BITMASK(JSET_CSUM_TYPE, struct jset, flags, 0, 4); LE32_BITMASK(JSET_BIG_ENDIAN, struct jset, flags, 4, 5); LE32_BITMASK(JSET_NO_FLUSH, struct jset, flags, 5, 6); #define BCH_JOURNAL_BUCKETS_MIN 8 /* Btree: */ enum btree_id_flags { BTREE_IS_extents = BIT(0), BTREE_IS_snapshots = BIT(1), BTREE_IS_snapshot_field = BIT(2), BTREE_IS_data = BIT(3), BTREE_IS_write_buffer = BIT(4), }; #define BCH_BTREE_IDS() \ x(extents, 0, \ BTREE_IS_extents| \ BTREE_IS_snapshots| \ BTREE_IS_data, \ BIT_ULL(KEY_TYPE_whiteout)| \ BIT_ULL(KEY_TYPE_error)| \ BIT_ULL(KEY_TYPE_cookie)| \ BIT_ULL(KEY_TYPE_extent)| \ BIT_ULL(KEY_TYPE_reservation)| \ BIT_ULL(KEY_TYPE_reflink_p)| \ BIT_ULL(KEY_TYPE_inline_data)) \ x(inodes, 1, \ BTREE_IS_snapshots, \ BIT_ULL(KEY_TYPE_whiteout)| \ BIT_ULL(KEY_TYPE_inode)| \ BIT_ULL(KEY_TYPE_inode_v2)| \ BIT_ULL(KEY_TYPE_inode_v3)| \ BIT_ULL(KEY_TYPE_inode_generation)) \ x(dirents, 2, \ BTREE_IS_snapshots, \ BIT_ULL(KEY_TYPE_whiteout)| \ BIT_ULL(KEY_TYPE_hash_whiteout)| \ BIT_ULL(KEY_TYPE_dirent)) \ x(xattrs, 3, \ BTREE_IS_snapshots, \ BIT_ULL(KEY_TYPE_whiteout)| \ BIT_ULL(KEY_TYPE_cookie)| \ BIT_ULL(KEY_TYPE_hash_whiteout)| \ BIT_ULL(KEY_TYPE_xattr)) \ x(alloc, 4, 0, \ BIT_ULL(KEY_TYPE_alloc)| \ BIT_ULL(KEY_TYPE_alloc_v2)| \ BIT_ULL(KEY_TYPE_alloc_v3)| \ BIT_ULL(KEY_TYPE_alloc_v4)) \ x(quotas, 5, 0, \ BIT_ULL(KEY_TYPE_quota)) \ x(stripes, 6, 0, \ BIT_ULL(KEY_TYPE_stripe)) \ x(reflink, 7, \ BTREE_IS_extents| \ BTREE_IS_data, \ BIT_ULL(KEY_TYPE_reflink_v)| \ BIT_ULL(KEY_TYPE_indirect_inline_data)| \ BIT_ULL(KEY_TYPE_error)) \ x(subvolumes, 8, 0, \ BIT_ULL(KEY_TYPE_subvolume)) \ x(snapshots, 9, 0, \ BIT_ULL(KEY_TYPE_snapshot)) \ x(lru, 10, \ BTREE_IS_write_buffer, \ BIT_ULL(KEY_TYPE_set)) \ x(freespace, 11, \ BTREE_IS_extents, \ BIT_ULL(KEY_TYPE_set)) \ x(need_discard, 12, 0, \ BIT_ULL(KEY_TYPE_set)) \ x(backpointers, 13, \ BTREE_IS_write_buffer, \ BIT_ULL(KEY_TYPE_backpointer)) \ x(bucket_gens, 14, 0, \ BIT_ULL(KEY_TYPE_bucket_gens)) \ x(snapshot_trees, 15, 0, \ BIT_ULL(KEY_TYPE_snapshot_tree)) \ x(deleted_inodes, 16, \ BTREE_IS_snapshot_field| \ BTREE_IS_write_buffer, \ BIT_ULL(KEY_TYPE_set)) \ x(logged_ops, 17, 0, \ BIT_ULL(KEY_TYPE_logged_op_truncate)| \ BIT_ULL(KEY_TYPE_logged_op_finsert)| \ BIT_ULL(KEY_TYPE_inode_alloc_cursor)) \ x(rebalance_work, 18, \ BTREE_IS_snapshot_field| \ BTREE_IS_write_buffer, \ BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie)) \ x(subvolume_children, 19, 0, \ BIT_ULL(KEY_TYPE_set)) \ x(accounting, 20, \ BTREE_IS_snapshot_field| \ BTREE_IS_write_buffer, \ BIT_ULL(KEY_TYPE_accounting)) \ enum btree_id { #define x(name, nr, ...) BTREE_ID_##name = nr, BCH_BTREE_IDS() #undef x BTREE_ID_NR }; /* * Maximum number of btrees that we will _ever_ have under the current scheme, * where we refer to them with 64 bit bitfields - and we also need a bit for * the interior btree node type: */ #define BTREE_ID_NR_MAX 63 static inline bool btree_id_is_alloc(enum btree_id id) { switch (id) { case BTREE_ID_alloc: case BTREE_ID_backpointers: case BTREE_ID_need_discard: case BTREE_ID_freespace: case BTREE_ID_bucket_gens: case BTREE_ID_lru: case BTREE_ID_accounting: return true; default: return false; } } #define BTREE_MAX_DEPTH 4U /* Btree nodes */ /* * Btree nodes * * On disk a btree node is a list/log of these; within each set the keys are * sorted */ struct bset { __le64 seq; /* * Highest journal entry this bset contains keys for. * If on recovery we don't see that journal entry, this bset is ignored: * this allows us to preserve the order of all index updates after a * crash, since the journal records a total order of all index updates * and anything that didn't make it to the journal doesn't get used. */ __le64 journal_seq; __le32 flags; __le16 version; __le16 u64s; /* count of d[] in u64s */ struct bkey_packed start[0]; __u64 _data[]; } __packed __aligned(8); LE32_BITMASK(BSET_CSUM_TYPE, struct bset, flags, 0, 4); LE32_BITMASK(BSET_BIG_ENDIAN, struct bset, flags, 4, 5); LE32_BITMASK(BSET_SEPARATE_WHITEOUTS, struct bset, flags, 5, 6); /* Sector offset within the btree node: */ LE32_BITMASK(BSET_OFFSET, struct bset, flags, 16, 32); struct btree_node { struct bch_csum csum; __le64 magic; /* this flags field is encrypted, unlike bset->flags: */ __le64 flags; /* Closed interval: */ struct bpos min_key; struct bpos max_key; struct bch_extent_ptr _ptr; /* not used anymore */ struct bkey_format format; union { struct bset keys; struct { __u8 pad[22]; __le16 u64s; __u64 _data[0]; }; }; } __packed __aligned(8); LE64_BITMASK(BTREE_NODE_ID_LO, struct btree_node, flags, 0, 4); LE64_BITMASK(BTREE_NODE_LEVEL, struct btree_node, flags, 4, 8); LE64_BITMASK(BTREE_NODE_NEW_EXTENT_OVERWRITE, struct btree_node, flags, 8, 9); LE64_BITMASK(BTREE_NODE_ID_HI, struct btree_node, flags, 9, 25); /* 25-32 unused */ LE64_BITMASK(BTREE_NODE_SEQ, struct btree_node, flags, 32, 64); static inline __u64 BTREE_NODE_ID(struct btree_node *n) { return BTREE_NODE_ID_LO(n) | (BTREE_NODE_ID_HI(n) << 4); } static inline void SET_BTREE_NODE_ID(struct btree_node *n, __u64 v) { SET_BTREE_NODE_ID_LO(n, v); SET_BTREE_NODE_ID_HI(n, v >> 4); } struct btree_node_entry { struct bch_csum csum; union { struct bset keys; struct { __u8 pad[22]; __le16 u64s; __u64 _data[0]; }; }; } __packed __aligned(8); #endif /* _BCACHEFS_FORMAT_H */
5 48 48 4 48 48 47 47 47 4 9 48 4 48 48 48 48 48 59 60 60 47 48 48 4 7 4 7 7 4 2 217 3 2 3 2 7 7 7 7 7 2 2 2 2 7 7 2 7 48 48 1 48 48 41 7 48 9 47 47 48 4 4 4 48 48 48 48 48 48 48 5 20 35 10 14 11 25 61 8 61 61 60 7 2 61 23 1 59 60 60 3 60 60 11 62 2 10 48 61 14 1 60 62 48 20 2 52 53 53 63 63 63 61 1 53 53 2 11 5 53 23 23 1 22 12 68 68 67 10 61 2 61 51 13 18 9 10 1 5 16 13 3 13 13 12 3 1 1 5 2 1 2 2 539 508 10 46 5 37 23 23 13 13 13 9 418 410 4 12 9 217 217 217 237 238 234 235 235 235 1 434 431 945 945 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 // SPDX-License-Identifier: GPL-2.0+ /* * User-space Probes (UProbes) * * Copyright (C) IBM Corporation, 2008-2012 * Authors: * Srikar Dronamraju * Jim Keniston * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra */ #include <linux/kernel.h> #include <linux/highmem.h> #include <linux/pagemap.h> /* read_mapping_page */ #include <linux/slab.h> #include <linux/sched.h> #include <linux/sched/mm.h> #include <linux/export.h> #include <linux/rmap.h> /* anon_vma_prepare */ #include <linux/mmu_notifier.h> #include <linux/swap.h> /* folio_free_swap */ #include <linux/ptrace.h> /* user_enable_single_step */ #include <linux/kdebug.h> /* notifier mechanism */ #include <linux/percpu-rwsem.h> #include <linux/task_work.h> #include <linux/shmem_fs.h> #include <linux/khugepaged.h> #include <linux/rcupdate_trace.h> #include <linux/workqueue.h> #include <linux/srcu.h> #include <linux/uprobes.h> #define UINSNS_PER_PAGE (PAGE_SIZE/UPROBE_XOL_SLOT_BYTES) #define MAX_UPROBE_XOL_SLOTS UINSNS_PER_PAGE static struct rb_root uprobes_tree = RB_ROOT; /* * allows us to skip the uprobe_mmap if there are no uprobe events active * at this time. Probably a fine grained per inode count is better? */ #define no_uprobe_events() RB_EMPTY_ROOT(&uprobes_tree) static DEFINE_RWLOCK(uprobes_treelock); /* serialize rbtree access */ static seqcount_rwlock_t uprobes_seqcount = SEQCNT_RWLOCK_ZERO(uprobes_seqcount, &uprobes_treelock); #define UPROBES_HASH_SZ 13 /* serialize uprobe->pending_list */ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ]; #define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ]) DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem); /* Covers return_instance's uprobe lifetime. */ DEFINE_STATIC_SRCU(uretprobes_srcu); /* Have a copy of original instruction */ #define UPROBE_COPY_INSN 0 struct uprobe { struct rb_node rb_node; /* node in the rb tree */ refcount_t ref; struct rw_semaphore register_rwsem; struct rw_semaphore consumer_rwsem; struct list_head pending_list; struct list_head consumers; struct inode *inode; /* Also hold a ref to inode */ union { struct rcu_head rcu; struct work_struct work; }; loff_t offset; loff_t ref_ctr_offset; unsigned long flags; /* "unsigned long" so bitops work */ /* * The generic code assumes that it has two members of unknown type * owned by the arch-specific code: * * insn - copy_insn() saves the original instruction here for * arch_uprobe_analyze_insn(). * * ixol - potentially modified instruction to execute out of * line, copied to xol_area by xol_get_insn_slot(). */ struct arch_uprobe arch; }; struct delayed_uprobe { struct list_head list; struct uprobe *uprobe; struct mm_struct *mm; }; static DEFINE_MUTEX(delayed_uprobe_lock); static LIST_HEAD(delayed_uprobe_list); /* * Execute out of line area: anonymous executable mapping installed * by the probed task to execute the copy of the original instruction * mangled by set_swbp(). * * On a breakpoint hit, thread contests for a slot. It frees the * slot after singlestep. Currently a fixed number of slots are * allocated. */ struct xol_area { wait_queue_head_t wq; /* if all slots are busy */ unsigned long *bitmap; /* 0 = free slot */ struct page *page; /* * We keep the vma's vm_start rather than a pointer to the vma * itself. The probed process or a naughty kernel module could make * the vma go away, and we must handle that reasonably gracefully. */ unsigned long vaddr; /* Page(s) of instruction slots */ }; static void uprobe_warn(struct task_struct *t, const char *msg) { pr_warn("uprobe: %s:%d failed to %s\n", current->comm, current->pid, msg); } /* * valid_vma: Verify if the specified vma is an executable vma * Relax restrictions while unregistering: vm_flags might have * changed after breakpoint was inserted. * - is_register: indicates if we are in register context. * - Return 1 if the specified virtual address is in an * executable vma. */ static bool valid_vma(struct vm_area_struct *vma, bool is_register) { vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_MAYSHARE; if (is_register) flags |= VM_WRITE; return vma->vm_file && (vma->vm_flags & flags) == VM_MAYEXEC; } static unsigned long offset_to_vaddr(struct vm_area_struct *vma, loff_t offset) { return vma->vm_start + offset - ((loff_t)vma->vm_pgoff << PAGE_SHIFT); } static loff_t vaddr_to_offset(struct vm_area_struct *vma, unsigned long vaddr) { return ((loff_t)vma->vm_pgoff << PAGE_SHIFT) + (vaddr - vma->vm_start); } /** * __replace_page - replace page in vma by new page. * based on replace_page in mm/ksm.c * * @vma: vma that holds the pte pointing to page * @addr: address the old @page is mapped at * @old_page: the page we are replacing by new_page * @new_page: the modified page we replace page by * * If @new_page is NULL, only unmap @old_page. * * Returns 0 on success, negative error code otherwise. */ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, struct page *old_page, struct page *new_page) { struct folio *old_folio = page_folio(old_page); struct folio *new_folio; struct mm_struct *mm = vma->vm_mm; DEFINE_FOLIO_VMA_WALK(pvmw, old_folio, vma, addr, 0); int err; struct mmu_notifier_range range; mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, addr, addr + PAGE_SIZE); if (new_page) { new_folio = page_folio(new_page); err = mem_cgroup_charge(new_folio, vma->vm_mm, GFP_KERNEL); if (err) return err; } /* For folio_free_swap() below */ folio_lock(old_folio); mmu_notifier_invalidate_range_start(&range); err = -EAGAIN; if (!page_vma_mapped_walk(&pvmw)) goto unlock; VM_BUG_ON_PAGE(addr != pvmw.address, old_page); if (new_page) { folio_get(new_folio); folio_add_new_anon_rmap(new_folio, vma, addr, RMAP_EXCLUSIVE); folio_add_lru_vma(new_folio, vma); } else /* no new page, just dec_mm_counter for old_page */ dec_mm_counter(mm, MM_ANONPAGES); if (!folio_test_anon(old_folio)) { dec_mm_counter(mm, mm_counter_file(old_folio)); inc_mm_counter(mm, MM_ANONPAGES); } flush_cache_page(vma, addr, pte_pfn(ptep_get(pvmw.pte))); ptep_clear_flush(vma, addr, pvmw.pte); if (new_page) set_pte_at(mm, addr, pvmw.pte, mk_pte(new_page, vma->vm_page_prot)); folio_remove_rmap_pte(old_folio, old_page, vma); if (!folio_mapped(old_folio)) folio_free_swap(old_folio); page_vma_mapped_walk_done(&pvmw); folio_put(old_folio); err = 0; unlock: mmu_notifier_invalidate_range_end(&range); folio_unlock(old_folio); return err; } /** * is_swbp_insn - check if instruction is breakpoint instruction. * @insn: instruction to be checked. * Default implementation of is_swbp_insn * Returns true if @insn is a breakpoint instruction. */ bool __weak is_swbp_insn(uprobe_opcode_t *insn) { return *insn == UPROBE_SWBP_INSN; } /** * is_trap_insn - check if instruction is breakpoint instruction. * @insn: instruction to be checked. * Default implementation of is_trap_insn * Returns true if @insn is a breakpoint instruction. * * This function is needed for the case where an architecture has multiple * trap instructions (like powerpc). */ bool __weak is_trap_insn(uprobe_opcode_t *insn) { return is_swbp_insn(insn); } static void copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len) { void *kaddr = kmap_atomic(page); memcpy(dst, kaddr + (vaddr & ~PAGE_MASK), len); kunmap_atomic(kaddr); } static void copy_to_page(struct page *page, unsigned long vaddr, const void *src, int len) { void *kaddr = kmap_atomic(page); memcpy(kaddr + (vaddr & ~PAGE_MASK), src, len); kunmap_atomic(kaddr); } static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *new_opcode) { uprobe_opcode_t old_opcode; bool is_swbp; /* * Note: We only check if the old_opcode is UPROBE_SWBP_INSN here. * We do not check if it is any other 'trap variant' which could * be conditional trap instruction such as the one powerpc supports. * * The logic is that we do not care if the underlying instruction * is a trap variant; uprobes always wins over any other (gdb) * breakpoint. */ copy_from_page(page, vaddr, &old_opcode, UPROBE_SWBP_INSN_SIZE); is_swbp = is_swbp_insn(&old_opcode); if (is_swbp_insn(new_opcode)) { if (is_swbp) /* register: already installed? */ return 0; } else { if (!is_swbp) /* unregister: was it changed by us? */ return 0; } return 1; } static struct delayed_uprobe * delayed_uprobe_check(struct uprobe *uprobe, struct mm_struct *mm) { struct delayed_uprobe *du; list_for_each_entry(du, &delayed_uprobe_list, list) if (du->uprobe == uprobe && du->mm == mm) return du; return NULL; } static int delayed_uprobe_add(struct uprobe *uprobe, struct mm_struct *mm) { struct delayed_uprobe *du; if (delayed_uprobe_check(uprobe, mm)) return 0; du = kzalloc(sizeof(*du), GFP_KERNEL); if (!du) return -ENOMEM; du->uprobe = uprobe; du->mm = mm; list_add(&du->list, &delayed_uprobe_list); return 0; } static void delayed_uprobe_delete(struct delayed_uprobe *du) { if (WARN_ON(!du)) return; list_del(&du->list); kfree(du); } static void delayed_uprobe_remove(struct uprobe *uprobe, struct mm_struct *mm) { struct list_head *pos, *q; struct delayed_uprobe *du; if (!uprobe && !mm) return; list_for_each_safe(pos, q, &delayed_uprobe_list) { du = list_entry(pos, struct delayed_uprobe, list); if (uprobe && du->uprobe != uprobe) continue; if (mm && du->mm != mm) continue; delayed_uprobe_delete(du); } } static bool valid_ref_ctr_vma(struct uprobe *uprobe, struct vm_area_struct *vma) { unsigned long vaddr = offset_to_vaddr(vma, uprobe->ref_ctr_offset); return uprobe->ref_ctr_offset && vma->vm_file && file_inode(vma->vm_file) == uprobe->inode && (vma->vm_flags & (VM_WRITE|VM_SHARED)) == VM_WRITE && vma->vm_start <= vaddr && vma->vm_end > vaddr; } static struct vm_area_struct * find_ref_ctr_vma(struct uprobe *uprobe, struct mm_struct *mm) { VMA_ITERATOR(vmi, mm, 0); struct vm_area_struct *tmp; for_each_vma(vmi, tmp) if (valid_ref_ctr_vma(uprobe, tmp)) return tmp; return NULL; } static int __update_ref_ctr(struct mm_struct *mm, unsigned long vaddr, short d) { void *kaddr; struct page *page; int ret; short *ptr; if (!vaddr || !d) return -EINVAL; ret = get_user_pages_remote(mm, vaddr, 1, FOLL_WRITE, &page, NULL); if (unlikely(ret <= 0)) { /* * We are asking for 1 page. If get_user_pages_remote() fails, * it may return 0, in that case we have to return error. */ return ret == 0 ? -EBUSY : ret; } kaddr = kmap_atomic(page); ptr = kaddr + (vaddr & ~PAGE_MASK); if (unlikely(*ptr + d < 0)) { pr_warn("ref_ctr going negative. vaddr: 0x%lx, " "curr val: %d, delta: %d\n", vaddr, *ptr, d); ret = -EINVAL; goto out; } *ptr += d; ret = 0; out: kunmap_atomic(kaddr); put_page(page); return ret; } static void update_ref_ctr_warn(struct uprobe *uprobe, struct mm_struct *mm, short d) { pr_warn("ref_ctr %s failed for inode: 0x%lx offset: " "0x%llx ref_ctr_offset: 0x%llx of mm: 0x%pK\n", d > 0 ? "increment" : "decrement", uprobe->inode->i_ino, (unsigned long long) uprobe->offset, (unsigned long long) uprobe->ref_ctr_offset, mm); } static int update_ref_ctr(struct uprobe *uprobe, struct mm_struct *mm, short d) { struct vm_area_struct *rc_vma; unsigned long rc_vaddr; int ret = 0; rc_vma = find_ref_ctr_vma(uprobe, mm); if (rc_vma) { rc_vaddr = offset_to_vaddr(rc_vma, uprobe->ref_ctr_offset); ret = __update_ref_ctr(mm, rc_vaddr, d); if (ret) update_ref_ctr_warn(uprobe, mm, d); if (d > 0) return ret; } mutex_lock(&delayed_uprobe_lock); if (d > 0) ret = delayed_uprobe_add(uprobe, mm); else delayed_uprobe_remove(uprobe, mm); mutex_unlock(&delayed_uprobe_lock); return ret; } /* * NOTE: * Expect the breakpoint instruction to be the smallest size instruction for * the architecture. If an arch has variable length instruction and the * breakpoint instruction is not of the smallest length instruction * supported by that architecture then we need to modify is_trap_at_addr and * uprobe_write_opcode accordingly. This would never be a problem for archs * that have fixed length instructions. * * uprobe_write_opcode - write the opcode at a given virtual address. * @auprobe: arch specific probepoint information. * @mm: the probed process address space. * @vaddr: the virtual address to store the opcode. * @opcode: opcode to be written at @vaddr. * * Called with mm->mmap_lock held for read or write. * Return 0 (success) or a negative errno. */ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t opcode) { struct uprobe *uprobe; struct page *old_page, *new_page; struct vm_area_struct *vma; int ret, is_register, ref_ctr_updated = 0; bool orig_page_huge = false; unsigned int gup_flags = FOLL_FORCE; is_register = is_swbp_insn(&opcode); uprobe = container_of(auprobe, struct uprobe, arch); retry: if (is_register) gup_flags |= FOLL_SPLIT_PMD; /* Read the page with vaddr into memory */ old_page = get_user_page_vma_remote(mm, vaddr, gup_flags, &vma); if (IS_ERR(old_page)) return PTR_ERR(old_page); ret = verify_opcode(old_page, vaddr, &opcode); if (ret <= 0) goto put_old; if (WARN(!is_register && PageCompound(old_page), "uprobe unregister should never work on compound page\n")) { ret = -EINVAL; goto put_old; } /* We are going to replace instruction, update ref_ctr. */ if (!ref_ctr_updated && uprobe->ref_ctr_offset) { ret = update_ref_ctr(uprobe, mm, is_register ? 1 : -1); if (ret) goto put_old; ref_ctr_updated = 1; } ret = 0; if (!is_register && !PageAnon(old_page)) goto put_old; ret = anon_vma_prepare(vma); if (ret) goto put_old; ret = -ENOMEM; new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); if (!new_page) goto put_old; __SetPageUptodate(new_page); copy_highpage(new_page, old_page); copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); if (!is_register) { struct page *orig_page; pgoff_t index; VM_BUG_ON_PAGE(!PageAnon(old_page), old_page); index = vaddr_to_offset(vma, vaddr & PAGE_MASK) >> PAGE_SHIFT; orig_page = find_get_page(vma->vm_file->f_inode->i_mapping, index); if (orig_page) { if (PageUptodate(orig_page) && pages_identical(new_page, orig_page)) { /* let go new_page */ put_page(new_page); new_page = NULL; if (PageCompound(orig_page)) orig_page_huge = true; } put_page(orig_page); } } ret = __replace_page(vma, vaddr & PAGE_MASK, old_page, new_page); if (new_page) put_page(new_page); put_old: put_page(old_page); if (unlikely(ret == -EAGAIN)) goto retry; /* Revert back reference counter if instruction update failed. */ if (ret && is_register && ref_ctr_updated) update_ref_ctr(uprobe, mm, -1); /* try collapse pmd for compound page */ if (!ret && orig_page_huge) collapse_pte_mapped_thp(mm, vaddr, false); return ret; } /** * set_swbp - store breakpoint at a given address. * @auprobe: arch specific probepoint information. * @mm: the probed process address space. * @vaddr: the virtual address to insert the opcode. * * For mm @mm, store the breakpoint instruction at @vaddr. * Return 0 (success) or a negative errno. */ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) { return uprobe_write_opcode(auprobe, mm, vaddr, UPROBE_SWBP_INSN); } /** * set_orig_insn - Restore the original instruction. * @mm: the probed process address space. * @auprobe: arch specific probepoint information. * @vaddr: the virtual address to insert the opcode. * * For mm @mm, restore the original opcode (opcode) at @vaddr. * Return 0 (success) or a negative errno. */ int __weak set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) { return uprobe_write_opcode(auprobe, mm, vaddr, *(uprobe_opcode_t *)&auprobe->insn); } /* uprobe should have guaranteed positive refcount */ static struct uprobe *get_uprobe(struct uprobe *uprobe) { refcount_inc(&uprobe->ref); return uprobe; } /* * uprobe should have guaranteed lifetime, which can be either of: * - caller already has refcount taken (and wants an extra one); * - uprobe is RCU protected and won't be freed until after grace period; * - we are holding uprobes_treelock (for read or write, doesn't matter). */ static struct uprobe *try_get_uprobe(struct uprobe *uprobe) { if (refcount_inc_not_zero(&uprobe->ref)) return uprobe; return NULL; } static inline bool uprobe_is_active(struct uprobe *uprobe) { return !RB_EMPTY_NODE(&uprobe->rb_node); } static void uprobe_free_rcu_tasks_trace(struct rcu_head *rcu) { struct uprobe *uprobe = container_of(rcu, struct uprobe, rcu); kfree(uprobe); } static void uprobe_free_srcu(struct rcu_head *rcu) { struct uprobe *uprobe = container_of(rcu, struct uprobe, rcu); call_rcu_tasks_trace(&uprobe->rcu, uprobe_free_rcu_tasks_trace); } static void uprobe_free_deferred(struct work_struct *work) { struct uprobe *uprobe = container_of(work, struct uprobe, work); write_lock(&uprobes_treelock); if (uprobe_is_active(uprobe)) { write_seqcount_begin(&uprobes_seqcount); rb_erase(&uprobe->rb_node, &uprobes_tree); write_seqcount_end(&uprobes_seqcount); } write_unlock(&uprobes_treelock); /* * If application munmap(exec_vma) before uprobe_unregister() * gets called, we don't get a chance to remove uprobe from * delayed_uprobe_list from remove_breakpoint(). Do it here. */ mutex_lock(&delayed_uprobe_lock); delayed_uprobe_remove(uprobe, NULL); mutex_unlock(&delayed_uprobe_lock); /* start srcu -> rcu_tasks_trace -> kfree chain */ call_srcu(&uretprobes_srcu, &uprobe->rcu, uprobe_free_srcu); } static void put_uprobe(struct uprobe *uprobe) { if (!refcount_dec_and_test(&uprobe->ref)) return; INIT_WORK(&uprobe->work, uprobe_free_deferred); schedule_work(&uprobe->work); } /* Initialize hprobe as SRCU-protected "leased" uprobe */ static void hprobe_init_leased(struct hprobe *hprobe, struct uprobe *uprobe, int srcu_idx) { WARN_ON(!uprobe); hprobe->state = HPROBE_LEASED; hprobe->uprobe = uprobe; hprobe->srcu_idx = srcu_idx; } /* Initialize hprobe as refcounted ("stable") uprobe (uprobe can be NULL). */ static void hprobe_init_stable(struct hprobe *hprobe, struct uprobe *uprobe) { hprobe->state = uprobe ? HPROBE_STABLE : HPROBE_GONE; hprobe->uprobe = uprobe; hprobe->srcu_idx = -1; } /* * hprobe_consume() fetches hprobe's underlying uprobe and detects whether * uprobe is SRCU protected or is refcounted. hprobe_consume() can be * used only once for a given hprobe. * * Caller has to call hprobe_finalize() and pass previous hprobe_state, so * that hprobe_finalize() can perform SRCU unlock or put uprobe, whichever * is appropriate. */ static inline struct uprobe *hprobe_consume(struct hprobe *hprobe, enum hprobe_state *hstate) { *hstate = xchg(&hprobe->state, HPROBE_CONSUMED); switch (*hstate) { case HPROBE_LEASED: case HPROBE_STABLE: return hprobe->uprobe; case HPROBE_GONE: /* uprobe is NULL, no SRCU */ case HPROBE_CONSUMED: /* uprobe was finalized already, do nothing */ return NULL; default: WARN(1, "hprobe invalid state %d", *hstate); return NULL; } } /* * Reset hprobe state and, if hprobe was LEASED, release SRCU lock. * hprobe_finalize() can only be used from current context after * hprobe_consume() call (which determines uprobe and hstate value). */ static void hprobe_finalize(struct hprobe *hprobe, enum hprobe_state hstate) { switch (hstate) { case HPROBE_LEASED: __srcu_read_unlock(&uretprobes_srcu, hprobe->srcu_idx); break; case HPROBE_STABLE: put_uprobe(hprobe->uprobe); break; case HPROBE_GONE: case HPROBE_CONSUMED: break; default: WARN(1, "hprobe invalid state %d", hstate); break; } } /* * Attempt to switch (atomically) uprobe from being SRCU protected (LEASED) * to refcounted (STABLE) state. Competes with hprobe_consume(); only one of * them can win the race to perform SRCU unlocking. Whoever wins must perform * SRCU unlock. * * Returns underlying valid uprobe or NULL, if there was no underlying uprobe * to begin with or we failed to bump its refcount and it's going away. * * Returned non-NULL uprobe can be still safely used within an ongoing SRCU * locked region. If `get` is true, it's guaranteed that non-NULL uprobe has * an extra refcount for caller to assume and use. Otherwise, it's not * guaranteed that returned uprobe has a positive refcount, so caller has to * attempt try_get_uprobe(), if it needs to preserve uprobe beyond current * SRCU lock region. See dup_utask(). */ static struct uprobe *hprobe_expire(struct hprobe *hprobe, bool get) { enum hprobe_state hstate; /* * return_instance's hprobe is protected by RCU. * Underlying uprobe is itself protected from reuse by SRCU. */ lockdep_assert(rcu_read_lock_held() && srcu_read_lock_held(&uretprobes_srcu)); hstate = READ_ONCE(hprobe->state); switch (hstate) { case HPROBE_STABLE: /* uprobe has positive refcount, bump refcount, if necessary */ return get ? get_uprobe(hprobe->uprobe) : hprobe->uprobe; case HPROBE_GONE: /* * SRCU was unlocked earlier and we didn't manage to take * uprobe refcnt, so it's effectively NULL */ return NULL; case HPROBE_CONSUMED: /* * uprobe was consumed, so it's effectively NULL as far as * uretprobe processing logic is concerned */ return NULL; case HPROBE_LEASED: { struct uprobe *uprobe = try_get_uprobe(hprobe->uprobe); /* * Try to switch hprobe state, guarding against * hprobe_consume() or another hprobe_expire() racing with us. * Note, if we failed to get uprobe refcount, we use special * HPROBE_GONE state to signal that hprobe->uprobe shouldn't * be used as it will be freed after SRCU is unlocked. */ if (try_cmpxchg(&hprobe->state, &hstate, uprobe ? HPROBE_STABLE : HPROBE_GONE)) { /* We won the race, we are the ones to unlock SRCU */ __srcu_read_unlock(&uretprobes_srcu, hprobe->srcu_idx); return get ? get_uprobe(uprobe) : uprobe; } /* * We lost the race, undo refcount bump (if it ever happened), * unless caller would like an extra refcount anyways. */ if (uprobe && !get) put_uprobe(uprobe); /* * Even if hprobe_consume() or another hprobe_expire() wins * the state update race and unlocks SRCU from under us, we * still have a guarantee that underyling uprobe won't be * freed due to ongoing caller's SRCU lock region, so we can * return it regardless. Also, if `get` was true, we also have * an extra ref for the caller to own. This is used in dup_utask(). */ return uprobe; } default: WARN(1, "unknown hprobe state %d", hstate); return NULL; } } static __always_inline int uprobe_cmp(const struct inode *l_inode, const loff_t l_offset, const struct uprobe *r) { if (l_inode < r->inode) return -1; if (l_inode > r->inode) return 1; if (l_offset < r->offset) return -1; if (l_offset > r->offset) return 1; return 0; } #define __node_2_uprobe(node) \ rb_entry((node), struct uprobe, rb_node) struct __uprobe_key { struct inode *inode; loff_t offset; }; static inline int __uprobe_cmp_key(const void *key, const struct rb_node *b) { const struct __uprobe_key *a = key; return uprobe_cmp(a->inode, a->offset, __node_2_uprobe(b)); } static inline int __uprobe_cmp(struct rb_node *a, const struct rb_node *b) { struct uprobe *u = __node_2_uprobe(a); return uprobe_cmp(u->inode, u->offset, __node_2_uprobe(b)); } /* * Assumes being inside RCU protected region. * No refcount is taken on returned uprobe. */ static struct uprobe *find_uprobe_rcu(struct inode *inode, loff_t offset) { struct __uprobe_key key = { .inode = inode, .offset = offset, }; struct rb_node *node; unsigned int seq; lockdep_assert(rcu_read_lock_trace_held()); do { seq = read_seqcount_begin(&uprobes_seqcount); node = rb_find_rcu(&key, &uprobes_tree, __uprobe_cmp_key); /* * Lockless RB-tree lookups can result only in false negatives. * If the element is found, it is correct and can be returned * under RCU protection. If we find nothing, we need to * validate that seqcount didn't change. If it did, we have to * try again as we might have missed the element (false * negative). If seqcount is unchanged, search truly failed. */ if (node) return __node_2_uprobe(node); } while (read_seqcount_retry(&uprobes_seqcount, seq)); return NULL; } /* * Attempt to insert a new uprobe into uprobes_tree. * * If uprobe already exists (for given inode+offset), we just increment * refcount of previously existing uprobe. * * If not, a provided new instance of uprobe is inserted into the tree (with * assumed initial refcount == 1). * * In any case, we return a uprobe instance that ends up being in uprobes_tree. * Caller has to clean up new uprobe instance, if it ended up not being * inserted into the tree. * * We assume that uprobes_treelock is held for writing. */ static struct uprobe *__insert_uprobe(struct uprobe *uprobe) { struct rb_node *node; again: node = rb_find_add_rcu(&uprobe->rb_node, &uprobes_tree, __uprobe_cmp); if (node) { struct uprobe *u = __node_2_uprobe(node); if (!try_get_uprobe(u)) { rb_erase(node, &uprobes_tree); RB_CLEAR_NODE(&u->rb_node); goto again; } return u; } return uprobe; } /* * Acquire uprobes_treelock and insert uprobe into uprobes_tree * (or reuse existing one, see __insert_uprobe() comments above). */ static struct uprobe *insert_uprobe(struct uprobe *uprobe) { struct uprobe *u; write_lock(&uprobes_treelock); write_seqcount_begin(&uprobes_seqcount); u = __insert_uprobe(uprobe); write_seqcount_end(&uprobes_seqcount); write_unlock(&uprobes_treelock); return u; } static void ref_ctr_mismatch_warn(struct uprobe *cur_uprobe, struct uprobe *uprobe) { pr_warn("ref_ctr_offset mismatch. inode: 0x%lx offset: 0x%llx " "ref_ctr_offset(old): 0x%llx ref_ctr_offset(new): 0x%llx\n", uprobe->inode->i_ino, (unsigned long long) uprobe->offset, (unsigned long long) cur_uprobe->ref_ctr_offset, (unsigned long long) uprobe->ref_ctr_offset); } static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset, loff_t ref_ctr_offset) { struct uprobe *uprobe, *cur_uprobe; uprobe = kzalloc(sizeof(struct uprobe), GFP_KERNEL); if (!uprobe) return ERR_PTR(-ENOMEM); uprobe->inode = inode; uprobe->offset = offset; uprobe->ref_ctr_offset = ref_ctr_offset; INIT_LIST_HEAD(&uprobe->consumers); init_rwsem(&uprobe->register_rwsem); init_rwsem(&uprobe->consumer_rwsem); RB_CLEAR_NODE(&uprobe->rb_node); refcount_set(&uprobe->ref, 1); /* add to uprobes_tree, sorted on inode:offset */ cur_uprobe = insert_uprobe(uprobe); /* a uprobe exists for this inode:offset combination */ if (cur_uprobe != uprobe) { if (cur_uprobe->ref_ctr_offset != uprobe->ref_ctr_offset) { ref_ctr_mismatch_warn(cur_uprobe, uprobe); put_uprobe(cur_uprobe); kfree(uprobe); return ERR_PTR(-EINVAL); } kfree(uprobe); uprobe = cur_uprobe; } return uprobe; } static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc) { static atomic64_t id; down_write(&uprobe->consumer_rwsem); list_add_rcu(&uc->cons_node, &uprobe->consumers); uc->id = (__u64) atomic64_inc_return(&id); up_write(&uprobe->consumer_rwsem); } /* * For uprobe @uprobe, delete the consumer @uc. * Should never be called with consumer that's not part of @uprobe->consumers. */ static void consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc) { down_write(&uprobe->consumer_rwsem); list_del_rcu(&uc->cons_node); up_write(&uprobe->consumer_rwsem); } static int __copy_insn(struct address_space *mapping, struct file *filp, void *insn, int nbytes, loff_t offset) { struct page *page; /* * Ensure that the page that has the original instruction is populated * and in page-cache. If ->read_folio == NULL it must be shmem_mapping(), * see uprobe_register(). */ if (mapping->a_ops->read_folio) page = read_mapping_page(mapping, offset >> PAGE_SHIFT, filp); else page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); if (IS_ERR(page)) return PTR_ERR(page); copy_from_page(page, offset, insn, nbytes); put_page(page); return 0; } static int copy_insn(struct uprobe *uprobe, struct file *filp) { struct address_space *mapping = uprobe->inode->i_mapping; loff_t offs = uprobe->offset; void *insn = &uprobe->arch.insn; int size = sizeof(uprobe->arch.insn); int len, err = -EIO; /* Copy only available bytes, -EIO if nothing was read */ do { if (offs >= i_size_read(uprobe->inode)) break; len = min_t(int, size, PAGE_SIZE - (offs & ~PAGE_MASK)); err = __copy_insn(mapping, filp, insn, len, offs); if (err) break; insn += len; offs += len; size -= len; } while (size); return err; } static int prepare_uprobe(struct uprobe *uprobe, struct file *file, struct mm_struct *mm, unsigned long vaddr) { int ret = 0; if (test_bit(UPROBE_COPY_INSN, &uprobe->flags)) return ret; /* TODO: move this into _register, until then we abuse this sem. */ down_write(&uprobe->consumer_rwsem); if (test_bit(UPROBE_COPY_INSN, &uprobe->flags)) goto out; ret = copy_insn(uprobe, file); if (ret) goto out; ret = -ENOTSUPP; if (is_trap_insn((uprobe_opcode_t *)&uprobe->arch.insn)) goto out; ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr); if (ret) goto out; smp_wmb(); /* pairs with the smp_rmb() in handle_swbp() */ set_bit(UPROBE_COPY_INSN, &uprobe->flags); out: up_write(&uprobe->consumer_rwsem); return ret; } static inline bool consumer_filter(struct uprobe_consumer *uc, struct mm_struct *mm) { return !uc->filter || uc->filter(uc, mm); } static bool filter_chain(struct uprobe *uprobe, struct mm_struct *mm) { struct uprobe_consumer *uc; bool ret = false; down_read(&uprobe->consumer_rwsem); list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) { ret = consumer_filter(uc, mm); if (ret) break; } up_read(&uprobe->consumer_rwsem); return ret; } static int install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long vaddr) { bool first_uprobe; int ret; ret = prepare_uprobe(uprobe, vma->vm_file, mm, vaddr); if (ret) return ret; /* * set MMF_HAS_UPROBES in advance for uprobe_pre_sstep_notifier(), * the task can hit this breakpoint right after __replace_page(). */ first_uprobe = !test_bit(MMF_HAS_UPROBES, &mm->flags); if (first_uprobe) set_bit(MMF_HAS_UPROBES, &mm->flags); ret = set_swbp(&uprobe->arch, mm, vaddr); if (!ret) clear_bit(MMF_RECALC_UPROBES, &mm->flags); else if (first_uprobe) clear_bit(MMF_HAS_UPROBES, &mm->flags); return ret; } static int remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr) { set_bit(MMF_RECALC_UPROBES, &mm->flags); return set_orig_insn(&uprobe->arch, mm, vaddr); } struct map_info { struct map_info *next; struct mm_struct *mm; unsigned long vaddr; }; static inline struct map_info *free_map_info(struct map_info *info) { struct map_info *next = info->next; kfree(info); return next; } static struct map_info * build_map_info(struct address_space *mapping, loff_t offset, bool is_register) { unsigned long pgoff = offset >> PAGE_SHIFT; struct vm_area_struct *vma; struct map_info *curr = NULL; struct map_info *prev = NULL; struct map_info *info; int more = 0; again: i_mmap_lock_read(mapping); vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { if (!valid_vma(vma, is_register)) continue; if (!prev && !more) { /* * Needs GFP_NOWAIT to avoid i_mmap_rwsem recursion through * reclaim. This is optimistic, no harm done if it fails. */ prev = kmalloc(sizeof(struct map_info), GFP_NOWAIT | __GFP_NOMEMALLOC | __GFP_NOWARN); if (prev) prev->next = NULL; } if (!prev) { more++; continue; } if (!mmget_not_zero(vma->vm_mm)) continue; info = prev; prev = prev->next; info->next = curr; curr = info; info->mm = vma->vm_mm; info->vaddr = offset_to_vaddr(vma, offset); } i_mmap_unlock_read(mapping); if (!more) goto out; prev = curr; while (curr) { mmput(curr->mm); curr = curr->next; } do { info = kmalloc(sizeof(struct map_info), GFP_KERNEL); if (!info) { curr = ERR_PTR(-ENOMEM); goto out; } info->next = prev; prev = info; } while (--more); goto again; out: while (prev) prev = free_map_info(prev); return curr; } static int register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new) { bool is_register = !!new; struct map_info *info; int err = 0; percpu_down_write(&dup_mmap_sem); info = build_map_info(uprobe->inode->i_mapping, uprobe->offset, is_register); if (IS_ERR(info)) { err = PTR_ERR(info); goto out; } while (info) { struct mm_struct *mm = info->mm; struct vm_area_struct *vma; if (err && is_register) goto free; /* * We take mmap_lock for writing to avoid the race with * find_active_uprobe_rcu() which takes mmap_lock for reading. * Thus this install_breakpoint() can not make * is_trap_at_addr() true right after find_uprobe_rcu() * returns NULL in find_active_uprobe_rcu(). */ mmap_write_lock(mm); vma = find_vma(mm, info->vaddr); if (!vma || !valid_vma(vma, is_register) || file_inode(vma->vm_file) != uprobe->inode) goto unlock; if (vma->vm_start > info->vaddr || vaddr_to_offset(vma, info->vaddr) != uprobe->offset) goto unlock; if (is_register) { /* consult only the "caller", new consumer. */ if (consumer_filter(new, mm)) err = install_breakpoint(uprobe, mm, vma, info->vaddr); } else if (test_bit(MMF_HAS_UPROBES, &mm->flags)) { if (!filter_chain(uprobe, mm)) err |= remove_breakpoint(uprobe, mm, info->vaddr); } unlock: mmap_write_unlock(mm); free: mmput(mm); info = free_map_info(info); } out: percpu_up_write(&dup_mmap_sem); return err; } /** * uprobe_unregister_nosync - unregister an already registered probe. * @uprobe: uprobe to remove * @uc: identify which probe if multiple probes are colocated. */ void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc) { int err; down_write(&uprobe->register_rwsem); consumer_del(uprobe, uc); err = register_for_each_vma(uprobe, NULL); up_write(&uprobe->register_rwsem); /* TODO : cant unregister? schedule a worker thread */ if (unlikely(err)) { uprobe_warn(current, "unregister, leaking uprobe"); return; } put_uprobe(uprobe); } EXPORT_SYMBOL_GPL(uprobe_unregister_nosync); void uprobe_unregister_sync(void) { /* * Now that handler_chain() and handle_uretprobe_chain() iterate over * uprobe->consumers list under RCU protection without holding * uprobe->register_rwsem, we need to wait for RCU grace period to * make sure that we can't call into just unregistered * uprobe_consumer's callbacks anymore. If we don't do that, fast and * unlucky enough caller can free consumer's memory and cause * handler_chain() or handle_uretprobe_chain() to do an use-after-free. */ synchronize_rcu_tasks_trace(); synchronize_srcu(&uretprobes_srcu); } EXPORT_SYMBOL_GPL(uprobe_unregister_sync); /** * uprobe_register - register a probe * @inode: the file in which the probe has to be placed. * @offset: offset from the start of the file. * @ref_ctr_offset: offset of SDT marker / reference counter * @uc: information on howto handle the probe.. * * Apart from the access refcount, uprobe_register() takes a creation * refcount (thro alloc_uprobe) if and only if this @uprobe is getting * inserted into the rbtree (i.e first consumer for a @inode:@offset * tuple). Creation refcount stops uprobe_unregister from freeing the * @uprobe even before the register operation is complete. Creation * refcount is released when the last @uc for the @uprobe * unregisters. Caller of uprobe_register() is required to keep @inode * (and the containing mount) referenced. * * Return: pointer to the new uprobe on success or an ERR_PTR on failure. */ struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc) { struct uprobe *uprobe; int ret; /* Uprobe must have at least one set consumer */ if (!uc->handler && !uc->ret_handler) return ERR_PTR(-EINVAL); /* copy_insn() uses read_mapping_page() or shmem_read_mapping_page() */ if (!inode->i_mapping->a_ops->read_folio && !shmem_mapping(inode->i_mapping)) return ERR_PTR(-EIO); /* Racy, just to catch the obvious mistakes */ if (offset > i_size_read(inode)) return ERR_PTR(-EINVAL); /* * This ensures that copy_from_page(), copy_to_page() and * __update_ref_ctr() can't cross page boundary. */ if (!IS_ALIGNED(offset, UPROBE_SWBP_INSN_SIZE)) return ERR_PTR(-EINVAL); if (!IS_ALIGNED(ref_ctr_offset, sizeof(short))) return ERR_PTR(-EINVAL); uprobe = alloc_uprobe(inode, offset, ref_ctr_offset); if (IS_ERR(uprobe)) return uprobe; down_write(&uprobe->register_rwsem); consumer_add(uprobe, uc); ret = register_for_each_vma(uprobe, uc); up_write(&uprobe->register_rwsem); if (ret) { uprobe_unregister_nosync(uprobe, uc); /* * Registration might have partially succeeded, so we can have * this consumer being called right at this time. We need to * sync here. It's ok, it's unlikely slow path. */ uprobe_unregister_sync(); return ERR_PTR(ret); } return uprobe; } EXPORT_SYMBOL_GPL(uprobe_register); /** * uprobe_apply - add or remove the breakpoints according to @uc->filter * @uprobe: uprobe which "owns" the breakpoint * @uc: consumer which wants to add more or remove some breakpoints * @add: add or remove the breakpoints * Return: 0 on success or negative error code. */ int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool add) { struct uprobe_consumer *con; int ret = -ENOENT; down_write(&uprobe->register_rwsem); rcu_read_lock_trace(); list_for_each_entry_rcu(con, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) { if (con == uc) { ret = register_for_each_vma(uprobe, add ? uc : NULL); break; } } rcu_read_unlock_trace(); up_write(&uprobe->register_rwsem); return ret; } static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm) { VMA_ITERATOR(vmi, mm, 0); struct vm_area_struct *vma; int err = 0; mmap_read_lock(mm); for_each_vma(vmi, vma) { unsigned long vaddr; loff_t offset; if (!valid_vma(vma, false) || file_inode(vma->vm_file) != uprobe->inode) continue; offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT; if (uprobe->offset < offset || uprobe->offset >= offset + vma->vm_end - vma->vm_start) continue; vaddr = offset_to_vaddr(vma, uprobe->offset); err |= remove_breakpoint(uprobe, mm, vaddr); } mmap_read_unlock(mm); return err; } static struct rb_node * find_node_in_range(struct inode *inode, loff_t min, loff_t max) { struct rb_node *n = uprobes_tree.rb_node; while (n) { struct uprobe *u = rb_entry(n, struct uprobe, rb_node); if (inode < u->inode) { n = n->rb_left; } else if (inode > u->inode) { n = n->rb_right; } else { if (max < u->offset) n = n->rb_left; else if (min > u->offset) n = n->rb_right; else break; } } return n; } /* * For a given range in vma, build a list of probes that need to be inserted. */ static void build_probe_list(struct inode *inode, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct list_head *head) { loff_t min, max; struct rb_node *n, *t; struct uprobe *u; INIT_LIST_HEAD(head); min = vaddr_to_offset(vma, start); max = min + (end - start) - 1; read_lock(&uprobes_treelock); n = find_node_in_range(inode, min, max); if (n) { for (t = n; t; t = rb_prev(t)) { u = rb_entry(t, struct uprobe, rb_node); if (u->inode != inode || u->offset < min) break; /* if uprobe went away, it's safe to ignore it */ if (try_get_uprobe(u)) list_add(&u->pending_list, head); } for (t = n; (t = rb_next(t)); ) { u = rb_entry(t, struct uprobe, rb_node); if (u->inode != inode || u->offset > max) break; /* if uprobe went away, it's safe to ignore it */ if (try_get_uprobe(u)) list_add(&u->pending_list, head); } } read_unlock(&uprobes_treelock); } /* @vma contains reference counter, not the probed instruction. */ static int delayed_ref_ctr_inc(struct vm_area_struct *vma) { struct list_head *pos, *q; struct delayed_uprobe *du; unsigned long vaddr; int ret = 0, err = 0; mutex_lock(&delayed_uprobe_lock); list_for_each_safe(pos, q, &delayed_uprobe_list) { du = list_entry(pos, struct delayed_uprobe, list); if (du->mm != vma->vm_mm || !valid_ref_ctr_vma(du->uprobe, vma)) continue; vaddr = offset_to_vaddr(vma, du->uprobe->ref_ctr_offset); ret = __update_ref_ctr(vma->vm_mm, vaddr, 1); if (ret) { update_ref_ctr_warn(du->uprobe, vma->vm_mm, 1); if (!err) err = ret; } delayed_uprobe_delete(du); } mutex_unlock(&delayed_uprobe_lock); return err; } /* * Called from mmap_region/vma_merge with mm->mmap_lock acquired. * * Currently we ignore all errors and always return 0, the callers * can't handle the failure anyway. */ int uprobe_mmap(struct vm_area_struct *vma) { struct list_head tmp_list; struct uprobe *uprobe, *u; struct inode *inode; if (no_uprobe_events()) return 0; if (vma->vm_file && (vma->vm_flags & (VM_WRITE|VM_SHARED)) == VM_WRITE && test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags)) delayed_ref_ctr_inc(vma); if (!valid_vma(vma, true)) return 0; inode = file_inode(vma->vm_file); if (!inode) return 0; mutex_lock(uprobes_mmap_hash(inode)); build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list); /* * We can race with uprobe_unregister(), this uprobe can be already * removed. But in this case filter_chain() must return false, all * consumers have gone away. */ list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { if (!fatal_signal_pending(current) && filter_chain(uprobe, vma->vm_mm)) { unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset); install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); } put_uprobe(uprobe); } mutex_unlock(uprobes_mmap_hash(inode)); return 0; } static bool vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long end) { loff_t min, max; struct inode *inode; struct rb_node *n; inode = file_inode(vma->vm_file); min = vaddr_to_offset(vma, start); max = min + (end - start) - 1; read_lock(&uprobes_treelock); n = find_node_in_range(inode, min, max); read_unlock(&uprobes_treelock); return !!n; } /* * Called in context of a munmap of a vma. */ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) { if (no_uprobe_events() || !valid_vma(vma, false)) return; if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */ return; if (!test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags) || test_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags)) return; if (vma_has_uprobes(vma, start, end)) set_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags); } static vm_fault_t xol_fault(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf) { struct xol_area *area = vma->vm_mm->uprobes_state.xol_area; vmf->page = area->page; get_page(vmf->page); return 0; } static int xol_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { return -EPERM; } static const struct vm_special_mapping xol_mapping = { .name = "[uprobes]", .fault = xol_fault, .mremap = xol_mremap, }; /* Slot allocation for XOL */ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) { struct vm_area_struct *vma; int ret; if (mmap_write_lock_killable(mm)) return -EINTR; if (mm->uprobes_state.xol_area) { ret = -EALREADY; goto fail; } if (!area->vaddr) { /* Try to map as high as possible, this is only a hint. */ area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0); if (IS_ERR_VALUE(area->vaddr)) { ret = area->vaddr; goto fail; } } vma = _install_special_mapping(mm, area->vaddr, PAGE_SIZE, VM_EXEC|VM_MAYEXEC|VM_DONTCOPY|VM_IO, &xol_mapping); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto fail; } ret = 0; /* pairs with get_xol_area() */ smp_store_release(&mm->uprobes_state.xol_area, area); /* ^^^ */ fail: mmap_write_unlock(mm); return ret; } void * __weak arch_uprobe_trampoline(unsigned long *psize) { static uprobe_opcode_t insn = UPROBE_SWBP_INSN; *psize = UPROBE_SWBP_INSN_SIZE; return &insn; } static struct xol_area *__create_xol_area(unsigned long vaddr) { struct mm_struct *mm = current->mm; unsigned long insns_size; struct xol_area *area; void *insns; area = kzalloc(sizeof(*area), GFP_KERNEL); if (unlikely(!area)) goto out; area->bitmap = kcalloc(BITS_TO_LONGS(UINSNS_PER_PAGE), sizeof(long), GFP_KERNEL); if (!area->bitmap) goto free_area; area->page = alloc_page(GFP_HIGHUSER | __GFP_ZERO); if (!area->page) goto free_bitmap; area->vaddr = vaddr; init_waitqueue_head(&area->wq); /* Reserve the 1st slot for get_trampoline_vaddr() */ set_bit(0, area->bitmap); insns = arch_uprobe_trampoline(&insns_size); arch_uprobe_copy_ixol(area->page, 0, insns, insns_size); if (!xol_add_vma(mm, area)) return area; __free_page(area->page); free_bitmap: kfree(area->bitmap); free_area: kfree(area); out: return NULL; } /* * get_xol_area - Allocate process's xol_area if necessary. * This area will be used for storing instructions for execution out of line. * * Returns the allocated area or NULL. */ static struct xol_area *get_xol_area(void) { struct mm_struct *mm = current->mm; struct xol_area *area; if (!mm->uprobes_state.xol_area) __create_xol_area(0); /* Pairs with xol_add_vma() smp_store_release() */ area = READ_ONCE(mm->uprobes_state.xol_area); /* ^^^ */ return area; } /* * uprobe_clear_state - Free the area allocated for slots. */ void uprobe_clear_state(struct mm_struct *mm) { struct xol_area *area = mm->uprobes_state.xol_area; mutex_lock(&delayed_uprobe_lock); delayed_uprobe_remove(NULL, mm); mutex_unlock(&delayed_uprobe_lock); if (!area) return; put_page(area->page); kfree(area->bitmap); kfree(area); } void uprobe_start_dup_mmap(void) { percpu_down_read(&dup_mmap_sem); } void uprobe_end_dup_mmap(void) { percpu_up_read(&dup_mmap_sem); } void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm) { if (test_bit(MMF_HAS_UPROBES, &oldmm->flags)) { set_bit(MMF_HAS_UPROBES, &newmm->flags); /* unconditionally, dup_mmap() skips VM_DONTCOPY vmas */ set_bit(MMF_RECALC_UPROBES, &newmm->flags); } } static unsigned long xol_get_slot_nr(struct xol_area *area) { unsigned long slot_nr; slot_nr = find_first_zero_bit(area->bitmap, UINSNS_PER_PAGE); if (slot_nr < UINSNS_PER_PAGE) { if (!test_and_set_bit(slot_nr, area->bitmap)) return slot_nr; } return UINSNS_PER_PAGE; } /* * xol_get_insn_slot - allocate a slot for xol. */ static bool xol_get_insn_slot(struct uprobe *uprobe, struct uprobe_task *utask) { struct xol_area *area = get_xol_area(); unsigned long slot_nr; if (!area) return false; wait_event(area->wq, (slot_nr = xol_get_slot_nr(area)) < UINSNS_PER_PAGE); utask->xol_vaddr = area->vaddr + slot_nr * UPROBE_XOL_SLOT_BYTES; arch_uprobe_copy_ixol(area->page, utask->xol_vaddr, &uprobe->arch.ixol, sizeof(uprobe->arch.ixol)); return true; } /* * xol_free_insn_slot - free the slot allocated by xol_get_insn_slot() */ static void xol_free_insn_slot(struct uprobe_task *utask) { struct xol_area *area = current->mm->uprobes_state.xol_area; unsigned long offset = utask->xol_vaddr - area->vaddr; unsigned int slot_nr; utask->xol_vaddr = 0; /* xol_vaddr must fit into [area->vaddr, area->vaddr + PAGE_SIZE) */ if (WARN_ON_ONCE(offset >= PAGE_SIZE)) return; slot_nr = offset / UPROBE_XOL_SLOT_BYTES; clear_bit(slot_nr, area->bitmap); smp_mb__after_atomic(); /* pairs with prepare_to_wait() */ if (waitqueue_active(&area->wq)) wake_up(&area->wq); } void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, void *src, unsigned long len) { /* Initialize the slot */ copy_to_page(page, vaddr, src, len); /* * We probably need flush_icache_user_page() but it needs vma. * This should work on most of architectures by default. If * architecture needs to do something different it can define * its own version of the function. */ flush_dcache_page(page); } /** * uprobe_get_swbp_addr - compute address of swbp given post-swbp regs * @regs: Reflects the saved state of the task after it has hit a breakpoint * instruction. * Return the address of the breakpoint instruction. */ unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs) { return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE; } unsigned long uprobe_get_trap_addr(struct pt_regs *regs) { struct uprobe_task *utask = current->utask; if (unlikely(utask && utask->active_uprobe)) return utask->vaddr; return instruction_pointer(regs); } static void ri_pool_push(struct uprobe_task *utask, struct return_instance *ri) { ri->cons_cnt = 0; ri->next = utask->ri_pool; utask->ri_pool = ri; } static struct return_instance *ri_pool_pop(struct uprobe_task *utask) { struct return_instance *ri = utask->ri_pool; if (likely(ri)) utask->ri_pool = ri->next; return ri; } static void ri_free(struct return_instance *ri) { kfree(ri->extra_consumers); kfree_rcu(ri, rcu); } static void free_ret_instance(struct uprobe_task *utask, struct return_instance *ri, bool cleanup_hprobe) { unsigned seq; if (cleanup_hprobe) { enum hprobe_state hstate; (void)hprobe_consume(&ri->hprobe, &hstate); hprobe_finalize(&ri->hprobe, hstate); } /* * At this point return_instance is unlinked from utask's * return_instances list and this has become visible to ri_timer(). * If seqcount now indicates that ri_timer's return instance * processing loop isn't active, we can return ri into the pool of * to-be-reused return instances for future uretprobes. If ri_timer() * happens to be running right now, though, we fallback to safety and * just perform RCU-delated freeing of ri. */ if (raw_seqcount_try_begin(&utask->ri_seqcount, seq)) { /* immediate reuse of ri without RCU GP is OK */ ri_pool_push(utask, ri); } else { /* we might be racing with ri_timer(), so play it safe */ ri_free(ri); } } /* * Called with no locks held. * Called in context of an exiting or an exec-ing thread. */ void uprobe_free_utask(struct task_struct *t) { struct uprobe_task *utask = t->utask; struct return_instance *ri, *ri_next; if (!utask) return; t->utask = NULL; WARN_ON_ONCE(utask->active_uprobe || utask->xol_vaddr); timer_delete_sync(&utask->ri_timer); ri = utask->return_instances; while (ri) { ri_next = ri->next; free_ret_instance(utask, ri, true /* cleanup_hprobe */); ri = ri_next; } /* free_ret_instance() above might add to ri_pool, so this loop should come last */ ri = utask->ri_pool; while (ri) { ri_next = ri->next; ri_free(ri); ri = ri_next; } kfree(utask); } #define RI_TIMER_PERIOD (HZ / 10) /* 100 ms */ #define for_each_ret_instance_rcu(pos, head) \ for (pos = rcu_dereference_raw(head); pos; pos = rcu_dereference_raw(pos->next)) static void ri_timer(struct timer_list *timer) { struct uprobe_task *utask = container_of(timer, struct uprobe_task, ri_timer); struct return_instance *ri; /* SRCU protects uprobe from reuse for the cmpxchg() inside hprobe_expire(). */ guard(srcu)(&uretprobes_srcu); /* RCU protects return_instance from freeing. */ guard(rcu)(); write_seqcount_begin(&utask->ri_seqcount); for_each_ret_instance_rcu(ri, utask->return_instances) hprobe_expire(&ri->hprobe, false); write_seqcount_end(&utask->ri_seqcount); } static struct uprobe_task *alloc_utask(void) { struct uprobe_task *utask; utask = kzalloc(sizeof(*utask), GFP_KERNEL); if (!utask) return NULL; timer_setup(&utask->ri_timer, ri_timer, 0); seqcount_init(&utask->ri_seqcount); return utask; } /* * Allocate a uprobe_task object for the task if necessary. * Called when the thread hits a breakpoint. * * Returns: * - pointer to new uprobe_task on success * - NULL otherwise */ static struct uprobe_task *get_utask(void) { if (!current->utask) current->utask = alloc_utask(); return current->utask; } static struct return_instance *alloc_return_instance(struct uprobe_task *utask) { struct return_instance *ri; ri = ri_pool_pop(utask); if (ri) return ri; ri = kzalloc(sizeof(*ri), GFP_KERNEL); if (!ri) return ZERO_SIZE_PTR; return ri; } static struct return_instance *dup_return_instance(struct return_instance *old) { struct return_instance *ri; ri = kmemdup(old, sizeof(*ri), GFP_KERNEL); if (!ri) return NULL; if (unlikely(old->cons_cnt > 1)) { ri->extra_consumers = kmemdup(old->extra_consumers, sizeof(ri->extra_consumers[0]) * (old->cons_cnt - 1), GFP_KERNEL); if (!ri->extra_consumers) { kfree(ri); return NULL; } } return ri; } static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask) { struct uprobe_task *n_utask; struct return_instance **p, *o, *n; struct uprobe *uprobe; n_utask = alloc_utask(); if (!n_utask) return -ENOMEM; t->utask = n_utask; /* protect uprobes from freeing, we'll need try_get_uprobe() them */ guard(srcu)(&uretprobes_srcu); p = &n_utask->return_instances; for (o = o_utask->return_instances; o; o = o->next) { n = dup_return_instance(o); if (!n) return -ENOMEM; /* if uprobe is non-NULL, we'll have an extra refcount for uprobe */ uprobe = hprobe_expire(&o->hprobe, true); /* * New utask will have stable properly refcounted uprobe or * NULL. Even if we failed to get refcounted uprobe, we still * need to preserve full set of return_instances for proper * uretprobe handling and nesting in forked task. */ hprobe_init_stable(&n->hprobe, uprobe); n->next = NULL; rcu_assign_pointer(*p, n); p = &n->next; n_utask->depth++; } return 0; } static void dup_xol_work(struct callback_head *work) { if (current->flags & PF_EXITING) return; if (!__create_xol_area(current->utask->dup_xol_addr) && !fatal_signal_pending(current)) uprobe_warn(current, "dup xol area"); } /* * Called in context of a new clone/fork from copy_process. */ void uprobe_copy_process(struct task_struct *t, unsigned long flags) { struct uprobe_task *utask = current->utask; struct mm_struct *mm = current->mm; struct xol_area *area; t->utask = NULL; if (!utask || !utask->return_instances) return; if (mm == t->mm && !(flags & CLONE_VFORK)) return; if (dup_utask(t, utask)) return uprobe_warn(t, "dup ret instances"); /* The task can fork() after dup_xol_work() fails */ area = mm->uprobes_state.xol_area; if (!area) return uprobe_warn(t, "dup xol area"); if (mm == t->mm) return; t->utask->dup_xol_addr = area->vaddr; init_task_work(&t->utask->dup_xol_work, dup_xol_work); task_work_add(t, &t->utask->dup_xol_work, TWA_RESUME); } /* * Current area->vaddr notion assume the trampoline address is always * equal area->vaddr. * * Returns -1 in case the xol_area is not allocated. */ unsigned long uprobe_get_trampoline_vaddr(void) { struct xol_area *area; unsigned long trampoline_vaddr = -1; /* Pairs with xol_add_vma() smp_store_release() */ area = READ_ONCE(current->mm->uprobes_state.xol_area); /* ^^^ */ if (area) trampoline_vaddr = area->vaddr; return trampoline_vaddr; } static void cleanup_return_instances(struct uprobe_task *utask, bool chained, struct pt_regs *regs) { struct return_instance *ri = utask->return_instances, *ri_next; enum rp_check ctx = chained ? RP_CHECK_CHAIN_CALL : RP_CHECK_CALL; while (ri && !arch_uretprobe_is_alive(ri, ctx, regs)) { ri_next = ri->next; rcu_assign_pointer(utask->return_instances, ri_next); utask->depth--; free_ret_instance(utask, ri, true /* cleanup_hprobe */); ri = ri_next; } } static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs, struct return_instance *ri) { struct uprobe_task *utask = current->utask; unsigned long orig_ret_vaddr, trampoline_vaddr; bool chained; int srcu_idx; if (!get_xol_area()) goto free; if (utask->depth >= MAX_URETPROBE_DEPTH) { printk_ratelimited(KERN_INFO "uprobe: omit uretprobe due to" " nestedness limit pid/tgid=%d/%d\n", current->pid, current->tgid); goto free; } trampoline_vaddr = uprobe_get_trampoline_vaddr(); orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs); if (orig_ret_vaddr == -1) goto free; /* drop the entries invalidated by longjmp() */ chained = (orig_ret_vaddr == trampoline_vaddr); cleanup_return_instances(utask, chained, regs); /* * We don't want to keep trampoline address in stack, rather keep the * original return address of first caller thru all the consequent * instances. This also makes breakpoint unwrapping easier. */ if (chained) { if (!utask->return_instances) { /* * This situation is not possible. Likely we have an * attack from user-space. */ uprobe_warn(current, "handle tail call"); goto free; } orig_ret_vaddr = utask->return_instances->orig_ret_vaddr; } /* __srcu_read_lock() because SRCU lock survives switch to user space */ srcu_idx = __srcu_read_lock(&uretprobes_srcu); ri->func = instruction_pointer(regs); ri->stack = user_stack_pointer(regs); ri->orig_ret_vaddr = orig_ret_vaddr; ri->chained = chained; utask->depth++; hprobe_init_leased(&ri->hprobe, uprobe, srcu_idx); ri->next = utask->return_instances; rcu_assign_pointer(utask->return_instances, ri); mod_timer(&utask->ri_timer, jiffies + RI_TIMER_PERIOD); return; free: ri_free(ri); } /* Prepare to single-step probed instruction out of line. */ static int pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr) { struct uprobe_task *utask = current->utask; int err; if (!try_get_uprobe(uprobe)) return -EINVAL; if (!xol_get_insn_slot(uprobe, utask)) { err = -ENOMEM; goto err_out; } utask->vaddr = bp_vaddr; err = arch_uprobe_pre_xol(&uprobe->arch, regs); if (unlikely(err)) { xol_free_insn_slot(utask); goto err_out; } utask->active_uprobe = uprobe; utask->state = UTASK_SSTEP; return 0; err_out: put_uprobe(uprobe); return err; } /* * If we are singlestepping, then ensure this thread is not connected to * non-fatal signals until completion of singlestep. When xol insn itself * triggers the signal, restart the original insn even if the task is * already SIGKILL'ed (since coredump should report the correct ip). This * is even more important if the task has a handler for SIGSEGV/etc, The * _same_ instruction should be repeated again after return from the signal * handler, and SSTEP can never finish in this case. */ bool uprobe_deny_signal(void) { struct task_struct *t = current; struct uprobe_task *utask = t->utask; if (likely(!utask || !utask->active_uprobe)) return false; WARN_ON_ONCE(utask->state != UTASK_SSTEP); if (task_sigpending(t)) { spin_lock_irq(&t->sighand->siglock); clear_tsk_thread_flag(t, TIF_SIGPENDING); spin_unlock_irq(&t->sighand->siglock); if (__fatal_signal_pending(t) || arch_uprobe_xol_was_trapped(t)) { utask->state = UTASK_SSTEP_TRAPPED; set_tsk_thread_flag(t, TIF_UPROBE); } } return true; } static void mmf_recalc_uprobes(struct mm_struct *mm) { VMA_ITERATOR(vmi, mm, 0); struct vm_area_struct *vma; for_each_vma(vmi, vma) { if (!valid_vma(vma, false)) continue; /* * This is not strictly accurate, we can race with * uprobe_unregister() and see the already removed * uprobe if delete_uprobe() was not yet called. * Or this uprobe can be filtered out. */ if (vma_has_uprobes(vma, vma->vm_start, vma->vm_end)) return; } clear_bit(MMF_HAS_UPROBES, &mm->flags); } static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr) { struct page *page; uprobe_opcode_t opcode; int result; if (WARN_ON_ONCE(!IS_ALIGNED(vaddr, UPROBE_SWBP_INSN_SIZE))) return -EINVAL; pagefault_disable(); result = __get_user(opcode, (uprobe_opcode_t __user *)vaddr); pagefault_enable(); if (likely(result == 0)) goto out; result = get_user_pages(vaddr, 1, FOLL_FORCE, &page); if (result < 0) return result; copy_from_page(page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); put_page(page); out: /* This needs to return true for any variant of the trap insn */ return is_trap_insn(&opcode); } static struct uprobe *find_active_uprobe_speculative(unsigned long bp_vaddr) { struct mm_struct *mm = current->mm; struct uprobe *uprobe = NULL; struct vm_area_struct *vma; struct file *vm_file; loff_t offset; unsigned int seq; guard(rcu)(); if (!mmap_lock_speculate_try_begin(mm, &seq)) return NULL; vma = vma_lookup(mm, bp_vaddr); if (!vma) return NULL; /* * vm_file memory can be reused for another instance of struct file, * but can't be freed from under us, so it's safe to read fields from * it, even if the values are some garbage values; ultimately * find_uprobe_rcu() + mmap_lock_speculation_end() check will ensure * that whatever we speculatively found is correct */ vm_file = READ_ONCE(vma->vm_file); if (!vm_file) return NULL; offset = (loff_t)(vma->vm_pgoff << PAGE_SHIFT) + (bp_vaddr - vma->vm_start); uprobe = find_uprobe_rcu(vm_file->f_inode, offset); if (!uprobe) return NULL; /* now double check that nothing about MM changed */ if (mmap_lock_speculate_retry(mm, seq)) return NULL; return uprobe; } /* assumes being inside RCU protected region */ static struct uprobe *find_active_uprobe_rcu(unsigned long bp_vaddr, int *is_swbp) { struct mm_struct *mm = current->mm; struct uprobe *uprobe = NULL; struct vm_area_struct *vma; uprobe = find_active_uprobe_speculative(bp_vaddr); if (uprobe) return uprobe; mmap_read_lock(mm); vma = vma_lookup(mm, bp_vaddr); if (vma) { if (vma->vm_file) { struct inode *inode = file_inode(vma->vm_file); loff_t offset = vaddr_to_offset(vma, bp_vaddr); uprobe = find_uprobe_rcu(inode, offset); } if (!uprobe) *is_swbp = is_trap_at_addr(mm, bp_vaddr); } else { *is_swbp = -EFAULT; } if (!uprobe && test_and_clear_bit(MMF_RECALC_UPROBES, &mm->flags)) mmf_recalc_uprobes(mm); mmap_read_unlock(mm); return uprobe; } static struct return_instance *push_consumer(struct return_instance *ri, __u64 id, __u64 cookie) { struct return_consumer *ric; if (unlikely(ri == ZERO_SIZE_PTR)) return ri; if (unlikely(ri->cons_cnt > 0)) { ric = krealloc(ri->extra_consumers, sizeof(*ric) * ri->cons_cnt, GFP_KERNEL); if (!ric) { ri_free(ri); return ZERO_SIZE_PTR; } ri->extra_consumers = ric; } ric = likely(ri->cons_cnt == 0) ? &ri->consumer : &ri->extra_consumers[ri->cons_cnt - 1]; ric->id = id; ric->cookie = cookie; ri->cons_cnt++; return ri; } static struct return_consumer * return_consumer_find(struct return_instance *ri, int *iter, int id) { struct return_consumer *ric; int idx; for (idx = *iter; idx < ri->cons_cnt; idx++) { ric = likely(idx == 0) ? &ri->consumer : &ri->extra_consumers[idx - 1]; if (ric->id == id) { *iter = idx + 1; return ric; } } return NULL; } static bool ignore_ret_handler(int rc) { return rc == UPROBE_HANDLER_REMOVE || rc == UPROBE_HANDLER_IGNORE; } static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) { struct uprobe_consumer *uc; bool has_consumers = false, remove = true; struct return_instance *ri = NULL; struct uprobe_task *utask = current->utask; utask->auprobe = &uprobe->arch; list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) { bool session = uc->handler && uc->ret_handler; __u64 cookie = 0; int rc = 0; if (uc->handler) { rc = uc->handler(uc, regs, &cookie); WARN(rc < 0 || rc > 2, "bad rc=0x%x from %ps()\n", rc, uc->handler); } remove &= rc == UPROBE_HANDLER_REMOVE; has_consumers = true; if (!uc->ret_handler || ignore_ret_handler(rc)) continue; if (!ri) ri = alloc_return_instance(utask); if (session) ri = push_consumer(ri, uc->id, cookie); } utask->auprobe = NULL; if (!ZERO_OR_NULL_PTR(ri)) prepare_uretprobe(uprobe, regs, ri); if (remove && has_consumers) { down_read(&uprobe->register_rwsem); /* re-check that removal is still required, this time under lock */ if (!filter_chain(uprobe, current->mm)) { WARN_ON(!uprobe_is_active(uprobe)); unapply_uprobe(uprobe, current->mm); } up_read(&uprobe->register_rwsem); } } static void handle_uretprobe_chain(struct return_instance *ri, struct uprobe *uprobe, struct pt_regs *regs) { struct return_consumer *ric; struct uprobe_consumer *uc; int ric_idx = 0; /* all consumers unsubscribed meanwhile */ if (unlikely(!uprobe)) return; rcu_read_lock_trace(); list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) { bool session = uc->handler && uc->ret_handler; if (uc->ret_handler) { ric = return_consumer_find(ri, &ric_idx, uc->id); if (!session || ric) uc->ret_handler(uc, ri->func, regs, ric ? &ric->cookie : NULL); } } rcu_read_unlock_trace(); } static struct return_instance *find_next_ret_chain(struct return_instance *ri) { bool chained; do { chained = ri->chained; ri = ri->next; /* can't be NULL if chained */ } while (chained); return ri; } void uprobe_handle_trampoline(struct pt_regs *regs) { struct uprobe_task *utask; struct return_instance *ri, *ri_next, *next_chain; struct uprobe *uprobe; enum hprobe_state hstate; bool valid; utask = current->utask; if (!utask) goto sigill; ri = utask->return_instances; if (!ri) goto sigill; do { /* * We should throw out the frames invalidated by longjmp(). * If this chain is valid, then the next one should be alive * or NULL; the latter case means that nobody but ri->func * could hit this trampoline on return. TODO: sigaltstack(). */ next_chain = find_next_ret_chain(ri); valid = !next_chain || arch_uretprobe_is_alive(next_chain, RP_CHECK_RET, regs); instruction_pointer_set(regs, ri->orig_ret_vaddr); do { /* pop current instance from the stack of pending return instances, * as it's not pending anymore: we just fixed up original * instruction pointer in regs and are about to call handlers; * this allows fixup_uretprobe_trampoline_entries() to properly fix up * captured stack traces from uretprobe handlers, in which pending * trampoline addresses on the stack are replaced with correct * original return addresses */ ri_next = ri->next; rcu_assign_pointer(utask->return_instances, ri_next); utask->depth--; uprobe = hprobe_consume(&ri->hprobe, &hstate); if (valid) handle_uretprobe_chain(ri, uprobe, regs); hprobe_finalize(&ri->hprobe, hstate); /* We already took care of hprobe, no need to waste more time on that. */ free_ret_instance(utask, ri, false /* !cleanup_hprobe */); ri = ri_next; } while (ri != next_chain); } while (!valid); return; sigill: uprobe_warn(current, "handle uretprobe, sending SIGILL."); force_sig(SIGILL); } bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs) { return false; } bool __weak arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx, struct pt_regs *regs) { return true; } /* * Run handler and ask thread to singlestep. * Ensure all non-fatal signals cannot interrupt thread while it singlesteps. */ static void handle_swbp(struct pt_regs *regs) { struct uprobe *uprobe; unsigned long bp_vaddr; int is_swbp; bp_vaddr = uprobe_get_swbp_addr(regs); if (bp_vaddr == uprobe_get_trampoline_vaddr()) return uprobe_handle_trampoline(regs); rcu_read_lock_trace(); uprobe = find_active_uprobe_rcu(bp_vaddr, &is_swbp); if (!uprobe) { if (is_swbp > 0) { /* No matching uprobe; signal SIGTRAP. */ force_sig(SIGTRAP); } else { /* * Either we raced with uprobe_unregister() or we can't * access this memory. The latter is only possible if * another thread plays with our ->mm. In both cases * we can simply restart. If this vma was unmapped we * can pretend this insn was not executed yet and get * the (correct) SIGSEGV after restart. */ instruction_pointer_set(regs, bp_vaddr); } goto out; } /* change it in advance for ->handler() and restart */ instruction_pointer_set(regs, bp_vaddr); /* * TODO: move copy_insn/etc into _register and remove this hack. * After we hit the bp, _unregister + _register can install the * new and not-yet-analyzed uprobe at the same address, restart. */ if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags))) goto out; /* * Pairs with the smp_wmb() in prepare_uprobe(). * * Guarantees that if we see the UPROBE_COPY_INSN bit set, then * we must also see the stores to &uprobe->arch performed by the * prepare_uprobe() call. */ smp_rmb(); /* Tracing handlers use ->utask to communicate with fetch methods */ if (!get_utask()) goto out; if (arch_uprobe_ignore(&uprobe->arch, regs)) goto out; handler_chain(uprobe, regs); if (arch_uprobe_skip_sstep(&uprobe->arch, regs)) goto out; if (pre_ssout(uprobe, regs, bp_vaddr)) goto out; out: /* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */ rcu_read_unlock_trace(); } /* * Perform required fix-ups and disable singlestep. * Allow pending signals to take effect. */ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs) { struct uprobe *uprobe; int err = 0; uprobe = utask->active_uprobe; if (utask->state == UTASK_SSTEP_ACK) err = arch_uprobe_post_xol(&uprobe->arch, regs); else if (utask->state == UTASK_SSTEP_TRAPPED) arch_uprobe_abort_xol(&uprobe->arch, regs); else WARN_ON_ONCE(1); put_uprobe(uprobe); utask->active_uprobe = NULL; utask->state = UTASK_RUNNING; xol_free_insn_slot(utask); spin_lock_irq(&current->sighand->siglock); recalc_sigpending(); /* see uprobe_deny_signal() */ spin_unlock_irq(&current->sighand->siglock); if (unlikely(err)) { uprobe_warn(current, "execute the probed insn, sending SIGILL."); force_sig(SIGILL); } } /* * On breakpoint hit, breakpoint notifier sets the TIF_UPROBE flag and * allows the thread to return from interrupt. After that handle_swbp() * sets utask->active_uprobe. * * On singlestep exception, singlestep notifier sets the TIF_UPROBE flag * and allows the thread to return from interrupt. * * While returning to userspace, thread notices the TIF_UPROBE flag and calls * uprobe_notify_resume(). */ void uprobe_notify_resume(struct pt_regs *regs) { struct uprobe_task *utask; clear_thread_flag(TIF_UPROBE); utask = current->utask; if (utask && utask->active_uprobe) handle_singlestep(utask, regs); else handle_swbp(regs); } /* * uprobe_pre_sstep_notifier gets called from interrupt context as part of * notifier mechanism. Set TIF_UPROBE flag and indicate breakpoint hit. */ int uprobe_pre_sstep_notifier(struct pt_regs *regs) { if (!current->mm) return 0; if (!test_bit(MMF_HAS_UPROBES, &current->mm->flags) && (!current->utask || !current->utask->return_instances)) return 0; set_thread_flag(TIF_UPROBE); return 1; } /* * uprobe_post_sstep_notifier gets called in interrupt context as part of notifier * mechanism. Set TIF_UPROBE flag and indicate completion of singlestep. */ int uprobe_post_sstep_notifier(struct pt_regs *regs) { struct uprobe_task *utask = current->utask; if (!current->mm || !utask || !utask->active_uprobe) /* task is currently not uprobed */ return 0; utask->state = UTASK_SSTEP_ACK; set_thread_flag(TIF_UPROBE); return 1; } static struct notifier_block uprobe_exception_nb = { .notifier_call = arch_uprobe_exception_notify, .priority = INT_MAX-1, /* notified after kprobes, kgdb */ }; void __init uprobes_init(void) { int i; for (i = 0; i < UPROBES_HASH_SZ; i++) mutex_init(&uprobes_mmap_mutex[i]); BUG_ON(register_die_notifier(&uprobe_exception_nb)); }
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 // SPDX-License-Identifier: GPL-2.0-or-later /* * Mirics MSi001 silicon tuner driver * * Copyright (C) 2013 Antti Palosaari <crope@iki.fi> * Copyright (C) 2014 Antti Palosaari <crope@iki.fi> */ #include <linux/module.h> #include <linux/gcd.h> #include <media/v4l2-device.h> #include <media/v4l2-ctrls.h> static const struct v4l2_frequency_band bands[] = { { .type = V4L2_TUNER_RF, .index = 0, .capability = V4L2_TUNER_CAP_1HZ | V4L2_TUNER_CAP_FREQ_BANDS, .rangelow = 49000000, .rangehigh = 263000000, }, { .type = V4L2_TUNER_RF, .index = 1, .capability = V4L2_TUNER_CAP_1HZ | V4L2_TUNER_CAP_FREQ_BANDS, .rangelow = 390000000, .rangehigh = 960000000, }, }; struct msi001_dev { struct spi_device *spi; struct v4l2_subdev sd; /* Controls */ struct v4l2_ctrl_handler hdl; struct v4l2_ctrl *bandwidth_auto; struct v4l2_ctrl *bandwidth; struct v4l2_ctrl *lna_gain; struct v4l2_ctrl *mixer_gain; struct v4l2_ctrl *if_gain; unsigned int f_tuner; }; static inline struct msi001_dev *sd_to_msi001_dev(struct v4l2_subdev *sd) { return container_of(sd, struct msi001_dev, sd); } static int msi001_wreg(struct msi001_dev *dev, u32 data) { /* Register format: 4 bits addr + 20 bits value */ return spi_write(dev->spi, &data, 3); }; static int msi001_set_gain(struct msi001_dev *dev, int lna_gain, int mixer_gain, int if_gain) { struct spi_device *spi = dev->spi; int ret; u32 reg; dev_dbg(&spi->dev, "lna=%d mixer=%d if=%d\n", lna_gain, mixer_gain, if_gain); reg = 1 << 0; reg |= (59 - if_gain) << 4; reg |= 0 << 10; reg |= (1 - mixer_gain) << 12; reg |= (1 - lna_gain) << 13; reg |= 4 << 14; reg |= 0 << 17; ret = msi001_wreg(dev, reg); if (ret) goto err; return 0; err: dev_dbg(&spi->dev, "failed %d\n", ret); return ret; }; static int msi001_set_tuner(struct msi001_dev *dev) { struct spi_device *spi = dev->spi; int ret, i; unsigned int uitmp, div_n, k, k_thresh, k_frac, div_lo, f_if1; u32 reg; u64 f_vco; u8 mode, filter_mode; static const struct { u32 rf; u8 mode; u8 div_lo; } band_lut[] = { { 50000000, 0xe1, 16}, /* AM_MODE2, antenna 2 */ {108000000, 0x42, 32}, /* VHF_MODE */ {330000000, 0x44, 16}, /* B3_MODE */ {960000000, 0x48, 4}, /* B45_MODE */ { ~0U, 0x50, 2}, /* BL_MODE */ }; static const struct { u32 freq; u8 filter_mode; } if_freq_lut[] = { { 0, 0x03}, /* Zero IF */ { 450000, 0x02}, /* 450 kHz IF */ {1620000, 0x01}, /* 1.62 MHz IF */ {2048000, 0x00}, /* 2.048 MHz IF */ }; static const struct { u32 freq; u8 val; } bandwidth_lut[] = { { 200000, 0x00}, /* 200 kHz */ { 300000, 0x01}, /* 300 kHz */ { 600000, 0x02}, /* 600 kHz */ {1536000, 0x03}, /* 1.536 MHz */ {5000000, 0x04}, /* 5 MHz */ {6000000, 0x05}, /* 6 MHz */ {7000000, 0x06}, /* 7 MHz */ {8000000, 0x07}, /* 8 MHz */ }; unsigned int f_rf = dev->f_tuner; /* * bandwidth (Hz) * 200000, 300000, 600000, 1536000, 5000000, 6000000, 7000000, 8000000 */ unsigned int bandwidth; /* * intermediate frequency (Hz) * 0, 450000, 1620000, 2048000 */ unsigned int f_if = 0; #define F_REF 24000000 #define DIV_PRE_N 4 #define F_VCO_STEP div_lo dev_dbg(&spi->dev, "f_rf=%d f_if=%d\n", f_rf, f_if); for (i = 0; i < ARRAY_SIZE(band_lut); i++) { if (f_rf <= band_lut[i].rf) { mode = band_lut[i].mode; div_lo = band_lut[i].div_lo; break; } } if (i == ARRAY_SIZE(band_lut)) { ret = -EINVAL; goto err; } /* AM_MODE is upconverted */ if ((mode >> 0) & 0x1) f_if1 = 5 * F_REF; else f_if1 = 0; for (i = 0; i < ARRAY_SIZE(if_freq_lut); i++) { if (f_if == if_freq_lut[i].freq) { filter_mode = if_freq_lut[i].filter_mode; break; } } if (i == ARRAY_SIZE(if_freq_lut)) { ret = -EINVAL; goto err; } /* filters */ bandwidth = dev->bandwidth->val; bandwidth = clamp(bandwidth, 200000U, 8000000U); for (i = 0; i < ARRAY_SIZE(bandwidth_lut); i++) { if (bandwidth <= bandwidth_lut[i].freq) { bandwidth = bandwidth_lut[i].val; break; } } if (i == ARRAY_SIZE(bandwidth_lut)) { ret = -EINVAL; goto err; } dev->bandwidth->val = bandwidth_lut[i].freq; dev_dbg(&spi->dev, "bandwidth selected=%d\n", bandwidth_lut[i].freq); /* * Fractional-N synthesizer * * +---------------------------------------+ * v | * Fref +----+ +-------+ +----+ +------+ +---+ * ------> | PD | --> | VCO | ------> | /4 | --> | /N.F | <-- | K | * +----+ +-------+ +----+ +------+ +---+ * | * | * v * +-------+ Fout * | /Rout | ------> * +-------+ */ /* Calculate PLL integer and fractional control word. */ f_vco = (u64) (f_rf + f_if + f_if1) * div_lo; div_n = div_u64_rem(f_vco, DIV_PRE_N * F_REF, &k); k_thresh = (DIV_PRE_N * F_REF) / F_VCO_STEP; k_frac = div_u64((u64) k * k_thresh, (DIV_PRE_N * F_REF)); /* Find out greatest common divisor and divide to smaller. */ uitmp = gcd(k_thresh, k_frac); k_thresh /= uitmp; k_frac /= uitmp; /* Force divide to reg max. Resolution will be reduced. */ uitmp = DIV_ROUND_UP(k_thresh, 4095); k_thresh = DIV_ROUND_CLOSEST(k_thresh, uitmp); k_frac = DIV_ROUND_CLOSEST(k_frac, uitmp); /* Calculate real RF set. */ uitmp = (unsigned int) F_REF * DIV_PRE_N * div_n; uitmp += (unsigned int) F_REF * DIV_PRE_N * k_frac / k_thresh; uitmp /= div_lo; dev_dbg(&spi->dev, "f_rf=%u:%u f_vco=%llu div_n=%u k_thresh=%u k_frac=%u div_lo=%u\n", f_rf, uitmp, f_vco, div_n, k_thresh, k_frac, div_lo); ret = msi001_wreg(dev, 0x00000e); if (ret) goto err; ret = msi001_wreg(dev, 0x000003); if (ret) goto err; reg = 0 << 0; reg |= mode << 4; reg |= filter_mode << 12; reg |= bandwidth << 14; reg |= 0x02 << 17; reg |= 0x00 << 20; ret = msi001_wreg(dev, reg); if (ret) goto err; reg = 5 << 0; reg |= k_thresh << 4; reg |= 1 << 19; reg |= 1 << 21; ret = msi001_wreg(dev, reg); if (ret) goto err; reg = 2 << 0; reg |= k_frac << 4; reg |= div_n << 16; ret = msi001_wreg(dev, reg); if (ret) goto err; ret = msi001_set_gain(dev, dev->lna_gain->cur.val, dev->mixer_gain->cur.val, dev->if_gain->cur.val); if (ret) goto err; reg = 6 << 0; reg |= 63 << 4; reg |= 4095 << 10; ret = msi001_wreg(dev, reg); if (ret) goto err; return 0; err: dev_dbg(&spi->dev, "failed %d\n", ret); return ret; } static int msi001_standby(struct v4l2_subdev *sd) { struct msi001_dev *dev = sd_to_msi001_dev(sd); return msi001_wreg(dev, 0x000000); } static int msi001_g_tuner(struct v4l2_subdev *sd, struct v4l2_tuner *v) { struct msi001_dev *dev = sd_to_msi001_dev(sd); struct spi_device *spi = dev->spi; dev_dbg(&spi->dev, "index=%d\n", v->index); strscpy(v->name, "Mirics MSi001", sizeof(v->name)); v->type = V4L2_TUNER_RF; v->capability = V4L2_TUNER_CAP_1HZ | V4L2_TUNER_CAP_FREQ_BANDS; v->rangelow = 49000000; v->rangehigh = 960000000; return 0; } static int msi001_s_tuner(struct v4l2_subdev *sd, const struct v4l2_tuner *v) { struct msi001_dev *dev = sd_to_msi001_dev(sd); struct spi_device *spi = dev->spi; dev_dbg(&spi->dev, "index=%d\n", v->index); return 0; } static int msi001_g_frequency(struct v4l2_subdev *sd, struct v4l2_frequency *f) { struct msi001_dev *dev = sd_to_msi001_dev(sd); struct spi_device *spi = dev->spi; dev_dbg(&spi->dev, "tuner=%d\n", f->tuner); f->frequency = dev->f_tuner; return 0; } static int msi001_s_frequency(struct v4l2_subdev *sd, const struct v4l2_frequency *f) { struct msi001_dev *dev = sd_to_msi001_dev(sd); struct spi_device *spi = dev->spi; unsigned int band; dev_dbg(&spi->dev, "tuner=%d type=%d frequency=%u\n", f->tuner, f->type, f->frequency); if (f->frequency < ((bands[0].rangehigh + bands[1].rangelow) / 2)) band = 0; else band = 1; dev->f_tuner = clamp_t(unsigned int, f->frequency, bands[band].rangelow, bands[band].rangehigh); return msi001_set_tuner(dev); } static int msi001_enum_freq_bands(struct v4l2_subdev *sd, struct v4l2_frequency_band *band) { struct msi001_dev *dev = sd_to_msi001_dev(sd); struct spi_device *spi = dev->spi; dev_dbg(&spi->dev, "tuner=%d type=%d index=%d\n", band->tuner, band->type, band->index); if (band->index >= ARRAY_SIZE(bands)) return -EINVAL; band->capability = bands[band->index].capability; band->rangelow = bands[band->index].rangelow; band->rangehigh = bands[band->index].rangehigh; return 0; } static const struct v4l2_subdev_tuner_ops msi001_tuner_ops = { .standby = msi001_standby, .g_tuner = msi001_g_tuner, .s_tuner = msi001_s_tuner, .g_frequency = msi001_g_frequency, .s_frequency = msi001_s_frequency, .enum_freq_bands = msi001_enum_freq_bands, }; static const struct v4l2_subdev_ops msi001_ops = { .tuner = &msi001_tuner_ops, }; static int msi001_s_ctrl(struct v4l2_ctrl *ctrl) { struct msi001_dev *dev = container_of(ctrl->handler, struct msi001_dev, hdl); struct spi_device *spi = dev->spi; int ret; dev_dbg(&spi->dev, "id=%d name=%s val=%d min=%lld max=%lld step=%lld\n", ctrl->id, ctrl->name, ctrl->val, ctrl->minimum, ctrl->maximum, ctrl->step); switch (ctrl->id) { case V4L2_CID_RF_TUNER_BANDWIDTH_AUTO: case V4L2_CID_RF_TUNER_BANDWIDTH: ret = msi001_set_tuner(dev); break; case V4L2_CID_RF_TUNER_LNA_GAIN: ret = msi001_set_gain(dev, dev->lna_gain->val, dev->mixer_gain->cur.val, dev->if_gain->cur.val); break; case V4L2_CID_RF_TUNER_MIXER_GAIN: ret = msi001_set_gain(dev, dev->lna_gain->cur.val, dev->mixer_gain->val, dev->if_gain->cur.val); break; case V4L2_CID_RF_TUNER_IF_GAIN: ret = msi001_set_gain(dev, dev->lna_gain->cur.val, dev->mixer_gain->cur.val, dev->if_gain->val); break; default: dev_dbg(&spi->dev, "unknown control %d\n", ctrl->id); ret = -EINVAL; } return ret; } static const struct v4l2_ctrl_ops msi001_ctrl_ops = { .s_ctrl = msi001_s_ctrl, }; static int msi001_probe(struct spi_device *spi) { struct msi001_dev *dev; int ret; dev_dbg(&spi->dev, "\n"); dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) { ret = -ENOMEM; goto err; } dev->spi = spi; dev->f_tuner = bands[0].rangelow; v4l2_spi_subdev_init(&dev->sd, spi, &msi001_ops); /* Register controls */ v4l2_ctrl_handler_init(&dev->hdl, 5); dev->bandwidth_auto = v4l2_ctrl_new_std(&dev->hdl, &msi001_ctrl_ops, V4L2_CID_RF_TUNER_BANDWIDTH_AUTO, 0, 1, 1, 1); dev->bandwidth = v4l2_ctrl_new_std(&dev->hdl, &msi001_ctrl_ops, V4L2_CID_RF_TUNER_BANDWIDTH, 200000, 8000000, 1, 200000); if (dev->hdl.error) { ret = dev->hdl.error; dev_err(&spi->dev, "Could not initialize controls\n"); /* control init failed, free handler */ goto err_ctrl_handler_free; } v4l2_ctrl_auto_cluster(2, &dev->bandwidth_auto, 0, false); dev->lna_gain = v4l2_ctrl_new_std(&dev->hdl, &msi001_ctrl_ops, V4L2_CID_RF_TUNER_LNA_GAIN, 0, 1, 1, 1); dev->mixer_gain = v4l2_ctrl_new_std(&dev->hdl, &msi001_ctrl_ops, V4L2_CID_RF_TUNER_MIXER_GAIN, 0, 1, 1, 1); dev->if_gain = v4l2_ctrl_new_std(&dev->hdl, &msi001_ctrl_ops, V4L2_CID_RF_TUNER_IF_GAIN, 0, 59, 1, 0); if (dev->hdl.error) { ret = dev->hdl.error; dev_err(&spi->dev, "Could not initialize controls\n"); /* control init failed, free handler */ goto err_ctrl_handler_free; } dev->sd.ctrl_handler = &dev->hdl; return 0; err_ctrl_handler_free: v4l2_ctrl_handler_free(&dev->hdl); kfree(dev); err: return ret; } static void msi001_remove(struct spi_device *spi) { struct v4l2_subdev *sd = spi_get_drvdata(spi); struct msi001_dev *dev = sd_to_msi001_dev(sd); dev_dbg(&spi->dev, "\n"); /* * Registered by v4l2_spi_new_subdev() from master driver, but we must * unregister it from here. Weird. */ v4l2_device_unregister_subdev(&dev->sd); v4l2_ctrl_handler_free(&dev->hdl); kfree(dev); } static const struct spi_device_id msi001_id_table[] = { {"msi001", 0}, {} }; MODULE_DEVICE_TABLE(spi, msi001_id_table); static struct spi_driver msi001_driver = { .driver = { .name = "msi001", .suppress_bind_attrs = true, }, .probe = msi001_probe, .remove = msi001_remove, .id_table = msi001_id_table, }; module_spi_driver(msi001_driver); MODULE_AUTHOR("Antti Palosaari <crope@iki.fi>"); MODULE_DESCRIPTION("Mirics MSi001"); MODULE_LICENSE("GPL");
3 3 3 3 3 3 3 1 1 1 1 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2007 The University of Aberdeen, Scotland, UK * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. * * An implementation of the DCCP protocol * * This code has been developed by the University of Waikato WAND * research group. For further information please see https://www.wand.net.nz/ * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz * * This code also uses code from Lulea University, rereleased as GPL by its * authors: * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon * * Changes to meet Linux coding standards, to make it meet latest ccid3 draft * and to make it work as a loadable module in the DCCP stack written by * Arnaldo Carvalho de Melo <acme@conectiva.com.br>. * * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> */ #include <linux/string.h> #include <linux/slab.h> #include "packet_history.h" #include "../../dccp.h" /* * Transmitter History Routines */ static struct kmem_cache *tfrc_tx_hist_slab; int __init tfrc_tx_packet_history_init(void) { tfrc_tx_hist_slab = kmem_cache_create("tfrc_tx_hist", sizeof(struct tfrc_tx_hist_entry), 0, SLAB_HWCACHE_ALIGN, NULL); return tfrc_tx_hist_slab == NULL ? -ENOBUFS : 0; } void tfrc_tx_packet_history_exit(void) { if (tfrc_tx_hist_slab != NULL) { kmem_cache_destroy(tfrc_tx_hist_slab); tfrc_tx_hist_slab = NULL; } } int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno) { struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any()); if (entry == NULL) return -ENOBUFS; entry->seqno = seqno; entry->stamp = ktime_get_real(); entry->next = *headp; *headp = entry; return 0; } void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp) { struct tfrc_tx_hist_entry *head = *headp; while (head != NULL) { struct tfrc_tx_hist_entry *next = head->next; kmem_cache_free(tfrc_tx_hist_slab, head); head = next; } *headp = NULL; } /* * Receiver History Routines */ static struct kmem_cache *tfrc_rx_hist_slab; int __init tfrc_rx_packet_history_init(void) { tfrc_rx_hist_slab = kmem_cache_create("tfrc_rxh_cache", sizeof(struct tfrc_rx_hist_entry), 0, SLAB_HWCACHE_ALIGN, NULL); return tfrc_rx_hist_slab == NULL ? -ENOBUFS : 0; } void tfrc_rx_packet_history_exit(void) { if (tfrc_rx_hist_slab != NULL) { kmem_cache_destroy(tfrc_rx_hist_slab); tfrc_rx_hist_slab = NULL; } } static inline void tfrc_rx_hist_entry_from_skb(struct tfrc_rx_hist_entry *entry, const struct sk_buff *skb, const u64 ndp) { const struct dccp_hdr *dh = dccp_hdr(skb); entry->tfrchrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq; entry->tfrchrx_ccval = dh->dccph_ccval; entry->tfrchrx_type = dh->dccph_type; entry->tfrchrx_ndp = ndp; entry->tfrchrx_tstamp = ktime_get_real(); } void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, const struct sk_buff *skb, const u64 ndp) { struct tfrc_rx_hist_entry *entry = tfrc_rx_hist_last_rcv(h); tfrc_rx_hist_entry_from_skb(entry, skb, ndp); } /* has the packet contained in skb been seen before? */ int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb) { const u64 seq = DCCP_SKB_CB(skb)->dccpd_seq; int i; if (dccp_delta_seqno(tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, seq) <= 0) return 1; for (i = 1; i <= h->loss_count; i++) if (tfrc_rx_hist_entry(h, i)->tfrchrx_seqno == seq) return 1; return 0; } static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b) { const u8 idx_a = tfrc_rx_hist_index(h, a), idx_b = tfrc_rx_hist_index(h, b); swap(h->ring[idx_a], h->ring[idx_b]); } /* * Private helper functions for loss detection. * * In the descriptions, `Si' refers to the sequence number of entry number i, * whose NDP count is `Ni' (lower case is used for variables). * Note: All __xxx_loss functions expect that a test against duplicates has been * performed already: the seqno of the skb must not be less than the seqno * of loss_prev; and it must not equal that of any valid history entry. */ static void __do_track_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u64 n1) { u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, s1 = DCCP_SKB_CB(skb)->dccpd_seq; if (!dccp_loss_free(s0, s1, n1)) { /* gap between S0 and S1 */ h->loss_count = 1; tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n1); } } static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2) { u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno, s2 = DCCP_SKB_CB(skb)->dccpd_seq; if (likely(dccp_delta_seqno(s1, s2) > 0)) { /* S1 < S2 */ h->loss_count = 2; tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n2); return; } /* S0 < S2 < S1 */ if (dccp_loss_free(s0, s2, n2)) { u64 n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp; if (dccp_loss_free(s2, s1, n1)) { /* hole is filled: S0, S2, and S1 are consecutive */ h->loss_count = 0; h->loss_start = tfrc_rx_hist_index(h, 1); } else /* gap between S2 and S1: just update loss_prev */ tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n2); } else { /* gap between S0 and S2 */ /* * Reorder history to insert S2 between S0 and S1 */ tfrc_rx_hist_swap(h, 0, 3); h->loss_start = tfrc_rx_hist_index(h, 3); tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n2); h->loss_count = 2; } } /* return 1 if a new loss event has been identified */ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3) { u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno, s2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_seqno, s3 = DCCP_SKB_CB(skb)->dccpd_seq; if (likely(dccp_delta_seqno(s2, s3) > 0)) { /* S2 < S3 */ h->loss_count = 3; tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 3), skb, n3); return 1; } /* S3 < S2 */ if (dccp_delta_seqno(s1, s3) > 0) { /* S1 < S3 < S2 */ /* * Reorder history to insert S3 between S1 and S2 */ tfrc_rx_hist_swap(h, 2, 3); tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n3); h->loss_count = 3; return 1; } /* S0 < S3 < S1 */ if (dccp_loss_free(s0, s3, n3)) { u64 n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp; if (dccp_loss_free(s3, s1, n1)) { /* hole between S0 and S1 filled by S3 */ u64 n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp; if (dccp_loss_free(s1, s2, n2)) { /* entire hole filled by S0, S3, S1, S2 */ h->loss_start = tfrc_rx_hist_index(h, 2); h->loss_count = 0; } else { /* gap remains between S1 and S2 */ h->loss_start = tfrc_rx_hist_index(h, 1); h->loss_count = 1; } } else /* gap exists between S3 and S1, loss_count stays at 2 */ tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n3); return 0; } /* * The remaining case: S0 < S3 < S1 < S2; gap between S0 and S3 * Reorder history to insert S3 between S0 and S1. */ tfrc_rx_hist_swap(h, 0, 3); h->loss_start = tfrc_rx_hist_index(h, 3); tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n3); h->loss_count = 3; return 1; } /* recycle RX history records to continue loss detection if necessary */ static void __three_after_loss(struct tfrc_rx_hist *h) { /* * At this stage we know already that there is a gap between S0 and S1 * (since S0 was the highest sequence number received before detecting * the loss). To recycle the loss record, it is thus only necessary to * check for other possible gaps between S1/S2 and between S2/S3. */ u64 s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno, s2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_seqno, s3 = tfrc_rx_hist_entry(h, 3)->tfrchrx_seqno; u64 n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp, n3 = tfrc_rx_hist_entry(h, 3)->tfrchrx_ndp; if (dccp_loss_free(s1, s2, n2)) { if (dccp_loss_free(s2, s3, n3)) { /* no gap between S2 and S3: entire hole is filled */ h->loss_start = tfrc_rx_hist_index(h, 3); h->loss_count = 0; } else { /* gap between S2 and S3 */ h->loss_start = tfrc_rx_hist_index(h, 2); h->loss_count = 1; } } else { /* gap between S1 and S2 */ h->loss_start = tfrc_rx_hist_index(h, 1); h->loss_count = 2; } } /** * tfrc_rx_handle_loss - Loss detection and further processing * @h: The non-empty RX history object * @lh: Loss Intervals database to update * @skb: Currently received packet * @ndp: The NDP count belonging to @skb * @calc_first_li: Caller-dependent computation of first loss interval in @lh * @sk: Used by @calc_first_li (see tfrc_lh_interval_add) * * Chooses action according to pending loss, updates LI database when a new * loss was detected, and does required post-processing. Returns 1 when caller * should send feedback, 0 otherwise. * Since it also takes care of reordering during loss detection and updates the * records accordingly, the caller should not perform any more RX history * operations when loss_count is greater than 0 after calling this function. */ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, struct tfrc_loss_hist *lh, struct sk_buff *skb, const u64 ndp, u32 (*calc_first_li)(struct sock *), struct sock *sk) { int is_new_loss = 0; if (h->loss_count == 0) { __do_track_loss(h, skb, ndp); } else if (h->loss_count == 1) { __one_after_loss(h, skb, ndp); } else if (h->loss_count != 2) { DCCP_BUG("invalid loss_count %d", h->loss_count); } else if (__two_after_loss(h, skb, ndp)) { /* * Update Loss Interval database and recycle RX records */ is_new_loss = tfrc_lh_interval_add(lh, h, calc_first_li, sk); __three_after_loss(h); } return is_new_loss; } int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h) { int i; for (i = 0; i <= TFRC_NDUPACK; i++) { h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC); if (h->ring[i] == NULL) goto out_free; } h->loss_count = h->loss_start = 0; return 0; out_free: while (i-- != 0) { kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]); h->ring[i] = NULL; } return -ENOBUFS; } void tfrc_rx_hist_purge(struct tfrc_rx_hist *h) { int i; for (i = 0; i <= TFRC_NDUPACK; ++i) if (h->ring[i] != NULL) { kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]); h->ring[i] = NULL; } } /** * tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against * @h: The non-empty RX history object */ static inline struct tfrc_rx_hist_entry * tfrc_rx_hist_rtt_last_s(const struct tfrc_rx_hist *h) { return h->ring[0]; } /** * tfrc_rx_hist_rtt_prev_s - previously suitable (wrt rtt_last_s) RTT-sampling entry * @h: The non-empty RX history object */ static inline struct tfrc_rx_hist_entry * tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h) { return h->ring[h->rtt_sample_prev]; } /** * tfrc_rx_hist_sample_rtt - Sample RTT from timestamp / CCVal * @h: receive histogram * @skb: packet containing timestamp. * * Based on ideas presented in RFC 4342, 8.1. Returns 0 if it was not able * to compute a sample with given data - calling function should check this. */ u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb) { u32 sample = 0, delta_v = SUB16(dccp_hdr(skb)->dccph_ccval, tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval); if (delta_v < 1 || delta_v > 4) { /* unsuitable CCVal delta */ if (h->rtt_sample_prev == 2) { /* previous candidate stored */ sample = SUB16(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval, tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval); if (sample) sample = 4 / sample * ktime_us_delta(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_tstamp, tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp); else /* * FIXME: This condition is in principle not * possible but occurs when CCID is used for * two-way data traffic. I have tried to trace * it, but the cause does not seem to be here. */ DCCP_BUG("please report to dccp@vger.kernel.org" " => prev = %u, last = %u", tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval, tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval); } else if (delta_v < 1) { h->rtt_sample_prev = 1; goto keep_ref_for_next_time; } } else if (delta_v == 4) /* optimal match */ sample = ktime_to_us(net_timedelta(tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp)); else { /* suboptimal match */ h->rtt_sample_prev = 2; goto keep_ref_for_next_time; } if (unlikely(sample > DCCP_SANE_RTT_MAX)) { DCCP_WARN("RTT sample %u too large, using max\n", sample); sample = DCCP_SANE_RTT_MAX; } h->rtt_sample_prev = 0; /* use current entry as next reference */ keep_ref_for_next_time: return sample; }
10 10 3 11 11 1 1 4 3 1 3 1 4 4 4 3 1 3 3 7 2 4 4 1 17 5 10 1 1 14 6 3 3 3 3 6 6 3 3 15 4 11 1 14 14 3 3 3 2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved * Copyright 2005-2006 Ian Kent <raven@themaw.net> */ #include <linux/seq_file.h> #include <linux/pagemap.h> #include "autofs_i.h" struct autofs_info *autofs_new_ino(struct autofs_sb_info *sbi) { struct autofs_info *ino; ino = kzalloc(sizeof(*ino), GFP_KERNEL); if (ino) { INIT_LIST_HEAD(&ino->active); INIT_LIST_HEAD(&ino->expiring); ino->last_used = jiffies; ino->sbi = sbi; ino->exp_timeout = -1; ino->count = 1; } return ino; } void autofs_clean_ino(struct autofs_info *ino) { ino->uid = GLOBAL_ROOT_UID; ino->gid = GLOBAL_ROOT_GID; ino->exp_timeout = -1; ino->last_used = jiffies; } void autofs_free_ino(struct autofs_info *ino) { kfree_rcu(ino, rcu); } void autofs_kill_sb(struct super_block *sb) { struct autofs_sb_info *sbi = autofs_sbi(sb); /* * In the event of a failure in get_sb_nodev the superblock * info is not present so nothing else has been setup, so * just call kill_anon_super when we are called from * deactivate_super. */ if (sbi) { /* Free wait queues, close pipe */ autofs_catatonic_mode(sbi); put_pid(sbi->oz_pgrp); } pr_debug("shutting down\n"); kill_litter_super(sb); if (sbi) kfree_rcu(sbi, rcu); } static int autofs_show_options(struct seq_file *m, struct dentry *root) { struct autofs_sb_info *sbi = autofs_sbi(root->d_sb); struct inode *root_inode = d_inode(root->d_sb->s_root); if (!sbi) return 0; seq_printf(m, ",fd=%d", sbi->pipefd); if (!uid_eq(root_inode->i_uid, GLOBAL_ROOT_UID)) seq_printf(m, ",uid=%u", from_kuid_munged(&init_user_ns, root_inode->i_uid)); if (!gid_eq(root_inode->i_gid, GLOBAL_ROOT_GID)) seq_printf(m, ",gid=%u", from_kgid_munged(&init_user_ns, root_inode->i_gid)); seq_printf(m, ",pgrp=%d", pid_vnr(sbi->oz_pgrp)); seq_printf(m, ",timeout=%lu", sbi->exp_timeout/HZ); seq_printf(m, ",minproto=%d", sbi->min_proto); seq_printf(m, ",maxproto=%d", sbi->max_proto); if (autofs_type_offset(sbi->type)) seq_puts(m, ",offset"); else if (autofs_type_direct(sbi->type)) seq_puts(m, ",direct"); else seq_puts(m, ",indirect"); if (sbi->flags & AUTOFS_SBI_STRICTEXPIRE) seq_puts(m, ",strictexpire"); if (sbi->flags & AUTOFS_SBI_IGNORE) seq_puts(m, ",ignore"); #ifdef CONFIG_CHECKPOINT_RESTORE if (sbi->pipe) seq_printf(m, ",pipe_ino=%ld", file_inode(sbi->pipe)->i_ino); else seq_puts(m, ",pipe_ino=-1"); #endif return 0; } static void autofs_evict_inode(struct inode *inode) { clear_inode(inode); kfree(inode->i_private); } static const struct super_operations autofs_sops = { .statfs = simple_statfs, .show_options = autofs_show_options, .evict_inode = autofs_evict_inode, }; enum { Opt_direct, Opt_fd, Opt_gid, Opt_ignore, Opt_indirect, Opt_maxproto, Opt_minproto, Opt_offset, Opt_pgrp, Opt_strictexpire, Opt_uid, }; const struct fs_parameter_spec autofs_param_specs[] = { fsparam_flag ("direct", Opt_direct), fsparam_fd ("fd", Opt_fd), fsparam_gid ("gid", Opt_gid), fsparam_flag ("ignore", Opt_ignore), fsparam_flag ("indirect", Opt_indirect), fsparam_u32 ("maxproto", Opt_maxproto), fsparam_u32 ("minproto", Opt_minproto), fsparam_flag ("offset", Opt_offset), fsparam_u32 ("pgrp", Opt_pgrp), fsparam_flag ("strictexpire", Opt_strictexpire), fsparam_uid ("uid", Opt_uid), {} }; struct autofs_fs_context { kuid_t uid; kgid_t gid; int pgrp; bool pgrp_set; }; /* * Open the fd. We do it here rather than in get_tree so that it's done in the * context of the system call that passed the data and not the one that * triggered the superblock creation, lest the fd gets reassigned. */ static int autofs_parse_fd(struct fs_context *fc, struct autofs_sb_info *sbi, struct fs_parameter *param, struct fs_parse_result *result) { struct file *pipe; int ret; if (param->type == fs_value_is_file) { /* came through the new api */ pipe = param->file; param->file = NULL; } else { pipe = fget(result->uint_32); } if (!pipe) { errorf(fc, "could not open pipe file descriptor"); return -EBADF; } ret = autofs_check_pipe(pipe); if (ret < 0) { errorf(fc, "Invalid/unusable pipe"); fput(pipe); return -EBADF; } autofs_set_packet_pipe_flags(pipe); if (sbi->pipe) fput(sbi->pipe); sbi->pipefd = result->uint_32; sbi->pipe = pipe; return 0; } static int autofs_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct autofs_fs_context *ctx = fc->fs_private; struct autofs_sb_info *sbi = fc->s_fs_info; struct fs_parse_result result; int opt; opt = fs_parse(fc, autofs_param_specs, param, &result); if (opt < 0) return opt; switch (opt) { case Opt_fd: return autofs_parse_fd(fc, sbi, param, &result); case Opt_uid: ctx->uid = result.uid; break; case Opt_gid: ctx->gid = result.gid; break; case Opt_pgrp: ctx->pgrp = result.uint_32; ctx->pgrp_set = true; break; case Opt_minproto: sbi->min_proto = result.uint_32; break; case Opt_maxproto: sbi->max_proto = result.uint_32; break; case Opt_indirect: set_autofs_type_indirect(&sbi->type); break; case Opt_direct: set_autofs_type_direct(&sbi->type); break; case Opt_offset: set_autofs_type_offset(&sbi->type); break; case Opt_strictexpire: sbi->flags |= AUTOFS_SBI_STRICTEXPIRE; break; case Opt_ignore: sbi->flags |= AUTOFS_SBI_IGNORE; } return 0; } static struct autofs_sb_info *autofs_alloc_sbi(void) { struct autofs_sb_info *sbi; sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) return NULL; sbi->magic = AUTOFS_SBI_MAGIC; sbi->flags = AUTOFS_SBI_CATATONIC; sbi->min_proto = AUTOFS_MIN_PROTO_VERSION; sbi->max_proto = AUTOFS_MAX_PROTO_VERSION; sbi->pipefd = -1; set_autofs_type_indirect(&sbi->type); mutex_init(&sbi->wq_mutex); mutex_init(&sbi->pipe_mutex); spin_lock_init(&sbi->fs_lock); spin_lock_init(&sbi->lookup_lock); INIT_LIST_HEAD(&sbi->active_list); INIT_LIST_HEAD(&sbi->expiring_list); return sbi; } static int autofs_validate_protocol(struct fs_context *fc) { struct autofs_sb_info *sbi = fc->s_fs_info; /* Test versions first */ if (sbi->max_proto < AUTOFS_MIN_PROTO_VERSION || sbi->min_proto > AUTOFS_MAX_PROTO_VERSION) { errorf(fc, "kernel does not match daemon version " "daemon (%d, %d) kernel (%d, %d)\n", sbi->min_proto, sbi->max_proto, AUTOFS_MIN_PROTO_VERSION, AUTOFS_MAX_PROTO_VERSION); return -EINVAL; } /* Establish highest kernel protocol version */ if (sbi->max_proto > AUTOFS_MAX_PROTO_VERSION) sbi->version = AUTOFS_MAX_PROTO_VERSION; else sbi->version = sbi->max_proto; switch (sbi->version) { case 4: sbi->sub_version = 7; break; case 5: sbi->sub_version = AUTOFS_PROTO_SUBVERSION; break; default: sbi->sub_version = 0; } return 0; } static int autofs_fill_super(struct super_block *s, struct fs_context *fc) { struct autofs_fs_context *ctx = fc->fs_private; struct autofs_sb_info *sbi = s->s_fs_info; struct inode *root_inode; struct autofs_info *ino; pr_debug("starting up, sbi = %p\n", sbi); sbi->sb = s; s->s_blocksize = 1024; s->s_blocksize_bits = 10; s->s_magic = AUTOFS_SUPER_MAGIC; s->s_op = &autofs_sops; s->s_d_op = &autofs_dentry_operations; s->s_time_gran = 1; /* * Get the root inode and dentry, but defer checking for errors. */ ino = autofs_new_ino(sbi); if (!ino) return -ENOMEM; root_inode = autofs_get_inode(s, S_IFDIR | 0755); if (!root_inode) return -ENOMEM; root_inode->i_uid = ctx->uid; root_inode->i_gid = ctx->gid; root_inode->i_fop = &autofs_root_operations; root_inode->i_op = &autofs_dir_inode_operations; s->s_root = d_make_root(root_inode); if (unlikely(!s->s_root)) { autofs_free_ino(ino); return -ENOMEM; } s->s_root->d_fsdata = ino; if (ctx->pgrp_set) { sbi->oz_pgrp = find_get_pid(ctx->pgrp); if (!sbi->oz_pgrp) return invalf(fc, "Could not find process group %d", ctx->pgrp); } else sbi->oz_pgrp = get_task_pid(current, PIDTYPE_PGID); if (autofs_type_trigger(sbi->type)) /* s->s_root won't be contended so there's little to * be gained by not taking the d_lock when setting * d_flags, even when a lot mounts are being done. */ managed_dentry_set_managed(s->s_root); pr_debug("pipe fd = %d, pgrp = %u\n", sbi->pipefd, pid_nr(sbi->oz_pgrp)); sbi->flags &= ~AUTOFS_SBI_CATATONIC; return 0; } /* * Validate the parameters and then request a superblock. */ static int autofs_get_tree(struct fs_context *fc) { struct autofs_sb_info *sbi = fc->s_fs_info; int ret; ret = autofs_validate_protocol(fc); if (ret) return ret; if (sbi->pipefd < 0) return invalf(fc, "No control pipe specified"); return get_tree_nodev(fc, autofs_fill_super); } static void autofs_free_fc(struct fs_context *fc) { struct autofs_fs_context *ctx = fc->fs_private; struct autofs_sb_info *sbi = fc->s_fs_info; if (sbi) { if (sbi->pipe) fput(sbi->pipe); kfree(sbi); } kfree(ctx); } static const struct fs_context_operations autofs_context_ops = { .free = autofs_free_fc, .parse_param = autofs_parse_param, .get_tree = autofs_get_tree, }; /* * Set up the filesystem mount context. */ int autofs_init_fs_context(struct fs_context *fc) { struct autofs_fs_context *ctx; struct autofs_sb_info *sbi; ctx = kzalloc(sizeof(struct autofs_fs_context), GFP_KERNEL); if (!ctx) goto nomem; ctx->uid = current_uid(); ctx->gid = current_gid(); sbi = autofs_alloc_sbi(); if (!sbi) goto nomem_ctx; fc->fs_private = ctx; fc->s_fs_info = sbi; fc->ops = &autofs_context_ops; return 0; nomem_ctx: kfree(ctx); nomem: return -ENOMEM; } struct inode *autofs_get_inode(struct super_block *sb, umode_t mode) { struct inode *inode = new_inode(sb); if (inode == NULL) return NULL; inode->i_mode = mode; if (sb->s_root) { inode->i_uid = d_inode(sb->s_root)->i_uid; inode->i_gid = d_inode(sb->s_root)->i_gid; } simple_inode_init_ts(inode); inode->i_ino = get_next_ino(); if (S_ISDIR(mode)) { set_nlink(inode, 2); inode->i_op = &autofs_dir_inode_operations; inode->i_fop = &autofs_dir_operations; } else if (S_ISLNK(mode)) { inode->i_op = &autofs_symlink_inode_operations; } else WARN_ON(1); return inode; }
16 16 15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. */ #ifndef __GLOCK_DOT_H__ #define __GLOCK_DOT_H__ #include <linux/sched.h> #include <linux/parser.h> #include "incore.h" #include "util.h" /* Options for hostdata parser */ enum { Opt_jid, Opt_id, Opt_first, Opt_nodir, Opt_err, }; /* * lm_lockname types */ #define LM_TYPE_RESERVED 0x00 #define LM_TYPE_NONDISK 0x01 #define LM_TYPE_INODE 0x02 #define LM_TYPE_RGRP 0x03 #define LM_TYPE_META 0x04 #define LM_TYPE_IOPEN 0x05 #define LM_TYPE_FLOCK 0x06 #define LM_TYPE_PLOCK 0x07 #define LM_TYPE_QUOTA 0x08 #define LM_TYPE_JOURNAL 0x09 /* * lm_lock() states * * SHARED is compatible with SHARED, not with DEFERRED or EX. * DEFERRED is compatible with DEFERRED, not with SHARED or EX. */ #define LM_ST_UNLOCKED 0 #define LM_ST_EXCLUSIVE 1 #define LM_ST_DEFERRED 2 #define LM_ST_SHARED 3 /* * lm_lock() flags * * LM_FLAG_TRY * Don't wait to acquire the lock if it can't be granted immediately. * * LM_FLAG_TRY_1CB * Send one blocking callback if TRY is set and the lock is not granted. * * LM_FLAG_NOEXP * GFS sets this flag on lock requests it makes while doing journal recovery. * These special requests should not be blocked due to the recovery like * ordinary locks would be. * * LM_FLAG_ANY * A SHARED request may also be granted in DEFERRED, or a DEFERRED request may * also be granted in SHARED. The preferred state is whichever is compatible * with other granted locks, or the specified state if no other locks exist. * * LM_FLAG_NODE_SCOPE * This holder agrees to share the lock within this node. In other words, * the glock is held in EX mode according to DLM, but local holders on the * same node can share it. */ #define LM_FLAG_TRY 0x0001 #define LM_FLAG_TRY_1CB 0x0002 #define LM_FLAG_NOEXP 0x0004 #define LM_FLAG_ANY 0x0008 #define LM_FLAG_NODE_SCOPE 0x0020 #define GL_ASYNC 0x0040 #define GL_EXACT 0x0080 #define GL_SKIP 0x0100 #define GL_NOPID 0x0200 #define GL_NOCACHE 0x0400 #define GL_NOBLOCK 0x0800 /* * lm_async_cb return flags * * LM_OUT_ST_MASK * Masks the lower two bits of lock state in the returned value. * * LM_OUT_CANCELED * The lock request was canceled. * */ #define LM_OUT_ST_MASK 0x00000003 #define LM_OUT_CANCELED 0x00000008 #define LM_OUT_ERROR 0x00000004 /* * lm_recovery_done() messages */ #define LM_RD_GAVEUP 308 #define LM_RD_SUCCESS 309 #define GLR_TRYFAILED 13 #define GL_GLOCK_MAX_HOLD (long)(HZ / 5) #define GL_GLOCK_DFT_HOLD (long)(HZ / 5) #define GL_GLOCK_MIN_HOLD (long)(10) #define GL_GLOCK_HOLD_INCR (long)(HZ / 20) #define GL_GLOCK_HOLD_DECR (long)(HZ / 40) struct lm_lockops { const char *lm_proto_name; int (*lm_mount) (struct gfs2_sbd *sdp, const char *table); void (*lm_first_done) (struct gfs2_sbd *sdp); void (*lm_recovery_result) (struct gfs2_sbd *sdp, unsigned int jid, unsigned int result); void (*lm_unmount) (struct gfs2_sbd *sdp); void (*lm_withdraw) (struct gfs2_sbd *sdp); void (*lm_put_lock) (struct gfs2_glock *gl); int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state, unsigned int flags); void (*lm_cancel) (struct gfs2_glock *gl); const match_table_t *lm_tokens; }; struct gfs2_glock_aspace { struct gfs2_glock glock; struct address_space mapping; }; static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) { struct gfs2_holder *gh; struct pid *pid; /* Look in glock's list of holders for one with current task as owner */ spin_lock(&gl->gl_lockref.lock); pid = task_pid(current); list_for_each_entry(gh, &gl->gl_holders, gh_list) { if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) break; if (gh->gh_owner_pid == pid) goto out; } gh = NULL; out: spin_unlock(&gl->gl_lockref.lock); return gh; } static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl) { if (gl->gl_ops->go_flags & GLOF_ASPACE) { struct gfs2_glock_aspace *gla = container_of(gl, struct gfs2_glock_aspace, glock); return &gla->mapping; } return NULL; } int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, const struct gfs2_glock_operations *glops, int create, struct gfs2_glock **glp); struct gfs2_glock *gfs2_glock_hold(struct gfs2_glock *gl); void gfs2_glock_put(struct gfs2_glock *gl); void gfs2_glock_put_async(struct gfs2_glock *gl); void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags, struct gfs2_holder *gh, unsigned long ip); static inline void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags, struct gfs2_holder *gh) { __gfs2_holder_init(gl, state, flags, gh, _RET_IP_); } void gfs2_holder_reinit(unsigned int state, u16 flags, struct gfs2_holder *gh); void gfs2_holder_uninit(struct gfs2_holder *gh); int gfs2_glock_nq(struct gfs2_holder *gh); int gfs2_glock_poll(struct gfs2_holder *gh); int gfs2_instantiate(struct gfs2_holder *gh); int gfs2_glock_holder_ready(struct gfs2_holder *gh); int gfs2_glock_wait(struct gfs2_holder *gh); int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs); void gfs2_glock_dq(struct gfs2_holder *gh); void gfs2_glock_dq_wait(struct gfs2_holder *gh); void gfs2_glock_dq_uninit(struct gfs2_holder *gh); int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number, const struct gfs2_glock_operations *glops, unsigned int state, u16 flags, struct gfs2_holder *gh); int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid); #define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { \ gfs2_dump_glock(NULL, gl, true); \ BUG(); } } while(0) #define gfs2_glock_assert_warn(gl, x) do { if (unlikely(!(x))) { \ gfs2_dump_glock(NULL, gl, true); \ gfs2_assert_warn((gl)->gl_name.ln_sbd, (x)); } } \ while (0) #define gfs2_glock_assert_withdraw(gl, x) do { if (unlikely(!(x))) { \ gfs2_dump_glock(NULL, gl, true); \ gfs2_assert_withdraw((gl)->gl_name.ln_sbd, (x)); } } \ while (0) __printf(2, 3) void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); /** * gfs2_glock_nq_init - initialize a holder and enqueue it on a glock * @gl: the glock * @state: the state we're requesting * @flags: the modifier flags * @gh: the holder structure * * Returns: 0, GLR_*, or errno */ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl, unsigned int state, u16 flags, struct gfs2_holder *gh) { int error; __gfs2_holder_init(gl, state, flags, gh, _RET_IP_); error = gfs2_glock_nq(gh); if (error) gfs2_holder_uninit(gh); return error; } void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state); void gfs2_glock_complete(struct gfs2_glock *gl, int ret); bool gfs2_queue_try_to_evict(struct gfs2_glock *gl); bool gfs2_queue_verify_delete(struct gfs2_glock *gl, bool later); void gfs2_cancel_delete_work(struct gfs2_glock *gl); void gfs2_flush_delete_work(struct gfs2_sbd *sdp); void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); void gfs2_gl_dq_holders(struct gfs2_sbd *sdp); void gfs2_glock_thaw(struct gfs2_sbd *sdp); void gfs2_glock_free(struct gfs2_glock *gl); void gfs2_glock_free_later(struct gfs2_glock *gl); int __init gfs2_glock_init(void); void gfs2_glock_exit(void); void gfs2_create_debugfs_file(struct gfs2_sbd *sdp); void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp); void gfs2_register_debugfs(void); void gfs2_unregister_debugfs(void); void glock_set_object(struct gfs2_glock *gl, void *object); void glock_clear_object(struct gfs2_glock *gl, void *object); extern const struct lm_lockops gfs2_dlm_ops; static inline void gfs2_holder_mark_uninitialized(struct gfs2_holder *gh) { gh->gh_gl = NULL; } static inline bool gfs2_holder_initialized(struct gfs2_holder *gh) { return gh->gh_gl; } static inline bool gfs2_holder_queued(struct gfs2_holder *gh) { return !list_empty(&gh->gh_list); } void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation); bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation); static inline bool glock_needs_demote(struct gfs2_glock *gl) { return (test_bit(GLF_DEMOTE, &gl->gl_flags) || test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags)); } #endif /* __GLOCK_DOT_H__ */
56 37 191 184 4 4 8 2 1 5 11 2 2 1 2 45 5 13 5 18 8 1 3 1 3 6 4 10 12 16 4 8 30 9 6 15 40 41 14 13 27 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _INET_ECN_H_ #define _INET_ECN_H_ #include <linux/ip.h> #include <linux/skbuff.h> #include <linux/if_vlan.h> #include <net/inet_sock.h> #include <net/dsfield.h> #include <net/checksum.h> enum { INET_ECN_NOT_ECT = 0, INET_ECN_ECT_1 = 1, INET_ECN_ECT_0 = 2, INET_ECN_CE = 3, INET_ECN_MASK = 3, }; extern int sysctl_tunnel_ecn_log; static inline int INET_ECN_is_ce(__u8 dsfield) { return (dsfield & INET_ECN_MASK) == INET_ECN_CE; } static inline int INET_ECN_is_not_ect(__u8 dsfield) { return (dsfield & INET_ECN_MASK) == INET_ECN_NOT_ECT; } static inline int INET_ECN_is_capable(__u8 dsfield) { return dsfield & INET_ECN_ECT_0; } /* * RFC 3168 9.1.1 * The full-functionality option for ECN encapsulation is to copy the * ECN codepoint of the inside header to the outside header on * encapsulation if the inside header is not-ECT or ECT, and to set the * ECN codepoint of the outside header to ECT(0) if the ECN codepoint of * the inside header is CE. */ static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner) { outer &= ~INET_ECN_MASK; outer |= !INET_ECN_is_ce(inner) ? (inner & INET_ECN_MASK) : INET_ECN_ECT_0; return outer; } static inline void INET_ECN_xmit(struct sock *sk) { inet_sk(sk)->tos |= INET_ECN_ECT_0; if (inet6_sk(sk) != NULL) inet6_sk(sk)->tclass |= INET_ECN_ECT_0; } static inline void INET_ECN_dontxmit(struct sock *sk) { inet_sk(sk)->tos &= ~INET_ECN_MASK; if (inet6_sk(sk) != NULL) inet6_sk(sk)->tclass &= ~INET_ECN_MASK; } #define IP6_ECN_flow_init(label) do { \ (label) &= ~htonl(INET_ECN_MASK << 20); \ } while (0) #define IP6_ECN_flow_xmit(sk, label) do { \ if (INET_ECN_is_capable(inet6_sk(sk)->tclass)) \ (label) |= htonl(INET_ECN_ECT_0 << 20); \ } while (0) static inline int IP_ECN_set_ce(struct iphdr *iph) { u32 ecn = (iph->tos + 1) & INET_ECN_MASK; __be16 check_add; /* * After the last operation we have (in binary): * INET_ECN_NOT_ECT => 01 * INET_ECN_ECT_1 => 10 * INET_ECN_ECT_0 => 11 * INET_ECN_CE => 00 */ if (!(ecn & 2)) return !ecn; /* * The following gives us: * INET_ECN_ECT_1 => check += htons(0xFFFD) * INET_ECN_ECT_0 => check += htons(0xFFFE) */ check_add = (__force __be16)((__force u16)htons(0xFFFB) + (__force u16)htons(ecn)); iph->check = csum16_add(iph->check, check_add); iph->tos |= INET_ECN_CE; return 1; } static inline int IP_ECN_set_ect1(struct iphdr *iph) { if ((iph->tos & INET_ECN_MASK) != INET_ECN_ECT_0) return 0; iph->check = csum16_add(iph->check, htons(0x1)); iph->tos ^= INET_ECN_MASK; return 1; } static inline void IP_ECN_clear(struct iphdr *iph) { iph->tos &= ~INET_ECN_MASK; } static inline void ipv4_copy_dscp(unsigned int dscp, struct iphdr *inner) { dscp &= ~INET_ECN_MASK; ipv4_change_dsfield(inner, INET_ECN_MASK, dscp); } struct ipv6hdr; /* Note: * IP_ECN_set_ce() has to tweak IPV4 checksum when setting CE, * meaning both changes have no effect on skb->csum if/when CHECKSUM_COMPLETE * In IPv6 case, no checksum compensates the change in IPv6 header, * so we have to update skb->csum. */ static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph) { __be32 from, to; if (INET_ECN_is_not_ect(ipv6_get_dsfield(iph))) return 0; from = *(__be32 *)iph; to = from | htonl(INET_ECN_CE << 20); *(__be32 *)iph = to; if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from), (__force __wsum)to); return 1; } static inline int IP6_ECN_set_ect1(struct sk_buff *skb, struct ipv6hdr *iph) { __be32 from, to; if ((ipv6_get_dsfield(iph) & INET_ECN_MASK) != INET_ECN_ECT_0) return 0; from = *(__be32 *)iph; to = from ^ htonl(INET_ECN_MASK << 20); *(__be32 *)iph = to; if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from), (__force __wsum)to); return 1; } static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner) { dscp &= ~INET_ECN_MASK; ipv6_change_dsfield(inner, INET_ECN_MASK, dscp); } static inline int INET_ECN_set_ce(struct sk_buff *skb) { switch (skb_protocol(skb, true)) { case cpu_to_be16(ETH_P_IP): if (skb_network_header(skb) + sizeof(struct iphdr) <= skb_tail_pointer(skb)) return IP_ECN_set_ce(ip_hdr(skb)); break; case cpu_to_be16(ETH_P_IPV6): if (skb_network_header(skb) + sizeof(struct ipv6hdr) <= skb_tail_pointer(skb)) return IP6_ECN_set_ce(skb, ipv6_hdr(skb)); break; } return 0; } static inline int skb_get_dsfield(struct sk_buff *skb) { switch (skb_protocol(skb, true)) { case cpu_to_be16(ETH_P_IP): if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) break; return ipv4_get_dsfield(ip_hdr(skb)); case cpu_to_be16(ETH_P_IPV6): if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) break; return ipv6_get_dsfield(ipv6_hdr(skb)); } return -1; } static inline int INET_ECN_set_ect1(struct sk_buff *skb) { switch (skb_protocol(skb, true)) { case cpu_to_be16(ETH_P_IP): if (skb_network_header(skb) + sizeof(struct iphdr) <= skb_tail_pointer(skb)) return IP_ECN_set_ect1(ip_hdr(skb)); break; case cpu_to_be16(ETH_P_IPV6): if (skb_network_header(skb) + sizeof(struct ipv6hdr) <= skb_tail_pointer(skb)) return IP6_ECN_set_ect1(skb, ipv6_hdr(skb)); break; } return 0; } /* * RFC 6040 4.2 * To decapsulate the inner header at the tunnel egress, a compliant * tunnel egress MUST set the outgoing ECN field to the codepoint at the * intersection of the appropriate arriving inner header (row) and outer * header (column) in Figure 4 * * +---------+------------------------------------------------+ * |Arriving | Arriving Outer Header | * | Inner +---------+------------+------------+------------+ * | Header | Not-ECT | ECT(0) | ECT(1) | CE | * +---------+---------+------------+------------+------------+ * | Not-ECT | Not-ECT |Not-ECT(!!!)|Not-ECT(!!!)| <drop>(!!!)| * | ECT(0) | ECT(0) | ECT(0) | ECT(1) | CE | * | ECT(1) | ECT(1) | ECT(1) (!) | ECT(1) | CE | * | CE | CE | CE | CE(!!!)| CE | * +---------+---------+------------+------------+------------+ * * Figure 4: New IP in IP Decapsulation Behaviour * * returns 0 on success * 1 if something is broken and should be logged (!!! above) * 2 if packet should be dropped */ static inline int __INET_ECN_decapsulate(__u8 outer, __u8 inner, bool *set_ce) { if (INET_ECN_is_not_ect(inner)) { switch (outer & INET_ECN_MASK) { case INET_ECN_NOT_ECT: return 0; case INET_ECN_ECT_0: case INET_ECN_ECT_1: return 1; case INET_ECN_CE: return 2; } } *set_ce = INET_ECN_is_ce(outer); return 0; } static inline int INET_ECN_decapsulate(struct sk_buff *skb, __u8 outer, __u8 inner) { bool set_ce = false; int rc; rc = __INET_ECN_decapsulate(outer, inner, &set_ce); if (!rc) { if (set_ce) INET_ECN_set_ce(skb); else if ((outer & INET_ECN_MASK) == INET_ECN_ECT_1) INET_ECN_set_ect1(skb); } return rc; } static inline int IP_ECN_decapsulate(const struct iphdr *oiph, struct sk_buff *skb) { __u8 inner; switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): inner = ip_hdr(skb)->tos; break; case htons(ETH_P_IPV6): inner = ipv6_get_dsfield(ipv6_hdr(skb)); break; default: return 0; } return INET_ECN_decapsulate(skb, oiph->tos, inner); } static inline int IP6_ECN_decapsulate(const struct ipv6hdr *oipv6h, struct sk_buff *skb) { __u8 inner; switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): inner = ip_hdr(skb)->tos; break; case htons(ETH_P_IPV6): inner = ipv6_get_dsfield(ipv6_hdr(skb)); break; default: return 0; } return INET_ECN_decapsulate(skb, ipv6_get_dsfield(oipv6h), inner); } #endif
1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 // SPDX-License-Identifier: GPL-2.0 /* * Symbol USB barcode to serial driver * * Copyright (C) 2013 Johan Hovold <jhovold@gmail.com> * Copyright (C) 2009 Greg Kroah-Hartman <gregkh@suse.de> * Copyright (C) 2009 Novell Inc. */ #include <linux/kernel.h> #include <linux/tty.h> #include <linux/slab.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/module.h> #include <linux/usb.h> #include <linux/usb/serial.h> #include <linux/uaccess.h> static const struct usb_device_id id_table[] = { { USB_DEVICE(0x05e0, 0x0600) }, { }, }; MODULE_DEVICE_TABLE(usb, id_table); struct symbol_private { spinlock_t lock; /* protects the following flags */ bool throttled; bool actually_throttled; }; static void symbol_int_callback(struct urb *urb) { struct usb_serial_port *port = urb->context; struct symbol_private *priv = usb_get_serial_port_data(port); unsigned char *data = urb->transfer_buffer; int status = urb->status; unsigned long flags; int result; int data_length; switch (status) { case 0: /* success */ break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ dev_dbg(&port->dev, "%s - urb shutting down with status: %d\n", __func__, status); return; default: dev_dbg(&port->dev, "%s - nonzero urb status received: %d\n", __func__, status); goto exit; } usb_serial_debug_data(&port->dev, __func__, urb->actual_length, data); /* * Data from the device comes with a 1 byte header: * * <size of data> <data>... */ if (urb->actual_length > 1) { data_length = data[0]; if (data_length > (urb->actual_length - 1)) data_length = urb->actual_length - 1; tty_insert_flip_string(&port->port, &data[1], data_length); tty_flip_buffer_push(&port->port); } else { dev_dbg(&port->dev, "%s - short packet\n", __func__); } exit: spin_lock_irqsave(&priv->lock, flags); /* Continue trying to always read if we should */ if (!priv->throttled) { result = usb_submit_urb(port->interrupt_in_urb, GFP_ATOMIC); if (result) dev_err(&port->dev, "%s - failed resubmitting read urb, error %d\n", __func__, result); } else priv->actually_throttled = true; spin_unlock_irqrestore(&priv->lock, flags); } static int symbol_open(struct tty_struct *tty, struct usb_serial_port *port) { struct symbol_private *priv = usb_get_serial_port_data(port); unsigned long flags; int result = 0; spin_lock_irqsave(&priv->lock, flags); priv->throttled = false; priv->actually_throttled = false; spin_unlock_irqrestore(&priv->lock, flags); /* Start reading from the device */ result = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL); if (result) dev_err(&port->dev, "%s - failed resubmitting read urb, error %d\n", __func__, result); return result; } static void symbol_close(struct usb_serial_port *port) { usb_kill_urb(port->interrupt_in_urb); } static void symbol_throttle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct symbol_private *priv = usb_get_serial_port_data(port); spin_lock_irq(&priv->lock); priv->throttled = true; spin_unlock_irq(&priv->lock); } static void symbol_unthrottle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct symbol_private *priv = usb_get_serial_port_data(port); int result; bool was_throttled; spin_lock_irq(&priv->lock); priv->throttled = false; was_throttled = priv->actually_throttled; priv->actually_throttled = false; spin_unlock_irq(&priv->lock); if (was_throttled) { result = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL); if (result) dev_err(&port->dev, "%s - failed submitting read urb, error %d\n", __func__, result); } } static int symbol_port_probe(struct usb_serial_port *port) { struct symbol_private *priv; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; spin_lock_init(&priv->lock); usb_set_serial_port_data(port, priv); return 0; } static void symbol_port_remove(struct usb_serial_port *port) { struct symbol_private *priv = usb_get_serial_port_data(port); kfree(priv); } static struct usb_serial_driver symbol_device = { .driver = { .name = "symbol", }, .id_table = id_table, .num_ports = 1, .num_interrupt_in = 1, .port_probe = symbol_port_probe, .port_remove = symbol_port_remove, .open = symbol_open, .close = symbol_close, .throttle = symbol_throttle, .unthrottle = symbol_unthrottle, .read_int_callback = symbol_int_callback, }; static struct usb_serial_driver * const serial_drivers[] = { &symbol_device, NULL }; module_usb_serial_driver(serial_drivers, id_table); MODULE_DESCRIPTION("Symbol USB barcode to serial driver"); MODULE_LICENSE("GPL v2");
5 2 1 2 4 3 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 // SPDX-License-Identifier: GPL-2.0-only /* Masquerade. Simple mapping which alters range to a local IP address (depending on route). */ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/netfilter/x_tables.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_masquerade.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("Xtables: automatic-address SNAT"); /* FIXME: Multiple targets. --RR */ static int masquerade_tg_check(const struct xt_tgchk_param *par) { const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) { pr_debug("bad MAP_IPS.\n"); return -EINVAL; } if (mr->rangesize != 1) { pr_debug("bad rangesize %u\n", mr->rangesize); return -EINVAL; } return nf_ct_netns_get(par->net, par->family); } static unsigned int masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) { struct nf_nat_range2 range; const struct nf_nat_ipv4_multi_range_compat *mr; mr = par->targinfo; range.flags = mr->range[0].flags; range.min_proto = mr->range[0].min; range.max_proto = mr->range[0].max; return nf_nat_masquerade_ipv4(skb, xt_hooknum(par), &range, xt_out(par)); } static void masquerade_tg_destroy(const struct xt_tgdtor_param *par) { nf_ct_netns_put(par->net, par->family); } #if IS_ENABLED(CONFIG_IPV6) static unsigned int masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par) { return nf_nat_masquerade_ipv6(skb, par->targinfo, xt_out(par)); } static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par) { const struct nf_nat_range2 *range = par->targinfo; if (range->flags & NF_NAT_RANGE_MAP_IPS) return -EINVAL; return nf_ct_netns_get(par->net, par->family); } #endif static struct xt_target masquerade_tg_reg[] __read_mostly = { { #if IS_ENABLED(CONFIG_IPV6) .name = "MASQUERADE", .family = NFPROTO_IPV6, .target = masquerade_tg6, .targetsize = sizeof(struct nf_nat_range), .table = "nat", .hooks = 1 << NF_INET_POST_ROUTING, .checkentry = masquerade_tg6_checkentry, .destroy = masquerade_tg_destroy, .me = THIS_MODULE, }, { #endif .name = "MASQUERADE", .family = NFPROTO_IPV4, .target = masquerade_tg, .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), .table = "nat", .hooks = 1 << NF_INET_POST_ROUTING, .checkentry = masquerade_tg_check, .destroy = masquerade_tg_destroy, .me = THIS_MODULE, } }; static int __init masquerade_tg_init(void) { int ret; ret = xt_register_targets(masquerade_tg_reg, ARRAY_SIZE(masquerade_tg_reg)); if (ret) return ret; ret = nf_nat_masquerade_inet_register_notifiers(); if (ret) { xt_unregister_targets(masquerade_tg_reg, ARRAY_SIZE(masquerade_tg_reg)); return ret; } return ret; } static void __exit masquerade_tg_exit(void) { xt_unregister_targets(masquerade_tg_reg, ARRAY_SIZE(masquerade_tg_reg)); nf_nat_masquerade_inet_unregister_notifiers(); } module_init(masquerade_tg_init); module_exit(masquerade_tg_exit); #if IS_ENABLED(CONFIG_IPV6) MODULE_ALIAS("ip6t_MASQUERADE"); #endif MODULE_ALIAS("ipt_MASQUERADE");
9 20 9 20 19 19 12 4 6 1 3 4 3 9 8 3 3 9 2 7 5 2 7 7 7 12 15 16 17 10 10 9 9 9 1 16 11 2 3 15 17 13 6 1 24 25 3 2 1 20 4 15 12 2 1 16 13 4 16 15 13 2 17 16 8 9 4 9 4 4 1 1 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 /* * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include <linux/kernel.h> #include <linux/slab.h> #include <net/sock.h> #include <linux/in.h> #include <linux/export.h> #include <linux/sched/clock.h> #include <linux/time.h> #include <linux/rds.h> #include "rds.h" void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, struct in6_addr *saddr) { refcount_set(&inc->i_refcount, 1); INIT_LIST_HEAD(&inc->i_item); inc->i_conn = conn; inc->i_saddr = *saddr; inc->i_usercopy.rdma_cookie = 0; inc->i_usercopy.rx_tstamp = ktime_set(0, 0); memset(inc->i_rx_lat_trace, 0, sizeof(inc->i_rx_lat_trace)); } EXPORT_SYMBOL_GPL(rds_inc_init); void rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *cp, struct in6_addr *saddr) { refcount_set(&inc->i_refcount, 1); INIT_LIST_HEAD(&inc->i_item); inc->i_conn = cp->cp_conn; inc->i_conn_path = cp; inc->i_saddr = *saddr; inc->i_usercopy.rdma_cookie = 0; inc->i_usercopy.rx_tstamp = ktime_set(0, 0); } EXPORT_SYMBOL_GPL(rds_inc_path_init); static void rds_inc_addref(struct rds_incoming *inc) { rdsdebug("addref inc %p ref %d\n", inc, refcount_read(&inc->i_refcount)); refcount_inc(&inc->i_refcount); } void rds_inc_put(struct rds_incoming *inc) { rdsdebug("put inc %p ref %d\n", inc, refcount_read(&inc->i_refcount)); if (refcount_dec_and_test(&inc->i_refcount)) { BUG_ON(!list_empty(&inc->i_item)); inc->i_conn->c_trans->inc_free(inc); } } EXPORT_SYMBOL_GPL(rds_inc_put); static void rds_recv_rcvbuf_delta(struct rds_sock *rs, struct sock *sk, struct rds_cong_map *map, int delta, __be16 port) { int now_congested; if (delta == 0) return; rs->rs_rcv_bytes += delta; if (delta > 0) rds_stats_add(s_recv_bytes_added_to_socket, delta); else rds_stats_add(s_recv_bytes_removed_from_socket, -delta); /* loop transport doesn't send/recv congestion updates */ if (rs->rs_transport->t_type == RDS_TRANS_LOOP) return; now_congested = rs->rs_rcv_bytes > rds_sk_rcvbuf(rs); rdsdebug("rs %p (%pI6c:%u) recv bytes %d buf %d " "now_cong %d delta %d\n", rs, &rs->rs_bound_addr, ntohs(rs->rs_bound_port), rs->rs_rcv_bytes, rds_sk_rcvbuf(rs), now_congested, delta); /* wasn't -> am congested */ if (!rs->rs_congested && now_congested) { rs->rs_congested = 1; rds_cong_set_bit(map, port); rds_cong_queue_updates(map); } /* was -> aren't congested */ /* Require more free space before reporting uncongested to prevent bouncing cong/uncong state too often */ else if (rs->rs_congested && (rs->rs_rcv_bytes < (rds_sk_rcvbuf(rs)/2))) { rs->rs_congested = 0; rds_cong_clear_bit(map, port); rds_cong_queue_updates(map); } /* do nothing if no change in cong state */ } static void rds_conn_peer_gen_update(struct rds_connection *conn, u32 peer_gen_num) { int i; struct rds_message *rm, *tmp; unsigned long flags; WARN_ON(conn->c_trans->t_type != RDS_TRANS_TCP); if (peer_gen_num != 0) { if (conn->c_peer_gen_num != 0 && peer_gen_num != conn->c_peer_gen_num) { for (i = 0; i < RDS_MPATH_WORKERS; i++) { struct rds_conn_path *cp; cp = &conn->c_path[i]; spin_lock_irqsave(&cp->cp_lock, flags); cp->cp_next_tx_seq = 1; cp->cp_next_rx_seq = 0; list_for_each_entry_safe(rm, tmp, &cp->cp_retrans, m_conn_item) { set_bit(RDS_MSG_FLUSH, &rm->m_flags); } spin_unlock_irqrestore(&cp->cp_lock, flags); } } conn->c_peer_gen_num = peer_gen_num; } } /* * Process all extension headers that come with this message. */ static void rds_recv_incoming_exthdrs(struct rds_incoming *inc, struct rds_sock *rs) { struct rds_header *hdr = &inc->i_hdr; unsigned int pos = 0, type, len; union { struct rds_ext_header_version version; struct rds_ext_header_rdma rdma; struct rds_ext_header_rdma_dest rdma_dest; } buffer; while (1) { len = sizeof(buffer); type = rds_message_next_extension(hdr, &pos, &buffer, &len); if (type == RDS_EXTHDR_NONE) break; /* Process extension header here */ switch (type) { case RDS_EXTHDR_RDMA: rds_rdma_unuse(rs, be32_to_cpu(buffer.rdma.h_rdma_rkey), 0); break; case RDS_EXTHDR_RDMA_DEST: /* We ignore the size for now. We could stash it * somewhere and use it for error checking. */ inc->i_usercopy.rdma_cookie = rds_rdma_make_cookie( be32_to_cpu(buffer.rdma_dest.h_rdma_rkey), be32_to_cpu(buffer.rdma_dest.h_rdma_offset)); break; } } } static void rds_recv_hs_exthdrs(struct rds_header *hdr, struct rds_connection *conn) { unsigned int pos = 0, type, len; union { struct rds_ext_header_version version; u16 rds_npaths; u32 rds_gen_num; } buffer; u32 new_peer_gen_num = 0; while (1) { len = sizeof(buffer); type = rds_message_next_extension(hdr, &pos, &buffer, &len); if (type == RDS_EXTHDR_NONE) break; /* Process extension header here */ switch (type) { case RDS_EXTHDR_NPATHS: conn->c_npaths = min_t(int, RDS_MPATH_WORKERS, be16_to_cpu(buffer.rds_npaths)); break; case RDS_EXTHDR_GEN_NUM: new_peer_gen_num = be32_to_cpu(buffer.rds_gen_num); break; default: pr_warn_ratelimited("ignoring unknown exthdr type " "0x%x\n", type); } } /* if RDS_EXTHDR_NPATHS was not found, default to a single-path */ conn->c_npaths = max_t(int, conn->c_npaths, 1); conn->c_ping_triggered = 0; rds_conn_peer_gen_update(conn, new_peer_gen_num); } /* rds_start_mprds() will synchronously start multiple paths when appropriate. * The scheme is based on the following rules: * * 1. rds_sendmsg on first connect attempt sends the probe ping, with the * sender's npaths (s_npaths) * 2. rcvr of probe-ping knows the mprds_paths = min(s_npaths, r_npaths). It * sends back a probe-pong with r_npaths. After that, if rcvr is the * smaller ip addr, it starts rds_conn_path_connect_if_down on all * mprds_paths. * 3. sender gets woken up, and can move to rds_conn_path_connect_if_down. * If it is the smaller ipaddr, rds_conn_path_connect_if_down can be * called after reception of the probe-pong on all mprds_paths. * Otherwise (sender of probe-ping is not the smaller ip addr): just call * rds_conn_path_connect_if_down on the hashed path. (see rule 4) * 4. rds_connect_worker must only trigger a connection if laddr < faddr. * 5. sender may end up queuing the packet on the cp. will get sent out later. * when connection is completed. */ static void rds_start_mprds(struct rds_connection *conn) { int i; struct rds_conn_path *cp; if (conn->c_npaths > 1 && rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) < 0) { for (i = 0; i < conn->c_npaths; i++) { cp = &conn->c_path[i]; rds_conn_path_connect_if_down(cp); } } } /* * The transport must make sure that this is serialized against other * rx and conn reset on this specific conn. * * We currently assert that only one fragmented message will be sent * down a connection at a time. This lets us reassemble in the conn * instead of per-flow which means that we don't have to go digging through * flows to tear down partial reassembly progress on conn failure and * we save flow lookup and locking for each frag arrival. It does mean * that small messages will wait behind large ones. Fragmenting at all * is only to reduce the memory consumption of pre-posted buffers. * * The caller passes in saddr and daddr instead of us getting it from the * conn. This lets loopback, who only has one conn for both directions, * tell us which roles the addrs in the conn are playing for this message. */ void rds_recv_incoming(struct rds_connection *conn, struct in6_addr *saddr, struct in6_addr *daddr, struct rds_incoming *inc, gfp_t gfp) { struct rds_sock *rs = NULL; struct sock *sk; unsigned long flags; struct rds_conn_path *cp; inc->i_conn = conn; inc->i_rx_jiffies = jiffies; if (conn->c_trans->t_mp_capable) cp = inc->i_conn_path; else cp = &conn->c_path[0]; rdsdebug("conn %p next %llu inc %p seq %llu len %u sport %u dport %u " "flags 0x%x rx_jiffies %lu\n", conn, (unsigned long long)cp->cp_next_rx_seq, inc, (unsigned long long)be64_to_cpu(inc->i_hdr.h_sequence), be32_to_cpu(inc->i_hdr.h_len), be16_to_cpu(inc->i_hdr.h_sport), be16_to_cpu(inc->i_hdr.h_dport), inc->i_hdr.h_flags, inc->i_rx_jiffies); /* * Sequence numbers should only increase. Messages get their * sequence number as they're queued in a sending conn. They * can be dropped, though, if the sending socket is closed before * they hit the wire. So sequence numbers can skip forward * under normal operation. They can also drop back in the conn * failover case as previously sent messages are resent down the * new instance of a conn. We drop those, otherwise we have * to assume that the next valid seq does not come after a * hole in the fragment stream. * * The headers don't give us a way to realize if fragments of * a message have been dropped. We assume that frags that arrive * to a flow are part of the current message on the flow that is * being reassembled. This means that senders can't drop messages * from the sending conn until all their frags are sent. * * XXX we could spend more on the wire to get more robust failure * detection, arguably worth it to avoid data corruption. */ if (be64_to_cpu(inc->i_hdr.h_sequence) < cp->cp_next_rx_seq && (inc->i_hdr.h_flags & RDS_FLAG_RETRANSMITTED)) { rds_stats_inc(s_recv_drop_old_seq); goto out; } cp->cp_next_rx_seq = be64_to_cpu(inc->i_hdr.h_sequence) + 1; if (rds_sysctl_ping_enable && inc->i_hdr.h_dport == 0) { if (inc->i_hdr.h_sport == 0) { rdsdebug("ignore ping with 0 sport from %pI6c\n", saddr); goto out; } rds_stats_inc(s_recv_ping); rds_send_pong(cp, inc->i_hdr.h_sport); /* if this is a handshake ping, start multipath if necessary */ if (RDS_HS_PROBE(be16_to_cpu(inc->i_hdr.h_sport), be16_to_cpu(inc->i_hdr.h_dport))) { rds_recv_hs_exthdrs(&inc->i_hdr, cp->cp_conn); rds_start_mprds(cp->cp_conn); } goto out; } if (be16_to_cpu(inc->i_hdr.h_dport) == RDS_FLAG_PROBE_PORT && inc->i_hdr.h_sport == 0) { rds_recv_hs_exthdrs(&inc->i_hdr, cp->cp_conn); /* if this is a handshake pong, start multipath if necessary */ rds_start_mprds(cp->cp_conn); wake_up(&cp->cp_conn->c_hs_waitq); goto out; } rs = rds_find_bound(daddr, inc->i_hdr.h_dport, conn->c_bound_if); if (!rs) { rds_stats_inc(s_recv_drop_no_sock); goto out; } /* Process extension headers */ rds_recv_incoming_exthdrs(inc, rs); /* We can be racing with rds_release() which marks the socket dead. */ sk = rds_rs_to_sk(rs); /* serialize with rds_release -> sock_orphan */ write_lock_irqsave(&rs->rs_recv_lock, flags); if (!sock_flag(sk, SOCK_DEAD)) { rdsdebug("adding inc %p to rs %p's recv queue\n", inc, rs); rds_stats_inc(s_recv_queued); rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong, be32_to_cpu(inc->i_hdr.h_len), inc->i_hdr.h_dport); if (sock_flag(sk, SOCK_RCVTSTAMP)) inc->i_usercopy.rx_tstamp = ktime_get_real(); rds_inc_addref(inc); inc->i_rx_lat_trace[RDS_MSG_RX_END] = local_clock(); list_add_tail(&inc->i_item, &rs->rs_recv_queue); __rds_wake_sk_sleep(sk); } else { rds_stats_inc(s_recv_drop_dead_sock); } write_unlock_irqrestore(&rs->rs_recv_lock, flags); out: if (rs) rds_sock_put(rs); } EXPORT_SYMBOL_GPL(rds_recv_incoming); /* * be very careful here. This is being called as the condition in * wait_event_*() needs to cope with being called many times. */ static int rds_next_incoming(struct rds_sock *rs, struct rds_incoming **inc) { unsigned long flags; if (!*inc) { read_lock_irqsave(&rs->rs_recv_lock, flags); if (!list_empty(&rs->rs_recv_queue)) { *inc = list_entry(rs->rs_recv_queue.next, struct rds_incoming, i_item); rds_inc_addref(*inc); } read_unlock_irqrestore(&rs->rs_recv_lock, flags); } return *inc != NULL; } static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc, int drop) { struct sock *sk = rds_rs_to_sk(rs); int ret = 0; unsigned long flags; struct rds_incoming *to_drop = NULL; write_lock_irqsave(&rs->rs_recv_lock, flags); if (!list_empty(&inc->i_item)) { ret = 1; if (drop) { /* XXX make sure this i_conn is reliable */ rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong, -be32_to_cpu(inc->i_hdr.h_len), inc->i_hdr.h_dport); list_del_init(&inc->i_item); to_drop = inc; } } write_unlock_irqrestore(&rs->rs_recv_lock, flags); if (to_drop) rds_inc_put(to_drop); rdsdebug("inc %p rs %p still %d dropped %d\n", inc, rs, ret, drop); return ret; } /* * Pull errors off the error queue. * If msghdr is NULL, we will just purge the error queue. */ int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msghdr) { struct rds_notifier *notifier; struct rds_rdma_notify cmsg; unsigned int count = 0, max_messages = ~0U; unsigned long flags; LIST_HEAD(copy); int err = 0; memset(&cmsg, 0, sizeof(cmsg)); /* fill holes with zero */ /* put_cmsg copies to user space and thus may sleep. We can't do this * with rs_lock held, so first grab as many notifications as we can stuff * in the user provided cmsg buffer. We don't try to copy more, to avoid * losing notifications - except when the buffer is so small that it wouldn't * even hold a single notification. Then we give him as much of this single * msg as we can squeeze in, and set MSG_CTRUNC. */ if (msghdr) { max_messages = msghdr->msg_controllen / CMSG_SPACE(sizeof(cmsg)); if (!max_messages) max_messages = 1; } spin_lock_irqsave(&rs->rs_lock, flags); while (!list_empty(&rs->rs_notify_queue) && count < max_messages) { notifier = list_entry(rs->rs_notify_queue.next, struct rds_notifier, n_list); list_move(&notifier->n_list, &copy); count++; } spin_unlock_irqrestore(&rs->rs_lock, flags); if (!count) return 0; while (!list_empty(&copy)) { notifier = list_entry(copy.next, struct rds_notifier, n_list); if (msghdr) { cmsg.user_token = notifier->n_user_token; cmsg.status = notifier->n_status; err = put_cmsg(msghdr, SOL_RDS, RDS_CMSG_RDMA_STATUS, sizeof(cmsg), &cmsg); if (err) break; } list_del_init(&notifier->n_list); kfree(notifier); } /* If we bailed out because of an error in put_cmsg, * we may be left with one or more notifications that we * didn't process. Return them to the head of the list. */ if (!list_empty(&copy)) { spin_lock_irqsave(&rs->rs_lock, flags); list_splice(&copy, &rs->rs_notify_queue); spin_unlock_irqrestore(&rs->rs_lock, flags); } return err; } /* * Queue a congestion notification */ static int rds_notify_cong(struct rds_sock *rs, struct msghdr *msghdr) { uint64_t notify = rs->rs_cong_notify; unsigned long flags; int err; err = put_cmsg(msghdr, SOL_RDS, RDS_CMSG_CONG_UPDATE, sizeof(notify), &notify); if (err) return err; spin_lock_irqsave(&rs->rs_lock, flags); rs->rs_cong_notify &= ~notify; spin_unlock_irqrestore(&rs->rs_lock, flags); return 0; } /* * Receive any control messages. */ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg, struct rds_sock *rs) { int ret = 0; if (inc->i_usercopy.rdma_cookie) { ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RDMA_DEST, sizeof(inc->i_usercopy.rdma_cookie), &inc->i_usercopy.rdma_cookie); if (ret) goto out; } if ((inc->i_usercopy.rx_tstamp != 0) && sock_flag(rds_rs_to_sk(rs), SOCK_RCVTSTAMP)) { struct __kernel_old_timeval tv = ns_to_kernel_old_timeval(inc->i_usercopy.rx_tstamp); if (!sock_flag(rds_rs_to_sk(rs), SOCK_TSTAMP_NEW)) { ret = put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD, sizeof(tv), &tv); } else { struct __kernel_sock_timeval sk_tv; sk_tv.tv_sec = tv.tv_sec; sk_tv.tv_usec = tv.tv_usec; ret = put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW, sizeof(sk_tv), &sk_tv); } if (ret) goto out; } if (rs->rs_rx_traces) { struct rds_cmsg_rx_trace t; int i, j; memset(&t, 0, sizeof(t)); inc->i_rx_lat_trace[RDS_MSG_RX_CMSG] = local_clock(); t.rx_traces = rs->rs_rx_traces; for (i = 0; i < rs->rs_rx_traces; i++) { j = rs->rs_rx_trace[i]; t.rx_trace_pos[i] = j; t.rx_trace[i] = inc->i_rx_lat_trace[j + 1] - inc->i_rx_lat_trace[j]; } ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RXPATH_LATENCY, sizeof(t), &t); if (ret) goto out; } out: return ret; } static bool rds_recvmsg_zcookie(struct rds_sock *rs, struct msghdr *msg) { struct rds_msg_zcopy_queue *q = &rs->rs_zcookie_queue; struct rds_msg_zcopy_info *info = NULL; struct rds_zcopy_cookies *done; unsigned long flags; if (!msg->msg_control) return false; if (!sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY) || msg->msg_controllen < CMSG_SPACE(sizeof(*done))) return false; spin_lock_irqsave(&q->lock, flags); if (!list_empty(&q->zcookie_head)) { info = list_entry(q->zcookie_head.next, struct rds_msg_zcopy_info, rs_zcookie_next); list_del(&info->rs_zcookie_next); } spin_unlock_irqrestore(&q->lock, flags); if (!info) return false; done = &info->zcookies; if (put_cmsg(msg, SOL_RDS, RDS_CMSG_ZCOPY_COMPLETION, sizeof(*done), done)) { spin_lock_irqsave(&q->lock, flags); list_add(&info->rs_zcookie_next, &q->zcookie_head); spin_unlock_irqrestore(&q->lock, flags); return false; } kfree(info); return true; } int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int msg_flags) { struct sock *sk = sock->sk; struct rds_sock *rs = rds_sk_to_rs(sk); long timeo; int ret = 0, nonblock = msg_flags & MSG_DONTWAIT; DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); struct rds_incoming *inc = NULL; /* udp_recvmsg()->sock_recvtimeo() gets away without locking too.. */ timeo = sock_rcvtimeo(sk, nonblock); rdsdebug("size %zu flags 0x%x timeo %ld\n", size, msg_flags, timeo); if (msg_flags & MSG_OOB) goto out; if (msg_flags & MSG_ERRQUEUE) return sock_recv_errqueue(sk, msg, size, SOL_IP, IP_RECVERR); while (1) { /* If there are pending notifications, do those - and nothing else */ if (!list_empty(&rs->rs_notify_queue)) { ret = rds_notify_queue_get(rs, msg); break; } if (rs->rs_cong_notify) { ret = rds_notify_cong(rs, msg); break; } if (!rds_next_incoming(rs, &inc)) { if (nonblock) { bool reaped = rds_recvmsg_zcookie(rs, msg); ret = reaped ? 0 : -EAGAIN; break; } timeo = wait_event_interruptible_timeout(*sk_sleep(sk), (!list_empty(&rs->rs_notify_queue) || rs->rs_cong_notify || rds_next_incoming(rs, &inc)), timeo); rdsdebug("recvmsg woke inc %p timeo %ld\n", inc, timeo); if (timeo > 0 || timeo == MAX_SCHEDULE_TIMEOUT) continue; ret = timeo; if (ret == 0) ret = -ETIMEDOUT; break; } rdsdebug("copying inc %p from %pI6c:%u to user\n", inc, &inc->i_conn->c_faddr, ntohs(inc->i_hdr.h_sport)); ret = inc->i_conn->c_trans->inc_copy_to_user(inc, &msg->msg_iter); if (ret < 0) break; /* * if the message we just copied isn't at the head of the * recv queue then someone else raced us to return it, try * to get the next message. */ if (!rds_still_queued(rs, inc, !(msg_flags & MSG_PEEK))) { rds_inc_put(inc); inc = NULL; rds_stats_inc(s_recv_deliver_raced); iov_iter_revert(&msg->msg_iter, ret); continue; } if (ret < be32_to_cpu(inc->i_hdr.h_len)) { if (msg_flags & MSG_TRUNC) ret = be32_to_cpu(inc->i_hdr.h_len); msg->msg_flags |= MSG_TRUNC; } if (rds_cmsg_recv(inc, msg, rs)) { ret = -EFAULT; break; } rds_recvmsg_zcookie(rs, msg); rds_stats_inc(s_recv_delivered); if (msg->msg_name) { if (ipv6_addr_v4mapped(&inc->i_saddr)) { sin->sin_family = AF_INET; sin->sin_port = inc->i_hdr.h_sport; sin->sin_addr.s_addr = inc->i_saddr.s6_addr32[3]; memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); msg->msg_namelen = sizeof(*sin); } else { sin6->sin6_family = AF_INET6; sin6->sin6_port = inc->i_hdr.h_sport; sin6->sin6_addr = inc->i_saddr; sin6->sin6_flowinfo = 0; sin6->sin6_scope_id = rs->rs_bound_scope_id; msg->msg_namelen = sizeof(*sin6); } } break; } if (inc) rds_inc_put(inc); out: return ret; } /* * The socket is being shut down and we're asked to drop messages that were * queued for recvmsg. The caller has unbound the socket so the receive path * won't queue any more incoming fragments or messages on the socket. */ void rds_clear_recv_queue(struct rds_sock *rs) { struct sock *sk = rds_rs_to_sk(rs); struct rds_incoming *inc, *tmp; unsigned long flags; LIST_HEAD(to_drop); write_lock_irqsave(&rs->rs_recv_lock, flags); list_for_each_entry_safe(inc, tmp, &rs->rs_recv_queue, i_item) { rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong, -be32_to_cpu(inc->i_hdr.h_len), inc->i_hdr.h_dport); list_move(&inc->i_item, &to_drop); } write_unlock_irqrestore(&rs->rs_recv_lock, flags); list_for_each_entry_safe(inc, tmp, &to_drop, i_item) { list_del_init(&inc->i_item); rds_inc_put(inc); } } /* * inc->i_saddr isn't used here because it is only set in the receive * path. */ void rds_inc_info_copy(struct rds_incoming *inc, struct rds_info_iterator *iter, __be32 saddr, __be32 daddr, int flip) { struct rds_info_message minfo; minfo.seq = be64_to_cpu(inc->i_hdr.h_sequence); minfo.len = be32_to_cpu(inc->i_hdr.h_len); minfo.tos = inc->i_conn->c_tos; if (flip) { minfo.laddr = daddr; minfo.faddr = saddr; minfo.lport = inc->i_hdr.h_dport; minfo.fport = inc->i_hdr.h_sport; } else { minfo.laddr = saddr; minfo.faddr = daddr; minfo.lport = inc->i_hdr.h_sport; minfo.fport = inc->i_hdr.h_dport; } minfo.flags = 0; rds_info_copy(iter, &minfo, sizeof(minfo)); } #if IS_ENABLED(CONFIG_IPV6) void rds6_inc_info_copy(struct rds_incoming *inc, struct rds_info_iterator *iter, struct in6_addr *saddr, struct in6_addr *daddr, int flip) { struct rds6_info_message minfo6; minfo6.seq = be64_to_cpu(inc->i_hdr.h_sequence); minfo6.len = be32_to_cpu(inc->i_hdr.h_len); minfo6.tos = inc->i_conn->c_tos; if (flip) { minfo6.laddr = *daddr; minfo6.faddr = *saddr; minfo6.lport = inc->i_hdr.h_dport; minfo6.fport = inc->i_hdr.h_sport; } else { minfo6.laddr = *saddr; minfo6.faddr = *daddr; minfo6.lport = inc->i_hdr.h_sport; minfo6.fport = inc->i_hdr.h_dport; } minfo6.flags = 0; rds_info_copy(iter, &minfo6, sizeof(minfo6)); } #endif
90 91 7 11 11 91 89 11 90 90 18 90 90 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ #include "xfs.h" #include "xfs_log_format.h" #include "xfs_bit.h" /* * XFS bit manipulation routines, used in non-realtime code. */ /* * Return whether bitmap is empty. * Size is number of words in the bitmap, which is padded to word boundary * Returns 1 for empty, 0 for non-empty. */ int xfs_bitmap_empty(uint *map, uint size) { uint i; for (i = 0; i < size; i++) { if (map[i] != 0) return 0; } return 1; } /* * Count the number of contiguous bits set in the bitmap starting with bit * start_bit. Size is the size of the bitmap in words. */ int xfs_contig_bits(uint *map, uint size, uint start_bit) { uint * p = ((unsigned int *) map) + (start_bit >> BIT_TO_WORD_SHIFT); uint result = 0; uint tmp; size <<= BIT_TO_WORD_SHIFT; ASSERT(start_bit < size); size -= start_bit & ~(NBWORD - 1); start_bit &= (NBWORD - 1); if (start_bit) { tmp = *p++; /* set to one first offset bits prior to start */ tmp |= (~0U >> (NBWORD-start_bit)); if (tmp != ~0U) goto found; result += NBWORD; size -= NBWORD; } while (size) { if ((tmp = *p++) != ~0U) goto found; result += NBWORD; size -= NBWORD; } return result - start_bit; found: return result + ffz(tmp) - start_bit; } /* * This takes the bit number to start looking from and * returns the next set bit from there. It returns -1 * if there are no more bits set or the start bit is * beyond the end of the bitmap. * * Size is the number of words, not bytes, in the bitmap. */ int xfs_next_bit(uint *map, uint size, uint start_bit) { uint * p = ((unsigned int *) map) + (start_bit >> BIT_TO_WORD_SHIFT); uint result = start_bit & ~(NBWORD - 1); uint tmp; size <<= BIT_TO_WORD_SHIFT; if (start_bit >= size) return -1; size -= result; start_bit &= (NBWORD - 1); if (start_bit) { tmp = *p++; /* set to zero first offset bits prior to start */ tmp &= (~0U << start_bit); if (tmp != 0U) goto found; result += NBWORD; size -= NBWORD; } while (size) { if ((tmp = *p++) != 0U) goto found; result += NBWORD; size -= NBWORD; } return -1; found: return result + ffs(tmp) - 1; }
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 // SPDX-License-Identifier: GPL-2.0-only /* * HID driver for THQ PS3 uDraw tablet * * Copyright (C) 2016 Red Hat Inc. All Rights Reserved */ #include <linux/device.h> #include <linux/hid.h> #include <linux/module.h> #include "hid-ids.h" MODULE_AUTHOR("Bastien Nocera <hadess@hadess.net>"); MODULE_DESCRIPTION("PS3 uDraw tablet driver"); MODULE_LICENSE("GPL"); /* * Protocol information from: * https://brandonw.net/udraw/ * and the source code of: * https://vvvv.org/contribution/udraw-hid */ /* * The device is setup with multiple input devices: * - the touch area which works as a touchpad * - the tablet area which works as a touchpad/drawing tablet * - a joypad with a d-pad, and 7 buttons * - an accelerometer device */ enum { TOUCH_NONE, TOUCH_PEN, TOUCH_FINGER, TOUCH_TWOFINGER }; enum { AXIS_X, AXIS_Y, AXIS_Z }; /* * Accelerometer min/max values * in order, X, Y and Z */ static struct { int min; int max; } accel_limits[] = { [AXIS_X] = { 490, 534 }, [AXIS_Y] = { 490, 534 }, [AXIS_Z] = { 492, 536 } }; #define DEVICE_NAME "THQ uDraw Game Tablet for PS3" /* resolution in pixels */ #define RES_X 1920 #define RES_Y 1080 /* size in mm */ #define WIDTH 160 #define HEIGHT 90 #define PRESSURE_OFFSET 113 #define MAX_PRESSURE (255 - PRESSURE_OFFSET) struct udraw { struct input_dev *joy_input_dev; struct input_dev *touch_input_dev; struct input_dev *pen_input_dev; struct input_dev *accel_input_dev; struct hid_device *hdev; /* * The device's two-finger support is pretty unreliable, as * the device could report a single touch when the two fingers * are too close together, and the distance between fingers, even * though reported is not in the same unit as the touches. * * We'll make do without it, and try to report the first touch * as reliably as possible. */ int last_one_finger_x; int last_one_finger_y; int last_two_finger_x; int last_two_finger_y; }; static int clamp_accel(int axis, int offset) { axis = clamp(axis, accel_limits[offset].min, accel_limits[offset].max); axis = (axis - accel_limits[offset].min) / ((accel_limits[offset].max - accel_limits[offset].min) * 0xFF); return axis; } static int udraw_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int len) { struct udraw *udraw = hid_get_drvdata(hdev); int touch; int x, y, z; if (len != 27) return 0; if (data[11] == 0x00) touch = TOUCH_NONE; else if (data[11] == 0x40) touch = TOUCH_PEN; else if (data[11] == 0x80) touch = TOUCH_FINGER; else touch = TOUCH_TWOFINGER; /* joypad */ input_report_key(udraw->joy_input_dev, BTN_WEST, data[0] & 1); input_report_key(udraw->joy_input_dev, BTN_SOUTH, !!(data[0] & 2)); input_report_key(udraw->joy_input_dev, BTN_EAST, !!(data[0] & 4)); input_report_key(udraw->joy_input_dev, BTN_NORTH, !!(data[0] & 8)); input_report_key(udraw->joy_input_dev, BTN_SELECT, !!(data[1] & 1)); input_report_key(udraw->joy_input_dev, BTN_START, !!(data[1] & 2)); input_report_key(udraw->joy_input_dev, BTN_MODE, !!(data[1] & 16)); x = y = 0; switch (data[2]) { case 0x0: y = -127; break; case 0x1: y = -127; x = 127; break; case 0x2: x = 127; break; case 0x3: y = 127; x = 127; break; case 0x4: y = 127; break; case 0x5: y = 127; x = -127; break; case 0x6: x = -127; break; case 0x7: y = -127; x = -127; break; default: break; } input_report_abs(udraw->joy_input_dev, ABS_X, x); input_report_abs(udraw->joy_input_dev, ABS_Y, y); input_sync(udraw->joy_input_dev); /* For pen and touchpad */ x = y = 0; if (touch != TOUCH_NONE) { if (data[15] != 0x0F) x = data[15] * 256 + data[17]; if (data[16] != 0x0F) y = data[16] * 256 + data[18]; } if (touch == TOUCH_FINGER) { /* Save the last one-finger touch */ udraw->last_one_finger_x = x; udraw->last_one_finger_y = y; udraw->last_two_finger_x = -1; udraw->last_two_finger_y = -1; } else if (touch == TOUCH_TWOFINGER) { /* * We have a problem because x/y is the one for the * second finger but we want the first finger given * to user-space otherwise it'll look as if it jumped. * * See the udraw struct definition for why this was * implemented this way. */ if (udraw->last_two_finger_x == -1) { /* Save the position of the 2nd finger */ udraw->last_two_finger_x = x; udraw->last_two_finger_y = y; x = udraw->last_one_finger_x; y = udraw->last_one_finger_y; } else { /* * Offset the 2-finger coords using the * saved data from the first finger */ x = x - (udraw->last_two_finger_x - udraw->last_one_finger_x); y = y - (udraw->last_two_finger_y - udraw->last_one_finger_y); } } /* touchpad */ if (touch == TOUCH_FINGER || touch == TOUCH_TWOFINGER) { input_report_key(udraw->touch_input_dev, BTN_TOUCH, 1); input_report_key(udraw->touch_input_dev, BTN_TOOL_FINGER, touch == TOUCH_FINGER); input_report_key(udraw->touch_input_dev, BTN_TOOL_DOUBLETAP, touch == TOUCH_TWOFINGER); input_report_abs(udraw->touch_input_dev, ABS_X, x); input_report_abs(udraw->touch_input_dev, ABS_Y, y); } else { input_report_key(udraw->touch_input_dev, BTN_TOUCH, 0); input_report_key(udraw->touch_input_dev, BTN_TOOL_FINGER, 0); input_report_key(udraw->touch_input_dev, BTN_TOOL_DOUBLETAP, 0); } input_sync(udraw->touch_input_dev); /* pen */ if (touch == TOUCH_PEN) { int level; level = clamp(data[13] - PRESSURE_OFFSET, 0, MAX_PRESSURE); input_report_key(udraw->pen_input_dev, BTN_TOUCH, (level != 0)); input_report_key(udraw->pen_input_dev, BTN_TOOL_PEN, 1); input_report_abs(udraw->pen_input_dev, ABS_PRESSURE, level); input_report_abs(udraw->pen_input_dev, ABS_X, x); input_report_abs(udraw->pen_input_dev, ABS_Y, y); } else { input_report_key(udraw->pen_input_dev, BTN_TOUCH, 0); input_report_key(udraw->pen_input_dev, BTN_TOOL_PEN, 0); input_report_abs(udraw->pen_input_dev, ABS_PRESSURE, 0); } input_sync(udraw->pen_input_dev); /* accel */ x = (data[19] + (data[20] << 8)); x = clamp_accel(x, AXIS_X); y = (data[21] + (data[22] << 8)); y = clamp_accel(y, AXIS_Y); z = (data[23] + (data[24] << 8)); z = clamp_accel(z, AXIS_Z); input_report_abs(udraw->accel_input_dev, ABS_X, x); input_report_abs(udraw->accel_input_dev, ABS_Y, y); input_report_abs(udraw->accel_input_dev, ABS_Z, z); input_sync(udraw->accel_input_dev); /* let hidraw and hiddev handle the report */ return 0; } static int udraw_open(struct input_dev *dev) { struct udraw *udraw = input_get_drvdata(dev); return hid_hw_open(udraw->hdev); } static void udraw_close(struct input_dev *dev) { struct udraw *udraw = input_get_drvdata(dev); hid_hw_close(udraw->hdev); } static struct input_dev *allocate_and_setup(struct hid_device *hdev, const char *name) { struct input_dev *input_dev; input_dev = devm_input_allocate_device(&hdev->dev); if (!input_dev) return NULL; input_dev->name = name; input_dev->phys = hdev->phys; input_dev->dev.parent = &hdev->dev; input_dev->open = udraw_open; input_dev->close = udraw_close; input_dev->uniq = hdev->uniq; input_dev->id.bustype = hdev->bus; input_dev->id.vendor = hdev->vendor; input_dev->id.product = hdev->product; input_dev->id.version = hdev->version; input_set_drvdata(input_dev, hid_get_drvdata(hdev)); return input_dev; } static bool udraw_setup_touch(struct udraw *udraw, struct hid_device *hdev) { struct input_dev *input_dev; input_dev = allocate_and_setup(hdev, DEVICE_NAME " Touchpad"); if (!input_dev) return false; input_dev->evbit[0] = BIT(EV_ABS) | BIT(EV_KEY); input_set_abs_params(input_dev, ABS_X, 0, RES_X, 1, 0); input_abs_set_res(input_dev, ABS_X, RES_X / WIDTH); input_set_abs_params(input_dev, ABS_Y, 0, RES_Y, 1, 0); input_abs_set_res(input_dev, ABS_Y, RES_Y / HEIGHT); set_bit(BTN_TOUCH, input_dev->keybit); set_bit(BTN_TOOL_FINGER, input_dev->keybit); set_bit(BTN_TOOL_DOUBLETAP, input_dev->keybit); set_bit(INPUT_PROP_POINTER, input_dev->propbit); udraw->touch_input_dev = input_dev; return true; } static bool udraw_setup_pen(struct udraw *udraw, struct hid_device *hdev) { struct input_dev *input_dev; input_dev = allocate_and_setup(hdev, DEVICE_NAME " Pen"); if (!input_dev) return false; input_dev->evbit[0] = BIT(EV_ABS) | BIT(EV_KEY); input_set_abs_params(input_dev, ABS_X, 0, RES_X, 1, 0); input_abs_set_res(input_dev, ABS_X, RES_X / WIDTH); input_set_abs_params(input_dev, ABS_Y, 0, RES_Y, 1, 0); input_abs_set_res(input_dev, ABS_Y, RES_Y / HEIGHT); input_set_abs_params(input_dev, ABS_PRESSURE, 0, MAX_PRESSURE, 0, 0); set_bit(BTN_TOUCH, input_dev->keybit); set_bit(BTN_TOOL_PEN, input_dev->keybit); set_bit(INPUT_PROP_POINTER, input_dev->propbit); udraw->pen_input_dev = input_dev; return true; } static bool udraw_setup_accel(struct udraw *udraw, struct hid_device *hdev) { struct input_dev *input_dev; input_dev = allocate_and_setup(hdev, DEVICE_NAME " Accelerometer"); if (!input_dev) return false; input_dev->evbit[0] = BIT(EV_ABS); /* 1G accel is reported as ~256, so clamp to 2G */ input_set_abs_params(input_dev, ABS_X, -512, 512, 0, 0); input_set_abs_params(input_dev, ABS_Y, -512, 512, 0, 0); input_set_abs_params(input_dev, ABS_Z, -512, 512, 0, 0); set_bit(INPUT_PROP_ACCELEROMETER, input_dev->propbit); udraw->accel_input_dev = input_dev; return true; } static bool udraw_setup_joypad(struct udraw *udraw, struct hid_device *hdev) { struct input_dev *input_dev; input_dev = allocate_and_setup(hdev, DEVICE_NAME " Joypad"); if (!input_dev) return false; input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); set_bit(BTN_SOUTH, input_dev->keybit); set_bit(BTN_NORTH, input_dev->keybit); set_bit(BTN_EAST, input_dev->keybit); set_bit(BTN_WEST, input_dev->keybit); set_bit(BTN_SELECT, input_dev->keybit); set_bit(BTN_START, input_dev->keybit); set_bit(BTN_MODE, input_dev->keybit); input_set_abs_params(input_dev, ABS_X, -127, 127, 0, 0); input_set_abs_params(input_dev, ABS_Y, -127, 127, 0, 0); udraw->joy_input_dev = input_dev; return true; } static int udraw_probe(struct hid_device *hdev, const struct hid_device_id *id) { struct udraw *udraw; int ret; udraw = devm_kzalloc(&hdev->dev, sizeof(struct udraw), GFP_KERNEL); if (!udraw) return -ENOMEM; udraw->hdev = hdev; udraw->last_two_finger_x = -1; udraw->last_two_finger_y = -1; hid_set_drvdata(hdev, udraw); ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); return ret; } if (!udraw_setup_joypad(udraw, hdev) || !udraw_setup_touch(udraw, hdev) || !udraw_setup_pen(udraw, hdev) || !udraw_setup_accel(udraw, hdev)) { hid_err(hdev, "could not allocate interfaces\n"); return -ENOMEM; } ret = input_register_device(udraw->joy_input_dev) || input_register_device(udraw->touch_input_dev) || input_register_device(udraw->pen_input_dev) || input_register_device(udraw->accel_input_dev); if (ret) { hid_err(hdev, "failed to register interfaces\n"); return ret; } ret = hid_hw_start(hdev, HID_CONNECT_HIDRAW | HID_CONNECT_DRIVER); if (ret) { hid_err(hdev, "hw start failed\n"); return ret; } return 0; } static const struct hid_device_id udraw_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_THQ, USB_DEVICE_ID_THQ_PS3_UDRAW) }, { } }; MODULE_DEVICE_TABLE(hid, udraw_devices); static struct hid_driver udraw_driver = { .name = "hid-udraw", .id_table = udraw_devices, .raw_event = udraw_raw_event, .probe = udraw_probe, }; module_hid_driver(udraw_driver);
9 9 9 15 15 29 1 1 2 24 1 1 2 2 2 1 2 2 11 1 5 15 2 15 2 7 5 2 7 7 7 1659 1660 1659 379 88 11 11 15 5 11 14 15 15 3 8 2 2 2 8 2 2 43 43 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/act_mirred.c packet mirroring and redirect actions * * Authors: Jamal Hadi Salim (2002-4) * * TODO: Add ingress support (and socket redirect support) */ #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/module.h> #include <linux/init.h> #include <linux/gfp.h> #include <linux/if_arp.h> #include <net/net_namespace.h> #include <net/netlink.h> #include <net/dst.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> #include <linux/tc_act/tc_mirred.h> #include <net/tc_act/tc_mirred.h> #include <net/tc_wrapper.h> static LIST_HEAD(mirred_list); static DEFINE_SPINLOCK(mirred_list_lock); #define MIRRED_NEST_LIMIT 4 static DEFINE_PER_CPU(unsigned int, mirred_nest_level); static bool tcf_mirred_is_act_redirect(int action) { return action == TCA_EGRESS_REDIR || action == TCA_INGRESS_REDIR; } static bool tcf_mirred_act_wants_ingress(int action) { switch (action) { case TCA_EGRESS_REDIR: case TCA_EGRESS_MIRROR: return false; case TCA_INGRESS_REDIR: case TCA_INGRESS_MIRROR: return true; default: BUG(); } } static bool tcf_mirred_can_reinsert(int action) { switch (action) { case TC_ACT_SHOT: case TC_ACT_STOLEN: case TC_ACT_QUEUED: case TC_ACT_TRAP: return true; } return false; } static struct net_device *tcf_mirred_dev_dereference(struct tcf_mirred *m) { return rcu_dereference_protected(m->tcfm_dev, lockdep_is_held(&m->tcf_lock)); } static void tcf_mirred_release(struct tc_action *a) { struct tcf_mirred *m = to_mirred(a); struct net_device *dev; spin_lock(&mirred_list_lock); list_del(&m->tcfm_list); spin_unlock(&mirred_list_lock); /* last reference to action, no need to lock */ dev = rcu_dereference_protected(m->tcfm_dev, 1); netdev_put(dev, &m->tcfm_dev_tracker); } static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { [TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) }, [TCA_MIRRED_BLOCKID] = NLA_POLICY_MIN(NLA_U32, 1), }; static struct tc_action_ops act_mirred_ops; static void tcf_mirred_replace_dev(struct tcf_mirred *m, struct net_device *ndev) { struct net_device *odev; odev = rcu_replace_pointer(m->tcfm_dev, ndev, lockdep_is_held(&m->tcf_lock)); netdev_put(odev, &m->tcfm_dev_tracker); } static int tcf_mirred_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, act_mirred_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_MIRRED_MAX + 1]; struct tcf_chain *goto_ch = NULL; bool mac_header_xmit = false; struct tc_mirred *parm; struct tcf_mirred *m; bool exists = false; int ret, err; u32 index; if (!nla) { NL_SET_ERR_MSG_MOD(extack, "Mirred requires attributes to be passed"); return -EINVAL; } ret = nla_parse_nested_deprecated(tb, TCA_MIRRED_MAX, nla, mirred_policy, extack); if (ret < 0) return ret; if (!tb[TCA_MIRRED_PARMS]) { NL_SET_ERR_MSG_MOD(extack, "Missing required mirred parameters"); return -EINVAL; } parm = nla_data(tb[TCA_MIRRED_PARMS]); index = parm->index; err = tcf_idr_check_alloc(tn, &index, a, bind); if (err < 0) return err; exists = err; if (exists && bind) return ACT_P_BOUND; if (tb[TCA_MIRRED_BLOCKID] && parm->ifindex) { NL_SET_ERR_MSG_MOD(extack, "Cannot specify Block ID and dev simultaneously"); if (exists) tcf_idr_release(*a, bind); else tcf_idr_cleanup(tn, index); return -EINVAL; } switch (parm->eaction) { case TCA_EGRESS_MIRROR: case TCA_EGRESS_REDIR: case TCA_INGRESS_REDIR: case TCA_INGRESS_MIRROR: break; default: if (exists) tcf_idr_release(*a, bind); else tcf_idr_cleanup(tn, index); NL_SET_ERR_MSG_MOD(extack, "Unknown mirred option"); return -EINVAL; } if (!exists) { if (!parm->ifindex && !tb[TCA_MIRRED_BLOCKID]) { tcf_idr_cleanup(tn, index); NL_SET_ERR_MSG_MOD(extack, "Must specify device or block"); return -EINVAL; } ret = tcf_idr_create_from_flags(tn, index, est, a, &act_mirred_ops, bind, flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; } ret = ACT_P_CREATED; } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } m = to_mirred(*a); if (ret == ACT_P_CREATED) INIT_LIST_HEAD(&m->tcfm_list); err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); if (err < 0) goto release_idr; spin_lock_bh(&m->tcf_lock); if (parm->ifindex) { struct net_device *ndev; ndev = dev_get_by_index(net, parm->ifindex); if (!ndev) { spin_unlock_bh(&m->tcf_lock); err = -ENODEV; goto put_chain; } mac_header_xmit = dev_is_mac_header_xmit(ndev); tcf_mirred_replace_dev(m, ndev); netdev_tracker_alloc(ndev, &m->tcfm_dev_tracker, GFP_ATOMIC); m->tcfm_mac_header_xmit = mac_header_xmit; m->tcfm_blockid = 0; } else if (tb[TCA_MIRRED_BLOCKID]) { tcf_mirred_replace_dev(m, NULL); m->tcfm_mac_header_xmit = false; m->tcfm_blockid = nla_get_u32(tb[TCA_MIRRED_BLOCKID]); } goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); m->tcfm_eaction = parm->eaction; spin_unlock_bh(&m->tcf_lock); if (goto_ch) tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) { spin_lock(&mirred_list_lock); list_add(&m->tcfm_list, &mirred_list); spin_unlock(&mirred_list_lock); } return ret; put_chain: if (goto_ch) tcf_chain_put_by_act(goto_ch); release_idr: tcf_idr_release(*a, bind); return err; } static int tcf_mirred_forward(bool at_ingress, bool want_ingress, struct sk_buff *skb) { int err; if (!want_ingress) err = tcf_dev_queue_xmit(skb, dev_queue_xmit); else if (!at_ingress) err = netif_rx(skb); else err = netif_receive_skb(skb); return err; } static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m, struct net_device *dev, const bool m_mac_header_xmit, int m_eaction, int retval) { struct sk_buff *skb_to_send = skb; bool want_ingress; bool is_redirect; bool expects_nh; bool at_ingress; bool dont_clone; int mac_len; bool at_nh; int err; is_redirect = tcf_mirred_is_act_redirect(m_eaction); if (unlikely(!(dev->flags & IFF_UP)) || !netif_carrier_ok(dev)) { net_notice_ratelimited("tc mirred to Houston: device %s is down\n", dev->name); goto err_cant_do; } /* we could easily avoid the clone only if called by ingress and clsact; * since we can't easily detect the clsact caller, skip clone only for * ingress - that covers the TC S/W datapath. */ at_ingress = skb_at_tc_ingress(skb); dont_clone = skb_at_tc_ingress(skb) && is_redirect && tcf_mirred_can_reinsert(retval); if (!dont_clone) { skb_to_send = skb_clone(skb, GFP_ATOMIC); if (!skb_to_send) goto err_cant_do; } want_ingress = tcf_mirred_act_wants_ingress(m_eaction); /* All mirred/redirected skbs should clear previous ct info */ nf_reset_ct(skb_to_send); if (want_ingress && !at_ingress) /* drop dst for egress -> ingress */ skb_dst_drop(skb_to_send); expects_nh = want_ingress || !m_mac_header_xmit; at_nh = skb->data == skb_network_header(skb); if (at_nh != expects_nh) { mac_len = at_ingress ? skb->mac_len : skb_network_offset(skb); if (expects_nh) { /* target device/action expect data at nh */ skb_pull_rcsum(skb_to_send, mac_len); } else { /* target device/action expect data at mac */ skb_push_rcsum(skb_to_send, mac_len); } } skb_to_send->skb_iif = skb->dev->ifindex; skb_to_send->dev = dev; if (is_redirect) { if (skb == skb_to_send) retval = TC_ACT_CONSUMED; skb_set_redirected(skb_to_send, skb_to_send->tc_at_ingress); err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send); } else { err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send); } if (err) tcf_action_inc_overlimit_qstats(&m->common); return retval; err_cant_do: if (is_redirect) retval = TC_ACT_SHOT; tcf_action_inc_overlimit_qstats(&m->common); return retval; } static int tcf_blockcast_redir(struct sk_buff *skb, struct tcf_mirred *m, struct tcf_block *block, int m_eaction, const u32 exception_ifindex, int retval) { struct net_device *dev_prev = NULL; struct net_device *dev = NULL; unsigned long index; int mirred_eaction; mirred_eaction = tcf_mirred_act_wants_ingress(m_eaction) ? TCA_INGRESS_MIRROR : TCA_EGRESS_MIRROR; xa_for_each(&block->ports, index, dev) { if (index == exception_ifindex) continue; if (!dev_prev) goto assign_prev; tcf_mirred_to_dev(skb, m, dev_prev, dev_is_mac_header_xmit(dev), mirred_eaction, retval); assign_prev: dev_prev = dev; } if (dev_prev) return tcf_mirred_to_dev(skb, m, dev_prev, dev_is_mac_header_xmit(dev_prev), m_eaction, retval); return retval; } static int tcf_blockcast_mirror(struct sk_buff *skb, struct tcf_mirred *m, struct tcf_block *block, int m_eaction, const u32 exception_ifindex, int retval) { struct net_device *dev = NULL; unsigned long index; xa_for_each(&block->ports, index, dev) { if (index == exception_ifindex) continue; tcf_mirred_to_dev(skb, m, dev, dev_is_mac_header_xmit(dev), m_eaction, retval); } return retval; } static int tcf_blockcast(struct sk_buff *skb, struct tcf_mirred *m, const u32 blockid, struct tcf_result *res, int retval) { const u32 exception_ifindex = skb->dev->ifindex; struct tcf_block *block; bool is_redirect; int m_eaction; m_eaction = READ_ONCE(m->tcfm_eaction); is_redirect = tcf_mirred_is_act_redirect(m_eaction); /* we are already under rcu protection, so can call block lookup * directly. */ block = tcf_block_lookup(dev_net(skb->dev), blockid); if (!block || xa_empty(&block->ports)) { tcf_action_inc_overlimit_qstats(&m->common); return retval; } if (is_redirect) return tcf_blockcast_redir(skb, m, block, m_eaction, exception_ifindex, retval); /* If it's not redirect, it is mirror */ return tcf_blockcast_mirror(skb, m, block, m_eaction, exception_ifindex, retval); } TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_mirred *m = to_mirred(a); int retval = READ_ONCE(m->tcf_action); unsigned int nest_level; bool m_mac_header_xmit; struct net_device *dev; int m_eaction; u32 blockid; nest_level = __this_cpu_inc_return(mirred_nest_level); if (unlikely(nest_level > MIRRED_NEST_LIMIT)) { net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n", netdev_name(skb->dev)); retval = TC_ACT_SHOT; goto dec_nest_level; } tcf_lastuse_update(&m->tcf_tm); tcf_action_update_bstats(&m->common, skb); blockid = READ_ONCE(m->tcfm_blockid); if (blockid) { retval = tcf_blockcast(skb, m, blockid, res, retval); goto dec_nest_level; } dev = rcu_dereference_bh(m->tcfm_dev); if (unlikely(!dev)) { pr_notice_once("tc mirred: target device is gone\n"); tcf_action_inc_overlimit_qstats(&m->common); goto dec_nest_level; } m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit); m_eaction = READ_ONCE(m->tcfm_eaction); retval = tcf_mirred_to_dev(skb, m, dev, m_mac_header_xmit, m_eaction, retval); dec_nest_level: __this_cpu_dec(mirred_nest_level); return retval; } static void tcf_stats_update(struct tc_action *a, u64 bytes, u64 packets, u64 drops, u64 lastuse, bool hw) { struct tcf_mirred *m = to_mirred(a); struct tcf_t *tm = &m->tcf_tm; tcf_action_update_stats(a, bytes, packets, drops, hw); tm->lastuse = max_t(u64, tm->lastuse, lastuse); } static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_mirred *m = to_mirred(a); struct tc_mirred opt = { .index = m->tcf_index, .refcnt = refcount_read(&m->tcf_refcnt) - ref, .bindcnt = atomic_read(&m->tcf_bindcnt) - bind, }; struct net_device *dev; struct tcf_t t; u32 blockid; spin_lock_bh(&m->tcf_lock); opt.action = m->tcf_action; opt.eaction = m->tcfm_eaction; dev = tcf_mirred_dev_dereference(m); if (dev) opt.ifindex = dev->ifindex; if (nla_put(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt)) goto nla_put_failure; blockid = m->tcfm_blockid; if (blockid && nla_put_u32(skb, TCA_MIRRED_BLOCKID, blockid)) goto nla_put_failure; tcf_tm_dump(&t, &m->tcf_tm); if (nla_put_64bit(skb, TCA_MIRRED_TM, sizeof(t), &t, TCA_MIRRED_PAD)) goto nla_put_failure; spin_unlock_bh(&m->tcf_lock); return skb->len; nla_put_failure: spin_unlock_bh(&m->tcf_lock); nlmsg_trim(skb, b); return -1; } static int mirred_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct tcf_mirred *m; ASSERT_RTNL(); if (event == NETDEV_UNREGISTER) { spin_lock(&mirred_list_lock); list_for_each_entry(m, &mirred_list, tcfm_list) { spin_lock_bh(&m->tcf_lock); if (tcf_mirred_dev_dereference(m) == dev) { netdev_put(dev, &m->tcfm_dev_tracker); /* Note : no rcu grace period necessary, as * net_device are already rcu protected. */ RCU_INIT_POINTER(m->tcfm_dev, NULL); } spin_unlock_bh(&m->tcf_lock); } spin_unlock(&mirred_list_lock); } return NOTIFY_DONE; } static struct notifier_block mirred_device_notifier = { .notifier_call = mirred_device_event, }; static void tcf_mirred_dev_put(void *priv) { struct net_device *dev = priv; dev_put(dev); } static struct net_device * tcf_mirred_get_dev(const struct tc_action *a, tc_action_priv_destructor *destructor) { struct tcf_mirred *m = to_mirred(a); struct net_device *dev; rcu_read_lock(); dev = rcu_dereference(m->tcfm_dev); if (dev) { dev_hold(dev); *destructor = tcf_mirred_dev_put; } rcu_read_unlock(); return dev; } static size_t tcf_mirred_get_fill_size(const struct tc_action *act) { return nla_total_size(sizeof(struct tc_mirred)); } static void tcf_offload_mirred_get_dev(struct flow_action_entry *entry, const struct tc_action *act) { entry->dev = act->ops->get_dev(act, &entry->destructor); if (!entry->dev) return; entry->destructor_priv = entry->dev; } static int tcf_mirred_offload_act_setup(struct tc_action *act, void *entry_data, u32 *index_inc, bool bind, struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; if (is_tcf_mirred_egress_redirect(act)) { entry->id = FLOW_ACTION_REDIRECT; tcf_offload_mirred_get_dev(entry, act); } else if (is_tcf_mirred_egress_mirror(act)) { entry->id = FLOW_ACTION_MIRRED; tcf_offload_mirred_get_dev(entry, act); } else if (is_tcf_mirred_ingress_redirect(act)) { entry->id = FLOW_ACTION_REDIRECT_INGRESS; tcf_offload_mirred_get_dev(entry, act); } else if (is_tcf_mirred_ingress_mirror(act)) { entry->id = FLOW_ACTION_MIRRED_INGRESS; tcf_offload_mirred_get_dev(entry, act); } else { NL_SET_ERR_MSG_MOD(extack, "Unsupported mirred offload"); return -EOPNOTSUPP; } *index_inc = 1; } else { struct flow_offload_action *fl_action = entry_data; if (is_tcf_mirred_egress_redirect(act)) fl_action->id = FLOW_ACTION_REDIRECT; else if (is_tcf_mirred_egress_mirror(act)) fl_action->id = FLOW_ACTION_MIRRED; else if (is_tcf_mirred_ingress_redirect(act)) fl_action->id = FLOW_ACTION_REDIRECT_INGRESS; else if (is_tcf_mirred_ingress_mirror(act)) fl_action->id = FLOW_ACTION_MIRRED_INGRESS; else return -EOPNOTSUPP; } return 0; } static struct tc_action_ops act_mirred_ops = { .kind = "mirred", .id = TCA_ID_MIRRED, .owner = THIS_MODULE, .act = tcf_mirred_act, .stats_update = tcf_stats_update, .dump = tcf_mirred_dump, .cleanup = tcf_mirred_release, .init = tcf_mirred_init, .get_fill_size = tcf_mirred_get_fill_size, .offload_act_setup = tcf_mirred_offload_act_setup, .size = sizeof(struct tcf_mirred), .get_dev = tcf_mirred_get_dev, }; MODULE_ALIAS_NET_ACT("mirred"); static __net_init int mirred_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, act_mirred_ops.net_id); return tc_action_net_init(net, tn, &act_mirred_ops); } static void __net_exit mirred_exit_net(struct list_head *net_list) { tc_action_net_exit(net_list, act_mirred_ops.net_id); } static struct pernet_operations mirred_net_ops = { .init = mirred_init_net, .exit_batch = mirred_exit_net, .id = &act_mirred_ops.net_id, .size = sizeof(struct tc_action_net), }; MODULE_AUTHOR("Jamal Hadi Salim(2002)"); MODULE_DESCRIPTION("Device Mirror/redirect actions"); MODULE_LICENSE("GPL"); static int __init mirred_init_module(void) { int err = register_netdevice_notifier(&mirred_device_notifier); if (err) return err; pr_info("Mirror/redirect action on\n"); err = tcf_register_action(&act_mirred_ops, &mirred_net_ops); if (err) unregister_netdevice_notifier(&mirred_device_notifier); return err; } static void __exit mirred_cleanup_module(void) { tcf_unregister_action(&act_mirred_ops, &mirred_net_ops); unregister_netdevice_notifier(&mirred_device_notifier); } module_init(mirred_init_module); module_exit(mirred_cleanup_module);
1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 // SPDX-License-Identifier: GPL-2.0 /* * drivers/power/process.c - Functions for starting/stopping processes on * suspend transitions. * * Originally from swsusp. */ #include <linux/interrupt.h> #include <linux/oom.h> #include <linux/suspend.h> #include <linux/module.h> #include <linux/sched/debug.h> #include <linux/sched/task.h> #include <linux/syscalls.h> #include <linux/freezer.h> #include <linux/delay.h> #include <linux/workqueue.h> #include <linux/kmod.h> #include <trace/events/power.h> #include <linux/cpuset.h> /* * Timeout for stopping processes */ unsigned int __read_mostly freeze_timeout_msecs = 20 * MSEC_PER_SEC; static int try_to_freeze_tasks(bool user_only) { const char *what = user_only ? "user space processes" : "remaining freezable tasks"; struct task_struct *g, *p; unsigned long end_time; unsigned int todo; bool wq_busy = false; ktime_t start, end, elapsed; unsigned int elapsed_msecs; bool wakeup = false; int sleep_usecs = USEC_PER_MSEC; pr_info("Freezing %s\n", what); start = ktime_get_boottime(); end_time = jiffies + msecs_to_jiffies(freeze_timeout_msecs); if (!user_only) freeze_workqueues_begin(); while (true) { todo = 0; read_lock(&tasklist_lock); for_each_process_thread(g, p) { if (p == current || !freeze_task(p)) continue; todo++; } read_unlock(&tasklist_lock); if (!user_only) { wq_busy = freeze_workqueues_busy(); todo += wq_busy; } if (!todo || time_after(jiffies, end_time)) break; if (pm_wakeup_pending()) { wakeup = true; break; } /* * We need to retry, but first give the freezing tasks some * time to enter the refrigerator. Start with an initial * 1 ms sleep followed by exponential backoff until 8 ms. */ usleep_range(sleep_usecs / 2, sleep_usecs); if (sleep_usecs < 8 * USEC_PER_MSEC) sleep_usecs *= 2; } end = ktime_get_boottime(); elapsed = ktime_sub(end, start); elapsed_msecs = ktime_to_ms(elapsed); if (todo) { pr_err("Freezing %s %s after %d.%03d seconds " "(%d tasks refusing to freeze, wq_busy=%d):\n", what, wakeup ? "aborted" : "failed", elapsed_msecs / 1000, elapsed_msecs % 1000, todo - wq_busy, wq_busy); if (wq_busy) show_freezable_workqueues(); if (!wakeup || pm_debug_messages_on) { read_lock(&tasklist_lock); for_each_process_thread(g, p) { if (p != current && freezing(p) && !frozen(p)) sched_show_task(p); } read_unlock(&tasklist_lock); } } else { pr_info("Freezing %s completed (elapsed %d.%03d seconds)\n", what, elapsed_msecs / 1000, elapsed_msecs % 1000); } return todo ? -EBUSY : 0; } /** * freeze_processes - Signal user space processes to enter the refrigerator. * The current thread will not be frozen. The same process that calls * freeze_processes must later call thaw_processes. * * On success, returns 0. On failure, -errno and system is fully thawed. */ int freeze_processes(void) { int error; error = __usermodehelper_disable(UMH_FREEZING); if (error) return error; /* Make sure this task doesn't get frozen */ current->flags |= PF_SUSPEND_TASK; if (!pm_freezing) static_branch_inc(&freezer_active); pm_wakeup_clear(0); pm_freezing = true; error = try_to_freeze_tasks(true); if (!error) __usermodehelper_set_disable_depth(UMH_DISABLED); BUG_ON(in_atomic()); /* * Now that the whole userspace is frozen we need to disable * the OOM killer to disallow any further interference with * killable tasks. There is no guarantee oom victims will * ever reach a point they go away we have to wait with a timeout. */ if (!error && !oom_killer_disable(msecs_to_jiffies(freeze_timeout_msecs))) error = -EBUSY; if (error) thaw_processes(); return error; } /** * freeze_kernel_threads - Make freezable kernel threads go to the refrigerator. * * On success, returns 0. On failure, -errno and only the kernel threads are * thawed, so as to give a chance to the caller to do additional cleanups * (if any) before thawing the userspace tasks. So, it is the responsibility * of the caller to thaw the userspace tasks, when the time is right. */ int freeze_kernel_threads(void) { int error; pm_nosig_freezing = true; error = try_to_freeze_tasks(false); BUG_ON(in_atomic()); if (error) thaw_kernel_threads(); return error; } void thaw_processes(void) { struct task_struct *g, *p; struct task_struct *curr = current; trace_suspend_resume(TPS("thaw_processes"), 0, true); if (pm_freezing) static_branch_dec(&freezer_active); pm_freezing = false; pm_nosig_freezing = false; oom_killer_enable(); pr_info("Restarting tasks ... "); __usermodehelper_set_disable_depth(UMH_FREEZING); thaw_workqueues(); read_lock(&tasklist_lock); for_each_process_thread(g, p) { /* No other threads should have PF_SUSPEND_TASK set */ WARN_ON((p != curr) && (p->flags & PF_SUSPEND_TASK)); __thaw_task(p); } read_unlock(&tasklist_lock); WARN_ON(!(curr->flags & PF_SUSPEND_TASK)); curr->flags &= ~PF_SUSPEND_TASK; usermodehelper_enable(); schedule(); pr_cont("done.\n"); trace_suspend_resume(TPS("thaw_processes"), 0, false); } void thaw_kernel_threads(void) { struct task_struct *g, *p; pm_nosig_freezing = false; pr_info("Restarting kernel threads ... "); thaw_workqueues(); read_lock(&tasklist_lock); for_each_process_thread(g, p) { if (p->flags & PF_KTHREAD) __thaw_task(p); } read_unlock(&tasklist_lock); schedule(); pr_cont("done.\n"); }
10 2 1 5 10 7 3 10 10 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 // SPDX-License-Identifier: GPL-2.0-only /* * symlink.c * * PURPOSE * Symlink handling routines for the OSTA-UDF(tm) filesystem. * * COPYRIGHT * (C) 1998-2001 Ben Fennema * (C) 1999 Stelias Computing Inc * * HISTORY * * 04/16/99 blf Created. * */ #include "udfdecl.h" #include <linux/uaccess.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/time.h> #include <linux/mm.h> #include <linux/stat.h> #include <linux/pagemap.h> #include "udf_i.h" static int udf_pc_to_char(struct super_block *sb, unsigned char *from, int fromlen, unsigned char *to, int tolen) { struct pathComponent *pc; int elen = 0; int comp_len; unsigned char *p = to; /* Reserve one byte for terminating \0 */ tolen--; while (elen < fromlen) { pc = (struct pathComponent *)(from + elen); elen += sizeof(struct pathComponent); switch (pc->componentType) { case 1: /* * Symlink points to some place which should be agreed * upon between originator and receiver of the media. Ignore. */ if (pc->lengthComponentIdent > 0) { elen += pc->lengthComponentIdent; break; } fallthrough; case 2: if (tolen == 0) return -ENAMETOOLONG; p = to; *p++ = '/'; tolen--; break; case 3: if (tolen < 3) return -ENAMETOOLONG; memcpy(p, "../", 3); p += 3; tolen -= 3; break; case 4: if (tolen < 2) return -ENAMETOOLONG; memcpy(p, "./", 2); p += 2; tolen -= 2; /* that would be . - just ignore */ break; case 5: elen += pc->lengthComponentIdent; if (elen > fromlen) return -EIO; comp_len = udf_get_filename(sb, pc->componentIdent, pc->lengthComponentIdent, p, tolen); if (comp_len < 0) return comp_len; p += comp_len; tolen -= comp_len; if (tolen == 0) return -ENAMETOOLONG; *p++ = '/'; tolen--; break; } } if (p > to + 1) p[-1] = '\0'; else p[0] = '\0'; return 0; } static int udf_symlink_filler(struct file *file, struct folio *folio) { struct inode *inode = folio->mapping->host; struct buffer_head *bh = NULL; unsigned char *symlink; int err = 0; unsigned char *p = folio_address(folio); struct udf_inode_info *iinfo = UDF_I(inode); /* We don't support symlinks longer than one block */ if (inode->i_size > inode->i_sb->s_blocksize) { err = -ENAMETOOLONG; goto out; } if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { symlink = iinfo->i_data + iinfo->i_lenEAttr; } else { bh = udf_bread(inode, 0, 0, &err); if (!bh) { if (!err) err = -EFSCORRUPTED; goto out; } symlink = bh->b_data; } err = udf_pc_to_char(inode->i_sb, symlink, inode->i_size, p, PAGE_SIZE); brelse(bh); out: folio_end_read(folio, err == 0); return err; } static int udf_symlink_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { struct dentry *dentry = path->dentry; struct inode *inode = d_backing_inode(dentry); struct folio *folio; generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); folio = read_mapping_folio(inode->i_mapping, 0, NULL); if (IS_ERR(folio)) return PTR_ERR(folio); /* * UDF uses non-trivial encoding of symlinks so i_size does not match * number of characters reported by readlink(2) which apparently some * applications expect. Also POSIX says that "The value returned in the * st_size field shall be the length of the contents of the symbolic * link, and shall not count a trailing null if one is present." So * let's report the length of string returned by readlink(2) for * st_size. */ stat->size = strlen(folio_address(folio)); folio_put(folio); return 0; } /* * symlinks can't do much... */ const struct address_space_operations udf_symlink_aops = { .read_folio = udf_symlink_filler, }; const struct inode_operations udf_symlink_inode_operations = { .get_link = page_get_link, .getattr = udf_symlink_getattr, };
73 1 9 1 1 29 29 29 30 1 29 29 25 4 29 29 29 31 9 32 18 10 3 5 18 2 18 31 39 33 2 7 1 42 39 10 31 31 79 44 28 5 73 1 6 3 3 9 7 18 43 56 18 431 429 2 2 427 26 19 8 24 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 // SPDX-License-Identifier: GPL-2.0-or-later /* Request a key from userspace * * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * See Documentation/security/keys/request-key.rst */ #include <linux/export.h> #include <linux/sched.h> #include <linux/kmod.h> #include <linux/err.h> #include <linux/keyctl.h> #include <linux/slab.h> #include <net/net_namespace.h> #include "internal.h" #include <keys/request_key_auth-type.h> #define key_negative_timeout 60 /* default timeout on a negative key's existence */ static struct key *check_cached_key(struct keyring_search_context *ctx) { #ifdef CONFIG_KEYS_REQUEST_CACHE struct key *key = current->cached_requested_key; if (key && ctx->match_data.cmp(key, &ctx->match_data) && !(key->flags & ((1 << KEY_FLAG_INVALIDATED) | (1 << KEY_FLAG_REVOKED)))) return key_get(key); #endif return NULL; } static void cache_requested_key(struct key *key) { #ifdef CONFIG_KEYS_REQUEST_CACHE struct task_struct *t = current; /* Do not cache key if it is a kernel thread */ if (!(t->flags & PF_KTHREAD)) { key_put(t->cached_requested_key); t->cached_requested_key = key_get(key); set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); } #endif } /** * complete_request_key - Complete the construction of a key. * @authkey: The authorisation key. * @error: The success or failute of the construction. * * Complete the attempt to construct a key. The key will be negated * if an error is indicated. The authorisation key will be revoked * unconditionally. */ void complete_request_key(struct key *authkey, int error) { struct request_key_auth *rka = get_request_key_auth(authkey); struct key *key = rka->target_key; kenter("%d{%d},%d", authkey->serial, key->serial, error); if (error < 0) key_negate_and_link(key, key_negative_timeout, NULL, authkey); else key_revoke(authkey); } EXPORT_SYMBOL(complete_request_key); /* * Initialise a usermode helper that is going to have a specific session * keyring. * * This is called in context of freshly forked kthread before kernel_execve(), * so we can simply install the desired session_keyring at this point. */ static int umh_keys_init(struct subprocess_info *info, struct cred *cred) { struct key *keyring = info->data; return install_session_keyring_to_cred(cred, keyring); } /* * Clean up a usermode helper with session keyring. */ static void umh_keys_cleanup(struct subprocess_info *info) { struct key *keyring = info->data; key_put(keyring); } /* * Call a usermode helper with a specific session keyring. */ static int call_usermodehelper_keys(const char *path, char **argv, char **envp, struct key *session_keyring, int wait) { struct subprocess_info *info; info = call_usermodehelper_setup(path, argv, envp, GFP_KERNEL, umh_keys_init, umh_keys_cleanup, session_keyring); if (!info) return -ENOMEM; key_get(session_keyring); return call_usermodehelper_exec(info, wait); } /* * Request userspace finish the construction of a key * - execute "/sbin/request-key <op> <key> <uid> <gid> <keyring> <keyring> <keyring>" */ static int call_sbin_request_key(struct key *authkey, void *aux) { static char const request_key[] = "/sbin/request-key"; struct request_key_auth *rka = get_request_key_auth(authkey); const struct cred *cred = current_cred(); key_serial_t prkey, sskey; struct key *key = rka->target_key, *keyring, *session, *user_session; char *argv[9], *envp[3], uid_str[12], gid_str[12]; char key_str[12], keyring_str[3][12]; char desc[20]; int ret, i; kenter("{%d},{%d},%s", key->serial, authkey->serial, rka->op); ret = look_up_user_keyrings(NULL, &user_session); if (ret < 0) goto error_us; /* allocate a new session keyring */ sprintf(desc, "_req.%u", key->serial); cred = get_current_cred(); keyring = keyring_alloc(desc, cred->fsuid, cred->fsgid, cred, KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ, KEY_ALLOC_QUOTA_OVERRUN, NULL, NULL); put_cred(cred); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto error_alloc; } /* attach the auth key to the session keyring */ ret = key_link(keyring, authkey); if (ret < 0) goto error_link; /* record the UID and GID */ sprintf(uid_str, "%d", from_kuid(&init_user_ns, cred->fsuid)); sprintf(gid_str, "%d", from_kgid(&init_user_ns, cred->fsgid)); /* we say which key is under construction */ sprintf(key_str, "%d", key->serial); /* we specify the process's default keyrings */ sprintf(keyring_str[0], "%d", cred->thread_keyring ? cred->thread_keyring->serial : 0); prkey = 0; if (cred->process_keyring) prkey = cred->process_keyring->serial; sprintf(keyring_str[1], "%d", prkey); session = cred->session_keyring; if (!session) session = user_session; sskey = session->serial; sprintf(keyring_str[2], "%d", sskey); /* set up a minimal environment */ i = 0; envp[i++] = "HOME=/"; envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; envp[i] = NULL; /* set up the argument list */ i = 0; argv[i++] = (char *)request_key; argv[i++] = (char *)rka->op; argv[i++] = key_str; argv[i++] = uid_str; argv[i++] = gid_str; argv[i++] = keyring_str[0]; argv[i++] = keyring_str[1]; argv[i++] = keyring_str[2]; argv[i] = NULL; /* do it */ ret = call_usermodehelper_keys(request_key, argv, envp, keyring, UMH_WAIT_PROC); kdebug("usermode -> 0x%x", ret); if (ret >= 0) { /* ret is the exit/wait code */ if (test_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags) || key_validate(key) < 0) ret = -ENOKEY; else /* ignore any errors from userspace if the key was * instantiated */ ret = 0; } error_link: key_put(keyring); error_alloc: key_put(user_session); error_us: complete_request_key(authkey, ret); kleave(" = %d", ret); return ret; } /* * Call out to userspace for key construction. * * Program failure is ignored in favour of key status. */ static int construct_key(struct key *key, const void *callout_info, size_t callout_len, void *aux, struct key *dest_keyring) { request_key_actor_t actor; struct key *authkey; int ret; kenter("%d,%p,%zu,%p", key->serial, callout_info, callout_len, aux); /* allocate an authorisation key */ authkey = request_key_auth_new(key, "create", callout_info, callout_len, dest_keyring); if (IS_ERR(authkey)) return PTR_ERR(authkey); /* Make the call */ actor = call_sbin_request_key; if (key->type->request_key) actor = key->type->request_key; ret = actor(authkey, aux); /* check that the actor called complete_request_key() prior to * returning an error */ WARN_ON(ret < 0 && !test_bit(KEY_FLAG_INVALIDATED, &authkey->flags)); key_put(authkey); kleave(" = %d", ret); return ret; } /* * Get the appropriate destination keyring for the request. * * The keyring selected is returned with an extra reference upon it which the * caller must release. */ static int construct_get_dest_keyring(struct key **_dest_keyring) { struct request_key_auth *rka; const struct cred *cred = current_cred(); struct key *dest_keyring = *_dest_keyring, *authkey; int ret; kenter("%p", dest_keyring); /* find the appropriate keyring */ if (dest_keyring) { /* the caller supplied one */ key_get(dest_keyring); } else { bool do_perm_check = true; /* use a default keyring; falling through the cases until we * find one that we actually have */ switch (cred->jit_keyring) { case KEY_REQKEY_DEFL_DEFAULT: case KEY_REQKEY_DEFL_REQUESTOR_KEYRING: if (cred->request_key_auth) { authkey = cred->request_key_auth; down_read(&authkey->sem); rka = get_request_key_auth(authkey); if (!test_bit(KEY_FLAG_REVOKED, &authkey->flags)) dest_keyring = key_get(rka->dest_keyring); up_read(&authkey->sem); if (dest_keyring) { do_perm_check = false; break; } } fallthrough; case KEY_REQKEY_DEFL_THREAD_KEYRING: dest_keyring = key_get(cred->thread_keyring); if (dest_keyring) break; fallthrough; case KEY_REQKEY_DEFL_PROCESS_KEYRING: dest_keyring = key_get(cred->process_keyring); if (dest_keyring) break; fallthrough; case KEY_REQKEY_DEFL_SESSION_KEYRING: dest_keyring = key_get(cred->session_keyring); if (dest_keyring) break; fallthrough; case KEY_REQKEY_DEFL_USER_SESSION_KEYRING: ret = look_up_user_keyrings(NULL, &dest_keyring); if (ret < 0) return ret; break; case KEY_REQKEY_DEFL_USER_KEYRING: ret = look_up_user_keyrings(&dest_keyring, NULL); if (ret < 0) return ret; break; case KEY_REQKEY_DEFL_GROUP_KEYRING: default: BUG(); } /* * Require Write permission on the keyring. This is essential * because the default keyring may be the session keyring, and * joining a keyring only requires Search permission. * * However, this check is skipped for the "requestor keyring" so * that /sbin/request-key can itself use request_key() to add * keys to the original requestor's destination keyring. */ if (dest_keyring && do_perm_check) { ret = key_permission(make_key_ref(dest_keyring, 1), KEY_NEED_WRITE); if (ret) { key_put(dest_keyring); return ret; } } } *_dest_keyring = dest_keyring; kleave(" [dk %d]", key_serial(dest_keyring)); return 0; } /* * Allocate a new key in under-construction state and attempt to link it in to * the requested keyring. * * May return a key that's already under construction instead if there was a * race between two thread calling request_key(). */ static int construct_alloc_key(struct keyring_search_context *ctx, struct key *dest_keyring, unsigned long flags, struct key_user *user, struct key **_key) { struct assoc_array_edit *edit = NULL; struct key *key; key_perm_t perm; key_ref_t key_ref; int ret; kenter("%s,%s,,,", ctx->index_key.type->name, ctx->index_key.description); *_key = NULL; mutex_lock(&user->cons_lock); perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR; perm |= KEY_USR_VIEW; if (ctx->index_key.type->read) perm |= KEY_POS_READ; if (ctx->index_key.type == &key_type_keyring || ctx->index_key.type->update) perm |= KEY_POS_WRITE; key = key_alloc(ctx->index_key.type, ctx->index_key.description, ctx->cred->fsuid, ctx->cred->fsgid, ctx->cred, perm, flags, NULL); if (IS_ERR(key)) goto alloc_failed; set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags); if (dest_keyring) { ret = __key_link_lock(dest_keyring, &key->index_key); if (ret < 0) goto link_lock_failed; } /* * Attach the key to the destination keyring under lock, but we do need * to do another check just in case someone beat us to it whilst we * waited for locks. * * The caller might specify a comparison function which looks for keys * that do not exactly match but are still equivalent from the caller's * perspective. The __key_link_begin() operation must be done only after * an actual key is determined. */ mutex_lock(&key_construction_mutex); rcu_read_lock(); key_ref = search_process_keyrings_rcu(ctx); rcu_read_unlock(); if (!IS_ERR(key_ref)) goto key_already_present; if (dest_keyring) { ret = __key_link_begin(dest_keyring, &key->index_key, &edit); if (ret < 0) goto link_alloc_failed; __key_link(dest_keyring, key, &edit); } mutex_unlock(&key_construction_mutex); if (dest_keyring) __key_link_end(dest_keyring, &key->index_key, edit); mutex_unlock(&user->cons_lock); *_key = key; kleave(" = 0 [%d]", key_serial(key)); return 0; /* the key is now present - we tell the caller that we found it by * returning -EINPROGRESS */ key_already_present: key_put(key); mutex_unlock(&key_construction_mutex); key = key_ref_to_ptr(key_ref); if (dest_keyring) { ret = __key_link_begin(dest_keyring, &key->index_key, &edit); if (ret < 0) goto link_alloc_failed_unlocked; ret = __key_link_check_live_key(dest_keyring, key); if (ret == 0) __key_link(dest_keyring, key, &edit); __key_link_end(dest_keyring, &key->index_key, edit); if (ret < 0) goto link_check_failed; } mutex_unlock(&user->cons_lock); *_key = key; kleave(" = -EINPROGRESS [%d]", key_serial(key)); return -EINPROGRESS; link_check_failed: mutex_unlock(&user->cons_lock); key_put(key); kleave(" = %d [linkcheck]", ret); return ret; link_alloc_failed: mutex_unlock(&key_construction_mutex); link_alloc_failed_unlocked: __key_link_end(dest_keyring, &key->index_key, edit); link_lock_failed: mutex_unlock(&user->cons_lock); key_put(key); kleave(" = %d [prelink]", ret); return ret; alloc_failed: mutex_unlock(&user->cons_lock); kleave(" = %ld", PTR_ERR(key)); return PTR_ERR(key); } /* * Commence key construction. */ static struct key *construct_key_and_link(struct keyring_search_context *ctx, const char *callout_info, size_t callout_len, void *aux, struct key *dest_keyring, unsigned long flags) { struct key_user *user; struct key *key; int ret; kenter(""); if (ctx->index_key.type == &key_type_keyring) return ERR_PTR(-EPERM); ret = construct_get_dest_keyring(&dest_keyring); if (ret) goto error; user = key_user_lookup(current_fsuid()); if (!user) { ret = -ENOMEM; goto error_put_dest_keyring; } ret = construct_alloc_key(ctx, dest_keyring, flags, user, &key); key_user_put(user); if (ret == 0) { ret = construct_key(key, callout_info, callout_len, aux, dest_keyring); if (ret < 0) { kdebug("cons failed"); goto construction_failed; } } else if (ret == -EINPROGRESS) { ret = 0; } else { goto error_put_dest_keyring; } key_put(dest_keyring); kleave(" = key %d", key_serial(key)); return key; construction_failed: key_negate_and_link(key, key_negative_timeout, NULL, NULL); key_put(key); error_put_dest_keyring: key_put(dest_keyring); error: kleave(" = %d", ret); return ERR_PTR(ret); } /** * request_key_and_link - Request a key and cache it in a keyring. * @type: The type of key we want. * @description: The searchable description of the key. * @domain_tag: The domain in which the key operates. * @callout_info: The data to pass to the instantiation upcall (or NULL). * @callout_len: The length of callout_info. * @aux: Auxiliary data for the upcall. * @dest_keyring: Where to cache the key. * @flags: Flags to key_alloc(). * * A key matching the specified criteria (type, description, domain_tag) is * searched for in the process's keyrings and returned with its usage count * incremented if found. Otherwise, if callout_info is not NULL, a key will be * allocated and some service (probably in userspace) will be asked to * instantiate it. * * If successfully found or created, the key will be linked to the destination * keyring if one is provided. * * Returns a pointer to the key if successful; -EACCES, -ENOKEY, -EKEYREVOKED * or -EKEYEXPIRED if an inaccessible, negative, revoked or expired key was * found; -ENOKEY if no key was found and no @callout_info was given; -EDQUOT * if insufficient key quota was available to create a new key; or -ENOMEM if * insufficient memory was available. * * If the returned key was created, then it may still be under construction, * and wait_for_key_construction() should be used to wait for that to complete. */ struct key *request_key_and_link(struct key_type *type, const char *description, struct key_tag *domain_tag, const void *callout_info, size_t callout_len, void *aux, struct key *dest_keyring, unsigned long flags) { struct keyring_search_context ctx = { .index_key.type = type, .index_key.domain_tag = domain_tag, .index_key.description = description, .index_key.desc_len = strlen(description), .cred = current_cred(), .match_data.cmp = key_default_cmp, .match_data.raw_data = description, .match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT, .flags = (KEYRING_SEARCH_DO_STATE_CHECK | KEYRING_SEARCH_SKIP_EXPIRED | KEYRING_SEARCH_RECURSE), }; struct key *key; key_ref_t key_ref; int ret; kenter("%s,%s,%p,%zu,%p,%p,%lx", ctx.index_key.type->name, ctx.index_key.description, callout_info, callout_len, aux, dest_keyring, flags); if (type->match_preparse) { ret = type->match_preparse(&ctx.match_data); if (ret < 0) { key = ERR_PTR(ret); goto error; } } key = check_cached_key(&ctx); if (key) goto error_free; /* search all the process keyrings for a key */ rcu_read_lock(); key_ref = search_process_keyrings_rcu(&ctx); rcu_read_unlock(); if (!IS_ERR(key_ref)) { if (dest_keyring) { ret = key_task_permission(key_ref, current_cred(), KEY_NEED_LINK); if (ret < 0) { key_ref_put(key_ref); key = ERR_PTR(ret); goto error_free; } } key = key_ref_to_ptr(key_ref); if (dest_keyring) { ret = key_link(dest_keyring, key); if (ret < 0) { key_put(key); key = ERR_PTR(ret); goto error_free; } } /* Only cache the key on immediate success */ cache_requested_key(key); } else if (PTR_ERR(key_ref) != -EAGAIN) { key = ERR_CAST(key_ref); } else { /* the search failed, but the keyrings were searchable, so we * should consult userspace if we can */ key = ERR_PTR(-ENOKEY); if (!callout_info) goto error_free; key = construct_key_and_link(&ctx, callout_info, callout_len, aux, dest_keyring, flags); } error_free: if (type->match_free) type->match_free(&ctx.match_data); error: kleave(" = %p", key); return key; } /** * wait_for_key_construction - Wait for construction of a key to complete * @key: The key being waited for. * @intr: Whether to wait interruptibly. * * Wait for a key to finish being constructed. * * Returns 0 if successful; -ERESTARTSYS if the wait was interrupted; -ENOKEY * if the key was negated; or -EKEYREVOKED or -EKEYEXPIRED if the key was * revoked or expired. */ int wait_for_key_construction(struct key *key, bool intr) { int ret; ret = wait_on_bit(&key->flags, KEY_FLAG_USER_CONSTRUCT, intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); if (ret) return -ERESTARTSYS; ret = key_read_state(key); if (ret < 0) return ret; return key_validate(key); } EXPORT_SYMBOL(wait_for_key_construction); /** * request_key_tag - Request a key and wait for construction * @type: Type of key. * @description: The searchable description of the key. * @domain_tag: The domain in which the key operates. * @callout_info: The data to pass to the instantiation upcall (or NULL). * * As for request_key_and_link() except that it does not add the returned key * to a keyring if found, new keys are always allocated in the user's quota, * the callout_info must be a NUL-terminated string and no auxiliary data can * be passed. * * Furthermore, it then works as wait_for_key_construction() to wait for the * completion of keys undergoing construction with a non-interruptible wait. */ struct key *request_key_tag(struct key_type *type, const char *description, struct key_tag *domain_tag, const char *callout_info) { struct key *key; size_t callout_len = 0; int ret; if (callout_info) callout_len = strlen(callout_info); key = request_key_and_link(type, description, domain_tag, callout_info, callout_len, NULL, NULL, KEY_ALLOC_IN_QUOTA); if (!IS_ERR(key)) { ret = wait_for_key_construction(key, false); if (ret < 0) { key_put(key); return ERR_PTR(ret); } } return key; } EXPORT_SYMBOL(request_key_tag); /** * request_key_with_auxdata - Request a key with auxiliary data for the upcaller * @type: The type of key we want. * @description: The searchable description of the key. * @domain_tag: The domain in which the key operates. * @callout_info: The data to pass to the instantiation upcall (or NULL). * @callout_len: The length of callout_info. * @aux: Auxiliary data for the upcall. * * As for request_key_and_link() except that it does not add the returned key * to a keyring if found and new keys are always allocated in the user's quota. * * Furthermore, it then works as wait_for_key_construction() to wait for the * completion of keys undergoing construction with a non-interruptible wait. */ struct key *request_key_with_auxdata(struct key_type *type, const char *description, struct key_tag *domain_tag, const void *callout_info, size_t callout_len, void *aux) { struct key *key; int ret; key = request_key_and_link(type, description, domain_tag, callout_info, callout_len, aux, NULL, KEY_ALLOC_IN_QUOTA); if (!IS_ERR(key)) { ret = wait_for_key_construction(key, false); if (ret < 0) { key_put(key); return ERR_PTR(ret); } } return key; } EXPORT_SYMBOL(request_key_with_auxdata); /** * request_key_rcu - Request key from RCU-read-locked context * @type: The type of key we want. * @description: The name of the key we want. * @domain_tag: The domain in which the key operates. * * Request a key from a context that we may not sleep in (such as RCU-mode * pathwalk). Keys under construction are ignored. * * Return a pointer to the found key if successful, -ENOKEY if we couldn't find * a key or some other error if the key found was unsuitable or inaccessible. */ struct key *request_key_rcu(struct key_type *type, const char *description, struct key_tag *domain_tag) { struct keyring_search_context ctx = { .index_key.type = type, .index_key.domain_tag = domain_tag, .index_key.description = description, .index_key.desc_len = strlen(description), .cred = current_cred(), .match_data.cmp = key_default_cmp, .match_data.raw_data = description, .match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT, .flags = (KEYRING_SEARCH_DO_STATE_CHECK | KEYRING_SEARCH_SKIP_EXPIRED), }; struct key *key; key_ref_t key_ref; kenter("%s,%s", type->name, description); key = check_cached_key(&ctx); if (key) return key; /* search all the process keyrings for a key */ key_ref = search_process_keyrings_rcu(&ctx); if (IS_ERR(key_ref)) { key = ERR_CAST(key_ref); if (PTR_ERR(key_ref) == -EAGAIN) key = ERR_PTR(-ENOKEY); } else { key = key_ref_to_ptr(key_ref); cache_requested_key(key); } kleave(" = %p", key); return key; } EXPORT_SYMBOL(request_key_rcu);
34 31 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 /* SPDX-License-Identifier: GPL-2.0 */ /* * INETPEER - A storage for permanent information about peers * * Authors: Andrey V. Savochkin <saw@msu.ru> */ #ifndef _NET_INETPEER_H #define _NET_INETPEER_H #include <linux/types.h> #include <linux/init.h> #include <linux/jiffies.h> #include <linux/spinlock.h> #include <linux/rtnetlink.h> #include <net/ipv6.h> #include <linux/atomic.h> /* IPv4 address key for cache lookups */ struct ipv4_addr_key { __be32 addr; int vif; }; #define INETPEER_MAXKEYSZ (sizeof(struct in6_addr) / sizeof(u32)) struct inetpeer_addr { union { struct ipv4_addr_key a4; struct in6_addr a6; u32 key[INETPEER_MAXKEYSZ]; }; __u16 family; }; struct inet_peer { struct rb_node rb_node; struct inetpeer_addr daddr; u32 metrics[RTAX_MAX]; u32 rate_tokens; /* rate limiting for ICMP */ u32 n_redirects; unsigned long rate_last; /* * Once inet_peer is queued for deletion (refcnt == 0), following field * is not available: rid * We can share memory with rcu_head to help keep inet_peer small. */ union { struct { atomic_t rid; /* Frag reception counter */ }; struct rcu_head rcu; }; /* following fields might be frequently dirtied */ __u32 dtime; /* the time of last use of not referenced entries */ refcount_t refcnt; }; struct inet_peer_base { struct rb_root rb_root; seqlock_t lock; int total; }; void inet_peer_base_init(struct inet_peer_base *); void inet_initpeers(void) __init; #define INETPEER_METRICS_NEW (~(u32) 0) static inline void inetpeer_set_addr_v4(struct inetpeer_addr *iaddr, __be32 ip) { iaddr->a4.addr = ip; iaddr->a4.vif = 0; iaddr->family = AF_INET; } static inline __be32 inetpeer_get_addr_v4(struct inetpeer_addr *iaddr) { return iaddr->a4.addr; } static inline void inetpeer_set_addr_v6(struct inetpeer_addr *iaddr, struct in6_addr *in6) { iaddr->a6 = *in6; iaddr->family = AF_INET6; } static inline struct in6_addr *inetpeer_get_addr_v6(struct inetpeer_addr *iaddr) { return &iaddr->a6; } /* can be called with or without local BH being disabled */ struct inet_peer *inet_getpeer(struct inet_peer_base *base, const struct inetpeer_addr *daddr); static inline struct inet_peer *inet_getpeer_v4(struct inet_peer_base *base, __be32 v4daddr, int vif) { struct inetpeer_addr daddr; daddr.a4.addr = v4daddr; daddr.a4.vif = vif; daddr.family = AF_INET; return inet_getpeer(base, &daddr); } static inline struct inet_peer *inet_getpeer_v6(struct inet_peer_base *base, const struct in6_addr *v6daddr) { struct inetpeer_addr daddr; daddr.a6 = *v6daddr; daddr.family = AF_INET6; return inet_getpeer(base, &daddr); } static inline int inetpeer_addr_cmp(const struct inetpeer_addr *a, const struct inetpeer_addr *b) { int i, n; if (a->family == AF_INET) n = sizeof(a->a4) / sizeof(u32); else n = sizeof(a->a6) / sizeof(u32); for (i = 0; i < n; i++) { if (a->key[i] == b->key[i]) continue; if (a->key[i] < b->key[i]) return -1; return 1; } return 0; } /* can be called from BH context or outside */ void inet_putpeer(struct inet_peer *p); bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout); void inetpeer_invalidate_tree(struct inet_peer_base *); #endif /* _NET_INETPEER_H */
292 292 1 288 42 43 46 39 39 241 273 272 272 2 3 3 1 2 3 2 2 2 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 // SPDX-License-Identifier: GPL-2.0-or-later /* V4L2 device support. Copyright (C) 2008 Hans Verkuil <hverkuil@xs4all.nl> */ #include <linux/types.h> #include <linux/ioctl.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/videodev2.h> #include <media/v4l2-device.h> #include <media/v4l2-ctrls.h> int v4l2_device_register(struct device *dev, struct v4l2_device *v4l2_dev) { if (v4l2_dev == NULL) return -EINVAL; INIT_LIST_HEAD(&v4l2_dev->subdevs); spin_lock_init(&v4l2_dev->lock); v4l2_prio_init(&v4l2_dev->prio); kref_init(&v4l2_dev->ref); get_device(dev); v4l2_dev->dev = dev; if (dev == NULL) { /* If dev == NULL, then name must be filled in by the caller */ if (WARN_ON(!v4l2_dev->name[0])) return -EINVAL; return 0; } /* Set name to driver name + device name if it is empty. */ if (!v4l2_dev->name[0]) snprintf(v4l2_dev->name, sizeof(v4l2_dev->name), "%s %s", dev->driver->name, dev_name(dev)); if (!dev_get_drvdata(dev)) dev_set_drvdata(dev, v4l2_dev); return 0; } EXPORT_SYMBOL_GPL(v4l2_device_register); static void v4l2_device_release(struct kref *ref) { struct v4l2_device *v4l2_dev = container_of(ref, struct v4l2_device, ref); if (v4l2_dev->release) v4l2_dev->release(v4l2_dev); } int v4l2_device_put(struct v4l2_device *v4l2_dev) { return kref_put(&v4l2_dev->ref, v4l2_device_release); } EXPORT_SYMBOL_GPL(v4l2_device_put); int v4l2_device_set_name(struct v4l2_device *v4l2_dev, const char *basename, atomic_t *instance) { int num = atomic_inc_return(instance) - 1; int len = strlen(basename); if (basename[len - 1] >= '0' && basename[len - 1] <= '9') snprintf(v4l2_dev->name, sizeof(v4l2_dev->name), "%s-%d", basename, num); else snprintf(v4l2_dev->name, sizeof(v4l2_dev->name), "%s%d", basename, num); return num; } EXPORT_SYMBOL_GPL(v4l2_device_set_name); void v4l2_device_disconnect(struct v4l2_device *v4l2_dev) { if (v4l2_dev->dev == NULL) return; if (dev_get_drvdata(v4l2_dev->dev) == v4l2_dev) dev_set_drvdata(v4l2_dev->dev, NULL); put_device(v4l2_dev->dev); v4l2_dev->dev = NULL; } EXPORT_SYMBOL_GPL(v4l2_device_disconnect); void v4l2_device_unregister(struct v4l2_device *v4l2_dev) { struct v4l2_subdev *sd, *next; /* Just return if v4l2_dev is NULL or if it was already * unregistered before. */ if (v4l2_dev == NULL || !v4l2_dev->name[0]) return; v4l2_device_disconnect(v4l2_dev); /* Unregister subdevs */ list_for_each_entry_safe(sd, next, &v4l2_dev->subdevs, list) { v4l2_device_unregister_subdev(sd); if (sd->flags & V4L2_SUBDEV_FL_IS_I2C) v4l2_i2c_subdev_unregister(sd); else if (sd->flags & V4L2_SUBDEV_FL_IS_SPI) v4l2_spi_subdev_unregister(sd); } /* Mark as unregistered, thus preventing duplicate unregistrations */ v4l2_dev->name[0] = '\0'; } EXPORT_SYMBOL_GPL(v4l2_device_unregister); int __v4l2_device_register_subdev(struct v4l2_device *v4l2_dev, struct v4l2_subdev *sd, struct module *module) { int err; /* Check for valid input */ if (!v4l2_dev || !sd || sd->v4l2_dev || !sd->name[0]) return -EINVAL; /* * The reason to acquire the module here is to avoid unloading * a module of sub-device which is registered to a media * device. To make it possible to unload modules for media * devices that also register sub-devices, do not * try_module_get() such sub-device owners. */ sd->owner_v4l2_dev = v4l2_dev->dev && v4l2_dev->dev->driver && module == v4l2_dev->dev->driver->owner; if (!sd->owner_v4l2_dev && !try_module_get(module)) return -ENODEV; sd->v4l2_dev = v4l2_dev; /* This just returns 0 if either of the two args is NULL */ err = v4l2_ctrl_add_handler(v4l2_dev->ctrl_handler, sd->ctrl_handler, NULL, true); if (err) goto error_module; #if defined(CONFIG_MEDIA_CONTROLLER) /* Register the entity. */ if (v4l2_dev->mdev) { err = media_device_register_entity(v4l2_dev->mdev, &sd->entity); if (err < 0) goto error_module; } #endif if (sd->internal_ops && sd->internal_ops->registered) { err = sd->internal_ops->registered(sd); if (err) goto error_unregister; } sd->owner = module; spin_lock(&v4l2_dev->lock); list_add_tail(&sd->list, &v4l2_dev->subdevs); spin_unlock(&v4l2_dev->lock); return 0; error_unregister: #if defined(CONFIG_MEDIA_CONTROLLER) media_device_unregister_entity(&sd->entity); #endif error_module: if (!sd->owner_v4l2_dev) module_put(sd->owner); sd->v4l2_dev = NULL; return err; } EXPORT_SYMBOL_GPL(__v4l2_device_register_subdev); static void v4l2_subdev_release(struct v4l2_subdev *sd) { struct module *owner = !sd->owner_v4l2_dev ? sd->owner : NULL; if (sd->internal_ops && sd->internal_ops->release) sd->internal_ops->release(sd); sd->devnode = NULL; module_put(owner); } static void v4l2_device_release_subdev_node(struct video_device *vdev) { v4l2_subdev_release(video_get_drvdata(vdev)); kfree(vdev); } int __v4l2_device_register_subdev_nodes(struct v4l2_device *v4l2_dev, bool read_only) { struct video_device *vdev; struct v4l2_subdev *sd; int err; /* Register a device node for every subdev marked with the * V4L2_SUBDEV_FL_HAS_DEVNODE flag. */ list_for_each_entry(sd, &v4l2_dev->subdevs, list) { if (!(sd->flags & V4L2_SUBDEV_FL_HAS_DEVNODE)) continue; if (sd->devnode) continue; vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); if (!vdev) { err = -ENOMEM; goto clean_up; } video_set_drvdata(vdev, sd); strscpy(vdev->name, sd->name, sizeof(vdev->name)); vdev->dev_parent = sd->dev; vdev->v4l2_dev = v4l2_dev; vdev->fops = &v4l2_subdev_fops; vdev->release = v4l2_device_release_subdev_node; vdev->ctrl_handler = sd->ctrl_handler; if (read_only) set_bit(V4L2_FL_SUBDEV_RO_DEVNODE, &vdev->flags); sd->devnode = vdev; err = __video_register_device(vdev, VFL_TYPE_SUBDEV, -1, 1, sd->owner); if (err < 0) { sd->devnode = NULL; kfree(vdev); goto clean_up; } #if defined(CONFIG_MEDIA_CONTROLLER) sd->entity.info.dev.major = VIDEO_MAJOR; sd->entity.info.dev.minor = vdev->minor; /* Interface is created by __video_register_device() */ if (vdev->v4l2_dev->mdev) { struct media_link *link; link = media_create_intf_link(&sd->entity, &vdev->intf_devnode->intf, MEDIA_LNK_FL_ENABLED | MEDIA_LNK_FL_IMMUTABLE); if (!link) { err = -ENOMEM; goto clean_up; } } #endif } return 0; clean_up: list_for_each_entry(sd, &v4l2_dev->subdevs, list) { if (!sd->devnode) break; video_unregister_device(sd->devnode); } return err; } EXPORT_SYMBOL_GPL(__v4l2_device_register_subdev_nodes); void v4l2_device_unregister_subdev(struct v4l2_subdev *sd) { struct v4l2_device *v4l2_dev; /* return if it isn't registered */ if (sd == NULL || sd->v4l2_dev == NULL) return; v4l2_dev = sd->v4l2_dev; spin_lock(&v4l2_dev->lock); list_del(&sd->list); spin_unlock(&v4l2_dev->lock); if (sd->internal_ops && sd->internal_ops->unregistered) sd->internal_ops->unregistered(sd); sd->v4l2_dev = NULL; #if defined(CONFIG_MEDIA_CONTROLLER) if (v4l2_dev->mdev) { /* * No need to explicitly remove links, as both pads and * links are removed by the function below, in the right order */ media_device_unregister_entity(&sd->entity); } #endif if (sd->devnode) video_unregister_device(sd->devnode); else v4l2_subdev_release(sd); } EXPORT_SYMBOL_GPL(v4l2_device_unregister_subdev);
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C)2006 USAGI/WIDE Project * * Author: * Masahide NAKAMURA @USAGI <masahide.nakamura.cz@hitachi.com> * * Based on net/netfilter/xt_tcpudp.c */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/types.h> #include <linux/module.h> #include <net/ip.h> #include <linux/ipv6.h> #include <net/ipv6.h> #include <net/mip6.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6t_mh.h> MODULE_DESCRIPTION("Xtables: IPv6 Mobility Header match"); MODULE_LICENSE("GPL"); /* Returns 1 if the type is matched by the range, 0 otherwise */ static inline bool type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert) { return (type >= min && type <= max) ^ invert; } static bool mh_mt6(const struct sk_buff *skb, struct xt_action_param *par) { struct ip6_mh _mh; const struct ip6_mh *mh; const struct ip6t_mh *mhinfo = par->matchinfo; /* Must not be a fragment. */ if (par->fragoff != 0) return false; mh = skb_header_pointer(skb, par->thoff, sizeof(_mh), &_mh); if (mh == NULL) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ pr_debug("Dropping evil MH tinygram.\n"); par->hotdrop = true; return false; } if (mh->ip6mh_proto != IPPROTO_NONE) { pr_debug("Dropping invalid MH Payload Proto: %u\n", mh->ip6mh_proto); par->hotdrop = true; return false; } return type_match(mhinfo->types[0], mhinfo->types[1], mh->ip6mh_type, !!(mhinfo->invflags & IP6T_MH_INV_TYPE)); } static int mh_mt6_check(const struct xt_mtchk_param *par) { const struct ip6t_mh *mhinfo = par->matchinfo; /* Must specify no unknown invflags */ return (mhinfo->invflags & ~IP6T_MH_INV_MASK) ? -EINVAL : 0; } static struct xt_match mh_mt6_reg __read_mostly = { .name = "mh", .family = NFPROTO_IPV6, .checkentry = mh_mt6_check, .match = mh_mt6, .matchsize = sizeof(struct ip6t_mh), .proto = IPPROTO_MH, .me = THIS_MODULE, }; static int __init mh_mt6_init(void) { return xt_register_match(&mh_mt6_reg); } static void __exit mh_mt6_exit(void) { xt_unregister_match(&mh_mt6_reg); } module_init(mh_mt6_init); module_exit(mh_mt6_exit);
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. */ #ifndef __XFS_BMAP_H__ #define __XFS_BMAP_H__ struct getbmap; struct xfs_bmbt_irec; struct xfs_ifork; struct xfs_inode; struct xfs_mount; struct xfs_trans; struct xfs_alloc_arg; /* * Argument structure for xfs_bmap_alloc. */ struct xfs_bmalloca { struct xfs_trans *tp; /* transaction pointer */ struct xfs_inode *ip; /* incore inode pointer */ struct xfs_bmbt_irec prev; /* extent before the new one */ struct xfs_bmbt_irec got; /* extent after, or delayed */ xfs_fileoff_t offset; /* offset in file filling in */ xfs_extlen_t length; /* i/o length asked/allocated */ xfs_fsblock_t blkno; /* starting block of new extent */ struct xfs_btree_cur *cur; /* btree cursor */ struct xfs_iext_cursor icur; /* incore extent cursor */ int nallocs;/* number of extents alloc'd */ int logflags;/* flags for transaction logging */ xfs_extlen_t total; /* total blocks needed for xaction */ xfs_extlen_t minlen; /* minimum allocation size (blocks) */ xfs_extlen_t minleft; /* amount must be left after alloc */ bool eof; /* set if allocating past last extent */ bool wasdel; /* replacing a delayed allocation */ bool aeof; /* allocated space at eof */ bool conv; /* overwriting unwritten extents */ int datatype;/* data type being allocated */ uint32_t flags; }; #define XFS_BMAP_MAX_NMAP 4 /* * Flags for xfs_bmapi_* */ #define XFS_BMAPI_ENTIRE (1u << 0) /* return entire extent untrimmed */ #define XFS_BMAPI_METADATA (1u << 1) /* mapping metadata not user data */ #define XFS_BMAPI_ATTRFORK (1u << 2) /* use attribute fork not data */ #define XFS_BMAPI_PREALLOC (1u << 3) /* preallocating unwritten space */ #define XFS_BMAPI_CONTIG (1u << 4) /* must allocate only one extent */ /* * unwritten extent conversion - this needs write cache flushing and no additional * allocation alignments. When specified with XFS_BMAPI_PREALLOC it converts * from written to unwritten, otherwise convert from unwritten to written. */ #define XFS_BMAPI_CONVERT (1u << 5) /* * allocate zeroed extents - this requires all newly allocated user data extents * to be initialised to zero. It will be ignored if XFS_BMAPI_METADATA is set. * Use in conjunction with XFS_BMAPI_CONVERT to convert unwritten extents found * during the allocation range to zeroed written extents. */ #define XFS_BMAPI_ZERO (1u << 6) /* * Map the inode offset to the block given in ap->firstblock. Primarily * used for reflink. The range must be in a hole, and this flag cannot be * turned on with PREALLOC or CONVERT, and cannot be used on the attr fork. * * For bunmapi, this flag unmaps the range without adjusting quota, reducing * refcount, or freeing the blocks. */ #define XFS_BMAPI_REMAP (1u << 7) /* Map something in the CoW fork. */ #define XFS_BMAPI_COWFORK (1u << 8) /* Skip online discard of freed extents */ #define XFS_BMAPI_NODISCARD (1u << 9) /* Do not update the rmap btree. Used for reconstructing bmbt from rmapbt. */ #define XFS_BMAPI_NORMAP (1u << 10) #define XFS_BMAPI_FLAGS \ { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ { XFS_BMAPI_METADATA, "METADATA" }, \ { XFS_BMAPI_ATTRFORK, "ATTRFORK" }, \ { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ { XFS_BMAPI_CONTIG, "CONTIG" }, \ { XFS_BMAPI_CONVERT, "CONVERT" }, \ { XFS_BMAPI_ZERO, "ZERO" }, \ { XFS_BMAPI_REMAP, "REMAP" }, \ { XFS_BMAPI_COWFORK, "COWFORK" }, \ { XFS_BMAPI_NODISCARD, "NODISCARD" }, \ { XFS_BMAPI_NORMAP, "NORMAP" } static inline int xfs_bmapi_aflag(int w) { return (w == XFS_ATTR_FORK ? XFS_BMAPI_ATTRFORK : (w == XFS_COW_FORK ? XFS_BMAPI_COWFORK : 0)); } static inline int xfs_bmapi_whichfork(uint32_t bmapi_flags) { if (bmapi_flags & XFS_BMAPI_COWFORK) return XFS_COW_FORK; else if (bmapi_flags & XFS_BMAPI_ATTRFORK) return XFS_ATTR_FORK; return XFS_DATA_FORK; } void xfs_bmap_alloc_account(struct xfs_bmalloca *ap); /* * Special values for xfs_bmbt_irec_t br_startblock field. */ #define DELAYSTARTBLOCK ((xfs_fsblock_t)-1LL) #define HOLESTARTBLOCK ((xfs_fsblock_t)-2LL) /* * Flags for xfs_bmap_add_extent*. */ #define BMAP_LEFT_CONTIG (1u << 0) #define BMAP_RIGHT_CONTIG (1u << 1) #define BMAP_LEFT_FILLING (1u << 2) #define BMAP_RIGHT_FILLING (1u << 3) #define BMAP_LEFT_DELAY (1u << 4) #define BMAP_RIGHT_DELAY (1u << 5) #define BMAP_LEFT_VALID (1u << 6) #define BMAP_RIGHT_VALID (1u << 7) #define BMAP_ATTRFORK (1u << 8) #define BMAP_COWFORK (1u << 9) #define XFS_BMAP_EXT_FLAGS \ { BMAP_LEFT_CONTIG, "LC" }, \ { BMAP_RIGHT_CONTIG, "RC" }, \ { BMAP_LEFT_FILLING, "LF" }, \ { BMAP_RIGHT_FILLING, "RF" }, \ { BMAP_ATTRFORK, "ATTR" }, \ { BMAP_COWFORK, "COW" } /* Return true if the extent is an allocated extent, written or not. */ static inline bool xfs_bmap_is_real_extent(const struct xfs_bmbt_irec *irec) { return irec->br_startblock != HOLESTARTBLOCK && irec->br_startblock != DELAYSTARTBLOCK && !isnullstartblock(irec->br_startblock); } /* * Return true if the extent is a real, allocated extent, or false if it is a * delayed allocation, and unwritten extent or a hole. */ static inline bool xfs_bmap_is_written_extent(const struct xfs_bmbt_irec *irec) { return xfs_bmap_is_real_extent(irec) && irec->br_state != XFS_EXT_UNWRITTEN; } /* * Check the mapping for obviously garbage allocations that could trash the * filesystem immediately. */ #define xfs_valid_startblock(ip, startblock) \ ((startblock) != 0 || XFS_IS_REALTIME_INODE(ip)) int xfs_bmap_longest_free_extent(struct xfs_perag *pag, struct xfs_trans *tp, xfs_extlen_t *blen); void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno, xfs_filblks_t len); unsigned int xfs_bmap_compute_attr_offset(struct xfs_mount *mp); int xfs_bmap_add_attrfork(struct xfs_trans *tp, struct xfs_inode *ip, int size, int rsvd); void xfs_bmap_local_to_extents_empty(struct xfs_trans *tp, struct xfs_inode *ip, int whichfork); int xfs_bmap_local_to_extents(struct xfs_trans *tp, struct xfs_inode *ip, xfs_extlen_t total, int *logflagsp, int whichfork, void (*init_fn)(struct xfs_trans *tp, struct xfs_buf *bp, struct xfs_inode *ip, struct xfs_ifork *ifp, void *priv), void *priv); void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork); int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip, xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork); int xfs_bmap_last_before(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t *last_block, int whichfork); int xfs_bmap_last_offset(struct xfs_inode *ip, xfs_fileoff_t *unused, int whichfork); int xfs_bmapi_read(struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len, struct xfs_bmbt_irec *mval, int *nmap, uint32_t flags); int xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len, uint32_t flags, xfs_extlen_t total, struct xfs_bmbt_irec *mval, int *nmap); int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len, uint32_t flags, xfs_extnum_t nexts, int *done); void xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork, struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *del); void xfs_bmap_del_extent_cow(struct xfs_inode *ip, struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *del); uint xfs_default_attroffset(struct xfs_inode *ip); int xfs_bmap_collapse_extents(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb, bool *done); int xfs_bmap_can_insert_extents(struct xfs_inode *ip, xfs_fileoff_t off, xfs_fileoff_t shift); int xfs_bmap_insert_extents(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb, bool *done, xfs_fileoff_t stop_fsb); int xfs_bmap_split_extent(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t split_offset); int xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork, xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc, struct xfs_bmbt_irec *got, struct xfs_iext_cursor *cur, int eof); int xfs_bmapi_convert_delalloc(struct xfs_inode *ip, int whichfork, xfs_off_t offset, struct iomap *iomap, unsigned int *seq); int xfs_bmap_add_extent_unwritten_real(struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, struct xfs_iext_cursor *icur, struct xfs_btree_cur **curp, struct xfs_bmbt_irec *new, int *logflagsp); xfs_extlen_t xfs_bmapi_minleft(struct xfs_trans *tp, struct xfs_inode *ip, int fork); int xfs_bmap_btalloc_low_space(struct xfs_bmalloca *ap, struct xfs_alloc_arg *args); enum xfs_bmap_intent_type { XFS_BMAP_MAP = 1, XFS_BMAP_UNMAP, }; #define XFS_BMAP_INTENT_STRINGS \ { XFS_BMAP_MAP, "map" }, \ { XFS_BMAP_UNMAP, "unmap" } struct xfs_bmap_intent { struct list_head bi_list; enum xfs_bmap_intent_type bi_type; int bi_whichfork; struct xfs_inode *bi_owner; struct xfs_group *bi_group; struct xfs_bmbt_irec bi_bmap; }; int xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_bmap_intent *bi); void xfs_bmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, struct xfs_bmbt_irec *imap); void xfs_bmap_unmap_extent(struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, struct xfs_bmbt_irec *imap); static inline uint32_t xfs_bmap_fork_to_state(int whichfork) { switch (whichfork) { case XFS_ATTR_FORK: return BMAP_ATTRFORK; case XFS_COW_FORK: return BMAP_COWFORK; default: return 0; } } xfs_failaddr_t xfs_bmap_validate_extent_raw(struct xfs_mount *mp, bool rtfile, int whichfork, struct xfs_bmbt_irec *irec); xfs_failaddr_t xfs_bmap_validate_extent(struct xfs_inode *ip, int whichfork, struct xfs_bmbt_irec *irec); int xfs_bmap_complain_bad_rec(struct xfs_inode *ip, int whichfork, xfs_failaddr_t fa, const struct xfs_bmbt_irec *irec); int xfs_bmapi_remap(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len, xfs_fsblock_t startblock, uint32_t flags); int xfs_bunmapi_range(struct xfs_trans **tpp, struct xfs_inode *ip, uint32_t flags, xfs_fileoff_t startoff, xfs_fileoff_t endoff); extern struct kmem_cache *xfs_bmap_intent_cache; int __init xfs_bmap_intent_init_cache(void); void xfs_bmap_intent_destroy_cache(void); typedef int (*xfs_bmap_query_range_fn)( struct xfs_btree_cur *cur, struct xfs_bmbt_irec *rec, void *priv); int xfs_bmap_query_all(struct xfs_btree_cur *cur, xfs_bmap_query_range_fn fn, void *priv); xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip); xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip); #endif /* __XFS_BMAP_H__ */
6 1 1 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 // SPDX-License-Identifier: GPL-2.0-only /* * (C) 2008-2009 Pablo Neira Ayuso <pablo@netfilter.org> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <linux/jhash.h> #include <linux/ip.h> #include <net/ipv6.h> #include <linux/netfilter/x_tables.h> #include <net/netfilter/nf_conntrack.h> #include <linux/netfilter/xt_cluster.h> static inline u32 nf_ct_orig_ipv4_src(const struct nf_conn *ct) { return (__force u32)ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; } static inline const u32 *nf_ct_orig_ipv6_src(const struct nf_conn *ct) { return (__force u32 *)ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6; } static inline u_int32_t xt_cluster_hash_ipv4(u_int32_t ip, const struct xt_cluster_match_info *info) { return jhash_1word(ip, info->hash_seed); } static inline u_int32_t xt_cluster_hash_ipv6(const void *ip, const struct xt_cluster_match_info *info) { return jhash2(ip, NF_CT_TUPLE_L3SIZE / sizeof(__u32), info->hash_seed); } static inline u_int32_t xt_cluster_hash(const struct nf_conn *ct, const struct xt_cluster_match_info *info) { u_int32_t hash = 0; switch(nf_ct_l3num(ct)) { case AF_INET: hash = xt_cluster_hash_ipv4(nf_ct_orig_ipv4_src(ct), info); break; case AF_INET6: hash = xt_cluster_hash_ipv6(nf_ct_orig_ipv6_src(ct), info); break; default: WARN_ON(1); break; } return reciprocal_scale(hash, info->total_nodes); } static inline bool xt_cluster_is_multicast_addr(const struct sk_buff *skb, u_int8_t family) { bool is_multicast = false; switch(family) { case NFPROTO_IPV4: is_multicast = ipv4_is_multicast(ip_hdr(skb)->daddr); break; case NFPROTO_IPV6: is_multicast = ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr); break; default: WARN_ON(1); break; } return is_multicast; } static bool xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par) { struct sk_buff *pskb = (struct sk_buff *)skb; const struct xt_cluster_match_info *info = par->matchinfo; const struct nf_conn *ct; enum ip_conntrack_info ctinfo; unsigned long hash; /* This match assumes that all nodes see the same packets. This can be * achieved if the switch that connects the cluster nodes support some * sort of 'port mirroring'. However, if your switch does not support * this, your cluster nodes can reply ARP request using a multicast MAC * address. Thus, your switch will flood the same packets to the * cluster nodes with the same multicast MAC address. Using a multicast * link address is a RFC 1812 (section 3.3.2) violation, but this works * fine in practise. * * Unfortunately, if you use the multicast MAC address, the link layer * sets skbuff's pkt_type to PACKET_MULTICAST, which is not accepted * by TCP and others for packets coming to this node. For that reason, * this match mangles skbuff's pkt_type if it detects a packet * addressed to a unicast address but using PACKET_MULTICAST. Yes, I * know, matches should not alter packets, but we are doing this here * because we would need to add a PKTTYPE target for this sole purpose. */ if (!xt_cluster_is_multicast_addr(skb, xt_family(par)) && skb->pkt_type == PACKET_MULTICAST) { pskb->pkt_type = PACKET_HOST; } ct = nf_ct_get(skb, &ctinfo); if (ct == NULL) return false; if (ct->master) hash = xt_cluster_hash(ct->master, info); else hash = xt_cluster_hash(ct, info); return !!((1 << hash) & info->node_mask) ^ !!(info->flags & XT_CLUSTER_F_INV); } static int xt_cluster_mt_checkentry(const struct xt_mtchk_param *par) { struct xt_cluster_match_info *info = par->matchinfo; int ret; if (info->total_nodes > XT_CLUSTER_NODES_MAX) { pr_info_ratelimited("you have exceeded the maximum number of cluster nodes (%u > %u)\n", info->total_nodes, XT_CLUSTER_NODES_MAX); return -EINVAL; } if (info->node_mask >= (1ULL << info->total_nodes)) { pr_info_ratelimited("node mask cannot exceed total number of nodes\n"); return -EDOM; } ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) pr_info_ratelimited("cannot load conntrack support for proto=%u\n", par->family); return ret; } static void xt_cluster_mt_destroy(const struct xt_mtdtor_param *par) { nf_ct_netns_put(par->net, par->family); } static struct xt_match xt_cluster_match[] __read_mostly = { { .name = "cluster", .family = NFPROTO_IPV4, .match = xt_cluster_mt, .checkentry = xt_cluster_mt_checkentry, .matchsize = sizeof(struct xt_cluster_match_info), .destroy = xt_cluster_mt_destroy, .me = THIS_MODULE, }, #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "cluster", .family = NFPROTO_IPV6, .match = xt_cluster_mt, .checkentry = xt_cluster_mt_checkentry, .matchsize = sizeof(struct xt_cluster_match_info), .destroy = xt_cluster_mt_destroy, .me = THIS_MODULE, }, #endif }; static int __init xt_cluster_mt_init(void) { return xt_register_matches(xt_cluster_match, ARRAY_SIZE(xt_cluster_match)); } static void __exit xt_cluster_mt_fini(void) { xt_unregister_matches(xt_cluster_match, ARRAY_SIZE(xt_cluster_match)); } MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Xtables: hash-based cluster match"); MODULE_ALIAS("ipt_cluster"); MODULE_ALIAS("ip6t_cluster"); module_init(xt_cluster_mt_init); module_exit(xt_cluster_mt_fini);
12098 162 12310 34 12021 12005 82 5 1 1 1 1 2 537 5 11 107 80 7 87 16 10 454 128 129 19 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_NET_SCM_H #define __LINUX_NET_SCM_H #include <linux/limits.h> #include <linux/net.h> #include <linux/cred.h> #include <linux/file.h> #include <linux/security.h> #include <linux/pid.h> #include <linux/nsproxy.h> #include <linux/sched/signal.h> #include <net/compat.h> /* Well, we should have at least one descriptor open * to accept passed FDs 8) */ #define SCM_MAX_FD 253 struct scm_creds { u32 pid; kuid_t uid; kgid_t gid; }; #ifdef CONFIG_UNIX struct unix_edge; #endif struct scm_fp_list { short count; short count_unix; short max; #ifdef CONFIG_UNIX bool inflight; bool dead; struct list_head vertices; struct unix_edge *edges; #endif struct user_struct *user; struct file *fp[SCM_MAX_FD]; }; struct scm_cookie { struct pid *pid; /* Skb credentials */ struct scm_fp_list *fp; /* Passed files */ struct scm_creds creds; /* Skb credentials */ #ifdef CONFIG_SECURITY_NETWORK u32 secid; /* Passed security ID */ #endif }; void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm); void scm_detach_fds_compat(struct msghdr *msg, struct scm_cookie *scm); int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm); void __scm_destroy(struct scm_cookie *scm); struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl); #ifdef CONFIG_SECURITY_NETWORK static __inline__ void unix_get_peersec_dgram(struct socket *sock, struct scm_cookie *scm) { security_socket_getpeersec_dgram(sock, NULL, &scm->secid); } #else static __inline__ void unix_get_peersec_dgram(struct socket *sock, struct scm_cookie *scm) { } #endif /* CONFIG_SECURITY_NETWORK */ static __inline__ void scm_set_cred(struct scm_cookie *scm, struct pid *pid, kuid_t uid, kgid_t gid) { scm->pid = get_pid(pid); scm->creds.pid = pid_vnr(pid); scm->creds.uid = uid; scm->creds.gid = gid; } static __inline__ void scm_destroy_cred(struct scm_cookie *scm) { put_pid(scm->pid); scm->pid = NULL; } static __inline__ void scm_destroy(struct scm_cookie *scm) { scm_destroy_cred(scm); if (scm->fp) __scm_destroy(scm); } static __inline__ int scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm, bool forcecreds) { memset(scm, 0, sizeof(*scm)); scm->creds.uid = INVALID_UID; scm->creds.gid = INVALID_GID; if (forcecreds) scm_set_cred(scm, task_tgid(current), current_uid(), current_gid()); unix_get_peersec_dgram(sock, scm); if (msg->msg_controllen <= 0) return 0; return __scm_send(sock, msg, scm); } #ifdef CONFIG_SECURITY_NETWORK static inline void scm_passec(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm) { struct lsm_context ctx; int err; if (test_bit(SOCK_PASSSEC, &sock->flags)) { err = security_secid_to_secctx(scm->secid, &ctx); if (err >= 0) { put_cmsg(msg, SOL_SOCKET, SCM_SECURITY, ctx.len, ctx.context); security_release_secctx(&ctx); } } } static inline bool scm_has_secdata(struct socket *sock) { return test_bit(SOCK_PASSSEC, &sock->flags); } #else static inline void scm_passec(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm) { } static inline bool scm_has_secdata(struct socket *sock) { return false; } #endif /* CONFIG_SECURITY_NETWORK */ static __inline__ void scm_pidfd_recv(struct msghdr *msg, struct scm_cookie *scm) { struct file *pidfd_file = NULL; int len, pidfd; /* put_cmsg() doesn't return an error if CMSG is truncated, * that's why we need to opencode these checks here. */ if (msg->msg_flags & MSG_CMSG_COMPAT) len = sizeof(struct compat_cmsghdr) + sizeof(int); else len = sizeof(struct cmsghdr) + sizeof(int); if (msg->msg_controllen < len) { msg->msg_flags |= MSG_CTRUNC; return; } if (!scm->pid) return; pidfd = pidfd_prepare(scm->pid, 0, &pidfd_file); if (put_cmsg(msg, SOL_SOCKET, SCM_PIDFD, sizeof(int), &pidfd)) { if (pidfd_file) { put_unused_fd(pidfd); fput(pidfd_file); } return; } if (pidfd_file) fd_install(pidfd, pidfd_file); } static inline bool __scm_recv_common(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm, int flags) { if (!msg->msg_control) { if (test_bit(SOCK_PASSCRED, &sock->flags) || test_bit(SOCK_PASSPIDFD, &sock->flags) || scm->fp || scm_has_secdata(sock)) msg->msg_flags |= MSG_CTRUNC; scm_destroy(scm); return false; } if (test_bit(SOCK_PASSCRED, &sock->flags)) { struct user_namespace *current_ns = current_user_ns(); struct ucred ucreds = { .pid = scm->creds.pid, .uid = from_kuid_munged(current_ns, scm->creds.uid), .gid = from_kgid_munged(current_ns, scm->creds.gid), }; put_cmsg(msg, SOL_SOCKET, SCM_CREDENTIALS, sizeof(ucreds), &ucreds); } scm_passec(sock, msg, scm); if (scm->fp) scm_detach_fds(msg, scm); return true; } static inline void scm_recv(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm, int flags) { if (!__scm_recv_common(sock, msg, scm, flags)) return; scm_destroy_cred(scm); } static inline void scm_recv_unix(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm, int flags) { if (!__scm_recv_common(sock, msg, scm, flags)) return; if (test_bit(SOCK_PASSPIDFD, &sock->flags)) scm_pidfd_recv(msg, scm); scm_destroy_cred(scm); } static inline int scm_recv_one_fd(struct file *f, int __user *ufd, unsigned int flags) { if (!ufd) return -EFAULT; return receive_fd(f, ufd, flags); } #endif /* __LINUX_NET_SCM_H */
129 1 30 103 5 1 475 475 33 32 8 33 31 81 6 4 5 81 71 27 32 8 2 2 1 71 1 81 2 72 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_IP_TUNNELS_H #define __NET_IP_TUNNELS_H 1 #include <linux/if_tunnel.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/socket.h> #include <linux/types.h> #include <linux/u64_stats_sync.h> #include <linux/bitops.h> #include <net/dsfield.h> #include <net/gro_cells.h> #include <net/inet_ecn.h> #include <net/netns/generic.h> #include <net/rtnetlink.h> #include <net/lwtunnel.h> #include <net/dst_cache.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> #include <net/ip6_fib.h> #include <net/ip6_route.h> #endif /* Keep error state on tunnel for 30 sec */ #define IPTUNNEL_ERR_TIMEO (30*HZ) /* Used to memset ip_tunnel padding. */ #define IP_TUNNEL_KEY_SIZE offsetofend(struct ip_tunnel_key, tp_dst) /* Used to memset ipv4 address padding. */ #define IP_TUNNEL_KEY_IPV4_PAD offsetofend(struct ip_tunnel_key, u.ipv4.dst) #define IP_TUNNEL_KEY_IPV4_PAD_LEN \ (sizeof_field(struct ip_tunnel_key, u) - \ sizeof_field(struct ip_tunnel_key, u.ipv4)) #define __ipt_flag_op(op, ...) \ op(__VA_ARGS__, __IP_TUNNEL_FLAG_NUM) #define IP_TUNNEL_DECLARE_FLAGS(...) \ __ipt_flag_op(DECLARE_BITMAP, __VA_ARGS__) #define ip_tunnel_flags_zero(...) __ipt_flag_op(bitmap_zero, __VA_ARGS__) #define ip_tunnel_flags_copy(...) __ipt_flag_op(bitmap_copy, __VA_ARGS__) #define ip_tunnel_flags_and(...) __ipt_flag_op(bitmap_and, __VA_ARGS__) #define ip_tunnel_flags_or(...) __ipt_flag_op(bitmap_or, __VA_ARGS__) #define ip_tunnel_flags_empty(...) \ __ipt_flag_op(bitmap_empty, __VA_ARGS__) #define ip_tunnel_flags_intersect(...) \ __ipt_flag_op(bitmap_intersects, __VA_ARGS__) #define ip_tunnel_flags_subset(...) \ __ipt_flag_op(bitmap_subset, __VA_ARGS__) struct ip_tunnel_key { __be64 tun_id; union { struct { __be32 src; __be32 dst; } ipv4; struct { struct in6_addr src; struct in6_addr dst; } ipv6; } u; IP_TUNNEL_DECLARE_FLAGS(tun_flags); __be32 label; /* Flow Label for IPv6 */ u32 nhid; u8 tos; /* TOS for IPv4, TC for IPv6 */ u8 ttl; /* TTL for IPv4, HL for IPv6 */ __be16 tp_src; __be16 tp_dst; __u8 flow_flags; }; struct ip_tunnel_encap { u16 type; u16 flags; __be16 sport; __be16 dport; }; /* Flags for ip_tunnel_info mode. */ #define IP_TUNNEL_INFO_TX 0x01 /* represents tx tunnel parameters */ #define IP_TUNNEL_INFO_IPV6 0x02 /* key contains IPv6 addresses */ #define IP_TUNNEL_INFO_BRIDGE 0x04 /* represents a bridged tunnel id */ /* Maximum tunnel options length. */ #define IP_TUNNEL_OPTS_MAX \ GENMASK((sizeof_field(struct ip_tunnel_info, \ options_len) * BITS_PER_BYTE) - 1, 0) #define ip_tunnel_info_opts(info) \ _Generic(info, \ const struct ip_tunnel_info * : ((const void *)((info) + 1)),\ struct ip_tunnel_info * : ((void *)((info) + 1))\ ) struct ip_tunnel_info { struct ip_tunnel_key key; struct ip_tunnel_encap encap; #ifdef CONFIG_DST_CACHE struct dst_cache dst_cache; #endif u8 options_len; u8 mode; }; /* 6rd prefix/relay information */ #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel_6rd_parm { struct in6_addr prefix; __be32 relay_prefix; u16 prefixlen; u16 relay_prefixlen; }; #endif struct ip_tunnel_prl_entry { struct ip_tunnel_prl_entry __rcu *next; __be32 addr; u16 flags; struct rcu_head rcu_head; }; struct metadata_dst; /* Kernel-side variant of ip_tunnel_parm */ struct ip_tunnel_parm_kern { char name[IFNAMSIZ]; IP_TUNNEL_DECLARE_FLAGS(i_flags); IP_TUNNEL_DECLARE_FLAGS(o_flags); __be32 i_key; __be32 o_key; int link; struct iphdr iph; }; struct ip_tunnel { struct ip_tunnel __rcu *next; struct hlist_node hash_node; struct net_device *dev; netdevice_tracker dev_tracker; struct net *net; /* netns for packet i/o */ unsigned long err_time; /* Time when the last ICMP error * arrived */ int err_count; /* Number of arrived ICMP errors */ /* These four fields used only by GRE */ u32 i_seqno; /* The last seen seqno */ atomic_t o_seqno; /* The last output seqno */ int tun_hlen; /* Precalculated header length */ /* These four fields used only by ERSPAN */ u32 index; /* ERSPAN type II index */ u8 erspan_ver; /* ERSPAN version */ u8 dir; /* ERSPAN direction */ u16 hwid; /* ERSPAN hardware ID */ struct dst_cache dst_cache; struct ip_tunnel_parm_kern parms; int mlink; int encap_hlen; /* Encap header length (FOU,GUE) */ int hlen; /* tun_hlen + encap_hlen */ struct ip_tunnel_encap encap; /* for SIT */ #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel_6rd_parm ip6rd; #endif struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */ unsigned int prl_count; /* # of entries in PRL */ unsigned int ip_tnl_net_id; struct gro_cells gro_cells; __u32 fwmark; bool collect_md; bool ignore_df; }; struct tnl_ptk_info { IP_TUNNEL_DECLARE_FLAGS(flags); __be16 proto; __be32 key; __be32 seq; int hdr_len; }; #define PACKET_RCVD 0 #define PACKET_REJECT 1 #define PACKET_NEXT 2 #define IP_TNL_HASH_BITS 7 #define IP_TNL_HASH_SIZE (1 << IP_TNL_HASH_BITS) struct ip_tunnel_net { struct net_device *fb_tunnel_dev; struct rtnl_link_ops *rtnl_link_ops; struct hlist_head tunnels[IP_TNL_HASH_SIZE]; struct ip_tunnel __rcu *collect_md_tun; int type; }; static inline void ip_tunnel_set_options_present(unsigned long *flags) { IP_TUNNEL_DECLARE_FLAGS(present) = { }; __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present); __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); __set_bit(IP_TUNNEL_PFCP_OPT_BIT, present); ip_tunnel_flags_or(flags, flags, present); } static inline void ip_tunnel_clear_options_present(unsigned long *flags) { IP_TUNNEL_DECLARE_FLAGS(present) = { }; __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present); __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); __set_bit(IP_TUNNEL_PFCP_OPT_BIT, present); __ipt_flag_op(bitmap_andnot, flags, flags, present); } static inline bool ip_tunnel_is_options_present(const unsigned long *flags) { IP_TUNNEL_DECLARE_FLAGS(present) = { }; __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present); __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); __set_bit(IP_TUNNEL_PFCP_OPT_BIT, present); return ip_tunnel_flags_intersect(flags, present); } static inline bool ip_tunnel_flags_is_be16_compat(const unsigned long *flags) { IP_TUNNEL_DECLARE_FLAGS(supp) = { }; bitmap_set(supp, 0, BITS_PER_TYPE(__be16)); __set_bit(IP_TUNNEL_VTI_BIT, supp); return ip_tunnel_flags_subset(flags, supp); } static inline void ip_tunnel_flags_from_be16(unsigned long *dst, __be16 flags) { ip_tunnel_flags_zero(dst); bitmap_write(dst, be16_to_cpu(flags), 0, BITS_PER_TYPE(__be16)); __assign_bit(IP_TUNNEL_VTI_BIT, dst, flags & VTI_ISVTI); } static inline __be16 ip_tunnel_flags_to_be16(const unsigned long *flags) { __be16 ret; ret = cpu_to_be16(bitmap_read(flags, 0, BITS_PER_TYPE(__be16))); if (test_bit(IP_TUNNEL_VTI_BIT, flags)) ret |= VTI_ISVTI; return ret; } static inline void ip_tunnel_key_init(struct ip_tunnel_key *key, __be32 saddr, __be32 daddr, u8 tos, u8 ttl, __be32 label, __be16 tp_src, __be16 tp_dst, __be64 tun_id, const unsigned long *tun_flags) { key->tun_id = tun_id; key->u.ipv4.src = saddr; key->u.ipv4.dst = daddr; memset((unsigned char *)key + IP_TUNNEL_KEY_IPV4_PAD, 0, IP_TUNNEL_KEY_IPV4_PAD_LEN); key->tos = tos; key->ttl = ttl; key->label = label; ip_tunnel_flags_copy(key->tun_flags, tun_flags); /* For the tunnel types on the top of IPsec, the tp_src and tp_dst of * the upper tunnel are used. * E.g: GRE over IPSEC, the tp_src and tp_port are zero. */ key->tp_src = tp_src; key->tp_dst = tp_dst; /* Clear struct padding. */ if (sizeof(*key) != IP_TUNNEL_KEY_SIZE) memset((unsigned char *)key + IP_TUNNEL_KEY_SIZE, 0, sizeof(*key) - IP_TUNNEL_KEY_SIZE); } static inline bool ip_tunnel_dst_cache_usable(const struct sk_buff *skb, const struct ip_tunnel_info *info) { if (skb->mark) return false; return !info || !test_bit(IP_TUNNEL_NOCACHE_BIT, info->key.tun_flags); } static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info *tun_info) { return tun_info->mode & IP_TUNNEL_INFO_IPV6 ? AF_INET6 : AF_INET; } static inline __be64 key32_to_tunnel_id(__be32 key) { #ifdef __BIG_ENDIAN return (__force __be64)key; #else return (__force __be64)((__force u64)key << 32); #endif } /* Returns the least-significant 32 bits of a __be64. */ static inline __be32 tunnel_id_to_key32(__be64 tun_id) { #ifdef __BIG_ENDIAN return (__force __be32)tun_id; #else return (__force __be32)((__force u64)tun_id >> 32); #endif } #ifdef CONFIG_INET static inline void ip_tunnel_init_flow(struct flowi4 *fl4, int proto, __be32 daddr, __be32 saddr, __be32 key, __u8 tos, struct net *net, int oif, __u32 mark, __u32 tun_inner_hash, __u8 flow_flags) { memset(fl4, 0, sizeof(*fl4)); if (oif) { fl4->flowi4_l3mdev = l3mdev_master_upper_ifindex_by_index(net, oif); /* Legacy VRF/l3mdev use case */ fl4->flowi4_oif = fl4->flowi4_l3mdev ? 0 : oif; } fl4->daddr = daddr; fl4->saddr = saddr; fl4->flowi4_tos = tos; fl4->flowi4_proto = proto; fl4->fl4_gre_key = key; fl4->flowi4_mark = mark; fl4->flowi4_multipath_hash = tun_inner_hash; fl4->flowi4_flags = flow_flags; } int ip_tunnel_init(struct net_device *dev); void ip_tunnel_uninit(struct net_device *dev); void ip_tunnel_dellink(struct net_device *dev, struct list_head *head); struct net *ip_tunnel_get_link_net(const struct net_device *dev); int ip_tunnel_get_iflink(const struct net_device *dev); int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, struct rtnl_link_ops *ops, char *devname); void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id, struct rtnl_link_ops *ops, struct list_head *dev_to_kill); void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, const u8 protocol); void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const u8 proto, int tunnel_hlen); int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, int cmd); bool ip_tunnel_parm_from_user(struct ip_tunnel_parm_kern *kp, const void __user *data); bool ip_tunnel_parm_to_user(void __user *data, struct ip_tunnel_parm_kern *kp); int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, int link, const unsigned long *flags, __be32 remote, __be32 local, __be32 key); void ip_tunnel_md_udp_encap(struct sk_buff *skb, struct ip_tunnel_info *info); int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, bool log_ecn_error); int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm_kern *p, __u32 fwmark); int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm_kern *p, __u32 fwmark); void ip_tunnel_setup(struct net_device *dev, unsigned int net_id); bool ip_tunnel_netlink_encap_parms(struct nlattr *data[], struct ip_tunnel_encap *encap); void ip_tunnel_netlink_parms(struct nlattr *data[], struct ip_tunnel_parm_kern *parms); extern const struct header_ops ip_tunnel_header_ops; __be16 ip_tunnel_parse_protocol(const struct sk_buff *skb); struct ip_tunnel_encap_ops { size_t (*encap_hlen)(struct ip_tunnel_encap *e); int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e, u8 *protocol, struct flowi4 *fl4); int (*err_handler)(struct sk_buff *skb, u32 info); }; #define MAX_IPTUN_ENCAP_OPS 8 extern const struct ip_tunnel_encap_ops __rcu * iptun_encaps[MAX_IPTUN_ENCAP_OPS]; int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *op, unsigned int num); int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op, unsigned int num); int ip_tunnel_encap_setup(struct ip_tunnel *t, struct ip_tunnel_encap *ipencap); static inline enum skb_drop_reason pskb_inet_may_pull_reason(struct sk_buff *skb) { int nhlen; switch (skb->protocol) { #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): nhlen = sizeof(struct ipv6hdr); break; #endif case htons(ETH_P_IP): nhlen = sizeof(struct iphdr); break; default: nhlen = 0; } return pskb_network_may_pull_reason(skb, nhlen); } static inline bool pskb_inet_may_pull(struct sk_buff *skb) { return pskb_inet_may_pull_reason(skb) == SKB_NOT_DROPPED_YET; } /* Variant of pskb_inet_may_pull(). */ static inline enum skb_drop_reason skb_vlan_inet_prepare(struct sk_buff *skb, bool inner_proto_inherit) { int nhlen = 0, maclen = inner_proto_inherit ? 0 : ETH_HLEN; __be16 type = skb->protocol; enum skb_drop_reason reason; /* Essentially this is skb_protocol(skb, true) * And we get MAC len. */ if (eth_type_vlan(type)) type = __vlan_get_protocol(skb, type, &maclen); switch (type) { #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): nhlen = sizeof(struct ipv6hdr); break; #endif case htons(ETH_P_IP): nhlen = sizeof(struct iphdr); break; } /* For ETH_P_IPV6/ETH_P_IP we make sure to pull * a base network header in skb->head. */ reason = pskb_may_pull_reason(skb, maclen + nhlen); if (reason) return reason; skb_set_network_header(skb, maclen); return SKB_NOT_DROPPED_YET; } static inline int ip_encap_hlen(struct ip_tunnel_encap *e) { const struct ip_tunnel_encap_ops *ops; int hlen = -EINVAL; if (e->type == TUNNEL_ENCAP_NONE) return 0; if (e->type >= MAX_IPTUN_ENCAP_OPS) return -EINVAL; rcu_read_lock(); ops = rcu_dereference(iptun_encaps[e->type]); if (likely(ops && ops->encap_hlen)) hlen = ops->encap_hlen(e); rcu_read_unlock(); return hlen; } static inline int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel_encap *e, u8 *protocol, struct flowi4 *fl4) { const struct ip_tunnel_encap_ops *ops; int ret = -EINVAL; if (e->type == TUNNEL_ENCAP_NONE) return 0; if (e->type >= MAX_IPTUN_ENCAP_OPS) return -EINVAL; rcu_read_lock(); ops = rcu_dereference(iptun_encaps[e->type]); if (likely(ops && ops->build_header)) ret = ops->build_header(skb, e, protocol, fl4); rcu_read_unlock(); return ret; } /* Extract dsfield from inner protocol */ static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph, const struct sk_buff *skb) { __be16 payload_protocol = skb_protocol(skb, true); if (payload_protocol == htons(ETH_P_IP)) return iph->tos; else if (payload_protocol == htons(ETH_P_IPV6)) return ipv6_get_dsfield((const struct ipv6hdr *)iph); else return 0; } static inline __be32 ip_tunnel_get_flowlabel(const struct iphdr *iph, const struct sk_buff *skb) { __be16 payload_protocol = skb_protocol(skb, true); if (payload_protocol == htons(ETH_P_IPV6)) return ip6_flowlabel((const struct ipv6hdr *)iph); else return 0; } static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph, const struct sk_buff *skb) { __be16 payload_protocol = skb_protocol(skb, true); if (payload_protocol == htons(ETH_P_IP)) return iph->ttl; else if (payload_protocol == htons(ETH_P_IPV6)) return ((const struct ipv6hdr *)iph)->hop_limit; else return 0; } /* Propagate ECN bits out */ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph, const struct sk_buff *skb) { u8 inner = ip_tunnel_get_dsfield(iph, skb); return INET_ECN_encapsulate(tos, inner); } int __iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto, bool raw_proto, bool xnet); static inline int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto, bool xnet) { return __iptunnel_pull_header(skb, hdr_len, inner_proto, false, xnet); } void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, u8 proto, u8 tos, u8 ttl, __be16 df, bool xnet); struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, gfp_t flags); int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst, int headroom, bool reply); int iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask); static inline int iptunnel_pull_offloads(struct sk_buff *skb) { if (skb_is_gso(skb)) { int err; err = skb_unclone(skb, GFP_ATOMIC); if (unlikely(err)) return err; skb_shinfo(skb)->gso_type &= ~(NETIF_F_GSO_ENCAP_ALL >> NETIF_F_GSO_SHIFT); } skb->encapsulation = 0; return 0; } static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len) { if (pkt_len > 0) { struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); u64_stats_add(&tstats->tx_bytes, pkt_len); u64_stats_inc(&tstats->tx_packets); u64_stats_update_end(&tstats->syncp); put_cpu_ptr(tstats); return; } if (pkt_len < 0) { DEV_STATS_INC(dev, tx_errors); DEV_STATS_INC(dev, tx_aborted_errors); } else { DEV_STATS_INC(dev, tx_dropped); } } static inline void ip_tunnel_info_opts_get(void *to, const struct ip_tunnel_info *info) { memcpy(to, info + 1, info->options_len); } static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, const void *from, int len, const unsigned long *flags) { info->options_len = len; if (len > 0) { memcpy(ip_tunnel_info_opts(info), from, len); ip_tunnel_flags_or(info->key.tun_flags, info->key.tun_flags, flags); } } static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate) { return (struct ip_tunnel_info *)lwtstate->data; } DECLARE_STATIC_KEY_FALSE(ip_tunnel_metadata_cnt); /* Returns > 0 if metadata should be collected */ static inline int ip_tunnel_collect_metadata(void) { return static_branch_unlikely(&ip_tunnel_metadata_cnt); } void __init ip_tunnel_core_init(void); void ip_tunnel_need_metadata(void); void ip_tunnel_unneed_metadata(void); #else /* CONFIG_INET */ static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate) { return NULL; } static inline void ip_tunnel_need_metadata(void) { } static inline void ip_tunnel_unneed_metadata(void) { } static inline void ip_tunnel_info_opts_get(void *to, const struct ip_tunnel_info *info) { } static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, const void *from, int len, const unsigned long *flags) { info->options_len = 0; } #endif /* CONFIG_INET */ #endif /* __NET_IP_TUNNELS_H */
1 28 10 8 8 2 2 3 1 1 1 2 2 33 4 28 2 22 2 4 29 29 15 18 28 33 4 4 1 2 2 22 3 19 12 3 2 9 5 4 4 7 1 9 2 11 11 1 2 1 12 12 2 2 1 5 5 8 3 1 2 2 4 4 5 5 3 1 1 1 4 53 62 52 5 38 5 2 2 3 3 70 11 4 3 2 1 2 2 5 5 9 9 8 3 3 3 1 7 7 1 1 1 1 7 2 5 3 3 11 4 3 7 28 9 11 8 19 1 1 1 1 47 47 20 11 3 3 3 44 44 3 70 70 70 70 70 70 24 47 70 3 2 2 3 3 3 2 1 70 70 22 48 22 51 19 70 68 14 56 70 24 46 47 46 1 44 3 3 3 3 5 2 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 // SPDX-License-Identifier: GPL-2.0-or-later /* * uvc_driver.c -- USB Video Class driver * * Copyright (C) 2005-2010 * Laurent Pinchart (laurent.pinchart@ideasonboard.com) */ #include <linux/atomic.h> #include <linux/bits.h> #include <linux/gpio/consumer.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/usb.h> #include <linux/usb/quirks.h> #include <linux/usb/uvc.h> #include <linux/videodev2.h> #include <linux/vmalloc.h> #include <linux/wait.h> #include <linux/unaligned.h> #include <media/v4l2-common.h> #include <media/v4l2-ioctl.h> #include "uvcvideo.h" #define DRIVER_AUTHOR "Laurent Pinchart " \ "<laurent.pinchart@ideasonboard.com>" #define DRIVER_DESC "USB Video Class driver" unsigned int uvc_clock_param = CLOCK_MONOTONIC; unsigned int uvc_hw_timestamps_param; unsigned int uvc_no_drop_param = 1; static unsigned int uvc_quirks_param = -1; unsigned int uvc_dbg_param; unsigned int uvc_timeout_param = UVC_CTRL_STREAMING_TIMEOUT; /* ------------------------------------------------------------------------ * Utility functions */ struct usb_host_endpoint *uvc_find_endpoint(struct usb_host_interface *alts, u8 epaddr) { struct usb_host_endpoint *ep; unsigned int i; for (i = 0; i < alts->desc.bNumEndpoints; ++i) { ep = &alts->endpoint[i]; if (ep->desc.bEndpointAddress == epaddr) return ep; } return NULL; } static enum v4l2_colorspace uvc_colorspace(const u8 primaries) { static const enum v4l2_colorspace colorprimaries[] = { V4L2_COLORSPACE_SRGB, /* Unspecified */ V4L2_COLORSPACE_SRGB, V4L2_COLORSPACE_470_SYSTEM_M, V4L2_COLORSPACE_470_SYSTEM_BG, V4L2_COLORSPACE_SMPTE170M, V4L2_COLORSPACE_SMPTE240M, }; if (primaries < ARRAY_SIZE(colorprimaries)) return colorprimaries[primaries]; return V4L2_COLORSPACE_SRGB; /* Reserved */ } static enum v4l2_xfer_func uvc_xfer_func(const u8 transfer_characteristics) { /* * V4L2 does not currently have definitions for all possible values of * UVC transfer characteristics. If v4l2_xfer_func is extended with new * values, the mapping below should be updated. * * Substitutions are taken from the mapping given for * V4L2_XFER_FUNC_DEFAULT documented in videodev2.h. */ static const enum v4l2_xfer_func xfer_funcs[] = { V4L2_XFER_FUNC_DEFAULT, /* Unspecified */ V4L2_XFER_FUNC_709, V4L2_XFER_FUNC_709, /* Substitution for BT.470-2 M */ V4L2_XFER_FUNC_709, /* Substitution for BT.470-2 B, G */ V4L2_XFER_FUNC_709, /* Substitution for SMPTE 170M */ V4L2_XFER_FUNC_SMPTE240M, V4L2_XFER_FUNC_NONE, V4L2_XFER_FUNC_SRGB, }; if (transfer_characteristics < ARRAY_SIZE(xfer_funcs)) return xfer_funcs[transfer_characteristics]; return V4L2_XFER_FUNC_DEFAULT; /* Reserved */ } static enum v4l2_ycbcr_encoding uvc_ycbcr_enc(const u8 matrix_coefficients) { /* * V4L2 does not currently have definitions for all possible values of * UVC matrix coefficients. If v4l2_ycbcr_encoding is extended with new * values, the mapping below should be updated. * * Substitutions are taken from the mapping given for * V4L2_YCBCR_ENC_DEFAULT documented in videodev2.h. * * FCC is assumed to be close enough to 601. */ static const enum v4l2_ycbcr_encoding ycbcr_encs[] = { V4L2_YCBCR_ENC_DEFAULT, /* Unspecified */ V4L2_YCBCR_ENC_709, V4L2_YCBCR_ENC_601, /* Substitution for FCC */ V4L2_YCBCR_ENC_601, /* Substitution for BT.470-2 B, G */ V4L2_YCBCR_ENC_601, V4L2_YCBCR_ENC_SMPTE240M, }; if (matrix_coefficients < ARRAY_SIZE(ycbcr_encs)) return ycbcr_encs[matrix_coefficients]; return V4L2_YCBCR_ENC_DEFAULT; /* Reserved */ } /* ------------------------------------------------------------------------ * Terminal and unit management */ struct uvc_entity *uvc_entity_by_id(struct uvc_device *dev, int id) { struct uvc_entity *entity; list_for_each_entry(entity, &dev->entities, list) { if (entity->id == id) return entity; } return NULL; } static struct uvc_entity *uvc_entity_by_reference(struct uvc_device *dev, int id, struct uvc_entity *entity) { unsigned int i; if (entity == NULL) entity = list_entry(&dev->entities, struct uvc_entity, list); list_for_each_entry_continue(entity, &dev->entities, list) { for (i = 0; i < entity->bNrInPins; ++i) if (entity->baSourceID[i] == id) return entity; } return NULL; } static struct uvc_streaming *uvc_stream_by_id(struct uvc_device *dev, int id) { struct uvc_streaming *stream; list_for_each_entry(stream, &dev->streams, list) { if (stream->header.bTerminalLink == id) return stream; } return NULL; } /* ------------------------------------------------------------------------ * Streaming Object Management */ static void uvc_stream_delete(struct uvc_streaming *stream) { if (stream->async_wq) destroy_workqueue(stream->async_wq); mutex_destroy(&stream->mutex); usb_put_intf(stream->intf); kfree(stream->formats); kfree(stream->header.bmaControls); kfree(stream); } static struct uvc_streaming *uvc_stream_new(struct uvc_device *dev, struct usb_interface *intf) { struct uvc_streaming *stream; stream = kzalloc(sizeof(*stream), GFP_KERNEL); if (stream == NULL) return NULL; mutex_init(&stream->mutex); stream->dev = dev; stream->intf = usb_get_intf(intf); stream->intfnum = intf->cur_altsetting->desc.bInterfaceNumber; /* Allocate a stream specific work queue for asynchronous tasks. */ stream->async_wq = alloc_workqueue("uvcvideo", WQ_UNBOUND | WQ_HIGHPRI, 0); if (!stream->async_wq) { uvc_stream_delete(stream); return NULL; } return stream; } /* ------------------------------------------------------------------------ * Descriptors parsing */ static int uvc_parse_frame(struct uvc_device *dev, struct uvc_streaming *streaming, struct uvc_format *format, struct uvc_frame *frame, u32 **intervals, u8 ftype, int width_multiplier, const unsigned char *buffer, int buflen) { struct usb_host_interface *alts = streaming->intf->cur_altsetting; unsigned int maxIntervalIndex; unsigned int interval; unsigned int i, n; if (ftype != UVC_VS_FRAME_FRAME_BASED) n = buflen > 25 ? buffer[25] : 0; else n = buflen > 21 ? buffer[21] : 0; n = n ? n : 3; if (buflen < 26 + 4 * n) { uvc_dbg(dev, DESCR, "device %d videostreaming interface %d FRAME error\n", dev->udev->devnum, alts->desc.bInterfaceNumber); return -EINVAL; } frame->bFrameIndex = buffer[3]; frame->bmCapabilities = buffer[4]; frame->wWidth = get_unaligned_le16(&buffer[5]) * width_multiplier; frame->wHeight = get_unaligned_le16(&buffer[7]); frame->dwMinBitRate = get_unaligned_le32(&buffer[9]); frame->dwMaxBitRate = get_unaligned_le32(&buffer[13]); if (ftype != UVC_VS_FRAME_FRAME_BASED) { frame->dwMaxVideoFrameBufferSize = get_unaligned_le32(&buffer[17]); frame->dwDefaultFrameInterval = get_unaligned_le32(&buffer[21]); frame->bFrameIntervalType = buffer[25]; } else { frame->dwMaxVideoFrameBufferSize = 0; frame->dwDefaultFrameInterval = get_unaligned_le32(&buffer[17]); frame->bFrameIntervalType = buffer[21]; } /* * Copy the frame intervals. * * Some bogus devices report dwMinFrameInterval equal to * dwMaxFrameInterval and have dwFrameIntervalStep set to zero. Setting * all null intervals to 1 fixes the problem and some other divisions * by zero that could happen. */ frame->dwFrameInterval = *intervals; for (i = 0; i < n; ++i) { interval = get_unaligned_le32(&buffer[26 + 4 * i]); (*intervals)[i] = interval ? interval : 1; } /* * Apply more fixes, quirks and workarounds to handle incorrect or * broken descriptors. */ /* * Several UVC chipsets screw up dwMaxVideoFrameBufferSize completely. * Observed behaviours range from setting the value to 1.1x the actual * frame size to hardwiring the 16 low bits to 0. This results in a * higher than necessary memory usage as well as a wrong image size * information. For uncompressed formats this can be fixed by computing * the value from the frame size. */ if (!(format->flags & UVC_FMT_FLAG_COMPRESSED)) frame->dwMaxVideoFrameBufferSize = format->bpp * frame->wWidth * frame->wHeight / 8; /* * Clamp the default frame interval to the boundaries. A zero * bFrameIntervalType value indicates a continuous frame interval * range, with dwFrameInterval[0] storing the minimum value and * dwFrameInterval[1] storing the maximum value. */ maxIntervalIndex = frame->bFrameIntervalType ? n - 1 : 1; frame->dwDefaultFrameInterval = clamp(frame->dwDefaultFrameInterval, frame->dwFrameInterval[0], frame->dwFrameInterval[maxIntervalIndex]); /* * Some devices report frame intervals that are not functional. If the * corresponding quirk is set, restrict operation to the first interval * only. */ if (dev->quirks & UVC_QUIRK_RESTRICT_FRAME_RATE) { frame->bFrameIntervalType = 1; (*intervals)[0] = frame->dwDefaultFrameInterval; } uvc_dbg(dev, DESCR, "- %ux%u (%u.%u fps)\n", frame->wWidth, frame->wHeight, 10000000 / frame->dwDefaultFrameInterval, (100000000 / frame->dwDefaultFrameInterval) % 10); *intervals += n; return buffer[0]; } static int uvc_parse_format(struct uvc_device *dev, struct uvc_streaming *streaming, struct uvc_format *format, struct uvc_frame *frames, u32 **intervals, const unsigned char *buffer, int buflen) { struct usb_host_interface *alts = streaming->intf->cur_altsetting; const struct uvc_format_desc *fmtdesc; struct uvc_frame *frame; const unsigned char *start = buffer; unsigned int width_multiplier = 1; unsigned int i, n; u8 ftype; int ret; format->type = buffer[2]; format->index = buffer[3]; format->frames = frames; switch (buffer[2]) { case UVC_VS_FORMAT_UNCOMPRESSED: case UVC_VS_FORMAT_FRAME_BASED: n = buffer[2] == UVC_VS_FORMAT_UNCOMPRESSED ? 27 : 28; if (buflen < n) { uvc_dbg(dev, DESCR, "device %d videostreaming interface %d FORMAT error\n", dev->udev->devnum, alts->desc.bInterfaceNumber); return -EINVAL; } /* Find the format descriptor from its GUID. */ fmtdesc = uvc_format_by_guid(&buffer[5]); if (!fmtdesc) { /* * Unknown video formats are not fatal errors, the * caller will skip this descriptor. */ dev_info(&streaming->intf->dev, "Unknown video format %pUl\n", &buffer[5]); return 0; } format->fcc = fmtdesc->fcc; format->bpp = buffer[21]; /* * Some devices report a format that doesn't match what they * really send. */ if (dev->quirks & UVC_QUIRK_FORCE_Y8) { if (format->fcc == V4L2_PIX_FMT_YUYV) { format->fcc = V4L2_PIX_FMT_GREY; format->bpp = 8; width_multiplier = 2; } } /* Some devices report bpp that doesn't match the format. */ if (dev->quirks & UVC_QUIRK_FORCE_BPP) { const struct v4l2_format_info *info = v4l2_format_info(format->fcc); if (info) { unsigned int div = info->hdiv * info->vdiv; n = info->bpp[0] * div; for (i = 1; i < info->comp_planes; i++) n += info->bpp[i]; format->bpp = DIV_ROUND_UP(8 * n, div); } } if (buffer[2] == UVC_VS_FORMAT_UNCOMPRESSED) { ftype = UVC_VS_FRAME_UNCOMPRESSED; } else { ftype = UVC_VS_FRAME_FRAME_BASED; if (buffer[27]) format->flags = UVC_FMT_FLAG_COMPRESSED; } break; case UVC_VS_FORMAT_MJPEG: if (buflen < 11) { uvc_dbg(dev, DESCR, "device %d videostreaming interface %d FORMAT error\n", dev->udev->devnum, alts->desc.bInterfaceNumber); return -EINVAL; } format->fcc = V4L2_PIX_FMT_MJPEG; format->flags = UVC_FMT_FLAG_COMPRESSED; format->bpp = 0; ftype = UVC_VS_FRAME_MJPEG; break; case UVC_VS_FORMAT_DV: if (buflen < 9) { uvc_dbg(dev, DESCR, "device %d videostreaming interface %d FORMAT error\n", dev->udev->devnum, alts->desc.bInterfaceNumber); return -EINVAL; } if ((buffer[8] & 0x7f) > 2) { uvc_dbg(dev, DESCR, "device %d videostreaming interface %d: unknown DV format %u\n", dev->udev->devnum, alts->desc.bInterfaceNumber, buffer[8]); return -EINVAL; } format->fcc = V4L2_PIX_FMT_DV; format->flags = UVC_FMT_FLAG_COMPRESSED | UVC_FMT_FLAG_STREAM; format->bpp = 0; ftype = 0; /* Create a dummy frame descriptor. */ frame = &frames[0]; memset(frame, 0, sizeof(*frame)); frame->bFrameIntervalType = 1; frame->dwDefaultFrameInterval = 1; frame->dwFrameInterval = *intervals; *(*intervals)++ = 1; format->nframes = 1; break; case UVC_VS_FORMAT_MPEG2TS: case UVC_VS_FORMAT_STREAM_BASED: /* Not supported yet. */ default: uvc_dbg(dev, DESCR, "device %d videostreaming interface %d unsupported format %u\n", dev->udev->devnum, alts->desc.bInterfaceNumber, buffer[2]); return -EINVAL; } uvc_dbg(dev, DESCR, "Found format %p4cc", &format->fcc); buflen -= buffer[0]; buffer += buffer[0]; /* * Parse the frame descriptors. Only uncompressed, MJPEG and frame * based formats have frame descriptors. */ if (ftype) { while (buflen > 2 && buffer[1] == USB_DT_CS_INTERFACE && buffer[2] == ftype) { frame = &frames[format->nframes]; ret = uvc_parse_frame(dev, streaming, format, frame, intervals, ftype, width_multiplier, buffer, buflen); if (ret < 0) return ret; format->nframes++; buflen -= ret; buffer += ret; } } if (buflen > 2 && buffer[1] == USB_DT_CS_INTERFACE && buffer[2] == UVC_VS_STILL_IMAGE_FRAME) { buflen -= buffer[0]; buffer += buffer[0]; } if (buflen > 2 && buffer[1] == USB_DT_CS_INTERFACE && buffer[2] == UVC_VS_COLORFORMAT) { if (buflen < 6) { uvc_dbg(dev, DESCR, "device %d videostreaming interface %d COLORFORMAT error\n", dev->udev->devnum, alts->desc.bInterfaceNumber); return -EINVAL; } format->colorspace = uvc_colorspace(buffer[3]); format->xfer_func = uvc_xfer_func(buffer[4]); format->ycbcr_enc = uvc_ycbcr_enc(buffer[5]); buflen -= buffer[0]; buffer += buffer[0]; } else { format->colorspace = V4L2_COLORSPACE_SRGB; } return buffer - start; } static int uvc_parse_streaming(struct uvc_device *dev, struct usb_interface *intf) { struct uvc_streaming *streaming = NULL; struct uvc_format *format; struct uvc_frame *frame; struct usb_host_interface *alts = &intf->altsetting[0]; const unsigned char *_buffer, *buffer = alts->extra; int _buflen, buflen = alts->extralen; unsigned int nformats = 0, nframes = 0, nintervals = 0; unsigned int size, i, n, p; u32 *interval; u16 psize; int ret = -EINVAL; if (intf->cur_altsetting->desc.bInterfaceSubClass != UVC_SC_VIDEOSTREAMING) { uvc_dbg(dev, DESCR, "device %d interface %d isn't a video streaming interface\n", dev->udev->devnum, intf->altsetting[0].desc.bInterfaceNumber); return -EINVAL; } if (usb_driver_claim_interface(&uvc_driver.driver, intf, dev)) { uvc_dbg(dev, DESCR, "device %d interface %d is already claimed\n", dev->udev->devnum, intf->altsetting[0].desc.bInterfaceNumber); return -EINVAL; } streaming = uvc_stream_new(dev, intf); if (streaming == NULL) { usb_driver_release_interface(&uvc_driver.driver, intf); return -ENOMEM; } /* * The Pico iMage webcam has its class-specific interface descriptors * after the endpoint descriptors. */ if (buflen == 0) { for (i = 0; i < alts->desc.bNumEndpoints; ++i) { struct usb_host_endpoint *ep = &alts->endpoint[i]; if (ep->extralen == 0) continue; if (ep->extralen > 2 && ep->extra[1] == USB_DT_CS_INTERFACE) { uvc_dbg(dev, DESCR, "trying extra data from endpoint %u\n", i); buffer = alts->endpoint[i].extra; buflen = alts->endpoint[i].extralen; break; } } } /* Skip the standard interface descriptors. */ while (buflen > 2 && buffer[1] != USB_DT_CS_INTERFACE) { buflen -= buffer[0]; buffer += buffer[0]; } if (buflen <= 2) { uvc_dbg(dev, DESCR, "no class-specific streaming interface descriptors found\n"); goto error; } /* Parse the header descriptor. */ switch (buffer[2]) { case UVC_VS_OUTPUT_HEADER: streaming->type = V4L2_BUF_TYPE_VIDEO_OUTPUT; size = 9; break; case UVC_VS_INPUT_HEADER: streaming->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; size = 13; break; default: uvc_dbg(dev, DESCR, "device %d videostreaming interface %d HEADER descriptor not found\n", dev->udev->devnum, alts->desc.bInterfaceNumber); goto error; } p = buflen >= 4 ? buffer[3] : 0; n = buflen >= size ? buffer[size-1] : 0; if (buflen < size + p*n) { uvc_dbg(dev, DESCR, "device %d videostreaming interface %d HEADER descriptor is invalid\n", dev->udev->devnum, alts->desc.bInterfaceNumber); goto error; } streaming->header.bNumFormats = p; streaming->header.bEndpointAddress = buffer[6]; if (buffer[2] == UVC_VS_INPUT_HEADER) { streaming->header.bmInfo = buffer[7]; streaming->header.bTerminalLink = buffer[8]; streaming->header.bStillCaptureMethod = buffer[9]; streaming->header.bTriggerSupport = buffer[10]; streaming->header.bTriggerUsage = buffer[11]; } else { streaming->header.bTerminalLink = buffer[7]; } streaming->header.bControlSize = n; streaming->header.bmaControls = kmemdup(&buffer[size], p * n, GFP_KERNEL); if (streaming->header.bmaControls == NULL) { ret = -ENOMEM; goto error; } buflen -= buffer[0]; buffer += buffer[0]; _buffer = buffer; _buflen = buflen; /* Count the format and frame descriptors. */ while (_buflen > 2 && _buffer[1] == USB_DT_CS_INTERFACE) { switch (_buffer[2]) { case UVC_VS_FORMAT_UNCOMPRESSED: case UVC_VS_FORMAT_MJPEG: case UVC_VS_FORMAT_FRAME_BASED: nformats++; break; case UVC_VS_FORMAT_DV: /* * DV format has no frame descriptor. We will create a * dummy frame descriptor with a dummy frame interval. */ nformats++; nframes++; nintervals++; break; case UVC_VS_FORMAT_MPEG2TS: case UVC_VS_FORMAT_STREAM_BASED: uvc_dbg(dev, DESCR, "device %d videostreaming interface %d FORMAT %u is not supported\n", dev->udev->devnum, alts->desc.bInterfaceNumber, _buffer[2]); break; case UVC_VS_FRAME_UNCOMPRESSED: case UVC_VS_FRAME_MJPEG: nframes++; if (_buflen > 25) nintervals += _buffer[25] ? _buffer[25] : 3; break; case UVC_VS_FRAME_FRAME_BASED: nframes++; if (_buflen > 21) nintervals += _buffer[21] ? _buffer[21] : 3; break; } _buflen -= _buffer[0]; _buffer += _buffer[0]; } if (nformats == 0) { uvc_dbg(dev, DESCR, "device %d videostreaming interface %d has no supported formats defined\n", dev->udev->devnum, alts->desc.bInterfaceNumber); goto error; } /* * Allocate memory for the formats, the frames and the intervals, * plus any required padding to guarantee that everything has the * correct alignment. */ size = nformats * sizeof(*format); size = ALIGN(size, __alignof__(*frame)) + nframes * sizeof(*frame); size = ALIGN(size, __alignof__(*interval)) + nintervals * sizeof(*interval); format = kzalloc(size, GFP_KERNEL); if (!format) { ret = -ENOMEM; goto error; } frame = (void *)format + nformats * sizeof(*format); frame = PTR_ALIGN(frame, __alignof__(*frame)); interval = (void *)frame + nframes * sizeof(*frame); interval = PTR_ALIGN(interval, __alignof__(*interval)); streaming->formats = format; streaming->nformats = 0; /* Parse the format descriptors. */ while (buflen > 2 && buffer[1] == USB_DT_CS_INTERFACE) { switch (buffer[2]) { case UVC_VS_FORMAT_UNCOMPRESSED: case UVC_VS_FORMAT_MJPEG: case UVC_VS_FORMAT_DV: case UVC_VS_FORMAT_FRAME_BASED: ret = uvc_parse_format(dev, streaming, format, frame, &interval, buffer, buflen); if (ret < 0) goto error; if (!ret) break; streaming->nformats++; frame += format->nframes; format++; buflen -= ret; buffer += ret; continue; default: break; } buflen -= buffer[0]; buffer += buffer[0]; } if (buflen) uvc_dbg(dev, DESCR, "device %d videostreaming interface %d has %u bytes of trailing descriptor garbage\n", dev->udev->devnum, alts->desc.bInterfaceNumber, buflen); /* Parse the alternate settings to find the maximum bandwidth. */ for (i = 0; i < intf->num_altsetting; ++i) { struct usb_host_endpoint *ep; alts = &intf->altsetting[i]; ep = uvc_find_endpoint(alts, streaming->header.bEndpointAddress); if (ep == NULL) continue; psize = uvc_endpoint_max_bpi(dev->udev, ep); if (psize > streaming->maxpsize) streaming->maxpsize = psize; } list_add_tail(&streaming->list, &dev->streams); return 0; error: usb_driver_release_interface(&uvc_driver.driver, intf); uvc_stream_delete(streaming); return ret; } static const u8 uvc_camera_guid[16] = UVC_GUID_UVC_CAMERA; static const u8 uvc_gpio_guid[16] = UVC_GUID_EXT_GPIO_CONTROLLER; static const u8 uvc_media_transport_input_guid[16] = UVC_GUID_UVC_MEDIA_TRANSPORT_INPUT; static const u8 uvc_processing_guid[16] = UVC_GUID_UVC_PROCESSING; static struct uvc_entity *uvc_alloc_new_entity(struct uvc_device *dev, u16 type, u16 id, unsigned int num_pads, unsigned int extra_size) { struct uvc_entity *entity; unsigned int num_inputs; unsigned int size; unsigned int i; /* Per UVC 1.1+ spec 3.7.2, the ID should be non-zero. */ if (id == 0) { dev_err(&dev->udev->dev, "Found Unit with invalid ID 0.\n"); return ERR_PTR(-EINVAL); } /* Per UVC 1.1+ spec 3.7.2, the ID is unique. */ if (uvc_entity_by_id(dev, id)) { dev_err(&dev->udev->dev, "Found multiple Units with ID %u\n", id); return ERR_PTR(-EINVAL); } extra_size = roundup(extra_size, sizeof(*entity->pads)); if (num_pads) num_inputs = type & UVC_TERM_OUTPUT ? num_pads : num_pads - 1; else num_inputs = 0; size = sizeof(*entity) + extra_size + sizeof(*entity->pads) * num_pads + num_inputs; entity = kzalloc(size, GFP_KERNEL); if (entity == NULL) return ERR_PTR(-ENOMEM); entity->id = id; entity->type = type; /* * Set the GUID for standard entity types. For extension units, the GUID * is initialized by the caller. */ switch (type) { case UVC_EXT_GPIO_UNIT: memcpy(entity->guid, uvc_gpio_guid, 16); break; case UVC_ITT_CAMERA: memcpy(entity->guid, uvc_camera_guid, 16); break; case UVC_ITT_MEDIA_TRANSPORT_INPUT: memcpy(entity->guid, uvc_media_transport_input_guid, 16); break; case UVC_VC_PROCESSING_UNIT: memcpy(entity->guid, uvc_processing_guid, 16); break; } entity->num_links = 0; entity->num_pads = num_pads; entity->pads = ((void *)(entity + 1)) + extra_size; for (i = 0; i < num_inputs; ++i) entity->pads[i].flags = MEDIA_PAD_FL_SINK; if (!UVC_ENTITY_IS_OTERM(entity) && num_pads) entity->pads[num_pads-1].flags = MEDIA_PAD_FL_SOURCE; entity->bNrInPins = num_inputs; entity->baSourceID = (u8 *)(&entity->pads[num_pads]); return entity; } static void uvc_entity_set_name(struct uvc_device *dev, struct uvc_entity *entity, const char *type_name, u8 string_id) { int ret; /* * First attempt to read the entity name from the device. If the entity * has no associated string, or if reading the string fails (most * likely due to a buggy firmware), fall back to default names based on * the entity type. */ if (string_id) { ret = usb_string(dev->udev, string_id, entity->name, sizeof(entity->name)); if (!ret) return; } sprintf(entity->name, "%s %u", type_name, entity->id); } /* Parse vendor-specific extensions. */ static int uvc_parse_vendor_control(struct uvc_device *dev, const unsigned char *buffer, int buflen) { struct usb_device *udev = dev->udev; struct usb_host_interface *alts = dev->intf->cur_altsetting; struct uvc_entity *unit; unsigned int n, p; int handled = 0; switch (le16_to_cpu(dev->udev->descriptor.idVendor)) { case 0x046d: /* Logitech */ if (buffer[1] != 0x41 || buffer[2] != 0x01) break; /* * Logitech implements several vendor specific functions * through vendor specific extension units (LXU). * * The LXU descriptors are similar to XU descriptors * (see "USB Device Video Class for Video Devices", section * 3.7.2.6 "Extension Unit Descriptor") with the following * differences: * * ---------------------------------------------------------- * 0 bLength 1 Number * Size of this descriptor, in bytes: 24+p+n*2 * ---------------------------------------------------------- * 23+p+n bmControlsType N Bitmap * Individual bits in the set are defined: * 0: Absolute * 1: Relative * * This bitset is mapped exactly the same as bmControls. * ---------------------------------------------------------- * 23+p+n*2 bReserved 1 Boolean * ---------------------------------------------------------- * 24+p+n*2 iExtension 1 Index * Index of a string descriptor that describes this * extension unit. * ---------------------------------------------------------- */ p = buflen >= 22 ? buffer[21] : 0; n = buflen >= 25 + p ? buffer[22+p] : 0; if (buflen < 25 + p + 2*n) { uvc_dbg(dev, DESCR, "device %d videocontrol interface %d EXTENSION_UNIT error\n", udev->devnum, alts->desc.bInterfaceNumber); break; } unit = uvc_alloc_new_entity(dev, UVC_VC_EXTENSION_UNIT, buffer[3], p + 1, 2 * n); if (IS_ERR(unit)) return PTR_ERR(unit); memcpy(unit->guid, &buffer[4], 16); unit->extension.bNumControls = buffer[20]; memcpy(unit->baSourceID, &buffer[22], p); unit->extension.bControlSize = buffer[22+p]; unit->extension.bmControls = (u8 *)unit + sizeof(*unit); unit->extension.bmControlsType = (u8 *)unit + sizeof(*unit) + n; memcpy(unit->extension.bmControls, &buffer[23+p], 2*n); uvc_entity_set_name(dev, unit, "Extension", buffer[24+p+2*n]); list_add_tail(&unit->list, &dev->entities); handled = 1; break; } return handled; } static int uvc_parse_standard_control(struct uvc_device *dev, const unsigned char *buffer, int buflen) { struct usb_device *udev = dev->udev; struct uvc_entity *unit, *term; struct usb_interface *intf; struct usb_host_interface *alts = dev->intf->cur_altsetting; unsigned int i, n, p, len; const char *type_name; u16 type; switch (buffer[2]) { case UVC_VC_HEADER: n = buflen >= 12 ? buffer[11] : 0; if (buflen < 12 + n) { uvc_dbg(dev, DESCR, "device %d videocontrol interface %d HEADER error\n", udev->devnum, alts->desc.bInterfaceNumber); return -EINVAL; } dev->uvc_version = get_unaligned_le16(&buffer[3]); dev->clock_frequency = get_unaligned_le32(&buffer[7]); /* Parse all USB Video Streaming interfaces. */ for (i = 0; i < n; ++i) { intf = usb_ifnum_to_if(udev, buffer[12+i]); if (intf == NULL) { uvc_dbg(dev, DESCR, "device %d interface %d doesn't exists\n", udev->devnum, i); continue; } uvc_parse_streaming(dev, intf); } break; case UVC_VC_INPUT_TERMINAL: if (buflen < 8) { uvc_dbg(dev, DESCR, "device %d videocontrol interface %d INPUT_TERMINAL error\n", udev->devnum, alts->desc.bInterfaceNumber); return -EINVAL; } /* * Reject invalid terminal types that would cause issues: * * - The high byte must be non-zero, otherwise it would be * confused with a unit. * * - Bit 15 must be 0, as we use it internally as a terminal * direction flag. * * Other unknown types are accepted. */ type = get_unaligned_le16(&buffer[4]); if ((type & 0x7f00) == 0 || (type & 0x8000) != 0) { uvc_dbg(dev, DESCR, "device %d videocontrol interface %d INPUT_TERMINAL %d has invalid type 0x%04x, skipping\n", udev->devnum, alts->desc.bInterfaceNumber, buffer[3], type); return 0; } n = 0; p = 0; len = 8; if (type == UVC_ITT_CAMERA) { n = buflen >= 15 ? buffer[14] : 0; len = 15; } else if (type == UVC_ITT_MEDIA_TRANSPORT_INPUT) { n = buflen >= 9 ? buffer[8] : 0; p = buflen >= 10 + n ? buffer[9+n] : 0; len = 10; } if (buflen < len + n + p) { uvc_dbg(dev, DESCR, "device %d videocontrol interface %d INPUT_TERMINAL error\n", udev->devnum, alts->desc.bInterfaceNumber); return -EINVAL; } term = uvc_alloc_new_entity(dev, type | UVC_TERM_INPUT, buffer[3], 1, n + p); if (IS_ERR(term)) return PTR_ERR(term); if (UVC_ENTITY_TYPE(term) == UVC_ITT_CAMERA) { term->camera.bControlSize = n; term->camera.bmControls = (u8 *)term + sizeof(*term); term->camera.wObjectiveFocalLengthMin = get_unaligned_le16(&buffer[8]); term->camera.wObjectiveFocalLengthMax = get_unaligned_le16(&buffer[10]); term->camera.wOcularFocalLength = get_unaligned_le16(&buffer[12]); memcpy(term->camera.bmControls, &buffer[15], n); } else if (UVC_ENTITY_TYPE(term) == UVC_ITT_MEDIA_TRANSPORT_INPUT) { term->media.bControlSize = n; term->media.bmControls = (u8 *)term + sizeof(*term); term->media.bTransportModeSize = p; term->media.bmTransportModes = (u8 *)term + sizeof(*term) + n; memcpy(term->media.bmControls, &buffer[9], n); memcpy(term->media.bmTransportModes, &buffer[10+n], p); } if (UVC_ENTITY_TYPE(term) == UVC_ITT_CAMERA) type_name = "Camera"; else if (UVC_ENTITY_TYPE(term) == UVC_ITT_MEDIA_TRANSPORT_INPUT) type_name = "Media"; else type_name = "Input"; uvc_entity_set_name(dev, term, type_name, buffer[7]); list_add_tail(&term->list, &dev->entities); break; case UVC_VC_OUTPUT_TERMINAL: if (buflen < 9) { uvc_dbg(dev, DESCR, "device %d videocontrol interface %d OUTPUT_TERMINAL error\n", udev->devnum, alts->desc.bInterfaceNumber); return -EINVAL; } /* * Make sure the terminal type MSB is not null, otherwise it * could be confused with a unit. */ type = get_unaligned_le16(&buffer[4]); if ((type & 0xff00) == 0) { uvc_dbg(dev, DESCR, "device %d videocontrol interface %d OUTPUT_TERMINAL %d has invalid type 0x%04x, skipping\n", udev->devnum, alts->desc.bInterfaceNumber, buffer[3], type); return 0; } term = uvc_alloc_new_entity(dev, type | UVC_TERM_OUTPUT, buffer[3], 1, 0); if (IS_ERR(term)) return PTR_ERR(term); memcpy(term->baSourceID, &buffer[7], 1); uvc_entity_set_name(dev, term, "Output", buffer[8]); list_add_tail(&term->list, &dev->entities); break; case UVC_VC_SELECTOR_UNIT: p = buflen >= 5 ? buffer[4] : 0; if (buflen < 5 || buflen < 6 + p) { uvc_dbg(dev, DESCR, "device %d videocontrol interface %d SELECTOR_UNIT error\n", udev->devnum, alts->desc.bInterfaceNumber); return -EINVAL; } unit = uvc_alloc_new_entity(dev, buffer[2], buffer[3], p + 1, 0); if (IS_ERR(unit)) return PTR_ERR(unit); memcpy(unit->baSourceID, &buffer[5], p); uvc_entity_set_name(dev, unit, "Selector", buffer[5+p]); list_add_tail(&unit->list, &dev->entities); break; case UVC_VC_PROCESSING_UNIT: n = buflen >= 8 ? buffer[7] : 0; p = dev->uvc_version >= 0x0110 ? 10 : 9; if (buflen < p + n) { uvc_dbg(dev, DESCR, "device %d videocontrol interface %d PROCESSING_UNIT error\n", udev->devnum, alts->desc.bInterfaceNumber); return -EINVAL; } unit = uvc_alloc_new_entity(dev, buffer[2], buffer[3], 2, n); if (IS_ERR(unit)) return PTR_ERR(unit); memcpy(unit->baSourceID, &buffer[4], 1); unit->processing.wMaxMultiplier = get_unaligned_le16(&buffer[5]); unit->processing.bControlSize = buffer[7]; unit->processing.bmControls = (u8 *)unit + sizeof(*unit); memcpy(unit->processing.bmControls, &buffer[8], n); if (dev->uvc_version >= 0x0110) unit->processing.bmVideoStandards = buffer[9+n]; uvc_entity_set_name(dev, unit, "Processing", buffer[8+n]); list_add_tail(&unit->list, &dev->entities); break; case UVC_VC_EXTENSION_UNIT: p = buflen >= 22 ? buffer[21] : 0; n = buflen >= 24 + p ? buffer[22+p] : 0; if (buflen < 24 + p + n) { uvc_dbg(dev, DESCR, "device %d videocontrol interface %d EXTENSION_UNIT error\n", udev->devnum, alts->desc.bInterfaceNumber); return -EINVAL; } unit = uvc_alloc_new_entity(dev, buffer[2], buffer[3], p + 1, n); if (IS_ERR(unit)) return PTR_ERR(unit); memcpy(unit->guid, &buffer[4], 16); unit->extension.bNumControls = buffer[20]; memcpy(unit->baSourceID, &buffer[22], p); unit->extension.bControlSize = buffer[22+p]; unit->extension.bmControls = (u8 *)unit + sizeof(*unit); memcpy(unit->extension.bmControls, &buffer[23+p], n); uvc_entity_set_name(dev, unit, "Extension", buffer[23+p+n]); list_add_tail(&unit->list, &dev->entities); break; default: uvc_dbg(dev, DESCR, "Found an unknown CS_INTERFACE descriptor (%u)\n", buffer[2]); break; } return 0; } static int uvc_parse_control(struct uvc_device *dev) { struct usb_host_interface *alts = dev->intf->cur_altsetting; const unsigned char *buffer = alts->extra; int buflen = alts->extralen; int ret; /* * Parse the default alternate setting only, as the UVC specification * defines a single alternate setting, the default alternate setting * zero. */ while (buflen > 2) { if (uvc_parse_vendor_control(dev, buffer, buflen) || buffer[1] != USB_DT_CS_INTERFACE) goto next_descriptor; ret = uvc_parse_standard_control(dev, buffer, buflen); if (ret < 0) return ret; next_descriptor: buflen -= buffer[0]; buffer += buffer[0]; } /* * Check if the optional status endpoint is present. Built-in iSight * webcams have an interrupt endpoint but spit proprietary data that * don't conform to the UVC status endpoint messages. Don't try to * handle the interrupt endpoint for those cameras. */ if (alts->desc.bNumEndpoints == 1 && !(dev->quirks & UVC_QUIRK_BUILTIN_ISIGHT)) { struct usb_host_endpoint *ep = &alts->endpoint[0]; struct usb_endpoint_descriptor *desc = &ep->desc; if (usb_endpoint_is_int_in(desc) && le16_to_cpu(desc->wMaxPacketSize) >= 8 && desc->bInterval != 0) { uvc_dbg(dev, DESCR, "Found a Status endpoint (addr %02x)\n", desc->bEndpointAddress); dev->int_ep = ep; } } return 0; } /* ----------------------------------------------------------------------------- * Privacy GPIO */ static void uvc_gpio_event(struct uvc_device *dev) { struct uvc_entity *unit = dev->gpio_unit; struct uvc_video_chain *chain; u8 new_val; if (!unit) return; new_val = gpiod_get_value_cansleep(unit->gpio.gpio_privacy); /* GPIO entities are always on the first chain. */ chain = list_first_entry(&dev->chains, struct uvc_video_chain, list); uvc_ctrl_status_event(chain, unit->controls, &new_val); } static int uvc_gpio_get_cur(struct uvc_device *dev, struct uvc_entity *entity, u8 cs, void *data, u16 size) { if (cs != UVC_CT_PRIVACY_CONTROL || size < 1) return -EINVAL; *(u8 *)data = gpiod_get_value_cansleep(entity->gpio.gpio_privacy); return 0; } static int uvc_gpio_get_info(struct uvc_device *dev, struct uvc_entity *entity, u8 cs, u8 *caps) { if (cs != UVC_CT_PRIVACY_CONTROL) return -EINVAL; *caps = UVC_CONTROL_CAP_GET | UVC_CONTROL_CAP_AUTOUPDATE; return 0; } static irqreturn_t uvc_gpio_irq(int irq, void *data) { struct uvc_device *dev = data; uvc_gpio_event(dev); return IRQ_HANDLED; } static int uvc_gpio_parse(struct uvc_device *dev) { struct uvc_entity *unit; struct gpio_desc *gpio_privacy; int irq; gpio_privacy = devm_gpiod_get_optional(&dev->intf->dev, "privacy", GPIOD_IN); if (IS_ERR_OR_NULL(gpio_privacy)) return PTR_ERR_OR_ZERO(gpio_privacy); irq = gpiod_to_irq(gpio_privacy); if (irq < 0) return dev_err_probe(&dev->intf->dev, irq, "No IRQ for privacy GPIO\n"); unit = uvc_alloc_new_entity(dev, UVC_EXT_GPIO_UNIT, UVC_EXT_GPIO_UNIT_ID, 0, 1); if (IS_ERR(unit)) return PTR_ERR(unit); unit->gpio.gpio_privacy = gpio_privacy; unit->gpio.irq = irq; unit->gpio.bControlSize = 1; unit->gpio.bmControls = (u8 *)unit + sizeof(*unit); unit->gpio.bmControls[0] = 1; unit->get_cur = uvc_gpio_get_cur; unit->get_info = uvc_gpio_get_info; strscpy(unit->name, "GPIO", sizeof(unit->name)); list_add_tail(&unit->list, &dev->entities); dev->gpio_unit = unit; return 0; } static int uvc_gpio_init_irq(struct uvc_device *dev) { struct uvc_entity *unit = dev->gpio_unit; int ret; if (!unit || unit->gpio.irq < 0) return 0; ret = request_threaded_irq(unit->gpio.irq, NULL, uvc_gpio_irq, IRQF_ONESHOT | IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING, "uvc_privacy_gpio", dev); unit->gpio.initialized = !ret; return ret; } static void uvc_gpio_deinit(struct uvc_device *dev) { if (!dev->gpio_unit || !dev->gpio_unit->gpio.initialized) return; free_irq(dev->gpio_unit->gpio.irq, dev); } /* ------------------------------------------------------------------------ * UVC device scan */ /* * Scan the UVC descriptors to locate a chain starting at an Output Terminal * and containing the following units: * * - one or more Output Terminals (USB Streaming or Display) * - zero or one Processing Unit * - zero, one or more single-input Selector Units * - zero or one multiple-input Selector Units, provided all inputs are * connected to input terminals * - zero, one or mode single-input Extension Units * - one or more Input Terminals (Camera, External or USB Streaming) * * The terminal and units must match on of the following structures: * * ITT_*(0) -> +---------+ +---------+ +---------+ -> TT_STREAMING(0) * ... | SU{0,1} | -> | PU{0,1} | -> | XU{0,n} | ... * ITT_*(n) -> +---------+ +---------+ +---------+ -> TT_STREAMING(n) * * +---------+ +---------+ -> OTT_*(0) * TT_STREAMING -> | PU{0,1} | -> | XU{0,n} | ... * +---------+ +---------+ -> OTT_*(n) * * The Processing Unit and Extension Units can be in any order. Additional * Extension Units connected to the main chain as single-unit branches are * also supported. Single-input Selector Units are ignored. */ static int uvc_scan_chain_entity(struct uvc_video_chain *chain, struct uvc_entity *entity) { switch (UVC_ENTITY_TYPE(entity)) { case UVC_VC_EXTENSION_UNIT: uvc_dbg_cont(PROBE, " <- XU %d", entity->id); if (entity->bNrInPins != 1) { uvc_dbg(chain->dev, DESCR, "Extension unit %d has more than 1 input pin\n", entity->id); return -1; } break; case UVC_VC_PROCESSING_UNIT: uvc_dbg_cont(PROBE, " <- PU %d", entity->id); if (chain->processing != NULL) { uvc_dbg(chain->dev, DESCR, "Found multiple Processing Units in chain\n"); return -1; } chain->processing = entity; break; case UVC_VC_SELECTOR_UNIT: uvc_dbg_cont(PROBE, " <- SU %d", entity->id); /* Single-input selector units are ignored. */ if (entity->bNrInPins == 1) break; if (chain->selector != NULL) { uvc_dbg(chain->dev, DESCR, "Found multiple Selector Units in chain\n"); return -1; } chain->selector = entity; break; case UVC_ITT_VENDOR_SPECIFIC: case UVC_ITT_CAMERA: case UVC_ITT_MEDIA_TRANSPORT_INPUT: uvc_dbg_cont(PROBE, " <- IT %d\n", entity->id); break; case UVC_OTT_VENDOR_SPECIFIC: case UVC_OTT_DISPLAY: case UVC_OTT_MEDIA_TRANSPORT_OUTPUT: uvc_dbg_cont(PROBE, " OT %d", entity->id); break; case UVC_TT_STREAMING: if (UVC_ENTITY_IS_ITERM(entity)) uvc_dbg_cont(PROBE, " <- IT %d\n", entity->id); else uvc_dbg_cont(PROBE, " OT %d", entity->id); break; default: uvc_dbg(chain->dev, DESCR, "Unsupported entity type 0x%04x found in chain\n", UVC_ENTITY_TYPE(entity)); return -1; } list_add_tail(&entity->chain, &chain->entities); return 0; } static int uvc_scan_chain_forward(struct uvc_video_chain *chain, struct uvc_entity *entity, struct uvc_entity *prev) { struct uvc_entity *forward; int found; /* Forward scan */ forward = NULL; found = 0; while (1) { forward = uvc_entity_by_reference(chain->dev, entity->id, forward); if (forward == NULL) break; if (forward == prev) continue; if (forward->chain.next || forward->chain.prev) { uvc_dbg(chain->dev, DESCR, "Found reference to entity %d already in chain\n", forward->id); return -EINVAL; } switch (UVC_ENTITY_TYPE(forward)) { case UVC_VC_EXTENSION_UNIT: if (forward->bNrInPins != 1) { uvc_dbg(chain->dev, DESCR, "Extension unit %d has more than 1 input pin\n", forward->id); return -EINVAL; } /* * Some devices reference an output terminal as the * source of extension units. This is incorrect, as * output terminals only have an input pin, and thus * can't be connected to any entity in the forward * direction. The resulting topology would cause issues * when registering the media controller graph. To * avoid this problem, connect the extension unit to * the source of the output terminal instead. */ if (UVC_ENTITY_IS_OTERM(entity)) { struct uvc_entity *source; source = uvc_entity_by_id(chain->dev, entity->baSourceID[0]); if (!source) { uvc_dbg(chain->dev, DESCR, "Can't connect extension unit %u in chain\n", forward->id); break; } forward->baSourceID[0] = source->id; } list_add_tail(&forward->chain, &chain->entities); if (!found) uvc_dbg_cont(PROBE, " (->"); uvc_dbg_cont(PROBE, " XU %d", forward->id); found = 1; break; case UVC_OTT_VENDOR_SPECIFIC: case UVC_OTT_DISPLAY: case UVC_OTT_MEDIA_TRANSPORT_OUTPUT: case UVC_TT_STREAMING: if (UVC_ENTITY_IS_ITERM(forward)) { uvc_dbg(chain->dev, DESCR, "Unsupported input terminal %u\n", forward->id); return -EINVAL; } if (UVC_ENTITY_IS_OTERM(entity)) { uvc_dbg(chain->dev, DESCR, "Unsupported connection between output terminals %u and %u\n", entity->id, forward->id); break; } list_add_tail(&forward->chain, &chain->entities); if (!found) uvc_dbg_cont(PROBE, " (->"); uvc_dbg_cont(PROBE, " OT %d", forward->id); found = 1; break; } } if (found) uvc_dbg_cont(PROBE, ")"); return 0; } static int uvc_scan_chain_backward(struct uvc_video_chain *chain, struct uvc_entity **_entity) { struct uvc_entity *entity = *_entity; struct uvc_entity *term; int id = -EINVAL, i; switch (UVC_ENTITY_TYPE(entity)) { case UVC_VC_EXTENSION_UNIT: case UVC_VC_PROCESSING_UNIT: id = entity->baSourceID[0]; break; case UVC_VC_SELECTOR_UNIT: /* Single-input selector units are ignored. */ if (entity->bNrInPins == 1) { id = entity->baSourceID[0]; break; } uvc_dbg_cont(PROBE, " <- IT"); chain->selector = entity; for (i = 0; i < entity->bNrInPins; ++i) { id = entity->baSourceID[i]; term = uvc_entity_by_id(chain->dev, id); if (term == NULL || !UVC_ENTITY_IS_ITERM(term)) { uvc_dbg(chain->dev, DESCR, "Selector unit %d input %d isn't connected to an input terminal\n", entity->id, i); return -1; } if (term->chain.next || term->chain.prev) { uvc_dbg(chain->dev, DESCR, "Found reference to entity %d already in chain\n", term->id); return -EINVAL; } uvc_dbg_cont(PROBE, " %d", term->id); list_add_tail(&term->chain, &chain->entities); uvc_scan_chain_forward(chain, term, entity); } uvc_dbg_cont(PROBE, "\n"); id = 0; break; case UVC_ITT_VENDOR_SPECIFIC: case UVC_ITT_CAMERA: case UVC_ITT_MEDIA_TRANSPORT_INPUT: case UVC_OTT_VENDOR_SPECIFIC: case UVC_OTT_DISPLAY: case UVC_OTT_MEDIA_TRANSPORT_OUTPUT: case UVC_TT_STREAMING: id = UVC_ENTITY_IS_OTERM(entity) ? entity->baSourceID[0] : 0; break; } if (id <= 0) { *_entity = NULL; return id; } entity = uvc_entity_by_id(chain->dev, id); if (entity == NULL) { uvc_dbg(chain->dev, DESCR, "Found reference to unknown entity %d\n", id); return -EINVAL; } *_entity = entity; return 0; } static int uvc_scan_chain(struct uvc_video_chain *chain, struct uvc_entity *term) { struct uvc_entity *entity, *prev; uvc_dbg(chain->dev, PROBE, "Scanning UVC chain:"); entity = term; prev = NULL; while (entity != NULL) { /* Entity must not be part of an existing chain */ if (entity->chain.next || entity->chain.prev) { uvc_dbg(chain->dev, DESCR, "Found reference to entity %d already in chain\n", entity->id); return -EINVAL; } /* Process entity */ if (uvc_scan_chain_entity(chain, entity) < 0) return -EINVAL; /* Forward scan */ if (uvc_scan_chain_forward(chain, entity, prev) < 0) return -EINVAL; /* Backward scan */ prev = entity; if (uvc_scan_chain_backward(chain, &entity) < 0) return -EINVAL; } return 0; } static unsigned int uvc_print_terms(struct list_head *terms, u16 dir, char *buffer) { struct uvc_entity *term; unsigned int nterms = 0; char *p = buffer; list_for_each_entry(term, terms, chain) { if (!UVC_ENTITY_IS_TERM(term) || UVC_TERM_DIRECTION(term) != dir) continue; if (nterms) p += sprintf(p, ","); if (++nterms >= 4) { p += sprintf(p, "..."); break; } p += sprintf(p, "%u", term->id); } return p - buffer; } static const char *uvc_print_chain(struct uvc_video_chain *chain) { static char buffer[43]; char *p = buffer; p += uvc_print_terms(&chain->entities, UVC_TERM_INPUT, p); p += sprintf(p, " -> "); uvc_print_terms(&chain->entities, UVC_TERM_OUTPUT, p); return buffer; } static struct uvc_video_chain *uvc_alloc_chain(struct uvc_device *dev) { struct uvc_video_chain *chain; chain = kzalloc(sizeof(*chain), GFP_KERNEL); if (chain == NULL) return NULL; INIT_LIST_HEAD(&chain->entities); mutex_init(&chain->ctrl_mutex); chain->dev = dev; v4l2_prio_init(&chain->prio); return chain; } /* * Fallback heuristic for devices that don't connect units and terminals in a * valid chain. * * Some devices have invalid baSourceID references, causing uvc_scan_chain() * to fail, but if we just take the entities we can find and put them together * in the most sensible chain we can think of, turns out they do work anyway. * Note: This heuristic assumes there is a single chain. * * At the time of writing, devices known to have such a broken chain are * - Acer Integrated Camera (5986:055a) * - Realtek rtl157a7 (0bda:57a7) */ static int uvc_scan_fallback(struct uvc_device *dev) { struct uvc_video_chain *chain; struct uvc_entity *iterm = NULL; struct uvc_entity *oterm = NULL; struct uvc_entity *entity; struct uvc_entity *prev; /* * Start by locating the input and output terminals. We only support * devices with exactly one of each for now. */ list_for_each_entry(entity, &dev->entities, list) { if (UVC_ENTITY_IS_ITERM(entity)) { if (iterm) return -EINVAL; iterm = entity; } if (UVC_ENTITY_IS_OTERM(entity)) { if (oterm) return -EINVAL; oterm = entity; } } if (iterm == NULL || oterm == NULL) return -EINVAL; /* Allocate the chain and fill it. */ chain = uvc_alloc_chain(dev); if (chain == NULL) return -ENOMEM; if (uvc_scan_chain_entity(chain, oterm) < 0) goto error; prev = oterm; /* * Add all Processing and Extension Units with two pads. The order * doesn't matter much, use reverse list traversal to connect units in * UVC descriptor order as we build the chain from output to input. This * leads to units appearing in the order meant by the manufacturer for * the cameras known to require this heuristic. */ list_for_each_entry_reverse(entity, &dev->entities, list) { if (entity->type != UVC_VC_PROCESSING_UNIT && entity->type != UVC_VC_EXTENSION_UNIT) continue; if (entity->num_pads != 2) continue; if (uvc_scan_chain_entity(chain, entity) < 0) goto error; prev->baSourceID[0] = entity->id; prev = entity; } if (uvc_scan_chain_entity(chain, iterm) < 0) goto error; prev->baSourceID[0] = iterm->id; list_add_tail(&chain->list, &dev->chains); uvc_dbg(dev, PROBE, "Found a video chain by fallback heuristic (%s)\n", uvc_print_chain(chain)); return 0; error: kfree(chain); return -EINVAL; } /* * Scan the device for video chains and register video devices. * * Chains are scanned starting at their output terminals and walked backwards. */ static int uvc_scan_device(struct uvc_device *dev) { struct uvc_video_chain *chain; struct uvc_entity *term; list_for_each_entry(term, &dev->entities, list) { if (!UVC_ENTITY_IS_OTERM(term)) continue; /* * If the terminal is already included in a chain, skip it. * This can happen for chains that have multiple output * terminals, where all output terminals beside the first one * will be inserted in the chain in forward scans. */ if (term->chain.next || term->chain.prev) continue; chain = uvc_alloc_chain(dev); if (chain == NULL) return -ENOMEM; term->flags |= UVC_ENTITY_FLAG_DEFAULT; if (uvc_scan_chain(chain, term) < 0) { kfree(chain); continue; } uvc_dbg(dev, PROBE, "Found a valid video chain (%s)\n", uvc_print_chain(chain)); list_add_tail(&chain->list, &dev->chains); } if (list_empty(&dev->chains)) uvc_scan_fallback(dev); if (list_empty(&dev->chains)) { dev_info(&dev->udev->dev, "No valid video chain found.\n"); return -1; } /* Add GPIO entity to the first chain. */ if (dev->gpio_unit) { chain = list_first_entry(&dev->chains, struct uvc_video_chain, list); list_add_tail(&dev->gpio_unit->chain, &chain->entities); } return 0; } /* ------------------------------------------------------------------------ * Video device registration and unregistration */ /* * Delete the UVC device. * * Called by the kernel when the last reference to the uvc_device structure * is released. * * As this function is called after or during disconnect(), all URBs have * already been cancelled by the USB core. There is no need to kill the * interrupt URB manually. */ static void uvc_delete(struct kref *kref) { struct uvc_device *dev = container_of(kref, struct uvc_device, ref); struct list_head *p, *n; uvc_status_cleanup(dev); uvc_ctrl_cleanup_device(dev); usb_put_intf(dev->intf); usb_put_dev(dev->udev); #ifdef CONFIG_MEDIA_CONTROLLER media_device_cleanup(&dev->mdev); #endif list_for_each_safe(p, n, &dev->chains) { struct uvc_video_chain *chain; chain = list_entry(p, struct uvc_video_chain, list); kfree(chain); } list_for_each_safe(p, n, &dev->entities) { struct uvc_entity *entity; entity = list_entry(p, struct uvc_entity, list); #ifdef CONFIG_MEDIA_CONTROLLER uvc_mc_cleanup_entity(entity); #endif kfree(entity); } list_for_each_safe(p, n, &dev->streams) { struct uvc_streaming *streaming; streaming = list_entry(p, struct uvc_streaming, list); usb_driver_release_interface(&uvc_driver.driver, streaming->intf); uvc_stream_delete(streaming); } kfree(dev); } static void uvc_release(struct video_device *vdev) { struct uvc_streaming *stream = video_get_drvdata(vdev); struct uvc_device *dev = stream->dev; kref_put(&dev->ref, uvc_delete); } /* * Unregister the video devices. */ static void uvc_unregister_video(struct uvc_device *dev) { struct uvc_streaming *stream; uvc_gpio_deinit(dev); list_for_each_entry(stream, &dev->streams, list) { /* Nothing to do here, continue. */ if (!video_is_registered(&stream->vdev)) continue; /* * For stream->vdev we follow the same logic as: * vb2_video_unregister_device(). */ /* 1. Take a reference to vdev */ get_device(&stream->vdev.dev); /* 2. Ensure that no new ioctls can be called. */ video_unregister_device(&stream->vdev); /* 3. Wait for old ioctls to finish. */ mutex_lock(&stream->mutex); /* 4. Stop streaming. */ uvc_queue_release(&stream->queue); mutex_unlock(&stream->mutex); put_device(&stream->vdev.dev); /* * For stream->meta.vdev we can directly call: * vb2_video_unregister_device(). */ vb2_video_unregister_device(&stream->meta.vdev); /* * Now both vdevs are not streaming and all the ioctls will * return -ENODEV. */ uvc_debugfs_cleanup_stream(stream); } uvc_status_unregister(dev); if (dev->vdev.dev) v4l2_device_unregister(&dev->vdev); #ifdef CONFIG_MEDIA_CONTROLLER if (media_devnode_is_registered(dev->mdev.devnode)) media_device_unregister(&dev->mdev); #endif } int uvc_register_video_device(struct uvc_device *dev, struct uvc_streaming *stream, struct video_device *vdev, struct uvc_video_queue *queue, enum v4l2_buf_type type, const struct v4l2_file_operations *fops, const struct v4l2_ioctl_ops *ioctl_ops) { int ret; /* Initialize the video buffers queue. */ ret = uvc_queue_init(queue, type); if (ret) return ret; /* Register the device with V4L. */ /* * We already hold a reference to dev->udev. The video device will be * unregistered before the reference is released, so we don't need to * get another one. */ vdev->v4l2_dev = &dev->vdev; vdev->fops = fops; vdev->ioctl_ops = ioctl_ops; vdev->release = uvc_release; vdev->prio = &stream->chain->prio; if (type == V4L2_BUF_TYPE_VIDEO_OUTPUT) vdev->vfl_dir = VFL_DIR_TX; else vdev->vfl_dir = VFL_DIR_RX; switch (type) { case V4L2_BUF_TYPE_VIDEO_CAPTURE: default: vdev->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING; break; case V4L2_BUF_TYPE_VIDEO_OUTPUT: vdev->device_caps = V4L2_CAP_VIDEO_OUTPUT | V4L2_CAP_STREAMING; break; case V4L2_BUF_TYPE_META_CAPTURE: vdev->device_caps = V4L2_CAP_META_CAPTURE | V4L2_CAP_STREAMING; break; } strscpy(vdev->name, dev->name, sizeof(vdev->name)); /* * Set the driver data before calling video_register_device, otherwise * the file open() handler might race us. */ video_set_drvdata(vdev, stream); ret = video_register_device(vdev, VFL_TYPE_VIDEO, -1); if (ret < 0) { dev_err(&stream->intf->dev, "Failed to register %s device (%d).\n", v4l2_type_names[type], ret); return ret; } kref_get(&dev->ref); return 0; } static int uvc_register_video(struct uvc_device *dev, struct uvc_streaming *stream) { int ret; /* Initialize the streaming interface with default parameters. */ ret = uvc_video_init(stream); if (ret < 0) { dev_err(&stream->intf->dev, "Failed to initialize the device (%d).\n", ret); return ret; } if (stream->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) stream->chain->caps |= V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_META_CAPTURE; else stream->chain->caps |= V4L2_CAP_VIDEO_OUTPUT; uvc_debugfs_init_stream(stream); /* Register the device with V4L. */ return uvc_register_video_device(dev, stream, &stream->vdev, &stream->queue, stream->type, &uvc_fops, &uvc_ioctl_ops); } /* * Register all video devices in all chains. */ static int uvc_register_terms(struct uvc_device *dev, struct uvc_video_chain *chain) { struct uvc_streaming *stream; struct uvc_entity *term; int ret; list_for_each_entry(term, &chain->entities, chain) { if (UVC_ENTITY_TYPE(term) != UVC_TT_STREAMING) continue; stream = uvc_stream_by_id(dev, term->id); if (stream == NULL) { dev_info(&dev->udev->dev, "No streaming interface found for terminal %u.", term->id); continue; } stream->chain = chain; ret = uvc_register_video(dev, stream); if (ret < 0) return ret; /* * Register a metadata node, but ignore a possible failure, * complete registration of video nodes anyway. */ uvc_meta_register(stream); term->vdev = &stream->vdev; } return 0; } static int uvc_register_chains(struct uvc_device *dev) { struct uvc_video_chain *chain; int ret; list_for_each_entry(chain, &dev->chains, list) { ret = uvc_register_terms(dev, chain); if (ret < 0) return ret; #ifdef CONFIG_MEDIA_CONTROLLER ret = uvc_mc_register_entities(chain); if (ret < 0) dev_info(&dev->udev->dev, "Failed to register entities (%d).\n", ret); #endif } return 0; } /* ------------------------------------------------------------------------ * USB probe, disconnect, suspend and resume */ static const struct uvc_device_info uvc_quirk_none = { 0 }; static int uvc_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(intf); struct uvc_device *dev; const struct uvc_device_info *info = (const struct uvc_device_info *)id->driver_info; int function; int ret; /* Allocate memory for the device and initialize it. */ dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (dev == NULL) return -ENOMEM; INIT_LIST_HEAD(&dev->entities); INIT_LIST_HEAD(&dev->chains); INIT_LIST_HEAD(&dev->streams); kref_init(&dev->ref); atomic_set(&dev->nmappings, 0); dev->udev = usb_get_dev(udev); dev->intf = usb_get_intf(intf); dev->intfnum = intf->cur_altsetting->desc.bInterfaceNumber; dev->info = info ? info : &uvc_quirk_none; dev->quirks = uvc_quirks_param == -1 ? dev->info->quirks : uvc_quirks_param; if (id->idVendor && id->idProduct) uvc_dbg(dev, PROBE, "Probing known UVC device %s (%04x:%04x)\n", udev->devpath, id->idVendor, id->idProduct); else uvc_dbg(dev, PROBE, "Probing generic UVC device %s\n", udev->devpath); if (udev->product != NULL) strscpy(dev->name, udev->product, sizeof(dev->name)); else snprintf(dev->name, sizeof(dev->name), "UVC Camera (%04x:%04x)", le16_to_cpu(udev->descriptor.idVendor), le16_to_cpu(udev->descriptor.idProduct)); /* * Add iFunction or iInterface to names when available as additional * distinguishers between interfaces. iFunction is prioritized over * iInterface which matches Windows behavior at the point of writing. */ if (intf->intf_assoc && intf->intf_assoc->iFunction != 0) function = intf->intf_assoc->iFunction; else function = intf->cur_altsetting->desc.iInterface; if (function != 0) { size_t len; strlcat(dev->name, ": ", sizeof(dev->name)); len = strlen(dev->name); usb_string(udev, function, dev->name + len, sizeof(dev->name) - len); } /* Initialize the media device. */ #ifdef CONFIG_MEDIA_CONTROLLER dev->mdev.dev = &intf->dev; strscpy(dev->mdev.model, dev->name, sizeof(dev->mdev.model)); if (udev->serial) strscpy(dev->mdev.serial, udev->serial, sizeof(dev->mdev.serial)); usb_make_path(udev, dev->mdev.bus_info, sizeof(dev->mdev.bus_info)); dev->mdev.hw_revision = le16_to_cpu(udev->descriptor.bcdDevice); media_device_init(&dev->mdev); dev->vdev.mdev = &dev->mdev; #endif /* Parse the Video Class control descriptor. */ if (uvc_parse_control(dev) < 0) { uvc_dbg(dev, PROBE, "Unable to parse UVC descriptors\n"); goto error; } /* Parse the associated GPIOs. */ if (uvc_gpio_parse(dev) < 0) { uvc_dbg(dev, PROBE, "Unable to parse UVC GPIOs\n"); goto error; } dev_info(&dev->udev->dev, "Found UVC %u.%02x device %s (%04x:%04x)\n", dev->uvc_version >> 8, dev->uvc_version & 0xff, udev->product ? udev->product : "<unnamed>", le16_to_cpu(udev->descriptor.idVendor), le16_to_cpu(udev->descriptor.idProduct)); if (dev->quirks != dev->info->quirks) { dev_info(&dev->udev->dev, "Forcing device quirks to 0x%x by module parameter for testing purpose.\n", dev->quirks); dev_info(&dev->udev->dev, "Please report required quirks to the linux-media mailing list.\n"); } if (dev->info->uvc_version) { dev->uvc_version = dev->info->uvc_version; dev_info(&dev->udev->dev, "Forcing UVC version to %u.%02x\n", dev->uvc_version >> 8, dev->uvc_version & 0xff); } /* Register the V4L2 device. */ if (v4l2_device_register(&intf->dev, &dev->vdev) < 0) goto error; /* Scan the device for video chains. */ if (uvc_scan_device(dev) < 0) goto error; /* Initialize controls. */ if (uvc_ctrl_init_device(dev) < 0) goto error; /* Register video device nodes. */ if (uvc_register_chains(dev) < 0) goto error; #ifdef CONFIG_MEDIA_CONTROLLER /* Register the media device node */ if (media_device_register(&dev->mdev) < 0) goto error; #endif /* Save our data pointer in the interface data. */ usb_set_intfdata(intf, dev); /* Initialize the interrupt URB. */ ret = uvc_status_init(dev); if (ret < 0) { dev_info(&dev->udev->dev, "Unable to initialize the status endpoint (%d), status interrupt will not be supported.\n", ret); } ret = uvc_gpio_init_irq(dev); if (ret < 0) { dev_err(&dev->udev->dev, "Unable to request privacy GPIO IRQ (%d)\n", ret); goto error; } if (dev->quirks & UVC_QUIRK_NO_RESET_RESUME) udev->quirks &= ~USB_QUIRK_RESET_RESUME; if (!(dev->quirks & UVC_QUIRK_DISABLE_AUTOSUSPEND)) usb_enable_autosuspend(udev); uvc_dbg(dev, PROBE, "UVC device initialized\n"); return 0; error: uvc_unregister_video(dev); kref_put(&dev->ref, uvc_delete); return -ENODEV; } static void uvc_disconnect(struct usb_interface *intf) { struct uvc_device *dev = usb_get_intfdata(intf); /* * Set the USB interface data to NULL. This can be done outside the * lock, as there's no other reader. */ usb_set_intfdata(intf, NULL); if (intf->cur_altsetting->desc.bInterfaceSubClass == UVC_SC_VIDEOSTREAMING) return; uvc_unregister_video(dev); kref_put(&dev->ref, uvc_delete); } static int uvc_suspend(struct usb_interface *intf, pm_message_t message) { struct uvc_device *dev = usb_get_intfdata(intf); struct uvc_streaming *stream; uvc_dbg(dev, SUSPEND, "Suspending interface %u\n", intf->cur_altsetting->desc.bInterfaceNumber); /* Controls are cached on the fly so they don't need to be saved. */ if (intf->cur_altsetting->desc.bInterfaceSubClass == UVC_SC_VIDEOCONTROL) { uvc_status_suspend(dev); return 0; } list_for_each_entry(stream, &dev->streams, list) { if (stream->intf == intf) return uvc_video_suspend(stream); } uvc_dbg(dev, SUSPEND, "Suspend: video streaming USB interface mismatch\n"); return -EINVAL; } static int __uvc_resume(struct usb_interface *intf, int reset) { struct uvc_device *dev = usb_get_intfdata(intf); struct uvc_streaming *stream; int ret = 0; uvc_dbg(dev, SUSPEND, "Resuming interface %u\n", intf->cur_altsetting->desc.bInterfaceNumber); if (intf->cur_altsetting->desc.bInterfaceSubClass == UVC_SC_VIDEOCONTROL) { if (reset) { ret = uvc_ctrl_restore_values(dev); if (ret < 0) return ret; } return uvc_status_resume(dev); } list_for_each_entry(stream, &dev->streams, list) { if (stream->intf == intf) { ret = uvc_video_resume(stream, reset); if (ret < 0) uvc_queue_streamoff(&stream->queue, stream->queue.queue.type); return ret; } } uvc_dbg(dev, SUSPEND, "Resume: video streaming USB interface mismatch\n"); return -EINVAL; } static int uvc_resume(struct usb_interface *intf) { return __uvc_resume(intf, 0); } static int uvc_reset_resume(struct usb_interface *intf) { return __uvc_resume(intf, 1); } /* ------------------------------------------------------------------------ * Module parameters */ static int uvc_clock_param_get(char *buffer, const struct kernel_param *kp) { if (uvc_clock_param == CLOCK_MONOTONIC) return sprintf(buffer, "CLOCK_MONOTONIC"); else return sprintf(buffer, "CLOCK_REALTIME"); } static int uvc_clock_param_set(const char *val, const struct kernel_param *kp) { if (strncasecmp(val, "clock_", strlen("clock_")) == 0) val += strlen("clock_"); if (strcasecmp(val, "monotonic") == 0) uvc_clock_param = CLOCK_MONOTONIC; else if (strcasecmp(val, "realtime") == 0) uvc_clock_param = CLOCK_REALTIME; else return -EINVAL; return 0; } module_param_call(clock, uvc_clock_param_set, uvc_clock_param_get, &uvc_clock_param, 0644); MODULE_PARM_DESC(clock, "Video buffers timestamp clock"); module_param_named(hwtimestamps, uvc_hw_timestamps_param, uint, 0644); MODULE_PARM_DESC(hwtimestamps, "Use hardware timestamps"); static int param_set_nodrop(const char *val, const struct kernel_param *kp) { pr_warn_once("uvcvideo: " DEPRECATED "nodrop parameter will be eventually removed.\n"); return param_set_bool(val, kp); } static const struct kernel_param_ops param_ops_nodrop = { .set = param_set_nodrop, .get = param_get_uint, }; param_check_uint(nodrop, &uvc_no_drop_param); module_param_cb(nodrop, &param_ops_nodrop, &uvc_no_drop_param, 0644); __MODULE_PARM_TYPE(nodrop, "uint"); MODULE_PARM_DESC(nodrop, "Don't drop incomplete frames"); module_param_named(quirks, uvc_quirks_param, uint, 0644); MODULE_PARM_DESC(quirks, "Forced device quirks"); module_param_named(trace, uvc_dbg_param, uint, 0644); MODULE_PARM_DESC(trace, "Trace level bitmask"); module_param_named(timeout, uvc_timeout_param, uint, 0644); MODULE_PARM_DESC(timeout, "Streaming control requests timeout"); /* ------------------------------------------------------------------------ * Driver initialization and cleanup */ static const struct uvc_device_info uvc_quirk_probe_minmax = { .quirks = UVC_QUIRK_PROBE_MINMAX, }; static const struct uvc_device_info uvc_quirk_fix_bandwidth = { .quirks = UVC_QUIRK_FIX_BANDWIDTH, }; static const struct uvc_device_info uvc_quirk_probe_def = { .quirks = UVC_QUIRK_PROBE_DEF, }; static const struct uvc_device_info uvc_quirk_stream_no_fid = { .quirks = UVC_QUIRK_STREAM_NO_FID, }; static const struct uvc_device_info uvc_quirk_force_y8 = { .quirks = UVC_QUIRK_FORCE_Y8, }; #define UVC_INFO_QUIRK(q) (kernel_ulong_t)&(struct uvc_device_info){.quirks = q} #define UVC_INFO_META(m) (kernel_ulong_t)&(struct uvc_device_info) \ {.meta_format = m} /* * The Logitech cameras listed below have their interface class set to * VENDOR_SPEC because they don't announce themselves as UVC devices, even * though they are compliant. * * Sort these by vendor/product ID. */ static const struct usb_device_id uvc_ids[] = { /* Quanta ACER HD User Facing */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x0408, .idProduct = 0x4033, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = UVC_PC_PROTOCOL_15, .driver_info = (kernel_ulong_t)&(const struct uvc_device_info){ .uvc_version = 0x010a, } }, /* Quanta ACER HD User Facing */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x0408, .idProduct = 0x4035, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = UVC_PC_PROTOCOL_15, .driver_info = (kernel_ulong_t)&(const struct uvc_device_info){ .uvc_version = 0x010a, } }, /* LogiLink Wireless Webcam */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x0416, .idProduct = 0xa91a, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_minmax }, /* Genius eFace 2025 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x0458, .idProduct = 0x706e, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_minmax }, /* Microsoft Lifecam NX-6000 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x045e, .idProduct = 0x00f8, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_minmax }, /* Microsoft Lifecam NX-3000 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x045e, .idProduct = 0x0721, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_def }, /* Microsoft Lifecam VX-7000 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x045e, .idProduct = 0x0723, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_minmax }, /* Logitech, Webcam C910 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x046d, .idProduct = 0x0821, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_WAKE_AUTOSUSPEND)}, /* Logitech, Webcam B910 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x046d, .idProduct = 0x0823, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_WAKE_AUTOSUSPEND)}, /* Logitech Quickcam Fusion */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x046d, .idProduct = 0x08c1, .bInterfaceClass = USB_CLASS_VENDOR_SPEC, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0 }, /* Logitech Quickcam Orbit MP */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x046d, .idProduct = 0x08c2, .bInterfaceClass = USB_CLASS_VENDOR_SPEC, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0 }, /* Logitech Quickcam Pro for Notebook */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x046d, .idProduct = 0x08c3, .bInterfaceClass = USB_CLASS_VENDOR_SPEC, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0 }, /* Logitech Quickcam Pro 5000 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x046d, .idProduct = 0x08c5, .bInterfaceClass = USB_CLASS_VENDOR_SPEC, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0 }, /* Logitech Quickcam OEM Dell Notebook */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x046d, .idProduct = 0x08c6, .bInterfaceClass = USB_CLASS_VENDOR_SPEC, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0 }, /* Logitech Quickcam OEM Cisco VT Camera II */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x046d, .idProduct = 0x08c7, .bInterfaceClass = USB_CLASS_VENDOR_SPEC, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0 }, /* Logitech HD Pro Webcam C920 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x046d, .idProduct = 0x082d, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_RESTORE_CTRLS_ON_INIT | UVC_QUIRK_INVALID_DEVICE_SOF) }, /* Logitech HD Pro Webcam C922 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x046d, .idProduct = 0x085c, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_INVALID_DEVICE_SOF) }, /* Logitech Rally Bar Huddle */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x046d, .idProduct = 0x087c, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_NO_RESET_RESUME) }, /* Logitech Rally Bar */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x046d, .idProduct = 0x089b, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_NO_RESET_RESUME) }, /* Logitech Rally Bar Mini */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x046d, .idProduct = 0x08d3, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_NO_RESET_RESUME) }, /* Chicony CNF7129 (Asus EEE 100HE) */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x04f2, .idProduct = 0xb071, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_RESTRICT_FRAME_RATE) }, /* Alcor Micro AU3820 (Future Boy PC USB Webcam) */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x058f, .idProduct = 0x3820, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_minmax }, /* Dell XPS m1530 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x05a9, .idProduct = 0x2640, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_def }, /* Dell SP2008WFP Monitor */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x05a9, .idProduct = 0x2641, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_def }, /* Dell Alienware X51 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x05a9, .idProduct = 0x2643, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_def }, /* Dell Studio Hybrid 140g (OmniVision webcam) */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x05a9, .idProduct = 0x264a, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_def }, /* Dell XPS M1330 (OmniVision OV7670 webcam) */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x05a9, .idProduct = 0x7670, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_def }, /* Apple Built-In iSight */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x05ac, .idProduct = 0x8501, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_PROBE_MINMAX | UVC_QUIRK_BUILTIN_ISIGHT) }, /* Apple FaceTime HD Camera (Built-In) */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x05ac, .idProduct = 0x8514, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_def }, /* Apple Built-In iSight via iBridge */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x05ac, .idProduct = 0x8600, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_def }, /* Foxlink ("HP Webcam" on HP Mini 5103) */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x05c8, .idProduct = 0x0403, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_fix_bandwidth }, /* Genesys Logic USB 2.0 PC Camera */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x05e3, .idProduct = 0x0505, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_stream_no_fid }, /* Hercules Classic Silver */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x06f8, .idProduct = 0x300c, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_fix_bandwidth }, /* ViMicro Vega */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x0ac8, .idProduct = 0x332d, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_fix_bandwidth }, /* ViMicro - Minoru3D */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x0ac8, .idProduct = 0x3410, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_fix_bandwidth }, /* ViMicro Venus - Minoru3D */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x0ac8, .idProduct = 0x3420, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_fix_bandwidth }, /* Ophir Optronics - SPCAM 620U */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x0bd3, .idProduct = 0x0555, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_minmax }, /* Sonix Technology Co. Ltd. - 292A IPC AR0330 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x0c45, .idProduct = 0x6366, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_MJPEG_NO_EOF) }, /* MT6227 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x0e8d, .idProduct = 0x0004, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_PROBE_MINMAX | UVC_QUIRK_PROBE_DEF) }, /* IMC Networks (Medion Akoya) */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x13d3, .idProduct = 0x5103, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_stream_no_fid }, /* JMicron USB2.0 XGA WebCam */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x152d, .idProduct = 0x0310, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_minmax }, /* Kurokesu C1 PRO */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x16d0, .idProduct = 0x0ed1, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_MJPEG_NO_EOF) }, /* Syntek (HP Spartan) */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x174f, .idProduct = 0x5212, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_stream_no_fid }, /* Syntek (Samsung Q310) */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x174f, .idProduct = 0x5931, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_stream_no_fid }, /* Syntek (Packard Bell EasyNote MX52 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x174f, .idProduct = 0x8a12, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_stream_no_fid }, /* Syntek (Asus F9SG) */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x174f, .idProduct = 0x8a31, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_stream_no_fid }, /* Syntek (Asus U3S) */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x174f, .idProduct = 0x8a33, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_stream_no_fid }, /* Syntek (JAOtech Smart Terminal) */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x174f, .idProduct = 0x8a34, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_stream_no_fid }, /* Miricle 307K */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x17dc, .idProduct = 0x0202, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_stream_no_fid }, /* Lenovo Thinkpad SL400/SL500 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x17ef, .idProduct = 0x480b, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_stream_no_fid }, /* Aveo Technology USB 2.0 Camera */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x1871, .idProduct = 0x0306, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_PROBE_MINMAX | UVC_QUIRK_PROBE_EXTRAFIELDS) }, /* Aveo Technology USB 2.0 Camera (Tasco USB Microscope) */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x1871, .idProduct = 0x0516, .bInterfaceClass = USB_CLASS_VENDOR_SPEC, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0 }, /* Ecamm Pico iMage */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x18cd, .idProduct = 0xcafe, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_PROBE_EXTRAFIELDS) }, /* Manta MM-353 Plako */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x18ec, .idProduct = 0x3188, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_minmax }, /* FSC WebCam V30S */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x18ec, .idProduct = 0x3288, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_minmax }, /* Arkmicro unbranded */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x18ec, .idProduct = 0x3290, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_def }, /* The Imaging Source USB CCD cameras */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x199e, .idProduct = 0x8102, .bInterfaceClass = USB_CLASS_VENDOR_SPEC, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0 }, /* Bodelin ProScopeHR */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_DEV_HI | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x19ab, .idProduct = 0x1000, .bcdDevice_hi = 0x0126, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_STATUS_INTERVAL) }, /* MSI StarCam 370i */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x1b3b, .idProduct = 0x2951, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_minmax }, /* Generalplus Technology Inc. 808 Camera */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x1b3f, .idProduct = 0x2002, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_minmax }, /* Shenzhen Aoni Electronic Co.,Ltd 2K FHD camera */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x1bcf, .idProduct = 0x0b40, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&(const struct uvc_device_info){ .uvc_version = 0x010a, } }, /* SiGma Micro USB Web Camera */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x1c4f, .idProduct = 0x3000, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_PROBE_MINMAX | UVC_QUIRK_IGNORE_SELECTOR_UNIT) }, /* NXP Semiconductors IR VIDEO */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x1fc9, .idProduct = 0x009b, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_probe_minmax }, /* Oculus VR Positional Tracker DK2 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x2833, .idProduct = 0x0201, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_force_y8 }, /* Oculus VR Rift Sensor */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x2833, .idProduct = 0x0211, .bInterfaceClass = USB_CLASS_VENDOR_SPEC, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_force_y8 }, /* GEO Semiconductor GC6500 */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x29fe, .idProduct = 0x4d53, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_FORCE_BPP) }, /* Insta360 Link */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x2e1a, .idProduct = 0x4c01, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_DISABLE_AUTOSUSPEND) }, /* Intel D410/ASR depth camera */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x8086, .idProduct = 0x0ad2, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_META(V4L2_META_FMT_D4XX) }, /* Intel D415/ASRC depth camera */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x8086, .idProduct = 0x0ad3, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_META(V4L2_META_FMT_D4XX) }, /* Intel D430/AWG depth camera */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x8086, .idProduct = 0x0ad4, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_META(V4L2_META_FMT_D4XX) }, /* Intel RealSense D4M */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x8086, .idProduct = 0x0b03, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_META(V4L2_META_FMT_D4XX) }, /* Intel D435/AWGC depth camera */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x8086, .idProduct = 0x0b07, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_META(V4L2_META_FMT_D4XX) }, /* Intel D435i depth camera */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x8086, .idProduct = 0x0b3a, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_META(V4L2_META_FMT_D4XX) }, /* Intel D405 Depth Camera */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x8086, .idProduct = 0x0b5b, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_META(V4L2_META_FMT_D4XX) }, /* Intel D455 Depth Camera */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x8086, .idProduct = 0x0b5c, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_META(V4L2_META_FMT_D4XX) }, /* Intel D421 Depth Module */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x8086, .idProduct = 0x1155, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_INFO_META(V4L2_META_FMT_D4XX) }, /* Generic USB Video Class */ { USB_INTERFACE_INFO(USB_CLASS_VIDEO, 1, UVC_PC_PROTOCOL_UNDEFINED) }, { USB_INTERFACE_INFO(USB_CLASS_VIDEO, 1, UVC_PC_PROTOCOL_15) }, {} }; MODULE_DEVICE_TABLE(usb, uvc_ids); struct uvc_driver uvc_driver = { .driver = { .name = "uvcvideo", .probe = uvc_probe, .disconnect = uvc_disconnect, .suspend = uvc_suspend, .resume = uvc_resume, .reset_resume = uvc_reset_resume, .id_table = uvc_ids, .supports_autosuspend = 1, }, }; static int __init uvc_init(void) { int ret; uvc_debugfs_init(); ret = usb_register(&uvc_driver.driver); if (ret < 0) { uvc_debugfs_cleanup(); return ret; } return 0; } static void __exit uvc_cleanup(void) { usb_deregister(&uvc_driver.driver); uvc_debugfs_cleanup(); } module_init(uvc_init); module_exit(uvc_cleanup); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); MODULE_VERSION(DRIVER_VERSION);
12 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 // SPDX-License-Identifier: GPL-2.0-only #include <linux/clockchips.h> #include <linux/interrupt.h> #include <linux/export.h> #include <linux/delay.h> #include <linux/hpet.h> #include <linux/cpu.h> #include <linux/irq.h> #include <asm/cpuid.h> #include <asm/irq_remapping.h> #include <asm/hpet.h> #include <asm/time.h> #include <asm/mwait.h> #undef pr_fmt #define pr_fmt(fmt) "hpet: " fmt enum hpet_mode { HPET_MODE_UNUSED, HPET_MODE_LEGACY, HPET_MODE_CLOCKEVT, HPET_MODE_DEVICE, }; struct hpet_channel { struct clock_event_device evt; unsigned int num; unsigned int cpu; unsigned int irq; unsigned int in_use; enum hpet_mode mode; unsigned int boot_cfg; char name[10]; }; struct hpet_base { unsigned int nr_channels; unsigned int nr_clockevents; unsigned int boot_cfg; struct hpet_channel *channels; }; #define HPET_MASK CLOCKSOURCE_MASK(32) #define HPET_MIN_CYCLES 128 #define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1)) /* * HPET address is set in acpi/boot.c, when an ACPI entry exists */ unsigned long hpet_address; u8 hpet_blockid; /* OS timer block num */ bool hpet_msi_disable; #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_GENERIC_MSI_IRQ) static DEFINE_PER_CPU(struct hpet_channel *, cpu_hpet_channel); static struct irq_domain *hpet_domain; #endif static void __iomem *hpet_virt_address; static struct hpet_base hpet_base; static bool hpet_legacy_int_enabled; static unsigned long hpet_freq; bool boot_hpet_disable; bool hpet_force_user; static bool hpet_verbose; static inline struct hpet_channel *clockevent_to_channel(struct clock_event_device *evt) { return container_of(evt, struct hpet_channel, evt); } inline unsigned int hpet_readl(unsigned int a) { return readl(hpet_virt_address + a); } static inline void hpet_writel(unsigned int d, unsigned int a) { writel(d, hpet_virt_address + a); } static inline void hpet_set_mapping(void) { hpet_virt_address = ioremap(hpet_address, HPET_MMAP_SIZE); } static inline void hpet_clear_mapping(void) { iounmap(hpet_virt_address); hpet_virt_address = NULL; } /* * HPET command line enable / disable */ static int __init hpet_setup(char *str) { while (str) { char *next = strchr(str, ','); if (next) *next++ = 0; if (!strncmp("disable", str, 7)) boot_hpet_disable = true; if (!strncmp("force", str, 5)) hpet_force_user = true; if (!strncmp("verbose", str, 7)) hpet_verbose = true; str = next; } return 1; } __setup("hpet=", hpet_setup); static int __init disable_hpet(char *str) { boot_hpet_disable = true; return 1; } __setup("nohpet", disable_hpet); static inline int is_hpet_capable(void) { return !boot_hpet_disable && hpet_address; } /** * is_hpet_enabled - Check whether the legacy HPET timer interrupt is enabled */ int is_hpet_enabled(void) { return is_hpet_capable() && hpet_legacy_int_enabled; } EXPORT_SYMBOL_GPL(is_hpet_enabled); static void _hpet_print_config(const char *function, int line) { u32 i, id, period, cfg, status, channels, l, h; pr_info("%s(%d):\n", function, line); id = hpet_readl(HPET_ID); period = hpet_readl(HPET_PERIOD); pr_info("ID: 0x%x, PERIOD: 0x%x\n", id, period); cfg = hpet_readl(HPET_CFG); status = hpet_readl(HPET_STATUS); pr_info("CFG: 0x%x, STATUS: 0x%x\n", cfg, status); l = hpet_readl(HPET_COUNTER); h = hpet_readl(HPET_COUNTER+4); pr_info("COUNTER_l: 0x%x, COUNTER_h: 0x%x\n", l, h); channels = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; for (i = 0; i < channels; i++) { l = hpet_readl(HPET_Tn_CFG(i)); h = hpet_readl(HPET_Tn_CFG(i)+4); pr_info("T%d: CFG_l: 0x%x, CFG_h: 0x%x\n", i, l, h); l = hpet_readl(HPET_Tn_CMP(i)); h = hpet_readl(HPET_Tn_CMP(i)+4); pr_info("T%d: CMP_l: 0x%x, CMP_h: 0x%x\n", i, l, h); l = hpet_readl(HPET_Tn_ROUTE(i)); h = hpet_readl(HPET_Tn_ROUTE(i)+4); pr_info("T%d ROUTE_l: 0x%x, ROUTE_h: 0x%x\n", i, l, h); } } #define hpet_print_config() \ do { \ if (hpet_verbose) \ _hpet_print_config(__func__, __LINE__); \ } while (0) /* * When the HPET driver (/dev/hpet) is enabled, we need to reserve * timer 0 and timer 1 in case of RTC emulation. */ #ifdef CONFIG_HPET static void __init hpet_reserve_platform_timers(void) { struct hpet_data hd; unsigned int i; memset(&hd, 0, sizeof(hd)); hd.hd_phys_address = hpet_address; hd.hd_address = hpet_virt_address; hd.hd_nirqs = hpet_base.nr_channels; /* * NOTE that hd_irq[] reflects IOAPIC input pins (LEGACY_8254 * is wrong for i8259!) not the output IRQ. Many BIOS writers * don't bother configuring *any* comparator interrupts. */ hd.hd_irq[0] = HPET_LEGACY_8254; hd.hd_irq[1] = HPET_LEGACY_RTC; for (i = 0; i < hpet_base.nr_channels; i++) { struct hpet_channel *hc = hpet_base.channels + i; if (i >= 2) hd.hd_irq[i] = hc->irq; switch (hc->mode) { case HPET_MODE_UNUSED: case HPET_MODE_DEVICE: hc->mode = HPET_MODE_DEVICE; break; case HPET_MODE_CLOCKEVT: case HPET_MODE_LEGACY: hpet_reserve_timer(&hd, hc->num); break; } } hpet_alloc(&hd); } static void __init hpet_select_device_channel(void) { int i; for (i = 0; i < hpet_base.nr_channels; i++) { struct hpet_channel *hc = hpet_base.channels + i; /* Associate the first unused channel to /dev/hpet */ if (hc->mode == HPET_MODE_UNUSED) { hc->mode = HPET_MODE_DEVICE; return; } } } #else static inline void hpet_reserve_platform_timers(void) { } static inline void hpet_select_device_channel(void) {} #endif /* Common HPET functions */ static void hpet_stop_counter(void) { u32 cfg = hpet_readl(HPET_CFG); cfg &= ~HPET_CFG_ENABLE; hpet_writel(cfg, HPET_CFG); } static void hpet_reset_counter(void) { hpet_writel(0, HPET_COUNTER); hpet_writel(0, HPET_COUNTER + 4); } static void hpet_start_counter(void) { unsigned int cfg = hpet_readl(HPET_CFG); cfg |= HPET_CFG_ENABLE; hpet_writel(cfg, HPET_CFG); } static void hpet_restart_counter(void) { hpet_stop_counter(); hpet_reset_counter(); hpet_start_counter(); } static void hpet_resume_device(void) { force_hpet_resume(); } static void hpet_resume_counter(struct clocksource *cs) { hpet_resume_device(); hpet_restart_counter(); } static void hpet_enable_legacy_int(void) { unsigned int cfg = hpet_readl(HPET_CFG); cfg |= HPET_CFG_LEGACY; hpet_writel(cfg, HPET_CFG); hpet_legacy_int_enabled = true; } static int hpet_clkevt_set_state_periodic(struct clock_event_device *evt) { unsigned int channel = clockevent_to_channel(evt)->num; unsigned int cfg, cmp, now; uint64_t delta; hpet_stop_counter(); delta = ((uint64_t)(NSEC_PER_SEC / HZ)) * evt->mult; delta >>= evt->shift; now = hpet_readl(HPET_COUNTER); cmp = now + (unsigned int)delta; cfg = hpet_readl(HPET_Tn_CFG(channel)); cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT; hpet_writel(cfg, HPET_Tn_CFG(channel)); hpet_writel(cmp, HPET_Tn_CMP(channel)); udelay(1); /* * HPET on AMD 81xx needs a second write (with HPET_TN_SETVAL * cleared) to T0_CMP to set the period. The HPET_TN_SETVAL * bit is automatically cleared after the first write. * (See AMD-8111 HyperTransport I/O Hub Data Sheet, * Publication # 24674) */ hpet_writel((unsigned int)delta, HPET_Tn_CMP(channel)); hpet_start_counter(); hpet_print_config(); return 0; } static int hpet_clkevt_set_state_oneshot(struct clock_event_device *evt) { unsigned int channel = clockevent_to_channel(evt)->num; unsigned int cfg; cfg = hpet_readl(HPET_Tn_CFG(channel)); cfg &= ~HPET_TN_PERIODIC; cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; hpet_writel(cfg, HPET_Tn_CFG(channel)); return 0; } static int hpet_clkevt_set_state_shutdown(struct clock_event_device *evt) { unsigned int channel = clockevent_to_channel(evt)->num; unsigned int cfg; cfg = hpet_readl(HPET_Tn_CFG(channel)); cfg &= ~HPET_TN_ENABLE; hpet_writel(cfg, HPET_Tn_CFG(channel)); return 0; } static int hpet_clkevt_legacy_resume(struct clock_event_device *evt) { hpet_enable_legacy_int(); hpet_print_config(); return 0; } static int hpet_clkevt_set_next_event(unsigned long delta, struct clock_event_device *evt) { unsigned int channel = clockevent_to_channel(evt)->num; u32 cnt; s32 res; cnt = hpet_readl(HPET_COUNTER); cnt += (u32) delta; hpet_writel(cnt, HPET_Tn_CMP(channel)); /* * HPETs are a complete disaster. The compare register is * based on a equal comparison and neither provides a less * than or equal functionality (which would require to take * the wraparound into account) nor a simple count down event * mode. Further the write to the comparator register is * delayed internally up to two HPET clock cycles in certain * chipsets (ATI, ICH9,10). Some newer AMD chipsets have even * longer delays. We worked around that by reading back the * compare register, but that required another workaround for * ICH9,10 chips where the first readout after write can * return the old stale value. We already had a minimum * programming delta of 5us enforced, but a NMI or SMI hitting * between the counter readout and the comparator write can * move us behind that point easily. Now instead of reading * the compare register back several times, we make the ETIME * decision based on the following: Return ETIME if the * counter value after the write is less than HPET_MIN_CYCLES * away from the event or if the counter is already ahead of * the event. The minimum programming delta for the generic * clockevents code is set to 1.5 * HPET_MIN_CYCLES. */ res = (s32)(cnt - hpet_readl(HPET_COUNTER)); return res < HPET_MIN_CYCLES ? -ETIME : 0; } static void hpet_init_clockevent(struct hpet_channel *hc, unsigned int rating) { struct clock_event_device *evt = &hc->evt; evt->rating = rating; evt->irq = hc->irq; evt->name = hc->name; evt->cpumask = cpumask_of(hc->cpu); evt->set_state_oneshot = hpet_clkevt_set_state_oneshot; evt->set_next_event = hpet_clkevt_set_next_event; evt->set_state_shutdown = hpet_clkevt_set_state_shutdown; evt->features = CLOCK_EVT_FEAT_ONESHOT; if (hc->boot_cfg & HPET_TN_PERIODIC) { evt->features |= CLOCK_EVT_FEAT_PERIODIC; evt->set_state_periodic = hpet_clkevt_set_state_periodic; } } static void __init hpet_legacy_clockevent_register(struct hpet_channel *hc) { /* * Start HPET with the boot CPU's cpumask and make it global after * the IO_APIC has been initialized. */ hc->cpu = boot_cpu_data.cpu_index; strscpy(hc->name, "hpet", sizeof(hc->name)); hpet_init_clockevent(hc, 50); hc->evt.tick_resume = hpet_clkevt_legacy_resume; /* * Legacy horrors and sins from the past. HPET used periodic mode * unconditionally forever on the legacy channel 0. Removing the * below hack and using the conditional in hpet_init_clockevent() * makes at least Qemu and one hardware machine fail to boot. * There are two issues which cause the boot failure: * * #1 After the timer delivery test in IOAPIC and the IOAPIC setup * the next interrupt is not delivered despite the HPET channel * being programmed correctly. Reprogramming the HPET after * switching to IOAPIC makes it work again. After fixing this, * the next issue surfaces: * * #2 Due to the unconditional periodic mode availability the Local * APIC timer calibration can hijack the global clockevents * event handler without causing damage. Using oneshot at this * stage makes if hang because the HPET does not get * reprogrammed due to the handler hijacking. Duh, stupid me! * * Both issues require major surgery and especially the kick HPET * again after enabling IOAPIC results in really nasty hackery. * This 'assume periodic works' magic has survived since HPET * support got added, so it's questionable whether this should be * fixed. Both Qemu and the failing hardware machine support * periodic mode despite the fact that both don't advertise it in * the configuration register and both need that extra kick after * switching to IOAPIC. Seems to be a feature... */ hc->evt.features |= CLOCK_EVT_FEAT_PERIODIC; hc->evt.set_state_periodic = hpet_clkevt_set_state_periodic; /* Start HPET legacy interrupts */ hpet_enable_legacy_int(); clockevents_config_and_register(&hc->evt, hpet_freq, HPET_MIN_PROG_DELTA, 0x7FFFFFFF); global_clock_event = &hc->evt; pr_debug("Clockevent registered\n"); } /* * HPET MSI Support */ #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_GENERIC_MSI_IRQ) static void hpet_msi_unmask(struct irq_data *data) { struct hpet_channel *hc = irq_data_get_irq_handler_data(data); unsigned int cfg; cfg = hpet_readl(HPET_Tn_CFG(hc->num)); cfg |= HPET_TN_ENABLE | HPET_TN_FSB; hpet_writel(cfg, HPET_Tn_CFG(hc->num)); } static void hpet_msi_mask(struct irq_data *data) { struct hpet_channel *hc = irq_data_get_irq_handler_data(data); unsigned int cfg; cfg = hpet_readl(HPET_Tn_CFG(hc->num)); cfg &= ~(HPET_TN_ENABLE | HPET_TN_FSB); hpet_writel(cfg, HPET_Tn_CFG(hc->num)); } static void hpet_msi_write(struct hpet_channel *hc, struct msi_msg *msg) { hpet_writel(msg->data, HPET_Tn_ROUTE(hc->num)); hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hc->num) + 4); } static void hpet_msi_write_msg(struct irq_data *data, struct msi_msg *msg) { hpet_msi_write(irq_data_get_irq_handler_data(data), msg); } static struct irq_chip hpet_msi_controller __ro_after_init = { .name = "HPET-MSI", .irq_unmask = hpet_msi_unmask, .irq_mask = hpet_msi_mask, .irq_ack = irq_chip_ack_parent, .irq_set_affinity = msi_domain_set_affinity, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_write_msi_msg = hpet_msi_write_msg, .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_AFFINITY_PRE_STARTUP, }; static int hpet_msi_init(struct irq_domain *domain, struct msi_domain_info *info, unsigned int virq, irq_hw_number_t hwirq, msi_alloc_info_t *arg) { irq_domain_set_info(domain, virq, arg->hwirq, info->chip, NULL, handle_edge_irq, arg->data, "edge"); return 0; } static struct msi_domain_ops hpet_msi_domain_ops = { .msi_init = hpet_msi_init, }; static struct msi_domain_info hpet_msi_domain_info = { .ops = &hpet_msi_domain_ops, .chip = &hpet_msi_controller, .flags = MSI_FLAG_USE_DEF_DOM_OPS, }; static struct irq_domain *hpet_create_irq_domain(int hpet_id) { struct msi_domain_info *domain_info; struct irq_domain *parent, *d; struct fwnode_handle *fn; struct irq_fwspec fwspec; if (x86_vector_domain == NULL) return NULL; domain_info = kzalloc(sizeof(*domain_info), GFP_KERNEL); if (!domain_info) return NULL; *domain_info = hpet_msi_domain_info; domain_info->data = (void *)(long)hpet_id; fn = irq_domain_alloc_named_id_fwnode(hpet_msi_controller.name, hpet_id); if (!fn) { kfree(domain_info); return NULL; } fwspec.fwnode = fn; fwspec.param_count = 1; fwspec.param[0] = hpet_id; parent = irq_find_matching_fwspec(&fwspec, DOMAIN_BUS_GENERIC_MSI); if (!parent) { irq_domain_free_fwnode(fn); kfree(domain_info); return NULL; } if (parent != x86_vector_domain) hpet_msi_controller.name = "IR-HPET-MSI"; d = msi_create_irq_domain(fn, domain_info, parent); if (!d) { irq_domain_free_fwnode(fn); kfree(domain_info); } return d; } static inline int hpet_dev_id(struct irq_domain *domain) { struct msi_domain_info *info = msi_get_domain_info(domain); return (int)(long)info->data; } static int hpet_assign_irq(struct irq_domain *domain, struct hpet_channel *hc, int dev_num) { struct irq_alloc_info info; init_irq_alloc_info(&info, NULL); info.type = X86_IRQ_ALLOC_TYPE_HPET; info.data = hc; info.devid = hpet_dev_id(domain); info.hwirq = dev_num; return irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, &info); } static int hpet_clkevt_msi_resume(struct clock_event_device *evt) { struct hpet_channel *hc = clockevent_to_channel(evt); struct irq_data *data = irq_get_irq_data(hc->irq); struct msi_msg msg; /* Restore the MSI msg and unmask the interrupt */ irq_chip_compose_msi_msg(data, &msg); hpet_msi_write(hc, &msg); hpet_msi_unmask(data); return 0; } static irqreturn_t hpet_msi_interrupt_handler(int irq, void *data) { struct hpet_channel *hc = data; struct clock_event_device *evt = &hc->evt; if (!evt->event_handler) { pr_info("Spurious interrupt HPET channel %d\n", hc->num); return IRQ_HANDLED; } evt->event_handler(evt); return IRQ_HANDLED; } static int hpet_setup_msi_irq(struct hpet_channel *hc) { if (request_irq(hc->irq, hpet_msi_interrupt_handler, IRQF_TIMER | IRQF_NOBALANCING, hc->name, hc)) return -1; disable_irq(hc->irq); irq_set_affinity(hc->irq, cpumask_of(hc->cpu)); enable_irq(hc->irq); pr_debug("%s irq %u for MSI\n", hc->name, hc->irq); return 0; } /* Invoked from the hotplug callback on @cpu */ static void init_one_hpet_msi_clockevent(struct hpet_channel *hc, int cpu) { struct clock_event_device *evt = &hc->evt; hc->cpu = cpu; per_cpu(cpu_hpet_channel, cpu) = hc; hpet_setup_msi_irq(hc); hpet_init_clockevent(hc, 110); evt->tick_resume = hpet_clkevt_msi_resume; clockevents_config_and_register(evt, hpet_freq, HPET_MIN_PROG_DELTA, 0x7FFFFFFF); } static struct hpet_channel *hpet_get_unused_clockevent(void) { int i; for (i = 0; i < hpet_base.nr_channels; i++) { struct hpet_channel *hc = hpet_base.channels + i; if (hc->mode != HPET_MODE_CLOCKEVT || hc->in_use) continue; hc->in_use = 1; return hc; } return NULL; } static int hpet_cpuhp_online(unsigned int cpu) { struct hpet_channel *hc = hpet_get_unused_clockevent(); if (hc) init_one_hpet_msi_clockevent(hc, cpu); return 0; } static int hpet_cpuhp_dead(unsigned int cpu) { struct hpet_channel *hc = per_cpu(cpu_hpet_channel, cpu); if (!hc) return 0; free_irq(hc->irq, hc); hc->in_use = 0; per_cpu(cpu_hpet_channel, cpu) = NULL; return 0; } static void __init hpet_select_clockevents(void) { unsigned int i; hpet_base.nr_clockevents = 0; /* No point if MSI is disabled or CPU has an Always Running APIC Timer */ if (hpet_msi_disable || boot_cpu_has(X86_FEATURE_ARAT)) return; hpet_print_config(); hpet_domain = hpet_create_irq_domain(hpet_blockid); if (!hpet_domain) return; for (i = 0; i < hpet_base.nr_channels; i++) { struct hpet_channel *hc = hpet_base.channels + i; int irq; if (hc->mode != HPET_MODE_UNUSED) continue; /* Only consider HPET channel with MSI support */ if (!(hc->boot_cfg & HPET_TN_FSB_CAP)) continue; sprintf(hc->name, "hpet%d", i); irq = hpet_assign_irq(hpet_domain, hc, hc->num); if (irq <= 0) continue; hc->irq = irq; hc->mode = HPET_MODE_CLOCKEVT; if (++hpet_base.nr_clockevents == num_possible_cpus()) break; } pr_info("%d channels of %d reserved for per-cpu timers\n", hpet_base.nr_channels, hpet_base.nr_clockevents); } #else static inline void hpet_select_clockevents(void) { } #define hpet_cpuhp_online NULL #define hpet_cpuhp_dead NULL #endif /* * Clock source related code */ #if defined(CONFIG_SMP) && defined(CONFIG_64BIT) /* * Reading the HPET counter is a very slow operation. If a large number of * CPUs are trying to access the HPET counter simultaneously, it can cause * massive delays and slow down system performance dramatically. This may * happen when HPET is the default clock source instead of TSC. For a * really large system with hundreds of CPUs, the slowdown may be so * severe, that it can actually crash the system because of a NMI watchdog * soft lockup, for example. * * If multiple CPUs are trying to access the HPET counter at the same time, * we don't actually need to read the counter multiple times. Instead, the * other CPUs can use the counter value read by the first CPU in the group. * * This special feature is only enabled on x86-64 systems. It is unlikely * that 32-bit x86 systems will have enough CPUs to require this feature * with its associated locking overhead. We also need 64-bit atomic read. * * The lock and the HPET value are stored together and can be read in a * single atomic 64-bit read. It is explicitly assumed that arch_spinlock_t * is 32 bits in size. */ union hpet_lock { struct { arch_spinlock_t lock; u32 value; }; u64 lockval; }; static union hpet_lock hpet __cacheline_aligned = { { .lock = __ARCH_SPIN_LOCK_UNLOCKED, }, }; static u64 read_hpet(struct clocksource *cs) { unsigned long flags; union hpet_lock old, new; BUILD_BUG_ON(sizeof(union hpet_lock) != 8); /* * Read HPET directly if in NMI. */ if (in_nmi()) return (u64)hpet_readl(HPET_COUNTER); /* * Read the current state of the lock and HPET value atomically. */ old.lockval = READ_ONCE(hpet.lockval); if (arch_spin_is_locked(&old.lock)) goto contended; local_irq_save(flags); if (arch_spin_trylock(&hpet.lock)) { new.value = hpet_readl(HPET_COUNTER); /* * Use WRITE_ONCE() to prevent store tearing. */ WRITE_ONCE(hpet.value, new.value); arch_spin_unlock(&hpet.lock); local_irq_restore(flags); return (u64)new.value; } local_irq_restore(flags); contended: /* * Contended case * -------------- * Wait until the HPET value change or the lock is free to indicate * its value is up-to-date. * * It is possible that old.value has already contained the latest * HPET value while the lock holder was in the process of releasing * the lock. Checking for lock state change will enable us to return * the value immediately instead of waiting for the next HPET reader * to come along. */ do { cpu_relax(); new.lockval = READ_ONCE(hpet.lockval); } while ((new.value == old.value) && arch_spin_is_locked(&new.lock)); return (u64)new.value; } #else /* * For UP or 32-bit. */ static u64 read_hpet(struct clocksource *cs) { return (u64)hpet_readl(HPET_COUNTER); } #endif static struct clocksource clocksource_hpet = { .name = "hpet", .rating = 250, .read = read_hpet, .mask = HPET_MASK, .flags = CLOCK_SOURCE_IS_CONTINUOUS, .resume = hpet_resume_counter, }; /* * AMD SB700 based systems with spread spectrum enabled use a SMM based * HPET emulation to provide proper frequency setting. * * On such systems the SMM code is initialized with the first HPET register * access and takes some time to complete. During this time the config * register reads 0xffffffff. We check for max 1000 loops whether the * config register reads a non-0xffffffff value to make sure that the * HPET is up and running before we proceed any further. * * A counting loop is safe, as the HPET access takes thousands of CPU cycles. * * On non-SB700 based machines this check is only done once and has no * side effects. */ static bool __init hpet_cfg_working(void) { int i; for (i = 0; i < 1000; i++) { if (hpet_readl(HPET_CFG) != 0xFFFFFFFF) return true; } pr_warn("Config register invalid. Disabling HPET\n"); return false; } static bool __init hpet_counting(void) { u64 start, now, t1; hpet_restart_counter(); t1 = hpet_readl(HPET_COUNTER); start = rdtsc(); /* * We don't know the TSC frequency yet, but waiting for * 200000 TSC cycles is safe: * 4 GHz == 50us * 1 GHz == 200us */ do { if (t1 != hpet_readl(HPET_COUNTER)) return true; now = rdtsc(); } while ((now - start) < 200000UL); pr_warn("Counter not counting. HPET disabled\n"); return false; } static bool __init mwait_pc10_supported(void) { unsigned int eax, ebx, ecx, mwait_substates; if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return false; if (!cpu_feature_enabled(X86_FEATURE_MWAIT)) return false; cpuid(CPUID_LEAF_MWAIT, &eax, &ebx, &ecx, &mwait_substates); return (ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) && (ecx & CPUID5_ECX_INTERRUPT_BREAK) && (mwait_substates & (0xF << 28)); } /* * Check whether the system supports PC10. If so force disable HPET as that * stops counting in PC10. This check is overbroad as it does not take any * of the following into account: * * - ACPI tables * - Enablement of intel_idle * - Command line arguments which limit intel_idle C-state support * * That's perfectly fine. HPET is a piece of hardware designed by committee * and the only reasons why it is still in use on modern systems is the * fact that it is impossible to reliably query TSC and CPU frequency via * CPUID or firmware. * * If HPET is functional it is useful for calibrating TSC, but this can be * done via PMTIMER as well which seems to be the last remaining timer on * X86/INTEL platforms that has not been completely wreckaged by feature * creep. * * In theory HPET support should be removed altogether, but there are older * systems out there which depend on it because TSC and APIC timer are * dysfunctional in deeper C-states. * * It's only 20 years now that hardware people have been asked to provide * reliable and discoverable facilities which can be used for timekeeping * and per CPU timer interrupts. * * The probability that this problem is going to be solved in the * foreseeable future is close to zero, so the kernel has to be cluttered * with heuristics to keep up with the ever growing amount of hardware and * firmware trainwrecks. Hopefully some day hardware people will understand * that the approach of "This can be fixed in software" is not sustainable. * Hope dies last... */ static bool __init hpet_is_pc10_damaged(void) { unsigned long long pcfg; /* Check whether PC10 substates are supported */ if (!mwait_pc10_supported()) return false; /* Check whether PC10 is enabled in PKG C-state limit */ rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, pcfg); if ((pcfg & 0xF) < 8) return false; if (hpet_force_user) { pr_warn("HPET force enabled via command line, but dysfunctional in PC10.\n"); return false; } pr_info("HPET dysfunctional in PC10. Force disabled.\n"); boot_hpet_disable = true; return true; } /** * hpet_enable - Try to setup the HPET timer. Returns 1 on success. */ int __init hpet_enable(void) { u32 hpet_period, cfg, id, irq; unsigned int i, channels; struct hpet_channel *hc; u64 freq; if (!is_hpet_capable()) return 0; if (hpet_is_pc10_damaged()) return 0; hpet_set_mapping(); if (!hpet_virt_address) return 0; /* Validate that the config register is working */ if (!hpet_cfg_working()) goto out_nohpet; /* * Read the period and check for a sane value: */ hpet_period = hpet_readl(HPET_PERIOD); if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD) goto out_nohpet; /* The period is a femtoseconds value. Convert it to a frequency. */ freq = FSEC_PER_SEC; do_div(freq, hpet_period); hpet_freq = freq; /* * Read the HPET ID register to retrieve the IRQ routing * information and the number of channels */ id = hpet_readl(HPET_ID); hpet_print_config(); /* This is the HPET channel number which is zero based */ channels = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; /* * The legacy routing mode needs at least two channels, tick timer * and the rtc emulation channel. */ if (IS_ENABLED(CONFIG_HPET_EMULATE_RTC) && channels < 2) goto out_nohpet; hc = kcalloc(channels, sizeof(*hc), GFP_KERNEL); if (!hc) { pr_warn("Disabling HPET.\n"); goto out_nohpet; } hpet_base.channels = hc; hpet_base.nr_channels = channels; /* Read, store and sanitize the global configuration */ cfg = hpet_readl(HPET_CFG); hpet_base.boot_cfg = cfg; cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY); hpet_writel(cfg, HPET_CFG); if (cfg) pr_warn("Global config: Unknown bits %#x\n", cfg); /* Read, store and sanitize the per channel configuration */ for (i = 0; i < channels; i++, hc++) { hc->num = i; cfg = hpet_readl(HPET_Tn_CFG(i)); hc->boot_cfg = cfg; irq = (cfg & Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT; hc->irq = irq; cfg &= ~(HPET_TN_ENABLE | HPET_TN_LEVEL | HPET_TN_FSB); hpet_writel(cfg, HPET_Tn_CFG(i)); cfg &= ~(HPET_TN_PERIODIC | HPET_TN_PERIODIC_CAP | HPET_TN_64BIT_CAP | HPET_TN_32BIT | HPET_TN_ROUTE | HPET_TN_FSB | HPET_TN_FSB_CAP); if (cfg) pr_warn("Channel #%u config: Unknown bits %#x\n", i, cfg); } hpet_print_config(); /* * Validate that the counter is counting. This needs to be done * after sanitizing the config registers to properly deal with * force enabled HPETs. */ if (!hpet_counting()) goto out_nohpet; if (tsc_clocksource_watchdog_disabled()) clocksource_hpet.flags |= CLOCK_SOURCE_MUST_VERIFY; clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq); if (id & HPET_ID_LEGSUP) { hpet_legacy_clockevent_register(&hpet_base.channels[0]); hpet_base.channels[0].mode = HPET_MODE_LEGACY; if (IS_ENABLED(CONFIG_HPET_EMULATE_RTC)) hpet_base.channels[1].mode = HPET_MODE_LEGACY; return 1; } return 0; out_nohpet: kfree(hpet_base.channels); hpet_base.channels = NULL; hpet_base.nr_channels = 0; hpet_clear_mapping(); hpet_address = 0; return 0; } /* * The late initialization runs after the PCI quirks have been invoked * which might have detected a system on which the HPET can be enforced. * * Also, the MSI machinery is not working yet when the HPET is initialized * early. * * If the HPET is enabled, then: * * 1) Reserve one channel for /dev/hpet if CONFIG_HPET=y * 2) Reserve up to num_possible_cpus() channels as per CPU clockevents * 3) Setup /dev/hpet if CONFIG_HPET=y * 4) Register hotplug callbacks when clockevents are available */ static __init int hpet_late_init(void) { int ret; if (!hpet_address) { if (!force_hpet_address) return -ENODEV; hpet_address = force_hpet_address; hpet_enable(); } if (!hpet_virt_address) return -ENODEV; hpet_select_device_channel(); hpet_select_clockevents(); hpet_reserve_platform_timers(); hpet_print_config(); if (!hpet_base.nr_clockevents) return 0; ret = cpuhp_setup_state(CPUHP_AP_X86_HPET_ONLINE, "x86/hpet:online", hpet_cpuhp_online, NULL); if (ret) return ret; ret = cpuhp_setup_state(CPUHP_X86_HPET_DEAD, "x86/hpet:dead", NULL, hpet_cpuhp_dead); if (ret) goto err_cpuhp; return 0; err_cpuhp: cpuhp_remove_state(CPUHP_AP_X86_HPET_ONLINE); return ret; } fs_initcall(hpet_late_init); void hpet_disable(void) { unsigned int i; u32 cfg; if (!is_hpet_capable() || !hpet_virt_address) return; /* Restore boot configuration with the enable bit cleared */ cfg = hpet_base.boot_cfg; cfg &= ~HPET_CFG_ENABLE; hpet_writel(cfg, HPET_CFG); /* Restore the channel boot configuration */ for (i = 0; i < hpet_base.nr_channels; i++) hpet_writel(hpet_base.channels[i].boot_cfg, HPET_Tn_CFG(i)); /* If the HPET was enabled at boot time, reenable it */ if (hpet_base.boot_cfg & HPET_CFG_ENABLE) hpet_writel(hpet_base.boot_cfg, HPET_CFG); } #ifdef CONFIG_HPET_EMULATE_RTC /* * HPET in LegacyReplacement mode eats up the RTC interrupt line. When HPET * is enabled, we support RTC interrupt functionality in software. * * RTC has 3 kinds of interrupts: * * 1) Update Interrupt - generate an interrupt, every second, when the * RTC clock is updated * 2) Alarm Interrupt - generate an interrupt at a specific time of day * 3) Periodic Interrupt - generate periodic interrupt, with frequencies * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all frequencies in powers of 2) * * (1) and (2) above are implemented using polling at a frequency of 64 Hz: * DEFAULT_RTC_INT_FREQ. * * The exact frequency is a tradeoff between accuracy and interrupt overhead. * * For (3), we use interrupts at 64 Hz, or the user specified periodic frequency, * if it's higher. */ #include <linux/mc146818rtc.h> #include <linux/rtc.h> #define DEFAULT_RTC_INT_FREQ 64 #define DEFAULT_RTC_SHIFT 6 #define RTC_NUM_INTS 1 static unsigned long hpet_rtc_flags; static int hpet_prev_update_sec; static struct rtc_time hpet_alarm_time; static unsigned long hpet_pie_count; static u32 hpet_t1_cmp; static u32 hpet_default_delta; static u32 hpet_pie_delta; static unsigned long hpet_pie_limit; static rtc_irq_handler irq_handler; /* * Check that the HPET counter c1 is ahead of c2 */ static inline int hpet_cnt_ahead(u32 c1, u32 c2) { return (s32)(c2 - c1) < 0; } /* * Registers a IRQ handler. */ int hpet_register_irq_handler(rtc_irq_handler handler) { if (!is_hpet_enabled()) return -ENODEV; if (irq_handler) return -EBUSY; irq_handler = handler; return 0; } EXPORT_SYMBOL_GPL(hpet_register_irq_handler); /* * Deregisters the IRQ handler registered with hpet_register_irq_handler() * and does cleanup. */ void hpet_unregister_irq_handler(rtc_irq_handler handler) { if (!is_hpet_enabled()) return; irq_handler = NULL; hpet_rtc_flags = 0; } EXPORT_SYMBOL_GPL(hpet_unregister_irq_handler); /* * Channel 1 for RTC emulation. We use one shot mode, as periodic mode * is not supported by all HPET implementations for channel 1. * * hpet_rtc_timer_init() is called when the rtc is initialized. */ int hpet_rtc_timer_init(void) { unsigned int cfg, cnt, delta; unsigned long flags; if (!is_hpet_enabled()) return 0; if (!hpet_default_delta) { struct clock_event_device *evt = &hpet_base.channels[0].evt; uint64_t clc; clc = (uint64_t) evt->mult * NSEC_PER_SEC; clc >>= evt->shift + DEFAULT_RTC_SHIFT; hpet_default_delta = clc; } if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit) delta = hpet_default_delta; else delta = hpet_pie_delta; local_irq_save(flags); cnt = delta + hpet_readl(HPET_COUNTER); hpet_writel(cnt, HPET_T1_CMP); hpet_t1_cmp = cnt; cfg = hpet_readl(HPET_T1_CFG); cfg &= ~HPET_TN_PERIODIC; cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; hpet_writel(cfg, HPET_T1_CFG); local_irq_restore(flags); return 1; } EXPORT_SYMBOL_GPL(hpet_rtc_timer_init); static void hpet_disable_rtc_channel(void) { u32 cfg = hpet_readl(HPET_T1_CFG); cfg &= ~HPET_TN_ENABLE; hpet_writel(cfg, HPET_T1_CFG); } /* * The functions below are called from rtc driver. * Return 0 if HPET is not being used. * Otherwise do the necessary changes and return 1. */ int hpet_mask_rtc_irq_bit(unsigned long bit_mask) { if (!is_hpet_enabled()) return 0; hpet_rtc_flags &= ~bit_mask; if (unlikely(!hpet_rtc_flags)) hpet_disable_rtc_channel(); return 1; } EXPORT_SYMBOL_GPL(hpet_mask_rtc_irq_bit); int hpet_set_rtc_irq_bit(unsigned long bit_mask) { unsigned long oldbits = hpet_rtc_flags; if (!is_hpet_enabled()) return 0; hpet_rtc_flags |= bit_mask; if ((bit_mask & RTC_UIE) && !(oldbits & RTC_UIE)) hpet_prev_update_sec = -1; if (!oldbits) hpet_rtc_timer_init(); return 1; } EXPORT_SYMBOL_GPL(hpet_set_rtc_irq_bit); int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec) { if (!is_hpet_enabled()) return 0; hpet_alarm_time.tm_hour = hrs; hpet_alarm_time.tm_min = min; hpet_alarm_time.tm_sec = sec; return 1; } EXPORT_SYMBOL_GPL(hpet_set_alarm_time); int hpet_set_periodic_freq(unsigned long freq) { uint64_t clc; if (!is_hpet_enabled()) return 0; if (freq <= DEFAULT_RTC_INT_FREQ) { hpet_pie_limit = DEFAULT_RTC_INT_FREQ / freq; } else { struct clock_event_device *evt = &hpet_base.channels[0].evt; clc = (uint64_t) evt->mult * NSEC_PER_SEC; do_div(clc, freq); clc >>= evt->shift; hpet_pie_delta = clc; hpet_pie_limit = 0; } return 1; } EXPORT_SYMBOL_GPL(hpet_set_periodic_freq); int hpet_rtc_dropped_irq(void) { return is_hpet_enabled(); } EXPORT_SYMBOL_GPL(hpet_rtc_dropped_irq); static void hpet_rtc_timer_reinit(void) { unsigned int delta; int lost_ints = -1; if (unlikely(!hpet_rtc_flags)) hpet_disable_rtc_channel(); if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit) delta = hpet_default_delta; else delta = hpet_pie_delta; /* * Increment the comparator value until we are ahead of the * current count. */ do { hpet_t1_cmp += delta; hpet_writel(hpet_t1_cmp, HPET_T1_CMP); lost_ints++; } while (!hpet_cnt_ahead(hpet_t1_cmp, hpet_readl(HPET_COUNTER))); if (lost_ints) { if (hpet_rtc_flags & RTC_PIE) hpet_pie_count += lost_ints; if (printk_ratelimit()) pr_warn("Lost %d RTC interrupts\n", lost_ints); } } irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) { struct rtc_time curr_time; unsigned long rtc_int_flag = 0; hpet_rtc_timer_reinit(); memset(&curr_time, 0, sizeof(struct rtc_time)); if (hpet_rtc_flags & (RTC_UIE | RTC_AIE)) { if (unlikely(mc146818_get_time(&curr_time, 10) < 0)) { pr_err_ratelimited("unable to read current time from RTC\n"); return IRQ_HANDLED; } } if (hpet_rtc_flags & RTC_UIE && curr_time.tm_sec != hpet_prev_update_sec) { if (hpet_prev_update_sec >= 0) rtc_int_flag = RTC_UF; hpet_prev_update_sec = curr_time.tm_sec; } if (hpet_rtc_flags & RTC_PIE && ++hpet_pie_count >= hpet_pie_limit) { rtc_int_flag |= RTC_PF; hpet_pie_count = 0; } if (hpet_rtc_flags & RTC_AIE && (curr_time.tm_sec == hpet_alarm_time.tm_sec) && (curr_time.tm_min == hpet_alarm_time.tm_min) && (curr_time.tm_hour == hpet_alarm_time.tm_hour)) rtc_int_flag |= RTC_AF; if (rtc_int_flag) { rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8)); if (irq_handler) irq_handler(rtc_int_flag, dev_id); } return IRQ_HANDLED; } EXPORT_SYMBOL_GPL(hpet_rtc_interrupt); #endif
13 13 13 13 13 13 13 13 13 13 12 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 // SPDX-License-Identifier: GPL-2.0 /* * Multipath support for RPC * * Copyright (c) 2015, 2016, Primary Data, Inc. All rights reserved. * * Trond Myklebust <trond.myklebust@primarydata.com> * */ #include <linux/atomic.h> #include <linux/types.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/rcupdate.h> #include <linux/rculist.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/sunrpc/xprt.h> #include <linux/sunrpc/addr.h> #include <linux/sunrpc/xprtmultipath.h> #include "sysfs.h" typedef struct rpc_xprt *(*xprt_switch_find_xprt_t)(struct rpc_xprt_switch *xps, const struct rpc_xprt *cur); static const struct rpc_xprt_iter_ops rpc_xprt_iter_singular; static const struct rpc_xprt_iter_ops rpc_xprt_iter_roundrobin; static const struct rpc_xprt_iter_ops rpc_xprt_iter_listall; static const struct rpc_xprt_iter_ops rpc_xprt_iter_listoffline; static void xprt_switch_add_xprt_locked(struct rpc_xprt_switch *xps, struct rpc_xprt *xprt) { if (unlikely(xprt_get(xprt) == NULL)) return; list_add_tail_rcu(&xprt->xprt_switch, &xps->xps_xprt_list); smp_wmb(); if (xps->xps_nxprts == 0) xps->xps_net = xprt->xprt_net; xps->xps_nxprts++; xps->xps_nactive++; } /** * rpc_xprt_switch_add_xprt - Add a new rpc_xprt to an rpc_xprt_switch * @xps: pointer to struct rpc_xprt_switch * @xprt: pointer to struct rpc_xprt * * Adds xprt to the end of the list of struct rpc_xprt in xps. */ void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps, struct rpc_xprt *xprt) { if (xprt == NULL) return; spin_lock(&xps->xps_lock); if (xps->xps_net == xprt->xprt_net || xps->xps_net == NULL) xprt_switch_add_xprt_locked(xps, xprt); spin_unlock(&xps->xps_lock); rpc_sysfs_xprt_setup(xps, xprt, GFP_KERNEL); } static void xprt_switch_remove_xprt_locked(struct rpc_xprt_switch *xps, struct rpc_xprt *xprt, bool offline) { if (unlikely(xprt == NULL)) return; if (!test_bit(XPRT_OFFLINE, &xprt->state) && offline) xps->xps_nactive--; xps->xps_nxprts--; if (xps->xps_nxprts == 0) xps->xps_net = NULL; smp_wmb(); list_del_rcu(&xprt->xprt_switch); } /** * rpc_xprt_switch_remove_xprt - Removes an rpc_xprt from a rpc_xprt_switch * @xps: pointer to struct rpc_xprt_switch * @xprt: pointer to struct rpc_xprt * @offline: indicates if the xprt that's being removed is in an offline state * * Removes xprt from the list of struct rpc_xprt in xps. */ void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps, struct rpc_xprt *xprt, bool offline) { spin_lock(&xps->xps_lock); xprt_switch_remove_xprt_locked(xps, xprt, offline); spin_unlock(&xps->xps_lock); xprt_put(xprt); } static DEFINE_IDA(rpc_xprtswitch_ids); void xprt_multipath_cleanup_ids(void) { ida_destroy(&rpc_xprtswitch_ids); } static int xprt_switch_alloc_id(struct rpc_xprt_switch *xps, gfp_t gfp_flags) { int id; id = ida_alloc(&rpc_xprtswitch_ids, gfp_flags); if (id < 0) return id; xps->xps_id = id; return 0; } static void xprt_switch_free_id(struct rpc_xprt_switch *xps) { ida_free(&rpc_xprtswitch_ids, xps->xps_id); } /** * xprt_switch_alloc - Allocate a new struct rpc_xprt_switch * @xprt: pointer to struct rpc_xprt * @gfp_flags: allocation flags * * On success, returns an initialised struct rpc_xprt_switch, containing * the entry xprt. Returns NULL on failure. */ struct rpc_xprt_switch *xprt_switch_alloc(struct rpc_xprt *xprt, gfp_t gfp_flags) { struct rpc_xprt_switch *xps; xps = kmalloc(sizeof(*xps), gfp_flags); if (xps != NULL) { spin_lock_init(&xps->xps_lock); kref_init(&xps->xps_kref); xprt_switch_alloc_id(xps, gfp_flags); xps->xps_nxprts = xps->xps_nactive = 0; atomic_long_set(&xps->xps_queuelen, 0); xps->xps_net = NULL; INIT_LIST_HEAD(&xps->xps_xprt_list); xps->xps_iter_ops = &rpc_xprt_iter_singular; rpc_sysfs_xprt_switch_setup(xps, xprt, gfp_flags); xprt_switch_add_xprt_locked(xps, xprt); xps->xps_nunique_destaddr_xprts = 1; rpc_sysfs_xprt_setup(xps, xprt, gfp_flags); } return xps; } static void xprt_switch_free_entries(struct rpc_xprt_switch *xps) { spin_lock(&xps->xps_lock); while (!list_empty(&xps->xps_xprt_list)) { struct rpc_xprt *xprt; xprt = list_first_entry(&xps->xps_xprt_list, struct rpc_xprt, xprt_switch); xprt_switch_remove_xprt_locked(xps, xprt, true); spin_unlock(&xps->xps_lock); xprt_put(xprt); spin_lock(&xps->xps_lock); } spin_unlock(&xps->xps_lock); } static void xprt_switch_free(struct kref *kref) { struct rpc_xprt_switch *xps = container_of(kref, struct rpc_xprt_switch, xps_kref); xprt_switch_free_entries(xps); rpc_sysfs_xprt_switch_destroy(xps); xprt_switch_free_id(xps); kfree_rcu(xps, xps_rcu); } /** * xprt_switch_get - Return a reference to a rpc_xprt_switch * @xps: pointer to struct rpc_xprt_switch * * Returns a reference to xps unless the refcount is already zero. */ struct rpc_xprt_switch *xprt_switch_get(struct rpc_xprt_switch *xps) { if (xps != NULL && kref_get_unless_zero(&xps->xps_kref)) return xps; return NULL; } /** * xprt_switch_put - Release a reference to a rpc_xprt_switch * @xps: pointer to struct rpc_xprt_switch * * Release the reference to xps, and free it once the refcount is zero. */ void xprt_switch_put(struct rpc_xprt_switch *xps) { if (xps != NULL) kref_put(&xps->xps_kref, xprt_switch_free); } /** * rpc_xprt_switch_set_roundrobin - Set a round-robin policy on rpc_xprt_switch * @xps: pointer to struct rpc_xprt_switch * * Sets a round-robin default policy for iterators acting on xps. */ void rpc_xprt_switch_set_roundrobin(struct rpc_xprt_switch *xps) { if (READ_ONCE(xps->xps_iter_ops) != &rpc_xprt_iter_roundrobin) WRITE_ONCE(xps->xps_iter_ops, &rpc_xprt_iter_roundrobin); } static const struct rpc_xprt_iter_ops *xprt_iter_ops(const struct rpc_xprt_iter *xpi) { if (xpi->xpi_ops != NULL) return xpi->xpi_ops; return rcu_dereference(xpi->xpi_xpswitch)->xps_iter_ops; } static void xprt_iter_no_rewind(struct rpc_xprt_iter *xpi) { } static void xprt_iter_default_rewind(struct rpc_xprt_iter *xpi) { WRITE_ONCE(xpi->xpi_cursor, NULL); } static bool xprt_is_active(const struct rpc_xprt *xprt) { return (kref_read(&xprt->kref) != 0 && !test_bit(XPRT_OFFLINE, &xprt->state)); } static struct rpc_xprt *xprt_switch_find_first_entry(struct list_head *head) { struct rpc_xprt *pos; list_for_each_entry_rcu(pos, head, xprt_switch) { if (xprt_is_active(pos)) return pos; } return NULL; } static struct rpc_xprt *xprt_switch_find_first_entry_offline(struct list_head *head) { struct rpc_xprt *pos; list_for_each_entry_rcu(pos, head, xprt_switch) { if (!xprt_is_active(pos)) return pos; } return NULL; } static struct rpc_xprt *xprt_iter_first_entry(struct rpc_xprt_iter *xpi) { struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch); if (xps == NULL) return NULL; return xprt_switch_find_first_entry(&xps->xps_xprt_list); } static struct rpc_xprt *_xprt_switch_find_current_entry(struct list_head *head, const struct rpc_xprt *cur, bool find_active) { struct rpc_xprt *pos; bool found = false; list_for_each_entry_rcu(pos, head, xprt_switch) { if (cur == pos) found = true; if (found && ((find_active && xprt_is_active(pos)) || (!find_active && !xprt_is_active(pos)))) return pos; } return NULL; } static struct rpc_xprt *xprt_switch_find_current_entry(struct list_head *head, const struct rpc_xprt *cur) { return _xprt_switch_find_current_entry(head, cur, true); } static struct rpc_xprt * _xprt_iter_current_entry(struct rpc_xprt_iter *xpi, struct rpc_xprt *first_entry(struct list_head *head), struct rpc_xprt *current_entry(struct list_head *head, const struct rpc_xprt *cur)) { struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch); struct list_head *head; if (xps == NULL) return NULL; head = &xps->xps_xprt_list; if (xpi->xpi_cursor == NULL || xps->xps_nxprts < 2) return first_entry(head); return current_entry(head, xpi->xpi_cursor); } static struct rpc_xprt *xprt_iter_current_entry(struct rpc_xprt_iter *xpi) { return _xprt_iter_current_entry(xpi, xprt_switch_find_first_entry, xprt_switch_find_current_entry); } static struct rpc_xprt *xprt_switch_find_current_entry_offline(struct list_head *head, const struct rpc_xprt *cur) { return _xprt_switch_find_current_entry(head, cur, false); } static struct rpc_xprt *xprt_iter_current_entry_offline(struct rpc_xprt_iter *xpi) { return _xprt_iter_current_entry(xpi, xprt_switch_find_first_entry_offline, xprt_switch_find_current_entry_offline); } static bool __rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, const struct sockaddr *sap) { struct list_head *head; struct rpc_xprt *pos; if (xps == NULL || sap == NULL) return false; head = &xps->xps_xprt_list; list_for_each_entry_rcu(pos, head, xprt_switch) { if (rpc_cmp_addr_port(sap, (struct sockaddr *)&pos->addr)) { pr_info("RPC: addr %s already in xprt switch\n", pos->address_strings[RPC_DISPLAY_ADDR]); return true; } } return false; } bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, const struct sockaddr *sap) { bool res; rcu_read_lock(); res = __rpc_xprt_switch_has_addr(xps, sap); rcu_read_unlock(); return res; } static struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head, const struct rpc_xprt *cur, bool check_active) { struct rpc_xprt *pos, *prev = NULL; bool found = false; list_for_each_entry_rcu(pos, head, xprt_switch) { if (cur == prev) found = true; /* for request to return active transports return only * active, for request to return offline transports * return only offline */ if (found && ((check_active && xprt_is_active(pos)) || (!check_active && !xprt_is_active(pos)))) return pos; prev = pos; } return NULL; } static struct rpc_xprt *xprt_switch_set_next_cursor(struct rpc_xprt_switch *xps, struct rpc_xprt **cursor, xprt_switch_find_xprt_t find_next) { struct rpc_xprt *pos, *old; old = smp_load_acquire(cursor); pos = find_next(xps, old); smp_store_release(cursor, pos); return pos; } static struct rpc_xprt *xprt_iter_next_entry_multiple(struct rpc_xprt_iter *xpi, xprt_switch_find_xprt_t find_next) { struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch); if (xps == NULL) return NULL; return xprt_switch_set_next_cursor(xps, &xpi->xpi_cursor, find_next); } static struct rpc_xprt *__xprt_switch_find_next_entry_roundrobin(struct list_head *head, const struct rpc_xprt *cur) { struct rpc_xprt *ret; ret = xprt_switch_find_next_entry(head, cur, true); if (ret != NULL) return ret; return xprt_switch_find_first_entry(head); } static struct rpc_xprt *xprt_switch_find_next_entry_roundrobin(struct rpc_xprt_switch *xps, const struct rpc_xprt *cur) { struct list_head *head = &xps->xps_xprt_list; struct rpc_xprt *xprt; unsigned int nactive; for (;;) { unsigned long xprt_queuelen, xps_queuelen; xprt = __xprt_switch_find_next_entry_roundrobin(head, cur); if (!xprt) break; xprt_queuelen = atomic_long_read(&xprt->queuelen); xps_queuelen = atomic_long_read(&xps->xps_queuelen); nactive = READ_ONCE(xps->xps_nactive); /* Exit loop if xprt_queuelen <= average queue length */ if (xprt_queuelen * nactive <= xps_queuelen) break; cur = xprt; } return xprt; } static struct rpc_xprt *xprt_iter_next_entry_roundrobin(struct rpc_xprt_iter *xpi) { return xprt_iter_next_entry_multiple(xpi, xprt_switch_find_next_entry_roundrobin); } static struct rpc_xprt *xprt_switch_find_next_entry_all(struct rpc_xprt_switch *xps, const struct rpc_xprt *cur) { return xprt_switch_find_next_entry(&xps->xps_xprt_list, cur, true); } static struct rpc_xprt *xprt_switch_find_next_entry_offline(struct rpc_xprt_switch *xps, const struct rpc_xprt *cur) { return xprt_switch_find_next_entry(&xps->xps_xprt_list, cur, false); } static struct rpc_xprt *xprt_iter_next_entry_all(struct rpc_xprt_iter *xpi) { return xprt_iter_next_entry_multiple(xpi, xprt_switch_find_next_entry_all); } static struct rpc_xprt *xprt_iter_next_entry_offline(struct rpc_xprt_iter *xpi) { return xprt_iter_next_entry_multiple(xpi, xprt_switch_find_next_entry_offline); } /* * xprt_iter_rewind - Resets the xprt iterator * @xpi: pointer to rpc_xprt_iter * * Resets xpi to ensure that it points to the first entry in the list * of transports. */ void xprt_iter_rewind(struct rpc_xprt_iter *xpi) { rcu_read_lock(); xprt_iter_ops(xpi)->xpi_rewind(xpi); rcu_read_unlock(); } static void __xprt_iter_init(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps, const struct rpc_xprt_iter_ops *ops) { rcu_assign_pointer(xpi->xpi_xpswitch, xprt_switch_get(xps)); xpi->xpi_cursor = NULL; xpi->xpi_ops = ops; } /** * xprt_iter_init - Initialise an xprt iterator * @xpi: pointer to rpc_xprt_iter * @xps: pointer to rpc_xprt_switch * * Initialises the iterator to use the default iterator ops * as set in xps. This function is mainly intended for internal * use in the rpc_client. */ void xprt_iter_init(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps) { __xprt_iter_init(xpi, xps, NULL); } /** * xprt_iter_init_listall - Initialise an xprt iterator * @xpi: pointer to rpc_xprt_iter * @xps: pointer to rpc_xprt_switch * * Initialises the iterator to iterate once through the entire list * of entries in xps. */ void xprt_iter_init_listall(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps) { __xprt_iter_init(xpi, xps, &rpc_xprt_iter_listall); } void xprt_iter_init_listoffline(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps) { __xprt_iter_init(xpi, xps, &rpc_xprt_iter_listoffline); } /** * xprt_iter_xchg_switch - Atomically swap out the rpc_xprt_switch * @xpi: pointer to rpc_xprt_iter * @newswitch: pointer to a new rpc_xprt_switch or NULL * * Swaps out the existing xpi->xpi_xpswitch with a new value. */ struct rpc_xprt_switch *xprt_iter_xchg_switch(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *newswitch) { struct rpc_xprt_switch __rcu *oldswitch; /* Atomically swap out the old xpswitch */ oldswitch = xchg(&xpi->xpi_xpswitch, RCU_INITIALIZER(newswitch)); if (newswitch != NULL) xprt_iter_rewind(xpi); return rcu_dereference_protected(oldswitch, true); } /** * xprt_iter_destroy - Destroys the xprt iterator * @xpi: pointer to rpc_xprt_iter */ void xprt_iter_destroy(struct rpc_xprt_iter *xpi) { xprt_switch_put(xprt_iter_xchg_switch(xpi, NULL)); } /** * xprt_iter_xprt - Returns the rpc_xprt pointed to by the cursor * @xpi: pointer to rpc_xprt_iter * * Returns a pointer to the struct rpc_xprt that is currently * pointed to by the cursor. * Caller must be holding rcu_read_lock(). */ struct rpc_xprt *xprt_iter_xprt(struct rpc_xprt_iter *xpi) { WARN_ON_ONCE(!rcu_read_lock_held()); return xprt_iter_ops(xpi)->xpi_xprt(xpi); } static struct rpc_xprt *xprt_iter_get_helper(struct rpc_xprt_iter *xpi, struct rpc_xprt *(*fn)(struct rpc_xprt_iter *)) { struct rpc_xprt *ret; do { ret = fn(xpi); if (ret == NULL) break; ret = xprt_get(ret); } while (ret == NULL); return ret; } /** * xprt_iter_get_next - Returns the next rpc_xprt following the cursor * @xpi: pointer to rpc_xprt_iter * * Returns a reference to the struct rpc_xprt that immediately follows the * entry pointed to by the cursor. */ struct rpc_xprt *xprt_iter_get_next(struct rpc_xprt_iter *xpi) { struct rpc_xprt *xprt; rcu_read_lock(); xprt = xprt_iter_get_helper(xpi, xprt_iter_ops(xpi)->xpi_next); rcu_read_unlock(); return xprt; } /* Policy for always returning the first entry in the rpc_xprt_switch */ static const struct rpc_xprt_iter_ops rpc_xprt_iter_singular = { .xpi_rewind = xprt_iter_no_rewind, .xpi_xprt = xprt_iter_first_entry, .xpi_next = xprt_iter_first_entry, }; /* Policy for round-robin iteration of entries in the rpc_xprt_switch */ static const struct rpc_xprt_iter_ops rpc_xprt_iter_roundrobin = { .xpi_rewind = xprt_iter_default_rewind, .xpi_xprt = xprt_iter_current_entry, .xpi_next = xprt_iter_next_entry_roundrobin, }; /* Policy for once-through iteration of entries in the rpc_xprt_switch */ static const struct rpc_xprt_iter_ops rpc_xprt_iter_listall = { .xpi_rewind = xprt_iter_default_rewind, .xpi_xprt = xprt_iter_current_entry, .xpi_next = xprt_iter_next_entry_all, }; static const struct rpc_xprt_iter_ops rpc_xprt_iter_listoffline = { .xpi_rewind = xprt_iter_default_rewind, .xpi_xprt = xprt_iter_current_entry_offline, .xpi_next = xprt_iter_next_entry_offline, };
92 91 201 202 202 192 2 10 201 202 2 199 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 // SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Support for INET6 connection oriented protocols. * * Authors: See the TCPv6 sources */ #include <linux/module.h> #include <linux/in6.h> #include <linux/ipv6.h> #include <linux/jhash.h> #include <linux/slab.h> #include <net/addrconf.h> #include <net/inet_connection_sock.h> #include <net/inet_ecn.h> #include <net/inet_hashtables.h> #include <net/ip6_route.h> #include <net/sock.h> #include <net/inet6_connection_sock.h> #include <net/sock_reuseport.h> struct dst_entry *inet6_csk_route_req(const struct sock *sk, struct flowi6 *fl6, const struct request_sock *req, u8 proto) { struct inet_request_sock *ireq = inet_rsk(req); const struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *final_p, final; struct dst_entry *dst; memset(fl6, 0, sizeof(*fl6)); fl6->flowi6_proto = proto; fl6->daddr = ireq->ir_v6_rmt_addr; rcu_read_lock(); final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); rcu_read_unlock(); fl6->saddr = ireq->ir_v6_loc_addr; fl6->flowi6_oif = ireq->ir_iif; fl6->flowi6_mark = ireq->ir_mark; fl6->fl6_dport = ireq->ir_rmt_port; fl6->fl6_sport = htons(ireq->ir_num); fl6->flowi6_uid = sk->sk_uid; security_req_classify_flow(req, flowi6_to_flowi_common(fl6)); dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p); if (IS_ERR(dst)) return NULL; return dst; } EXPORT_SYMBOL(inet6_csk_route_req); void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr; sin6->sin6_family = AF_INET6; sin6->sin6_addr = sk->sk_v6_daddr; sin6->sin6_port = inet_sk(sk)->inet_dport; /* We do not store received flowlabel for TCP */ sin6->sin6_flowinfo = 0; sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, sk->sk_bound_dev_if); } EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); static inline struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) { return __sk_dst_check(sk, cookie); } static struct dst_entry *inet6_csk_route_socket(struct sock *sk, struct flowi6 *fl6) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *final_p, final; struct dst_entry *dst; memset(fl6, 0, sizeof(*fl6)); fl6->flowi6_proto = sk->sk_protocol; fl6->daddr = sk->sk_v6_daddr; fl6->saddr = np->saddr; fl6->flowlabel = np->flow_label; IP6_ECN_flow_xmit(sk, fl6->flowlabel); fl6->flowi6_oif = sk->sk_bound_dev_if; fl6->flowi6_mark = sk->sk_mark; fl6->fl6_sport = inet->inet_sport; fl6->fl6_dport = inet->inet_dport; fl6->flowi6_uid = sk->sk_uid; security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); rcu_read_lock(); final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); rcu_read_unlock(); dst = __inet6_csk_dst_check(sk, np->dst_cookie); if (!dst) { dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p); if (!IS_ERR(dst)) ip6_dst_store(sk, dst, NULL, NULL); } return dst; } int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused) { struct ipv6_pinfo *np = inet6_sk(sk); struct flowi6 fl6; struct dst_entry *dst; int res; dst = inet6_csk_route_socket(sk, &fl6); if (IS_ERR(dst)) { WRITE_ONCE(sk->sk_err_soft, -PTR_ERR(dst)); sk->sk_route_caps = 0; kfree_skb(skb); return PTR_ERR(dst); } rcu_read_lock(); skb_dst_set_noref(skb, dst); /* Restore final destination back after routing done */ fl6.daddr = sk->sk_v6_daddr; res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt), np->tclass, READ_ONCE(sk->sk_priority)); rcu_read_unlock(); return res; } EXPORT_SYMBOL_GPL(inet6_csk_xmit); struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu) { struct flowi6 fl6; struct dst_entry *dst = inet6_csk_route_socket(sk, &fl6); if (IS_ERR(dst)) return NULL; dst->ops->update_pmtu(dst, sk, NULL, mtu, true); dst = inet6_csk_route_socket(sk, &fl6); return IS_ERR(dst) ? NULL : dst; } EXPORT_SYMBOL_GPL(inet6_csk_update_pmtu);
22 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 /* SPDX-License-Identifier: LGPL-2.1 */ /* * * Copyright (C) International Business Machines Corp., 2002,2008 * Author(s): Steve French (sfrench@us.ibm.com) * Jeremy Allison (jra@samba.org) * */ #ifndef _CIFS_GLOB_H #define _CIFS_GLOB_H #include <linux/in.h> #include <linux/in6.h> #include <linux/inet.h> #include <linux/slab.h> #include <linux/scatterlist.h> #include <linux/mm.h> #include <linux/mempool.h> #include <linux/workqueue.h> #include <linux/utsname.h> #include <linux/sched/mm.h> #include <linux/netfs.h> #include "cifs_fs_sb.h" #include "cifsacl.h" #include <crypto/internal/hash.h> #include <uapi/linux/cifs/cifs_mount.h> #include "../common/smb2pdu.h" #include "smb2pdu.h" #include <linux/filelock.h> #define SMB_PATH_MAX 260 #define CIFS_PORT 445 #define RFC1001_PORT 139 /* * The sizes of various internal tables and strings */ #define MAX_UID_INFO 16 #define MAX_SES_INFO 2 #define MAX_TCON_INFO 4 #define MAX_TREE_SIZE (2 + CIFS_NI_MAXHOST + 1 + CIFS_MAX_SHARE_LEN + 1) #define CIFS_MIN_RCV_POOL 4 #define MAX_REOPEN_ATT 5 /* these many maximum attempts to reopen a file */ /* * default attribute cache timeout (jiffies) */ #define CIFS_DEF_ACTIMEO (1 * HZ) /* * max sleep time before retry to server */ #define CIFS_MAX_SLEEP 2000 /* * max attribute cache timeout (jiffies) - 2^30 */ #define CIFS_MAX_ACTIMEO (1 << 30) /* * Max persistent and resilient handle timeout (milliseconds). * Windows durable max was 960000 (16 minutes) */ #define SMB3_MAX_HANDLE_TIMEOUT 960000 /* * MAX_REQ is the maximum number of requests that WE will send * on one socket concurrently. */ #define CIFS_MAX_REQ 32767 #define RFC1001_NAME_LEN 15 #define RFC1001_NAME_LEN_WITH_NULL (RFC1001_NAME_LEN + 1) /* maximum length of ip addr as a string (including ipv6 and sctp) */ #define SERVER_NAME_LENGTH 80 #define SERVER_NAME_LEN_WITH_NULL (SERVER_NAME_LENGTH + 1) /* echo interval in seconds */ #define SMB_ECHO_INTERVAL_MIN 1 #define SMB_ECHO_INTERVAL_MAX 600 #define SMB_ECHO_INTERVAL_DEFAULT 60 /* smb multichannel query server interfaces interval in seconds */ #define SMB_INTERFACE_POLL_INTERVAL 600 /* maximum number of PDUs in one compound */ #define MAX_COMPOUND 7 /* * Default number of credits to keep available for SMB3. * This value is chosen somewhat arbitrarily. The Windows client * defaults to 128 credits, the Windows server allows clients up to * 512 credits (or 8K for later versions), and the NetApp server * does not limit clients at all. Choose a high enough default value * such that the client shouldn't limit performance, but allow mount * to override (until you approach 64K, where we limit credits to 65000 * to reduce possibility of seeing more server credit overflow bugs. */ #define SMB2_MAX_CREDITS_AVAILABLE 32000 #include "cifspdu.h" #ifndef XATTR_DOS_ATTRIB #define XATTR_DOS_ATTRIB "user.DOSATTRIB" #endif #define CIFS_MAX_WORKSTATION_LEN (__NEW_UTS_LEN + 1) /* reasonable max for client */ #define CIFS_DFS_ROOT_SES(ses) ((ses)->dfs_root_ses ?: (ses)) /* * CIFS vfs client Status information (based on what we know.) */ /* associated with each connection */ enum statusEnum { CifsNew = 0, CifsGood, CifsExiting, CifsNeedReconnect, CifsNeedNegotiate, CifsInNegotiate, }; /* associated with each smb session */ enum ses_status_enum { SES_NEW = 0, SES_GOOD, SES_EXITING, SES_NEED_RECON, SES_IN_SETUP }; /* associated with each tree connection to the server */ enum tid_status_enum { TID_NEW = 0, TID_GOOD, TID_EXITING, TID_NEED_RECON, TID_NEED_TCON, TID_IN_TCON, TID_NEED_FILES_INVALIDATE, /* currently unused */ TID_IN_FILES_INVALIDATE }; enum securityEnum { Unspecified = 0, /* not specified */ NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */ Kerberos, /* Kerberos via SPNEGO */ }; enum upcall_target_enum { UPTARGET_UNSPECIFIED, /* not specified, defaults to app */ UPTARGET_MOUNT, /* upcall to the mount namespace */ UPTARGET_APP, /* upcall to the application namespace which did the mount */ }; enum cifs_reparse_type { CIFS_REPARSE_TYPE_NFS, CIFS_REPARSE_TYPE_WSL, CIFS_REPARSE_TYPE_DEFAULT = CIFS_REPARSE_TYPE_NFS, }; static inline const char *cifs_reparse_type_str(enum cifs_reparse_type type) { switch (type) { case CIFS_REPARSE_TYPE_NFS: return "nfs"; case CIFS_REPARSE_TYPE_WSL: return "wsl"; default: return "unknown"; } } struct session_key { unsigned int len; char *response; }; /* crypto hashing related structure/fields, not specific to a sec mech */ struct cifs_secmech { struct shash_desc *md5; /* md5 hash function, for CIFS/SMB1 signatures */ struct shash_desc *hmacsha256; /* hmac-sha256 hash function, for SMB2 signatures */ struct shash_desc *sha512; /* sha512 hash function, for SMB3.1.1 preauth hash */ struct shash_desc *aes_cmac; /* block-cipher based MAC function, for SMB3 signatures */ struct crypto_aead *enc; /* smb3 encryption AEAD TFM (AES-CCM and AES-GCM) */ struct crypto_aead *dec; /* smb3 decryption AEAD TFM (AES-CCM and AES-GCM) */ }; /* per smb session structure/fields */ struct ntlmssp_auth { bool sesskey_per_smbsess; /* whether session key is per smb session */ __u32 client_flags; /* sent by client in type 1 ntlmsssp exchange */ __u32 server_flags; /* sent by server in type 2 ntlmssp exchange */ unsigned char ciphertext[CIFS_CPHTXT_SIZE]; /* sent to server */ char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlmssp */ }; struct cifs_cred { int uid; int gid; int mode; int cecount; struct smb_sid osid; struct smb_sid gsid; struct cifs_ntace *ntaces; struct smb_ace *aces; }; struct cifs_open_info_data { bool adjust_tz; union { bool reparse_point; bool symlink; }; struct { /* ioctl response buffer */ struct { int buftype; struct kvec iov; } io; __u32 tag; union { struct reparse_data_buffer *buf; struct reparse_posix_data *posix; }; } reparse; struct { __u8 eas[SMB2_WSL_MAX_QUERY_EA_RESP_SIZE]; unsigned int eas_len; } wsl; char *symlink_target; struct smb_sid posix_owner; struct smb_sid posix_group; union { struct smb2_file_all_info fi; struct smb311_posix_qinfo posix_fi; }; }; /* ***************************************************************** * Except the CIFS PDUs themselves all the * globally interesting structs should go here ***************************************************************** */ /* * A smb_rqst represents a complete request to be issued to a server. It's * formed by a kvec array, followed by an array of pages. Page data is assumed * to start at the beginning of the first page. */ struct smb_rqst { struct kvec *rq_iov; /* array of kvecs */ unsigned int rq_nvec; /* number of kvecs in array */ struct iov_iter rq_iter; /* Data iterator */ struct folio_queue *rq_buffer; /* Buffer for encryption */ }; struct mid_q_entry; struct TCP_Server_Info; struct cifsFileInfo; struct cifs_ses; struct cifs_tcon; struct dfs_info3_param; struct cifs_fattr; struct smb3_fs_context; struct cifs_fid; struct cifs_io_subrequest; struct cifs_io_parms; struct cifs_search_info; struct cifsInodeInfo; struct cifs_open_parms; struct cifs_credits; struct smb_version_operations { int (*send_cancel)(struct TCP_Server_Info *, struct smb_rqst *, struct mid_q_entry *); bool (*compare_fids)(struct cifsFileInfo *, struct cifsFileInfo *); /* setup request: allocate mid, sign message */ struct mid_q_entry *(*setup_request)(struct cifs_ses *, struct TCP_Server_Info *, struct smb_rqst *); /* setup async request: allocate mid, sign message */ struct mid_q_entry *(*setup_async_request)(struct TCP_Server_Info *, struct smb_rqst *); /* check response: verify signature, map error */ int (*check_receive)(struct mid_q_entry *, struct TCP_Server_Info *, bool); void (*add_credits)(struct TCP_Server_Info *server, struct cifs_credits *credits, const int optype); void (*set_credits)(struct TCP_Server_Info *, const int); int * (*get_credits_field)(struct TCP_Server_Info *, const int); unsigned int (*get_credits)(struct mid_q_entry *); __u64 (*get_next_mid)(struct TCP_Server_Info *); void (*revert_current_mid)(struct TCP_Server_Info *server, const unsigned int val); /* data offset from read response message */ unsigned int (*read_data_offset)(char *); /* * Data length from read response message * When in_remaining is true, the returned data length is in * message field DataRemaining for out-of-band data read (e.g through * Memory Registration RDMA write in SMBD). * Otherwise, the returned data length is in message field DataLength. */ unsigned int (*read_data_length)(char *, bool in_remaining); /* map smb to linux error */ int (*map_error)(char *, bool); /* find mid corresponding to the response message */ struct mid_q_entry * (*find_mid)(struct TCP_Server_Info *, char *); void (*dump_detail)(void *buf, struct TCP_Server_Info *ptcp_info); void (*clear_stats)(struct cifs_tcon *); void (*print_stats)(struct seq_file *m, struct cifs_tcon *); void (*dump_share_caps)(struct seq_file *, struct cifs_tcon *); /* verify the message */ int (*check_message)(char *, unsigned int, struct TCP_Server_Info *); bool (*is_oplock_break)(char *, struct TCP_Server_Info *); int (*handle_cancelled_mid)(struct mid_q_entry *, struct TCP_Server_Info *); void (*downgrade_oplock)(struct TCP_Server_Info *server, struct cifsInodeInfo *cinode, __u32 oplock, unsigned int epoch, bool *purge_cache); /* process transaction2 response */ bool (*check_trans2)(struct mid_q_entry *, struct TCP_Server_Info *, char *, int); /* check if we need to negotiate */ bool (*need_neg)(struct TCP_Server_Info *); /* negotiate to the server */ int (*negotiate)(const unsigned int xid, struct cifs_ses *ses, struct TCP_Server_Info *server); /* set negotiated write size */ unsigned int (*negotiate_wsize)(struct cifs_tcon *tcon, struct smb3_fs_context *ctx); /* set negotiated read size */ unsigned int (*negotiate_rsize)(struct cifs_tcon *tcon, struct smb3_fs_context *ctx); /* setup smb sessionn */ int (*sess_setup)(const unsigned int, struct cifs_ses *, struct TCP_Server_Info *server, const struct nls_table *); /* close smb session */ int (*logoff)(const unsigned int, struct cifs_ses *); /* connect to a server share */ int (*tree_connect)(const unsigned int, struct cifs_ses *, const char *, struct cifs_tcon *, const struct nls_table *); /* close tree connection */ int (*tree_disconnect)(const unsigned int, struct cifs_tcon *); /* get DFS referrals */ int (*get_dfs_refer)(const unsigned int, struct cifs_ses *, const char *, struct dfs_info3_param **, unsigned int *, const struct nls_table *, int); /* informational QFS call */ void (*qfs_tcon)(const unsigned int, struct cifs_tcon *, struct cifs_sb_info *); /* query for server interfaces */ int (*query_server_interfaces)(const unsigned int, struct cifs_tcon *, bool); /* check if a path is accessible or not */ int (*is_path_accessible)(const unsigned int, struct cifs_tcon *, struct cifs_sb_info *, const char *); /* query path data from the server */ int (*query_path_info)(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, struct cifs_open_info_data *data); /* query file data from the server */ int (*query_file_info)(const unsigned int xid, struct cifs_tcon *tcon, struct cifsFileInfo *cfile, struct cifs_open_info_data *data); /* query reparse point to determine which type of special file */ int (*query_reparse_point)(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, u32 *tag, struct kvec *rsp, int *rsp_buftype); /* get server index number */ int (*get_srv_inum)(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, u64 *uniqueid, struct cifs_open_info_data *data); /* set size by path */ int (*set_path_size)(const unsigned int, struct cifs_tcon *, const char *, __u64, struct cifs_sb_info *, bool, struct dentry *); /* set size by file handle */ int (*set_file_size)(const unsigned int, struct cifs_tcon *, struct cifsFileInfo *, __u64, bool); /* set attributes */ int (*set_file_info)(struct inode *, const char *, FILE_BASIC_INFO *, const unsigned int); int (*set_compression)(const unsigned int, struct cifs_tcon *, struct cifsFileInfo *); /* check if we can send an echo or nor */ bool (*can_echo)(struct TCP_Server_Info *); /* send echo request */ int (*echo)(struct TCP_Server_Info *); /* create directory */ int (*posix_mkdir)(const unsigned int xid, struct inode *inode, umode_t mode, struct cifs_tcon *tcon, const char *full_path, struct cifs_sb_info *cifs_sb); int (*mkdir)(const unsigned int xid, struct inode *inode, umode_t mode, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *sb); /* set info on created directory */ void (*mkdir_setinfo)(struct inode *, const char *, struct cifs_sb_info *, struct cifs_tcon *, const unsigned int); /* remove directory */ int (*rmdir)(const unsigned int, struct cifs_tcon *, const char *, struct cifs_sb_info *); /* unlink file */ int (*unlink)(const unsigned int, struct cifs_tcon *, const char *, struct cifs_sb_info *, struct dentry *); /* open, rename and delete file */ int (*rename_pending_delete)(const char *, struct dentry *, const unsigned int); /* send rename request */ int (*rename)(const unsigned int xid, struct cifs_tcon *tcon, struct dentry *source_dentry, const char *from_name, const char *to_name, struct cifs_sb_info *cifs_sb); /* send create hardlink request */ int (*create_hardlink)(const unsigned int xid, struct cifs_tcon *tcon, struct dentry *source_dentry, const char *from_name, const char *to_name, struct cifs_sb_info *cifs_sb); /* query symlink target */ int (*query_symlink)(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, char **target_path); /* open a file for non-posix mounts */ int (*open)(const unsigned int xid, struct cifs_open_parms *oparms, __u32 *oplock, void *buf); /* set fid protocol-specific info */ void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32); /* close a file */ int (*close)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); /* close a file, returning file attributes and timestamps */ int (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon, struct cifsFileInfo *pfile_info); /* send a flush request to the server */ int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); /* async read from the server */ int (*async_readv)(struct cifs_io_subrequest *); /* async write to the server */ void (*async_writev)(struct cifs_io_subrequest *); /* sync read from the server */ int (*sync_read)(const unsigned int, struct cifs_fid *, struct cifs_io_parms *, unsigned int *, char **, int *); /* sync write to the server */ int (*sync_write)(const unsigned int, struct cifs_fid *, struct cifs_io_parms *, unsigned int *, struct kvec *, unsigned long); /* open dir, start readdir */ int (*query_dir_first)(const unsigned int, struct cifs_tcon *, const char *, struct cifs_sb_info *, struct cifs_fid *, __u16, struct cifs_search_info *); /* continue readdir */ int (*query_dir_next)(const unsigned int, struct cifs_tcon *, struct cifs_fid *, __u16, struct cifs_search_info *srch_inf); /* close dir */ int (*close_dir)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); /* calculate a size of SMB message */ unsigned int (*calc_smb_size)(void *buf); /* check for STATUS_PENDING and process the response if yes */ bool (*is_status_pending)(char *buf, struct TCP_Server_Info *server); /* check for STATUS_NETWORK_SESSION_EXPIRED */ bool (*is_session_expired)(char *); /* send oplock break response */ int (*oplock_response)(struct cifs_tcon *tcon, __u64 persistent_fid, __u64 volatile_fid, __u16 net_fid, struct cifsInodeInfo *cifs_inode); /* query remote filesystem */ int (*queryfs)(const unsigned int, struct cifs_tcon *, const char *, struct cifs_sb_info *, struct kstatfs *); /* send mandatory brlock to the server */ int (*mand_lock)(const unsigned int, struct cifsFileInfo *, __u64, __u64, __u32, int, int, bool); /* unlock range of mandatory locks */ int (*mand_unlock_range)(struct cifsFileInfo *, struct file_lock *, const unsigned int); /* push brlocks from the cache to the server */ int (*push_mand_locks)(struct cifsFileInfo *); /* get lease key of the inode */ void (*get_lease_key)(struct inode *, struct cifs_fid *); /* set lease key of the inode */ void (*set_lease_key)(struct inode *, struct cifs_fid *); /* generate new lease key */ void (*new_lease_key)(struct cifs_fid *); int (*generate_signingkey)(struct cifs_ses *ses, struct TCP_Server_Info *server); int (*calc_signature)(struct smb_rqst *, struct TCP_Server_Info *, bool allocate_crypto); int (*set_integrity)(const unsigned int, struct cifs_tcon *tcon, struct cifsFileInfo *src_file); int (*enum_snapshots)(const unsigned int xid, struct cifs_tcon *tcon, struct cifsFileInfo *src_file, void __user *); int (*notify)(const unsigned int xid, struct file *pfile, void __user *pbuf, bool return_changes); int (*query_mf_symlink)(unsigned int, struct cifs_tcon *, struct cifs_sb_info *, const unsigned char *, char *, unsigned int *); int (*create_mf_symlink)(unsigned int, struct cifs_tcon *, struct cifs_sb_info *, const unsigned char *, char *, unsigned int *); /* if we can do cache read operations */ bool (*is_read_op)(__u32); /* set oplock level for the inode */ void (*set_oplock_level)(struct cifsInodeInfo *, __u32, unsigned int, bool *); /* create lease context buffer for CREATE request */ char * (*create_lease_buf)(u8 *lease_key, u8 oplock); /* parse lease context buffer and return oplock/epoch info */ __u8 (*parse_lease_buf)(void *buf, unsigned int *epoch, char *lkey); ssize_t (*copychunk_range)(const unsigned int, struct cifsFileInfo *src_file, struct cifsFileInfo *target_file, u64 src_off, u64 len, u64 dest_off); int (*duplicate_extents)(const unsigned int, struct cifsFileInfo *src, struct cifsFileInfo *target_file, u64 src_off, u64 len, u64 dest_off); int (*validate_negotiate)(const unsigned int, struct cifs_tcon *); ssize_t (*query_all_EAs)(const unsigned int, struct cifs_tcon *, const unsigned char *, const unsigned char *, char *, size_t, struct cifs_sb_info *); int (*set_EA)(const unsigned int, struct cifs_tcon *, const char *, const char *, const void *, const __u16, const struct nls_table *, struct cifs_sb_info *); struct smb_ntsd * (*get_acl)(struct cifs_sb_info *cifssb, struct inode *ino, const char *patch, u32 *plen, u32 info); struct smb_ntsd * (*get_acl_by_fid)(struct cifs_sb_info *cifssmb, const struct cifs_fid *pfid, u32 *plen, u32 info); int (*set_acl)(struct smb_ntsd *pntsd, __u32 len, struct inode *ino, const char *path, int flag); /* writepages retry size */ unsigned int (*wp_retry_size)(struct inode *); /* get mtu credits */ int (*wait_mtu_credits)(struct TCP_Server_Info *, size_t, size_t *, struct cifs_credits *); /* adjust previously taken mtu credits to request size */ int (*adjust_credits)(struct TCP_Server_Info *server, struct cifs_io_subrequest *subreq, unsigned int /*enum smb3_rw_credits_trace*/ trace); /* check if we need to issue closedir */ bool (*dir_needs_close)(struct cifsFileInfo *); long (*fallocate)(struct file *, struct cifs_tcon *, int, loff_t, loff_t); /* init transform (compress/encrypt) request */ int (*init_transform_rq)(struct TCP_Server_Info *, int num_rqst, struct smb_rqst *, struct smb_rqst *); int (*is_transform_hdr)(void *buf); int (*receive_transform)(struct TCP_Server_Info *, struct mid_q_entry **, char **, int *); enum securityEnum (*select_sectype)(struct TCP_Server_Info *, enum securityEnum); int (*next_header)(struct TCP_Server_Info *server, char *buf, unsigned int *noff); /* ioctl passthrough for query_info */ int (*ioctl_query_info)(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, __le16 *path, int is_dir, unsigned long p); /* make unix special files (block, char, fifo, socket) */ int (*make_node)(unsigned int xid, struct inode *inode, struct dentry *dentry, struct cifs_tcon *tcon, const char *full_path, umode_t mode, dev_t device_number); /* version specific fiemap implementation */ int (*fiemap)(struct cifs_tcon *tcon, struct cifsFileInfo *, struct fiemap_extent_info *, u64, u64); /* version specific llseek implementation */ loff_t (*llseek)(struct file *, struct cifs_tcon *, loff_t, int); /* Check for STATUS_IO_TIMEOUT */ bool (*is_status_io_timeout)(char *buf); /* Check for STATUS_NETWORK_NAME_DELETED */ bool (*is_network_name_deleted)(char *buf, struct TCP_Server_Info *srv); int (*parse_reparse_point)(struct cifs_sb_info *cifs_sb, const char *full_path, struct kvec *rsp_iov, struct cifs_open_info_data *data); int (*create_reparse_symlink)(const unsigned int xid, struct inode *inode, struct dentry *dentry, struct cifs_tcon *tcon, const char *full_path, const char *symname); }; struct smb_version_values { char *version_string; __u16 protocol_id; __u32 req_capabilities; __u32 large_lock_type; __u32 exclusive_lock_type; __u32 shared_lock_type; __u32 unlock_lock_type; size_t header_preamble_size; size_t header_size; size_t max_header_size; size_t read_rsp_size; __le16 lock_cmd; unsigned int cap_unix; unsigned int cap_nt_find; unsigned int cap_large_files; __u16 signing_enabled; __u16 signing_required; size_t create_lease_size; }; #define HEADER_SIZE(server) (server->vals->header_size) #define MAX_HEADER_SIZE(server) (server->vals->max_header_size) #define HEADER_PREAMBLE_SIZE(server) (server->vals->header_preamble_size) #define MID_HEADER_SIZE(server) (HEADER_SIZE(server) - 1 - HEADER_PREAMBLE_SIZE(server)) /** * CIFS superblock mount flags (mnt_cifs_flags) to consider when * trying to reuse existing superblock for a new mount */ #define CIFS_MOUNT_MASK (CIFS_MOUNT_NO_PERM | CIFS_MOUNT_SET_UID | \ CIFS_MOUNT_SERVER_INUM | CIFS_MOUNT_DIRECT_IO | \ CIFS_MOUNT_NO_XATTR | CIFS_MOUNT_MAP_SPECIAL_CHR | \ CIFS_MOUNT_MAP_SFM_CHR | \ CIFS_MOUNT_UNX_EMUL | CIFS_MOUNT_NO_BRL | \ CIFS_MOUNT_CIFS_ACL | CIFS_MOUNT_OVERR_UID | \ CIFS_MOUNT_OVERR_GID | CIFS_MOUNT_DYNPERM | \ CIFS_MOUNT_NOPOSIXBRL | CIFS_MOUNT_NOSSYNC | \ CIFS_MOUNT_FSCACHE | CIFS_MOUNT_MF_SYMLINKS | \ CIFS_MOUNT_MULTIUSER | CIFS_MOUNT_STRICT_IO | \ CIFS_MOUNT_CIFS_BACKUPUID | CIFS_MOUNT_CIFS_BACKUPGID | \ CIFS_MOUNT_UID_FROM_ACL | CIFS_MOUNT_NO_HANDLE_CACHE | \ CIFS_MOUNT_NO_DFS | CIFS_MOUNT_MODE_FROM_SID | \ CIFS_MOUNT_RO_CACHE | CIFS_MOUNT_RW_CACHE) /** * Generic VFS superblock mount flags (s_flags) to consider when * trying to reuse existing superblock for a new mount */ #define CIFS_MS_MASK (SB_RDONLY | SB_MANDLOCK | SB_NOEXEC | SB_NOSUID | \ SB_NODEV | SB_SYNCHRONOUS) struct cifs_mnt_data { struct cifs_sb_info *cifs_sb; struct smb3_fs_context *ctx; int flags; }; static inline unsigned int get_rfc1002_length(void *buf) { return be32_to_cpu(*((__be32 *)buf)) & 0xffffff; } static inline void inc_rfc1001_len(void *buf, int count) { be32_add_cpu((__be32 *)buf, count); } struct TCP_Server_Info { struct list_head tcp_ses_list; struct list_head smb_ses_list; spinlock_t srv_lock; /* protect anything here that is not protected */ __u64 conn_id; /* connection identifier (useful for debugging) */ int srv_count; /* reference counter */ /* 15 character server name + 0x20 16th byte indicating type = srv */ char server_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; struct smb_version_operations *ops; struct smb_version_values *vals; /* updates to tcpStatus protected by cifs_tcp_ses_lock */ enum statusEnum tcpStatus; /* what we think the status is */ char *hostname; /* hostname portion of UNC string */ struct socket *ssocket; struct sockaddr_storage dstaddr; struct sockaddr_storage srcaddr; /* locally bind to this IP */ #ifdef CONFIG_NET_NS struct net *net; #endif wait_queue_head_t response_q; wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/ spinlock_t mid_lock; /* protect mid queue and it's entries */ struct list_head pending_mid_q; bool noblocksnd; /* use blocking sendmsg */ bool noautotune; /* do not autotune send buf sizes */ bool nosharesock; bool tcp_nodelay; bool terminate; unsigned int credits; /* send no more requests at once */ unsigned int max_credits; /* can override large 32000 default at mnt */ unsigned int in_flight; /* number of requests on the wire to server */ unsigned int max_in_flight; /* max number of requests that were on wire */ spinlock_t req_lock; /* protect the two values above */ struct mutex _srv_mutex; unsigned int nofs_flag; struct task_struct *tsk; char server_GUID[16]; __u16 sec_mode; bool sign; /* is signing enabled on this connection? */ bool ignore_signature:1; /* skip validation of signatures in SMB2/3 rsp */ bool session_estab; /* mark when very first sess is established */ int echo_credits; /* echo reserved slots */ int oplock_credits; /* oplock break reserved slots */ bool echoes:1; /* enable echoes */ __u8 client_guid[SMB2_CLIENT_GUID_SIZE]; /* Client GUID */ u16 dialect; /* dialect index that server chose */ bool oplocks:1; /* enable oplocks */ unsigned int maxReq; /* Clients should submit no more */ /* than maxReq distinct unanswered SMBs to the server when using */ /* multiplexed reads or writes (for SMB1/CIFS only, not SMB2/SMB3) */ unsigned int maxBuf; /* maxBuf specifies the maximum */ /* message size the server can send or receive for non-raw SMBs */ /* maxBuf is returned by SMB NegotiateProtocol so maxBuf is only 0 */ /* when socket is setup (and during reconnect) before NegProt sent */ unsigned int max_rw; /* maxRw specifies the maximum */ /* message size the server can send or receive for */ /* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */ unsigned int capabilities; /* selective disabling of caps by smb sess */ int timeAdj; /* Adjust for difference in server time zone in sec */ __u64 CurrentMid; /* multiplex id - rotating counter, protected by GlobalMid_Lock */ char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */ /* 16th byte of RFC1001 workstation name is always null */ char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; __u32 sequence_number; /* for signing, protected by srv_mutex */ __u32 reconnect_instance; /* incremented on each reconnect */ struct session_key session_key; unsigned long lstrp; /* when we got last response from this server */ struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */ #define CIFS_NEGFLAVOR_UNENCAP 1 /* wct == 17, but no ext_sec */ #define CIFS_NEGFLAVOR_EXTENDED 2 /* wct == 17, ext_sec bit set */ char negflavor; /* NEGOTIATE response flavor */ /* extended security flavors that server supports */ bool sec_ntlmssp; /* supports NTLMSSP */ bool sec_kerberosu2u; /* supports U2U Kerberos */ bool sec_kerberos; /* supports plain Kerberos */ bool sec_mskerberos; /* supports legacy MS Kerberos */ bool large_buf; /* is current buffer large? */ /* use SMBD connection instead of socket */ bool rdma; /* point to the SMBD connection if RDMA is used instead of socket */ struct smbd_connection *smbd_conn; struct delayed_work echo; /* echo ping workqueue job */ char *smallbuf; /* pointer to current "small" buffer */ char *bigbuf; /* pointer to current "big" buffer */ /* Total size of this PDU. Only valid from cifs_demultiplex_thread */ unsigned int pdu_size; unsigned int total_read; /* total amount of data read in this pass */ atomic_t in_send; /* requests trying to send */ atomic_t num_waiters; /* blocked waiting to get in sendrecv */ #ifdef CONFIG_CIFS_STATS2 atomic_t num_cmds[NUMBER_OF_SMB2_COMMANDS]; /* total requests by cmd */ atomic_t smb2slowcmd[NUMBER_OF_SMB2_COMMANDS]; /* count resps > 1 sec */ __u64 time_per_cmd[NUMBER_OF_SMB2_COMMANDS]; /* total time per cmd */ __u32 slowest_cmd[NUMBER_OF_SMB2_COMMANDS]; __u32 fastest_cmd[NUMBER_OF_SMB2_COMMANDS]; #endif /* STATS2 */ unsigned int max_read; unsigned int max_write; unsigned int min_offload; unsigned int retrans; struct { bool requested; /* "compress" mount option set*/ bool enabled; /* actually negotiated with server */ __le16 alg; /* preferred alg negotiated with server */ } compression; __u16 signing_algorithm; __le16 cipher_type; /* save initial negprot hash */ __u8 preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE]; bool signing_negotiated; /* true if valid signing context rcvd from server */ bool posix_ext_supported; struct delayed_work reconnect; /* reconnect workqueue job */ struct mutex reconnect_mutex; /* prevent simultaneous reconnects */ unsigned long echo_interval; /* * Number of targets available for reconnect. The more targets * the more tasks have to wait to let the demultiplex thread * reconnect. */ int nr_targets; bool noblockcnt; /* use non-blocking connect() */ /* * If this is a session channel, * primary_server holds the ref-counted * pointer to primary channel connection for the session. */ #define SERVER_IS_CHAN(server) (!!(server)->primary_server) struct TCP_Server_Info *primary_server; __u16 channel_sequence_num; /* incremented on primary channel on each chan reconnect */ #ifdef CONFIG_CIFS_SWN_UPCALL bool use_swn_dstaddr; struct sockaddr_storage swn_dstaddr; #endif /* * Canonical DFS referral path used in cifs_reconnect() for failover as * well as in DFS cache refresher. * * format: \\HOST\SHARE[\OPTIONAL PATH] */ char *leaf_fullpath; bool dfs_conn:1; char dns_dom[CIFS_MAX_DOMAINNAME_LEN + 1]; }; static inline bool is_smb1(struct TCP_Server_Info *server) { return HEADER_PREAMBLE_SIZE(server) != 0; } static inline void cifs_server_lock(struct TCP_Server_Info *server) { unsigned int nofs_flag = memalloc_nofs_save(); mutex_lock(&server->_srv_mutex); server->nofs_flag = nofs_flag; } static inline void cifs_server_unlock(struct TCP_Server_Info *server) { unsigned int nofs_flag = server->nofs_flag; mutex_unlock(&server->_srv_mutex); memalloc_nofs_restore(nofs_flag); } struct cifs_credits { unsigned int value; unsigned int instance; unsigned int in_flight_check; unsigned int rreq_debug_id; unsigned int rreq_debug_index; }; static inline unsigned int in_flight(struct TCP_Server_Info *server) { unsigned int num; spin_lock(&server->req_lock); num = server->in_flight; spin_unlock(&server->req_lock); return num; } static inline bool has_credits(struct TCP_Server_Info *server, int *credits, int num_credits) { int num; spin_lock(&server->req_lock); num = *credits; spin_unlock(&server->req_lock); return num >= num_credits; } static inline void add_credits(struct TCP_Server_Info *server, struct cifs_credits *credits, const int optype) { server->ops->add_credits(server, credits, optype); } static inline void add_credits_and_wake_if(struct TCP_Server_Info *server, struct cifs_credits *credits, const int optype) { if (credits->value) { server->ops->add_credits(server, credits, optype); wake_up(&server->request_q); credits->value = 0; } } static inline void set_credits(struct TCP_Server_Info *server, const int val) { server->ops->set_credits(server, val); } static inline int adjust_credits(struct TCP_Server_Info *server, struct cifs_io_subrequest *subreq, unsigned int /* enum smb3_rw_credits_trace */ trace) { return server->ops->adjust_credits ? server->ops->adjust_credits(server, subreq, trace) : 0; } static inline __le64 get_next_mid64(struct TCP_Server_Info *server) { return cpu_to_le64(server->ops->get_next_mid(server)); } static inline __le16 get_next_mid(struct TCP_Server_Info *server) { __u16 mid = server->ops->get_next_mid(server); /* * The value in the SMB header should be little endian for easy * on-the-wire decoding. */ return cpu_to_le16(mid); } static inline void revert_current_mid(struct TCP_Server_Info *server, const unsigned int val) { if (server->ops->revert_current_mid) server->ops->revert_current_mid(server, val); } static inline void revert_current_mid_from_hdr(struct TCP_Server_Info *server, const struct smb2_hdr *shdr) { unsigned int num = le16_to_cpu(shdr->CreditCharge); return revert_current_mid(server, num > 0 ? num : 1); } static inline __u16 get_mid(const struct smb_hdr *smb) { return le16_to_cpu(smb->Mid); } static inline bool compare_mid(__u16 mid, const struct smb_hdr *smb) { return mid == le16_to_cpu(smb->Mid); } /* * When the server supports very large reads and writes via POSIX extensions, * we can allow up to 2^24-1, minus the size of a READ/WRITE_AND_X header, not * including the RFC1001 length. * * Note that this might make for "interesting" allocation problems during * writeback however as we have to allocate an array of pointers for the * pages. A 16M write means ~32kb page array with PAGE_SIZE == 4096. * * For reads, there is a similar problem as we need to allocate an array * of kvecs to handle the receive, though that should only need to be done * once. */ #define CIFS_MAX_WSIZE ((1<<24) - 1 - sizeof(WRITE_REQ) + 4) #define CIFS_MAX_RSIZE ((1<<24) - sizeof(READ_RSP) + 4) /* * When the server doesn't allow large posix writes, only allow a rsize/wsize * of 2^17-1 minus the size of the call header. That allows for a read or * write up to the maximum size described by RFC1002. */ #define CIFS_MAX_RFC1002_WSIZE ((1<<17) - 1 - sizeof(WRITE_REQ) + 4) #define CIFS_MAX_RFC1002_RSIZE ((1<<17) - 1 - sizeof(READ_RSP) + 4) #define CIFS_DEFAULT_IOSIZE (1024 * 1024) /* * Windows only supports a max of 60kb reads and 65535 byte writes. Default to * those values when posix extensions aren't in force. In actuality here, we * use 65536 to allow for a write that is a multiple of 4k. Most servers seem * to be ok with the extra byte even though Windows doesn't send writes that * are that large. * * Citation: * * https://blogs.msdn.com/b/openspecification/archive/2009/04/10/smb-maximum-transmit-buffer-size-and-performance-tuning.aspx */ #define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024) #define CIFS_DEFAULT_NON_POSIX_WSIZE (65536) /* * Macros to allow the TCP_Server_Info->net field and related code to drop out * when CONFIG_NET_NS isn't set. */ #ifdef CONFIG_NET_NS static inline struct net *cifs_net_ns(struct TCP_Server_Info *srv) { return srv->net; } static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net) { srv->net = net; } #else static inline struct net *cifs_net_ns(struct TCP_Server_Info *srv) { return &init_net; } static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net) { } #endif struct cifs_server_iface { struct list_head iface_head; struct kref refcount; size_t speed; size_t weight_fulfilled; unsigned int num_channels; unsigned int rdma_capable : 1; unsigned int rss_capable : 1; unsigned int is_active : 1; /* unset if non existent */ struct sockaddr_storage sockaddr; }; /* release iface when last ref is dropped */ static inline void release_iface(struct kref *ref) { struct cifs_server_iface *iface = container_of(ref, struct cifs_server_iface, refcount); kfree(iface); } struct cifs_chan { unsigned int in_reconnect : 1; /* if session setup in progress for this channel */ struct TCP_Server_Info *server; struct cifs_server_iface *iface; /* interface in use */ __u8 signkey[SMB3_SIGN_KEY_SIZE]; }; #define CIFS_SES_FLAG_SCALE_CHANNELS (0x1) /* * Session structure. One of these for each uid session with a particular host */ struct cifs_ses { struct list_head smb_ses_list; struct list_head rlist; /* reconnect list */ struct list_head tcon_list; struct list_head dlist; /* dfs list */ struct cifs_tcon *tcon_ipc; spinlock_t ses_lock; /* protect anything here that is not protected */ struct mutex session_mutex; struct TCP_Server_Info *server; /* pointer to server info */ int ses_count; /* reference counter */ enum ses_status_enum ses_status; /* updates protected by cifs_tcp_ses_lock */ unsigned int overrideSecFlg; /* if non-zero override global sec flags */ char *serverOS; /* name of operating system underlying server */ char *serverNOS; /* name of network operating system of server */ char *serverDomain; /* security realm of server */ __u64 Suid; /* remote smb uid */ kuid_t linux_uid; /* overriding owner of files on the mount */ kuid_t cred_uid; /* owner of credentials */ unsigned int capabilities; char ip_addr[INET6_ADDRSTRLEN + 1]; /* Max ipv6 (or v4) addr string len */ char *user_name; /* must not be null except during init of sess and after mount option parsing we fill it */ char *domainName; char *password; char *password2; /* When key rotation used, new password may be set before it expires */ char workstation_name[CIFS_MAX_WORKSTATION_LEN]; struct session_key auth_key; struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */ enum securityEnum sectype; /* what security flavor was specified? */ enum upcall_target_enum upcall_target; /* what upcall target was specified? */ bool sign; /* is signing required? */ bool domainAuto:1; bool expired_pwd; /* track if access denied or expired pwd so can know if need to update */ unsigned int flags; __u16 session_flags; __u8 smb3signingkey[SMB3_SIGN_KEY_SIZE]; __u8 smb3encryptionkey[SMB3_ENC_DEC_KEY_SIZE]; __u8 smb3decryptionkey[SMB3_ENC_DEC_KEY_SIZE]; __u8 preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE]; /* * Network interfaces available on the server this session is * connected to. * * Other channels can be opened by connecting and binding this * session to interfaces from this list. * * iface_lock should be taken when accessing any of these fields */ spinlock_t iface_lock; /* ========= begin: protected by iface_lock ======== */ struct list_head iface_list; size_t iface_count; unsigned long iface_last_update; /* jiffies */ /* ========= end: protected by iface_lock ======== */ spinlock_t chan_lock; /* ========= begin: protected by chan_lock ======== */ #define CIFS_MAX_CHANNELS 16 #define CIFS_INVAL_CHAN_INDEX (-1) #define CIFS_ALL_CHANNELS_SET(ses) \ ((1UL << (ses)->chan_count) - 1) #define CIFS_ALL_CHANS_GOOD(ses) \ (!(ses)->chans_need_reconnect) #define CIFS_ALL_CHANS_NEED_RECONNECT(ses) \ ((ses)->chans_need_reconnect == CIFS_ALL_CHANNELS_SET(ses)) #define CIFS_SET_ALL_CHANS_NEED_RECONNECT(ses) \ ((ses)->chans_need_reconnect = CIFS_ALL_CHANNELS_SET(ses)) #define CIFS_CHAN_NEEDS_RECONNECT(ses, index) \ test_bit((index), &(ses)->chans_need_reconnect) #define CIFS_CHAN_IN_RECONNECT(ses, index) \ ((ses)->chans[(index)].in_reconnect) struct cifs_chan chans[CIFS_MAX_CHANNELS]; size_t chan_count; size_t chan_max; atomic_t chan_seq; /* round robin state */ /* * chans_need_reconnect is a bitmap indicating which of the channels * under this smb session needs to be reconnected. * If not multichannel session, only one bit will be used. * * We will ask for sess and tcon reconnection only if all the * channels are marked for needing reconnection. This will * enable the sessions on top to continue to live till any * of the channels below are active. */ unsigned long chans_need_reconnect; /* ========= end: protected by chan_lock ======== */ struct cifs_ses *dfs_root_ses; struct nls_table *local_nls; char *dns_dom; /* FQDN of the domain */ }; static inline bool cap_unix(struct cifs_ses *ses) { return ses->server->vals->cap_unix & ses->capabilities; } /* * common struct for holding inode info when searching for or updating an * inode with new info */ #define CIFS_FATTR_JUNCTION 0x1 #define CIFS_FATTR_DELETE_PENDING 0x2 #define CIFS_FATTR_NEED_REVAL 0x4 #define CIFS_FATTR_INO_COLLISION 0x8 #define CIFS_FATTR_UNKNOWN_NLINK 0x10 #define CIFS_FATTR_FAKE_ROOT_INO 0x20 struct cifs_fattr { u32 cf_flags; u32 cf_cifsattrs; u64 cf_uniqueid; u64 cf_eof; u64 cf_bytes; u64 cf_createtime; kuid_t cf_uid; kgid_t cf_gid; umode_t cf_mode; dev_t cf_rdev; unsigned int cf_nlink; unsigned int cf_dtype; struct timespec64 cf_atime; struct timespec64 cf_mtime; struct timespec64 cf_ctime; u32 cf_cifstag; char *cf_symlink_target; }; /* * there is one of these for each connection to a resource on a particular * session */ struct cifs_tcon { struct list_head tcon_list; int debug_id; /* Debugging for tracing */ int tc_count; struct list_head rlist; /* reconnect list */ spinlock_t tc_lock; /* protect anything here that is not protected */ atomic_t num_local_opens; /* num of all opens including disconnected */ atomic_t num_remote_opens; /* num of all network opens on server */ struct list_head openFileList; spinlock_t open_file_lock; /* protects list above */ struct cifs_ses *ses; /* pointer to session associated with */ char tree_name[MAX_TREE_SIZE + 1]; /* UNC name of resource in ASCII */ char *nativeFileSystem; char *password; /* for share-level security */ __u32 tid; /* The 4 byte tree id */ __u16 Flags; /* optional support bits */ enum tid_status_enum status; atomic_t num_smbs_sent; union { struct { atomic_t num_writes; atomic_t num_reads; atomic_t num_flushes; atomic_t num_oplock_brks; atomic_t num_opens; atomic_t num_closes; atomic_t num_deletes; atomic_t num_mkdirs; atomic_t num_posixopens; atomic_t num_posixmkdirs; atomic_t num_rmdirs; atomic_t num_renames; atomic_t num_t2renames; atomic_t num_ffirst; atomic_t num_fnext; atomic_t num_fclose; atomic_t num_hardlinks; atomic_t num_symlinks; atomic_t num_locks; atomic_t num_acl_get; atomic_t num_acl_set; } cifs_stats; struct { atomic_t smb2_com_sent[NUMBER_OF_SMB2_COMMANDS]; atomic_t smb2_com_failed[NUMBER_OF_SMB2_COMMANDS]; } smb2_stats; } stats; __u64 bytes_read; __u64 bytes_written; spinlock_t stat_lock; /* protects the two fields above */ time64_t stats_from_time; FILE_SYSTEM_DEVICE_INFO fsDevInfo; FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if fs name truncated */ FILE_SYSTEM_UNIX_INFO fsUnixInfo; bool ipc:1; /* set if connection to IPC$ share (always also pipe) */ bool pipe:1; /* set if connection to pipe share */ bool print:1; /* set if connection to printer share */ bool retry:1; bool nocase:1; bool nohandlecache:1; /* if strange server resource prob can turn off */ bool nodelete:1; bool seal:1; /* transport encryption for this mounted share */ bool unix_ext:1; /* if false disable Linux extensions to CIFS protocol for this mount even if server would support */ bool posix_extensions; /* if true SMB3.11 posix extensions enabled */ bool local_lease:1; /* check leases (only) on local system not remote */ bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */ bool broken_sparse_sup; /* if server or share does not support sparse */ bool need_reconnect:1; /* connection reset, tid now invalid */ bool need_reopen_files:1; /* need to reopen tcon file handles */ bool use_resilient:1; /* use resilient instead of durable handles */ bool use_persistent:1; /* use persistent instead of durable handles */ bool no_lease:1; /* Do not request leases on files or directories */ bool use_witness:1; /* use witness protocol */ __le32 capabilities; __u32 share_flags; __u32 maximal_access; __u32 vol_serial_number; __le64 vol_create_time; __u64 snapshot_time; /* for timewarp tokens - timestamp of snapshot */ __u32 handle_timeout; /* persistent and durable handle timeout in ms */ __u32 ss_flags; /* sector size flags */ __u32 perf_sector_size; /* best sector size for perf */ __u32 max_chunks; __u32 max_bytes_chunk; __u32 max_bytes_copy; __u32 max_cached_dirs; #ifdef CONFIG_CIFS_FSCACHE u64 resource_id; /* server resource id */ bool fscache_acquired; /* T if we've tried acquiring a cookie */ struct fscache_volume *fscache; /* cookie for share */ struct mutex fscache_lock; /* Prevent regetting a cookie */ #endif struct list_head pending_opens; /* list of incomplete opens */ struct cached_fids *cfids; /* BB add field for back pointer to sb struct(s)? */ #ifdef CONFIG_CIFS_DFS_UPCALL struct delayed_work dfs_cache_work; struct list_head dfs_ses_list; #endif struct delayed_work query_interfaces; /* query interfaces workqueue job */ char *origin_fullpath; /* canonical copy of smb3_fs_context::source */ }; /* * This is a refcounted and timestamped container for a tcon pointer. The * container holds a tcon reference. It is considered safe to free one of * these when the tl_count goes to 0. The tl_time is the time of the last * "get" on the container. */ struct tcon_link { struct rb_node tl_rbnode; kuid_t tl_uid; unsigned long tl_flags; #define TCON_LINK_MASTER 0 #define TCON_LINK_PENDING 1 #define TCON_LINK_IN_TREE 2 unsigned long tl_time; atomic_t tl_count; struct cifs_tcon *tl_tcon; }; extern struct tcon_link *cifs_sb_tlink(struct cifs_sb_info *cifs_sb); extern void smb3_free_compound_rqst(int num_rqst, struct smb_rqst *rqst); static inline struct cifs_tcon * tlink_tcon(struct tcon_link *tlink) { return tlink->tl_tcon; } static inline struct tcon_link * cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb) { return cifs_sb->master_tlink; } extern void cifs_put_tlink(struct tcon_link *tlink); static inline struct tcon_link * cifs_get_tlink(struct tcon_link *tlink) { if (tlink && !IS_ERR(tlink)) atomic_inc(&tlink->tl_count); return tlink; } /* This function is always expected to succeed */ extern struct cifs_tcon *cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb); #define CIFS_OPLOCK_NO_CHANGE 0xfe struct cifs_pending_open { struct list_head olist; struct tcon_link *tlink; __u8 lease_key[16]; __u32 oplock; }; struct cifs_deferred_close { struct list_head dlist; struct tcon_link *tlink; __u16 netfid; __u64 persistent_fid; __u64 volatile_fid; }; /* * This info hangs off the cifsFileInfo structure, pointed to by llist. * This is used to track byte stream locks on the file */ struct cifsLockInfo { struct list_head llist; /* pointer to n