Total coverage: 272713 (18%) of 1575642
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Cryptographic API.
 *
 * Null algorithms, aka Much Ado About Nothing.
 *
 * These are needed for IPsec, and may be useful in general for
 * testing & debugging.
 *
 * The null cipher is compliant with RFC2410.
 *
 * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
 */

#include <crypto/null.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/skcipher.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/string.h>

static DEFINE_MUTEX(crypto_default_null_skcipher_lock);
static struct crypto_sync_skcipher *crypto_default_null_skcipher;
static int crypto_default_null_skcipher_refcnt;

static int null_compress(struct crypto_tfm *tfm, const u8 *src,
			 unsigned int slen, u8 *dst, unsigned int *dlen)
{
	if (slen > *dlen)
		return -EINVAL;
	memcpy(dst, src, slen);
	*dlen = slen;
	return 0;
}

static int null_init(struct shash_desc *desc)
{
	return 0;
}

static int null_update(struct shash_desc *desc, const u8 *data,
		       unsigned int len)
{
	return 0;
}

static int null_final(struct shash_desc *desc, u8 *out)
{
	return 0;
}

static int null_digest(struct shash_desc *desc, const u8 *data,
		       unsigned int len, u8 *out)
{
	return 0;
}

static int null_hash_setkey(struct crypto_shash *tfm, const u8 *key,
			    unsigned int keylen)
{
	return 0;
}

static int null_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
				unsigned int keylen)
{
	return 0;
}

static int null_setkey(struct crypto_tfm *tfm, const u8 *key,
		       unsigned int keylen)
{
	return 0;
}

static void null_crypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	memcpy(dst, src, NULL_BLOCK_SIZE);
}

static int null_skcipher_crypt(struct skcipher_request *req)
{
	struct skcipher_walk walk;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while (walk.nbytes) {
		if (walk.src.virt.addr != walk.dst.virt.addr)
			memcpy(walk.dst.virt.addr, walk.src.virt.addr,
			       walk.nbytes);
		err = skcipher_walk_done(&walk, 0);
	}

	return err;
}

static struct shash_alg digest_null = {
	.digestsize = NULL_DIGEST_SIZE,
	.setkey     = null_hash_setkey,
	.init       = null_init,
	.update     = null_update,
	.finup      = null_digest,
	.digest     = null_digest,
	.final      = null_final,
	.base       = {
		.cra_name        = "digest_null",
		.cra_driver_name = "digest_null-generic",
		.cra_blocksize   = NULL_BLOCK_SIZE,
		.cra_module      = THIS_MODULE,
	}
};

static struct skcipher_alg skcipher_null = {
	.base.cra_name        = "ecb(cipher_null)",
	.base.cra_driver_name = "ecb-cipher_null",
	.base.cra_priority    = 100,
	.base.cra_blocksize   = NULL_BLOCK_SIZE,
	.base.cra_ctxsize     = 0,
	.base.cra_module      = THIS_MODULE,
	.min_keysize          = NULL_KEY_SIZE,
	.max_keysize          = NULL_KEY_SIZE,
	.ivsize               = NULL_IV_SIZE,
	.setkey               = null_skcipher_setkey,
	.encrypt              = null_skcipher_crypt,
	.decrypt              = null_skcipher_crypt,
};

static struct crypto_alg null_algs[] = { {
	.cra_name        = "cipher_null",
	.cra_driver_name = "cipher_null-generic",
	.cra_flags       = CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize   = NULL_BLOCK_SIZE,
	.cra_ctxsize     = 0,
	.cra_module      = THIS_MODULE,
	.cra_u           = { .cipher = {
		.cia_min_keysize = NULL_KEY_SIZE,
		.cia_max_keysize = NULL_KEY_SIZE,
		.cia_setkey      = null_setkey,
		.cia_encrypt     = null_crypt,
		.cia_decrypt     = null_crypt } }
}, {
	.cra_name        = "compress_null",
	.cra_driver_name = "compress_null-generic",
	.cra_flags       = CRYPTO_ALG_TYPE_COMPRESS,
	.cra_blocksize   = NULL_BLOCK_SIZE,
	.cra_ctxsize     = 0,
	.cra_module      = THIS_MODULE,
	.cra_u           = { .compress = {
		.coa_compress   = null_compress,
		.coa_decompress = null_compress } }
} };

MODULE_ALIAS_CRYPTO("compress_null");
MODULE_ALIAS_CRYPTO("digest_null");
MODULE_ALIAS_CRYPTO("cipher_null");

struct crypto_sync_skcipher *crypto_get_default_null_skcipher(void)
{
	struct crypto_sync_skcipher *tfm;

	mutex_lock(&crypto_default_null_skcipher_lock);
	tfm = crypto_default_null_skcipher;

	if (!tfm) {
		tfm = crypto_alloc_sync_skcipher("ecb(cipher_null)", 0, 0);
		if (IS_ERR(tfm))
			goto unlock;

		crypto_default_null_skcipher = tfm;
	}

	crypto_default_null_skcipher_refcnt++;

unlock:
	mutex_unlock(&crypto_default_null_skcipher_lock);

	return tfm;
}
EXPORT_SYMBOL_GPL(crypto_get_default_null_skcipher);

void crypto_put_default_null_skcipher(void)
{
	mutex_lock(&crypto_default_null_skcipher_lock);
	if (!--crypto_default_null_skcipher_refcnt) {
		crypto_free_sync_skcipher(crypto_default_null_skcipher);
		crypto_default_null_skcipher = NULL;
	}
	mutex_unlock(&crypto_default_null_skcipher_lock);
}
EXPORT_SYMBOL_GPL(crypto_put_default_null_skcipher);

static int __init crypto_null_mod_init(void)
{
	int ret = 0;

	ret = crypto_register_algs(null_algs, ARRAY_SIZE(null_algs));
	if (ret < 0)
		goto out;

	ret = crypto_register_shash(&digest_null);
	if (ret < 0)
		goto out_unregister_algs;

	ret = crypto_register_skcipher(&skcipher_null);
	if (ret < 0)
		goto out_unregister_shash;

	return 0;

out_unregister_shash:
	crypto_unregister_shash(&digest_null);
out_unregister_algs:
	crypto_unregister_algs(null_algs, ARRAY_SIZE(null_algs));
out:
	return ret;
}

static void __exit crypto_null_mod_fini(void)
{
	crypto_unregister_algs(null_algs, ARRAY_SIZE(null_algs));
	crypto_unregister_shash(&digest_null);
	crypto_unregister_skcipher(&skcipher_null);
}

subsys_initcall(crypto_null_mod_init);
module_exit(crypto_null_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Null Cryptographic Algorithms");
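crypto_get_default_null_skcipher() above hands out one shared, reference-counted "ecb(cipher_null)" transform so that users such as IPsec do not each allocate their own. The sketch below is a hypothetical caller, not part of crypto_null.c (example_null_encrypt is an invented name); it shows the intended usage pattern with the stock sync-skcipher request helpers. Because the cipher is an identity transform, "encrypting" a buffer onto itself is effectively a no-op.

/*
 * Hypothetical user of the shared null skcipher; not part of crypto_null.c.
 */
#include <crypto/null.h>
#include <crypto/skcipher.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

static int example_null_encrypt(u8 *buf, unsigned int len)
{
	struct crypto_sync_skcipher *tfm;
	struct scatterlist sg;
	int err;

	tfm = crypto_get_default_null_skcipher();	/* shared, refcounted */
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	sg_init_one(&sg, buf, len);

	{
		SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);

		skcipher_request_set_sync_tfm(req, tfm);
		skcipher_request_set_callback(req, 0, NULL, NULL);
		skcipher_request_set_crypt(req, &sg, &sg, len, NULL);
		/* Identity cipher: with src == dst this copies nothing. */
		err = crypto_skcipher_encrypt(req);
	}

	crypto_put_default_null_skcipher();		/* drop the shared ref */
	return err;
}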
// SPDX-License-Identifier: GPL-2.0-only

#include "netlink.h"
#include "common.h"

struct rss_req_info {
	struct ethnl_req_info	base;
	u32			rss_context;
};

struct rss_reply_data {
	struct ethnl_reply_data	base;
	bool			no_key_fields;
	u32			indir_size;
	u32			hkey_size;
	u32			hfunc;
	u32			input_xfrm;
	u32			*indir_table;
	u8			*hkey;
};

#define RSS_REQINFO(__req_base) \
	container_of(__req_base, struct rss_req_info, base)

#define RSS_REPDATA(__reply_base) \
	container_of(__reply_base, struct rss_reply_data, base)

const struct nla_policy ethnl_rss_get_policy[] = {
	[ETHTOOL_A_RSS_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
	[ETHTOOL_A_RSS_CONTEXT] = { .type = NLA_U32 },
	[ETHTOOL_A_RSS_START_CONTEXT] = { .type = NLA_U32 },
};

static int rss_parse_request(struct ethnl_req_info *req_info,
			     struct nlattr **tb,
			     struct netlink_ext_ack *extack)
{
	struct rss_req_info *request = RSS_REQINFO(req_info);

	if (tb[ETHTOOL_A_RSS_CONTEXT])
		request->rss_context = nla_get_u32(tb[ETHTOOL_A_RSS_CONTEXT]);
	if (tb[ETHTOOL_A_RSS_START_CONTEXT]) {
		NL_SET_BAD_ATTR(extack, tb[ETHTOOL_A_RSS_START_CONTEXT]);
		return -EINVAL;
	}

	return 0;
}

static int
rss_prepare_get(const struct rss_req_info *request, struct net_device *dev,
		struct rss_reply_data *data, const struct genl_info *info)
{
	struct ethtool_rxfh_param rxfh = {};
	const struct ethtool_ops *ops;
	u32 total_size, indir_bytes;
	u8 *rss_config;
	int ret;

	ops = dev->ethtool_ops;

	ret = ethnl_ops_begin(dev);
	if (ret < 0)
		return ret;

	data->indir_size = 0;
	data->hkey_size = 0;
	if (ops->get_rxfh_indir_size)
		data->indir_size = ops->get_rxfh_indir_size(dev);
	if (ops->get_rxfh_key_size)
		data->hkey_size = ops->get_rxfh_key_size(dev);

	indir_bytes = data->indir_size * sizeof(u32);
	total_size = indir_bytes + data->hkey_size;
	rss_config = kzalloc(total_size, GFP_KERNEL);
	if (!rss_config) {
		ret = -ENOMEM;
		goto out_ops;
	}

	if (data->indir_size)
		data->indir_table = (u32 *)rss_config;
	if (data->hkey_size)
		data->hkey = rss_config + indir_bytes;

	rxfh.indir_size = data->indir_size;
	rxfh.indir = data->indir_table;
	rxfh.key_size = data->hkey_size;
	rxfh.key = data->hkey;

	ret = ops->get_rxfh(dev, &rxfh);
	if (ret)
		goto out_ops;

	data->hfunc = rxfh.hfunc;
	data->input_xfrm = rxfh.input_xfrm;
out_ops:
	ethnl_ops_complete(dev);
	return ret;
}

static int
rss_prepare_ctx(const struct rss_req_info *request, struct net_device *dev,
		struct rss_reply_data *data, const struct genl_info *info)
{
	struct ethtool_rxfh_context *ctx;
	u32 total_size, indir_bytes;
	u8 *rss_config;

	data->no_key_fields = !dev->ethtool_ops->rxfh_per_ctx_key;

	ctx = xa_load(&dev->ethtool->rss_ctx, request->rss_context);
	if (!ctx)
		return -ENOENT;

	data->indir_size = ctx->indir_size;
	data->hkey_size = ctx->key_size;
	data->hfunc = ctx->hfunc;
	data->input_xfrm = ctx->input_xfrm;

	indir_bytes = data->indir_size * sizeof(u32);
	total_size = indir_bytes + data->hkey_size;
	rss_config = kzalloc(total_size, GFP_KERNEL);
	if (!rss_config)
		return -ENOMEM;

	data->indir_table = (u32 *)rss_config;
	memcpy(data->indir_table, ethtool_rxfh_context_indir(ctx), indir_bytes);

	if (data->hkey_size) {
		data->hkey = rss_config + indir_bytes;
		memcpy(data->hkey, ethtool_rxfh_context_key(ctx),
		       data->hkey_size);
	}

	return 0;
}

static int rss_prepare_data(const struct ethnl_req_info *req_base,
			    struct ethnl_reply_data *reply_base,
			    const struct genl_info *info)
{
	struct rss_reply_data *data = RSS_REPDATA(reply_base);
	struct rss_req_info *request = RSS_REQINFO(req_base);
	struct net_device *dev = reply_base->dev;
	const struct ethtool_ops *ops;

	ops = dev->ethtool_ops;
	if (!ops->get_rxfh)
		return -EOPNOTSUPP;

	/* Some drivers don't handle rss_context */
	if (request->rss_context) {
		if (!ops->cap_rss_ctx_supported && !ops->create_rxfh_context)
			return -EOPNOTSUPP;

		return rss_prepare_ctx(request, dev, data, info);
	}

	return rss_prepare_get(request, dev, data, info);
}

static int rss_reply_size(const struct ethnl_req_info *req_base,
			  const struct ethnl_reply_data *reply_base)
{
	const struct rss_reply_data *data = RSS_REPDATA(reply_base);
	int len;

	len = nla_total_size(sizeof(u32)) +	/* _RSS_CONTEXT */
	      nla_total_size(sizeof(u32)) +	/* _RSS_HFUNC */
	      nla_total_size(sizeof(u32)) +	/* _RSS_INPUT_XFRM */
	      nla_total_size(sizeof(u32) * data->indir_size) + /* _RSS_INDIR */
	      nla_total_size(data->hkey_size);	/* _RSS_HKEY */

	return len;
}

static int rss_fill_reply(struct sk_buff *skb,
			  const struct ethnl_req_info *req_base,
			  const struct ethnl_reply_data *reply_base)
{
	const struct rss_reply_data *data = RSS_REPDATA(reply_base);
	struct rss_req_info *request = RSS_REQINFO(req_base);

	if (request->rss_context &&
	    nla_put_u32(skb, ETHTOOL_A_RSS_CONTEXT, request->rss_context))
		return -EMSGSIZE;

	if ((data->indir_size &&
	     nla_put(skb, ETHTOOL_A_RSS_INDIR,
		     sizeof(u32) * data->indir_size, data->indir_table)))
		return -EMSGSIZE;

	if (data->no_key_fields)
		return 0;

	if ((data->hfunc &&
	     nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc)) ||
	    (data->input_xfrm &&
	     nla_put_u32(skb, ETHTOOL_A_RSS_INPUT_XFRM, data->input_xfrm)) ||
	    (data->hkey_size &&
	     nla_put(skb, ETHTOOL_A_RSS_HKEY, data->hkey_size, data->hkey)))
		return -EMSGSIZE;

	return 0;
}

static void rss_cleanup_data(struct ethnl_reply_data *reply_base)
{
	const struct rss_reply_data *data = RSS_REPDATA(reply_base);

	kfree(data->indir_table);
}

struct rss_nl_dump_ctx {
	unsigned long	ifindex;
	unsigned long	ctx_idx;

	/* User wants to only dump contexts from given ifindex */
	unsigned int	match_ifindex;
	unsigned int	start_ctx;
};

static struct rss_nl_dump_ctx *rss_dump_ctx(struct netlink_callback *cb)
{
	NL_ASSERT_CTX_FITS(struct rss_nl_dump_ctx);

	return (struct rss_nl_dump_ctx *)cb->ctx;
}

int ethnl_rss_dump_start(struct netlink_callback *cb)
{
	const struct genl_info *info = genl_info_dump(cb);
	struct rss_nl_dump_ctx *ctx = rss_dump_ctx(cb);
	struct ethnl_req_info req_info = {};
	struct nlattr **tb = info->attrs;
	int ret;

	/* Filtering by context not supported */
	if (tb[ETHTOOL_A_RSS_CONTEXT]) {
		NL_SET_BAD_ATTR(info->extack, tb[ETHTOOL_A_RSS_CONTEXT]);
		return -EINVAL;
	}
	if (tb[ETHTOOL_A_RSS_START_CONTEXT]) {
		ctx->start_ctx = nla_get_u32(tb[ETHTOOL_A_RSS_START_CONTEXT]);
		ctx->ctx_idx = ctx->start_ctx;
	}

	ret = ethnl_parse_header_dev_get(&req_info,
					 tb[ETHTOOL_A_RSS_HEADER],
					 sock_net(cb->skb->sk), cb->extack,
					 false);
	if (req_info.dev) {
		ctx->match_ifindex = req_info.dev->ifindex;
		ctx->ifindex = ctx->match_ifindex;
		ethnl_parse_header_dev_put(&req_info);
		req_info.dev = NULL;
	}

	return ret;
}

static int rss_dump_one_ctx(struct sk_buff *skb, struct netlink_callback *cb,
			    struct net_device *dev, u32 rss_context)
{
	const struct genl_info *info = genl_info_dump(cb);
	struct rss_reply_data data = {};
	struct rss_req_info req = {};
	void *ehdr;
	int ret;

	req.rss_context = rss_context;

	ehdr = ethnl_dump_put(skb, cb, ETHTOOL_MSG_RSS_GET_REPLY);
	if (!ehdr)
		return -EMSGSIZE;

	ret = ethnl_fill_reply_header(skb, dev, ETHTOOL_A_RSS_HEADER);
	if (ret < 0)
		goto err_cancel;

	/* Context 0 is not currently stored or cached in the XArray */
	if (!rss_context)
		ret = rss_prepare_get(&req, dev, &data, info);
	else
		ret = rss_prepare_ctx(&req, dev, &data, info);
	if (ret)
		goto err_cancel;

	ret = rss_fill_reply(skb, &req.base, &data.base);
	if (ret)
		goto err_cleanup;
	genlmsg_end(skb, ehdr);

	rss_cleanup_data(&data.base);
	return 0;

err_cleanup:
	rss_cleanup_data(&data.base);
err_cancel:
	genlmsg_cancel(skb, ehdr);
	return ret;
}

static int
rss_dump_one_dev(struct sk_buff *skb, struct netlink_callback *cb,
		 struct net_device *dev)
{
	struct rss_nl_dump_ctx *ctx = rss_dump_ctx(cb);
	int ret;

	if (!dev->ethtool_ops->get_rxfh)
		return 0;

	if (!ctx->ctx_idx) {
		ret = rss_dump_one_ctx(skb, cb, dev, 0);
		if (ret)
			return ret;
		ctx->ctx_idx++;
	}

	for (; xa_find(&dev->ethtool->rss_ctx, &ctx->ctx_idx,
		       ULONG_MAX, XA_PRESENT); ctx->ctx_idx++) {
		ret = rss_dump_one_ctx(skb, cb, dev, ctx->ctx_idx);
		if (ret)
			return ret;
	}

	ctx->ctx_idx = ctx->start_ctx;
	return 0;
}

int ethnl_rss_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct rss_nl_dump_ctx *ctx = rss_dump_ctx(cb);
	struct net *net = sock_net(skb->sk);
	struct net_device *dev;
	int ret = 0;

	rtnl_lock();
	for_each_netdev_dump(net, dev, ctx->ifindex) {
		if (ctx->match_ifindex && ctx->match_ifindex != ctx->ifindex)
			break;

		ret = rss_dump_one_dev(skb, cb, dev);
		if (ret)
			break;
	}
	rtnl_unlock();

	return ret;
}

const struct ethnl_request_ops ethnl_rss_request_ops = {
	.request_cmd		= ETHTOOL_MSG_RSS_GET,
	.reply_cmd		= ETHTOOL_MSG_RSS_GET_REPLY,
	.hdr_attr		= ETHTOOL_A_RSS_HEADER,
	.req_info_size		= sizeof(struct rss_req_info),
	.reply_data_size	= sizeof(struct rss_reply_data),

	.parse_request		= rss_parse_request,
	.prepare_data		= rss_prepare_data,
	.reply_size		= rss_reply_size,
	.fill_reply		= rss_fill_reply,
	.cleanup_data		= rss_cleanup_data,
};
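rss_prepare_get() above sizes its buffers from the driver's get_rxfh_indir_size()/get_rxfh_key_size() callbacks and then lets get_rxfh() fill them. A minimal, hypothetical driver counterpart could look like the sketch below; the example_* names, table/key sizes, and the ETH_RSS_HASH_TOP choice are illustrative assumptions, not taken from this file.

/*
 * Hypothetical driver-side sketch of the callbacks used by rss_prepare_get().
 */
#include <linux/ethtool.h>
#include <linux/netdevice.h>
#include <linux/string.h>

#define EXAMPLE_INDIR_SIZE	128
#define EXAMPLE_KEY_SIZE	40

struct example_priv {
	u32 rss_indir[EXAMPLE_INDIR_SIZE];
	u8 rss_key[EXAMPLE_KEY_SIZE];
};

static u32 example_get_rxfh_indir_size(struct net_device *dev)
{
	return EXAMPLE_INDIR_SIZE;	/* sizes are queried first ... */
}

static u32 example_get_rxfh_key_size(struct net_device *dev)
{
	return EXAMPLE_KEY_SIZE;	/* ... so the core can size its buffers */
}

static int example_get_rxfh(struct net_device *dev,
			    struct ethtool_rxfh_param *rxfh)
{
	struct example_priv *priv = netdev_priv(dev);

	rxfh->hfunc = ETH_RSS_HASH_TOP;	/* reported as ETHTOOL_A_RSS_HFUNC */
	if (rxfh->indir)
		memcpy(rxfh->indir, priv->rss_indir, sizeof(priv->rss_indir));
	if (rxfh->key)
		memcpy(rxfh->key, priv->rss_key, sizeof(priv->rss_key));
	return 0;
}

static const struct ethtool_ops example_ethtool_ops = {
	.get_rxfh_indir_size	= example_get_rxfh_indir_size,
	.get_rxfh_key_size	= example_get_rxfh_key_size,
	.get_rxfh		= example_get_rxfh,
};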
// SPDX-License-Identifier: GPL-2.0-only

#include <linux/phy.h>
#include <linux/ethtool_netlink.h>

#include "netlink.h"
#include "common.h"

struct plca_req_info {
	struct ethnl_req_info	base;
};

struct plca_reply_data {
	struct ethnl_reply_data	base;
	struct phy_plca_cfg	plca_cfg;
	struct phy_plca_status	plca_st;
};

// Helpers ------------------------------------------------------------------ //

#define PLCA_REPDATA(__reply_base) \
	container_of(__reply_base, struct plca_reply_data, base)

// PLCA get configuration message ------------------------------------------- //

const struct nla_policy ethnl_plca_get_cfg_policy[] = {
	[ETHTOOL_A_PLCA_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_phy),
};

static void plca_update_sint(int *dst, struct nlattr **tb, u32 attrid,
			     bool *mod)
{
	const struct nlattr *attr = tb[attrid];

	if (!attr ||
	    WARN_ON_ONCE(attrid >= ARRAY_SIZE(ethnl_plca_set_cfg_policy)))
		return;

	switch (ethnl_plca_set_cfg_policy[attrid].type) {
	case NLA_U8:
		*dst = nla_get_u8(attr);
		break;
	case NLA_U32:
		*dst = nla_get_u32(attr);
		break;
	default:
		WARN_ON_ONCE(1);
	}

	*mod = true;
}

static int plca_get_cfg_prepare_data(const struct ethnl_req_info *req_base,
				     struct ethnl_reply_data *reply_base,
				     const struct genl_info *info)
{
	struct plca_reply_data *data = PLCA_REPDATA(reply_base);
	struct net_device *dev = reply_base->dev;
	const struct ethtool_phy_ops *ops;
	struct nlattr **tb = info->attrs;
	struct phy_device *phydev;
	int ret;

	phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PLCA_HEADER],
				      info->extack);
	// check that the PHY device is available and connected
	if (IS_ERR_OR_NULL(phydev)) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	// note: rtnl_lock is held already by ethnl_default_doit
	ops = ethtool_phy_ops;
	if (!ops || !ops->get_plca_cfg) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	ret = ethnl_ops_begin(dev);
	if (ret < 0)
		goto out;

	memset(&data->plca_cfg, 0xff,
	       sizeof_field(struct plca_reply_data, plca_cfg));

	ret = ops->get_plca_cfg(phydev, &data->plca_cfg);
	ethnl_ops_complete(dev);

out:
	return ret;
}

static int plca_get_cfg_reply_size(const struct ethnl_req_info *req_base,
				   const struct ethnl_reply_data *reply_base)
{
	return nla_total_size(sizeof(u16)) +	/* _VERSION */
	       nla_total_size(sizeof(u8)) +	/* _ENABLED */
	       nla_total_size(sizeof(u32)) +	/* _NODE_CNT */
	       nla_total_size(sizeof(u32)) +	/* _NODE_ID */
	       nla_total_size(sizeof(u32)) +	/* _TO_TIMER */
	       nla_total_size(sizeof(u32)) +	/* _BURST_COUNT */
	       nla_total_size(sizeof(u32));	/* _BURST_TIMER */
}

static int plca_get_cfg_fill_reply(struct sk_buff *skb,
				   const struct ethnl_req_info *req_base,
				   const struct ethnl_reply_data *reply_base)
{
	const struct plca_reply_data *data = PLCA_REPDATA(reply_base);
	const struct phy_plca_cfg *plca = &data->plca_cfg;

	if ((plca->version >= 0 &&
	     nla_put_u16(skb, ETHTOOL_A_PLCA_VERSION, plca->version)) ||
	    (plca->enabled >= 0 &&
	     nla_put_u8(skb, ETHTOOL_A_PLCA_ENABLED, !!plca->enabled)) ||
	    (plca->node_id >= 0 &&
	     nla_put_u32(skb, ETHTOOL_A_PLCA_NODE_ID, plca->node_id)) ||
	    (plca->node_cnt >= 0 &&
	     nla_put_u32(skb, ETHTOOL_A_PLCA_NODE_CNT, plca->node_cnt)) ||
	    (plca->to_tmr >= 0 &&
	     nla_put_u32(skb, ETHTOOL_A_PLCA_TO_TMR, plca->to_tmr)) ||
	    (plca->burst_cnt >= 0 &&
	     nla_put_u32(skb, ETHTOOL_A_PLCA_BURST_CNT, plca->burst_cnt)) ||
	    (plca->burst_tmr >= 0 &&
	     nla_put_u32(skb, ETHTOOL_A_PLCA_BURST_TMR, plca->burst_tmr)))
		return -EMSGSIZE;

	return 0;
};

// PLCA set configuration message ------------------------------------------- //

const struct nla_policy ethnl_plca_set_cfg_policy[] = {
	[ETHTOOL_A_PLCA_HEADER]		= NLA_POLICY_NESTED(ethnl_header_policy_phy),
	[ETHTOOL_A_PLCA_ENABLED]	= NLA_POLICY_MAX(NLA_U8, 1),
	[ETHTOOL_A_PLCA_NODE_ID]	= NLA_POLICY_MAX(NLA_U32, 255),
	[ETHTOOL_A_PLCA_NODE_CNT]	= NLA_POLICY_RANGE(NLA_U32, 1, 255),
	[ETHTOOL_A_PLCA_TO_TMR]		= NLA_POLICY_MAX(NLA_U32, 255),
	[ETHTOOL_A_PLCA_BURST_CNT]	= NLA_POLICY_MAX(NLA_U32, 255),
	[ETHTOOL_A_PLCA_BURST_TMR]	= NLA_POLICY_MAX(NLA_U32, 255),
};

static int
ethnl_set_plca(struct ethnl_req_info *req_info, struct genl_info *info)
{
	const struct ethtool_phy_ops *ops;
	struct nlattr **tb = info->attrs;
	struct phy_plca_cfg plca_cfg;
	struct phy_device *phydev;
	bool mod = false;
	int ret;

	phydev = ethnl_req_get_phydev(req_info, tb[ETHTOOL_A_PLCA_HEADER],
				      info->extack);
	// check that the PHY device is available and connected
	if (IS_ERR_OR_NULL(phydev))
		return -EOPNOTSUPP;

	ops = ethtool_phy_ops;
	if (!ops || !ops->set_plca_cfg)
		return -EOPNOTSUPP;

	memset(&plca_cfg, 0xff, sizeof(plca_cfg));
	plca_update_sint(&plca_cfg.enabled, tb, ETHTOOL_A_PLCA_ENABLED, &mod);
	plca_update_sint(&plca_cfg.node_id, tb, ETHTOOL_A_PLCA_NODE_ID, &mod);
	plca_update_sint(&plca_cfg.node_cnt, tb, ETHTOOL_A_PLCA_NODE_CNT, &mod);
	plca_update_sint(&plca_cfg.to_tmr, tb, ETHTOOL_A_PLCA_TO_TMR, &mod);
	plca_update_sint(&plca_cfg.burst_cnt, tb, ETHTOOL_A_PLCA_BURST_CNT,
			 &mod);
	plca_update_sint(&plca_cfg.burst_tmr, tb, ETHTOOL_A_PLCA_BURST_TMR,
			 &mod);
	if (!mod)
		return 0;

	ret = ops->set_plca_cfg(phydev, &plca_cfg, info->extack);
	return ret < 0 ? ret : 1;
}

const struct ethnl_request_ops ethnl_plca_cfg_request_ops = {
	.request_cmd		= ETHTOOL_MSG_PLCA_GET_CFG,
	.reply_cmd		= ETHTOOL_MSG_PLCA_GET_CFG_REPLY,
	.hdr_attr		= ETHTOOL_A_PLCA_HEADER,
	.req_info_size		= sizeof(struct plca_req_info),
	.reply_data_size	= sizeof(struct plca_reply_data),

	.prepare_data		= plca_get_cfg_prepare_data,
	.reply_size		= plca_get_cfg_reply_size,
	.fill_reply		= plca_get_cfg_fill_reply,

	.set			= ethnl_set_plca,
	.set_ntf_cmd		= ETHTOOL_MSG_PLCA_NTF,
};

// PLCA get status message -------------------------------------------------- //

const struct nla_policy ethnl_plca_get_status_policy[] = {
	[ETHTOOL_A_PLCA_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_phy),
};

static int plca_get_status_prepare_data(const struct ethnl_req_info *req_base,
					struct ethnl_reply_data *reply_base,
					const struct genl_info *info)
{
	struct plca_reply_data *data = PLCA_REPDATA(reply_base);
	struct net_device *dev = reply_base->dev;
	const struct ethtool_phy_ops *ops;
	struct nlattr **tb = info->attrs;
	struct phy_device *phydev;
	int ret;

	phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PLCA_HEADER],
				      info->extack);
	// check that the PHY device is available and connected
	if (IS_ERR_OR_NULL(phydev)) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	// note: rtnl_lock is held already by ethnl_default_doit
	ops = ethtool_phy_ops;
	if (!ops || !ops->get_plca_status) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	ret = ethnl_ops_begin(dev);
	if (ret < 0)
		goto out;

	memset(&data->plca_st, 0xff,
	       sizeof_field(struct plca_reply_data, plca_st));

	ret = ops->get_plca_status(phydev, &data->plca_st);
	ethnl_ops_complete(dev);

out:
	return ret;
}

static int plca_get_status_reply_size(const struct ethnl_req_info *req_base,
				      const struct ethnl_reply_data *reply_base)
{
	return nla_total_size(sizeof(u8));	/* _STATUS */
}

static int plca_get_status_fill_reply(struct sk_buff *skb,
				      const struct ethnl_req_info *req_base,
				      const struct ethnl_reply_data *reply_base)
{
	const struct plca_reply_data *data = PLCA_REPDATA(reply_base);
	const u8 status = data->plca_st.pst;

	if (nla_put_u8(skb, ETHTOOL_A_PLCA_STATUS, !!status))
		return -EMSGSIZE;

	return 0;
};

const struct ethnl_request_ops ethnl_plca_status_request_ops = {
	.request_cmd		= ETHTOOL_MSG_PLCA_GET_STATUS,
	.reply_cmd		= ETHTOOL_MSG_PLCA_GET_STATUS_REPLY,
	.hdr_attr		= ETHTOOL_A_PLCA_HEADER,
	.req_info_size		= sizeof(struct plca_req_info),
	.reply_data_size	= sizeof(struct plca_reply_data),

	.prepare_data		= plca_get_status_prepare_data,
	.reply_size		= plca_get_status_reply_size,
	.fill_reply		= plca_get_status_fill_reply,
};
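Both the fill and set paths above rely on the memset(..., 0xff, ...) convention: a signed field of struct phy_plca_cfg left at -1 means "not supplied/not supported", and only fields >= 0 are emitted or applied. A hypothetical PHY-driver implementation of ops->set_plca_cfg() honouring that convention might look like the following sketch (example_set_plca_cfg and the register comments are placeholders, not a real driver).

/*
 * Hypothetical PHY-driver side of ops->set_plca_cfg(); mirrors the -1
 * ("not set") convention produced by the 0xff memset in ethnl_set_plca().
 */
#include <linux/netlink.h>
#include <linux/phy.h>

static int example_set_plca_cfg(struct phy_device *phydev,
				const struct phy_plca_cfg *plca_cfg,
				struct netlink_ext_ack *extack)
{
	/* Fields left at -1 were not present in the netlink request. */
	if (plca_cfg->node_id >= 0) {
		if (plca_cfg->node_id > 254) {
			NL_SET_ERR_MSG(extack, "PLCA node ID out of range");
			return -EINVAL;
		}
		/* ...write the node ID to the PHY's PLCA control register... */
	}

	if (plca_cfg->node_cnt >= 0) {
		/* ...write the node count... */
	}

	if (plca_cfg->enabled >= 0) {
		/* ...enable or disable PLCA last, once parameters are set... */
	}

	return 0;
}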
/* SPDX-License-Identifier: GPL-2.0 */
/*
 *
 * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
 *
 * on-disk ntfs structs
 */

// clang-format off
#ifndef _LINUX_NTFS3_NTFS_H
#define _LINUX_NTFS3_NTFS_H

#include <linux/blkdev.h>
#include <linux/build_bug.h>
#include <linux/kernel.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/types.h>

#include "debug.h"

/* TODO: Check 4K MFT record and 512 bytes cluster. */

/* Check each run for marked clusters. */
#define NTFS3_CHECK_FREE_CLST

#define NTFS_NAME_LEN 255

/*
 * ntfs.sys used a maximum of 500 links; the on-disk struct allows up to 0xffff.
 * xfstest generic/041 creates 3003 hardlinks.
 */
#define NTFS_LINK_MAX 4000

/*
 * Activate to use 64 bit clusters instead of the 32 bits used by ntfs.sys.
 * Logical and virtual cluster numbers may, if needed, be
 * redefined to use a 64 bit value.
 */
//#define CONFIG_NTFS3_64BIT_CLUSTER

#define NTFS_LZNT_MAX_CLUSTER	4096
#define NTFS_LZNT_CUNIT		4
#define NTFS_LZNT_CLUSTERS	(1u << NTFS_LZNT_CUNIT)

struct GUID {
	__le32 Data1;
	__le16 Data2;
	__le16 Data3;
	u8 Data4[8];
};

/*
 * This struct repeats the layout of ATTR_FILE_NAME
 * at offset 0x40.
 * It is used to store the global constants NAME_MFT/NAME_MIRROR...
 * most constant names are shorter than 10.
*/ struct cpu_str { u8 len; u8 unused; u16 name[]; }; struct le_str { u8 len; u8 unused; __le16 name[]; }; static_assert(SECTOR_SHIFT == 9); #ifdef CONFIG_NTFS3_64BIT_CLUSTER typedef u64 CLST; static_assert(sizeof(size_t) == 8); #else typedef u32 CLST; #endif #define SPARSE_LCN64 ((u64)-1) #define SPARSE_LCN ((CLST)-1) #define RESIDENT_LCN ((CLST)-2) #define COMPRESSED_LCN ((CLST)-3) enum RECORD_NUM { MFT_REC_MFT = 0, MFT_REC_MIRR = 1, MFT_REC_LOG = 2, MFT_REC_VOL = 3, MFT_REC_ATTR = 4, MFT_REC_ROOT = 5, MFT_REC_BITMAP = 6, MFT_REC_BOOT = 7, MFT_REC_BADCLUST = 8, MFT_REC_SECURE = 9, MFT_REC_UPCASE = 10, MFT_REC_EXTEND = 11, MFT_REC_RESERVED = 12, MFT_REC_FREE = 16, MFT_REC_USER = 24, }; enum ATTR_TYPE { ATTR_ZERO = cpu_to_le32(0x00), ATTR_STD = cpu_to_le32(0x10), ATTR_LIST = cpu_to_le32(0x20), ATTR_NAME = cpu_to_le32(0x30), ATTR_ID = cpu_to_le32(0x40), ATTR_SECURE = cpu_to_le32(0x50), ATTR_LABEL = cpu_to_le32(0x60), ATTR_VOL_INFO = cpu_to_le32(0x70), ATTR_DATA = cpu_to_le32(0x80), ATTR_ROOT = cpu_to_le32(0x90), ATTR_ALLOC = cpu_to_le32(0xA0), ATTR_BITMAP = cpu_to_le32(0xB0), ATTR_REPARSE = cpu_to_le32(0xC0), ATTR_EA_INFO = cpu_to_le32(0xD0), ATTR_EA = cpu_to_le32(0xE0), ATTR_PROPERTYSET = cpu_to_le32(0xF0), ATTR_LOGGED_UTILITY_STREAM = cpu_to_le32(0x100), ATTR_END = cpu_to_le32(0xFFFFFFFF) }; static_assert(sizeof(enum ATTR_TYPE) == 4); enum FILE_ATTRIBUTE { FILE_ATTRIBUTE_READONLY = cpu_to_le32(0x00000001), FILE_ATTRIBUTE_HIDDEN = cpu_to_le32(0x00000002), FILE_ATTRIBUTE_SYSTEM = cpu_to_le32(0x00000004), FILE_ATTRIBUTE_ARCHIVE = cpu_to_le32(0x00000020), FILE_ATTRIBUTE_DEVICE = cpu_to_le32(0x00000040), FILE_ATTRIBUTE_TEMPORARY = cpu_to_le32(0x00000100), FILE_ATTRIBUTE_SPARSE_FILE = cpu_to_le32(0x00000200), FILE_ATTRIBUTE_REPARSE_POINT = cpu_to_le32(0x00000400), FILE_ATTRIBUTE_COMPRESSED = cpu_to_le32(0x00000800), FILE_ATTRIBUTE_OFFLINE = cpu_to_le32(0x00001000), FILE_ATTRIBUTE_NOT_CONTENT_INDEXED = cpu_to_le32(0x00002000), FILE_ATTRIBUTE_ENCRYPTED = cpu_to_le32(0x00004000), FILE_ATTRIBUTE_VALID_FLAGS = cpu_to_le32(0x00007fb7), FILE_ATTRIBUTE_DIRECTORY = cpu_to_le32(0x10000000), FILE_ATTRIBUTE_INDEX = cpu_to_le32(0x20000000) }; static_assert(sizeof(enum FILE_ATTRIBUTE) == 4); extern const struct cpu_str NAME_MFT; extern const struct cpu_str NAME_MIRROR; extern const struct cpu_str NAME_LOGFILE; extern const struct cpu_str NAME_VOLUME; extern const struct cpu_str NAME_ATTRDEF; extern const struct cpu_str NAME_ROOT; extern const struct cpu_str NAME_BITMAP; extern const struct cpu_str NAME_BOOT; extern const struct cpu_str NAME_BADCLUS; extern const struct cpu_str NAME_QUOTA; extern const struct cpu_str NAME_SECURE; extern const struct cpu_str NAME_UPCASE; extern const struct cpu_str NAME_EXTEND; extern const struct cpu_str NAME_OBJID; extern const struct cpu_str NAME_REPARSE; extern const struct cpu_str NAME_USNJRNL; extern const __le16 I30_NAME[4]; extern const __le16 SII_NAME[4]; extern const __le16 SDH_NAME[4]; extern const __le16 SO_NAME[2]; extern const __le16 SQ_NAME[2]; extern const __le16 SR_NAME[2]; extern const __le16 BAD_NAME[4]; extern const __le16 SDS_NAME[4]; extern const __le16 WOF_NAME[17]; /* WofCompressedData */ /* MFT record number structure. */ struct MFT_REF { __le32 low; // The low part of the number. __le16 high; // The high part of the number. __le16 seq; // The sequence number of MFT record. 
}; static_assert(sizeof(__le64) == sizeof(struct MFT_REF)); static inline CLST ino_get(const struct MFT_REF *ref) { #ifdef CONFIG_NTFS3_64BIT_CLUSTER return le32_to_cpu(ref->low) | ((u64)le16_to_cpu(ref->high) << 32); #else return le32_to_cpu(ref->low); #endif } struct NTFS_BOOT { u8 jump_code[3]; // 0x00: Jump to boot code. u8 system_id[8]; // 0x03: System ID, equals "NTFS " // NOTE: This member is not aligned(!) // bytes_per_sector[0] must be 0. // bytes_per_sector[1] must be multiplied by 256. u8 bytes_per_sector[2]; // 0x0B: Bytes per sector. u8 sectors_per_clusters;// 0x0D: Sectors per cluster. u8 unused1[7]; u8 media_type; // 0x15: Media type (0xF8 - harddisk) u8 unused2[2]; __le16 sct_per_track; // 0x18: number of sectors per track. __le16 heads; // 0x1A: number of heads per cylinder. __le32 hidden_sectors; // 0x1C: number of 'hidden' sectors. u8 unused3[4]; u8 bios_drive_num; // 0x24: BIOS drive number =0x80. u8 unused4; u8 signature_ex; // 0x26: Extended BOOT signature =0x80. u8 unused5; __le64 sectors_per_volume;// 0x28: Size of volume in sectors. __le64 mft_clst; // 0x30: First cluster of $MFT __le64 mft2_clst; // 0x38: First cluster of $MFTMirr s8 record_size; // 0x40: Size of MFT record in clusters(sectors). u8 unused6[3]; s8 index_size; // 0x44: Size of INDX record in clusters(sectors). u8 unused7[3]; __le64 serial_num; // 0x48: Volume serial number __le32 check_sum; // 0x50: Simple additive checksum of all // of the u32's which precede the 'check_sum'. u8 boot_code[0x200 - 0x50 - 2 - 4]; // 0x54: u8 boot_magic[2]; // 0x1FE: Boot signature =0x55 + 0xAA }; static_assert(sizeof(struct NTFS_BOOT) == 0x200); enum NTFS_SIGNATURE { NTFS_FILE_SIGNATURE = cpu_to_le32(0x454C4946), // 'FILE' NTFS_INDX_SIGNATURE = cpu_to_le32(0x58444E49), // 'INDX' NTFS_CHKD_SIGNATURE = cpu_to_le32(0x444B4843), // 'CHKD' NTFS_RSTR_SIGNATURE = cpu_to_le32(0x52545352), // 'RSTR' NTFS_RCRD_SIGNATURE = cpu_to_le32(0x44524352), // 'RCRD' NTFS_BAAD_SIGNATURE = cpu_to_le32(0x44414142), // 'BAAD' NTFS_HOLE_SIGNATURE = cpu_to_le32(0x454C4F48), // 'HOLE' NTFS_FFFF_SIGNATURE = cpu_to_le32(0xffffffff), }; static_assert(sizeof(enum NTFS_SIGNATURE) == 4); /* MFT Record header structure. */ struct NTFS_RECORD_HEADER { /* Record magic number, equals 'FILE'/'INDX'/'RSTR'/'RCRD'. */ enum NTFS_SIGNATURE sign; // 0x00: __le16 fix_off; // 0x04: __le16 fix_num; // 0x06: __le64 lsn; // 0x08: Log file sequence number, }; static_assert(sizeof(struct NTFS_RECORD_HEADER) == 0x10); static inline int is_baad(const struct NTFS_RECORD_HEADER *hdr) { return hdr->sign == NTFS_BAAD_SIGNATURE; } /* Possible bits in struct MFT_REC.flags. */ enum RECORD_FLAG { RECORD_FLAG_IN_USE = cpu_to_le16(0x0001), RECORD_FLAG_DIR = cpu_to_le16(0x0002), RECORD_FLAG_SYSTEM = cpu_to_le16(0x0004), RECORD_FLAG_INDEX = cpu_to_le16(0x0008), }; /* MFT Record structure. */ struct MFT_REC { struct NTFS_RECORD_HEADER rhdr; // 'FILE' __le16 seq; // 0x10: Sequence number for this record. __le16 hard_links; // 0x12: The number of hard links to record. __le16 attr_off; // 0x14: Offset to attributes. __le16 flags; // 0x16: See RECORD_FLAG. __le32 used; // 0x18: The size of used part. __le32 total; // 0x1C: Total record size. struct MFT_REF parent_ref; // 0x20: Parent MFT record. __le16 next_attr_id; // 0x28: The next attribute Id. __le16 res; // 0x2A: High part of MFT record? __le32 mft_record; // 0x2C: Current MFT record number. 
__le16 fixups[]; // 0x30: }; #define MFTRECORD_FIXUP_OFFSET_1 offsetof(struct MFT_REC, res) #define MFTRECORD_FIXUP_OFFSET_3 offsetof(struct MFT_REC, fixups) /* * define MFTRECORD_FIXUP_OFFSET as MFTRECORD_FIXUP_OFFSET_3 (0x30) * to format new mft records with bigger header (as current ntfs.sys does) * * define MFTRECORD_FIXUP_OFFSET as MFTRECORD_FIXUP_OFFSET_1 (0x2A) * to format new mft records with smaller header (as old ntfs.sys did) * Both variants are valid. */ #define MFTRECORD_FIXUP_OFFSET MFTRECORD_FIXUP_OFFSET_1 static_assert(MFTRECORD_FIXUP_OFFSET_1 == 0x2A); static_assert(MFTRECORD_FIXUP_OFFSET_3 == 0x30); static inline bool is_rec_base(const struct MFT_REC *rec) { const struct MFT_REF *r = &rec->parent_ref; return !r->low && !r->high && !r->seq; } static inline bool is_mft_rec5(const struct MFT_REC *rec) { return le16_to_cpu(rec->rhdr.fix_off) >= offsetof(struct MFT_REC, fixups); } static inline bool is_rec_inuse(const struct MFT_REC *rec) { return rec->flags & RECORD_FLAG_IN_USE; } static inline bool clear_rec_inuse(struct MFT_REC *rec) { return rec->flags &= ~RECORD_FLAG_IN_USE; } /* Possible values of ATTR_RESIDENT.flags */ #define RESIDENT_FLAG_INDEXED 0x01 struct ATTR_RESIDENT { __le32 data_size; // 0x10: The size of data. __le16 data_off; // 0x14: Offset to data. u8 flags; // 0x16: Resident flags ( 1 - indexed ). u8 res; // 0x17: }; // sizeof() = 0x18 struct ATTR_NONRESIDENT { __le64 svcn; // 0x10: Starting VCN of this segment. __le64 evcn; // 0x18: End VCN of this segment. __le16 run_off; // 0x20: Offset to packed runs. // Unit of Compression size for this stream, expressed // as a log of the cluster size. // // 0 means file is not compressed // 1, 2, 3, and 4 are potentially legal values if the // stream is compressed, however the implementation // may only choose to use 4, or possibly 3. // Note that 4 means cluster size time 16. // If convenient the implementation may wish to accept a // reasonable range of legal values here (1-5?), // even if the implementation only generates // a smaller set of values itself. u8 c_unit; // 0x22: u8 res1[5]; // 0x23: __le64 alloc_size; // 0x28: The allocated size of attribute in bytes. // (multiple of cluster size) __le64 data_size; // 0x30: The size of attribute in bytes <= alloc_size. __le64 valid_size; // 0x38: The size of valid part in bytes <= data_size. __le64 total_size; // 0x40: The sum of the allocated clusters for a file. // (present only for the first segment (0 == vcn) // of compressed attribute) }; // sizeof()=0x40 or 0x48 (if compressed) /* Possible values of ATTRIB.flags: */ #define ATTR_FLAG_COMPRESSED cpu_to_le16(0x0001) #define ATTR_FLAG_COMPRESSED_MASK cpu_to_le16(0x00FF) #define ATTR_FLAG_ENCRYPTED cpu_to_le16(0x4000) #define ATTR_FLAG_SPARSED cpu_to_le16(0x8000) struct ATTRIB { enum ATTR_TYPE type; // 0x00: The type of this attribute. __le32 size; // 0x04: The size of this attribute. u8 non_res; // 0x08: Is this attribute non-resident? u8 name_len; // 0x09: This attribute name length. __le16 name_off; // 0x0A: Offset to the attribute name. __le16 flags; // 0x0C: See ATTR_FLAG_XXX. __le16 id; // 0x0E: Unique id (per record). union { struct ATTR_RESIDENT res; // 0x10 struct ATTR_NONRESIDENT nres; // 0x10 }; }; /* Define attribute sizes. 
*/ #define SIZEOF_RESIDENT 0x18 #define SIZEOF_NONRESIDENT_EX 0x48 #define SIZEOF_NONRESIDENT 0x40 #define SIZEOF_RESIDENT_LE cpu_to_le16(0x18) #define SIZEOF_NONRESIDENT_EX_LE cpu_to_le16(0x48) #define SIZEOF_NONRESIDENT_LE cpu_to_le16(0x40) static inline u64 attr_ondisk_size(const struct ATTRIB *attr) { return attr->non_res ? ((attr->flags & (ATTR_FLAG_COMPRESSED | ATTR_FLAG_SPARSED)) ? le64_to_cpu(attr->nres.total_size) : le64_to_cpu(attr->nres.alloc_size)) : ALIGN(le32_to_cpu(attr->res.data_size), 8); } static inline u64 attr_size(const struct ATTRIB *attr) { return attr->non_res ? le64_to_cpu(attr->nres.data_size) : le32_to_cpu(attr->res.data_size); } static inline bool is_attr_encrypted(const struct ATTRIB *attr) { return attr->flags & ATTR_FLAG_ENCRYPTED; } static inline bool is_attr_sparsed(const struct ATTRIB *attr) { return attr->flags & ATTR_FLAG_SPARSED; } static inline bool is_attr_compressed(const struct ATTRIB *attr) { return attr->flags & ATTR_FLAG_COMPRESSED; } static inline bool is_attr_ext(const struct ATTRIB *attr) { return attr->flags & (ATTR_FLAG_SPARSED | ATTR_FLAG_COMPRESSED); } static inline bool is_attr_indexed(const struct ATTRIB *attr) { return !attr->non_res && (attr->res.flags & RESIDENT_FLAG_INDEXED); } static inline __le16 const *attr_name(const struct ATTRIB *attr) { return Add2Ptr(attr, le16_to_cpu(attr->name_off)); } static inline u64 attr_svcn(const struct ATTRIB *attr) { return attr->non_res ? le64_to_cpu(attr->nres.svcn) : 0; } static_assert(sizeof(struct ATTRIB) == 0x48); static_assert(sizeof(((struct ATTRIB *)NULL)->res) == 0x08); static_assert(sizeof(((struct ATTRIB *)NULL)->nres) == 0x38); static inline void *resident_data_ex(const struct ATTRIB *attr, u32 datasize) { u32 asize, rsize; u16 off; if (attr->non_res) return NULL; asize = le32_to_cpu(attr->size); off = le16_to_cpu(attr->res.data_off); if (asize < datasize + off) return NULL; rsize = le32_to_cpu(attr->res.data_size); if (rsize < datasize) return NULL; return Add2Ptr(attr, off); } static inline void *resident_data(const struct ATTRIB *attr) { return Add2Ptr(attr, le16_to_cpu(attr->res.data_off)); } static inline void *attr_run(const struct ATTRIB *attr) { return Add2Ptr(attr, le16_to_cpu(attr->nres.run_off)); } /* Standard information attribute (0x10). */ struct ATTR_STD_INFO { __le64 cr_time; // 0x00: File creation file. __le64 m_time; // 0x08: File modification time. __le64 c_time; // 0x10: Last time any attribute was modified. __le64 a_time; // 0x18: File last access time. enum FILE_ATTRIBUTE fa; // 0x20: Standard DOS attributes & more. __le32 max_ver_num; // 0x24: Maximum Number of Versions. __le32 ver_num; // 0x28: Version Number. __le32 class_id; // 0x2C: Class Id from bidirectional Class Id index. }; static_assert(sizeof(struct ATTR_STD_INFO) == 0x30); #define SECURITY_ID_INVALID 0x00000000 #define SECURITY_ID_FIRST 0x00000100 struct ATTR_STD_INFO5 { __le64 cr_time; // 0x00: File creation file. __le64 m_time; // 0x08: File modification time. __le64 c_time; // 0x10: Last time any attribute was modified. __le64 a_time; // 0x18: File last access time. enum FILE_ATTRIBUTE fa; // 0x20: Standard DOS attributes & more. __le32 max_ver_num; // 0x24: Maximum Number of Versions. __le32 ver_num; // 0x28: Version Number. __le32 class_id; // 0x2C: Class Id from bidirectional Class Id index. __le32 owner_id; // 0x30: Owner Id of the user owning the file. __le32 security_id; // 0x34: The Security Id is a key in the $SII Index and $SDS. 
__le64 quota_charge; // 0x38: __le64 usn; // 0x40: Last Update Sequence Number of the file. This is a direct // index into the file $UsnJrnl. If zero, the USN Journal is // disabled. }; static_assert(sizeof(struct ATTR_STD_INFO5) == 0x48); /* Attribute list entry structure (0x20) */ struct ATTR_LIST_ENTRY { enum ATTR_TYPE type; // 0x00: The type of attribute. __le16 size; // 0x04: The size of this record. u8 name_len; // 0x06: The length of attribute name. u8 name_off; // 0x07: The offset to attribute name. __le64 vcn; // 0x08: Starting VCN of this attribute. struct MFT_REF ref; // 0x10: MFT record number with attribute. __le16 id; // 0x18: struct ATTRIB ID. __le16 name[]; // 0x1A: To get real name use name_off. }; // sizeof(0x20) static inline u32 le_size(u8 name_len) { return ALIGN(offsetof(struct ATTR_LIST_ENTRY, name) + name_len * sizeof(short), 8); } /* Returns 0 if 'attr' has the same type and name. */ static inline int le_cmp(const struct ATTR_LIST_ENTRY *le, const struct ATTRIB *attr) { return le->type != attr->type || le->name_len != attr->name_len || (!le->name_len && memcmp(Add2Ptr(le, le->name_off), Add2Ptr(attr, le16_to_cpu(attr->name_off)), le->name_len * sizeof(short))); } static inline __le16 const *le_name(const struct ATTR_LIST_ENTRY *le) { return Add2Ptr(le, le->name_off); } /* File name types (the field type in struct ATTR_FILE_NAME). */ #define FILE_NAME_POSIX 0 #define FILE_NAME_UNICODE 1 #define FILE_NAME_DOS 2 #define FILE_NAME_UNICODE_AND_DOS (FILE_NAME_DOS | FILE_NAME_UNICODE) /* Filename attribute structure (0x30). */ struct NTFS_DUP_INFO { __le64 cr_time; // 0x00: File creation file. __le64 m_time; // 0x08: File modification time. __le64 c_time; // 0x10: Last time any attribute was modified. __le64 a_time; // 0x18: File last access time. __le64 alloc_size; // 0x20: Data attribute allocated size, multiple of cluster size. __le64 data_size; // 0x28: Data attribute size <= Dataalloc_size. enum FILE_ATTRIBUTE fa; // 0x30: Standard DOS attributes & more. __le16 ea_size; // 0x34: Packed EAs. __le16 reparse; // 0x36: Used by Reparse. }; // 0x38 struct ATTR_FILE_NAME { struct MFT_REF home; // 0x00: MFT record for directory. struct NTFS_DUP_INFO dup;// 0x08: u8 name_len; // 0x40: File name length in words. u8 type; // 0x41: File name type. __le16 name[]; // 0x42: File name. }; static_assert(sizeof(((struct ATTR_FILE_NAME *)NULL)->dup) == 0x38); static_assert(offsetof(struct ATTR_FILE_NAME, name) == 0x42); #define SIZEOF_ATTRIBUTE_FILENAME 0x44 #define SIZEOF_ATTRIBUTE_FILENAME_MAX (0x42 + 255 * 2) static inline struct ATTRIB *attr_from_name(struct ATTR_FILE_NAME *fname) { return (struct ATTRIB *)((char *)fname - SIZEOF_RESIDENT); } static inline u16 fname_full_size(const struct ATTR_FILE_NAME *fname) { /* Don't return struct_size(fname, name, fname->name_len); */ return offsetof(struct ATTR_FILE_NAME, name) + fname->name_len * sizeof(short); } static inline u8 paired_name(u8 type) { if (type == FILE_NAME_UNICODE) return FILE_NAME_DOS; if (type == FILE_NAME_DOS) return FILE_NAME_UNICODE; return FILE_NAME_POSIX; } /* Index entry defines ( the field flags in NtfsDirEntry ). */ #define NTFS_IE_HAS_SUBNODES cpu_to_le16(1) #define NTFS_IE_LAST cpu_to_le16(2) /* Directory entry structure. */ struct NTFS_DE { union { struct MFT_REF ref; // 0x00: MFT record number with this file. struct { __le16 data_off; // 0x00: __le16 data_size; // 0x02: __le32 res; // 0x04: Must be 0. } view; }; __le16 size; // 0x08: The size of this entry. 
__le16 key_size; // 0x0A: The size of File name length in bytes + 0x42. __le16 flags; // 0x0C: Entry flags: NTFS_IE_XXX. __le16 res; // 0x0E: // Here any indexed attribute can be placed. // One of them is: // struct ATTR_FILE_NAME AttrFileName; // // The last 8 bytes of this structure contains // the VBN of subnode. // !!! Note !!! // This field is presented only if (flags & NTFS_IE_HAS_SUBNODES) // __le64 vbn; }; static_assert(sizeof(struct NTFS_DE) == 0x10); static inline void de_set_vbn_le(struct NTFS_DE *e, __le64 vcn) { __le64 *v = Add2Ptr(e, le16_to_cpu(e->size) - sizeof(__le64)); *v = vcn; } static inline void de_set_vbn(struct NTFS_DE *e, CLST vcn) { __le64 *v = Add2Ptr(e, le16_to_cpu(e->size) - sizeof(__le64)); *v = cpu_to_le64(vcn); } static inline __le64 de_get_vbn_le(const struct NTFS_DE *e) { return *(__le64 *)Add2Ptr(e, le16_to_cpu(e->size) - sizeof(__le64)); } static inline CLST de_get_vbn(const struct NTFS_DE *e) { __le64 *v = Add2Ptr(e, le16_to_cpu(e->size) - sizeof(__le64)); return le64_to_cpu(*v); } static inline struct NTFS_DE *de_get_next(const struct NTFS_DE *e) { return Add2Ptr(e, le16_to_cpu(e->size)); } static inline struct ATTR_FILE_NAME *de_get_fname(const struct NTFS_DE *e) { return le16_to_cpu(e->key_size) >= SIZEOF_ATTRIBUTE_FILENAME ? Add2Ptr(e, sizeof(struct NTFS_DE)) : NULL; } static inline bool de_is_last(const struct NTFS_DE *e) { return e->flags & NTFS_IE_LAST; } static inline bool de_has_vcn(const struct NTFS_DE *e) { return e->flags & NTFS_IE_HAS_SUBNODES; } static inline bool de_has_vcn_ex(const struct NTFS_DE *e) { return (e->flags & NTFS_IE_HAS_SUBNODES) && (u64)(-1) != *((u64 *)Add2Ptr(e, le16_to_cpu(e->size) - sizeof(__le64))); } #define MAX_BYTES_PER_NAME_ENTRY \ ALIGN(sizeof(struct NTFS_DE) + \ offsetof(struct ATTR_FILE_NAME, name) + \ NTFS_NAME_LEN * sizeof(short), 8) #define NTFS_INDEX_HDR_HAS_SUBNODES cpu_to_le32(1) struct INDEX_HDR { __le32 de_off; // 0x00: The offset from the start of this structure // to the first NTFS_DE. __le32 used; // 0x04: The size of this structure plus all // entries (quad-word aligned). __le32 total; // 0x08: The allocated size of for this structure plus all entries. __le32 flags; // 0x0C: 0x00 = Small directory, 0x01 = Large directory. 
// // de_off + used <= total // }; static_assert(sizeof(struct INDEX_HDR) == 0x10); static inline struct NTFS_DE *hdr_first_de(const struct INDEX_HDR *hdr) { u32 de_off = le32_to_cpu(hdr->de_off); u32 used = le32_to_cpu(hdr->used); struct NTFS_DE *e; u16 esize; if (de_off >= used || de_off + sizeof(struct NTFS_DE) > used ) return NULL; e = Add2Ptr(hdr, de_off); esize = le16_to_cpu(e->size); if (esize < sizeof(struct NTFS_DE) || de_off + esize > used) return NULL; return e; } static inline struct NTFS_DE *hdr_next_de(const struct INDEX_HDR *hdr, const struct NTFS_DE *e) { size_t off = PtrOffset(hdr, e); u32 used = le32_to_cpu(hdr->used); u16 esize; if (off >= used) return NULL; esize = le16_to_cpu(e->size); if (esize < sizeof(struct NTFS_DE) || off + esize + sizeof(struct NTFS_DE) > used) return NULL; return Add2Ptr(e, esize); } static inline bool hdr_has_subnode(const struct INDEX_HDR *hdr) { return hdr->flags & NTFS_INDEX_HDR_HAS_SUBNODES; } struct INDEX_BUFFER { struct NTFS_RECORD_HEADER rhdr; // 'INDX' __le64 vbn; // 0x10: vcn if index >= cluster or vsn id index < cluster struct INDEX_HDR ihdr; // 0x18: }; static_assert(sizeof(struct INDEX_BUFFER) == 0x28); static inline bool ib_is_empty(const struct INDEX_BUFFER *ib) { const struct NTFS_DE *first = hdr_first_de(&ib->ihdr); return !first || de_is_last(first); } static inline bool ib_is_leaf(const struct INDEX_BUFFER *ib) { return !(ib->ihdr.flags & NTFS_INDEX_HDR_HAS_SUBNODES); } /* Index root structure ( 0x90 ). */ enum COLLATION_RULE { NTFS_COLLATION_TYPE_BINARY = cpu_to_le32(0), // $I30 NTFS_COLLATION_TYPE_FILENAME = cpu_to_le32(0x01), // $SII of $Secure and $Q of Quota NTFS_COLLATION_TYPE_UINT = cpu_to_le32(0x10), // $O of Quota NTFS_COLLATION_TYPE_SID = cpu_to_le32(0x11), // $SDH of $Secure NTFS_COLLATION_TYPE_SECURITY_HASH = cpu_to_le32(0x12), // $O of ObjId and "$R" for Reparse NTFS_COLLATION_TYPE_UINTS = cpu_to_le32(0x13) }; static_assert(sizeof(enum COLLATION_RULE) == 4); // struct INDEX_ROOT { enum ATTR_TYPE type; // 0x00: The type of attribute to index on. enum COLLATION_RULE rule; // 0x04: The rule. __le32 index_block_size;// 0x08: The size of index record. u8 index_block_clst; // 0x0C: The number of clusters or sectors per index. u8 res[3]; struct INDEX_HDR ihdr; // 0x10: }; static_assert(sizeof(struct INDEX_ROOT) == 0x20); static_assert(offsetof(struct INDEX_ROOT, ihdr) == 0x10); #define VOLUME_FLAG_DIRTY cpu_to_le16(0x0001) #define VOLUME_FLAG_RESIZE_LOG_FILE cpu_to_le16(0x0002) struct VOLUME_INFO { __le64 res1; // 0x00 u8 major_ver; // 0x08: NTFS major version number (before .) u8 minor_ver; // 0x09: NTFS minor version number (after .) __le16 flags; // 0x0A: Volume flags, see VOLUME_FLAG_XXX }; // sizeof=0xC #define SIZEOF_ATTRIBUTE_VOLUME_INFO 0xc #define NTFS_LABEL_MAX_LENGTH (0x100 / sizeof(short)) #define NTFS_ATTR_INDEXABLE cpu_to_le32(0x00000002) #define NTFS_ATTR_DUPALLOWED cpu_to_le32(0x00000004) #define NTFS_ATTR_MUST_BE_INDEXED cpu_to_le32(0x00000010) #define NTFS_ATTR_MUST_BE_NAMED cpu_to_le32(0x00000020) #define NTFS_ATTR_MUST_BE_RESIDENT cpu_to_le32(0x00000040) #define NTFS_ATTR_LOG_ALWAYS cpu_to_le32(0x00000080) /* $AttrDef file entry. */ struct ATTR_DEF_ENTRY { __le16 name[0x40]; // 0x00: Attr name. enum ATTR_TYPE type; // 0x80: struct ATTRIB type. __le32 res; // 0x84: enum COLLATION_RULE rule; // 0x88: __le32 flags; // 0x8C: NTFS_ATTR_XXX (see above). __le64 min_sz; // 0x90: Minimum attribute data size. __le64 max_sz; // 0x98: Maximum attribute data size. 
}; static_assert(sizeof(struct ATTR_DEF_ENTRY) == 0xa0); /* Object ID (0x40) */ struct OBJECT_ID { struct GUID ObjId; // 0x00: Unique Id assigned to file. // Birth Volume Id is the Object Id of the Volume on. // which the Object Id was allocated. It never changes. struct GUID BirthVolumeId; //0x10: // Birth Object Id is the first Object Id that was // ever assigned to this MFT Record. I.e. If the Object Id // is changed for some reason, this field will reflect the // original value of the Object Id. struct GUID BirthObjectId; // 0x20: // Domain Id is currently unused but it is intended to be // used in a network environment where the local machine is // part of a Windows 2000 Domain. This may be used in a Windows // 2000 Advanced Server managed domain. struct GUID DomainId; // 0x30: }; static_assert(sizeof(struct OBJECT_ID) == 0x40); /* O Directory entry structure ( rule = 0x13 ) */ struct NTFS_DE_O { struct NTFS_DE de; struct GUID ObjId; // 0x10: Unique Id assigned to file. struct MFT_REF ref; // 0x20: MFT record number with this file. // Birth Volume Id is the Object Id of the Volume on // which the Object Id was allocated. It never changes. struct GUID BirthVolumeId; // 0x28: // Birth Object Id is the first Object Id that was // ever assigned to this MFT Record. I.e. If the Object Id // is changed for some reason, this field will reflect the // original value of the Object Id. // This field is valid if data_size == 0x48. struct GUID BirthObjectId; // 0x38: // Domain Id is currently unused but it is intended // to be used in a network environment where the local // machine is part of a Windows 2000 Domain. This may be // used in a Windows 2000 Advanced Server managed domain. struct GUID BirthDomainId; // 0x48: }; static_assert(sizeof(struct NTFS_DE_O) == 0x58); /* Q Directory entry structure ( rule = 0x11 ) */ struct NTFS_DE_Q { struct NTFS_DE de; __le32 owner_id; // 0x10: Unique Id assigned to file /* here is 0x30 bytes of user quota. NOTE: 4 byte aligned! */ __le32 Version; // 0x14: 0x02 __le32 Flags; // 0x18: Quota flags, see above __le64 BytesUsed; // 0x1C: __le64 ChangeTime; // 0x24: __le64 WarningLimit; // 0x28: __le64 HardLimit; // 0x34: __le64 ExceededTime; // 0x3C: // SID is placed here }__packed; // sizeof() = 0x44 static_assert(sizeof(struct NTFS_DE_Q) == 0x44); #define SecurityDescriptorsBlockSize 0x40000 // 256K #define SecurityDescriptorMaxSize 0x20000 // 128K #define Log2OfSecurityDescriptorsBlockSize 18 struct SECURITY_KEY { __le32 hash; // Hash value for descriptor __le32 sec_id; // Security Id (guaranteed unique) }; /* Security descriptors (the content of $Secure::SDS data stream) */ struct SECURITY_HDR { struct SECURITY_KEY key; // 0x00: Security Key. __le64 off; // 0x08: Offset of this entry in the file. __le32 size; // 0x10: Size of this entry, 8 byte aligned. /* * Security descriptor itself is placed here. * Total size is 16 byte aligned. 
*/ } __packed; static_assert(sizeof(struct SECURITY_HDR) == 0x14); /* SII Directory entry structure */ struct NTFS_DE_SII { struct NTFS_DE de; __le32 sec_id; // 0x10: Key: sizeof(security_id) = wKeySize struct SECURITY_HDR sec_hdr; // 0x14: } __packed; static_assert(offsetof(struct NTFS_DE_SII, sec_hdr) == 0x14); static_assert(sizeof(struct NTFS_DE_SII) == 0x28); /* SDH Directory entry structure */ struct NTFS_DE_SDH { struct NTFS_DE de; struct SECURITY_KEY key; // 0x10: Key struct SECURITY_HDR sec_hdr; // 0x18: Data __le16 magic[2]; // 0x2C: 0x00490049 "I I" }; #define SIZEOF_SDH_DIRENTRY 0x30 struct REPARSE_KEY { __le32 ReparseTag; // 0x00: Reparse Tag struct MFT_REF ref; // 0x04: MFT record number with this file }; // sizeof() = 0x0C static_assert(offsetof(struct REPARSE_KEY, ref) == 0x04); #define SIZEOF_REPARSE_KEY 0x0C /* Reparse Directory entry structure */ struct NTFS_DE_R { struct NTFS_DE de; struct REPARSE_KEY key; // 0x10: Reparse Key. u32 zero; // 0x1c: }; // sizeof() = 0x20 static_assert(sizeof(struct NTFS_DE_R) == 0x20); /* CompressReparseBuffer.WofVersion */ #define WOF_CURRENT_VERSION cpu_to_le32(1) /* CompressReparseBuffer.WofProvider */ #define WOF_PROVIDER_WIM cpu_to_le32(1) /* CompressReparseBuffer.WofProvider */ #define WOF_PROVIDER_SYSTEM cpu_to_le32(2) /* CompressReparseBuffer.ProviderVer */ #define WOF_PROVIDER_CURRENT_VERSION cpu_to_le32(1) #define WOF_COMPRESSION_XPRESS4K cpu_to_le32(0) // 4k #define WOF_COMPRESSION_LZX32K cpu_to_le32(1) // 32k #define WOF_COMPRESSION_XPRESS8K cpu_to_le32(2) // 8k #define WOF_COMPRESSION_XPRESS16K cpu_to_le32(3) // 16k /* * ATTR_REPARSE (0xC0) * * The reparse struct GUID structure is used by all 3rd party layered drivers to * store data in a reparse point. For non-Microsoft tags, The struct GUID field * cannot be GUID_NULL. * The constraints on reparse tags are defined below. * Microsoft tags can also be used with this format of the reparse point buffer. */ struct REPARSE_POINT { __le32 ReparseTag; // 0x00: __le16 ReparseDataLength;// 0x04: __le16 Reserved; struct GUID Guid; // 0x08: // // Here GenericReparseBuffer is placed // }; static_assert(sizeof(struct REPARSE_POINT) == 0x18); /* * The value of the following constant needs to satisfy the following * conditions: * (1) Be at least as large as the largest of the reserved tags. * (2) Be strictly smaller than all the tags in use. */ #define IO_REPARSE_TAG_RESERVED_RANGE 1 /* * The reparse tags are a ULONG. The 32 bits are laid out as follows: * * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 * +-+-+-+-+-----------------------+-------------------------------+ * |M|R|N|R| Reserved bits | Reparse Tag Value | * +-+-+-+-+-----------------------+-------------------------------+ * * M is the Microsoft bit. When set to 1, it denotes a tag owned by Microsoft. * All ISVs must use a tag with a 0 in this position. * Note: If a Microsoft tag is used by non-Microsoft software, the * behavior is not defined. * * R is reserved. Must be zero for non-Microsoft tags. * * N is name surrogate. When set to 1, the file represents another named * entity in the system. * * The M and N bits are OR-able. * The following macros check for the M and N bit values: */ /* * Macro to determine whether a reparse point tag corresponds to a tag * owned by Microsoft. */ #define IsReparseTagMicrosoft(_tag) (((_tag)&IO_REPARSE_TAG_MICROSOFT)) /* Macro to determine whether a reparse point tag is a name surrogate. 
*/ #define IsReparseTagNameSurrogate(_tag) (((_tag)&IO_REPARSE_TAG_NAME_SURROGATE)) /* * The following constant represents the bits that are valid to use in * reparse tags. */ #define IO_REPARSE_TAG_VALID_VALUES 0xF000FFFF /* * Macro to determine whether a reparse tag is a valid tag. */ #define IsReparseTagValid(_tag) \ (!((_tag) & ~IO_REPARSE_TAG_VALID_VALUES) && \ ((_tag) > IO_REPARSE_TAG_RESERVED_RANGE)) /* Microsoft tags for reparse points. */ enum IO_REPARSE_TAG { IO_REPARSE_TAG_SYMBOLIC_LINK = cpu_to_le32(0), IO_REPARSE_TAG_NAME_SURROGATE = cpu_to_le32(0x20000000), IO_REPARSE_TAG_MICROSOFT = cpu_to_le32(0x80000000), IO_REPARSE_TAG_MOUNT_POINT = cpu_to_le32(0xA0000003), IO_REPARSE_TAG_SYMLINK = cpu_to_le32(0xA000000C), IO_REPARSE_TAG_HSM = cpu_to_le32(0xC0000004), IO_REPARSE_TAG_SIS = cpu_to_le32(0x80000007), IO_REPARSE_TAG_DEDUP = cpu_to_le32(0x80000013), IO_REPARSE_TAG_COMPRESS = cpu_to_le32(0x80000017), /* * The reparse tag 0x80000008 is reserved for Microsoft internal use. * May be published in the future. */ /* Microsoft reparse tag reserved for DFS */ IO_REPARSE_TAG_DFS = cpu_to_le32(0x8000000A), /* Microsoft reparse tag reserved for the file system filter manager. */ IO_REPARSE_TAG_FILTER_MANAGER = cpu_to_le32(0x8000000B), /* Non-Microsoft tags for reparse points */ /* Tag allocated to CONGRUENT, May 2000. Used by IFSTEST. */ IO_REPARSE_TAG_IFSTEST_CONGRUENT = cpu_to_le32(0x00000009), /* Tag allocated to ARKIVIO. */ IO_REPARSE_TAG_ARKIVIO = cpu_to_le32(0x0000000C), /* Tag allocated to SOLUTIONSOFT. */ IO_REPARSE_TAG_SOLUTIONSOFT = cpu_to_le32(0x2000000D), /* Tag allocated to COMMVAULT. */ IO_REPARSE_TAG_COMMVAULT = cpu_to_le32(0x0000000E), /* OneDrive?? */ IO_REPARSE_TAG_CLOUD = cpu_to_le32(0x9000001A), IO_REPARSE_TAG_CLOUD_1 = cpu_to_le32(0x9000101A), IO_REPARSE_TAG_CLOUD_2 = cpu_to_le32(0x9000201A), IO_REPARSE_TAG_CLOUD_3 = cpu_to_le32(0x9000301A), IO_REPARSE_TAG_CLOUD_4 = cpu_to_le32(0x9000401A), IO_REPARSE_TAG_CLOUD_5 = cpu_to_le32(0x9000501A), IO_REPARSE_TAG_CLOUD_6 = cpu_to_le32(0x9000601A), IO_REPARSE_TAG_CLOUD_7 = cpu_to_le32(0x9000701A), IO_REPARSE_TAG_CLOUD_8 = cpu_to_le32(0x9000801A), IO_REPARSE_TAG_CLOUD_9 = cpu_to_le32(0x9000901A), IO_REPARSE_TAG_CLOUD_A = cpu_to_le32(0x9000A01A), IO_REPARSE_TAG_CLOUD_B = cpu_to_le32(0x9000B01A), IO_REPARSE_TAG_CLOUD_C = cpu_to_le32(0x9000C01A), IO_REPARSE_TAG_CLOUD_D = cpu_to_le32(0x9000D01A), IO_REPARSE_TAG_CLOUD_E = cpu_to_le32(0x9000E01A), IO_REPARSE_TAG_CLOUD_F = cpu_to_le32(0x9000F01A), }; #define SYMLINK_FLAG_RELATIVE 1 /* Microsoft reparse buffer. 
(see DDK for details) */ struct REPARSE_DATA_BUFFER { __le32 ReparseTag; // 0x00: __le16 ReparseDataLength; // 0x04: __le16 Reserved; union { /* If ReparseTag == 0xA0000003 (IO_REPARSE_TAG_MOUNT_POINT) */ struct { __le16 SubstituteNameOffset; // 0x08 __le16 SubstituteNameLength; // 0x0A __le16 PrintNameOffset; // 0x0C __le16 PrintNameLength; // 0x0E __le16 PathBuffer[]; // 0x10 } MountPointReparseBuffer; /* * If ReparseTag == 0xA000000C (IO_REPARSE_TAG_SYMLINK) * https://msdn.microsoft.com/en-us/library/cc232006.aspx */ struct { __le16 SubstituteNameOffset; // 0x08 __le16 SubstituteNameLength; // 0x0A __le16 PrintNameOffset; // 0x0C __le16 PrintNameLength; // 0x0E // 0-absolute path 1- relative path, SYMLINK_FLAG_RELATIVE __le32 Flags; // 0x10 __le16 PathBuffer[]; // 0x14 } SymbolicLinkReparseBuffer; /* If ReparseTag == 0x80000017U */ struct { __le32 WofVersion; // 0x08 == 1 /* * 1 - WIM backing provider ("WIMBoot"), * 2 - System compressed file provider */ __le32 WofProvider; // 0x0C: __le32 ProviderVer; // 0x10: == 1 WOF_FILE_PROVIDER_CURRENT_VERSION == 1 __le32 CompressionFormat; // 0x14: 0, 1, 2, 3. See WOF_COMPRESSION_XXX } CompressReparseBuffer; struct { u8 DataBuffer[1]; // 0x08: } GenericReparseBuffer; }; }; /* ATTR_EA_INFO (0xD0) */ #define FILE_NEED_EA 0x80 // See ntifs.h /* * FILE_NEED_EA, indicates that the file to which the EA belongs cannot be * interpreted without understanding the associated extended attributes. */ struct EA_INFO { __le16 size_pack; // 0x00: Size of buffer to hold in packed form. __le16 count; // 0x02: Count of EA's with FILE_NEED_EA bit set. __le32 size; // 0x04: Size of buffer to hold in unpacked form. }; static_assert(sizeof(struct EA_INFO) == 8); /* ATTR_EA (0xE0) */ struct EA_FULL { __le32 size; // 0x00: (not in packed) u8 flags; // 0x04: u8 name_len; // 0x05: __le16 elength; // 0x06: u8 name[]; // 0x08: }; static_assert(offsetof(struct EA_FULL, name) == 8); #define ACL_REVISION 2 #define ACL_REVISION_DS 4 #define SE_SELF_RELATIVE cpu_to_le16(0x8000) struct SECURITY_DESCRIPTOR_RELATIVE { u8 Revision; u8 Sbz1; __le16 Control; __le32 Owner; __le32 Group; __le32 Sacl; __le32 Dacl; }; static_assert(sizeof(struct SECURITY_DESCRIPTOR_RELATIVE) == 0x14); struct ACE_HEADER { u8 AceType; u8 AceFlags; __le16 AceSize; }; static_assert(sizeof(struct ACE_HEADER) == 4); struct ACL { u8 AclRevision; u8 Sbz1; __le16 AclSize; __le16 AceCount; __le16 Sbz2; }; static_assert(sizeof(struct ACL) == 8); struct SID { u8 Revision; u8 SubAuthorityCount; u8 IdentifierAuthority[6]; __le32 SubAuthority[]; }; static_assert(offsetof(struct SID, SubAuthority) == 8); #endif /* _LINUX_NTFS3_NTFS_H */ // clang-format on
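As a quick illustration of the reparse-tag bit layout documented above, here is a stand-alone, host-endian sketch that decomposes IO_REPARSE_TAG_SYMLINK (0xA000000C); the mask names are local to this sketch (the header itself stores the constants as __le32):

#include <stdio.h>
#include <stdint.h>

/* Host-endian copies of the flag bits described above (sketch only). */
#define TAG_MICROSOFT       0x80000000u   /* M bit: Microsoft-owned tag */
#define TAG_NAME_SURROGATE  0x20000000u   /* N bit: name surrogate */
#define TAG_VALID_VALUES    0xF000FFFFu
#define TAG_RESERVED_RANGE  1u

int main(void)
{
	uint32_t tag = 0xA000000Cu;	/* IO_REPARSE_TAG_SYMLINK */

	/* Mirrors IsReparseTagMicrosoft / IsReparseTagNameSurrogate / IsReparseTagValid. */
	printf("microsoft=%d surrogate=%d valid=%d value=0x%04x\n",
	       !!(tag & TAG_MICROSOFT),
	       !!(tag & TAG_NAME_SURROGATE),
	       !(tag & ~TAG_VALID_VALUES) && tag > TAG_RESERVED_RANGE,
	       tag & 0xFFFFu);
	return 0;	/* prints: microsoft=1 surrogate=1 valid=1 value=0x000c */
}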
// SPDX-License-Identifier: GPL-2.0-or-later /* Handle vlserver selection and rotation. * * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/sched/signal.h> #include "internal.h" #include "afs_vl.h" /* * Begin an operation on a volume location server. */ bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell, struct key *key) { static atomic_t debug_ids; memset(vc, 0, sizeof(*vc)); vc->cell = cell; vc->key = key; vc->cumul_error.error = -EDESTADDRREQ; vc->nr_iterations = -1; if (signal_pending(current)) { vc->cumul_error.error = -EINTR; vc->flags |= AFS_VL_CURSOR_STOP; return false; } vc->debug_id = atomic_inc_return(&debug_ids); return true; } /* * Begin iteration through a server list, starting with the last used server if * possible, or the last recorded good server if not.
*/ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc) { struct afs_cell *cell = vc->cell; unsigned int dns_lookup_count; if (cell->dns_source == DNS_RECORD_UNAVAILABLE || cell->dns_expiry <= ktime_get_real_seconds()) { dns_lookup_count = smp_load_acquire(&cell->dns_lookup_count); set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags); afs_queue_cell(cell, afs_cell_trace_get_queue_dns); if (cell->dns_source == DNS_RECORD_UNAVAILABLE) { if (wait_var_event_interruptible( &cell->dns_lookup_count, smp_load_acquire(&cell->dns_lookup_count) != dns_lookup_count) < 0) { vc->cumul_error.error = -ERESTARTSYS; return false; } } /* Status load is ordered after lookup counter load */ if (cell->dns_status == DNS_LOOKUP_GOT_NOT_FOUND) { pr_warn("No record of cell %s\n", cell->name); vc->cumul_error.error = -ENOENT; return false; } if (cell->dns_source == DNS_RECORD_UNAVAILABLE) { vc->cumul_error.error = -EDESTADDRREQ; return false; } } read_lock(&cell->vl_servers_lock); vc->server_list = afs_get_vlserverlist( rcu_dereference_protected(cell->vl_servers, lockdep_is_held(&cell->vl_servers_lock))); read_unlock(&cell->vl_servers_lock); if (!vc->server_list->nr_servers) return false; vc->untried_servers = (1UL << vc->server_list->nr_servers) - 1; vc->server_index = -1; return true; } /* * Select the vlserver to use. May be called multiple times to rotate * through the vlservers. */ bool afs_select_vlserver(struct afs_vl_cursor *vc) { struct afs_addr_list *alist = vc->alist; struct afs_vlserver *vlserver; unsigned long set, failed; unsigned int rtt; s32 abort_code = vc->call_abort_code; int error = vc->call_error, i; vc->nr_iterations++; _enter("VC=%x+%x,%d{%lx},%d{%lx},%d,%d", vc->debug_id, vc->nr_iterations, vc->server_index, vc->untried_servers, vc->addr_index, vc->addr_tried, error, abort_code); if (vc->flags & AFS_VL_CURSOR_STOP) { _leave(" = f [stopped]"); return false; } if (vc->nr_iterations == 0) goto start; WRITE_ONCE(alist->addrs[vc->addr_index].last_error, error); /* Evaluate the result of the previous operation, if there was one. */ switch (error) { default: case 0: /* Success or local failure. Stop. */ vc->cumul_error.error = error; vc->flags |= AFS_VL_CURSOR_STOP; _leave(" = f [okay/local %d]", vc->cumul_error.error); return false; case -ECONNABORTED: /* The far side rejected the operation on some grounds. This * might involve the server being busy or the volume having been moved. */ switch (abort_code) { case AFSVL_IO: case AFSVL_BADVOLOPER: case AFSVL_NOMEM: /* The server went weird. 
*/ afs_prioritise_error(&vc->cumul_error, -EREMOTEIO, abort_code); //write_lock(&vc->cell->vl_servers_lock); //vc->server_list->weird_mask |= 1 << vc->server_index; //write_unlock(&vc->cell->vl_servers_lock); goto next_server; default: afs_prioritise_error(&vc->cumul_error, error, abort_code); goto failed; } case -ERFKILL: case -EADDRNOTAVAIL: case -ENETUNREACH: case -EHOSTUNREACH: case -EHOSTDOWN: case -ECONNREFUSED: case -ETIMEDOUT: case -ETIME: _debug("no conn %d", error); afs_prioritise_error(&vc->cumul_error, error, 0); goto iterate_address; case -ECONNRESET: _debug("call reset"); afs_prioritise_error(&vc->cumul_error, error, 0); vc->flags |= AFS_VL_CURSOR_RETRY; goto next_server; case -EOPNOTSUPP: _debug("notsupp"); goto next_server; } restart_from_beginning: _debug("restart"); if (vc->call_responded && vc->addr_index != vc->alist->preferred && test_bit(alist->preferred, &vc->addr_tried)) WRITE_ONCE(alist->preferred, vc->addr_index); afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_restart); alist = vc->alist = NULL; afs_put_vlserverlist(vc->cell->net, vc->server_list); vc->server_list = NULL; if (vc->flags & AFS_VL_CURSOR_RETRIED) goto failed; vc->flags |= AFS_VL_CURSOR_RETRIED; start: _debug("start"); ASSERTCMP(alist, ==, NULL); if (!afs_start_vl_iteration(vc)) goto failed; error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list); if (error < 0) { afs_prioritise_error(&vc->cumul_error, error, 0); goto failed; } pick_server: _debug("pick [%lx]", vc->untried_servers); ASSERTCMP(alist, ==, NULL); error = afs_wait_for_vl_probes(vc->server_list, vc->untried_servers); if (error < 0) { afs_prioritise_error(&vc->cumul_error, error, 0); goto failed; } /* Pick the untried server with the lowest RTT. */ vc->server_index = vc->server_list->preferred; if (test_bit(vc->server_index, &vc->untried_servers)) goto selected_server; vc->server_index = -1; rtt = UINT_MAX; for (i = 0; i < vc->server_list->nr_servers; i++) { struct afs_vlserver *s = vc->server_list->servers[i].server; if (!test_bit(i, &vc->untried_servers) || !test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags)) continue; if (s->probe.rtt <= rtt) { vc->server_index = i; rtt = s->probe.rtt; } } if (vc->server_index == -1) goto no_more_servers; selected_server: _debug("use %d", vc->server_index); __clear_bit(vc->server_index, &vc->untried_servers); /* We're starting on a different vlserver from the list. We need to * check it, find its address list and probe its capabilities before we * use it. */ vlserver = vc->server_list->servers[vc->server_index].server; vc->server = vlserver; _debug("USING VLSERVER: %s", vlserver->name); read_lock(&vlserver->lock); alist = rcu_dereference_protected(vlserver->addresses, lockdep_is_held(&vlserver->lock)); vc->alist = afs_get_addrlist(alist, afs_alist_trace_get_vlrotate_set); read_unlock(&vlserver->lock); vc->addr_tried = 0; vc->addr_index = -1; iterate_address: /* Iterate over the current server's address list to try and find an * address on which it will respond to us. 
*/ set = READ_ONCE(alist->responded); failed = READ_ONCE(alist->probe_failed); vc->addr_index = READ_ONCE(alist->preferred); _debug("%lx-%lx-%lx,%d", set, failed, vc->addr_tried, vc->addr_index); set &= ~(failed | vc->addr_tried); if (!set) goto next_server; if (!test_bit(vc->addr_index, &set)) vc->addr_index = __ffs(set); set_bit(vc->addr_index, &vc->addr_tried); vc->alist = alist; _debug("VL address %d/%d", vc->addr_index, alist->nr_addrs); vc->call_responded = false; _leave(" = t %pISpc", rxrpc_kernel_remote_addr(alist->addrs[vc->addr_index].peer)); return true; next_server: _debug("next"); ASSERT(alist); if (vc->call_responded && vc->addr_index != alist->preferred && test_bit(alist->preferred, &vc->addr_tried)) WRITE_ONCE(alist->preferred, vc->addr_index); afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_next); alist = vc->alist = NULL; goto pick_server; no_more_servers: /* That's all the servers poked to no good effect. Try again if some * of them were busy. */ if (vc->flags & AFS_VL_CURSOR_RETRY) goto restart_from_beginning; for (i = 0; i < vc->server_list->nr_servers; i++) { struct afs_vlserver *s = vc->server_list->servers[i].server; if (test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags)) vc->cumul_error.responded = true; afs_prioritise_error(&vc->cumul_error, READ_ONCE(s->probe.error), s->probe.abort_code); } failed: if (alist) { if (vc->call_responded && vc->addr_index != alist->preferred && test_bit(alist->preferred, &vc->addr_tried)) WRITE_ONCE(alist->preferred, vc->addr_index); afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_fail); alist = vc->alist = NULL; } vc->flags |= AFS_VL_CURSOR_STOP; _leave(" = f [failed %d]", vc->cumul_error.error); return false; } /* * Dump cursor state in the case of the error being EDESTADDRREQ. */ static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc) { struct afs_cell *cell = vc->cell; static int count; int i; if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3) return; count++; rcu_read_lock(); pr_notice("EDESTADDR occurred\n"); pr_notice("CELL: %s err=%d\n", cell->name, cell->error); pr_notice("DNS: src=%u st=%u lc=%x\n", cell->dns_source, cell->dns_status, cell->dns_lookup_count); pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n", vc->untried_servers, vc->server_index, vc->nr_iterations, vc->flags, vc->cumul_error.error); pr_notice("VC: call er=%d ac=%d r=%u\n", vc->call_error, vc->call_abort_code, vc->call_responded); if (vc->server_list) { const struct afs_vlserver_list *sl = vc->server_list; pr_notice("VC: SL nr=%u ix=%u\n", sl->nr_servers, sl->index); for (i = 0; i < sl->nr_servers; i++) { const struct afs_vlserver *s = sl->servers[i].server; pr_notice("VC: server %s+%hu fl=%lx E=%hd\n", s->name, s->port, s->flags, s->probe.error); if (s->addresses) { const struct afs_addr_list *a = rcu_dereference(s->addresses); pr_notice("VC: - nr=%u/%u/%u pf=%u\n", a->nr_ipv4, a->nr_addrs, a->max_addrs, a->preferred); pr_notice("VC: - R=%lx F=%lx\n", a->responded, a->probe_failed); if (a == vc->alist) pr_notice("VC: - current\n"); } } } pr_notice("AC: t=%lx ax=%u\n", vc->addr_tried, vc->addr_index); rcu_read_unlock(); } /* * Tidy up a volume location server cursor and unlock the vnode. 
*/ int afs_end_vlserver_operation(struct afs_vl_cursor *vc) { struct afs_net *net = vc->cell->net; _enter("VC=%x+%x", vc->debug_id, vc->nr_iterations); switch (vc->cumul_error.error) { case -EDESTADDRREQ: case -EADDRNOTAVAIL: case -ENETUNREACH: case -EHOSTUNREACH: afs_vl_dump_edestaddrreq(vc); break; } if (vc->alist) { if (vc->call_responded && vc->addr_index != vc->alist->preferred && test_bit(vc->alist->preferred, &vc->addr_tried)) WRITE_ONCE(vc->alist->preferred, vc->addr_index); afs_put_addrlist(vc->alist, afs_alist_trace_put_vlrotate_end); vc->alist = NULL; } afs_put_vlserverlist(net, vc->server_list); return vc->cumul_error.error; }
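The selection step in afs_select_vlserver() above is essentially a bitmask scan for the untried, still-responding server with the lowest probe RTT. A stand-alone sketch of just that scan, using hypothetical stand-in types rather than the kernel's afs_vlserver structures:

#include <stdio.h>
#include <limits.h>

/* Hypothetical stand-in for the per-server probe state (not the kernel type). */
struct probe_state {
	unsigned int rtt;	/* measured round-trip time */
	int responding;		/* did the server answer the probe? */
};

/* Return the index of the preferred untried server, or -1 if none qualify. */
static int pick_server(const struct probe_state *s, int nr, unsigned long untried)
{
	unsigned int best_rtt = UINT_MAX;
	int best = -1, i;

	for (i = 0; i < nr; i++) {
		if (!(untried & (1UL << i)) || !s[i].responding)
			continue;
		if (s[i].rtt <= best_rtt) {	/* prefer the lowest RTT */
			best = i;
			best_rtt = s[i].rtt;
		}
	}
	return best;
}

int main(void)
{
	struct probe_state probes[] = {
		{ .rtt = 40, .responding = 1 },
		{ .rtt = 10, .responding = 0 },	/* fastest, but not responding */
		{ .rtt = 25, .responding = 1 },
	};
	unsigned long untried = (1UL << 3) - 1;	/* all three servers untried */

	printf("selected server %d\n", pick_server(probes, 3, untried));	/* -> 2 */
	return 0;
}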
// SPDX-License-Identifier: GPL-2.0-only /* * LED Triggers Core * * Copyright 2005-2007 Openedhand Ltd.
* * Author: Richard Purdie <rpurdie@openedhand.com> */ #include <linux/export.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/device.h> #include <linux/timer.h> #include <linux/rwsem.h> #include <linux/leds.h> #include <linux/slab.h> #include <linux/mm.h> #include "leds.h" /* * Nests outside led_cdev->trigger_lock */ static DECLARE_RWSEM(triggers_list_lock); static LIST_HEAD(trigger_list); /* Used by LED Class */ static inline bool trigger_relevant(struct led_classdev *led_cdev, struct led_trigger *trig) { return !trig->trigger_type || trig->trigger_type == led_cdev->trigger_type; } ssize_t led_trigger_write(struct file *filp, struct kobject *kobj, const struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t count) { struct device *dev = kobj_to_dev(kobj); struct led_classdev *led_cdev = dev_get_drvdata(dev); struct led_trigger *trig; int ret = count; mutex_lock(&led_cdev->led_access); if (led_sysfs_is_disabled(led_cdev)) { ret = -EBUSY; goto unlock; } if (sysfs_streq(buf, "none")) { led_trigger_remove(led_cdev); goto unlock; } down_read(&triggers_list_lock); list_for_each_entry(trig, &trigger_list, next_trig) { if (sysfs_streq(buf, trig->name) && trigger_relevant(led_cdev, trig)) { down_write(&led_cdev->trigger_lock); led_trigger_set(led_cdev, trig); up_write(&led_cdev->trigger_lock); up_read(&triggers_list_lock); goto unlock; } } /* we come here only if buf matches no trigger */ ret = -EINVAL; up_read(&triggers_list_lock); unlock: mutex_unlock(&led_cdev->led_access); return ret; } EXPORT_SYMBOL_GPL(led_trigger_write); __printf(3, 4) static int led_trigger_snprintf(char *buf, ssize_t size, const char *fmt, ...) { va_list args; int i; va_start(args, fmt); if (size <= 0) i = vsnprintf(NULL, 0, fmt, args); else i = vscnprintf(buf, size, fmt, args); va_end(args); return i; } static int led_trigger_format(char *buf, size_t size, struct led_classdev *led_cdev) { struct led_trigger *trig; int len = led_trigger_snprintf(buf, size, "%s", led_cdev->trigger ? "none" : "[none]"); list_for_each_entry(trig, &trigger_list, next_trig) { bool hit; if (!trigger_relevant(led_cdev, trig)) continue; hit = led_cdev->trigger && !strcmp(led_cdev->trigger->name, trig->name); len += led_trigger_snprintf(buf + len, size - len, " %s%s%s", hit ? "[" : "", trig->name, hit ? "]" : ""); } len += led_trigger_snprintf(buf + len, size - len, "\n"); return len; } /* * It was stupid to create 10000 cpu triggers, but we are stuck with it now. * Don't make that mistake again. We work around it here by creating binary * attribute, which is not limited by length. This is _not_ good design, do not * copy it. 
*/ ssize_t led_trigger_read(struct file *filp, struct kobject *kobj, const struct bin_attribute *attr, char *buf, loff_t pos, size_t count) { struct device *dev = kobj_to_dev(kobj); struct led_classdev *led_cdev = dev_get_drvdata(dev); void *data; int len; down_read(&triggers_list_lock); down_read(&led_cdev->trigger_lock); len = led_trigger_format(NULL, 0, led_cdev); data = kvmalloc(len + 1, GFP_KERNEL); if (!data) { up_read(&led_cdev->trigger_lock); up_read(&triggers_list_lock); return -ENOMEM; } len = led_trigger_format(data, len + 1, led_cdev); up_read(&led_cdev->trigger_lock); up_read(&triggers_list_lock); len = memory_read_from_buffer(buf, count, &pos, data, len); kvfree(data); return len; } EXPORT_SYMBOL_GPL(led_trigger_read); /* Caller must ensure led_cdev->trigger_lock held */ int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig) { char *event = NULL; char *envp[2]; const char *name; int ret; if (!led_cdev->trigger && !trig) return 0; name = trig ? trig->name : "none"; event = kasprintf(GFP_KERNEL, "TRIGGER=%s", name); /* Remove any existing trigger */ if (led_cdev->trigger) { spin_lock(&led_cdev->trigger->leddev_list_lock); list_del_rcu(&led_cdev->trig_list); spin_unlock(&led_cdev->trigger->leddev_list_lock); /* ensure it's no longer visible on the led_cdevs list */ synchronize_rcu(); cancel_work_sync(&led_cdev->set_brightness_work); led_stop_software_blink(led_cdev); device_remove_groups(led_cdev->dev, led_cdev->trigger->groups); if (led_cdev->trigger->deactivate) led_cdev->trigger->deactivate(led_cdev); led_cdev->trigger = NULL; led_cdev->trigger_data = NULL; led_cdev->activated = false; led_cdev->flags &= ~LED_INIT_DEFAULT_TRIGGER; led_set_brightness(led_cdev, LED_OFF); } if (trig) { spin_lock(&trig->leddev_list_lock); list_add_tail_rcu(&led_cdev->trig_list, &trig->led_cdevs); spin_unlock(&trig->leddev_list_lock); led_cdev->trigger = trig; /* * Some activate() calls use led_trigger_event() to initialize * the brightness of the LED for which the trigger is being set. * Ensure the led_cdev is visible on trig->led_cdevs for this. 
*/ synchronize_rcu(); /* * If "set brightness to 0" is pending in workqueue, * we don't want that to be reordered after ->activate() */ flush_work(&led_cdev->set_brightness_work); ret = 0; if (trig->activate) ret = trig->activate(led_cdev); else led_set_brightness(led_cdev, trig->brightness); if (ret) goto err_activate; ret = device_add_groups(led_cdev->dev, trig->groups); if (ret) { dev_err(led_cdev->dev, "Failed to add trigger attributes\n"); goto err_add_groups; } } if (event) { envp[0] = event; envp[1] = NULL; if (kobject_uevent_env(&led_cdev->dev->kobj, KOBJ_CHANGE, envp)) dev_err(led_cdev->dev, "%s: Error sending uevent\n", __func__); kfree(event); } return 0; err_add_groups: if (trig->deactivate) trig->deactivate(led_cdev); err_activate: spin_lock(&led_cdev->trigger->leddev_list_lock); list_del_rcu(&led_cdev->trig_list); spin_unlock(&led_cdev->trigger->leddev_list_lock); synchronize_rcu(); led_cdev->trigger = NULL; led_cdev->trigger_data = NULL; led_set_brightness(led_cdev, LED_OFF); kfree(event); return ret; } EXPORT_SYMBOL_GPL(led_trigger_set); void led_trigger_remove(struct led_classdev *led_cdev) { down_write(&led_cdev->trigger_lock); led_trigger_set(led_cdev, NULL); up_write(&led_cdev->trigger_lock); } EXPORT_SYMBOL_GPL(led_trigger_remove); static bool led_match_default_trigger(struct led_classdev *led_cdev, struct led_trigger *trig) { if (!strcmp(led_cdev->default_trigger, trig->name) && trigger_relevant(led_cdev, trig)) { led_cdev->flags |= LED_INIT_DEFAULT_TRIGGER; led_trigger_set(led_cdev, trig); return true; } return false; } void led_trigger_set_default(struct led_classdev *led_cdev) { struct led_trigger *trig; bool found = false; if (!led_cdev->default_trigger) return; down_read(&triggers_list_lock); down_write(&led_cdev->trigger_lock); list_for_each_entry(trig, &trigger_list, next_trig) { found = led_match_default_trigger(led_cdev, trig); if (found) break; } up_write(&led_cdev->trigger_lock); up_read(&triggers_list_lock); /* * If default trigger wasn't found, maybe trigger module isn't loaded yet. * Once loaded it will re-probe with all led_cdev's. 
*/ if (!found) request_module_nowait("ledtrig:%s", led_cdev->default_trigger); } EXPORT_SYMBOL_GPL(led_trigger_set_default); /* LED Trigger Interface */ int led_trigger_register(struct led_trigger *trig) { struct led_classdev *led_cdev; struct led_trigger *_trig; spin_lock_init(&trig->leddev_list_lock); INIT_LIST_HEAD(&trig->led_cdevs); down_write(&triggers_list_lock); /* Make sure the trigger's name isn't already in use */ list_for_each_entry(_trig, &trigger_list, next_trig) { if (!strcmp(_trig->name, trig->name) && (trig->trigger_type == _trig->trigger_type || !trig->trigger_type || !_trig->trigger_type)) { up_write(&triggers_list_lock); return -EEXIST; } } /* Add to the list of led triggers */ list_add_tail(&trig->next_trig, &trigger_list); up_write(&triggers_list_lock); /* Register with any LEDs that have this as a default trigger */ down_read(&leds_list_lock); list_for_each_entry(led_cdev, &leds_list, node) { down_write(&led_cdev->trigger_lock); if (!led_cdev->trigger && led_cdev->default_trigger) led_match_default_trigger(led_cdev, trig); up_write(&led_cdev->trigger_lock); } up_read(&leds_list_lock); return 0; } EXPORT_SYMBOL_GPL(led_trigger_register); void led_trigger_unregister(struct led_trigger *trig) { struct led_classdev *led_cdev; if (list_empty_careful(&trig->next_trig)) return; /* Remove from the list of led triggers */ down_write(&triggers_list_lock); list_del_init(&trig->next_trig); up_write(&triggers_list_lock); /* Remove anyone actively using this trigger */ down_read(&leds_list_lock); list_for_each_entry(led_cdev, &leds_list, node) { down_write(&led_cdev->trigger_lock); if (led_cdev->trigger == trig) led_trigger_set(led_cdev, NULL); up_write(&led_cdev->trigger_lock); } up_read(&leds_list_lock); } EXPORT_SYMBOL_GPL(led_trigger_unregister); static void devm_led_trigger_release(struct device *dev, void *res) { led_trigger_unregister(*(struct led_trigger **)res); } int devm_led_trigger_register(struct device *dev, struct led_trigger *trig) { struct led_trigger **dr; int rc; dr = devres_alloc(devm_led_trigger_release, sizeof(*dr), GFP_KERNEL); if (!dr) return -ENOMEM; *dr = trig; rc = led_trigger_register(trig); if (rc) devres_free(dr); else devres_add(dev, dr); return rc; } EXPORT_SYMBOL_GPL(devm_led_trigger_register); /* Simple LED Trigger Interface */ void led_trigger_event(struct led_trigger *trig, enum led_brightness brightness) { struct led_classdev *led_cdev; if (!trig) return; trig->brightness = brightness; rcu_read_lock(); list_for_each_entry_rcu(led_cdev, &trig->led_cdevs, trig_list) led_set_brightness(led_cdev, brightness); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(led_trigger_event); void led_mc_trigger_event(struct led_trigger *trig, unsigned int *intensity_value, unsigned int num_colors, enum led_brightness brightness) { struct led_classdev *led_cdev; if (!trig) return; rcu_read_lock(); list_for_each_entry_rcu(led_cdev, &trig->led_cdevs, trig_list) { if (!(led_cdev->flags & LED_MULTI_COLOR)) continue; led_mc_set_brightness(led_cdev, intensity_value, num_colors, brightness); } rcu_read_unlock(); } EXPORT_SYMBOL_GPL(led_mc_trigger_event); static void led_trigger_blink_setup(struct led_trigger *trig, unsigned long delay_on, unsigned long delay_off, int oneshot, int invert) { struct led_classdev *led_cdev; if (!trig) return; rcu_read_lock(); list_for_each_entry_rcu(led_cdev, &trig->led_cdevs, trig_list) { if (oneshot) led_blink_set_oneshot(led_cdev, &delay_on, &delay_off, invert); else led_blink_set_nosleep(led_cdev, delay_on, delay_off); } rcu_read_unlock(); } void 
led_trigger_blink(struct led_trigger *trig, unsigned long delay_on, unsigned long delay_off) { led_trigger_blink_setup(trig, delay_on, delay_off, 0, 0); } EXPORT_SYMBOL_GPL(led_trigger_blink); void led_trigger_blink_oneshot(struct led_trigger *trig, unsigned long delay_on, unsigned long delay_off, int invert) { led_trigger_blink_setup(trig, delay_on, delay_off, 1, invert); } EXPORT_SYMBOL_GPL(led_trigger_blink_oneshot); void led_trigger_register_simple(const char *name, struct led_trigger **tp) { struct led_trigger *trig; int err; trig = kzalloc(sizeof(struct led_trigger), GFP_KERNEL); if (trig) { trig->name = name; err = led_trigger_register(trig); if (err < 0) { kfree(trig); trig = NULL; pr_warn("LED trigger %s failed to register (%d)\n", name, err); } } else { pr_warn("LED trigger %s failed to register (no memory)\n", name); } *tp = trig; } EXPORT_SYMBOL_GPL(led_trigger_register_simple); void led_trigger_unregister_simple(struct led_trigger *trig) { if (trig) led_trigger_unregister(trig); kfree(trig); } EXPORT_SYMBOL_GPL(led_trigger_unregister_simple);
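For context, a client of the simple trigger interface implemented above would typically look something like the following module sketch; the trigger name "example-event" and the module itself are hypothetical, but the led_trigger_* calls are the ones defined in this file:

#include <linux/module.h>
#include <linux/leds.h>

/* Hypothetical consumer of the simple LED trigger interface (sketch only). */
static struct led_trigger *example_trig;

static int __init example_trig_init(void)
{
	/* Register a trigger named "example-event"; LEDs can now bind to it. */
	led_trigger_register_simple("example-event", &example_trig);

	/* Drive every LED currently bound to the trigger to full brightness. */
	led_trigger_event(example_trig, LED_FULL);
	return 0;
}

static void __exit example_trig_exit(void)
{
	led_trigger_unregister_simple(example_trig);
}

module_init(example_trig_init);
module_exit(example_trig_exit);
MODULE_LICENSE("GPL");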
// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2016 Mellanox Technologies. All rights reserved. * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> */ #include "devl_internal.h" struct devlink_region { struct devlink *devlink; struct devlink_port *port; struct list_head list; union { const struct devlink_region_ops *ops; const struct devlink_port_region_ops *port_ops; }; struct mutex snapshot_lock; /* protects snapshot_list, * max_snapshots and cur_snapshots * consistency.
*/ struct list_head snapshot_list; u32 max_snapshots; u32 cur_snapshots; u64 size; }; struct devlink_snapshot { struct list_head list; struct devlink_region *region; u8 *data; u32 id; }; static struct devlink_region * devlink_region_get_by_name(struct devlink *devlink, const char *region_name) { struct devlink_region *region; list_for_each_entry(region, &devlink->region_list, list) if (!strcmp(region->ops->name, region_name)) return region; return NULL; } static struct devlink_region * devlink_port_region_get_by_name(struct devlink_port *port, const char *region_name) { struct devlink_region *region; list_for_each_entry(region, &port->region_list, list) if (!strcmp(region->ops->name, region_name)) return region; return NULL; } static struct devlink_snapshot * devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id) { struct devlink_snapshot *snapshot; list_for_each_entry(snapshot, &region->snapshot_list, list) if (snapshot->id == id) return snapshot; return NULL; } static int devlink_nl_region_snapshot_id_put(struct sk_buff *msg, struct devlink *devlink, struct devlink_snapshot *snapshot) { struct nlattr *snap_attr; int err; snap_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_REGION_SNAPSHOT); if (!snap_attr) return -EMSGSIZE; err = nla_put_u32(msg, DEVLINK_ATTR_REGION_SNAPSHOT_ID, snapshot->id); if (err) goto nla_put_failure; nla_nest_end(msg, snap_attr); return 0; nla_put_failure: nla_nest_cancel(msg, snap_attr); return err; } static int devlink_nl_region_snapshots_id_put(struct sk_buff *msg, struct devlink *devlink, struct devlink_region *region) { struct devlink_snapshot *snapshot; struct nlattr *snapshots_attr; int err; snapshots_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_REGION_SNAPSHOTS); if (!snapshots_attr) return -EMSGSIZE; list_for_each_entry(snapshot, &region->snapshot_list, list) { err = devlink_nl_region_snapshot_id_put(msg, devlink, snapshot); if (err) goto nla_put_failure; } nla_nest_end(msg, snapshots_attr); return 0; nla_put_failure: nla_nest_cancel(msg, snapshots_attr); return err; } static int devlink_nl_region_fill(struct sk_buff *msg, struct devlink *devlink, enum devlink_command cmd, u32 portid, u32 seq, int flags, struct devlink_region *region) { void *hdr; int err; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; err = devlink_nl_put_handle(msg, devlink); if (err) goto nla_put_failure; if (region->port) { err = nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, region->port->index); if (err) goto nla_put_failure; } err = nla_put_string(msg, DEVLINK_ATTR_REGION_NAME, region->ops->name); if (err) goto nla_put_failure; err = devlink_nl_put_u64(msg, DEVLINK_ATTR_REGION_SIZE, region->size); if (err) goto nla_put_failure; err = nla_put_u32(msg, DEVLINK_ATTR_REGION_MAX_SNAPSHOTS, region->max_snapshots); if (err) goto nla_put_failure; err = devlink_nl_region_snapshots_id_put(msg, devlink, region); if (err) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return err; } static struct sk_buff * devlink_nl_region_notify_build(struct devlink_region *region, struct devlink_snapshot *snapshot, enum devlink_command cmd, u32 portid, u32 seq) { struct devlink *devlink = region->devlink; struct sk_buff *msg; void *hdr; int err; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return ERR_PTR(-ENOMEM); hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, 0, cmd); if (!hdr) { err = -EMSGSIZE; goto out_free_msg; } err = devlink_nl_put_handle(msg, devlink); if (err) 
goto out_cancel_msg; if (region->port) { err = nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, region->port->index); if (err) goto out_cancel_msg; } err = nla_put_string(msg, DEVLINK_ATTR_REGION_NAME, region->ops->name); if (err) goto out_cancel_msg; if (snapshot) { err = nla_put_u32(msg, DEVLINK_ATTR_REGION_SNAPSHOT_ID, snapshot->id); if (err) goto out_cancel_msg; } else { err = devlink_nl_put_u64(msg, DEVLINK_ATTR_REGION_SIZE, region->size); if (err) goto out_cancel_msg; } genlmsg_end(msg, hdr); return msg; out_cancel_msg: genlmsg_cancel(msg, hdr); out_free_msg: nlmsg_free(msg); return ERR_PTR(err); } static void devlink_nl_region_notify(struct devlink_region *region, struct devlink_snapshot *snapshot, enum devlink_command cmd) { struct devlink *devlink = region->devlink; struct sk_buff *msg; WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL); if (!__devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = devlink_nl_region_notify_build(region, snapshot, cmd, 0, 0); if (IS_ERR(msg)) return; devlink_nl_notify_send(devlink, msg); } void devlink_regions_notify_register(struct devlink *devlink) { struct devlink_region *region; list_for_each_entry(region, &devlink->region_list, list) devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW); } void devlink_regions_notify_unregister(struct devlink *devlink) { struct devlink_region *region; list_for_each_entry_reverse(region, &devlink->region_list, list) devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL); } /** * __devlink_snapshot_id_increment - Increment number of snapshots using an id * @devlink: devlink instance * @id: the snapshot id * * Track when a new snapshot begins using an id. Load the count for the * given id from the snapshot xarray, increment it, and store it back. * * Called when a new snapshot is created with the given id. * * The id *must* have been previously allocated by * devlink_region_snapshot_id_get(). * * Returns 0 on success, or an error on failure. */ static int __devlink_snapshot_id_increment(struct devlink *devlink, u32 id) { unsigned long count; void *p; int err; xa_lock(&devlink->snapshot_ids); p = xa_load(&devlink->snapshot_ids, id); if (WARN_ON(!p)) { err = -EINVAL; goto unlock; } if (WARN_ON(!xa_is_value(p))) { err = -EINVAL; goto unlock; } count = xa_to_value(p); count++; err = xa_err(__xa_store(&devlink->snapshot_ids, id, xa_mk_value(count), GFP_ATOMIC)); unlock: xa_unlock(&devlink->snapshot_ids); return err; } /** * __devlink_snapshot_id_decrement - Decrease number of snapshots using an id * @devlink: devlink instance * @id: the snapshot id * * Track when a snapshot is deleted and stops using an id. Load the count * for the given id from the snapshot xarray, decrement it, and store it * back. * * If the count reaches zero, erase this id from the xarray, freeing it * up for future re-use by devlink_region_snapshot_id_get(). * * Called when a snapshot using the given id is deleted, and when the * initial allocator of the id is finished using it. 
*/ static void __devlink_snapshot_id_decrement(struct devlink *devlink, u32 id) { unsigned long count; void *p; xa_lock(&devlink->snapshot_ids); p = xa_load(&devlink->snapshot_ids, id); if (WARN_ON(!p)) goto unlock; if (WARN_ON(!xa_is_value(p))) goto unlock; count = xa_to_value(p); if (count > 1) { count--; __xa_store(&devlink->snapshot_ids, id, xa_mk_value(count), GFP_ATOMIC); } else { /* If this was the last user, we can erase this id */ __xa_erase(&devlink->snapshot_ids, id); } unlock: xa_unlock(&devlink->snapshot_ids); } /** * __devlink_snapshot_id_insert - Insert a specific snapshot ID * @devlink: devlink instance * @id: the snapshot id * * Mark the given snapshot id as used by inserting a zero value into the * snapshot xarray. * * This must be called while holding the devlink instance lock. Unlike * devlink_snapshot_id_get, the initial reference count is zero, not one. * It is expected that the id will immediately be used before * releasing the devlink instance lock. * * Returns zero on success, or an error code if the snapshot id could not * be inserted. */ static int __devlink_snapshot_id_insert(struct devlink *devlink, u32 id) { int err; xa_lock(&devlink->snapshot_ids); if (xa_load(&devlink->snapshot_ids, id)) { xa_unlock(&devlink->snapshot_ids); return -EEXIST; } err = xa_err(__xa_store(&devlink->snapshot_ids, id, xa_mk_value(0), GFP_ATOMIC)); xa_unlock(&devlink->snapshot_ids); return err; } /** * __devlink_region_snapshot_id_get - get snapshot ID * @devlink: devlink instance * @id: storage to return snapshot id * * Allocates a new snapshot id. Returns zero on success, or a negative * error on failure. Must be called while holding the devlink instance * lock. * * Snapshot IDs are tracked using an xarray which stores the number of * users of the snapshot id. * * Note that the caller of this function counts as a 'user', in order to * avoid race conditions. The caller must release its hold on the * snapshot by using devlink_region_snapshot_id_put. */ static int __devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id) { return xa_alloc(&devlink->snapshot_ids, id, xa_mk_value(1), xa_limit_32b, GFP_KERNEL); } /** * __devlink_region_snapshot_create - create a new snapshot * This will add a new snapshot of a region. The snapshot * will be stored on the region struct and can be accessed * from devlink. This is useful for future analyses of snapshots. * Multiple snapshots can be created on a region. * The @snapshot_id should be obtained using the getter function. * * Must be called only while holding the region snapshot lock. 
* * @region: devlink region of the snapshot * @data: snapshot data * @snapshot_id: snapshot id to be created */ static int __devlink_region_snapshot_create(struct devlink_region *region, u8 *data, u32 snapshot_id) { struct devlink *devlink = region->devlink; struct devlink_snapshot *snapshot; int err; lockdep_assert_held(&region->snapshot_lock); /* check if region can hold one more snapshot */ if (region->cur_snapshots == region->max_snapshots) return -ENOSPC; if (devlink_region_snapshot_get_by_id(region, snapshot_id)) return -EEXIST; snapshot = kzalloc(sizeof(*snapshot), GFP_KERNEL); if (!snapshot) return -ENOMEM; err = __devlink_snapshot_id_increment(devlink, snapshot_id); if (err) goto err_snapshot_id_increment; snapshot->id = snapshot_id; snapshot->region = region; snapshot->data = data; list_add_tail(&snapshot->list, &region->snapshot_list); region->cur_snapshots++; devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_NEW); return 0; err_snapshot_id_increment: kfree(snapshot); return err; } static void devlink_region_snapshot_del(struct devlink_region *region, struct devlink_snapshot *snapshot) { struct devlink *devlink = region->devlink; lockdep_assert_held(&region->snapshot_lock); devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_DEL); region->cur_snapshots--; list_del(&snapshot->list); region->ops->destructor(snapshot->data); __devlink_snapshot_id_decrement(devlink, snapshot->id); kfree(snapshot); } int devlink_nl_region_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_port *port = NULL; struct devlink_region *region; const char *region_name; struct sk_buff *msg; unsigned int index; int err; if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME)) return -EINVAL; if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) { index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]); port = devlink_port_get_by_index(devlink, index); if (!port) return -ENODEV; } region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]); if (port) region = devlink_port_region_get_by_name(port, region_name); else region = devlink_region_get_by_name(devlink, region_name); if (!region) return -EINVAL; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; err = devlink_nl_region_fill(msg, devlink, DEVLINK_CMD_REGION_GET, info->snd_portid, info->snd_seq, 0, region); if (err) { nlmsg_free(msg); return err; } return genlmsg_reply(msg, info); } static int devlink_nl_cmd_region_get_port_dumpit(struct sk_buff *msg, struct netlink_callback *cb, struct devlink_port *port, int *idx, int start, int flags) { struct devlink_region *region; int err = 0; list_for_each_entry(region, &port->region_list, list) { if (*idx < start) { (*idx)++; continue; } err = devlink_nl_region_fill(msg, port->devlink, DEVLINK_CMD_REGION_GET, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags, region); if (err) goto out; (*idx)++; } out: return err; } static int devlink_nl_region_get_dump_one(struct sk_buff *msg, struct devlink *devlink, struct netlink_callback *cb, int flags) { struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink_region *region; struct devlink_port *port; unsigned long port_index; int idx = 0; int err; list_for_each_entry(region, &devlink->region_list, list) { if (idx < state->idx) { idx++; continue; } err = devlink_nl_region_fill(msg, devlink, DEVLINK_CMD_REGION_GET, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags, region); if (err) { state->idx = idx; return err; } idx++; } 
xa_for_each(&devlink->ports, port_index, port) { err = devlink_nl_cmd_region_get_port_dumpit(msg, cb, port, &idx, state->idx, flags); if (err) { state->idx = idx; return err; } } return 0; } int devlink_nl_region_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { return devlink_nl_dumpit(skb, cb, devlink_nl_region_get_dump_one); } int devlink_nl_region_del_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_snapshot *snapshot; struct devlink_port *port = NULL; struct devlink_region *region; const char *region_name; unsigned int index; u32 snapshot_id; if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME) || GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_SNAPSHOT_ID)) return -EINVAL; region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]); snapshot_id = nla_get_u32(info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]); if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) { index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]); port = devlink_port_get_by_index(devlink, index); if (!port) return -ENODEV; } if (port) region = devlink_port_region_get_by_name(port, region_name); else region = devlink_region_get_by_name(devlink, region_name); if (!region) return -EINVAL; mutex_lock(&region->snapshot_lock); snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id); if (!snapshot) { mutex_unlock(&region->snapshot_lock); return -EINVAL; } devlink_region_snapshot_del(region, snapshot); mutex_unlock(&region->snapshot_lock); return 0; } int devlink_nl_region_new_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_snapshot *snapshot; struct devlink_port *port = NULL; struct nlattr *snapshot_id_attr; struct devlink_region *region; const char *region_name; unsigned int index; u32 snapshot_id; u8 *data; int err; if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME)) { NL_SET_ERR_MSG(info->extack, "No region name provided"); return -EINVAL; } region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]); if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) { index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]); port = devlink_port_get_by_index(devlink, index); if (!port) return -ENODEV; } if (port) region = devlink_port_region_get_by_name(port, region_name); else region = devlink_region_get_by_name(devlink, region_name); if (!region) { NL_SET_ERR_MSG(info->extack, "The requested region does not exist"); return -EINVAL; } if (!region->ops->snapshot) { NL_SET_ERR_MSG(info->extack, "The requested region does not support taking an immediate snapshot"); return -EOPNOTSUPP; } mutex_lock(&region->snapshot_lock); if (region->cur_snapshots == region->max_snapshots) { NL_SET_ERR_MSG(info->extack, "The region has reached the maximum number of stored snapshots"); err = -ENOSPC; goto unlock; } snapshot_id_attr = info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]; if (snapshot_id_attr) { snapshot_id = nla_get_u32(snapshot_id_attr); if (devlink_region_snapshot_get_by_id(region, snapshot_id)) { NL_SET_ERR_MSG(info->extack, "The requested snapshot id is already in use"); err = -EEXIST; goto unlock; } err = __devlink_snapshot_id_insert(devlink, snapshot_id); if (err) goto unlock; } else { err = __devlink_region_snapshot_id_get(devlink, &snapshot_id); if (err) { NL_SET_ERR_MSG(info->extack, "Failed to allocate a new snapshot id"); goto unlock; } } if (port) err = region->port_ops->snapshot(port, region->port_ops, info->extack, &data); else err = region->ops->snapshot(devlink, region->ops, info->extack, 
&data); if (err) goto err_snapshot_capture; err = __devlink_region_snapshot_create(region, data, snapshot_id); if (err) goto err_snapshot_create; if (!snapshot_id_attr) { struct sk_buff *msg; snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id); if (WARN_ON(!snapshot)) { err = -EINVAL; goto unlock; } msg = devlink_nl_region_notify_build(region, snapshot, DEVLINK_CMD_REGION_NEW, info->snd_portid, info->snd_seq); err = PTR_ERR_OR_ZERO(msg); if (err) goto err_notify; err = genlmsg_reply(msg, info); if (err) goto err_notify; } mutex_unlock(&region->snapshot_lock); return 0; err_snapshot_create: region->ops->destructor(data); err_snapshot_capture: __devlink_snapshot_id_decrement(devlink, snapshot_id); mutex_unlock(&region->snapshot_lock); return err; err_notify: devlink_region_snapshot_del(region, snapshot); unlock: mutex_unlock(&region->snapshot_lock); return err; } static int devlink_nl_cmd_region_read_chunk_fill(struct sk_buff *msg, u8 *chunk, u32 chunk_size, u64 addr) { struct nlattr *chunk_attr; int err; chunk_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_REGION_CHUNK); if (!chunk_attr) return -EINVAL; err = nla_put(msg, DEVLINK_ATTR_REGION_CHUNK_DATA, chunk_size, chunk); if (err) goto nla_put_failure; err = devlink_nl_put_u64(msg, DEVLINK_ATTR_REGION_CHUNK_ADDR, addr); if (err) goto nla_put_failure; nla_nest_end(msg, chunk_attr); return 0; nla_put_failure: nla_nest_cancel(msg, chunk_attr); return err; } #define DEVLINK_REGION_READ_CHUNK_SIZE 256 typedef int devlink_chunk_fill_t(void *cb_priv, u8 *chunk, u32 chunk_size, u64 curr_offset, struct netlink_ext_ack *extack); static int devlink_nl_region_read_fill(struct sk_buff *skb, devlink_chunk_fill_t *cb, void *cb_priv, u64 start_offset, u64 end_offset, u64 *new_offset, struct netlink_ext_ack *extack) { u64 curr_offset = start_offset; int err = 0; u8 *data; /* Allocate and re-use a single buffer */ data = kmalloc(DEVLINK_REGION_READ_CHUNK_SIZE, GFP_KERNEL); if (!data) return -ENOMEM; *new_offset = start_offset; while (curr_offset < end_offset) { u32 data_size; data_size = min_t(u32, end_offset - curr_offset, DEVLINK_REGION_READ_CHUNK_SIZE); err = cb(cb_priv, data, data_size, curr_offset, extack); if (err) break; err = devlink_nl_cmd_region_read_chunk_fill(skb, data, data_size, curr_offset); if (err) break; curr_offset += data_size; } *new_offset = curr_offset; kfree(data); return err; } static int devlink_region_snapshot_fill(void *cb_priv, u8 *chunk, u32 chunk_size, u64 curr_offset, struct netlink_ext_ack __always_unused *extack) { struct devlink_snapshot *snapshot = cb_priv; memcpy(chunk, &snapshot->data[curr_offset], chunk_size); return 0; } static int devlink_region_port_direct_fill(void *cb_priv, u8 *chunk, u32 chunk_size, u64 curr_offset, struct netlink_ext_ack *extack) { struct devlink_region *region = cb_priv; return region->port_ops->read(region->port, region->port_ops, extack, curr_offset, chunk_size, chunk); } static int devlink_region_direct_fill(void *cb_priv, u8 *chunk, u32 chunk_size, u64 curr_offset, struct netlink_ext_ack *extack) { struct devlink_region *region = cb_priv; return region->ops->read(region->devlink, region->ops, extack, curr_offset, chunk_size, chunk); } int devlink_nl_region_read_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { const struct genl_dumpit_info *info = genl_dumpit_info(cb); struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct nlattr *chunks_attr, *region_attr, *snapshot_attr; u64 ret_offset, start_offset, end_offset = U64_MAX; struct nlattr **attrs = 
info->info.attrs; struct devlink_port *port = NULL; devlink_chunk_fill_t *region_cb; struct devlink_region *region; const char *region_name; struct devlink *devlink; unsigned int index; void *region_cb_priv; void *hdr; int err; start_offset = state->start_offset; devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs, false); if (IS_ERR(devlink)) return PTR_ERR(devlink); if (!attrs[DEVLINK_ATTR_REGION_NAME]) { NL_SET_ERR_MSG(cb->extack, "No region name provided"); err = -EINVAL; goto out_unlock; } if (attrs[DEVLINK_ATTR_PORT_INDEX]) { index = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]); port = devlink_port_get_by_index(devlink, index); if (!port) { err = -ENODEV; goto out_unlock; } } region_attr = attrs[DEVLINK_ATTR_REGION_NAME]; region_name = nla_data(region_attr); if (port) region = devlink_port_region_get_by_name(port, region_name); else region = devlink_region_get_by_name(devlink, region_name); if (!region) { NL_SET_ERR_MSG_ATTR(cb->extack, region_attr, "Requested region does not exist"); err = -EINVAL; goto out_unlock; } snapshot_attr = attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]; if (!snapshot_attr) { if (!nla_get_flag(attrs[DEVLINK_ATTR_REGION_DIRECT])) { NL_SET_ERR_MSG(cb->extack, "No snapshot id provided"); err = -EINVAL; goto out_unlock; } if (!region->ops->read) { NL_SET_ERR_MSG(cb->extack, "Requested region does not support direct read"); err = -EOPNOTSUPP; goto out_unlock; } if (port) region_cb = &devlink_region_port_direct_fill; else region_cb = &devlink_region_direct_fill; region_cb_priv = region; } else { struct devlink_snapshot *snapshot; u32 snapshot_id; if (nla_get_flag(attrs[DEVLINK_ATTR_REGION_DIRECT])) { NL_SET_ERR_MSG_ATTR(cb->extack, snapshot_attr, "Direct region read does not use snapshot"); err = -EINVAL; goto out_unlock; } snapshot_id = nla_get_u32(snapshot_attr); snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id); if (!snapshot) { NL_SET_ERR_MSG_ATTR(cb->extack, snapshot_attr, "Requested snapshot does not exist"); err = -EINVAL; goto out_unlock; } region_cb = &devlink_region_snapshot_fill; region_cb_priv = snapshot; } if (attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR] && attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]) { if (!start_offset) start_offset = nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]); end_offset = nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]); end_offset += nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]); } if (end_offset > region->size) end_offset = region->size; /* return 0 if there is no further data to read */ if (start_offset == end_offset) { err = 0; goto out_unlock; } hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI, DEVLINK_CMD_REGION_READ); if (!hdr) { err = -EMSGSIZE; goto out_unlock; } err = devlink_nl_put_handle(skb, devlink); if (err) goto nla_put_failure; if (region->port) { err = nla_put_u32(skb, DEVLINK_ATTR_PORT_INDEX, region->port->index); if (err) goto nla_put_failure; } err = nla_put_string(skb, DEVLINK_ATTR_REGION_NAME, region_name); if (err) goto nla_put_failure; chunks_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_REGION_CHUNKS); if (!chunks_attr) { err = -EMSGSIZE; goto nla_put_failure; } err = devlink_nl_region_read_fill(skb, region_cb, region_cb_priv, start_offset, end_offset, &ret_offset, cb->extack); if (err && err != -EMSGSIZE) goto nla_put_failure; /* Check if there was any progress done to prevent infinite loop */ if (ret_offset == start_offset) { err = -EINVAL; goto nla_put_failure; } state->start_offset = ret_offset; nla_nest_end(skb, 
chunks_attr); genlmsg_end(skb, hdr); devl_unlock(devlink); devlink_put(devlink); return skb->len; nla_put_failure: genlmsg_cancel(skb, hdr); out_unlock: devl_unlock(devlink); devlink_put(devlink); return err; } /** * devl_region_create - create a new address region * * @devlink: devlink * @ops: region operations and name * @region_max_snapshots: Maximum supported number of snapshots for region * @region_size: size of region */ struct devlink_region *devl_region_create(struct devlink *devlink, const struct devlink_region_ops *ops, u32 region_max_snapshots, u64 region_size) { struct devlink_region *region; devl_assert_locked(devlink); if (WARN_ON(!ops) || WARN_ON(!ops->destructor)) return ERR_PTR(-EINVAL); if (devlink_region_get_by_name(devlink, ops->name)) return ERR_PTR(-EEXIST); region = kzalloc(sizeof(*region), GFP_KERNEL); if (!region) return ERR_PTR(-ENOMEM); region->devlink = devlink; region->max_snapshots = region_max_snapshots; region->ops = ops; region->size = region_size; INIT_LIST_HEAD(&region->snapshot_list); mutex_init(&region->snapshot_lock); list_add_tail(&region->list, &devlink->region_list); devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW); return region; } EXPORT_SYMBOL_GPL(devl_region_create); /** * devlink_region_create - create a new address region * * @devlink: devlink * @ops: region operations and name * @region_max_snapshots: Maximum supported number of snapshots for region * @region_size: size of region * * Context: Takes and release devlink->lock <mutex>. */ struct devlink_region * devlink_region_create(struct devlink *devlink, const struct devlink_region_ops *ops, u32 region_max_snapshots, u64 region_size) { struct devlink_region *region; devl_lock(devlink); region = devl_region_create(devlink, ops, region_max_snapshots, region_size); devl_unlock(devlink); return region; } EXPORT_SYMBOL_GPL(devlink_region_create); /** * devlink_port_region_create - create a new address region for a port * * @port: devlink port * @ops: region operations and name * @region_max_snapshots: Maximum supported number of snapshots for region * @region_size: size of region * * Context: Takes and release devlink->lock <mutex>. 
*/ struct devlink_region * devlink_port_region_create(struct devlink_port *port, const struct devlink_port_region_ops *ops, u32 region_max_snapshots, u64 region_size) { struct devlink *devlink = port->devlink; struct devlink_region *region; int err = 0; ASSERT_DEVLINK_PORT_INITIALIZED(port); if (WARN_ON(!ops) || WARN_ON(!ops->destructor)) return ERR_PTR(-EINVAL); devl_lock(devlink); if (devlink_port_region_get_by_name(port, ops->name)) { err = -EEXIST; goto unlock; } region = kzalloc(sizeof(*region), GFP_KERNEL); if (!region) { err = -ENOMEM; goto unlock; } region->devlink = devlink; region->port = port; region->max_snapshots = region_max_snapshots; region->port_ops = ops; region->size = region_size; INIT_LIST_HEAD(&region->snapshot_list); mutex_init(&region->snapshot_lock); list_add_tail(&region->list, &port->region_list); devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW); devl_unlock(devlink); return region; unlock: devl_unlock(devlink); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(devlink_port_region_create); /** * devl_region_destroy - destroy address region * * @region: devlink region to destroy */ void devl_region_destroy(struct devlink_region *region) { struct devlink *devlink = region->devlink; struct devlink_snapshot *snapshot, *ts; devl_assert_locked(devlink); /* Free all snapshots of region */ mutex_lock(&region->snapshot_lock); list_for_each_entry_safe(snapshot, ts, &region->snapshot_list, list) devlink_region_snapshot_del(region, snapshot); mutex_unlock(&region->snapshot_lock); list_del(&region->list); mutex_destroy(&region->snapshot_lock); devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL); kfree(region); } EXPORT_SYMBOL_GPL(devl_region_destroy); /** * devlink_region_destroy - destroy address region * * @region: devlink region to destroy * * Context: Takes and release devlink->lock <mutex>. */ void devlink_region_destroy(struct devlink_region *region) { struct devlink *devlink = region->devlink; devl_lock(devlink); devl_region_destroy(region); devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_region_destroy); /** * devlink_region_snapshot_id_get - get snapshot ID * * This callback should be called when adding a new snapshot, * Driver should use the same id for multiple snapshots taken * on multiple regions at the same time/by the same trigger. * * The caller of this function must use devlink_region_snapshot_id_put * when finished creating regions using this id. * * Returns zero on success, or a negative error code on failure. * * @devlink: devlink * @id: storage to return id */ int devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id) { return __devlink_region_snapshot_id_get(devlink, id); } EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_get); /** * devlink_region_snapshot_id_put - put snapshot ID reference * * This should be called by a driver after finishing creating snapshots * with an id. Doing so ensures that the ID can later be released in the * event that all snapshots using it have been destroyed. * * @devlink: devlink * @id: id to release reference on */ void devlink_region_snapshot_id_put(struct devlink *devlink, u32 id) { __devlink_snapshot_id_decrement(devlink, id); } EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_put); /** * devlink_region_snapshot_create - create a new snapshot * This will add a new snapshot of a region. The snapshot * will be stored on the region struct and can be accessed * from devlink. This is useful for future analyses of snapshots. * Multiple snapshots can be created on a region. 
* The @snapshot_id should be obtained using the getter function. * * @region: devlink region of the snapshot * @data: snapshot data * @snapshot_id: snapshot id to be created */ int devlink_region_snapshot_create(struct devlink_region *region, u8 *data, u32 snapshot_id) { int err; mutex_lock(&region->snapshot_lock); err = __devlink_region_snapshot_create(region, data, snapshot_id); mutex_unlock(&region->snapshot_lock); return err; } EXPORT_SYMBOL_GPL(devlink_region_snapshot_create);
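To show how the region API above is typically wired together from the driver side, here is a minimal sketch. It is not taken from the source: all my_* identifiers, the "fw-health" region name, the snapshot-count and size values, and any ops fields or prototypes not visible above are assumptions for illustration. It registers a region while holding the instance lock and later publishes one snapshot using the snapshot-id helpers documented above.

/* Hypothetical driver-side sketch; assumes the usual includes
 * (<net/devlink.h>, <linux/slab.h>). Identifiers prefixed my_ are invented.
 */
static void my_region_destructor(const u8 *data)	/* prototype assumed */
{
	kfree(data);
}

static const struct devlink_region_ops my_region_ops = {
	.name = "fw-health",			/* matched against DEVLINK_ATTR_REGION_NAME */
	.destructor = my_region_destructor,	/* required: devl_region_create() rejects a NULL destructor */
};

/* Called with the devlink instance lock held (devl_assert_locked). */
static struct devlink_region *my_register_region(struct devlink *devlink, u64 size)
{
	/* up to 8 snapshots of @size bytes; both values are arbitrary here */
	return devl_region_create(devlink, &my_region_ops, 8, size);
}

static int my_publish_snapshot(struct devlink *devlink,
			       struct devlink_region *region,
			       const void *buf, size_t size)
{
	u32 snapshot_id;
	u8 *data;
	int err;

	/* Reserve an id; the same id may be shared by snapshots of several regions. */
	err = devlink_region_snapshot_id_get(devlink, &snapshot_id);
	if (err)
		return err;

	data = kmemdup(buf, size, GFP_KERNEL);
	if (!data) {
		err = -ENOMEM;
		goto out_put_id;
	}

	/* On success the region owns @data and frees it via ops->destructor;
	 * on failure the caller is assumed to free it itself.
	 */
	err = devlink_region_snapshot_create(region, data, snapshot_id);
	if (err)
		kfree(data);

out_put_id:
	/* Release our reference; ids still used by live snapshots stay reserved. */
	devlink_region_snapshot_id_put(devlink, snapshot_id);
	return err;
}

A driver that does not already hold the instance lock would use devlink_region_create() instead, which, as documented above, takes and releases devlink->lock around devl_region_create().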
// SPDX-License-Identifier: GPL-2.0-or-later /* * localalloc.c * * Node local data allocation * * Copyright (C) 2002, 2004 Oracle. All rights reserved.
*/ #include <linux/fs.h> #include <linux/types.h> #include <linux/slab.h> #include <linux/highmem.h> #include <linux/bitops.h> #include <cluster/masklog.h> #include "ocfs2.h" #include "alloc.h" #include "blockcheck.h" #include "dlmglue.h" #include "inode.h" #include "journal.h" #include "localalloc.h" #include "suballoc.h" #include "super.h" #include "sysfile.h" #include "ocfs2_trace.h" #include "buffer_head_io.h" #define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab)) static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc); static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, struct ocfs2_dinode *alloc, u32 *numbits, struct ocfs2_alloc_reservation *resv); static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc); static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, handle_t *handle, struct ocfs2_dinode *alloc, struct inode *main_bm_inode, struct buffer_head *main_bm_bh); static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, struct ocfs2_alloc_context **ac, struct inode **bitmap_inode, struct buffer_head **bitmap_bh); static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, handle_t *handle, struct ocfs2_alloc_context *ac); static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, struct inode *local_alloc_inode); /* * ocfs2_la_default_mb() - determine a default size, in megabytes of * the local alloc. * * Generally, we'd like to pick as large a local alloc as * possible. Performance on large workloads tends to scale * proportionally to la size. In addition to that, the reservations * code functions more efficiently as it can reserve more windows for * write. * * Some things work against us when trying to choose a large local alloc: * * - We need to ensure our sizing is picked to leave enough space in * group descriptors for other allocations (such as block groups, * etc). Picking default sizes which are a multiple of 4 could help * - block groups are allocated in 2mb and 4mb chunks. * * - Likewise, we don't want to starve other nodes of bits on small * file systems. This can easily be taken care of by limiting our * default to a reasonable size (256M) on larger cluster sizes. * * - Some file systems can't support very large sizes - 4k and 8k in * particular are limited to less than 128 and 256 megabytes respectively. * * The following reference table shows group descriptor and local * alloc maximums at various cluster sizes (4k blocksize) * * csize: 4K group: 126M la: 121M * csize: 8K group: 252M la: 243M * csize: 16K group: 504M la: 486M * csize: 32K group: 1008M la: 972M * csize: 64K group: 2016M la: 1944M * csize: 128K group: 4032M la: 3888M * csize: 256K group: 8064M la: 7776M * csize: 512K group: 16128M la: 15552M * csize: 1024K group: 32256M la: 31104M */ #define OCFS2_LA_MAX_DEFAULT_MB 256 #define OCFS2_LA_OLD_DEFAULT 8 unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb) { unsigned int la_mb; unsigned int gd_mb; unsigned int la_max_mb; unsigned int megs_per_slot; struct super_block *sb = osb->sb; gd_mb = ocfs2_clusters_to_megabytes(osb->sb, 8 * ocfs2_group_bitmap_size(sb, 0, osb->s_feature_incompat)); /* * This takes care of files systems with very small group * descriptors - 512 byte blocksize at cluster sizes lower * than 16K and also 1k blocksize with 4k cluster size. 
*/ if ((sb->s_blocksize == 512 && osb->s_clustersize <= 8192) || (sb->s_blocksize == 1024 && osb->s_clustersize == 4096)) return OCFS2_LA_OLD_DEFAULT; /* * Leave enough room for some block groups and make the final * value we work from a multiple of 4. */ gd_mb -= 16; gd_mb &= 0xFFFFFFFB; la_mb = gd_mb; /* * Keep window sizes down to a reasonable default */ if (la_mb > OCFS2_LA_MAX_DEFAULT_MB) { /* * Some clustersize / blocksize combinations will have * given us a larger than OCFS2_LA_MAX_DEFAULT_MB * default size, but get poor distribution when * limited to exactly 256 megabytes. * * As an example, 16K clustersize at 4K blocksize * gives us a cluster group size of 504M. Paring the * local alloc size down to 256 however, would give us * only one window and around 200MB left in the * cluster group. Instead, find the first size below * 256 which would give us an even distribution. * * Larger cluster group sizes actually work out pretty * well when pared to 256, so we don't have to do this * for any group that fits more than two * OCFS2_LA_MAX_DEFAULT_MB windows. */ if (gd_mb > (2 * OCFS2_LA_MAX_DEFAULT_MB)) la_mb = 256; else { unsigned int gd_mult = gd_mb; while (gd_mult > 256) gd_mult = gd_mult >> 1; la_mb = gd_mult; } } megs_per_slot = osb->osb_clusters_at_boot / osb->max_slots; megs_per_slot = ocfs2_clusters_to_megabytes(osb->sb, megs_per_slot); /* Too many nodes, too few disk clusters. */ if (megs_per_slot < la_mb) la_mb = megs_per_slot; /* We can't store more bits than we can in a block. */ la_max_mb = ocfs2_clusters_to_megabytes(osb->sb, ocfs2_local_alloc_size(sb) * 8); if (la_mb > la_max_mb) la_mb = la_max_mb; return la_mb; } void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb) { struct super_block *sb = osb->sb; unsigned int la_default_mb = ocfs2_la_default_mb(osb); unsigned int la_max_mb; la_max_mb = ocfs2_clusters_to_megabytes(sb, ocfs2_local_alloc_size(sb) * 8); trace_ocfs2_la_set_sizes(requested_mb, la_max_mb, la_default_mb); if (requested_mb == -1) { /* No user request - use defaults */ osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, la_default_mb); } else if (requested_mb > la_max_mb) { /* Request is too big, we give the maximum available */ osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, la_max_mb); } else { osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, requested_mb); } osb->local_alloc_bits = osb->local_alloc_default_bits; } static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb) { return (osb->local_alloc_state == OCFS2_LA_THROTTLED || osb->local_alloc_state == OCFS2_LA_ENABLED); } void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb, unsigned int num_clusters) { if (num_clusters >= osb->local_alloc_default_bits) { spin_lock(&osb->osb_lock); if (osb->local_alloc_state == OCFS2_LA_DISABLED || osb->local_alloc_state == OCFS2_LA_THROTTLED) { cancel_delayed_work(&osb->la_enable_wq); osb->local_alloc_state = OCFS2_LA_ENABLED; } spin_unlock(&osb->osb_lock); } } void ocfs2_la_enable_worker(struct work_struct *work) { struct ocfs2_super *osb = container_of(work, struct ocfs2_super, la_enable_wq.work); spin_lock(&osb->osb_lock); osb->local_alloc_state = OCFS2_LA_ENABLED; spin_unlock(&osb->osb_lock); } /* * Tell us whether a given allocation should use the local alloc * file. Otherwise, it has to go to the main bitmap. * * This function does semi-dirty reads of local alloc size and state! * This is ok however, as the values are re-checked once under mutex. 
*/ int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) { int ret = 0; int la_bits; spin_lock(&osb->osb_lock); la_bits = osb->local_alloc_bits; if (!ocfs2_la_state_enabled(osb)) goto bail; /* la_bits should be at least twice the size (in clusters) of * a new block group. We want to be sure block group * allocations go through the local alloc, so allow an * allocation to take up to half the bitmap. */ if (bits > (la_bits / 2)) goto bail; ret = 1; bail: trace_ocfs2_alloc_should_use_local( (unsigned long long)bits, osb->local_alloc_state, la_bits, ret); spin_unlock(&osb->osb_lock); return ret; } int ocfs2_load_local_alloc(struct ocfs2_super *osb) { int status = 0; struct ocfs2_dinode *alloc = NULL; struct buffer_head *alloc_bh = NULL; u32 num_used; struct inode *inode = NULL; struct ocfs2_local_alloc *la; if (osb->local_alloc_bits == 0) goto bail; if (osb->local_alloc_bits >= osb->bitmap_cpg) { mlog(ML_NOTICE, "Requested local alloc window %d is larger " "than max possible %u. Using defaults.\n", osb->local_alloc_bits, (osb->bitmap_cpg - 1)); osb->local_alloc_bits = ocfs2_megabytes_to_clusters(osb->sb, ocfs2_la_default_mb(osb)); } /* read the alloc off disk */ inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, osb->slot_num); if (!inode) { status = -EINVAL; mlog_errno(status); goto bail; } status = ocfs2_read_inode_block_full(inode, &alloc_bh, OCFS2_BH_IGNORE_CACHE); if (status < 0) { mlog_errno(status); goto bail; } alloc = (struct ocfs2_dinode *) alloc_bh->b_data; la = OCFS2_LOCAL_ALLOC(alloc); if (!(le32_to_cpu(alloc->i_flags) & (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) { mlog(ML_ERROR, "Invalid local alloc inode, %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); status = -EINVAL; goto bail; } if ((la->la_size == 0) || (le16_to_cpu(la->la_size) > ocfs2_local_alloc_size(inode->i_sb))) { mlog(ML_ERROR, "Local alloc size is invalid (la_size = %u)\n", le16_to_cpu(la->la_size)); status = -EINVAL; goto bail; } /* do a little verification. */ num_used = ocfs2_local_alloc_count_bits(alloc); /* hopefully the local alloc has always been recovered before * we load it. */ if (num_used || alloc->id1.bitmap1.i_used || alloc->id1.bitmap1.i_total || la->la_bm_off) { mlog(ML_ERROR, "inconsistent detected, clean journal with" " unrecovered local alloc, please run fsck.ocfs2!\n" "found = %u, set = %u, taken = %u, off = %u\n", num_used, le32_to_cpu(alloc->id1.bitmap1.i_used), le32_to_cpu(alloc->id1.bitmap1.i_total), le32_to_cpu(OCFS2_LOCAL_ALLOC(alloc)->la_bm_off)); status = -EINVAL; goto bail; } osb->local_alloc_bh = alloc_bh; osb->local_alloc_state = OCFS2_LA_ENABLED; bail: if (status < 0) brelse(alloc_bh); iput(inode); trace_ocfs2_load_local_alloc(osb->local_alloc_bits); if (status) mlog_errno(status); return status; } /* * return any unused bits to the bitmap and write out a clean * local_alloc. * * local_alloc_bh is optional. If not passed, we will simply use the * one off osb. 
If you do pass it however, be warned that it *will* be * returned brelse'd and NULL'd out.*/ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) { int status; handle_t *handle; struct inode *local_alloc_inode = NULL; struct buffer_head *bh = NULL; struct buffer_head *main_bm_bh = NULL; struct inode *main_bm_inode = NULL; struct ocfs2_dinode *alloc_copy = NULL; struct ocfs2_dinode *alloc = NULL; cancel_delayed_work(&osb->la_enable_wq); if (osb->ocfs2_wq) flush_workqueue(osb->ocfs2_wq); if (osb->local_alloc_state == OCFS2_LA_UNUSED) goto out; local_alloc_inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, osb->slot_num); if (!local_alloc_inode) { status = -ENOENT; mlog_errno(status); goto out; } osb->local_alloc_state = OCFS2_LA_DISABLED; ocfs2_resmap_uninit(&osb->osb_la_resmap); main_bm_inode = ocfs2_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, OCFS2_INVALID_SLOT); if (!main_bm_inode) { status = -EINVAL; mlog_errno(status); goto out; } inode_lock(main_bm_inode); status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); if (status < 0) { mlog_errno(status); goto out_mutex; } /* WINDOW_MOVE_CREDITS is a bit heavy... */ handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS); if (IS_ERR(handle)) { mlog_errno(PTR_ERR(handle)); handle = NULL; goto out_unlock; } bh = osb->local_alloc_bh; alloc = (struct ocfs2_dinode *) bh->b_data; alloc_copy = kmemdup(alloc, bh->b_size, GFP_NOFS); if (!alloc_copy) { status = -ENOMEM; goto out_commit; } status = ocfs2_journal_access_di(handle, INODE_CACHE(local_alloc_inode), bh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto out_commit; } ocfs2_clear_local_alloc(alloc); ocfs2_journal_dirty(handle, bh); brelse(bh); osb->local_alloc_bh = NULL; osb->local_alloc_state = OCFS2_LA_UNUSED; status = ocfs2_sync_local_to_main(osb, handle, alloc_copy, main_bm_inode, main_bm_bh); if (status < 0) mlog_errno(status); out_commit: ocfs2_commit_trans(osb, handle); out_unlock: brelse(main_bm_bh); ocfs2_inode_unlock(main_bm_inode, 1); out_mutex: inode_unlock(main_bm_inode); iput(main_bm_inode); out: iput(local_alloc_inode); kfree(alloc_copy); } /* * We want to free the bitmap bits outside of any recovery context as * we'll need a cluster lock to do so, but we must clear the local * alloc before giving up the recovered nodes journal. 
To solve this, * we kmalloc a copy of the local alloc before it's change for the * caller to process with ocfs2_complete_local_alloc_recovery */ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, int slot_num, struct ocfs2_dinode **alloc_copy) { int status = 0; struct buffer_head *alloc_bh = NULL; struct inode *inode = NULL; struct ocfs2_dinode *alloc; trace_ocfs2_begin_local_alloc_recovery(slot_num); *alloc_copy = NULL; inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, slot_num); if (!inode) { status = -EINVAL; mlog_errno(status); goto bail; } inode_lock(inode); status = ocfs2_read_inode_block_full(inode, &alloc_bh, OCFS2_BH_IGNORE_CACHE); if (status < 0) { mlog_errno(status); goto bail; } *alloc_copy = kmalloc(alloc_bh->b_size, GFP_KERNEL); if (!(*alloc_copy)) { status = -ENOMEM; goto bail; } memcpy((*alloc_copy), alloc_bh->b_data, alloc_bh->b_size); alloc = (struct ocfs2_dinode *) alloc_bh->b_data; ocfs2_clear_local_alloc(alloc); ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check); status = ocfs2_write_block(osb, alloc_bh, INODE_CACHE(inode)); if (status < 0) mlog_errno(status); bail: if (status < 0) { kfree(*alloc_copy); *alloc_copy = NULL; } brelse(alloc_bh); if (inode) { inode_unlock(inode); iput(inode); } if (status) mlog_errno(status); return status; } /* * Step 2: By now, we've completed the journal recovery, we've stamped * a clean local alloc on disk and dropped the node out of the * recovery map. Dlm locks will no longer stall, so lets clear out the * main bitmap. */ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb, struct ocfs2_dinode *alloc) { int status; handle_t *handle; struct buffer_head *main_bm_bh = NULL; struct inode *main_bm_inode; main_bm_inode = ocfs2_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, OCFS2_INVALID_SLOT); if (!main_bm_inode) { status = -EINVAL; mlog_errno(status); goto out; } inode_lock(main_bm_inode); status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); if (status < 0) { mlog_errno(status); goto out_mutex; } handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS); if (IS_ERR(handle)) { status = PTR_ERR(handle); handle = NULL; mlog_errno(status); goto out_unlock; } /* we want the bitmap change to be recorded on disk asap */ handle->h_sync = 1; status = ocfs2_sync_local_to_main(osb, handle, alloc, main_bm_inode, main_bm_bh); if (status < 0) mlog_errno(status); ocfs2_commit_trans(osb, handle); out_unlock: ocfs2_inode_unlock(main_bm_inode, 1); out_mutex: inode_unlock(main_bm_inode); brelse(main_bm_bh); iput(main_bm_inode); out: if (!status) ocfs2_init_steal_slots(osb); if (status) mlog_errno(status); return status; } /* * make sure we've got at least bits_wanted contiguous bits in the * local alloc. You lose them when you drop i_rwsem. * * We will add ourselves to the transaction passed in, but may start * our own in order to shift windows. */ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, u32 bits_wanted, struct ocfs2_alloc_context *ac) { int status; struct ocfs2_dinode *alloc; struct inode *local_alloc_inode; unsigned int free_bits; BUG_ON(!ac); local_alloc_inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, osb->slot_num); if (!local_alloc_inode) { status = -ENOENT; mlog_errno(status); goto bail; } inode_lock(local_alloc_inode); /* * We must double check state and allocator bits because * another process may have changed them while holding i_rwsem. 
*/ spin_lock(&osb->osb_lock); if (!ocfs2_la_state_enabled(osb) || (bits_wanted > osb->local_alloc_bits)) { spin_unlock(&osb->osb_lock); status = -ENOSPC; goto bail; } spin_unlock(&osb->osb_lock); alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; #ifdef CONFIG_OCFS2_DEBUG_FS if (le32_to_cpu(alloc->id1.bitmap1.i_used) != ocfs2_local_alloc_count_bits(alloc)) { status = ocfs2_error(osb->sb, "local alloc inode %llu says it has %u used bits, but a count shows %u\n", (unsigned long long)le64_to_cpu(alloc->i_blkno), le32_to_cpu(alloc->id1.bitmap1.i_used), ocfs2_local_alloc_count_bits(alloc)); goto bail; } #endif free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) - le32_to_cpu(alloc->id1.bitmap1.i_used); if (bits_wanted > free_bits) { /* uhoh, window change time. */ status = ocfs2_local_alloc_slide_window(osb, local_alloc_inode); if (status < 0) { if (status != -ENOSPC) mlog_errno(status); goto bail; } /* * Under certain conditions, the window slide code * might have reduced the number of bits available or * disabled the local alloc entirely. Re-check * here and return -ENOSPC if necessary. */ status = -ENOSPC; if (!ocfs2_la_state_enabled(osb)) goto bail; free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) - le32_to_cpu(alloc->id1.bitmap1.i_used); if (bits_wanted > free_bits) goto bail; } ac->ac_inode = local_alloc_inode; /* We should never use localalloc from another slot */ ac->ac_alloc_slot = osb->slot_num; ac->ac_which = OCFS2_AC_USE_LOCAL; get_bh(osb->local_alloc_bh); ac->ac_bh = osb->local_alloc_bh; status = 0; bail: if (status < 0 && local_alloc_inode) { inode_unlock(local_alloc_inode); iput(local_alloc_inode); } trace_ocfs2_reserve_local_alloc_bits( (unsigned long long)ac->ac_max_block, bits_wanted, osb->slot_num, status); if (status) mlog_errno(status); return status; } int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb, handle_t *handle, struct ocfs2_alloc_context *ac, u32 bits_wanted, u32 *bit_off, u32 *num_bits) { int status, start; struct inode *local_alloc_inode; void *bitmap; struct ocfs2_dinode *alloc; struct ocfs2_local_alloc *la; BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL); local_alloc_inode = ac->ac_inode; alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; la = OCFS2_LOCAL_ALLOC(alloc); start = ocfs2_local_alloc_find_clear_bits(osb, alloc, &bits_wanted, ac->ac_resv); if (start == -1) { /* TODO: Shouldn't we just BUG here? 
*/ status = -ENOSPC; mlog_errno(status); goto bail; } bitmap = la->la_bitmap; *bit_off = le32_to_cpu(la->la_bm_off) + start; *num_bits = bits_wanted; status = ocfs2_journal_access_di(handle, INODE_CACHE(local_alloc_inode), osb->local_alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; } ocfs2_resmap_claimed_bits(&osb->osb_la_resmap, ac->ac_resv, start, bits_wanted); while(bits_wanted--) ocfs2_set_bit(start++, bitmap); le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits); ocfs2_journal_dirty(handle, osb->local_alloc_bh); bail: if (status) mlog_errno(status); return status; } int ocfs2_free_local_alloc_bits(struct ocfs2_super *osb, handle_t *handle, struct ocfs2_alloc_context *ac, u32 bit_off, u32 num_bits) { int status, start; u32 clear_bits; struct inode *local_alloc_inode; void *bitmap; struct ocfs2_dinode *alloc; struct ocfs2_local_alloc *la; BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL); local_alloc_inode = ac->ac_inode; alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; la = OCFS2_LOCAL_ALLOC(alloc); bitmap = la->la_bitmap; start = bit_off - le32_to_cpu(la->la_bm_off); clear_bits = num_bits; status = ocfs2_journal_access_di(handle, INODE_CACHE(local_alloc_inode), osb->local_alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; } while (clear_bits--) ocfs2_clear_bit(start++, bitmap); le32_add_cpu(&alloc->id1.bitmap1.i_used, -num_bits); ocfs2_journal_dirty(handle, osb->local_alloc_bh); bail: return status; } static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc) { u32 count; struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); count = memweight(la->la_bitmap, le16_to_cpu(la->la_size)); trace_ocfs2_local_alloc_count_bits(count); return count; } static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, struct ocfs2_dinode *alloc, u32 *numbits, struct ocfs2_alloc_reservation *resv) { int numfound = 0, bitoff, left, startoff; int local_resv = 0; struct ocfs2_alloc_reservation r; void *bitmap = NULL; struct ocfs2_reservation_map *resmap = &osb->osb_la_resmap; if (!alloc->id1.bitmap1.i_total) { bitoff = -1; goto bail; } if (!resv) { local_resv = 1; ocfs2_resv_init_once(&r); ocfs2_resv_set_type(&r, OCFS2_RESV_FLAG_TMP); resv = &r; } numfound = *numbits; if (ocfs2_resmap_resv_bits(resmap, resv, &bitoff, &numfound) == 0) { if (numfound < *numbits) *numbits = numfound; goto bail; } /* * Code error. While reservations are enabled, local * allocation should _always_ go through them. */ BUG_ON(osb->osb_resv_level != 0); /* * Reservations are disabled. Handle this the old way. */ bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap; numfound = bitoff = startoff = 0; left = le32_to_cpu(alloc->id1.bitmap1.i_total); while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) < left) { /* Ok, we found a zero bit... is it contig. 
or do we * start over?*/ if (bitoff == startoff) { /* we found a zero */ numfound++; startoff++; } else { /* got a zero after some ones */ numfound = 1; startoff = bitoff+1; } /* we got everything we needed */ if (numfound == *numbits) { /* mlog(0, "Found it all!\n"); */ break; } } trace_ocfs2_local_alloc_find_clear_bits_search_bitmap(bitoff, numfound); if (numfound == *numbits) bitoff = startoff - numfound; else bitoff = -1; bail: if (local_resv) ocfs2_resv_discard(resmap, resv); trace_ocfs2_local_alloc_find_clear_bits(*numbits, le32_to_cpu(alloc->id1.bitmap1.i_total), bitoff, numfound); return bitoff; } static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc) { struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); int i; alloc->id1.bitmap1.i_total = 0; alloc->id1.bitmap1.i_used = 0; la->la_bm_off = 0; for(i = 0; i < le16_to_cpu(la->la_size); i++) la->la_bitmap[i] = 0; } #if 0 /* turn this on and uncomment below to aid debugging window shifts. */ static void ocfs2_verify_zero_bits(unsigned long *bitmap, unsigned int start, unsigned int count) { unsigned int tmp = count; while(tmp--) { if (ocfs2_test_bit(start + tmp, bitmap)) { printk("ocfs2_verify_zero_bits: start = %u, count = " "%u\n", start, count); printk("ocfs2_verify_zero_bits: bit %u is set!", start + tmp); BUG(); } } } #endif /* * sync the local alloc to main bitmap. * * assumes you've already locked the main bitmap -- the bitmap inode * passed is used for caching. */ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, handle_t *handle, struct ocfs2_dinode *alloc, struct inode *main_bm_inode, struct buffer_head *main_bm_bh) { int status = 0; int bit_off, left, count, start; u64 la_start_blk; u64 blkno; void *bitmap; struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); trace_ocfs2_sync_local_to_main( le32_to_cpu(alloc->id1.bitmap1.i_total), le32_to_cpu(alloc->id1.bitmap1.i_used)); if (!alloc->id1.bitmap1.i_total) { goto bail; } if (le32_to_cpu(alloc->id1.bitmap1.i_used) == le32_to_cpu(alloc->id1.bitmap1.i_total)) { goto bail; } la_start_blk = ocfs2_clusters_to_blocks(osb->sb, le32_to_cpu(la->la_bm_off)); bitmap = la->la_bitmap; start = count = 0; left = le32_to_cpu(alloc->id1.bitmap1.i_total); while (1) { bit_off = ocfs2_find_next_zero_bit(bitmap, left, start); if ((bit_off < left) && (bit_off == start)) { count++; start++; continue; } if (count) { blkno = la_start_blk + ocfs2_clusters_to_blocks(osb->sb, start - count); trace_ocfs2_sync_local_to_main_free( count, start - count, (unsigned long long)la_start_blk, (unsigned long long)blkno); status = ocfs2_release_clusters(handle, main_bm_inode, main_bm_bh, blkno, count); if (status < 0) { mlog_errno(status); goto bail; } } if (bit_off >= left) break; count = 1; start = bit_off + 1; } bail: if (status) mlog_errno(status); return status; } enum ocfs2_la_event { OCFS2_LA_EVENT_SLIDE, /* Normal window slide. */ OCFS2_LA_EVENT_FRAGMENTED, /* The global bitmap has * enough bits theoretically * free, but a contiguous * allocation could not be * found. */ OCFS2_LA_EVENT_ENOSPC, /* Global bitmap doesn't have * enough bits free to satisfy * our request. */ }; #define OCFS2_LA_ENABLE_INTERVAL (30 * HZ) /* * Given an event, calculate the size of our next local alloc window. * * This should always be called under i_rwsem of the local alloc inode * so that local alloc disabling doesn't race with processes trying to * use the allocator. * * Returns the state which the local alloc was left in. This value can * be ignored by some paths. 
*/ static int ocfs2_recalc_la_window(struct ocfs2_super *osb, enum ocfs2_la_event event) { unsigned int bits; int state; spin_lock(&osb->osb_lock); if (osb->local_alloc_state == OCFS2_LA_DISABLED) { WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED); goto out_unlock; } /* * ENOSPC and fragmentation are treated similarly for now. */ if (event == OCFS2_LA_EVENT_ENOSPC || event == OCFS2_LA_EVENT_FRAGMENTED) { /* * We ran out of contiguous space in the primary * bitmap. Drastically reduce the number of bits used * by local alloc until we have to disable it. */ bits = osb->local_alloc_bits >> 1; if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) { /* * By setting state to THROTTLED, we'll keep * the number of local alloc bits used down * until an event occurs which would give us * reason to assume the bitmap situation might * have changed. */ osb->local_alloc_state = OCFS2_LA_THROTTLED; osb->local_alloc_bits = bits; } else { osb->local_alloc_state = OCFS2_LA_DISABLED; } queue_delayed_work(osb->ocfs2_wq, &osb->la_enable_wq, OCFS2_LA_ENABLE_INTERVAL); goto out_unlock; } /* * Don't increase the size of the local alloc window until we * know we might be able to fulfill the request. Otherwise, we * risk bouncing around the global bitmap during periods of * low space. */ if (osb->local_alloc_state != OCFS2_LA_THROTTLED) osb->local_alloc_bits = osb->local_alloc_default_bits; out_unlock: state = osb->local_alloc_state; spin_unlock(&osb->osb_lock); return state; } static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, struct ocfs2_alloc_context **ac, struct inode **bitmap_inode, struct buffer_head **bitmap_bh) { int status; *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); if (!(*ac)) { status = -ENOMEM; mlog_errno(status); goto bail; } retry_enospc: (*ac)->ac_bits_wanted = osb->local_alloc_bits; status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); if (status == -ENOSPC) { if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) == OCFS2_LA_DISABLED) goto bail; ocfs2_free_ac_resource(*ac); memset(*ac, 0, sizeof(struct ocfs2_alloc_context)); goto retry_enospc; } if (status < 0) { mlog_errno(status); goto bail; } *bitmap_inode = (*ac)->ac_inode; igrab(*bitmap_inode); *bitmap_bh = (*ac)->ac_bh; get_bh(*bitmap_bh); status = 0; bail: if ((status < 0) && *ac) { ocfs2_free_alloc_context(*ac); *ac = NULL; } if (status) mlog_errno(status); return status; } /* * pass it the bitmap lock in lock_bh if you have it. */ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, handle_t *handle, struct ocfs2_alloc_context *ac) { int status = 0; u32 cluster_off, cluster_count; struct ocfs2_dinode *alloc = NULL; struct ocfs2_local_alloc *la; alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; la = OCFS2_LOCAL_ALLOC(alloc); trace_ocfs2_local_alloc_new_window( le32_to_cpu(alloc->id1.bitmap1.i_total), osb->local_alloc_bits); /* Instruct the allocation code to try the most recently used * cluster group. We'll re-record the group used this pass * below. */ ac->ac_last_group = osb->la_last_gd; /* we used the generic suballoc reserve function, but we set * everything up nicely, so there's no reason why we can't use * the more specific cluster api to claim bits. */ status = ocfs2_claim_clusters(handle, ac, osb->local_alloc_bits, &cluster_off, &cluster_count); if (status == -ENOSPC) { retry_enospc: /* * Note: We could also try syncing the journal here to * allow use of any free bits which the current * transaction can't give us access to. 
--Mark */ if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) == OCFS2_LA_DISABLED) goto bail; ac->ac_bits_wanted = osb->local_alloc_bits; status = ocfs2_claim_clusters(handle, ac, osb->local_alloc_bits, &cluster_off, &cluster_count); if (status == -ENOSPC) goto retry_enospc; /* * We only shrunk the *minimum* number of in our * request - it's entirely possible that the allocator * might give us more than we asked for. */ if (status == 0) { spin_lock(&osb->osb_lock); osb->local_alloc_bits = cluster_count; spin_unlock(&osb->osb_lock); } } if (status < 0) { if (status != -ENOSPC) mlog_errno(status); goto bail; } osb->la_last_gd = ac->ac_last_group; la->la_bm_off = cpu_to_le32(cluster_off); alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count); /* just in case... In the future when we find space ourselves, * we don't have to get all contiguous -- but we'll have to * set all previously used bits in bitmap and update * la_bits_set before setting the bits in the main bitmap. */ alloc->id1.bitmap1.i_used = 0; memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0, le16_to_cpu(la->la_size)); ocfs2_resmap_restart(&osb->osb_la_resmap, cluster_count, OCFS2_LOCAL_ALLOC(alloc)->la_bitmap); trace_ocfs2_local_alloc_new_window_result( le32_to_cpu(OCFS2_LOCAL_ALLOC(alloc)->la_bm_off), le32_to_cpu(alloc->id1.bitmap1.i_total)); bail: if (status) mlog_errno(status); return status; } /* Note that we do *NOT* lock the local alloc inode here as * it's been locked already for us. */ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, struct inode *local_alloc_inode) { int status = 0; struct buffer_head *main_bm_bh = NULL; struct inode *main_bm_inode = NULL; handle_t *handle = NULL; struct ocfs2_dinode *alloc; struct ocfs2_dinode *alloc_copy = NULL; struct ocfs2_alloc_context *ac = NULL; ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE); /* This will lock the main bitmap for us. */ status = ocfs2_local_alloc_reserve_for_window(osb, &ac, &main_bm_inode, &main_bm_bh); if (status < 0) { if (status != -ENOSPC) mlog_errno(status); goto bail; } handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS); if (IS_ERR(handle)) { status = PTR_ERR(handle); handle = NULL; mlog_errno(status); goto bail; } alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; /* We want to clear the local alloc before doing anything * else, so that if we error later during this operation, * local alloc shutdown won't try to double free main bitmap * bits. Make a copy so the sync function knows which bits to * free. */ alloc_copy = kmemdup(alloc, osb->local_alloc_bh->b_size, GFP_NOFS); if (!alloc_copy) { status = -ENOMEM; mlog_errno(status); goto bail; } status = ocfs2_journal_access_di(handle, INODE_CACHE(local_alloc_inode), osb->local_alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; } ocfs2_clear_local_alloc(alloc); ocfs2_journal_dirty(handle, osb->local_alloc_bh); status = ocfs2_sync_local_to_main(osb, handle, alloc_copy, main_bm_inode, main_bm_bh); if (status < 0) { mlog_errno(status); goto bail; } status = ocfs2_local_alloc_new_window(osb, handle, ac); if (status < 0) { if (status != -ENOSPC) mlog_errno(status); goto bail; } atomic_inc(&osb->alloc_stats.moves); bail: if (handle) ocfs2_commit_trans(osb, handle); brelse(main_bm_bh); iput(main_bm_inode); kfree(alloc_copy); if (ac) ocfs2_free_alloc_context(ac); if (status) mlog_errno(status); return status; }
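To make the window-sizing heuristic in ocfs2_la_default_mb() above easier to follow, here is a small standalone sketch in plain userspace C, not kernel code. It only models the halving branch described in the long comment (cap large groups at 256 MB, otherwise halve until the window divides the group evenly); the real function additionally caps the result by the per-slot cluster count and by the number of bits a local alloc block can hold. LA_MAX_DEFAULT_MB and pick_la_mb() are invented names, and the sample group sizes come from the reference table in the source comment.

#include <stdio.h>

#define LA_MAX_DEFAULT_MB 256

static unsigned int pick_la_mb(unsigned int gd_mb)
{
	unsigned int la_mb = gd_mb;

	if (la_mb <= LA_MAX_DEFAULT_MB)
		return la_mb;			/* small group: use it as-is */

	if (gd_mb > 2 * LA_MAX_DEFAULT_MB)
		return LA_MAX_DEFAULT_MB;	/* big group: 256 MB windows distribute well */

	/* e.g. a 504 MB group yields 252 MB, i.e. two evenly sized windows */
	while (la_mb > LA_MAX_DEFAULT_MB)
		la_mb >>= 1;
	return la_mb;
}

int main(void)
{
	/* cluster group sizes (MB) from the 4k-blocksize reference table above */
	unsigned int groups[] = { 126, 252, 504, 1008, 2016 };
	unsigned int i;

	for (i = 0; i < sizeof(groups) / sizeof(groups[0]); i++)
		printf("group %4u MB -> local alloc window %3u MB\n",
		       groups[i], pick_la_mb(groups[i]));
	return 0;
}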
// SPDX-License-Identifier: GPL-2.0-or-later /* * cgroups support for the BFQ I/O scheduler. */ #include <linux/module.h> #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/cgroup.h> #include <linux/ktime.h> #include <linux/rbtree.h> #include <linux/ioprio.h> #include <linux/sbitmap.h> #include <linux/delay.h> #include "elevator.h" #include "bfq-iosched.h" #ifdef CONFIG_BFQ_CGROUP_DEBUG static int bfq_stat_init(struct bfq_stat *stat, gfp_t gfp) { int ret; ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp); if (ret) return ret; atomic64_set(&stat->aux_cnt, 0); return 0; } static void bfq_stat_exit(struct bfq_stat *stat) { percpu_counter_destroy(&stat->cpu_cnt); } /** * bfq_stat_add - add a value to a bfq_stat * @stat: target bfq_stat * @val: value to add * * Add @val to @stat. The caller must ensure that IRQ on the same CPU * don't re-enter this function for the same counter.
*/ static inline void bfq_stat_add(struct bfq_stat *stat, uint64_t val) { percpu_counter_add_batch(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH); } /** * bfq_stat_read - read the current value of a bfq_stat * @stat: bfq_stat to read */ static inline uint64_t bfq_stat_read(struct bfq_stat *stat) { return percpu_counter_sum_positive(&stat->cpu_cnt); } /** * bfq_stat_reset - reset a bfq_stat * @stat: bfq_stat to reset */ static inline void bfq_stat_reset(struct bfq_stat *stat) { percpu_counter_set(&stat->cpu_cnt, 0); atomic64_set(&stat->aux_cnt, 0); } /** * bfq_stat_add_aux - add a bfq_stat into another's aux count * @to: the destination bfq_stat * @from: the source * * Add @from's count including the aux one to @to's aux count. */ static inline void bfq_stat_add_aux(struct bfq_stat *to, struct bfq_stat *from) { atomic64_add(bfq_stat_read(from) + atomic64_read(&from->aux_cnt), &to->aux_cnt); } /** * blkg_prfill_stat - prfill callback for bfq_stat * @sf: seq_file to print to * @pd: policy private data of interest * @off: offset to the bfq_stat in @pd * * prfill callback for printing a bfq_stat. */ static u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off) { return __blkg_prfill_u64(sf, pd, bfq_stat_read((void *)pd + off)); } /* bfqg stats flags */ enum bfqg_stats_flags { BFQG_stats_waiting = 0, BFQG_stats_idling, BFQG_stats_empty, }; #define BFQG_FLAG_FNS(name) \ static void bfqg_stats_mark_##name(struct bfqg_stats *stats) \ { \ stats->flags |= (1 << BFQG_stats_##name); \ } \ static void bfqg_stats_clear_##name(struct bfqg_stats *stats) \ { \ stats->flags &= ~(1 << BFQG_stats_##name); \ } \ static int bfqg_stats_##name(struct bfqg_stats *stats) \ { \ return (stats->flags & (1 << BFQG_stats_##name)) != 0; \ } \ BFQG_FLAG_FNS(waiting) BFQG_FLAG_FNS(idling) BFQG_FLAG_FNS(empty) #undef BFQG_FLAG_FNS /* This should be called with the scheduler lock held. */ static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats) { u64 now; if (!bfqg_stats_waiting(stats)) return; now = blk_time_get_ns(); if (now > stats->start_group_wait_time) bfq_stat_add(&stats->group_wait_time, now - stats->start_group_wait_time); bfqg_stats_clear_waiting(stats); } /* This should be called with the scheduler lock held. */ static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg, struct bfq_group *curr_bfqg) { struct bfqg_stats *stats = &bfqg->stats; if (bfqg_stats_waiting(stats)) return; if (bfqg == curr_bfqg) return; stats->start_group_wait_time = blk_time_get_ns(); bfqg_stats_mark_waiting(stats); } /* This should be called with the scheduler lock held. */ static void bfqg_stats_end_empty_time(struct bfqg_stats *stats) { u64 now; if (!bfqg_stats_empty(stats)) return; now = blk_time_get_ns(); if (now > stats->start_empty_time) bfq_stat_add(&stats->empty_time, now - stats->start_empty_time); bfqg_stats_clear_empty(stats); } void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { bfq_stat_add(&bfqg->stats.dequeue, 1); } void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { struct bfqg_stats *stats = &bfqg->stats; if (blkg_rwstat_total(&stats->queued)) return; /* * group is already marked empty. This can happen if bfqq got new * request in parent group and moved to this group while being added * to service tree. Just ignore the event and move on. 
*/ if (bfqg_stats_empty(stats)) return; stats->start_empty_time = blk_time_get_ns(); bfqg_stats_mark_empty(stats); } void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { struct bfqg_stats *stats = &bfqg->stats; if (bfqg_stats_idling(stats)) { u64 now = blk_time_get_ns(); if (now > stats->start_idle_time) bfq_stat_add(&stats->idle_time, now - stats->start_idle_time); bfqg_stats_clear_idling(stats); } } void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { struct bfqg_stats *stats = &bfqg->stats; stats->start_idle_time = blk_time_get_ns(); bfqg_stats_mark_idling(stats); } void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) { struct bfqg_stats *stats = &bfqg->stats; bfq_stat_add(&stats->avg_queue_size_sum, blkg_rwstat_total(&stats->queued)); bfq_stat_add(&stats->avg_queue_size_samples, 1); bfqg_stats_update_group_wait_time(stats); } void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq, blk_opf_t opf) { blkg_rwstat_add(&bfqg->stats.queued, opf, 1); bfqg_stats_end_empty_time(&bfqg->stats); if (!(bfqq == bfqg->bfqd->in_service_queue)) bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq)); } void bfqg_stats_update_io_remove(struct bfq_group *bfqg, blk_opf_t opf) { blkg_rwstat_add(&bfqg->stats.queued, opf, -1); } void bfqg_stats_update_io_merged(struct bfq_group *bfqg, blk_opf_t opf) { blkg_rwstat_add(&bfqg->stats.merged, opf, 1); } void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns, u64 io_start_time_ns, blk_opf_t opf) { struct bfqg_stats *stats = &bfqg->stats; u64 now = blk_time_get_ns(); if (now > io_start_time_ns) blkg_rwstat_add(&stats->service_time, opf, now - io_start_time_ns); if (io_start_time_ns > start_time_ns) blkg_rwstat_add(&stats->wait_time, opf, io_start_time_ns - start_time_ns); } #else /* CONFIG_BFQ_CGROUP_DEBUG */ void bfqg_stats_update_io_remove(struct bfq_group *bfqg, blk_opf_t opf) { } void bfqg_stats_update_io_merged(struct bfq_group *bfqg, blk_opf_t opf) { } void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns, u64 io_start_time_ns, blk_opf_t opf) { } void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { } void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { } #endif /* CONFIG_BFQ_CGROUP_DEBUG */ #ifdef CONFIG_BFQ_GROUP_IOSCHED /* * blk-cgroup policy-related handlers * The following functions help in converting between blk-cgroup * internal structures and BFQ-specific structures. */ static struct bfq_group *pd_to_bfqg(struct blkg_policy_data *pd) { return pd ? container_of(pd, struct bfq_group, pd) : NULL; } struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg) { return pd_to_blkg(&bfqg->pd); } static struct bfq_group *blkg_to_bfqg(struct blkcg_gq *blkg) { return pd_to_bfqg(blkg_to_pd(blkg, &blkcg_policy_bfq)); } /* * bfq_group handlers * The following functions help in navigating the bfq_group hierarchy * by allowing to find the parent of a bfq_group or the bfq_group * associated to a bfq_queue. */ static struct bfq_group *bfqg_parent(struct bfq_group *bfqg) { struct blkcg_gq *pblkg = bfqg_to_blkg(bfqg)->parent; return pblkg ? blkg_to_bfqg(pblkg) : NULL; } struct bfq_group *bfqq_group(struct bfq_queue *bfqq) { struct bfq_entity *group_entity = bfqq->entity.parent; return group_entity ? container_of(group_entity, struct bfq_group, entity) : bfqq->bfqd->root_group; } /* * The following two functions handle get and put of a bfq_group by * wrapping the related blk-cgroup hooks. 
*/ static void bfqg_get(struct bfq_group *bfqg) { refcount_inc(&bfqg->ref); } static void bfqg_put(struct bfq_group *bfqg) { if (refcount_dec_and_test(&bfqg->ref)) kfree(bfqg); } static void bfqg_and_blkg_get(struct bfq_group *bfqg) { /* see comments in bfq_bic_update_cgroup for why refcounting bfqg */ bfqg_get(bfqg); blkg_get(bfqg_to_blkg(bfqg)); } void bfqg_and_blkg_put(struct bfq_group *bfqg) { blkg_put(bfqg_to_blkg(bfqg)); bfqg_put(bfqg); } void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq) { struct bfq_group *bfqg = blkg_to_bfqg(rq->bio->bi_blkg); if (!bfqg) return; blkg_rwstat_add(&bfqg->stats.bytes, rq->cmd_flags, blk_rq_bytes(rq)); blkg_rwstat_add(&bfqg->stats.ios, rq->cmd_flags, 1); } /* @stats = 0 */ static void bfqg_stats_reset(struct bfqg_stats *stats) { #ifdef CONFIG_BFQ_CGROUP_DEBUG /* queued stats shouldn't be cleared */ blkg_rwstat_reset(&stats->merged); blkg_rwstat_reset(&stats->service_time); blkg_rwstat_reset(&stats->wait_time); bfq_stat_reset(&stats->time); bfq_stat_reset(&stats->avg_queue_size_sum); bfq_stat_reset(&stats->avg_queue_size_samples); bfq_stat_reset(&stats->dequeue); bfq_stat_reset(&stats->group_wait_time); bfq_stat_reset(&stats->idle_time); bfq_stat_reset(&stats->empty_time); #endif } /* @to += @from */ static void bfqg_stats_add_aux(struct bfqg_stats *to, struct bfqg_stats *from) { if (!to || !from) return; #ifdef CONFIG_BFQ_CGROUP_DEBUG /* queued stats shouldn't be cleared */ blkg_rwstat_add_aux(&to->merged, &from->merged); blkg_rwstat_add_aux(&to->service_time, &from->service_time); blkg_rwstat_add_aux(&to->wait_time, &from->wait_time); bfq_stat_add_aux(&from->time, &from->time); bfq_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum); bfq_stat_add_aux(&to->avg_queue_size_samples, &from->avg_queue_size_samples); bfq_stat_add_aux(&to->dequeue, &from->dequeue); bfq_stat_add_aux(&to->group_wait_time, &from->group_wait_time); bfq_stat_add_aux(&to->idle_time, &from->idle_time); bfq_stat_add_aux(&to->empty_time, &from->empty_time); #endif } /* * Transfer @bfqg's stats to its parent's aux counts so that the ancestors' * recursive stats can still account for the amount used by this bfqg after * it's gone. */ static void bfqg_stats_xfer_dead(struct bfq_group *bfqg) { struct bfq_group *parent; if (!bfqg) /* root_group */ return; parent = bfqg_parent(bfqg); lockdep_assert_held(&bfqg_to_blkg(bfqg)->q->queue_lock); if (unlikely(!parent)) return; bfqg_stats_add_aux(&parent->stats, &bfqg->stats); bfqg_stats_reset(&bfqg->stats); } void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg) { struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); entity->weight = entity->new_weight; entity->orig_weight = entity->new_weight; if (bfqq) { bfqq->ioprio = bfqq->new_ioprio; bfqq->ioprio_class = bfqq->new_ioprio_class; /* * Make sure that bfqg and its associated blkg do not * disappear before entity. 
*/ bfqg_and_blkg_get(bfqg); } entity->parent = bfqg->my_entity; /* NULL for root group */ entity->sched_data = &bfqg->sched_data; } static void bfqg_stats_exit(struct bfqg_stats *stats) { blkg_rwstat_exit(&stats->bytes); blkg_rwstat_exit(&stats->ios); #ifdef CONFIG_BFQ_CGROUP_DEBUG blkg_rwstat_exit(&stats->merged); blkg_rwstat_exit(&stats->service_time); blkg_rwstat_exit(&stats->wait_time); blkg_rwstat_exit(&stats->queued); bfq_stat_exit(&stats->time); bfq_stat_exit(&stats->avg_queue_size_sum); bfq_stat_exit(&stats->avg_queue_size_samples); bfq_stat_exit(&stats->dequeue); bfq_stat_exit(&stats->group_wait_time); bfq_stat_exit(&stats->idle_time); bfq_stat_exit(&stats->empty_time); #endif } static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp) { if (blkg_rwstat_init(&stats->bytes, gfp) || blkg_rwstat_init(&stats->ios, gfp)) goto error; #ifdef CONFIG_BFQ_CGROUP_DEBUG if (blkg_rwstat_init(&stats->merged, gfp) || blkg_rwstat_init(&stats->service_time, gfp) || blkg_rwstat_init(&stats->wait_time, gfp) || blkg_rwstat_init(&stats->queued, gfp) || bfq_stat_init(&stats->time, gfp) || bfq_stat_init(&stats->avg_queue_size_sum, gfp) || bfq_stat_init(&stats->avg_queue_size_samples, gfp) || bfq_stat_init(&stats->dequeue, gfp) || bfq_stat_init(&stats->group_wait_time, gfp) || bfq_stat_init(&stats->idle_time, gfp) || bfq_stat_init(&stats->empty_time, gfp)) goto error; #endif return 0; error: bfqg_stats_exit(stats); return -ENOMEM; } static struct bfq_group_data *cpd_to_bfqgd(struct blkcg_policy_data *cpd) { return cpd ? container_of(cpd, struct bfq_group_data, pd) : NULL; } static struct bfq_group_data *blkcg_to_bfqgd(struct blkcg *blkcg) { return cpd_to_bfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_bfq)); } static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp) { struct bfq_group_data *bgd; bgd = kzalloc(sizeof(*bgd), gfp); if (!bgd) return NULL; bgd->weight = CGROUP_WEIGHT_DFL; return &bgd->pd; } static void bfq_cpd_free(struct blkcg_policy_data *cpd) { kfree(cpd_to_bfqgd(cpd)); } static struct blkg_policy_data *bfq_pd_alloc(struct gendisk *disk, struct blkcg *blkcg, gfp_t gfp) { struct bfq_group *bfqg; bfqg = kzalloc_node(sizeof(*bfqg), gfp, disk->node_id); if (!bfqg) return NULL; if (bfqg_stats_init(&bfqg->stats, gfp)) { kfree(bfqg); return NULL; } /* see comments in bfq_bic_update_cgroup for why refcounting */ refcount_set(&bfqg->ref, 1); return &bfqg->pd; } static void bfq_pd_init(struct blkg_policy_data *pd) { struct blkcg_gq *blkg = pd_to_blkg(pd); struct bfq_group *bfqg = blkg_to_bfqg(blkg); struct bfq_data *bfqd = blkg->q->elevator->elevator_data; struct bfq_entity *entity = &bfqg->entity; struct bfq_group_data *d = blkcg_to_bfqgd(blkg->blkcg); entity->orig_weight = entity->weight = entity->new_weight = d->weight; entity->my_sched_data = &bfqg->sched_data; entity->last_bfqq_created = NULL; bfqg->my_entity = entity; /* * the root_group's will be set to NULL * in bfq_init_queue() */ bfqg->bfqd = bfqd; bfqg->active_entities = 0; bfqg->num_queues_with_pending_reqs = 0; bfqg->rq_pos_tree = RB_ROOT; } static void bfq_pd_free(struct blkg_policy_data *pd) { struct bfq_group *bfqg = pd_to_bfqg(pd); bfqg_stats_exit(&bfqg->stats); bfqg_put(bfqg); } static void bfq_pd_reset_stats(struct blkg_policy_data *pd) { struct bfq_group *bfqg = pd_to_bfqg(pd); bfqg_stats_reset(&bfqg->stats); } static void bfq_group_set_parent(struct bfq_group *bfqg, struct bfq_group *parent) { struct bfq_entity *entity; entity = &bfqg->entity; entity->parent = parent->my_entity; entity->sched_data = &parent->sched_data; } static 
void bfq_link_bfqg(struct bfq_data *bfqd, struct bfq_group *bfqg) { struct bfq_group *parent; struct bfq_entity *entity; /* * Update chain of bfq_groups as we might be handling a leaf group * which, along with some of its relatives, has not been hooked yet * to the private hierarchy of BFQ. */ entity = &bfqg->entity; for_each_entity(entity) { struct bfq_group *curr_bfqg = container_of(entity, struct bfq_group, entity); if (curr_bfqg != bfqd->root_group) { parent = bfqg_parent(curr_bfqg); if (!parent) parent = bfqd->root_group; bfq_group_set_parent(curr_bfqg, parent); } } } struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio) { struct blkcg_gq *blkg = bio->bi_blkg; struct bfq_group *bfqg; while (blkg) { if (!blkg->online) { blkg = blkg->parent; continue; } bfqg = blkg_to_bfqg(blkg); if (bfqg->pd.online) { bio_associate_blkg_from_css(bio, &blkg->blkcg->css); return bfqg; } blkg = blkg->parent; } bio_associate_blkg_from_css(bio, &bfqg_to_blkg(bfqd->root_group)->blkcg->css); return bfqd->root_group; } /** * bfq_bfqq_move - migrate @bfqq to @bfqg. * @bfqd: queue descriptor. * @bfqq: the queue to move. * @bfqg: the group to move to. * * Move @bfqq to @bfqg, deactivating it from its old group and reactivating * it on the new one. Avoid putting the entity on the old group idle tree. * * Must be called under the scheduler lock, to make sure that the blkg * owning @bfqg does not disappear (see comments in * bfq_bic_update_cgroup on guaranteeing the consistency of blkg * objects). */ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, struct bfq_group *bfqg) { struct bfq_entity *entity = &bfqq->entity; struct bfq_group *old_parent = bfqq_group(bfqq); bool has_pending_reqs = false; /* * No point to move bfqq to the same group, which can happen when * root group is offlined */ if (old_parent == bfqg) return; /* * oom_bfqq is not allowed to move, oom_bfqq will hold ref to root_group * until elevator exit. */ if (bfqq == &bfqd->oom_bfqq) return; /* * Get extra reference to prevent bfqq from being freed in * next possible expire or deactivate. */ bfqq->ref++; if (entity->in_groups_with_pending_reqs) { has_pending_reqs = true; bfq_del_bfqq_in_groups_with_pending_reqs(bfqq); } /* If bfqq is empty, then bfq_bfqq_expire also invokes * bfq_del_bfqq_busy, thereby removing bfqq and its entity * from data structures related to current group. Otherwise we * need to remove bfqq explicitly with bfq_deactivate_bfqq, as * we do below. 
*/ if (bfqq == bfqd->in_service_queue) bfq_bfqq_expire(bfqd, bfqd->in_service_queue, false, BFQQE_PREEMPTED); if (bfq_bfqq_busy(bfqq)) bfq_deactivate_bfqq(bfqd, bfqq, false, false); else if (entity->on_st_or_in_serv) bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); bfqg_and_blkg_put(old_parent); bfq_reassign_last_bfqq(bfqq, NULL); entity->parent = bfqg->my_entity; entity->sched_data = &bfqg->sched_data; /* pin down bfqg and its associated blkg */ bfqg_and_blkg_get(bfqg); if (has_pending_reqs) bfq_add_bfqq_in_groups_with_pending_reqs(bfqq); if (bfq_bfqq_busy(bfqq)) { if (unlikely(!bfqd->nonrot_with_queueing)) bfq_pos_tree_add_move(bfqd, bfqq); bfq_activate_bfqq(bfqd, bfqq); } if (!bfqd->in_service_queue && !bfqd->tot_rq_in_driver) bfq_schedule_dispatch(bfqd); /* release extra ref taken above, bfqq may happen to be freed now */ bfq_put_queue(bfqq); } static void bfq_sync_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *sync_bfqq, struct bfq_io_cq *bic, struct bfq_group *bfqg, unsigned int act_idx) { struct bfq_queue *bfqq; if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) { /* We are the only user of this bfqq, just move it */ if (sync_bfqq->entity.sched_data != &bfqg->sched_data) bfq_bfqq_move(bfqd, sync_bfqq, bfqg); return; } /* * The queue was merged to a different queue. Check * that the merge chain still belongs to the same * cgroup. */ for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq) if (bfqq->entity.sched_data != &bfqg->sched_data) break; if (bfqq) { /* * Some queue changed cgroup so the merge is not valid * anymore. We cannot easily just cancel the merge (by * clearing new_bfqq) as there may be other processes * using this queue and holding refs to all queues * below sync_bfqq->new_bfqq. Similarly if the merge * already happened, we need to detach from bfqq now * so that we cannot merge bio to a request from the * old cgroup. */ bfq_put_cooperator(sync_bfqq); bic_set_bfqq(bic, NULL, true, act_idx); bfq_release_process_ref(bfqd, sync_bfqq); } } /** * __bfq_bic_change_cgroup - move @bic to @bfqg. * @bfqd: the queue descriptor. * @bic: the bic to move. * @bfqg: the group to move to. * * Move bic to blkcg, assuming that bfqd->lock is held; which makes * sure that the reference to cgroup is valid across the call (see * comments in bfq_bic_update_cgroup on this issue) */ static void __bfq_bic_change_cgroup(struct bfq_data *bfqd, struct bfq_io_cq *bic, struct bfq_group *bfqg) { unsigned int act_idx; for (act_idx = 0; act_idx < bfqd->num_actuators; act_idx++) { struct bfq_queue *async_bfqq = bic_to_bfqq(bic, false, act_idx); struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, true, act_idx); if (async_bfqq && async_bfqq->entity.sched_data != &bfqg->sched_data) { bic_set_bfqq(bic, NULL, false, act_idx); bfq_release_process_ref(bfqd, async_bfqq); } if (sync_bfqq) bfq_sync_bfqq_move(bfqd, sync_bfqq, bic, bfqg, act_idx); } } void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) { struct bfq_data *bfqd = bic_to_bfqd(bic); struct bfq_group *bfqg = bfq_bio_bfqg(bfqd, bio); uint64_t serial_nr; serial_nr = bfqg_to_blkg(bfqg)->blkcg->css.serial_nr; /* * Check whether blkcg has changed. The condition may trigger * spuriously on a newly created cic but there's no harm. */ if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr)) return; /* * New cgroup for this process. Make sure it is linked to bfq internal * cgroup hierarchy. 
*/ bfq_link_bfqg(bfqd, bfqg); __bfq_bic_change_cgroup(bfqd, bic, bfqg); bic->blkcg_serial_nr = serial_nr; } /** * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st. * @st: the service tree being flushed. */ static void bfq_flush_idle_tree(struct bfq_service_tree *st) { struct bfq_entity *entity = st->first_idle; for (; entity ; entity = st->first_idle) __bfq_deactivate_entity(entity, false); } /** * bfq_reparent_leaf_entity - move leaf entity to the root_group. * @bfqd: the device data structure with the root group. * @entity: the entity to move, if entity is a leaf; or the parent entity * of an active leaf entity to move, if entity is not a leaf. * @ioprio_class: I/O priority class to reparent. */ static void bfq_reparent_leaf_entity(struct bfq_data *bfqd, struct bfq_entity *entity, int ioprio_class) { struct bfq_queue *bfqq; struct bfq_entity *child_entity = entity; while (child_entity->my_sched_data) { /* leaf not reached yet */ struct bfq_sched_data *child_sd = child_entity->my_sched_data; struct bfq_service_tree *child_st = child_sd->service_tree + ioprio_class; struct rb_root *child_active = &child_st->active; child_entity = bfq_entity_of(rb_first(child_active)); if (!child_entity) child_entity = child_sd->in_service_entity; } bfqq = bfq_entity_to_bfqq(child_entity); bfq_bfqq_move(bfqd, bfqq, bfqd->root_group); } /** * bfq_reparent_active_queues - move to the root group all active queues. * @bfqd: the device data structure with the root group. * @bfqg: the group to move from. * @st: the service tree to start the search from. * @ioprio_class: I/O priority class to reparent. */ static void bfq_reparent_active_queues(struct bfq_data *bfqd, struct bfq_group *bfqg, struct bfq_service_tree *st, int ioprio_class) { struct rb_root *active = &st->active; struct bfq_entity *entity; while ((entity = bfq_entity_of(rb_first(active)))) bfq_reparent_leaf_entity(bfqd, entity, ioprio_class); if (bfqg->sched_data.in_service_entity) bfq_reparent_leaf_entity(bfqd, bfqg->sched_data.in_service_entity, ioprio_class); } /** * bfq_pd_offline - deactivate the entity associated with @pd, * and reparent its children entities. * @pd: descriptor of the policy going offline. * * blkio already grabs the queue_lock for us, so no need to use * RCU-based magic */ static void bfq_pd_offline(struct blkg_policy_data *pd) { struct bfq_service_tree *st; struct bfq_group *bfqg = pd_to_bfqg(pd); struct bfq_data *bfqd = bfqg->bfqd; struct bfq_entity *entity = bfqg->my_entity; unsigned long flags; int i; spin_lock_irqsave(&bfqd->lock, flags); if (!entity) /* root group */ goto put_async_queues; /* * Empty all service_trees belonging to this group before * deactivating the group itself. */ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) { st = bfqg->sched_data.service_tree + i; /* * It may happen that some queues are still active * (busy) upon group destruction (if the corresponding * processes have been forced to terminate). We move * all the leaf entities corresponding to these queues * to the root_group. * Also, it may happen that the group has an entity * in service, which is disconnected from the active * tree: it must be moved, too. * There is no need to put the sync queues, as the * scheduler has taken no reference. */ bfq_reparent_active_queues(bfqd, bfqg, st, i); /* * The idle tree may still contain bfq_queues * belonging to exited task because they never * migrated to a different cgroup from the one being * destroyed now. 
In addition, even * bfq_reparent_active_queues() may happen to add some * entities to the idle tree. It happens if, in some * of the calls to bfq_bfqq_move() performed by * bfq_reparent_active_queues(), the queue to move is * empty and gets expired. */ bfq_flush_idle_tree(st); } __bfq_deactivate_entity(entity, false); put_async_queues: bfq_put_async_queues(bfqd, bfqg); spin_unlock_irqrestore(&bfqd->lock, flags); /* * @blkg is going offline and will be ignored by * blkg_[rw]stat_recursive_sum(). Transfer stats to the parent so * that they don't get lost. If IOs complete after this point, the * stats for them will be lost. Oh well... */ bfqg_stats_xfer_dead(bfqg); } void bfq_end_wr_async(struct bfq_data *bfqd) { struct blkcg_gq *blkg; list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) { struct bfq_group *bfqg = blkg_to_bfqg(blkg); bfq_end_wr_async_queues(bfqd, bfqg); } bfq_end_wr_async_queues(bfqd, bfqd->root_group); } static int bfq_io_show_weight_legacy(struct seq_file *sf, void *v) { struct blkcg *blkcg = css_to_blkcg(seq_css(sf)); struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg); unsigned int val = 0; if (bfqgd) val = bfqgd->weight; seq_printf(sf, "%u\n", val); return 0; } static u64 bfqg_prfill_weight_device(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct bfq_group *bfqg = pd_to_bfqg(pd); if (!bfqg->entity.dev_weight) return 0; return __blkg_prfill_u64(sf, pd, bfqg->entity.dev_weight); } static int bfq_io_show_weight(struct seq_file *sf, void *v) { struct blkcg *blkcg = css_to_blkcg(seq_css(sf)); struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg); seq_printf(sf, "default %u\n", bfqgd->weight); blkcg_print_blkgs(sf, blkcg, bfqg_prfill_weight_device, &blkcg_policy_bfq, 0, false); return 0; } static void bfq_group_set_weight(struct bfq_group *bfqg, u64 weight, u64 dev_weight) { weight = dev_weight ?: weight; bfqg->entity.dev_weight = dev_weight; /* * Setting the prio_changed flag of the entity * to 1 with new_weight == weight would re-set * the value of the weight to its ioprio mapping. * Set the flag only if necessary. */ if ((unsigned short)weight != bfqg->entity.new_weight) { bfqg->entity.new_weight = (unsigned short)weight; /* * Make sure that the above new value has been * stored in bfqg->entity.new_weight before * setting the prio_changed flag. In fact, * this flag may be read asynchronously (in * critical sections protected by a different * lock than that held here), and finding this * flag set may cause the execution of the code * for updating parameters whose value may * depend also on bfqg->entity.new_weight (in * __bfq_entity_update_weight_prio). * This barrier makes sure that the new value * of bfqg->entity.new_weight is correctly * seen in that code. 
*/ smp_wmb(); bfqg->entity.prio_changed = 1; } } static int bfq_io_set_weight_legacy(struct cgroup_subsys_state *css, struct cftype *cftype, u64 val) { struct blkcg *blkcg = css_to_blkcg(css); struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg); struct blkcg_gq *blkg; int ret = -ERANGE; if (val < BFQ_MIN_WEIGHT || val > BFQ_MAX_WEIGHT) return ret; ret = 0; spin_lock_irq(&blkcg->lock); bfqgd->weight = (unsigned short)val; hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { struct bfq_group *bfqg = blkg_to_bfqg(blkg); if (bfqg) bfq_group_set_weight(bfqg, val, 0); } spin_unlock_irq(&blkcg->lock); return ret; } static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { int ret; struct blkg_conf_ctx ctx; struct blkcg *blkcg = css_to_blkcg(of_css(of)); struct bfq_group *bfqg; u64 v; blkg_conf_init(&ctx, buf); ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, &ctx); if (ret) goto out; if (sscanf(ctx.body, "%llu", &v) == 1) { /* require "default" on dfl */ ret = -ERANGE; if (!v) goto out; } else if (!strcmp(strim(ctx.body), "default")) { v = 0; } else { ret = -EINVAL; goto out; } bfqg = blkg_to_bfqg(ctx.blkg); ret = -ERANGE; if (!v || (v >= BFQ_MIN_WEIGHT && v <= BFQ_MAX_WEIGHT)) { bfq_group_set_weight(bfqg, bfqg->entity.weight, v); ret = 0; } out: blkg_conf_exit(&ctx); return ret ?: nbytes; } static ssize_t bfq_io_set_weight(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { char *endp; int ret; u64 v; buf = strim(buf); /* "WEIGHT" or "default WEIGHT" sets the default weight */ v = simple_strtoull(buf, &endp, 0); if (*endp == '\0' || sscanf(buf, "default %llu", &v) == 1) { ret = bfq_io_set_weight_legacy(of_css(of), NULL, v); return ret ?: nbytes; } return bfq_io_set_device_weight(of, buf, nbytes, off); } static int bfqg_print_rwstat(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat, &blkcg_policy_bfq, seq_cft(sf)->private, true); return 0; } static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct blkg_rwstat_sample sum; blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off, &sum); return __blkg_prfill_rwstat(sf, pd, &sum); } static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), bfqg_prfill_rwstat_recursive, &blkcg_policy_bfq, seq_cft(sf)->private, true); return 0; } #ifdef CONFIG_BFQ_CGROUP_DEBUG static int bfqg_print_stat(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat, &blkcg_policy_bfq, seq_cft(sf)->private, false); return 0; } static u64 bfqg_prfill_stat_recursive(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct blkcg_gq *blkg = pd_to_blkg(pd); struct blkcg_gq *pos_blkg; struct cgroup_subsys_state *pos_css; u64 sum = 0; lockdep_assert_held(&blkg->q->queue_lock); rcu_read_lock(); blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) { struct bfq_stat *stat; if (!pos_blkg->online) continue; stat = (void *)blkg_to_pd(pos_blkg, &blkcg_policy_bfq) + off; sum += bfq_stat_read(stat) + atomic64_read(&stat->aux_cnt); } rcu_read_unlock(); return __blkg_prfill_u64(sf, pd, sum); } static int bfqg_print_stat_recursive(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), bfqg_prfill_stat_recursive, &blkcg_policy_bfq, seq_cft(sf)->private, false); return 0; } static u64 bfqg_prfill_sectors(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct 
bfq_group *bfqg = blkg_to_bfqg(pd->blkg); u64 sum = blkg_rwstat_total(&bfqg->stats.bytes); return __blkg_prfill_u64(sf, pd, sum >> 9); } static int bfqg_print_stat_sectors(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), bfqg_prfill_sectors, &blkcg_policy_bfq, 0, false); return 0; } static u64 bfqg_prfill_sectors_recursive(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct blkg_rwstat_sample tmp; blkg_rwstat_recursive_sum(pd->blkg, &blkcg_policy_bfq, offsetof(struct bfq_group, stats.bytes), &tmp); return __blkg_prfill_u64(sf, pd, (tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]) >> 9); } static int bfqg_print_stat_sectors_recursive(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), bfqg_prfill_sectors_recursive, &blkcg_policy_bfq, 0, false); return 0; } static u64 bfqg_prfill_avg_queue_size(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct bfq_group *bfqg = pd_to_bfqg(pd); u64 samples = bfq_stat_read(&bfqg->stats.avg_queue_size_samples); u64 v = 0; if (samples) { v = bfq_stat_read(&bfqg->stats.avg_queue_size_sum); v = div64_u64(v, samples); } __blkg_prfill_u64(sf, pd, v); return 0; } /* print avg_queue_size */ static int bfqg_print_avg_queue_size(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), bfqg_prfill_avg_queue_size, &blkcg_policy_bfq, 0, false); return 0; } #endif /* CONFIG_BFQ_CGROUP_DEBUG */ struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) { int ret; ret = blkcg_activate_policy(bfqd->queue->disk, &blkcg_policy_bfq); if (ret) return NULL; return blkg_to_bfqg(bfqd->queue->root_blkg); } struct blkcg_policy blkcg_policy_bfq = { .dfl_cftypes = bfq_blkg_files, .legacy_cftypes = bfq_blkcg_legacy_files, .cpd_alloc_fn = bfq_cpd_alloc, .cpd_free_fn = bfq_cpd_free, .pd_alloc_fn = bfq_pd_alloc, .pd_init_fn = bfq_pd_init, .pd_offline_fn = bfq_pd_offline, .pd_free_fn = bfq_pd_free, .pd_reset_stats_fn = bfq_pd_reset_stats, }; struct cftype bfq_blkcg_legacy_files[] = { { .name = "bfq.weight", .flags = CFTYPE_NOT_ON_ROOT, .seq_show = bfq_io_show_weight_legacy, .write_u64 = bfq_io_set_weight_legacy, }, { .name = "bfq.weight_device", .flags = CFTYPE_NOT_ON_ROOT, .seq_show = bfq_io_show_weight, .write = bfq_io_set_weight, }, /* statistics, covers only the tasks in the bfqg */ { .name = "bfq.io_service_bytes", .private = offsetof(struct bfq_group, stats.bytes), .seq_show = bfqg_print_rwstat, }, { .name = "bfq.io_serviced", .private = offsetof(struct bfq_group, stats.ios), .seq_show = bfqg_print_rwstat, }, #ifdef CONFIG_BFQ_CGROUP_DEBUG { .name = "bfq.time", .private = offsetof(struct bfq_group, stats.time), .seq_show = bfqg_print_stat, }, { .name = "bfq.sectors", .seq_show = bfqg_print_stat_sectors, }, { .name = "bfq.io_service_time", .private = offsetof(struct bfq_group, stats.service_time), .seq_show = bfqg_print_rwstat, }, { .name = "bfq.io_wait_time", .private = offsetof(struct bfq_group, stats.wait_time), .seq_show = bfqg_print_rwstat, }, { .name = "bfq.io_merged", .private = offsetof(struct bfq_group, stats.merged), .seq_show = bfqg_print_rwstat, }, { .name = "bfq.io_queued", .private = offsetof(struct bfq_group, stats.queued), .seq_show = bfqg_print_rwstat, }, #endif /* CONFIG_BFQ_CGROUP_DEBUG */ /* the same statistics which cover the bfqg and its descendants */ { .name = "bfq.io_service_bytes_recursive", .private = offsetof(struct bfq_group, stats.bytes), .seq_show = bfqg_print_rwstat_recursive, }, { .name = 
"bfq.io_serviced_recursive", .private = offsetof(struct bfq_group, stats.ios), .seq_show = bfqg_print_rwstat_recursive, }, #ifdef CONFIG_BFQ_CGROUP_DEBUG { .name = "bfq.time_recursive", .private = offsetof(struct bfq_group, stats.time), .seq_show = bfqg_print_stat_recursive, }, { .name = "bfq.sectors_recursive", .seq_show = bfqg_print_stat_sectors_recursive, }, { .name = "bfq.io_service_time_recursive", .private = offsetof(struct bfq_group, stats.service_time), .seq_show = bfqg_print_rwstat_recursive, }, { .name = "bfq.io_wait_time_recursive", .private = offsetof(struct bfq_group, stats.wait_time), .seq_show = bfqg_print_rwstat_recursive, }, { .name = "bfq.io_merged_recursive", .private = offsetof(struct bfq_group, stats.merged), .seq_show = bfqg_print_rwstat_recursive, }, { .name = "bfq.io_queued_recursive", .private = offsetof(struct bfq_group, stats.queued), .seq_show = bfqg_print_rwstat_recursive, }, { .name = "bfq.avg_queue_size", .seq_show = bfqg_print_avg_queue_size, }, { .name = "bfq.group_wait_time", .private = offsetof(struct bfq_group, stats.group_wait_time), .seq_show = bfqg_print_stat, }, { .name = "bfq.idle_time", .private = offsetof(struct bfq_group, stats.idle_time), .seq_show = bfqg_print_stat, }, { .name = "bfq.empty_time", .private = offsetof(struct bfq_group, stats.empty_time), .seq_show = bfqg_print_stat, }, { .name = "bfq.dequeue", .private = offsetof(struct bfq_group, stats.dequeue), .seq_show = bfqg_print_stat, }, #endif /* CONFIG_BFQ_CGROUP_DEBUG */ { } /* terminate */ }; struct cftype bfq_blkg_files[] = { { .name = "bfq.weight", .flags = CFTYPE_NOT_ON_ROOT, .seq_show = bfq_io_show_weight, .write = bfq_io_set_weight, }, {} /* terminate */ }; #else /* CONFIG_BFQ_GROUP_IOSCHED */ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, struct bfq_group *bfqg) {} void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg) { struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); entity->weight = entity->new_weight; entity->orig_weight = entity->new_weight; if (bfqq) { bfqq->ioprio = bfqq->new_ioprio; bfqq->ioprio_class = bfqq->new_ioprio_class; } entity->sched_data = &bfqg->sched_data; } void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) {} void bfq_end_wr_async(struct bfq_data *bfqd) { bfq_end_wr_async_queues(bfqd, bfqd->root_group); } struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio) { return bfqd->root_group; } struct bfq_group *bfqq_group(struct bfq_queue *bfqq) { return bfqq->bfqd->root_group; } void bfqg_and_blkg_put(struct bfq_group *bfqg) {} struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) { struct bfq_group *bfqg; int i; bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node); if (!bfqg) return NULL; for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; return bfqg; } #endif /* CONFIG_BFQ_GROUP_IOSCHED */
// SPDX-License-Identifier: GPL-2.0 /* * class.c - basic device class management * * Copyright (c) 2002-3 Patrick Mochel * Copyright (c) 2002-3 Open Source Development Labs * Copyright (c) 2003-2004 Greg Kroah-Hartman * Copyright (c) 2003-2004 IBM Corp. */ #include <linux/device/class.h> #include <linux/device.h> #include <linux/module.h> #include <linux/init.h> #include <linux/string.h> #include <linux/kdev_t.h> #include <linux/err.h> #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/mutex.h> #include "base.h" /* /sys/class */ static struct kset *class_kset; #define to_class_attr(_attr) container_of(_attr, struct class_attribute, attr) /** * class_to_subsys - Turn a struct class into a struct subsys_private * * @class: pointer to the struct class to look up * * The driver core internals need to work on the subsys_private structure, not * the external struct class pointer.
This function walks the list of * registered classes in the system and finds the matching one and returns the * internal struct subsys_private that relates to that class. * * Note, the reference count of the return value is INCREMENTED if it is not * NULL. A call to subsys_put() must be done when finished with the pointer in * order for it to be properly freed. */ struct subsys_private *class_to_subsys(const struct class *class) { struct subsys_private *sp = NULL; struct kobject *kobj; if (!class || !class_kset) return NULL; spin_lock(&class_kset->list_lock); if (list_empty(&class_kset->list)) goto done; list_for_each_entry(kobj, &class_kset->list, entry) { struct kset *kset = container_of(kobj, struct kset, kobj); sp = container_of_const(kset, struct subsys_private, subsys); if (sp->class == class) goto done; } sp = NULL; done: sp = subsys_get(sp); spin_unlock(&class_kset->list_lock); return sp; } static ssize_t class_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct class_attribute *class_attr = to_class_attr(attr); struct subsys_private *cp = to_subsys_private(kobj); ssize_t ret = -EIO; if (class_attr->show) ret = class_attr->show(cp->class, class_attr, buf); return ret; } static ssize_t class_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { struct class_attribute *class_attr = to_class_attr(attr); struct subsys_private *cp = to_subsys_private(kobj); ssize_t ret = -EIO; if (class_attr->store) ret = class_attr->store(cp->class, class_attr, buf, count); return ret; } static void class_release(struct kobject *kobj) { struct subsys_private *cp = to_subsys_private(kobj); const struct class *class = cp->class; pr_debug("class '%s': release.\n", class->name); if (class->class_release) class->class_release(class); else pr_debug("class '%s' does not have a release() function, " "be careful\n", class->name); lockdep_unregister_key(&cp->lock_key); kfree(cp); } static const struct kobj_ns_type_operations *class_child_ns_type(const struct kobject *kobj) { const struct subsys_private *cp = to_subsys_private(kobj); const struct class *class = cp->class; return class->ns_type; } static const struct sysfs_ops class_sysfs_ops = { .show = class_attr_show, .store = class_attr_store, }; static const struct kobj_type class_ktype = { .sysfs_ops = &class_sysfs_ops, .release = class_release, .child_ns_type = class_child_ns_type, }; int class_create_file_ns(const struct class *cls, const struct class_attribute *attr, const void *ns) { struct subsys_private *sp = class_to_subsys(cls); int error; if (!sp) return -EINVAL; error = sysfs_create_file_ns(&sp->subsys.kobj, &attr->attr, ns); subsys_put(sp); return error; } EXPORT_SYMBOL_GPL(class_create_file_ns); void class_remove_file_ns(const struct class *cls, const struct class_attribute *attr, const void *ns) { struct subsys_private *sp = class_to_subsys(cls); if (!sp) return; sysfs_remove_file_ns(&sp->subsys.kobj, &attr->attr, ns); subsys_put(sp); } EXPORT_SYMBOL_GPL(class_remove_file_ns); static struct device *klist_class_to_dev(struct klist_node *n) { struct device_private *p = to_device_private_class(n); return p->device; } static void klist_class_dev_get(struct klist_node *n) { struct device *dev = klist_class_to_dev(n); get_device(dev); } static void klist_class_dev_put(struct klist_node *n) { struct device *dev = klist_class_to_dev(n); put_device(dev); } int class_register(const struct class *cls) { struct subsys_private *cp; struct lock_class_key *key; int error; pr_debug("device class '%s': 
registering\n", cls->name); if (cls->ns_type && !cls->namespace) { pr_err("%s: class '%s' does not have namespace\n", __func__, cls->name); return -EINVAL; } if (!cls->ns_type && cls->namespace) { pr_err("%s: class '%s' does not have ns_type\n", __func__, cls->name); return -EINVAL; } cp = kzalloc(sizeof(*cp), GFP_KERNEL); if (!cp) return -ENOMEM; klist_init(&cp->klist_devices, klist_class_dev_get, klist_class_dev_put); INIT_LIST_HEAD(&cp->interfaces); kset_init(&cp->glue_dirs); key = &cp->lock_key; lockdep_register_key(key); __mutex_init(&cp->mutex, "subsys mutex", key); error = kobject_set_name(&cp->subsys.kobj, "%s", cls->name); if (error) goto err_out; cp->subsys.kobj.kset = class_kset; cp->subsys.kobj.ktype = &class_ktype; cp->class = cls; error = kset_register(&cp->subsys); if (error) goto err_out; error = sysfs_create_groups(&cp->subsys.kobj, cls->class_groups); if (error) { kobject_del(&cp->subsys.kobj); kfree_const(cp->subsys.kobj.name); goto err_out; } return 0; err_out: lockdep_unregister_key(key); kfree(cp); return error; } EXPORT_SYMBOL_GPL(class_register); void class_unregister(const struct class *cls) { struct subsys_private *sp = class_to_subsys(cls); if (!sp) return; pr_debug("device class '%s': unregistering\n", cls->name); sysfs_remove_groups(&sp->subsys.kobj, cls->class_groups); kset_unregister(&sp->subsys); subsys_put(sp); } EXPORT_SYMBOL_GPL(class_unregister); static void class_create_release(const struct class *cls) { pr_debug("%s called for %s\n", __func__, cls->name); kfree(cls); } /** * class_create - create a struct class structure * @name: pointer to a string for the name of this class. * * This is used to create a struct class pointer that can then be used * in calls to device_create(). * * Returns &struct class pointer on success, or ERR_PTR() on error. * * Note, the pointer created here is to be destroyed when finished by * making a call to class_destroy(). */ struct class *class_create(const char *name) { struct class *cls; int retval; cls = kzalloc(sizeof(*cls), GFP_KERNEL); if (!cls) { retval = -ENOMEM; goto error; } cls->name = name; cls->class_release = class_create_release; retval = class_register(cls); if (retval) goto error; return cls; error: kfree(cls); return ERR_PTR(retval); } EXPORT_SYMBOL_GPL(class_create); /** * class_destroy - destroys a struct class structure * @cls: pointer to the struct class that is to be destroyed * * Note, the pointer to be destroyed must have been created with a call * to class_create(). */ void class_destroy(const struct class *cls) { if (IS_ERR_OR_NULL(cls)) return; class_unregister(cls); } EXPORT_SYMBOL_GPL(class_destroy); /** * class_dev_iter_init - initialize class device iterator * @iter: class iterator to initialize * @class: the class we wanna iterate over * @start: the device to start iterating from, if any * @type: device_type of the devices to iterate over, NULL for all * * Initialize class iterator @iter such that it iterates over devices * of @class. If @start is set, the list iteration will start there, * otherwise if it is NULL, the iteration starts at the beginning of * the list. 
*/ void class_dev_iter_init(struct class_dev_iter *iter, const struct class *class, const struct device *start, const struct device_type *type) { struct subsys_private *sp = class_to_subsys(class); struct klist_node *start_knode = NULL; memset(iter, 0, sizeof(*iter)); if (!sp) { pr_crit("%s: class %p was not registered yet\n", __func__, class); return; } if (start) start_knode = &start->p->knode_class; klist_iter_init_node(&sp->klist_devices, &iter->ki, start_knode); iter->type = type; iter->sp = sp; } EXPORT_SYMBOL_GPL(class_dev_iter_init); /** * class_dev_iter_next - iterate to the next device * @iter: class iterator to proceed * * Proceed @iter to the next device and return it. Returns NULL if * iteration is complete. * * The returned device is referenced and won't be released till * iterator is proceed to the next device or exited. The caller is * free to do whatever it wants to do with the device including * calling back into class code. */ struct device *class_dev_iter_next(struct class_dev_iter *iter) { struct klist_node *knode; struct device *dev; if (!iter->sp) return NULL; while (1) { knode = klist_next(&iter->ki); if (!knode) return NULL; dev = klist_class_to_dev(knode); if (!iter->type || iter->type == dev->type) return dev; } } EXPORT_SYMBOL_GPL(class_dev_iter_next); /** * class_dev_iter_exit - finish iteration * @iter: class iterator to finish * * Finish an iteration. Always call this function after iteration is * complete whether the iteration ran till the end or not. */ void class_dev_iter_exit(struct class_dev_iter *iter) { klist_iter_exit(&iter->ki); subsys_put(iter->sp); } EXPORT_SYMBOL_GPL(class_dev_iter_exit); /** * class_for_each_device - device iterator * @class: the class we're iterating * @start: the device to start with in the list, if any. * @data: data for the callback * @fn: function to be called for each device * * Iterate over @class's list of devices, and call @fn for each, * passing it @data. If @start is set, the list iteration will start * there, otherwise if it is NULL, the iteration starts at the * beginning of the list. * * We check the return of @fn each time. If it returns anything * other than 0, we break out and return that value. * * @fn is allowed to do anything including calling back into class * code. There's no locking restriction. */ int class_for_each_device(const struct class *class, const struct device *start, void *data, device_iter_t fn) { struct subsys_private *sp = class_to_subsys(class); struct class_dev_iter iter; struct device *dev; int error = 0; if (!class) return -EINVAL; if (!sp) { WARN(1, "%s called for class '%s' before it was registered", __func__, class->name); return -EINVAL; } class_dev_iter_init(&iter, class, start, NULL); while ((dev = class_dev_iter_next(&iter))) { error = fn(dev, data); if (error) break; } class_dev_iter_exit(&iter); subsys_put(sp); return error; } EXPORT_SYMBOL_GPL(class_for_each_device); /** * class_find_device - device iterator for locating a particular device * @class: the class we're iterating * @start: Device to begin with * @data: data for the match function * @match: function to check device * * This is similar to the class_for_each_dev() function above, but it * returns a reference to a device that is 'found' for later use, as * determined by the @match callback. * * The callback should return 0 if the device doesn't match and non-zero * if it does. If the callback returns non-zero, this function will * return to the caller and not iterate over any more devices. 
* * Note, you will need to drop the reference with put_device() after use. * * @match is allowed to do anything including calling back into class * code. There's no locking restriction. */ struct device *class_find_device(const struct class *class, const struct device *start, const void *data, device_match_t match) { struct subsys_private *sp = class_to_subsys(class); struct class_dev_iter iter; struct device *dev; if (!class) return NULL; if (!sp) { WARN(1, "%s called for class '%s' before it was registered", __func__, class->name); return NULL; } class_dev_iter_init(&iter, class, start, NULL); while ((dev = class_dev_iter_next(&iter))) { if (match(dev, data)) { get_device(dev); break; } } class_dev_iter_exit(&iter); subsys_put(sp); return dev; } EXPORT_SYMBOL_GPL(class_find_device); int class_interface_register(struct class_interface *class_intf) { struct subsys_private *sp; const struct class *parent; struct class_dev_iter iter; struct device *dev; if (!class_intf || !class_intf->class) return -ENODEV; parent = class_intf->class; sp = class_to_subsys(parent); if (!sp) return -EINVAL; /* * Reference in sp is now incremented and will be dropped when * the interface is removed in the call to class_interface_unregister() */ mutex_lock(&sp->mutex); list_add_tail(&class_intf->node, &sp->interfaces); if (class_intf->add_dev) { class_dev_iter_init(&iter, parent, NULL, NULL); while ((dev = class_dev_iter_next(&iter))) class_intf->add_dev(dev); class_dev_iter_exit(&iter); } mutex_unlock(&sp->mutex); return 0; } EXPORT_SYMBOL_GPL(class_interface_register); void class_interface_unregister(struct class_interface *class_intf) { struct subsys_private *sp; const struct class *parent = class_intf->class; struct class_dev_iter iter; struct device *dev; if (!parent) return; sp = class_to_subsys(parent); if (!sp) return; mutex_lock(&sp->mutex); list_del_init(&class_intf->node); if (class_intf->remove_dev) { class_dev_iter_init(&iter, parent, NULL, NULL); while ((dev = class_dev_iter_next(&iter))) class_intf->remove_dev(dev); class_dev_iter_exit(&iter); } mutex_unlock(&sp->mutex); /* * Decrement the reference count twice, once for the class_to_subsys() * call in the start of this function, and the second one from the * reference increment in class_interface_register() */ subsys_put(sp); subsys_put(sp); } EXPORT_SYMBOL_GPL(class_interface_unregister); ssize_t show_class_attr_string(const struct class *class, const struct class_attribute *attr, char *buf) { struct class_attribute_string *cs; cs = container_of(attr, struct class_attribute_string, attr); return sysfs_emit(buf, "%s\n", cs->str); } EXPORT_SYMBOL_GPL(show_class_attr_string); struct class_compat { struct kobject *kobj; }; /** * class_compat_register - register a compatibility class * @name: the name of the class * * Compatibility class are meant as a temporary user-space compatibility * workaround when converting a family of class devices to a bus devices. 
*/ struct class_compat *class_compat_register(const char *name) { struct class_compat *cls; cls = kmalloc(sizeof(struct class_compat), GFP_KERNEL); if (!cls) return NULL; cls->kobj = kobject_create_and_add(name, &class_kset->kobj); if (!cls->kobj) { kfree(cls); return NULL; } return cls; } EXPORT_SYMBOL_GPL(class_compat_register); /** * class_compat_unregister - unregister a compatibility class * @cls: the class to unregister */ void class_compat_unregister(struct class_compat *cls) { kobject_put(cls->kobj); kfree(cls); } EXPORT_SYMBOL_GPL(class_compat_unregister); /** * class_compat_create_link - create a compatibility class device link to * a bus device * @cls: the compatibility class * @dev: the target bus device */ int class_compat_create_link(struct class_compat *cls, struct device *dev) { return sysfs_create_link(cls->kobj, &dev->kobj, dev_name(dev)); } EXPORT_SYMBOL_GPL(class_compat_create_link); /** * class_compat_remove_link - remove a compatibility class device link to * a bus device * @cls: the compatibility class * @dev: the target bus device */ void class_compat_remove_link(struct class_compat *cls, struct device *dev) { sysfs_remove_link(cls->kobj, dev_name(dev)); } EXPORT_SYMBOL_GPL(class_compat_remove_link); /** * class_is_registered - determine if at this moment in time, a class is * registered in the driver core or not. * @class: the class to check * * Returns a boolean to state if the class is registered in the driver core * or not. Note that the value could switch right after this call is made, * so only use this in places where you "know" it is safe to do so (usually * to determine if the specific class has been registered yet or not). * * Be careful in using this. */ bool class_is_registered(const struct class *class) { struct subsys_private *sp = class_to_subsys(class); bool is_initialized = false; if (sp) { is_initialized = true; subsys_put(sp); } return is_initialized; } EXPORT_SYMBOL_GPL(class_is_registered); int __init classes_init(void) { class_kset = kset_create_and_add("class", NULL, NULL); if (!class_kset) return -ENOMEM; return 0; }
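The class_dev_iter_init()/class_dev_iter_next()/class_dev_iter_exit() triple documented above is the building block behind class_for_each_device() and class_find_device(). Below is a minimal sketch of how a caller might use the iterator directly; it assumes a hypothetical example_class pointer, and every example_* identifier is an assumption made for illustration, not part of class.c:

/*
 * Illustrative sketch only: walk every device bound to a class using the
 * iterator API defined above.
 */
#include <linux/device.h>
#include <linux/device/class.h>

static void example_list_class_devices(const struct class *example_class)
{
	struct class_dev_iter iter;
	struct device *dev;

	/* NULL start and NULL type: visit every device of the class. */
	class_dev_iter_init(&iter, example_class, NULL, NULL);
	while ((dev = class_dev_iter_next(&iter)))
		pr_info("found device %s\n", dev_name(dev));
	/* Always pair init with exit so the subsys reference is dropped. */
	class_dev_iter_exit(&iter);
}

As the doc comments above note, each device returned by class_dev_iter_next() stays referenced until the iterator advances or exits, so it can be used inside the loop without taking an extra reference.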
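class_interface_register() and class_interface_unregister() above also invoke the interface's add_dev()/remove_dev() hooks, under the subsystem mutex, for every device already present on the class at registration and unregistration time. The following is a hedged sketch of a hypothetical interface; it assumes the usual struct class_interface member names (class, add_dev, remove_dev) from <linux/device/class.h>, and all example_* identifiers are invented for illustration:

/*
 * Illustrative sketch only: a hypothetical class_interface that logs
 * devices as they appear on and disappear from a class.
 */
#include <linux/device.h>
#include <linux/device/class.h>

static int example_add_dev(struct device *dev)
{
	pr_info("interface: device %s added\n", dev_name(dev));
	return 0;
}

static void example_remove_dev(struct device *dev)
{
	pr_info("interface: device %s removed\n", dev_name(dev));
}

static struct class_interface example_intf = {
	/* .class must be filled in before class_interface_register(). */
	.add_dev	= example_add_dev,
	.remove_dev	= example_remove_dev,
};

static int example_intf_attach(const struct class *example_class)
{
	example_intf.class = example_class;
	/* Calls example_add_dev() for every device already on the class. */
	return class_interface_register(&example_intf);
}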
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Copyright (C) International Business Machines Corp., 2000-2005
 */
/*
 *	jfs_xtree.c: extent allocation descriptor B+-tree manager
 */

#include <linux/fs.h>
#include <linux/module.h>
#include <linux/quotaops.h>
#include <linux/seq_file.h>
#include "jfs_incore.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_dmap.h"
#include "jfs_dinode.h"
#include "jfs_superblock.h"
#include "jfs_debug.h"

/*
 * xtree local flag
*/ #define XT_INSERT 0x00000001 /* * xtree key/entry comparison: extent offset * * return: * -1: k < start of extent * 0: start_of_extent <= k <= end_of_extent * 1: k > end_of_extent */ #define XT_CMP(CMP, K, X, OFFSET64)\ {\ OFFSET64 = offsetXAD(X);\ (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\ ((K) < OFFSET64) ? -1 : 0;\ } /* write a xad entry */ #define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\ {\ (XAD)->flag = (FLAG);\ XADoffset((XAD), (OFF));\ XADlength((XAD), (LEN));\ XADaddress((XAD), (ADDR));\ } #define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot) /* get page buffer for specified block address */ /* ToDo: Replace this ugly macro with a function */ #define XT_GETPAGE(IP, BN, MP, SIZE, P, RC) \ do { \ BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot); \ if (!(RC)) { \ if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) || \ (le16_to_cpu((P)->header.nextindex) > \ le16_to_cpu((P)->header.maxentry)) || \ (le16_to_cpu((P)->header.maxentry) > \ (((BN) == 0) ? XTROOTMAXSLOT : PSIZE >> L2XTSLOTSIZE))) { \ jfs_error((IP)->i_sb, \ "XT_GETPAGE: xtree page corrupt\n"); \ BT_PUTPAGE(MP); \ MP = NULL; \ RC = -EIO; \ } \ } \ } while (0) /* for consistency */ #define XT_PUTPAGE(MP) BT_PUTPAGE(MP) #define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot) /* xtree entry parameter descriptor */ struct xtsplit { struct metapage *mp; s16 index; u8 flag; s64 off; s64 addr; int len; struct pxdlist *pxdlist; }; /* * statistics */ #ifdef CONFIG_JFS_STATISTICS static struct { uint search; uint fastSearch; uint split; } xtStat; #endif /* * forward references */ static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp, struct btstack * btstack, int flag); static int xtSplitUp(tid_t tid, struct inode *ip, struct xtsplit * split, struct btstack * btstack); static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp); static int xtSplitRoot(tid_t tid, struct inode *ip, struct xtsplit * split, struct metapage ** rmpp); /* * xtLookup() * * function: map a single page into a physical extent; */ int xtLookup(struct inode *ip, s64 lstart, s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check) { int rc = 0; struct btstack btstack; int cmp; s64 bn; struct metapage *mp; xtpage_t *p; int index; xad_t *xad; s64 next, size, xoff, xend; int xlen; s64 xaddr; *paddr = 0; *plen = llen; if (!no_check) { /* is lookup offset beyond eof ? */ size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >> JFS_SBI(ip->i_sb)->l2bsize; if (lstart >= size) return 0; } /* * search for the xad entry covering the logical extent */ //search: if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) { jfs_err("xtLookup: xtSearch returned %d", rc); return rc; } /* * compute the physical extent covering logical extent * * N.B. search may have failed (e.g., hole in sparse file), * and returned the index of the next entry. */ /* retrieve search result */ XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); /* is xad found covering start of logical extent ? 
* lstart is a page start address, * i.e., lstart cannot start in a hole; */ if (cmp) { if (next) *plen = min(next - lstart, llen); goto out; } /* * lxd covered by xad */ xad = &p->xad[index]; xoff = offsetXAD(xad); xlen = lengthXAD(xad); xend = xoff + xlen; xaddr = addressXAD(xad); /* initialize new pxd */ *pflag = xad->flag; *paddr = xaddr + (lstart - xoff); /* a page must be fully covered by an xad */ *plen = min(xend - lstart, llen); out: XT_PUTPAGE(mp); return rc; } /* * xtSearch() * * function: search for the xad entry covering specified offset. * * parameters: * ip - file object; * xoff - extent offset; * nextp - address of next extent (if any) for search miss * cmpp - comparison result: * btstack - traverse stack; * flag - search process flag (XT_INSERT); * * returns: * btstack contains (bn, index) of search path traversed to the entry. * *cmpp is set to result of comparison with the entry returned. * the page containing the entry is pinned at exit. */ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, int *cmpp, struct btstack * btstack, int flag) { struct jfs_inode_info *jfs_ip = JFS_IP(ip); int rc = 0; int cmp = 1; /* init for empty page */ s64 bn; /* block number */ struct metapage *mp; /* page buffer */ xtpage_t *p; /* page */ xad_t *xad; int base, index, lim, btindex; struct btframe *btsp; int nsplit = 0; /* number of pages to split */ s64 t64; s64 next = 0; INCREMENT(xtStat.search); BT_CLR(btstack); btstack->nsplit = 0; /* * search down tree from root: * * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of * internal page, child page Pi contains entry with k, Ki <= K < Kj. * * if entry with search key K is not found * internal page search find the entry with largest key Ki * less than K which point to the child page to search; * leaf page search find the entry with smallest key Kj * greater than K so that the returned index is the position of * the entry to be shifted right for insertion of new entry. * for empty tree, search key is greater than any key of the tree. * * by convention, root bn = 0. 
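 *
 * Illustrative example (added; offsets are hypothetical): with router
 * entries at extent offsets 0, 100 and 200 and a search key K = 150,
 * an internal page descends through the child of the 100 entry, i.e.
 * the entry with the largest offset not greater than K; a leaf page
 * instead reports a miss at the index of the 200 entry, which is the
 * slot a new entry for K would be shifted into.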
*/ for (bn = 0;;) { /* get/pin the page to search */ XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); if (rc) return rc; /* try sequential access heuristics with the previous * access entry in target leaf page: * once search narrowed down into the target leaf, * key must either match an entry in the leaf or * key entry does not exist in the tree; */ //fastSearch: if ((jfs_ip->btorder & BT_SEQUENTIAL) && (p->header.flag & BT_LEAF) && (index = jfs_ip->btindex) < le16_to_cpu(p->header.nextindex)) { xad = &p->xad[index]; t64 = offsetXAD(xad); if (xoff < t64 + lengthXAD(xad)) { if (xoff >= t64) { *cmpp = 0; goto out; } /* stop sequential access heuristics */ goto binarySearch; } else { /* (t64 + lengthXAD(xad)) <= xoff */ /* try next sequential entry */ index++; if (index < le16_to_cpu(p->header.nextindex)) { xad++; t64 = offsetXAD(xad); if (xoff < t64 + lengthXAD(xad)) { if (xoff >= t64) { *cmpp = 0; goto out; } /* miss: key falls between * previous and this entry */ *cmpp = 1; next = t64; goto out; } /* (xoff >= t64 + lengthXAD(xad)); * matching entry may be further out: * stop heuristic search */ /* stop sequential access heuristics */ goto binarySearch; } /* (index == p->header.nextindex); * miss: key entry does not exist in * the target leaf/tree */ *cmpp = 1; goto out; } /* * if hit, return index of the entry found, and * if miss, where new entry with search key is * to be inserted; */ out: /* compute number of pages to split */ if (flag & XT_INSERT) { if (p->header.nextindex == /* little-endian */ p->header.maxentry) nsplit++; else nsplit = 0; btstack->nsplit = nsplit; } /* save search result */ btsp = btstack->top; btsp->bn = bn; btsp->index = index; btsp->mp = mp; /* update sequential access heuristics */ jfs_ip->btindex = index; if (nextp) *nextp = next; INCREMENT(xtStat.fastSearch); return 0; } /* well, ... full search now */ binarySearch: lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART; /* * binary search with search key K on the current page */ for (base = XTENTRYSTART; lim; lim >>= 1) { index = base + (lim >> 1); XT_CMP(cmp, xoff, &p->xad[index], t64); if (cmp == 0) { /* * search hit */ /* search hit - leaf page: * return the entry found */ if (p->header.flag & BT_LEAF) { *cmpp = cmp; /* compute number of pages to split */ if (flag & XT_INSERT) { if (p->header.nextindex == p->header.maxentry) nsplit++; else nsplit = 0; btstack->nsplit = nsplit; } /* save search result */ btsp = btstack->top; btsp->bn = bn; btsp->index = index; btsp->mp = mp; /* init sequential access heuristics */ btindex = jfs_ip->btindex; if (index == btindex || index == btindex + 1) jfs_ip->btorder = BT_SEQUENTIAL; else jfs_ip->btorder = BT_RANDOM; jfs_ip->btindex = index; return 0; } /* search hit - internal page: * descend/search its child page */ if (index < le16_to_cpu(p->header.nextindex)-1) next = offsetXAD(&p->xad[index + 1]); goto next; } if (cmp > 0) { base = index + 1; --lim; } } /* * search miss * * base is the smallest index with key (Kj) greater than * search key (K) and may be zero or maxentry index. */ if (base < le16_to_cpu(p->header.nextindex)) next = offsetXAD(&p->xad[base]); /* * search miss - leaf page: * * return location of entry (base) where new entry with * search key K is to be inserted. 
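 *
 * Worked illustration (added, hypothetical values): if the leaf holds
 * extents covering blocks [0,4), [8,12) and [16,20) and the search key
 * is 12, XT_CMP never returns 0, base ends up at the [16,20) entry and
 * *cmpp comes back non-zero; next is set to 16, which lets callers such
 * as xtLookup() size the hole up to the next mapped extent.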
*/ if (p->header.flag & BT_LEAF) { *cmpp = cmp; /* compute number of pages to split */ if (flag & XT_INSERT) { if (p->header.nextindex == p->header.maxentry) nsplit++; else nsplit = 0; btstack->nsplit = nsplit; } /* save search result */ btsp = btstack->top; btsp->bn = bn; btsp->index = base; btsp->mp = mp; /* init sequential access heuristics */ btindex = jfs_ip->btindex; if (base == btindex || base == btindex + 1) jfs_ip->btorder = BT_SEQUENTIAL; else jfs_ip->btorder = BT_RANDOM; jfs_ip->btindex = base; if (nextp) *nextp = next; return 0; } /* * search miss - non-leaf page: * * if base is non-zero, decrement base by one to get the parent * entry of the child page to search. */ index = base ? base - 1 : base; /* * go down to child page */ next: /* update number of pages to split */ if (p->header.nextindex == p->header.maxentry) nsplit++; else nsplit = 0; /* push (bn, index) of the parent page/entry */ if (BT_STACK_FULL(btstack)) { jfs_error(ip->i_sb, "stack overrun!\n"); XT_PUTPAGE(mp); return -EIO; } BT_PUSH(btstack, bn, index); /* get the child page block number */ bn = addressXAD(&p->xad[index]); /* unpin the parent page */ XT_PUTPAGE(mp); } } /* * xtInsert() * * function: * * parameter: * tid - transaction id; * ip - file object; * xflag - extent flag (XAD_NOTRECORDED): * xoff - extent offset; * xlen - extent length; * xaddrp - extent address pointer (in/out): * if (*xaddrp) * caller allocated data extent at *xaddrp; * else * allocate data extent and return its xaddr; * flag - * * return: */ int xtInsert(tid_t tid, /* transaction id */ struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp, int flag) { int rc = 0; s64 xaddr, hint; struct metapage *mp; /* meta-page buffer */ xtpage_t *p; /* base B+-tree index page */ s64 bn; int index, nextindex; struct btstack btstack; /* traverse stack */ struct xtsplit split; /* split information */ xad_t *xad; int cmp; s64 next; struct tlock *tlck; struct xtlock *xtlck; jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen); /* * search for the entry location at which to insert: * * xtFastSearch() and xtSearch() both returns (leaf page * pinned, index at which to insert). * n.b. xtSearch() may return index of maxentry of * the full page. */ if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT))) return rc; /* retrieve search result */ XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); /* This test must follow XT_GETSEARCH since mp must be valid if * we branch to out: */ if ((cmp == 0) || (next && (xlen > next - xoff))) { rc = -EEXIST; goto out; } /* * allocate data extent requested * * allocation hint: last xad */ if ((xaddr = *xaddrp) == 0) { if (index > XTENTRYSTART) { xad = &p->xad[index - 1]; hint = addressXAD(xad) + lengthXAD(xad) - 1; } else hint = 0; if ((rc = dquot_alloc_block(ip, xlen))) goto out; if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) { dquot_free_block(ip, xlen); goto out; } } /* * insert entry for new extent */ xflag |= XAD_NEW; /* * if the leaf page is full, split the page and * propagate up the router entry for the new page from split * * The xtSplitUp() will insert the entry and unpin the leaf page. 
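 *
 * Added note: if xtSplitUp() fails below after a fresh data extent was
 * allocated above (*xaddrp was zero on entry), the error path frees
 * both the blocks (dbFree) and the quota charge (dquot_free_block), so
 * a failed insert leaves the allocation maps unchanged.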
*/ nextindex = le16_to_cpu(p->header.nextindex); if (nextindex == le16_to_cpu(p->header.maxentry)) { split.mp = mp; split.index = index; split.flag = xflag; split.off = xoff; split.len = xlen; split.addr = xaddr; split.pxdlist = NULL; if ((rc = xtSplitUp(tid, ip, &split, &btstack))) { /* undo data extent allocation */ if (*xaddrp == 0) { dbFree(ip, xaddr, (s64) xlen); dquot_free_block(ip, xlen); } return rc; } *xaddrp = xaddr; return 0; } /* * insert the new entry into the leaf page */ /* * acquire a transaction lock on the leaf page; * * action: xad insertion/extension; */ BT_MARK_DIRTY(mp, ip); /* if insert into middle, shift right remaining entries. */ if (index < nextindex) memmove(&p->xad[index + 1], &p->xad[index], (nextindex - index) * sizeof(xad_t)); /* insert the new entry: mark the entry NEW */ xad = &p->xad[index]; XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); /* advance next available entry index */ le16_add_cpu(&p->header.nextindex, 1); /* Don't log it if there are no links to the file */ if (!test_cflag(COMMIT_Nolink, ip)) { tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); xtlck = (struct xtlock *) & tlck->lock; xtlck->lwm.offset = (xtlck->lwm.offset) ? min(index, (int)xtlck->lwm.offset) : index; xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; } *xaddrp = xaddr; out: /* unpin the leaf page */ XT_PUTPAGE(mp); return rc; } /* * xtSplitUp() * * function: * split full pages as propagating insertion up the tree * * parameter: * tid - transaction id; * ip - file object; * split - entry parameter descriptor; * btstack - traverse stack from xtSearch() * * return: */ static int xtSplitUp(tid_t tid, struct inode *ip, struct xtsplit * split, struct btstack * btstack) { int rc = 0; struct metapage *smp; xtpage_t *sp; /* split page */ struct metapage *rmp; s64 rbn; /* new right page block number */ struct metapage *rcmp; xtpage_t *rcp; /* right child page */ s64 rcbn; /* right child page block number */ int skip; /* index of entry of insertion */ int nextindex; /* next available entry index of p */ struct btframe *parent; /* parent page entry on traverse stack */ xad_t *xad; s64 xaddr; int xlen; int nsplit; /* number of pages split */ struct pxdlist pxdlist; pxd_t *pxd; struct tlock *tlck; struct xtlock *xtlck; smp = split->mp; sp = XT_PAGE(ip, smp); /* is inode xtree root extension/inline EA area free ? */ if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) && (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) && (JFS_IP(ip)->mode2 & INLINEEA)) { sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT); JFS_IP(ip)->mode2 &= ~INLINEEA; BT_MARK_DIRTY(smp, ip); /* * acquire a transaction lock on the leaf page; * * action: xad insertion/extension; */ /* if insert into middle, shift right remaining entries. */ skip = split->index; nextindex = le16_to_cpu(sp->header.nextindex); if (skip < nextindex) memmove(&sp->xad[skip + 1], &sp->xad[skip], (nextindex - skip) * sizeof(xad_t)); /* insert the new entry: mark the entry NEW */ xad = &sp->xad[skip]; XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr); /* advance next available entry index */ le16_add_cpu(&sp->header.nextindex, 1); /* Don't log it if there are no links to the file */ if (!test_cflag(COMMIT_Nolink, ip)) { tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW); xtlck = (struct xtlock *) & tlck->lock; xtlck->lwm.offset = (xtlck->lwm.offset) ? 
min(skip, (int)xtlck->lwm.offset) : skip; xtlck->lwm.length = le16_to_cpu(sp->header.nextindex) - xtlck->lwm.offset; } return 0; } /* * allocate new index blocks to cover index page split(s) * * allocation hint: ? */ if (split->pxdlist == NULL) { nsplit = btstack->nsplit; split->pxdlist = &pxdlist; pxdlist.maxnpxd = pxdlist.npxd = 0; pxd = &pxdlist.pxd[0]; xlen = JFS_SBI(ip->i_sb)->nbperpage; for (; nsplit > 0; nsplit--, pxd++) { if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr)) == 0) { PXDaddress(pxd, xaddr); PXDlength(pxd, xlen); pxdlist.maxnpxd++; continue; } /* undo allocation */ XT_PUTPAGE(smp); return rc; } } /* * Split leaf page <sp> into <sp> and a new right page <rp>. * * The split routines insert the new entry into the leaf page, * and acquire txLock as appropriate. * return <rp> pinned and its block number <rpbn>. */ rc = (sp->header.flag & BT_ROOT) ? xtSplitRoot(tid, ip, split, &rmp) : xtSplitPage(tid, ip, split, &rmp, &rbn); XT_PUTPAGE(smp); if (rc) return -EIO; /* * propagate up the router entry for the leaf page just split * * insert a router entry for the new page into the parent page, * propagate the insert/split up the tree by walking back the stack * of (bn of parent page, index of child page entry in parent page) * that were traversed during the search for the page that split. * * the propagation of insert/split up the tree stops if the root * splits or the page inserted into doesn't have to split to hold * the new entry. * * the parent entry for the split page remains the same, and * a new entry is inserted at its right with the first key and * block number of the new right page. * * There are a maximum of 3 pages pinned at any time: * right child, left parent and right parent (when the parent splits) * to keep the child page pinned while working on the parent. * make sure that all pins are released at exit. */ while ((parent = BT_POP(btstack)) != NULL) { /* parent page specified by stack frame <parent> */ /* keep current child pages <rcp> pinned */ rcmp = rmp; rcbn = rbn; rcp = XT_PAGE(ip, rcmp); /* * insert router entry in parent for new right child page <rp> */ /* get/pin the parent page <sp> */ XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc); if (rc) { XT_PUTPAGE(rcmp); return rc; } /* * The new key entry goes ONE AFTER the index of parent entry, * because the split was to the right. */ skip = parent->index + 1; /* * split or shift right remaining entries of the parent page */ nextindex = le16_to_cpu(sp->header.nextindex); /* * parent page is full - split the parent page */ if (nextindex == le16_to_cpu(sp->header.maxentry)) { /* init for parent page split */ split->mp = smp; split->index = skip; /* index at insert */ split->flag = XAD_NEW; split->off = offsetXAD(&rcp->xad[XTENTRYSTART]); split->len = JFS_SBI(ip->i_sb)->nbperpage; split->addr = rcbn; /* unpin previous right child page */ XT_PUTPAGE(rcmp); /* The split routines insert the new entry, * and acquire txLock as appropriate. * return <rp> pinned and its block number <rpbn>. */ rc = (sp->header.flag & BT_ROOT) ? 
xtSplitRoot(tid, ip, split, &rmp) : xtSplitPage(tid, ip, split, &rmp, &rbn); if (rc) { XT_PUTPAGE(smp); return rc; } XT_PUTPAGE(smp); /* keep new child page <rp> pinned */ } /* * parent page is not full - insert in parent page */ else { /* * insert router entry in parent for the right child * page from the first entry of the right child page: */ /* * acquire a transaction lock on the parent page; * * action: router xad insertion; */ BT_MARK_DIRTY(smp, ip); /* * if insert into middle, shift right remaining entries */ if (skip < nextindex) memmove(&sp->xad[skip + 1], &sp->xad[skip], (nextindex - skip) << L2XTSLOTSIZE); /* insert the router entry */ xad = &sp->xad[skip]; XT_PUTENTRY(xad, XAD_NEW, offsetXAD(&rcp->xad[XTENTRYSTART]), JFS_SBI(ip->i_sb)->nbperpage, rcbn); /* advance next available entry index. */ le16_add_cpu(&sp->header.nextindex, 1); /* Don't log it if there are no links to the file */ if (!test_cflag(COMMIT_Nolink, ip)) { tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW); xtlck = (struct xtlock *) & tlck->lock; xtlck->lwm.offset = (xtlck->lwm.offset) ? min(skip, (int)xtlck->lwm.offset) : skip; xtlck->lwm.length = le16_to_cpu(sp->header.nextindex) - xtlck->lwm.offset; } /* unpin parent page */ XT_PUTPAGE(smp); /* exit propagate up */ break; } } /* unpin current right page */ XT_PUTPAGE(rmp); return 0; } /* * xtSplitPage() * * function: * split a full non-root page into * original/split/left page and new right page * i.e., the original/split page remains as left page. * * parameter: * int tid, * struct inode *ip, * struct xtsplit *split, * struct metapage **rmpp, * u64 *rbnp, * * return: * Pointer to page in which to insert or NULL on error. */ static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp) { int rc = 0; struct metapage *smp; xtpage_t *sp; struct metapage *rmp; xtpage_t *rp; /* new right page allocated */ s64 rbn; /* new right page block number */ struct metapage *mp; xtpage_t *p; s64 nextbn; int skip, maxentry, middle, righthalf, n; xad_t *xad; struct pxdlist *pxdlist; pxd_t *pxd; struct tlock *tlck; struct xtlock *sxtlck = NULL, *rxtlck = NULL; int quota_allocation = 0; smp = split->mp; sp = XT_PAGE(ip, smp); INCREMENT(xtStat.split); pxdlist = split->pxdlist; pxd = &pxdlist->pxd[pxdlist->npxd]; pxdlist->npxd++; rbn = addressPXD(pxd); /* Allocate blocks to quota. 
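 * (Added note: the charge taken just below is remembered in
 * quota_allocation so that the clean_up path can return it via
 * dquot_free_block() if the split fails part way through.)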
*/ rc = dquot_alloc_block(ip, lengthPXD(pxd)); if (rc) goto clean_up; quota_allocation += lengthPXD(pxd); /* * allocate the new right page for the split */ rmp = get_metapage(ip, rbn, PSIZE, 1); if (rmp == NULL) { rc = -EIO; goto clean_up; } jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp); BT_MARK_DIRTY(rmp, ip); /* * action: new page; */ rp = (xtpage_t *) rmp->data; rp->header.self = *pxd; rp->header.flag = sp->header.flag & BT_TYPE; rp->header.maxentry = sp->header.maxentry; /* little-endian */ rp->header.nextindex = cpu_to_le16(XTENTRYSTART); BT_MARK_DIRTY(smp, ip); /* Don't log it if there are no links to the file */ if (!test_cflag(COMMIT_Nolink, ip)) { /* * acquire a transaction lock on the new right page; */ tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW); rxtlck = (struct xtlock *) & tlck->lock; rxtlck->lwm.offset = XTENTRYSTART; /* * acquire a transaction lock on the split page */ tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW); sxtlck = (struct xtlock *) & tlck->lock; } /* * initialize/update sibling pointers of <sp> and <rp> */ nextbn = le64_to_cpu(sp->header.next); rp->header.next = cpu_to_le64(nextbn); rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self)); sp->header.next = cpu_to_le64(rbn); skip = split->index; /* * sequential append at tail (after last entry of last page) * * if splitting the last page on a level because of appending * a entry to it (skip is maxentry), it's likely that the access is * sequential. adding an empty page on the side of the level is less * work and can push the fill factor much higher than normal. * if we're wrong it's no big deal - we will do the split the right * way next time. * (it may look like it's equally easy to do a similar hack for * reverse sorted data, that is, split the tree left, but it's not. * Be my guest.) 
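 *
 * Illustrative effect (added, approximate figures): an ordinary middle
 * split leaves both halves about 50% full, so a pure append workload
 * would settle near half-full leaves; this branch keeps the left page
 * completely full and places only the appended entry on the new right
 * page, so sequential appends approach 100% fill per leaf instead.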
*/ if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) { /* * acquire a transaction lock on the new/right page; * * action: xad insertion; */ /* insert entry at the first entry of the new right page */ xad = &rp->xad[XTENTRYSTART]; XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr); rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1); if (!test_cflag(COMMIT_Nolink, ip)) { /* rxtlck->lwm.offset = XTENTRYSTART; */ rxtlck->lwm.length = 1; } *rmpp = rmp; *rbnp = rbn; jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp); return 0; } /* * non-sequential insert (at possibly middle page) */ /* * update previous pointer of old next/right page of <sp> */ if (nextbn != 0) { XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); if (rc) { XT_PUTPAGE(rmp); goto clean_up; } BT_MARK_DIRTY(mp, ip); /* * acquire a transaction lock on the next page; * * action:sibling pointer update; */ if (!test_cflag(COMMIT_Nolink, ip)) tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); p->header.prev = cpu_to_le64(rbn); /* sibling page may have been updated previously, or * it may be updated later; */ XT_PUTPAGE(mp); } /* * split the data between the split and new/right pages */ maxentry = le16_to_cpu(sp->header.maxentry); middle = maxentry >> 1; righthalf = maxentry - middle; /* * skip index in old split/left page - insert into left page: */ if (skip <= middle) { /* move right half of split page to the new right page */ memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle], righthalf << L2XTSLOTSIZE); /* shift right tail of left half to make room for new entry */ if (skip < middle) memmove(&sp->xad[skip + 1], &sp->xad[skip], (middle - skip) << L2XTSLOTSIZE); /* insert new entry */ xad = &sp->xad[skip]; XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr); /* update page header */ sp->header.nextindex = cpu_to_le16(middle + 1); if (!test_cflag(COMMIT_Nolink, ip)) { sxtlck->lwm.offset = (sxtlck->lwm.offset) ? min(skip, (int)sxtlck->lwm.offset) : skip; } rp->header.nextindex = cpu_to_le16(XTENTRYSTART + righthalf); } /* * skip index in new right page - insert into right page: */ else { /* move left head of right half to right page */ n = skip - middle; memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle], n << L2XTSLOTSIZE); /* insert new entry */ n += XTENTRYSTART; xad = &rp->xad[n]; XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr); /* move right tail of right half to right page */ if (skip < maxentry) memmove(&rp->xad[n + 1], &sp->xad[skip], (maxentry - skip) << L2XTSLOTSIZE); /* update page header */ sp->header.nextindex = cpu_to_le16(middle); if (!test_cflag(COMMIT_Nolink, ip)) { sxtlck->lwm.offset = (sxtlck->lwm.offset) ? min(middle, (int)sxtlck->lwm.offset) : middle; } rp->header.nextindex = cpu_to_le16(XTENTRYSTART + righthalf + 1); } if (!test_cflag(COMMIT_Nolink, ip)) { sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) - sxtlck->lwm.offset; /* rxtlck->lwm.offset = XTENTRYSTART; */ rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) - XTENTRYSTART; } *rmpp = rmp; *rbnp = rbn; jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp); return rc; clean_up: /* Rollback quota allocation. 
*/ if (quota_allocation) dquot_free_block(ip, quota_allocation); return (rc); } /* * xtSplitRoot() * * function: * split the full root page into original/root/split page and new * right page * i.e., root remains fixed in tree anchor (inode) and the root is * copied to a single new right child page since root page << * non-root page, and the split root page contains a single entry * for the new right child page. * * parameter: * int tid, * struct inode *ip, * struct xtsplit *split, * struct metapage **rmpp) * * return: * Pointer to page in which to insert or NULL on error. */ static int xtSplitRoot(tid_t tid, struct inode *ip, struct xtsplit * split, struct metapage ** rmpp) { xtpage_t *sp; struct metapage *rmp; xtpage_t *rp; s64 rbn; int skip, nextindex; xad_t *xad; pxd_t *pxd; struct pxdlist *pxdlist; struct tlock *tlck; struct xtlock *xtlck; int rc; sp = (xtpage_t *) &JFS_IP(ip)->i_xtroot; INCREMENT(xtStat.split); /* * allocate a single (right) child page */ pxdlist = split->pxdlist; pxd = &pxdlist->pxd[pxdlist->npxd]; pxdlist->npxd++; rbn = addressPXD(pxd); rmp = get_metapage(ip, rbn, PSIZE, 1); if (rmp == NULL) return -EIO; /* Allocate blocks to quota. */ rc = dquot_alloc_block(ip, lengthPXD(pxd)); if (rc) { release_metapage(rmp); return rc; } jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp); /* * acquire a transaction lock on the new right page; * * action: new page; */ BT_MARK_DIRTY(rmp, ip); rp = (xtpage_t *) rmp->data; rp->header.flag = (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL; rp->header.self = *pxd; rp->header.nextindex = cpu_to_le16(XTENTRYSTART); rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE); /* initialize sibling pointers */ rp->header.next = 0; rp->header.prev = 0; /* * copy the in-line root page into new right page extent */ nextindex = le16_to_cpu(sp->header.maxentry); memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART], (nextindex - XTENTRYSTART) << L2XTSLOTSIZE); /* * insert the new entry into the new right/child page * (skip index in the new right page will not change) */ skip = split->index; /* if insert into middle, shift right remaining entries */ if (skip != nextindex) memmove(&rp->xad[skip + 1], &rp->xad[skip], (nextindex - skip) * sizeof(xad_t)); xad = &rp->xad[skip]; XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr); /* update page header */ rp->header.nextindex = cpu_to_le16(nextindex + 1); if (!test_cflag(COMMIT_Nolink, ip)) { tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW); xtlck = (struct xtlock *) & tlck->lock; xtlck->lwm.offset = XTENTRYSTART; xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) - XTENTRYSTART; } /* * reset the root * * init root with the single entry for the new right page * set the 1st entry offset to 0, which force the left-most key * at any level of the tree to be less than any search key. 
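 *
 * Added note: with the first router offset forced to 0, no search key
 * can fall to the left of the left-most child, so a descent from the
 * root always finds an entry to follow; the tree gains one level of
 * height while the root itself stays in-line in the inode.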
*/ /* * acquire a transaction lock on the root page (in-memory inode); * * action: root split; */ BT_MARK_DIRTY(split->mp, ip); xad = &sp->xad[XTENTRYSTART]; XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn); /* update page header of root */ sp->header.flag &= ~BT_LEAF; sp->header.flag |= BT_INTERNAL; sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1); if (!test_cflag(COMMIT_Nolink, ip)) { tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW); xtlck = (struct xtlock *) & tlck->lock; xtlck->lwm.offset = XTENTRYSTART; xtlck->lwm.length = 1; } *rmpp = rmp; jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp); return 0; } /* * xtExtend() * * function: extend in-place; * * note: existing extent may or may not have been committed. * caller is responsible for pager buffer cache update, and * working block allocation map update; * update pmap: alloc whole extended extent; */ int xtExtend(tid_t tid, /* transaction id */ struct inode *ip, s64 xoff, /* delta extent offset */ s32 xlen, /* delta extent length */ int flag) { int rc = 0; int cmp; struct metapage *mp; /* meta-page buffer */ xtpage_t *p; /* base B+-tree index page */ s64 bn; int index, nextindex, len; struct btstack btstack; /* traverse stack */ struct xtsplit split; /* split information */ xad_t *xad; s64 xaddr; struct tlock *tlck; struct xtlock *xtlck = NULL; jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen); /* there must exist extent to be extended */ if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT))) return rc; /* retrieve search result */ XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); if (cmp != 0) { XT_PUTPAGE(mp); jfs_error(ip->i_sb, "xtSearch did not find extent\n"); return -EIO; } /* extension must be contiguous */ xad = &p->xad[index]; if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) { XT_PUTPAGE(mp); jfs_error(ip->i_sb, "extension is not contiguous\n"); return -EIO; } /* * acquire a transaction lock on the leaf page; * * action: xad insertion/extension; */ BT_MARK_DIRTY(mp, ip); if (!test_cflag(COMMIT_Nolink, ip)) { tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); xtlck = (struct xtlock *) & tlck->lock; } /* extend will overflow extent ? */ xlen = lengthXAD(xad) + xlen; if ((len = xlen - MAXXLEN) <= 0) goto extendOld; /* * extent overflow: insert entry for new extent */ //insertNew: xoff = offsetXAD(xad) + MAXXLEN; xaddr = addressXAD(xad) + MAXXLEN; nextindex = le16_to_cpu(p->header.nextindex); /* * if the leaf page is full, insert the new entry and * propagate up the router entry for the new page from split * * The xtSplitUp() will insert the entry and unpin the leaf page. 
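 *
 * Added note: len at this point is the part of the extension that no
 * longer fits once the existing XAD reaches MAXXLEN blocks; the code
 * below inserts that remainder as a new XAD_NEW entry (splitting the
 * leaf first if it is full) and then, at extendOld, grows the original
 * entry to exactly MAXXLEN.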
*/ if (nextindex == le16_to_cpu(p->header.maxentry)) { /* xtSpliUp() unpins leaf pages */ split.mp = mp; split.index = index + 1; split.flag = XAD_NEW; split.off = xoff; /* split offset */ split.len = len; split.addr = xaddr; split.pxdlist = NULL; if ((rc = xtSplitUp(tid, ip, &split, &btstack))) return rc; /* get back old page */ XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); if (rc) return rc; /* * if leaf root has been split, original root has been * copied to new child page, i.e., original entry now * resides on the new child page; */ if (p->header.flag & BT_INTERNAL) { ASSERT(p->header.nextindex == cpu_to_le16(XTENTRYSTART + 1)); xad = &p->xad[XTENTRYSTART]; bn = addressXAD(xad); XT_PUTPAGE(mp); /* get new child page */ XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); if (rc) return rc; BT_MARK_DIRTY(mp, ip); if (!test_cflag(COMMIT_Nolink, ip)) { tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); xtlck = (struct xtlock *) & tlck->lock; } } } /* * insert the new entry into the leaf page */ else { /* insert the new entry: mark the entry NEW */ xad = &p->xad[index + 1]; XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr); /* advance next available entry index */ le16_add_cpu(&p->header.nextindex, 1); } /* get back old entry */ xad = &p->xad[index]; xlen = MAXXLEN; /* * extend old extent */ extendOld: XADlength(xad, xlen); if (!(xad->flag & XAD_NEW)) xad->flag |= XAD_EXTENDED; if (!test_cflag(COMMIT_Nolink, ip)) { xtlck->lwm.offset = (xtlck->lwm.offset) ? min(index, (int)xtlck->lwm.offset) : index; xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; } /* unpin the leaf page */ XT_PUTPAGE(mp); return rc; } /* * xtUpdate() * * function: update XAD; * * update extent for allocated_but_not_recorded or * compressed extent; * * parameter: * nxad - new XAD; * logical extent of the specified XAD must be completely * contained by an existing XAD; */ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) { /* new XAD */ int rc = 0; int cmp; struct metapage *mp; /* meta-page buffer */ xtpage_t *p; /* base B+-tree index page */ s64 bn; int index0, index, newindex, nextindex; struct btstack btstack; /* traverse stack */ struct xtsplit split; /* split information */ xad_t *xad, *lxad, *rxad; int xflag; s64 nxoff, xoff; int nxlen, xlen, lxlen, rxlen; s64 nxaddr, xaddr; struct tlock *tlck; struct xtlock *xtlck = NULL; int newpage = 0; /* there must exist extent to be tailgated */ nxoff = offsetXAD(nxad); nxlen = lengthXAD(nxad); nxaddr = addressXAD(nxad); if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT))) return rc; /* retrieve search result */ XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); if (cmp != 0) { XT_PUTPAGE(mp); jfs_error(ip->i_sb, "Could not find extent\n"); return -EIO; } BT_MARK_DIRTY(mp, ip); /* * acquire tlock of the leaf page containing original entry */ if (!test_cflag(COMMIT_Nolink, ip)) { tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); xtlck = (struct xtlock *) & tlck->lock; } xad = &p->xad[index0]; xflag = xad->flag; xoff = offsetXAD(xad); xlen = lengthXAD(xad); xaddr = addressXAD(xad); /* nXAD must be completely contained within XAD */ if ((xoff > nxoff) || (nxoff + nxlen > xoff + xlen)) { XT_PUTPAGE(mp); jfs_error(ip->i_sb, "nXAD in not completely contained within XAD\n"); return -EIO; } index = index0; newindex = index + 1; nextindex = le16_to_cpu(p->header.nextindex); if (xoff < nxoff) goto coalesceRight; /* * coalesce with left XAD */ /* is XAD first entry of page ? */ if (index == XTENTRYSTART) goto replace; /* is nXAD logically and physically contiguous with lXAD ? 
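 *
 * Restated for clarity (mirrors the test below; not in the original):
 * the merge happens only when lXAD is a recorded extent
 * (XAD_NOTRECORDED clear), nXAD starts exactly where lXAD ends both
 * logically (nxoff == offsetXAD(lxad) + lxlen) and on disk
 * (nxaddr == addressXAD(lxad) + lxlen), and the combined length still
 * fits in a single XAD (lxlen + nxlen < MAXXLEN).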
*/ lxad = &p->xad[index - 1]; lxlen = lengthXAD(lxad); if (!(lxad->flag & XAD_NOTRECORDED) && (nxoff == offsetXAD(lxad) + lxlen) && (nxaddr == addressXAD(lxad) + lxlen) && (lxlen + nxlen < MAXXLEN)) { /* extend right lXAD */ index0 = index - 1; XADlength(lxad, lxlen + nxlen); /* If we just merged two extents together, need to make sure the * right extent gets logged. If the left one is marked XAD_NEW, * then we know it will be logged. Otherwise, mark as * XAD_EXTENDED */ if (!(lxad->flag & XAD_NEW)) lxad->flag |= XAD_EXTENDED; if (xlen > nxlen) { /* truncate XAD */ XADoffset(xad, xoff + nxlen); XADlength(xad, xlen - nxlen); XADaddress(xad, xaddr + nxlen); goto out; } else { /* (xlen == nxlen) */ /* remove XAD */ if (index < nextindex - 1) memmove(&p->xad[index], &p->xad[index + 1], (nextindex - index - 1) << L2XTSLOTSIZE); p->header.nextindex = cpu_to_le16(le16_to_cpu(p->header.nextindex) - 1); index = index0; newindex = index + 1; nextindex = le16_to_cpu(p->header.nextindex); xoff = nxoff = offsetXAD(lxad); xlen = nxlen = lxlen + nxlen; xaddr = nxaddr = addressXAD(lxad); goto coalesceRight; } } /* * replace XAD with nXAD */ replace: /* (nxoff == xoff) */ if (nxlen == xlen) { /* replace XAD with nXAD:recorded */ *xad = *nxad; xad->flag = xflag & ~XAD_NOTRECORDED; goto coalesceRight; } else /* (nxlen < xlen) */ goto updateLeft; /* * coalesce with right XAD */ coalesceRight: /* (xoff <= nxoff) */ /* is XAD last entry of page ? */ if (newindex == nextindex) { if (xoff == nxoff) goto out; goto updateRight; } /* is nXAD logically and physically contiguous with rXAD ? */ rxad = &p->xad[index + 1]; rxlen = lengthXAD(rxad); if (!(rxad->flag & XAD_NOTRECORDED) && (nxoff + nxlen == offsetXAD(rxad)) && (nxaddr + nxlen == addressXAD(rxad)) && (rxlen + nxlen < MAXXLEN)) { /* extend left rXAD */ XADoffset(rxad, nxoff); XADlength(rxad, rxlen + nxlen); XADaddress(rxad, nxaddr); /* If we just merged two extents together, need to make sure * the left extent gets logged. If the right one is marked * XAD_NEW, then we know it will be logged. 
Otherwise, mark as * XAD_EXTENDED */ if (!(rxad->flag & XAD_NEW)) rxad->flag |= XAD_EXTENDED; if (xlen > nxlen) /* truncate XAD */ XADlength(xad, xlen - nxlen); else { /* (xlen == nxlen) */ /* remove XAD */ memmove(&p->xad[index], &p->xad[index + 1], (nextindex - index - 1) << L2XTSLOTSIZE); p->header.nextindex = cpu_to_le16(le16_to_cpu(p->header.nextindex) - 1); } goto out; } else if (xoff == nxoff) goto out; if (xoff >= nxoff) { XT_PUTPAGE(mp); jfs_error(ip->i_sb, "xoff >= nxoff\n"); return -EIO; } /* * split XAD into (lXAD, nXAD): * * |---nXAD---> * --|----------XAD----------|-- * |-lXAD-| */ updateRight: /* (xoff < nxoff) */ /* truncate old XAD as lXAD:not_recorded */ xad = &p->xad[index]; XADlength(xad, nxoff - xoff); /* insert nXAD:recorded */ if (nextindex == le16_to_cpu(p->header.maxentry)) { /* xtSpliUp() unpins leaf pages */ split.mp = mp; split.index = newindex; split.flag = xflag & ~XAD_NOTRECORDED; split.off = nxoff; split.len = nxlen; split.addr = nxaddr; split.pxdlist = NULL; if ((rc = xtSplitUp(tid, ip, &split, &btstack))) return rc; /* get back old page */ XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); if (rc) return rc; /* * if leaf root has been split, original root has been * copied to new child page, i.e., original entry now * resides on the new child page; */ if (p->header.flag & BT_INTERNAL) { ASSERT(p->header.nextindex == cpu_to_le16(XTENTRYSTART + 1)); xad = &p->xad[XTENTRYSTART]; bn = addressXAD(xad); XT_PUTPAGE(mp); /* get new child page */ XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); if (rc) return rc; BT_MARK_DIRTY(mp, ip); if (!test_cflag(COMMIT_Nolink, ip)) { tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); xtlck = (struct xtlock *) & tlck->lock; } } else { /* is nXAD on new page ? */ if (newindex > (le16_to_cpu(p->header.maxentry) >> 1)) { newindex = newindex - le16_to_cpu(p->header.nextindex) + XTENTRYSTART; newpage = 1; } } } else { /* if insert into middle, shift right remaining entries */ if (newindex < nextindex) memmove(&p->xad[newindex + 1], &p->xad[newindex], (nextindex - newindex) << L2XTSLOTSIZE); /* insert the entry */ xad = &p->xad[newindex]; *xad = *nxad; xad->flag = xflag & ~XAD_NOTRECORDED; /* advance next available entry index. */ p->header.nextindex = cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); } /* * does nXAD force 3-way split ? * * |---nXAD--->| * --|----------XAD-------------|-- * |-lXAD-| |-rXAD -| */ if (nxoff + nxlen == xoff + xlen) goto out; /* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */ if (newpage) { /* close out old page */ if (!test_cflag(COMMIT_Nolink, ip)) { xtlck->lwm.offset = (xtlck->lwm.offset) ? 
min(index0, (int)xtlck->lwm.offset) : index0; xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; } bn = le64_to_cpu(p->header.next); XT_PUTPAGE(mp); /* get new right page */ XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); if (rc) return rc; BT_MARK_DIRTY(mp, ip); if (!test_cflag(COMMIT_Nolink, ip)) { tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); xtlck = (struct xtlock *) & tlck->lock; } index0 = index = newindex; } else index++; newindex = index + 1; nextindex = le16_to_cpu(p->header.nextindex); xlen = xlen - (nxoff - xoff); xoff = nxoff; xaddr = nxaddr; /* recompute split pages */ if (nextindex == le16_to_cpu(p->header.maxentry)) { XT_PUTPAGE(mp); if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT))) return rc; /* retrieve search result */ XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); if (cmp != 0) { XT_PUTPAGE(mp); jfs_error(ip->i_sb, "xtSearch failed\n"); return -EIO; } if (index0 != index) { XT_PUTPAGE(mp); jfs_error(ip->i_sb, "unexpected value of index\n"); return -EIO; } } /* * split XAD into (nXAD, rXAD) * * ---nXAD---| * --|----------XAD----------|-- * |-rXAD-| */ updateLeft: /* (nxoff == xoff) && (nxlen < xlen) */ /* update old XAD with nXAD:recorded */ xad = &p->xad[index]; *xad = *nxad; xad->flag = xflag & ~XAD_NOTRECORDED; /* insert rXAD:not_recorded */ xoff = xoff + nxlen; xlen = xlen - nxlen; xaddr = xaddr + nxlen; if (nextindex == le16_to_cpu(p->header.maxentry)) { /* printf("xtUpdate.updateLeft.split p:0x%p\n", p); */ /* xtSpliUp() unpins leaf pages */ split.mp = mp; split.index = newindex; split.flag = xflag; split.off = xoff; split.len = xlen; split.addr = xaddr; split.pxdlist = NULL; if ((rc = xtSplitUp(tid, ip, &split, &btstack))) return rc; /* get back old page */ XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); if (rc) return rc; /* * if leaf root has been split, original root has been * copied to new child page, i.e., original entry now * resides on the new child page; */ if (p->header.flag & BT_INTERNAL) { ASSERT(p->header.nextindex == cpu_to_le16(XTENTRYSTART + 1)); xad = &p->xad[XTENTRYSTART]; bn = addressXAD(xad); XT_PUTPAGE(mp); /* get new child page */ XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); if (rc) return rc; BT_MARK_DIRTY(mp, ip); if (!test_cflag(COMMIT_Nolink, ip)) { tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); xtlck = (struct xtlock *) & tlck->lock; } } } else { /* if insert into middle, shift right remaining entries */ if (newindex < nextindex) memmove(&p->xad[newindex + 1], &p->xad[newindex], (nextindex - newindex) << L2XTSLOTSIZE); /* insert the entry */ xad = &p->xad[newindex]; XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); /* advance next available entry index. */ p->header.nextindex = cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); } out: if (!test_cflag(COMMIT_Nolink, ip)) { xtlck->lwm.offset = (xtlck->lwm.offset) ? 
min(index0, (int)xtlck->lwm.offset) : index0; xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; } /* unpin the leaf page */ XT_PUTPAGE(mp); return rc; } /* * xtAppend() * * function: grow in append mode from contiguous region specified ; * * parameter: * tid - transaction id; * ip - file object; * xflag - extent flag: * xoff - extent offset; * maxblocks - max extent length; * xlen - extent length (in/out); * xaddrp - extent address pointer (in/out): * flag - * * return: */ int xtAppend(tid_t tid, /* transaction id */ struct inode *ip, int xflag, s64 xoff, s32 maxblocks, s32 * xlenp, /* (in/out) */ s64 * xaddrp, /* (in/out) */ int flag) { int rc = 0; struct metapage *mp; /* meta-page buffer */ xtpage_t *p; /* base B+-tree index page */ s64 bn, xaddr; int index, nextindex; struct btstack btstack; /* traverse stack */ struct xtsplit split; /* split information */ xad_t *xad; int cmp; struct tlock *tlck; struct xtlock *xtlck; int nsplit, nblocks, xlen; struct pxdlist pxdlist; pxd_t *pxd; s64 next; xaddr = *xaddrp; xlen = *xlenp; jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx", (ulong) xoff, maxblocks, xlen, (ulong) xaddr); /* * search for the entry location at which to insert: * * xtFastSearch() and xtSearch() both returns (leaf page * pinned, index at which to insert). * n.b. xtSearch() may return index of maxentry of * the full page. */ if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT))) return rc; /* retrieve search result */ XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); if (cmp == 0) { rc = -EEXIST; goto out; } if (next) xlen = min(xlen, (int)(next - xoff)); //insert: /* * insert entry for new extent */ xflag |= XAD_NEW; /* * if the leaf page is full, split the page and * propagate up the router entry for the new page from split * * The xtSplitUp() will insert the entry and unpin the leaf page. */ nextindex = le16_to_cpu(p->header.nextindex); if (nextindex < le16_to_cpu(p->header.maxentry)) goto insertLeaf; /* * allocate new index blocks to cover index page split(s) */ nsplit = btstack.nsplit; split.pxdlist = &pxdlist; pxdlist.maxnpxd = pxdlist.npxd = 0; pxd = &pxdlist.pxd[0]; nblocks = JFS_SBI(ip->i_sb)->nbperpage; for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) { if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) { PXDaddress(pxd, xaddr); PXDlength(pxd, nblocks); pxdlist.maxnpxd++; continue; } /* undo allocation */ goto out; } xlen = min(xlen, maxblocks); /* * allocate data extent requested */ if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) goto out; split.mp = mp; split.index = index; split.flag = xflag; split.off = xoff; split.len = xlen; split.addr = xaddr; if ((rc = xtSplitUp(tid, ip, &split, &btstack))) { /* undo data extent allocation */ dbFree(ip, *xaddrp, (s64) * xlenp); return rc; } *xaddrp = xaddr; *xlenp = xlen; return 0; /* * insert the new entry into the leaf page */ insertLeaf: /* * allocate data extent requested */ if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) goto out; BT_MARK_DIRTY(mp, ip); /* * acquire a transaction lock on the leaf page; * * action: xad insertion/extension; */ tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); xtlck = (struct xtlock *) & tlck->lock; /* insert the new entry: mark the entry NEW */ xad = &p->xad[index]; XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); /* advance next available entry index */ le16_add_cpu(&p->header.nextindex, 1); xtlck->lwm.offset = (xtlck->lwm.offset) ? 
min(index,(int) xtlck->lwm.offset) : index; xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; *xaddrp = xaddr; *xlenp = xlen; out: /* unpin the leaf page */ XT_PUTPAGE(mp); return rc; } /* * xtInitRoot() * * initialize file root (inline in inode) */ void xtInitRoot(tid_t tid, struct inode *ip) { xtroot_t *p; /* * acquire a transaction lock on the root * * action: */ txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag, tlckXTREE | tlckNEW); p = &JFS_IP(ip)->i_xtroot; p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF; p->header.nextindex = cpu_to_le16(XTENTRYSTART); if (S_ISDIR(ip->i_mode)) p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR); else { p->header.maxentry = cpu_to_le16(XTROOTINITSLOT); ip->i_size = 0; } return; } /* * We can run into a deadlock truncating a file with a large number of * xtree pages (large fragmented file). A robust fix would entail a * reservation system where we would reserve a number of metadata pages * and tlocks which we would be guaranteed without a deadlock. Without * this, a partial fix is to limit number of metadata pages we will lock * in a single transaction. Currently we will truncate the file so that * no more than 50 leaf pages will be locked. The caller of xtTruncate * will be responsible for ensuring that the current transaction gets * committed, and that subsequent transactions are created to truncate * the file further if needed. */ #define MAX_TRUNCATE_LEAVES 50 /* * xtTruncate() * * function: * traverse for truncation logging backward bottom up; * terminate at the last extent entry at the current subtree * root page covering new down size. * truncation may occur within the last extent entry. * * parameter: * int tid, * struct inode *ip, * s64 newsize, * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE} * * return: * * note: * PWMAP: * 1. truncate (non-COMMIT_NOLINK file) * by jfs_truncate() or jfs_open(O_TRUNC): * xtree is updated; * 2. truncate index table of directory when last entry removed * map update via tlock at commit time; * PMAP: * Call xtTruncate_pmap instead * WMAP: * 1. remove (free zero link count) on last reference release * (pmap has been freed at commit zero link count); * 2. 
truncate (COMMIT_NOLINK file, i.e., tmp file): * xtree is updated; * map update directly at truncation time; * * if (DELETE) * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient); * else if (TRUNCATE) * must write LOG_NOREDOPAGE for deleted index page; * * pages may already have been tlocked by anonymous transactions * during file growth (i.e., write) before truncation; * * except last truncated entry, deleted entries remains as is * in the page (nextindex is updated) for other use * (e.g., log/update allocation map): this avoid copying the page * info but delay free of pages; * */ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) { int rc = 0; s64 teof; struct metapage *mp; xtpage_t *p; s64 bn; int index, nextindex; xad_t *xad; s64 xoff, xaddr; int xlen, len, freexlen; struct btstack btstack; struct btframe *parent; struct tblock *tblk = NULL; struct tlock *tlck = NULL; struct xtlock *xtlck = NULL; struct xdlistlock xadlock; /* maplock for COMMIT_WMAP */ struct pxd_lock *pxdlock; /* maplock for COMMIT_WMAP */ s64 nfreed; int freed, log; int locked_leaves = 0; /* save object truncation type */ if (tid) { tblk = tid_to_tblock(tid); tblk->xflag |= flag; } nfreed = 0; flag &= COMMIT_MAP; assert(flag != COMMIT_PMAP); if (flag == COMMIT_PWMAP) log = 1; else { log = 0; xadlock.flag = mlckFREEXADLIST; xadlock.index = 1; } /* * if the newsize is not an integral number of pages, * the file between newsize and next page boundary will * be cleared. * if truncating into a file hole, it will cause * a full block to be allocated for the logical block. */ /* * release page blocks of truncated region <teof, eof> * * free the data blocks from the leaf index blocks. * delete the parent index entries corresponding to * the freed child data/index blocks. * free the index blocks themselves which aren't needed * in new sized file. * * index blocks are updated only if the blocks are to be * retained in the new sized file. * if type is PMAP, the data and index pages are NOT * freed, and the data and index blocks are NOT freed * from working map. * (this will allow continued access of data/index of * temporary file (zerolink count file truncated to zero-length)). */ teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >> JFS_SBI(ip->i_sb)->l2bsize; /* clear stack */ BT_CLR(&btstack); /* * start with root * * root resides in the inode */ bn = 0; /* * first access of each page: */ getPage: XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); if (rc) return rc; /* process entries backward from last index */ index = le16_to_cpu(p->header.nextindex) - 1; /* Since this is the rightmost page at this level, and we may have * already freed a page that was formerly to the right, let's make * sure that the next pointer is zero. */ if (p->header.next) { if (log) /* * Make sure this change to the header is logged. * If we really truncate this leaf, the flag * will be changed to tlckTRUNCATE */ tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); BT_MARK_DIRTY(mp, ip); p->header.next = 0; } if (p->header.flag & BT_INTERNAL) goto getChild; /* * leaf page */ freed = 0; /* does region covered by leaf page precede Teof ? 
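 * (Illustrative, hypothetical numbers: with a 4K block size,
 * newsize = 10000 gives teof = (10000 + 4095) >> 12 = 3, so blocks 0..2
 * are kept; a leaf whose last XAD satisfies xoff + xlen <= teof needs
 * no change and is simply released below before moving to its parent.)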
*/ xad = &p->xad[index]; xoff = offsetXAD(xad); xlen = lengthXAD(xad); if (teof >= xoff + xlen) { XT_PUTPAGE(mp); goto getParent; } /* (re)acquire tlock of the leaf page */ if (log) { if (++locked_leaves > MAX_TRUNCATE_LEAVES) { /* * We need to limit the size of the transaction * to avoid exhausting pagecache & tlocks */ XT_PUTPAGE(mp); newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; goto getParent; } tlck = txLock(tid, ip, mp, tlckXTREE); tlck->type = tlckXTREE | tlckTRUNCATE; xtlck = (struct xtlock *) & tlck->lock; xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1; } BT_MARK_DIRTY(mp, ip); /* * scan backward leaf page entries */ for (; index >= XTENTRYSTART; index--) { xad = &p->xad[index]; xoff = offsetXAD(xad); xlen = lengthXAD(xad); xaddr = addressXAD(xad); /* * The "data" for a directory is indexed by the block * device's address space. This metadata must be invalidated * here */ if (S_ISDIR(ip->i_mode) && (teof == 0)) invalidate_xad_metapages(ip, *xad); /* * entry beyond eof: continue scan of current page * xad * ---|---=======-------> * eof */ if (teof < xoff) { nfreed += xlen; continue; } /* * (xoff <= teof): last entry to be deleted from page; * If other entries remain in page: keep and update the page. */ /* * eof == entry_start: delete the entry * xad * -------|=======-------> * eof * */ if (teof == xoff) { nfreed += xlen; if (index == XTENTRYSTART) break; nextindex = index; } /* * eof within the entry: truncate the entry. * xad * -------===|===-------> * eof */ else if (teof < xoff + xlen) { /* update truncated entry */ len = teof - xoff; freexlen = xlen - len; XADlength(xad, len); /* save pxd of truncated extent in tlck */ xaddr += len; if (log) { /* COMMIT_PWMAP */ xtlck->lwm.offset = (xtlck->lwm.offset) ? min(index, (int)xtlck->lwm.offset) : index; xtlck->lwm.length = index + 1 - xtlck->lwm.offset; xtlck->twm.offset = index; pxdlock = (struct pxd_lock *) & xtlck->pxdlock; pxdlock->flag = mlckFREEPXD; PXDaddress(&pxdlock->pxd, xaddr); PXDlength(&pxdlock->pxd, freexlen); } /* free truncated extent */ else { /* COMMIT_WMAP */ pxdlock = (struct pxd_lock *) & xadlock; pxdlock->flag = mlckFREEPXD; PXDaddress(&pxdlock->pxd, xaddr); PXDlength(&pxdlock->pxd, freexlen); txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP); /* reset map lock */ xadlock.flag = mlckFREEXADLIST; } /* current entry is new last entry; */ nextindex = index + 1; nfreed += freexlen; } /* * eof beyond the entry: * xad * -------=======---|---> * eof */ else { /* (xoff + xlen < teof) */ nextindex = index + 1; } if (nextindex < le16_to_cpu(p->header.nextindex)) { if (!log) { /* COMMIT_WAMP */ xadlock.xdlist = &p->xad[nextindex]; xadlock.count = le16_to_cpu(p->header.nextindex) - nextindex; txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP); } p->header.nextindex = cpu_to_le16(nextindex); } XT_PUTPAGE(mp); /* assert(freed == 0); */ goto getParent; } /* end scan of leaf page entries */ freed = 1; /* * leaf page become empty: free the page if type != PMAP */ if (log) { /* COMMIT_PWMAP */ /* txCommit() with tlckFREE: * free data extents covered by leaf [XTENTRYSTART:hwm); * invalidate leaf if COMMIT_PWMAP; * if (TRUNCATE), will write LOG_NOREDOPAGE; */ tlck->type = tlckXTREE | tlckFREE; } else { /* COMMIT_WAMP */ /* free data extents covered by leaf */ xadlock.xdlist = &p->xad[XTENTRYSTART]; xadlock.count = le16_to_cpu(p->header.nextindex) - XTENTRYSTART; txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP); } if (p->header.flag & BT_ROOT) { p->header.flag &= ~BT_INTERNAL; p->header.flag |= 
BT_LEAF; p->header.nextindex = cpu_to_le16(XTENTRYSTART); XT_PUTPAGE(mp); /* debug */ goto out; } else { if (log) { /* COMMIT_PWMAP */ /* page will be invalidated at tx completion */ XT_PUTPAGE(mp); } else { /* COMMIT_WMAP */ if (mp->lid) lid_to_tlock(mp->lid)->flag |= tlckFREELOCK; /* invalidate empty leaf page */ discard_metapage(mp); } } /* * the leaf page become empty: delete the parent entry * for the leaf page if the parent page is to be kept * in the new sized file. */ /* * go back up to the parent page */ getParent: /* pop/restore parent entry for the current child page */ if ((parent = BT_POP(&btstack)) == NULL) /* current page must have been root */ goto out; /* get back the parent page */ bn = parent->bn; XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); if (rc) return rc; index = parent->index; /* * child page was not empty: */ if (freed == 0) { /* has any entry deleted from parent ? */ if (index < le16_to_cpu(p->header.nextindex) - 1) { /* (re)acquire tlock on the parent page */ if (log) { /* COMMIT_PWMAP */ /* txCommit() with tlckTRUNCATE: * free child extents covered by parent [); */ tlck = txLock(tid, ip, mp, tlckXTREE); xtlck = (struct xtlock *) & tlck->lock; if (!(tlck->type & tlckTRUNCATE)) { xtlck->hwm.offset = le16_to_cpu(p->header. nextindex) - 1; tlck->type = tlckXTREE | tlckTRUNCATE; } } else { /* COMMIT_WMAP */ /* free child extents covered by parent */ xadlock.xdlist = &p->xad[index + 1]; xadlock.count = le16_to_cpu(p->header.nextindex) - index - 1; txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP); } BT_MARK_DIRTY(mp, ip); p->header.nextindex = cpu_to_le16(index + 1); } XT_PUTPAGE(mp); goto getParent; } /* * child page was empty: */ nfreed += lengthXAD(&p->xad[index]); /* * During working map update, child page's tlock must be handled * before parent's. This is because the parent's tlock will cause * the child's disk space to be marked available in the wmap, so * it's important that the child page be released by that time. * * ToDo: tlocks should be on doubly-linked list, so we can * quickly remove it and add it to the end. 
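 *
 * (the code below walks the tblock's singly linked tlock list from
 * tblk->next to find and unlink mp->lid, then re-links it after
 * tblk->last - hence the wish for a doubly-linked list above)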
*/ /* * Move parent page's tlock to the end of the tid's tlock list */ if (log && mp->lid && (tblk->last != mp->lid) && lid_to_tlock(mp->lid)->tid) { lid_t lid = mp->lid; struct tlock *prev; tlck = lid_to_tlock(lid); if (tblk->next == lid) tblk->next = tlck->next; else { for (prev = lid_to_tlock(tblk->next); prev->next != lid; prev = lid_to_tlock(prev->next)) { assert(prev->next); } prev->next = tlck->next; } lid_to_tlock(tblk->last)->next = lid; tlck->next = 0; tblk->last = lid; } /* * parent page become empty: free the page */ if (index == XTENTRYSTART) { if (log) { /* COMMIT_PWMAP */ /* txCommit() with tlckFREE: * free child extents covered by parent; * invalidate parent if COMMIT_PWMAP; */ tlck = txLock(tid, ip, mp, tlckXTREE); xtlck = (struct xtlock *) & tlck->lock; xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1; tlck->type = tlckXTREE | tlckFREE; } else { /* COMMIT_WMAP */ /* free child extents covered by parent */ xadlock.xdlist = &p->xad[XTENTRYSTART]; xadlock.count = le16_to_cpu(p->header.nextindex) - XTENTRYSTART; txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP); } BT_MARK_DIRTY(mp, ip); if (p->header.flag & BT_ROOT) { p->header.flag &= ~BT_INTERNAL; p->header.flag |= BT_LEAF; p->header.nextindex = cpu_to_le16(XTENTRYSTART); if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) { /* * Shrink root down to allow inline * EA (otherwise fsck complains) */ p->header.maxentry = cpu_to_le16(XTROOTINITSLOT); JFS_IP(ip)->mode2 |= INLINEEA; } XT_PUTPAGE(mp); /* debug */ goto out; } else { if (log) { /* COMMIT_PWMAP */ /* page will be invalidated at tx completion */ XT_PUTPAGE(mp); } else { /* COMMIT_WMAP */ if (mp->lid) lid_to_tlock(mp->lid)->flag |= tlckFREELOCK; /* invalidate parent page */ discard_metapage(mp); } /* parent has become empty and freed: * go back up to its parent page */ /* freed = 1; */ goto getParent; } } /* * parent page still has entries for front region; */ else { /* try truncate region covered by preceding entry * (process backward) */ index--; /* go back down to the child page corresponding * to the entry */ goto getChild; } /* * internal page: go down to child page of current entry */ getChild: /* save current parent entry for the child page */ if (BT_STACK_FULL(&btstack)) { jfs_error(ip->i_sb, "stack overrun!\n"); XT_PUTPAGE(mp); return -EIO; } BT_PUSH(&btstack, bn, index); /* get child page */ xad = &p->xad[index]; bn = addressXAD(xad); /* * first access of each internal entry: */ /* release parent page */ XT_PUTPAGE(mp); /* process the child page */ goto getPage; out: /* * update file resource stat */ /* set size */ if (S_ISDIR(ip->i_mode) && !newsize) ip->i_size = 1; /* fsck hates zero-length directories */ else ip->i_size = newsize; /* update quota allocation to reflect freed blocks */ dquot_free_block(ip, nfreed); /* * free tlock of invalidated pages */ if (flag == COMMIT_WMAP) txFreelock(ip); return newsize; } /* * xtTruncate_pmap() * * function: * Perform truncate to zero length for deleted file, leaving the * xtree and working map untouched. This allows the file to * be accessed via open file handles, while the delete of the file * is committed to disk. * * parameter: * tid_t tid, * struct inode *ip, * s64 committed_size) * * return: new committed size * * note: * * To avoid deadlock by holding too many transaction locks, the * truncation may be broken up into multiple transactions. * The committed_size keeps track of part of the file has been * freed from the pmaps. 
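 *
 * A return value of 0 means the whole xtree has been covered; a non-zero
 * return is the byte offset above which extents have already been freed
 * from the pmap (the MAX_TRUNCATE_LEAVES limit was reached), so the
 * caller can commit what has been locked so far and call again with that
 * value as committed_size.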
*/ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) { s64 bn; struct btstack btstack; int cmp; int index; int locked_leaves = 0; struct metapage *mp; xtpage_t *p; struct btframe *parent; int rc; struct tblock *tblk; struct tlock *tlck = NULL; xad_t *xad; int xlen; s64 xoff; struct xtlock *xtlck = NULL; /* save object truncation type */ tblk = tid_to_tblock(tid); tblk->xflag |= COMMIT_PMAP; /* clear stack */ BT_CLR(&btstack); if (committed_size) { xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1; rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0); if (rc) return rc; XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); if (cmp != 0) { XT_PUTPAGE(mp); jfs_error(ip->i_sb, "did not find extent\n"); return -EIO; } } else { /* * start with root * * root resides in the inode */ bn = 0; /* * first access of each page: */ getPage: XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); if (rc) return rc; /* process entries backward from last index */ index = le16_to_cpu(p->header.nextindex) - 1; if (p->header.flag & BT_INTERNAL) goto getChild; } /* * leaf page */ if (++locked_leaves > MAX_TRUNCATE_LEAVES) { /* * We need to limit the size of the transaction * to avoid exhausting pagecache & tlocks */ xad = &p->xad[index]; xoff = offsetXAD(xad); xlen = lengthXAD(xad); XT_PUTPAGE(mp); return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; } tlck = txLock(tid, ip, mp, tlckXTREE); tlck->type = tlckXTREE | tlckFREE; xtlck = (struct xtlock *) & tlck->lock; xtlck->hwm.offset = index; XT_PUTPAGE(mp); /* * go back up to the parent page */ getParent: /* pop/restore parent entry for the current child page */ if ((parent = BT_POP(&btstack)) == NULL) /* current page must have been root */ goto out; /* get back the parent page */ bn = parent->bn; XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); if (rc) return rc; index = parent->index; /* * parent page become empty: free the page */ if (index == XTENTRYSTART) { /* txCommit() with tlckFREE: * free child extents covered by parent; * invalidate parent if COMMIT_PWMAP; */ tlck = txLock(tid, ip, mp, tlckXTREE); xtlck = (struct xtlock *) & tlck->lock; xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1; tlck->type = tlckXTREE | tlckFREE; XT_PUTPAGE(mp); if (p->header.flag & BT_ROOT) { goto out; } else { goto getParent; } } /* * parent page still has entries for front region; */ else index--; /* * internal page: go down to child page of current entry */ getChild: /* save current parent entry for the child page */ if (BT_STACK_FULL(&btstack)) { jfs_error(ip->i_sb, "stack overrun!\n"); XT_PUTPAGE(mp); return -EIO; } BT_PUSH(&btstack, bn, index); /* get child page */ xad = &p->xad[index]; bn = addressXAD(xad); /* * first access of each internal entry: */ /* release parent page */ XT_PUTPAGE(mp); /* process the child page */ goto getPage; out: return 0; } #ifdef CONFIG_JFS_STATISTICS int jfs_xtstat_proc_show(struct seq_file *m, void *v) { seq_printf(m, "JFS Xtree statistics\n" "====================\n" "searches = %d\n" "fast searches = %d\n" "splits = %d\n", xtStat.search, xtStat.fastSearch, xtStat.split); return 0; } #endif
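The block rounding used by both truncation paths above is worth spelling out. A minimal stand-alone sketch of the teof computation in xtTruncate(), assuming a 4 KiB block size (the real values come from JFS_SBI(ip->i_sb)->bsize and ->l2bsize):

/* teof = (newsize + (bsize - 1)) >> l2bsize: round the new size up to a
 * block boundary, then convert bytes to blocks; extents at or beyond
 * teof are freed, while the block containing newsize itself is kept. */
static inline long long teof_blocks(long long newsize)
{
	return (newsize + 4095) >> 12;	/* 8192 -> 2 blocks, 8193 -> 3 blocks */
}

The surviving partial block is cleared between newsize and the next block boundary rather than deallocated, as noted in the xtTruncate() header comment above.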
// SPDX-License-Identifier: GPL-2.0-or-later /* * Driver for NXP PN533 NFC Chip - core functions * * Copyright (C) 2011 Instituto Nokia de Tecnologia * Copyright (C) 2012-2013 Tieto Poland */ #include <linux/device.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/nfc.h> #include <linux/netdevice.h> #include <net/nfc/nfc.h> #include "pn533.h" #define VERSION "0.3" /* How much time we spend listening for initiators */ #define PN533_LISTEN_TIME 2 /* Delay between each poll frame (ms) */ #define PN533_POLL_INTERVAL 10 /* structs for pn533 commands */ /* PN533_CMD_GET_FIRMWARE_VERSION */ struct pn533_fw_version { u8 ic; u8 ver; u8 rev; u8 support; }; /* PN533_CMD_RF_CONFIGURATION */ #define PN533_CFGITEM_RF_FIELD 0x01 #define PN533_CFGITEM_TIMING 0x02 #define PN533_CFGITEM_MAX_RETRIES 0x05 #define PN533_CFGITEM_PASORI 0x82 #define PN533_CFGITEM_RF_FIELD_AUTO_RFCA 0x2 #define PN533_CFGITEM_RF_FIELD_ON 0x1 #define
PN533_CFGITEM_RF_FIELD_OFF 0x0 #define PN533_CONFIG_TIMING_102 0xb #define PN533_CONFIG_TIMING_204 0xc #define PN533_CONFIG_TIMING_409 0xd #define PN533_CONFIG_TIMING_819 0xe #define PN533_CONFIG_MAX_RETRIES_NO_RETRY 0x00 #define PN533_CONFIG_MAX_RETRIES_ENDLESS 0xFF struct pn533_config_max_retries { u8 mx_rty_atr; u8 mx_rty_psl; u8 mx_rty_passive_act; } __packed; struct pn533_config_timing { u8 rfu; u8 atr_res_timeout; u8 dep_timeout; } __packed; /* PN533_CMD_IN_LIST_PASSIVE_TARGET */ /* felica commands opcode */ #define PN533_FELICA_OPC_SENSF_REQ 0 #define PN533_FELICA_OPC_SENSF_RES 1 /* felica SENSF_REQ parameters */ #define PN533_FELICA_SENSF_SC_ALL 0xFFFF #define PN533_FELICA_SENSF_RC_NO_SYSTEM_CODE 0 #define PN533_FELICA_SENSF_RC_SYSTEM_CODE 1 #define PN533_FELICA_SENSF_RC_ADVANCED_PROTOCOL 2 /* type B initiator_data values */ #define PN533_TYPE_B_AFI_ALL_FAMILIES 0 #define PN533_TYPE_B_POLL_METHOD_TIMESLOT 0 #define PN533_TYPE_B_POLL_METHOD_PROBABILISTIC 1 union pn533_cmd_poll_initdata { struct { u8 afi; u8 polling_method; } __packed type_b; struct { u8 opcode; __be16 sc; u8 rc; u8 tsn; } __packed felica; }; struct pn533_poll_modulations { struct { u8 maxtg; u8 brty; union pn533_cmd_poll_initdata initiator_data; } __packed data; u8 len; }; static const struct pn533_poll_modulations poll_mod[] = { [PN533_POLL_MOD_106KBPS_A] = { .data = { .maxtg = 1, .brty = 0, }, .len = 2, }, [PN533_POLL_MOD_212KBPS_FELICA] = { .data = { .maxtg = 1, .brty = 1, .initiator_data.felica = { .opcode = PN533_FELICA_OPC_SENSF_REQ, .sc = PN533_FELICA_SENSF_SC_ALL, .rc = PN533_FELICA_SENSF_RC_SYSTEM_CODE, .tsn = 0x03, }, }, .len = 7, }, [PN533_POLL_MOD_424KBPS_FELICA] = { .data = { .maxtg = 1, .brty = 2, .initiator_data.felica = { .opcode = PN533_FELICA_OPC_SENSF_REQ, .sc = PN533_FELICA_SENSF_SC_ALL, .rc = PN533_FELICA_SENSF_RC_SYSTEM_CODE, .tsn = 0x03, }, }, .len = 7, }, [PN533_POLL_MOD_106KBPS_JEWEL] = { .data = { .maxtg = 1, .brty = 4, }, .len = 2, }, [PN533_POLL_MOD_847KBPS_B] = { .data = { .maxtg = 1, .brty = 8, .initiator_data.type_b = { .afi = PN533_TYPE_B_AFI_ALL_FAMILIES, .polling_method = PN533_TYPE_B_POLL_METHOD_TIMESLOT, }, }, .len = 3, }, [PN533_LISTEN_MOD] = { .len = 0, }, }; /* PN533_CMD_IN_ATR */ struct pn533_cmd_activate_response { u8 status; u8 nfcid3t[10]; u8 didt; u8 bst; u8 brt; u8 to; u8 ppt; /* optional */ u8 gt[]; } __packed; struct pn533_cmd_jump_dep_response { u8 status; u8 tg; u8 nfcid3t[10]; u8 didt; u8 bst; u8 brt; u8 to; u8 ppt; /* optional */ u8 gt[]; } __packed; struct pn532_autopoll_resp { u8 type; u8 ln; u8 tg; u8 tgdata[]; }; /* PN532_CMD_IN_AUTOPOLL */ #define PN532_AUTOPOLL_POLLNR_INFINITE 0xff #define PN532_AUTOPOLL_PERIOD 0x03 /* in units of 150 ms */ #define PN532_AUTOPOLL_TYPE_GENERIC_106 0x00 #define PN532_AUTOPOLL_TYPE_GENERIC_212 0x01 #define PN532_AUTOPOLL_TYPE_GENERIC_424 0x02 #define PN532_AUTOPOLL_TYPE_JEWEL 0x04 #define PN532_AUTOPOLL_TYPE_MIFARE 0x10 #define PN532_AUTOPOLL_TYPE_FELICA212 0x11 #define PN532_AUTOPOLL_TYPE_FELICA424 0x12 #define PN532_AUTOPOLL_TYPE_ISOA 0x20 #define PN532_AUTOPOLL_TYPE_ISOB 0x23 #define PN532_AUTOPOLL_TYPE_DEP_PASSIVE_106 0x40 #define PN532_AUTOPOLL_TYPE_DEP_PASSIVE_212 0x41 #define PN532_AUTOPOLL_TYPE_DEP_PASSIVE_424 0x42 #define PN532_AUTOPOLL_TYPE_DEP_ACTIVE_106 0x80 #define PN532_AUTOPOLL_TYPE_DEP_ACTIVE_212 0x81 #define PN532_AUTOPOLL_TYPE_DEP_ACTIVE_424 0x82 /* PN533_TG_INIT_AS_TARGET */ #define PN533_INIT_TARGET_PASSIVE 0x1 #define PN533_INIT_TARGET_DEP 0x2 #define PN533_INIT_TARGET_RESP_FRAME_MASK 0x3 #define 
PN533_INIT_TARGET_RESP_ACTIVE 0x1 #define PN533_INIT_TARGET_RESP_DEP 0x4 /* The rule: value(high byte) + value(low byte) + checksum = 0 */ static inline u8 pn533_ext_checksum(u16 value) { return ~(u8)(((value & 0xFF00) >> 8) + (u8)(value & 0xFF)) + 1; } /* The rule: value + checksum = 0 */ static inline u8 pn533_std_checksum(u8 value) { return ~value + 1; } /* The rule: sum(data elements) + checksum = 0 */ static u8 pn533_std_data_checksum(u8 *data, int datalen) { u8 sum = 0; int i; for (i = 0; i < datalen; i++) sum += data[i]; return pn533_std_checksum(sum); } static void pn533_std_tx_frame_init(void *_frame, u8 cmd_code) { struct pn533_std_frame *frame = _frame; frame->preamble = 0; frame->start_frame = cpu_to_be16(PN533_STD_FRAME_SOF); PN533_STD_FRAME_IDENTIFIER(frame) = PN533_STD_FRAME_DIR_OUT; PN533_FRAME_CMD(frame) = cmd_code; frame->datalen = 2; } static void pn533_std_tx_frame_finish(void *_frame) { struct pn533_std_frame *frame = _frame; frame->datalen_checksum = pn533_std_checksum(frame->datalen); PN533_STD_FRAME_CHECKSUM(frame) = pn533_std_data_checksum(frame->data, frame->datalen); PN533_STD_FRAME_POSTAMBLE(frame) = 0; } static void pn533_std_tx_update_payload_len(void *_frame, int len) { struct pn533_std_frame *frame = _frame; frame->datalen += len; } static bool pn533_std_rx_frame_is_valid(void *_frame, struct pn533 *dev) { u8 checksum; struct pn533_std_frame *stdf = _frame; if (stdf->start_frame != cpu_to_be16(PN533_STD_FRAME_SOF)) return false; if (likely(!PN533_STD_IS_EXTENDED(stdf))) { /* Standard frame code */ dev->ops->rx_header_len = PN533_STD_FRAME_HEADER_LEN; checksum = pn533_std_checksum(stdf->datalen); if (checksum != stdf->datalen_checksum) return false; checksum = pn533_std_data_checksum(stdf->data, stdf->datalen); if (checksum != PN533_STD_FRAME_CHECKSUM(stdf)) return false; } else { /* Extended */ struct pn533_ext_frame *eif = _frame; dev->ops->rx_header_len = PN533_EXT_FRAME_HEADER_LEN; checksum = pn533_ext_checksum(be16_to_cpu(eif->datalen)); if (checksum != eif->datalen_checksum) return false; /* check data checksum */ checksum = pn533_std_data_checksum(eif->data, be16_to_cpu(eif->datalen)); if (checksum != PN533_EXT_FRAME_CHECKSUM(eif)) return false; } return true; } bool pn533_rx_frame_is_ack(void *_frame) { struct pn533_std_frame *frame = _frame; if (frame->start_frame != cpu_to_be16(PN533_STD_FRAME_SOF)) return false; if (frame->datalen != 0 || frame->datalen_checksum != 0xFF) return false; return true; } EXPORT_SYMBOL_GPL(pn533_rx_frame_is_ack); static inline int pn533_std_rx_frame_size(void *frame) { struct pn533_std_frame *f = frame; /* check for Extended Information frame */ if (PN533_STD_IS_EXTENDED(f)) { struct pn533_ext_frame *eif = frame; return sizeof(struct pn533_ext_frame) + be16_to_cpu(eif->datalen) + PN533_STD_FRAME_TAIL_LEN; } return sizeof(struct pn533_std_frame) + f->datalen + PN533_STD_FRAME_TAIL_LEN; } static u8 pn533_std_get_cmd_code(void *frame) { struct pn533_std_frame *f = frame; struct pn533_ext_frame *eif = frame; if (PN533_STD_IS_EXTENDED(f)) return PN533_FRAME_CMD(eif); else return PN533_FRAME_CMD(f); } bool pn533_rx_frame_is_cmd_response(struct pn533 *dev, void *frame) { return (dev->ops->get_cmd_code(frame) == PN533_CMD_RESPONSE(dev->cmd->code)); } EXPORT_SYMBOL_GPL(pn533_rx_frame_is_cmd_response); static struct pn533_frame_ops pn533_std_frame_ops = { .tx_frame_init = pn533_std_tx_frame_init, .tx_frame_finish = pn533_std_tx_frame_finish, .tx_update_payload_len = pn533_std_tx_update_payload_len, .tx_header_len = 
PN533_STD_FRAME_HEADER_LEN, .tx_tail_len = PN533_STD_FRAME_TAIL_LEN, .rx_is_frame_valid = pn533_std_rx_frame_is_valid, .rx_frame_size = pn533_std_rx_frame_size, .rx_header_len = PN533_STD_FRAME_HEADER_LEN, .rx_tail_len = PN533_STD_FRAME_TAIL_LEN, .max_payload_len = PN533_STD_FRAME_MAX_PAYLOAD_LEN, .get_cmd_code = pn533_std_get_cmd_code, }; static void pn533_build_cmd_frame(struct pn533 *dev, u8 cmd_code, struct sk_buff *skb) { /* payload is already there, just update datalen */ int payload_len = skb->len; struct pn533_frame_ops *ops = dev->ops; skb_push(skb, ops->tx_header_len); skb_put(skb, ops->tx_tail_len); ops->tx_frame_init(skb->data, cmd_code); ops->tx_update_payload_len(skb->data, payload_len); ops->tx_frame_finish(skb->data); } static int pn533_send_async_complete(struct pn533 *dev) { struct pn533_cmd *cmd = dev->cmd; struct sk_buff *resp; int status, rc = 0; if (!cmd) { dev_dbg(dev->dev, "%s: cmd not set\n", __func__); goto done; } dev_kfree_skb(cmd->req); status = cmd->status; resp = cmd->resp; if (status < 0) { rc = cmd->complete_cb(dev, cmd->complete_cb_context, ERR_PTR(status)); dev_kfree_skb(resp); goto done; } /* when no response is set we got interrupted */ if (!resp) resp = ERR_PTR(-EINTR); if (!IS_ERR(resp)) { skb_pull(resp, dev->ops->rx_header_len); skb_trim(resp, resp->len - dev->ops->rx_tail_len); } rc = cmd->complete_cb(dev, cmd->complete_cb_context, resp); done: kfree(cmd); dev->cmd = NULL; return rc; } static int __pn533_send_async(struct pn533 *dev, u8 cmd_code, struct sk_buff *req, pn533_send_async_complete_t complete_cb, void *complete_cb_context) { struct pn533_cmd *cmd; int rc = 0; dev_dbg(dev->dev, "Sending command 0x%x\n", cmd_code); cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); if (!cmd) return -ENOMEM; cmd->code = cmd_code; cmd->req = req; cmd->complete_cb = complete_cb; cmd->complete_cb_context = complete_cb_context; pn533_build_cmd_frame(dev, cmd_code, req); mutex_lock(&dev->cmd_lock); if (!dev->cmd_pending) { dev->cmd = cmd; rc = dev->phy_ops->send_frame(dev, req); if (rc) { dev->cmd = NULL; goto error; } dev->cmd_pending = 1; goto unlock; } dev_dbg(dev->dev, "%s Queueing command 0x%x\n", __func__, cmd_code); INIT_LIST_HEAD(&cmd->queue); list_add_tail(&cmd->queue, &dev->cmd_queue); goto unlock; error: kfree(cmd); unlock: mutex_unlock(&dev->cmd_lock); return rc; } static int pn533_send_data_async(struct pn533 *dev, u8 cmd_code, struct sk_buff *req, pn533_send_async_complete_t complete_cb, void *complete_cb_context) { return __pn533_send_async(dev, cmd_code, req, complete_cb, complete_cb_context); } static int pn533_send_cmd_async(struct pn533 *dev, u8 cmd_code, struct sk_buff *req, pn533_send_async_complete_t complete_cb, void *complete_cb_context) { return __pn533_send_async(dev, cmd_code, req, complete_cb, complete_cb_context); } /* * pn533_send_cmd_direct_async * * The function sends a priority cmd directly to the chip omitting the cmd * queue. It's intended to be used by chaining mechanism of received responses * where the host has to request every single chunk of data before scheduling * next cmd from the queue. 
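 *
 * As an example of that chaining, the target-mode receive path in this
 * file uses it: pn533_tm_get_data_complete() re-queues mi_tm_rx_work for
 * as long as the MI bit is set in a response, and pn533_wq_tm_mi_recv()
 * then issues the next PN533_CMD_TG_GET_DATA through
 * pn533_send_cmd_direct_async() instead of the regular command queue.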
*/ static int pn533_send_cmd_direct_async(struct pn533 *dev, u8 cmd_code, struct sk_buff *req, pn533_send_async_complete_t complete_cb, void *complete_cb_context) { struct pn533_cmd *cmd; int rc; cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); if (!cmd) return -ENOMEM; cmd->code = cmd_code; cmd->req = req; cmd->complete_cb = complete_cb; cmd->complete_cb_context = complete_cb_context; pn533_build_cmd_frame(dev, cmd_code, req); dev->cmd = cmd; rc = dev->phy_ops->send_frame(dev, req); if (rc < 0) { dev->cmd = NULL; kfree(cmd); } return rc; } static void pn533_wq_cmd_complete(struct work_struct *work) { struct pn533 *dev = container_of(work, struct pn533, cmd_complete_work); int rc; rc = pn533_send_async_complete(dev); if (rc != -EINPROGRESS) queue_work(dev->wq, &dev->cmd_work); } static void pn533_wq_cmd(struct work_struct *work) { struct pn533 *dev = container_of(work, struct pn533, cmd_work); struct pn533_cmd *cmd; int rc; mutex_lock(&dev->cmd_lock); if (list_empty(&dev->cmd_queue)) { dev->cmd_pending = 0; mutex_unlock(&dev->cmd_lock); return; } cmd = list_first_entry(&dev->cmd_queue, struct pn533_cmd, queue); list_del(&cmd->queue); mutex_unlock(&dev->cmd_lock); dev->cmd = cmd; rc = dev->phy_ops->send_frame(dev, cmd->req); if (rc < 0) { dev->cmd = NULL; dev_kfree_skb(cmd->req); kfree(cmd); return; } } struct pn533_sync_cmd_response { struct sk_buff *resp; struct completion done; }; static int pn533_send_sync_complete(struct pn533 *dev, void *_arg, struct sk_buff *resp) { struct pn533_sync_cmd_response *arg = _arg; arg->resp = resp; complete(&arg->done); return 0; } /* pn533_send_cmd_sync * * Please note the req parameter is freed inside the function to * limit a number of return value interpretations by the caller. * * 1. negative in case of error during TX path -> req should be freed * * 2. negative in case of error during RX path -> req should not be freed * as it's been already freed at the beginning of RX path by * async_complete_cb. * * 3. valid pointer in case of successful RX path * * A caller has to check a return value with IS_ERR macro. If the test pass, * the returned pointer is valid. 
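 *
 * A minimal sketch of the calling pattern (the command code is only an
 * example; the error handling mirrors the callers in this file):
 *
 *	skb = pn533_alloc_skb(dev, 0);
 *	if (!skb)
 *		return -ENOMEM;
 *	resp = pn533_send_cmd_sync(dev, PN533_CMD_GET_FIRMWARE_VERSION, skb);
 *	if (IS_ERR(resp))
 *		return PTR_ERR(resp);	(skb has already been consumed)
 *	... use resp->data ..., then dev_kfree_skb(resp);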
* */ static struct sk_buff *pn533_send_cmd_sync(struct pn533 *dev, u8 cmd_code, struct sk_buff *req) { int rc; struct pn533_sync_cmd_response arg; init_completion(&arg.done); rc = pn533_send_cmd_async(dev, cmd_code, req, pn533_send_sync_complete, &arg); if (rc) { dev_kfree_skb(req); return ERR_PTR(rc); } wait_for_completion(&arg.done); return arg.resp; } static struct sk_buff *pn533_alloc_skb(struct pn533 *dev, unsigned int size) { struct sk_buff *skb; skb = alloc_skb(dev->ops->tx_header_len + size + dev->ops->tx_tail_len, GFP_KERNEL); if (skb) skb_reserve(skb, dev->ops->tx_header_len); return skb; } struct pn533_target_type_a { __be16 sens_res; u8 sel_res; u8 nfcid_len; u8 nfcid_data[]; } __packed; #define PN533_TYPE_A_SENS_RES_NFCID1(x) ((u8)((be16_to_cpu(x) & 0x00C0) >> 6)) #define PN533_TYPE_A_SENS_RES_SSD(x) ((u8)((be16_to_cpu(x) & 0x001F) >> 0)) #define PN533_TYPE_A_SENS_RES_PLATCONF(x) ((u8)((be16_to_cpu(x) & 0x0F00) >> 8)) #define PN533_TYPE_A_SENS_RES_SSD_JEWEL 0x00 #define PN533_TYPE_A_SENS_RES_PLATCONF_JEWEL 0x0C #define PN533_TYPE_A_SEL_PROT(x) (((x) & 0x60) >> 5) #define PN533_TYPE_A_SEL_CASCADE(x) (((x) & 0x04) >> 2) #define PN533_TYPE_A_SEL_PROT_MIFARE 0 #define PN533_TYPE_A_SEL_PROT_ISO14443 1 #define PN533_TYPE_A_SEL_PROT_DEP 2 #define PN533_TYPE_A_SEL_PROT_ISO14443_DEP 3 static bool pn533_target_type_a_is_valid(struct pn533_target_type_a *type_a, int target_data_len) { u8 ssd; u8 platconf; if (target_data_len < sizeof(struct pn533_target_type_a)) return false; /* * The length check of nfcid[] and ats[] are not being performed because * the values are not being used */ /* Requirement 4.6.3.3 from NFC Forum Digital Spec */ ssd = PN533_TYPE_A_SENS_RES_SSD(type_a->sens_res); platconf = PN533_TYPE_A_SENS_RES_PLATCONF(type_a->sens_res); if ((ssd == PN533_TYPE_A_SENS_RES_SSD_JEWEL && platconf != PN533_TYPE_A_SENS_RES_PLATCONF_JEWEL) || (ssd != PN533_TYPE_A_SENS_RES_SSD_JEWEL && platconf == PN533_TYPE_A_SENS_RES_PLATCONF_JEWEL)) return false; /* Requirements 4.8.2.1, 4.8.2.3, 4.8.2.5 and 4.8.2.7 from NFC Forum */ if (PN533_TYPE_A_SEL_CASCADE(type_a->sel_res) != 0) return false; if (type_a->nfcid_len > NFC_NFCID1_MAXSIZE) return false; return true; } static int pn533_target_found_type_a(struct nfc_target *nfc_tgt, u8 *tgt_data, int tgt_data_len) { struct pn533_target_type_a *tgt_type_a; tgt_type_a = (struct pn533_target_type_a *)tgt_data; if (!pn533_target_type_a_is_valid(tgt_type_a, tgt_data_len)) return -EPROTO; switch (PN533_TYPE_A_SEL_PROT(tgt_type_a->sel_res)) { case PN533_TYPE_A_SEL_PROT_MIFARE: nfc_tgt->supported_protocols = NFC_PROTO_MIFARE_MASK; break; case PN533_TYPE_A_SEL_PROT_ISO14443: nfc_tgt->supported_protocols = NFC_PROTO_ISO14443_MASK; break; case PN533_TYPE_A_SEL_PROT_DEP: nfc_tgt->supported_protocols = NFC_PROTO_NFC_DEP_MASK; break; case PN533_TYPE_A_SEL_PROT_ISO14443_DEP: nfc_tgt->supported_protocols = NFC_PROTO_ISO14443_MASK | NFC_PROTO_NFC_DEP_MASK; break; } nfc_tgt->sens_res = be16_to_cpu(tgt_type_a->sens_res); nfc_tgt->sel_res = tgt_type_a->sel_res; nfc_tgt->nfcid1_len = tgt_type_a->nfcid_len; memcpy(nfc_tgt->nfcid1, tgt_type_a->nfcid_data, nfc_tgt->nfcid1_len); return 0; } struct pn533_target_felica { u8 pol_res; u8 opcode; u8 nfcid2[NFC_NFCID2_MAXSIZE]; u8 pad[8]; /* optional */ u8 syst_code[]; } __packed; #define PN533_FELICA_SENSF_NFCID2_DEP_B1 0x01 #define PN533_FELICA_SENSF_NFCID2_DEP_B2 0xFE static bool pn533_target_felica_is_valid(struct pn533_target_felica *felica, int target_data_len) { if (target_data_len < sizeof(struct pn533_target_felica)) 
return false; if (felica->opcode != PN533_FELICA_OPC_SENSF_RES) return false; return true; } static int pn533_target_found_felica(struct nfc_target *nfc_tgt, u8 *tgt_data, int tgt_data_len) { struct pn533_target_felica *tgt_felica; tgt_felica = (struct pn533_target_felica *)tgt_data; if (!pn533_target_felica_is_valid(tgt_felica, tgt_data_len)) return -EPROTO; if ((tgt_felica->nfcid2[0] == PN533_FELICA_SENSF_NFCID2_DEP_B1) && (tgt_felica->nfcid2[1] == PN533_FELICA_SENSF_NFCID2_DEP_B2)) nfc_tgt->supported_protocols = NFC_PROTO_NFC_DEP_MASK; else nfc_tgt->supported_protocols = NFC_PROTO_FELICA_MASK; memcpy(nfc_tgt->sensf_res, &tgt_felica->opcode, 9); nfc_tgt->sensf_res_len = 9; memcpy(nfc_tgt->nfcid2, tgt_felica->nfcid2, NFC_NFCID2_MAXSIZE); nfc_tgt->nfcid2_len = NFC_NFCID2_MAXSIZE; return 0; } struct pn533_target_jewel { __be16 sens_res; u8 jewelid[4]; } __packed; static bool pn533_target_jewel_is_valid(struct pn533_target_jewel *jewel, int target_data_len) { u8 ssd; u8 platconf; if (target_data_len < sizeof(struct pn533_target_jewel)) return false; /* Requirement 4.6.3.3 from NFC Forum Digital Spec */ ssd = PN533_TYPE_A_SENS_RES_SSD(jewel->sens_res); platconf = PN533_TYPE_A_SENS_RES_PLATCONF(jewel->sens_res); if ((ssd == PN533_TYPE_A_SENS_RES_SSD_JEWEL && platconf != PN533_TYPE_A_SENS_RES_PLATCONF_JEWEL) || (ssd != PN533_TYPE_A_SENS_RES_SSD_JEWEL && platconf == PN533_TYPE_A_SENS_RES_PLATCONF_JEWEL)) return false; return true; } static int pn533_target_found_jewel(struct nfc_target *nfc_tgt, u8 *tgt_data, int tgt_data_len) { struct pn533_target_jewel *tgt_jewel; tgt_jewel = (struct pn533_target_jewel *)tgt_data; if (!pn533_target_jewel_is_valid(tgt_jewel, tgt_data_len)) return -EPROTO; nfc_tgt->supported_protocols = NFC_PROTO_JEWEL_MASK; nfc_tgt->sens_res = be16_to_cpu(tgt_jewel->sens_res); nfc_tgt->nfcid1_len = 4; memcpy(nfc_tgt->nfcid1, tgt_jewel->jewelid, nfc_tgt->nfcid1_len); return 0; } struct pn533_type_b_prot_info { u8 bitrate; u8 fsci_type; u8 fwi_adc_fo; } __packed; #define PN533_TYPE_B_PROT_FCSI(x) (((x) & 0xF0) >> 4) #define PN533_TYPE_B_PROT_TYPE(x) (((x) & 0x0F) >> 0) #define PN533_TYPE_B_PROT_TYPE_RFU_MASK 0x8 struct pn533_type_b_sens_res { u8 opcode; u8 nfcid[4]; u8 appdata[4]; struct pn533_type_b_prot_info prot_info; } __packed; #define PN533_TYPE_B_OPC_SENSB_RES 0x50 struct pn533_target_type_b { struct pn533_type_b_sens_res sensb_res; u8 attrib_res_len; u8 attrib_res[]; } __packed; static bool pn533_target_type_b_is_valid(struct pn533_target_type_b *type_b, int target_data_len) { if (target_data_len < sizeof(struct pn533_target_type_b)) return false; if (type_b->sensb_res.opcode != PN533_TYPE_B_OPC_SENSB_RES) return false; if (PN533_TYPE_B_PROT_TYPE(type_b->sensb_res.prot_info.fsci_type) & PN533_TYPE_B_PROT_TYPE_RFU_MASK) return false; return true; } static int pn533_target_found_type_b(struct nfc_target *nfc_tgt, u8 *tgt_data, int tgt_data_len) { struct pn533_target_type_b *tgt_type_b; tgt_type_b = (struct pn533_target_type_b *)tgt_data; if (!pn533_target_type_b_is_valid(tgt_type_b, tgt_data_len)) return -EPROTO; nfc_tgt->supported_protocols = NFC_PROTO_ISO14443_B_MASK; return 0; } static void pn533_poll_reset_mod_list(struct pn533 *dev); static int pn533_target_found(struct pn533 *dev, u8 tg, u8 *tgdata, int tgdata_len) { struct nfc_target nfc_tgt; int rc; dev_dbg(dev->dev, "%s: modulation=%d\n", __func__, dev->poll_mod_curr); if (tg != 1) return -EPROTO; memset(&nfc_tgt, 0, sizeof(struct nfc_target)); switch (dev->poll_mod_curr) { case PN533_POLL_MOD_106KBPS_A: rc = 
pn533_target_found_type_a(&nfc_tgt, tgdata, tgdata_len); break; case PN533_POLL_MOD_212KBPS_FELICA: case PN533_POLL_MOD_424KBPS_FELICA: rc = pn533_target_found_felica(&nfc_tgt, tgdata, tgdata_len); break; case PN533_POLL_MOD_106KBPS_JEWEL: rc = pn533_target_found_jewel(&nfc_tgt, tgdata, tgdata_len); break; case PN533_POLL_MOD_847KBPS_B: rc = pn533_target_found_type_b(&nfc_tgt, tgdata, tgdata_len); break; default: nfc_err(dev->dev, "Unknown current poll modulation\n"); return -EPROTO; } if (rc) return rc; if (!(nfc_tgt.supported_protocols & dev->poll_protocols)) { dev_dbg(dev->dev, "The Tg found doesn't have the desired protocol\n"); return -EAGAIN; } dev_dbg(dev->dev, "Target found - supported protocols: 0x%x\n", nfc_tgt.supported_protocols); dev->tgt_available_prots = nfc_tgt.supported_protocols; pn533_poll_reset_mod_list(dev); nfc_targets_found(dev->nfc_dev, &nfc_tgt, 1); return 0; } static inline void pn533_poll_next_mod(struct pn533 *dev) { dev->poll_mod_curr = (dev->poll_mod_curr + 1) % dev->poll_mod_count; } static void pn533_poll_reset_mod_list(struct pn533 *dev) { dev->poll_mod_count = 0; } static void pn533_poll_add_mod(struct pn533 *dev, u8 mod_index) { dev->poll_mod_active[dev->poll_mod_count] = (struct pn533_poll_modulations *)&poll_mod[mod_index]; dev->poll_mod_count++; } static void pn533_poll_create_mod_list(struct pn533 *dev, u32 im_protocols, u32 tm_protocols) { pn533_poll_reset_mod_list(dev); if ((im_protocols & NFC_PROTO_MIFARE_MASK) || (im_protocols & NFC_PROTO_ISO14443_MASK) || (im_protocols & NFC_PROTO_NFC_DEP_MASK)) pn533_poll_add_mod(dev, PN533_POLL_MOD_106KBPS_A); if (im_protocols & NFC_PROTO_FELICA_MASK || im_protocols & NFC_PROTO_NFC_DEP_MASK) { pn533_poll_add_mod(dev, PN533_POLL_MOD_212KBPS_FELICA); pn533_poll_add_mod(dev, PN533_POLL_MOD_424KBPS_FELICA); } if (im_protocols & NFC_PROTO_JEWEL_MASK) pn533_poll_add_mod(dev, PN533_POLL_MOD_106KBPS_JEWEL); if (im_protocols & NFC_PROTO_ISO14443_B_MASK) pn533_poll_add_mod(dev, PN533_POLL_MOD_847KBPS_B); if (tm_protocols) pn533_poll_add_mod(dev, PN533_LISTEN_MOD); } static int pn533_start_poll_complete(struct pn533 *dev, struct sk_buff *resp) { u8 nbtg, tg, *tgdata; int rc, tgdata_len; /* Toggle the DEP polling */ if (dev->poll_protocols & NFC_PROTO_NFC_DEP_MASK) dev->poll_dep = 1; nbtg = resp->data[0]; tg = resp->data[1]; tgdata = &resp->data[2]; tgdata_len = resp->len - 2; /* nbtg + tg */ if (nbtg) { rc = pn533_target_found(dev, tg, tgdata, tgdata_len); /* We must stop the poll after a valid target found */ if (rc == 0) return 0; } return -EAGAIN; } static struct sk_buff *pn533_alloc_poll_tg_frame(struct pn533 *dev) { struct sk_buff *skb; u8 *felica, *nfcid3; u8 *gbytes = dev->gb; size_t gbytes_len = dev->gb_len; u8 felica_params[18] = {0x1, 0xfe, /* DEP */ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, /* random */ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff, 0xff}; /* System code */ u8 mifare_params[6] = {0x1, 0x1, /* SENS_RES */ 0x0, 0x0, 0x0, 0x40}; /* SEL_RES for DEP */ unsigned int skb_len = 36 + /* * mode (1), mifare (6), * felica (18), nfcid3 (10), gb_len (1) */ gbytes_len + 1; /* len Tk*/ skb = pn533_alloc_skb(dev, skb_len); if (!skb) return NULL; /* DEP support only */ skb_put_u8(skb, PN533_INIT_TARGET_DEP); /* MIFARE params */ skb_put_data(skb, mifare_params, 6); /* Felica params */ felica = skb_put_data(skb, felica_params, 18); get_random_bytes(felica + 2, 6); /* NFCID3 */ nfcid3 = skb_put_zero(skb, 10); memcpy(nfcid3, felica, 8); /* General bytes */ skb_put_u8(skb, gbytes_len); skb_put_data(skb, gbytes, gbytes_len); /* 
Len Tk */ skb_put_u8(skb, 0); return skb; } static void pn533_wq_tm_mi_recv(struct work_struct *work); static struct sk_buff *pn533_build_response(struct pn533 *dev); static int pn533_tm_get_data_complete(struct pn533 *dev, void *arg, struct sk_buff *resp) { struct sk_buff *skb; u8 status, ret, mi; int rc; if (IS_ERR(resp)) { skb_queue_purge(&dev->resp_q); return PTR_ERR(resp); } status = resp->data[0]; ret = status & PN533_CMD_RET_MASK; mi = status & PN533_CMD_MI_MASK; skb_pull(resp, sizeof(status)); if (ret != PN533_CMD_RET_SUCCESS) { rc = -EIO; goto error; } skb_queue_tail(&dev->resp_q, resp); if (mi) { queue_work(dev->wq, &dev->mi_tm_rx_work); return -EINPROGRESS; } skb = pn533_build_response(dev); if (!skb) { rc = -EIO; goto error; } return nfc_tm_data_received(dev->nfc_dev, skb); error: nfc_tm_deactivated(dev->nfc_dev); dev->tgt_mode = 0; skb_queue_purge(&dev->resp_q); dev_kfree_skb(resp); return rc; } static void pn533_wq_tm_mi_recv(struct work_struct *work) { struct pn533 *dev = container_of(work, struct pn533, mi_tm_rx_work); struct sk_buff *skb; int rc; skb = pn533_alloc_skb(dev, 0); if (!skb) return; rc = pn533_send_cmd_direct_async(dev, PN533_CMD_TG_GET_DATA, skb, pn533_tm_get_data_complete, NULL); if (rc < 0) dev_kfree_skb(skb); } static int pn533_tm_send_complete(struct pn533 *dev, void *arg, struct sk_buff *resp); static void pn533_wq_tm_mi_send(struct work_struct *work) { struct pn533 *dev = container_of(work, struct pn533, mi_tm_tx_work); struct sk_buff *skb; int rc; /* Grab the first skb in the queue */ skb = skb_dequeue(&dev->fragment_skb); if (skb == NULL) { /* No more data */ /* Reset the queue for future use */ skb_queue_head_init(&dev->fragment_skb); goto error; } /* last entry - remove MI bit */ if (skb_queue_len(&dev->fragment_skb) == 0) { rc = pn533_send_cmd_direct_async(dev, PN533_CMD_TG_SET_DATA, skb, pn533_tm_send_complete, NULL); } else rc = pn533_send_cmd_direct_async(dev, PN533_CMD_TG_SET_META_DATA, skb, pn533_tm_send_complete, NULL); if (rc == 0) /* success */ return; dev_err(dev->dev, "Error %d when trying to perform set meta data_exchange", rc); dev_kfree_skb(skb); error: dev->phy_ops->send_ack(dev, GFP_KERNEL); queue_work(dev->wq, &dev->cmd_work); } static void pn533_wq_tg_get_data(struct work_struct *work) { struct pn533 *dev = container_of(work, struct pn533, tg_work); struct sk_buff *skb; int rc; skb = pn533_alloc_skb(dev, 0); if (!skb) return; rc = pn533_send_data_async(dev, PN533_CMD_TG_GET_DATA, skb, pn533_tm_get_data_complete, NULL); if (rc < 0) dev_kfree_skb(skb); } #define ATR_REQ_GB_OFFSET 17 static int pn533_init_target_complete(struct pn533 *dev, struct sk_buff *resp) { u8 mode, *cmd, comm_mode = NFC_COMM_PASSIVE, *gb; size_t gb_len; int rc; if (resp->len < ATR_REQ_GB_OFFSET + 1) return -EINVAL; mode = resp->data[0]; cmd = &resp->data[1]; dev_dbg(dev->dev, "Target mode 0x%x len %d\n", mode, resp->len); if ((mode & PN533_INIT_TARGET_RESP_FRAME_MASK) == PN533_INIT_TARGET_RESP_ACTIVE) comm_mode = NFC_COMM_ACTIVE; if ((mode & PN533_INIT_TARGET_RESP_DEP) == 0) /* Only DEP supported */ return -EOPNOTSUPP; gb = cmd + ATR_REQ_GB_OFFSET; gb_len = resp->len - (ATR_REQ_GB_OFFSET + 1); rc = nfc_tm_activated(dev->nfc_dev, NFC_PROTO_NFC_DEP_MASK, comm_mode, gb, gb_len); if (rc < 0) { nfc_err(dev->dev, "Error when signaling target activation\n"); return rc; } dev->tgt_mode = 1; queue_work(dev->wq, &dev->tg_work); return 0; } static void pn533_listen_mode_timer(struct timer_list *t) { struct pn533 *dev = from_timer(dev, t, listen_timer); dev->cancel_listen = 
1; pn533_poll_next_mod(dev); queue_delayed_work(dev->wq, &dev->poll_work, msecs_to_jiffies(PN533_POLL_INTERVAL)); } static int pn533_rf_complete(struct pn533 *dev, void *arg, struct sk_buff *resp) { int rc = 0; if (IS_ERR(resp)) { rc = PTR_ERR(resp); nfc_err(dev->dev, "RF setting error %d\n", rc); return rc; } queue_delayed_work(dev->wq, &dev->poll_work, msecs_to_jiffies(PN533_POLL_INTERVAL)); dev_kfree_skb(resp); return rc; } static void pn533_wq_rf(struct work_struct *work) { struct pn533 *dev = container_of(work, struct pn533, rf_work); struct sk_buff *skb; int rc; skb = pn533_alloc_skb(dev, 2); if (!skb) return; skb_put_u8(skb, PN533_CFGITEM_RF_FIELD); skb_put_u8(skb, PN533_CFGITEM_RF_FIELD_AUTO_RFCA); rc = pn533_send_cmd_async(dev, PN533_CMD_RF_CONFIGURATION, skb, pn533_rf_complete, NULL); if (rc < 0) { dev_kfree_skb(skb); nfc_err(dev->dev, "RF setting error %d\n", rc); } } static int pn533_poll_dep_complete(struct pn533 *dev, void *arg, struct sk_buff *resp) { struct pn533_cmd_jump_dep_response *rsp; struct nfc_target nfc_target; u8 target_gt_len; int rc; if (IS_ERR(resp)) return PTR_ERR(resp); memset(&nfc_target, 0, sizeof(struct nfc_target)); rsp = (struct pn533_cmd_jump_dep_response *)resp->data; rc = rsp->status & PN533_CMD_RET_MASK; if (rc != PN533_CMD_RET_SUCCESS) { /* Not target found, turn radio off */ queue_work(dev->wq, &dev->rf_work); dev_kfree_skb(resp); return 0; } dev_dbg(dev->dev, "Creating new target"); nfc_target.supported_protocols = NFC_PROTO_NFC_DEP_MASK; nfc_target.nfcid1_len = 10; memcpy(nfc_target.nfcid1, rsp->nfcid3t, nfc_target.nfcid1_len); rc = nfc_targets_found(dev->nfc_dev, &nfc_target, 1); if (rc) goto error; dev->tgt_available_prots = 0; dev->tgt_active_prot = NFC_PROTO_NFC_DEP; /* ATR_RES general bytes are located at offset 17 */ target_gt_len = resp->len - 17; rc = nfc_set_remote_general_bytes(dev->nfc_dev, rsp->gt, target_gt_len); if (!rc) { rc = nfc_dep_link_is_up(dev->nfc_dev, dev->nfc_dev->targets[0].idx, 0, NFC_RF_INITIATOR); if (!rc) pn533_poll_reset_mod_list(dev); } error: dev_kfree_skb(resp); return rc; } #define PASSIVE_DATA_LEN 5 static int pn533_poll_dep(struct nfc_dev *nfc_dev) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); struct sk_buff *skb; int rc, skb_len; u8 *next, nfcid3[NFC_NFCID3_MAXSIZE]; u8 passive_data[PASSIVE_DATA_LEN] = {0x00, 0xff, 0xff, 0x00, 0x3}; if (!dev->gb) { dev->gb = nfc_get_local_general_bytes(nfc_dev, &dev->gb_len); if (!dev->gb || !dev->gb_len) { dev->poll_dep = 0; queue_work(dev->wq, &dev->rf_work); } } skb_len = 3 + dev->gb_len; /* ActPass + BR + Next */ skb_len += PASSIVE_DATA_LEN; /* NFCID3 */ skb_len += NFC_NFCID3_MAXSIZE; nfcid3[0] = 0x1; nfcid3[1] = 0xfe; get_random_bytes(nfcid3 + 2, 6); skb = pn533_alloc_skb(dev, skb_len); if (!skb) return -ENOMEM; skb_put_u8(skb, 0x01); /* Active */ skb_put_u8(skb, 0x02); /* 424 kbps */ next = skb_put(skb, 1); /* Next */ *next = 0; /* Copy passive data */ skb_put_data(skb, passive_data, PASSIVE_DATA_LEN); *next |= 1; /* Copy NFCID3 (which is NFCID2 from SENSF_RES) */ skb_put_data(skb, nfcid3, NFC_NFCID3_MAXSIZE); *next |= 2; skb_put_data(skb, dev->gb, dev->gb_len); *next |= 4; /* We have some Gi */ rc = pn533_send_cmd_async(dev, PN533_CMD_IN_JUMP_FOR_DEP, skb, pn533_poll_dep_complete, NULL); if (rc < 0) dev_kfree_skb(skb); return rc; } static int pn533_autopoll_complete(struct pn533 *dev, void *arg, struct sk_buff *resp) { struct pn532_autopoll_resp *apr; struct nfc_target nfc_tgt; u8 nbtg; int rc; if (IS_ERR(resp)) { rc = PTR_ERR(resp); nfc_err(dev->dev, "%s autopoll 
complete error %d\n", __func__, rc); if (rc == -ENOENT) { if (dev->poll_mod_count != 0) return rc; goto stop_poll; } else if (rc < 0) { nfc_err(dev->dev, "Error %d when running autopoll\n", rc); goto stop_poll; } } nbtg = resp->data[0]; if ((nbtg > 2) || (nbtg <= 0)) return -EAGAIN; apr = (struct pn532_autopoll_resp *)&resp->data[1]; while (nbtg--) { memset(&nfc_tgt, 0, sizeof(struct nfc_target)); switch (apr->type) { case PN532_AUTOPOLL_TYPE_ISOA: dev_dbg(dev->dev, "ISOA\n"); rc = pn533_target_found_type_a(&nfc_tgt, apr->tgdata, apr->ln - 1); break; case PN532_AUTOPOLL_TYPE_FELICA212: case PN532_AUTOPOLL_TYPE_FELICA424: dev_dbg(dev->dev, "FELICA\n"); rc = pn533_target_found_felica(&nfc_tgt, apr->tgdata, apr->ln - 1); break; case PN532_AUTOPOLL_TYPE_JEWEL: dev_dbg(dev->dev, "JEWEL\n"); rc = pn533_target_found_jewel(&nfc_tgt, apr->tgdata, apr->ln - 1); break; case PN532_AUTOPOLL_TYPE_ISOB: dev_dbg(dev->dev, "ISOB\n"); rc = pn533_target_found_type_b(&nfc_tgt, apr->tgdata, apr->ln - 1); break; case PN532_AUTOPOLL_TYPE_MIFARE: dev_dbg(dev->dev, "Mifare\n"); rc = pn533_target_found_type_a(&nfc_tgt, apr->tgdata, apr->ln - 1); break; default: nfc_err(dev->dev, "Unknown current poll modulation\n"); rc = -EPROTO; } if (rc) goto done; if (!(nfc_tgt.supported_protocols & dev->poll_protocols)) { nfc_err(dev->dev, "The Tg found doesn't have the desired protocol\n"); rc = -EAGAIN; goto done; } dev->tgt_available_prots = nfc_tgt.supported_protocols; apr = (struct pn532_autopoll_resp *) (apr->tgdata + (apr->ln - 1)); } pn533_poll_reset_mod_list(dev); nfc_targets_found(dev->nfc_dev, &nfc_tgt, 1); done: dev_kfree_skb(resp); return rc; stop_poll: nfc_err(dev->dev, "autopoll operation has been stopped\n"); pn533_poll_reset_mod_list(dev); dev->poll_protocols = 0; return rc; } static int pn533_poll_complete(struct pn533 *dev, void *arg, struct sk_buff *resp) { struct pn533_poll_modulations *cur_mod; int rc; if (IS_ERR(resp)) { rc = PTR_ERR(resp); nfc_err(dev->dev, "%s Poll complete error %d\n", __func__, rc); if (rc == -ENOENT) { if (dev->poll_mod_count != 0) return rc; goto stop_poll; } else if (rc < 0) { nfc_err(dev->dev, "Error %d when running poll\n", rc); goto stop_poll; } } cur_mod = dev->poll_mod_active[dev->poll_mod_curr]; if (cur_mod->len == 0) { /* Target mode */ del_timer(&dev->listen_timer); rc = pn533_init_target_complete(dev, resp); goto done; } /* Initiator mode */ rc = pn533_start_poll_complete(dev, resp); if (!rc) goto done; if (!dev->poll_mod_count) { dev_dbg(dev->dev, "Polling has been stopped\n"); goto done; } pn533_poll_next_mod(dev); /* Not target found, turn radio off */ queue_work(dev->wq, &dev->rf_work); done: dev_kfree_skb(resp); return rc; stop_poll: nfc_err(dev->dev, "Polling operation has been stopped\n"); pn533_poll_reset_mod_list(dev); dev->poll_protocols = 0; return rc; } static struct sk_buff *pn533_alloc_poll_in_frame(struct pn533 *dev, struct pn533_poll_modulations *mod) { struct sk_buff *skb; skb = pn533_alloc_skb(dev, mod->len); if (!skb) return NULL; skb_put_data(skb, &mod->data, mod->len); return skb; } static int pn533_send_poll_frame(struct pn533 *dev) { struct pn533_poll_modulations *mod; struct sk_buff *skb; int rc; u8 cmd_code; mod = dev->poll_mod_active[dev->poll_mod_curr]; dev_dbg(dev->dev, "%s mod len %d\n", __func__, mod->len); if ((dev->poll_protocols & NFC_PROTO_NFC_DEP_MASK) && dev->poll_dep) { dev->poll_dep = 0; return pn533_poll_dep(dev->nfc_dev); } if (mod->len == 0) { /* Listen mode */ cmd_code = PN533_CMD_TG_INIT_AS_TARGET; skb = 
pn533_alloc_poll_tg_frame(dev); } else { /* Polling mode */ cmd_code = PN533_CMD_IN_LIST_PASSIVE_TARGET; skb = pn533_alloc_poll_in_frame(dev, mod); } if (!skb) { nfc_err(dev->dev, "Failed to allocate skb\n"); return -ENOMEM; } rc = pn533_send_cmd_async(dev, cmd_code, skb, pn533_poll_complete, NULL); if (rc < 0) { dev_kfree_skb(skb); nfc_err(dev->dev, "Polling loop error %d\n", rc); } return rc; } static void pn533_wq_poll(struct work_struct *work) { struct pn533 *dev = container_of(work, struct pn533, poll_work.work); struct pn533_poll_modulations *cur_mod; int rc; cur_mod = dev->poll_mod_active[dev->poll_mod_curr]; dev_dbg(dev->dev, "%s cancel_listen %d modulation len %d\n", __func__, dev->cancel_listen, cur_mod->len); if (dev->cancel_listen == 1) { dev->cancel_listen = 0; dev->phy_ops->abort_cmd(dev, GFP_ATOMIC); } rc = pn533_send_poll_frame(dev); if (rc) return; if (cur_mod->len == 0 && dev->poll_mod_count > 1) mod_timer(&dev->listen_timer, jiffies + PN533_LISTEN_TIME * HZ); } static int pn533_start_poll(struct nfc_dev *nfc_dev, u32 im_protocols, u32 tm_protocols) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); struct pn533_poll_modulations *cur_mod; struct sk_buff *skb; u8 rand_mod; int rc; dev_dbg(dev->dev, "%s: im protocols 0x%x tm protocols 0x%x\n", __func__, im_protocols, tm_protocols); if (dev->tgt_active_prot) { nfc_err(dev->dev, "Cannot poll with a target already activated\n"); return -EBUSY; } if (dev->tgt_mode) { nfc_err(dev->dev, "Cannot poll while already being activated\n"); return -EBUSY; } if (tm_protocols) { dev->gb = nfc_get_local_general_bytes(nfc_dev, &dev->gb_len); if (dev->gb == NULL) tm_protocols = 0; } dev->poll_protocols = im_protocols; dev->listen_protocols = tm_protocols; if (dev->device_type == PN533_DEVICE_PN532_AUTOPOLL) { skb = pn533_alloc_skb(dev, 4 + 6); if (!skb) return -ENOMEM; *((u8 *)skb_put(skb, sizeof(u8))) = PN532_AUTOPOLL_POLLNR_INFINITE; *((u8 *)skb_put(skb, sizeof(u8))) = PN532_AUTOPOLL_PERIOD; if ((im_protocols & NFC_PROTO_MIFARE_MASK) && (im_protocols & NFC_PROTO_ISO14443_MASK) && (im_protocols & NFC_PROTO_NFC_DEP_MASK)) *((u8 *)skb_put(skb, sizeof(u8))) = PN532_AUTOPOLL_TYPE_GENERIC_106; else { if (im_protocols & NFC_PROTO_MIFARE_MASK) *((u8 *)skb_put(skb, sizeof(u8))) = PN532_AUTOPOLL_TYPE_MIFARE; if (im_protocols & NFC_PROTO_ISO14443_MASK) *((u8 *)skb_put(skb, sizeof(u8))) = PN532_AUTOPOLL_TYPE_ISOA; if (im_protocols & NFC_PROTO_NFC_DEP_MASK) { *((u8 *)skb_put(skb, sizeof(u8))) = PN532_AUTOPOLL_TYPE_DEP_PASSIVE_106; *((u8 *)skb_put(skb, sizeof(u8))) = PN532_AUTOPOLL_TYPE_DEP_PASSIVE_212; *((u8 *)skb_put(skb, sizeof(u8))) = PN532_AUTOPOLL_TYPE_DEP_PASSIVE_424; } } if (im_protocols & NFC_PROTO_FELICA_MASK || im_protocols & NFC_PROTO_NFC_DEP_MASK) { *((u8 *)skb_put(skb, sizeof(u8))) = PN532_AUTOPOLL_TYPE_FELICA212; *((u8 *)skb_put(skb, sizeof(u8))) = PN532_AUTOPOLL_TYPE_FELICA424; } if (im_protocols & NFC_PROTO_JEWEL_MASK) *((u8 *)skb_put(skb, sizeof(u8))) = PN532_AUTOPOLL_TYPE_JEWEL; if (im_protocols & NFC_PROTO_ISO14443_B_MASK) *((u8 *)skb_put(skb, sizeof(u8))) = PN532_AUTOPOLL_TYPE_ISOB; if (tm_protocols) *((u8 *)skb_put(skb, sizeof(u8))) = PN532_AUTOPOLL_TYPE_DEP_ACTIVE_106; rc = pn533_send_cmd_async(dev, PN533_CMD_IN_AUTOPOLL, skb, pn533_autopoll_complete, NULL); if (rc < 0) dev_kfree_skb(skb); else dev->poll_mod_count++; return rc; } pn533_poll_create_mod_list(dev, im_protocols, tm_protocols); if (!dev->poll_mod_count) { nfc_err(dev->dev, "Poll mod list is empty\n"); return -EINVAL; } /* Do not always start polling from the same 
modulation */ get_random_bytes(&rand_mod, sizeof(rand_mod)); rand_mod %= dev->poll_mod_count; dev->poll_mod_curr = rand_mod; cur_mod = dev->poll_mod_active[dev->poll_mod_curr]; rc = pn533_send_poll_frame(dev); /* Start listen timer */ if (!rc && cur_mod->len == 0 && dev->poll_mod_count > 1) mod_timer(&dev->listen_timer, jiffies + PN533_LISTEN_TIME * HZ); return rc; } static void pn533_stop_poll(struct nfc_dev *nfc_dev) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); del_timer(&dev->listen_timer); if (!dev->poll_mod_count) { dev_dbg(dev->dev, "Polling operation was not running\n"); return; } dev->phy_ops->abort_cmd(dev, GFP_KERNEL); flush_delayed_work(&dev->poll_work); pn533_poll_reset_mod_list(dev); } static int pn533_activate_target_nfcdep(struct pn533 *dev) { struct pn533_cmd_activate_response *rsp; u16 gt_len; int rc; struct sk_buff *skb; struct sk_buff *resp; skb = pn533_alloc_skb(dev, sizeof(u8) * 2); /*TG + Next*/ if (!skb) return -ENOMEM; skb_put_u8(skb, 1); /* TG */ skb_put_u8(skb, 0); /* Next */ resp = pn533_send_cmd_sync(dev, PN533_CMD_IN_ATR, skb); if (IS_ERR(resp)) return PTR_ERR(resp); rsp = (struct pn533_cmd_activate_response *)resp->data; rc = rsp->status & PN533_CMD_RET_MASK; if (rc != PN533_CMD_RET_SUCCESS) { nfc_err(dev->dev, "Target activation failed (error 0x%x)\n", rc); dev_kfree_skb(resp); return -EIO; } /* ATR_RES general bytes are located at offset 16 */ gt_len = resp->len - 16; rc = nfc_set_remote_general_bytes(dev->nfc_dev, rsp->gt, gt_len); dev_kfree_skb(resp); return rc; } static int pn533_activate_target(struct nfc_dev *nfc_dev, struct nfc_target *target, u32 protocol) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); int rc; dev_dbg(dev->dev, "%s: protocol=%u\n", __func__, protocol); if (dev->poll_mod_count) { nfc_err(dev->dev, "Cannot activate while polling\n"); return -EBUSY; } if (dev->tgt_active_prot) { nfc_err(dev->dev, "There is already an active target\n"); return -EBUSY; } if (!dev->tgt_available_prots) { nfc_err(dev->dev, "There is no available target to activate\n"); return -EINVAL; } if (!(dev->tgt_available_prots & (1 << protocol))) { nfc_err(dev->dev, "Target doesn't support requested proto %u\n", protocol); return -EINVAL; } if (protocol == NFC_PROTO_NFC_DEP) { rc = pn533_activate_target_nfcdep(dev); if (rc) { nfc_err(dev->dev, "Activating target with DEP failed %d\n", rc); return rc; } } dev->tgt_active_prot = protocol; dev->tgt_available_prots = 0; return 0; } static int pn533_deactivate_target_complete(struct pn533 *dev, void *arg, struct sk_buff *resp) { int rc = 0; if (IS_ERR(resp)) { rc = PTR_ERR(resp); nfc_err(dev->dev, "Target release error %d\n", rc); return rc; } rc = resp->data[0] & PN533_CMD_RET_MASK; if (rc != PN533_CMD_RET_SUCCESS) nfc_err(dev->dev, "Error 0x%x when releasing the target\n", rc); dev_kfree_skb(resp); return rc; } static void pn533_deactivate_target(struct nfc_dev *nfc_dev, struct nfc_target *target, u8 mode) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); struct sk_buff *skb; int rc; if (!dev->tgt_active_prot) { nfc_err(dev->dev, "There is no active target\n"); return; } dev->tgt_active_prot = 0; skb_queue_purge(&dev->resp_q); skb = pn533_alloc_skb(dev, sizeof(u8)); if (!skb) return; skb_put_u8(skb, 1); /* TG*/ rc = pn533_send_cmd_async(dev, PN533_CMD_IN_RELEASE, skb, pn533_deactivate_target_complete, NULL); if (rc < 0) { dev_kfree_skb(skb); nfc_err(dev->dev, "Target release error %d\n", rc); } } static int pn533_in_dep_link_up_complete(struct pn533 *dev, void *arg, struct sk_buff *resp) { struct 
pn533_cmd_jump_dep_response *rsp; u8 target_gt_len; int rc; u8 active = *(u8 *)arg; kfree(arg); if (IS_ERR(resp)) return PTR_ERR(resp); if (dev->tgt_available_prots && !(dev->tgt_available_prots & (1 << NFC_PROTO_NFC_DEP))) { nfc_err(dev->dev, "The target does not support DEP\n"); rc = -EINVAL; goto error; } rsp = (struct pn533_cmd_jump_dep_response *)resp->data; rc = rsp->status & PN533_CMD_RET_MASK; if (rc != PN533_CMD_RET_SUCCESS) { nfc_err(dev->dev, "Bringing DEP link up failed (error 0x%x)\n", rc); goto error; } if (!dev->tgt_available_prots) { struct nfc_target nfc_target; dev_dbg(dev->dev, "Creating new target\n"); memset(&nfc_target, 0, sizeof(struct nfc_target)); nfc_target.supported_protocols = NFC_PROTO_NFC_DEP_MASK; nfc_target.nfcid1_len = 10; memcpy(nfc_target.nfcid1, rsp->nfcid3t, nfc_target.nfcid1_len); rc = nfc_targets_found(dev->nfc_dev, &nfc_target, 1); if (rc) goto error; dev->tgt_available_prots = 0; } dev->tgt_active_prot = NFC_PROTO_NFC_DEP; /* ATR_RES general bytes are located at offset 17 */ target_gt_len = resp->len - 17; rc = nfc_set_remote_general_bytes(dev->nfc_dev, rsp->gt, target_gt_len); if (rc == 0) rc = nfc_dep_link_is_up(dev->nfc_dev, dev->nfc_dev->targets[0].idx, !active, NFC_RF_INITIATOR); error: dev_kfree_skb(resp); return rc; } static int pn533_rf_field(struct nfc_dev *nfc_dev, u8 rf); static int pn533_dep_link_up(struct nfc_dev *nfc_dev, struct nfc_target *target, u8 comm_mode, u8 *gb, size_t gb_len) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); struct sk_buff *skb; int rc, skb_len; u8 *next, *arg, nfcid3[NFC_NFCID3_MAXSIZE]; u8 passive_data[PASSIVE_DATA_LEN] = {0x00, 0xff, 0xff, 0x00, 0x3}; if (dev->poll_mod_count) { nfc_err(dev->dev, "Cannot bring the DEP link up while polling\n"); return -EBUSY; } if (dev->tgt_active_prot) { nfc_err(dev->dev, "There is already an active target\n"); return -EBUSY; } skb_len = 3 + gb_len; /* ActPass + BR + Next */ skb_len += PASSIVE_DATA_LEN; /* NFCID3 */ skb_len += NFC_NFCID3_MAXSIZE; if (target && !target->nfcid2_len) { nfcid3[0] = 0x1; nfcid3[1] = 0xfe; get_random_bytes(nfcid3 + 2, 6); } skb = pn533_alloc_skb(dev, skb_len); if (!skb) return -ENOMEM; skb_put_u8(skb, !comm_mode); /* ActPass */ skb_put_u8(skb, 0x02); /* 424 kbps */ next = skb_put(skb, 1); /* Next */ *next = 0; /* Copy passive data */ skb_put_data(skb, passive_data, PASSIVE_DATA_LEN); *next |= 1; /* Copy NFCID3 (which is NFCID2 from SENSF_RES) */ if (target && target->nfcid2_len) memcpy(skb_put(skb, NFC_NFCID3_MAXSIZE), target->nfcid2, target->nfcid2_len); else skb_put_data(skb, nfcid3, NFC_NFCID3_MAXSIZE); *next |= 2; if (gb != NULL && gb_len > 0) { skb_put_data(skb, gb, gb_len); *next |= 4; /* We have some Gi */ } else { *next = 0; } arg = kmalloc(sizeof(*arg), GFP_KERNEL); if (!arg) { dev_kfree_skb(skb); return -ENOMEM; } *arg = !comm_mode; pn533_rf_field(dev->nfc_dev, 0); rc = pn533_send_cmd_async(dev, PN533_CMD_IN_JUMP_FOR_DEP, skb, pn533_in_dep_link_up_complete, arg); if (rc < 0) { dev_kfree_skb(skb); kfree(arg); } return rc; } static int pn533_dep_link_down(struct nfc_dev *nfc_dev) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); pn533_poll_reset_mod_list(dev); if (dev->tgt_mode || dev->tgt_active_prot) dev->phy_ops->abort_cmd(dev, GFP_KERNEL); dev->tgt_active_prot = 0; dev->tgt_mode = 0; skb_queue_purge(&dev->resp_q); return 0; } struct pn533_data_exchange_arg { data_exchange_cb_t cb; void *cb_context; }; static struct sk_buff *pn533_build_response(struct pn533 *dev) { struct sk_buff *skb, *tmp, *t; unsigned int skb_len = 0, tmp_len = 0; if 
(skb_queue_empty(&dev->resp_q)) return NULL; if (skb_queue_len(&dev->resp_q) == 1) { skb = skb_dequeue(&dev->resp_q); goto out; } skb_queue_walk_safe(&dev->resp_q, tmp, t) skb_len += tmp->len; dev_dbg(dev->dev, "%s total length %d\n", __func__, skb_len); skb = alloc_skb(skb_len, GFP_KERNEL); if (skb == NULL) goto out; skb_put(skb, skb_len); skb_queue_walk_safe(&dev->resp_q, tmp, t) { memcpy(skb->data + tmp_len, tmp->data, tmp->len); tmp_len += tmp->len; } out: skb_queue_purge(&dev->resp_q); return skb; } static int pn533_data_exchange_complete(struct pn533 *dev, void *_arg, struct sk_buff *resp) { struct pn533_data_exchange_arg *arg = _arg; struct sk_buff *skb; int rc = 0; u8 status, ret, mi; if (IS_ERR(resp)) { rc = PTR_ERR(resp); goto _error; } status = resp->data[0]; ret = status & PN533_CMD_RET_MASK; mi = status & PN533_CMD_MI_MASK; skb_pull(resp, sizeof(status)); if (ret != PN533_CMD_RET_SUCCESS) { nfc_err(dev->dev, "Exchanging data failed (error 0x%x)\n", ret); rc = -EIO; goto error; } skb_queue_tail(&dev->resp_q, resp); if (mi) { dev->cmd_complete_mi_arg = arg; queue_work(dev->wq, &dev->mi_rx_work); return -EINPROGRESS; } /* Prepare for the next round */ if (skb_queue_len(&dev->fragment_skb) > 0) { dev->cmd_complete_dep_arg = arg; queue_work(dev->wq, &dev->mi_tx_work); return -EINPROGRESS; } skb = pn533_build_response(dev); if (!skb) { rc = -ENOMEM; goto error; } arg->cb(arg->cb_context, skb, 0); kfree(arg); return 0; error: dev_kfree_skb(resp); _error: skb_queue_purge(&dev->resp_q); arg->cb(arg->cb_context, NULL, rc); kfree(arg); return rc; } /* * Receive an incoming pn533 frame. skb contains only header and payload. * If skb == NULL, it is a notification that the link below is dead. */ void pn533_recv_frame(struct pn533 *dev, struct sk_buff *skb, int status) { if (!dev->cmd) goto sched_wq; dev->cmd->status = status; if (status != 0) { dev_dbg(dev->dev, "%s: Error received: %d\n", __func__, status); goto sched_wq; } if (skb == NULL) { dev_err(dev->dev, "NULL Frame -> link is dead\n"); goto sched_wq; } if (pn533_rx_frame_is_ack(skb->data)) { dev_dbg(dev->dev, "%s: Received ACK frame\n", __func__); dev_kfree_skb(skb); return; } print_hex_dump_debug("PN533 RX: ", DUMP_PREFIX_NONE, 16, 1, skb->data, dev->ops->rx_frame_size(skb->data), false); if (!dev->ops->rx_is_frame_valid(skb->data, dev)) { nfc_err(dev->dev, "Received an invalid frame\n"); dev->cmd->status = -EIO; } else if (!pn533_rx_frame_is_cmd_response(dev, skb->data)) { nfc_err(dev->dev, "It it not the response to the last command\n"); dev->cmd->status = -EIO; } dev->cmd->resp = skb; sched_wq: queue_work(dev->wq, &dev->cmd_complete_work); } EXPORT_SYMBOL(pn533_recv_frame); /* Split the Tx skb into small chunks */ static int pn533_fill_fragment_skbs(struct pn533 *dev, struct sk_buff *skb) { struct sk_buff *frag; int frag_size; do { /* Remaining size */ if (skb->len > PN533_CMD_DATAFRAME_MAXLEN) frag_size = PN533_CMD_DATAFRAME_MAXLEN; else frag_size = skb->len; /* Allocate and reserve */ frag = pn533_alloc_skb(dev, frag_size); if (!frag) { skb_queue_purge(&dev->fragment_skb); return -ENOMEM; } if (!dev->tgt_mode) { /* Reserve the TG/MI byte */ skb_reserve(frag, 1); /* MI + TG */ if (frag_size == PN533_CMD_DATAFRAME_MAXLEN) *(u8 *)skb_push(frag, sizeof(u8)) = (PN533_CMD_MI_MASK | 1); else *(u8 *)skb_push(frag, sizeof(u8)) = 1; /* TG */ } skb_put_data(frag, skb->data, frag_size); /* Reduce the size of incoming buffer */ skb_pull(skb, frag_size); /* Add this to skb_queue */ skb_queue_tail(&dev->fragment_skb, frag); } while (skb->len 
> 0); dev_kfree_skb(skb); return skb_queue_len(&dev->fragment_skb); } static int pn533_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, struct sk_buff *skb, data_exchange_cb_t cb, void *cb_context) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); struct pn533_data_exchange_arg *arg = NULL; int rc; if (!dev->tgt_active_prot) { nfc_err(dev->dev, "Can't exchange data if there is no active target\n"); rc = -EINVAL; goto error; } arg = kmalloc(sizeof(*arg), GFP_KERNEL); if (!arg) { rc = -ENOMEM; goto error; } arg->cb = cb; arg->cb_context = cb_context; switch (dev->device_type) { case PN533_DEVICE_PASORI: if (dev->tgt_active_prot == NFC_PROTO_FELICA) { rc = pn533_send_data_async(dev, PN533_CMD_IN_COMM_THRU, skb, pn533_data_exchange_complete, arg); break; } fallthrough; default: /* jumbo frame ? */ if (skb->len > PN533_CMD_DATAEXCH_DATA_MAXLEN) { rc = pn533_fill_fragment_skbs(dev, skb); if (rc < 0) goto error; skb = skb_dequeue(&dev->fragment_skb); if (!skb) { rc = -EIO; goto error; } } else { *(u8 *)skb_push(skb, sizeof(u8)) = 1; /* TG */ } rc = pn533_send_data_async(dev, PN533_CMD_IN_DATA_EXCHANGE, skb, pn533_data_exchange_complete, arg); break; } if (rc < 0) /* rc from send_async */ goto error; return 0; error: kfree(arg); dev_kfree_skb(skb); return rc; } static int pn533_tm_send_complete(struct pn533 *dev, void *arg, struct sk_buff *resp) { u8 status; if (IS_ERR(resp)) return PTR_ERR(resp); status = resp->data[0]; /* Prepare for the next round */ if (skb_queue_len(&dev->fragment_skb) > 0) { queue_work(dev->wq, &dev->mi_tm_tx_work); return -EINPROGRESS; } dev_kfree_skb(resp); if (status != 0) { nfc_tm_deactivated(dev->nfc_dev); dev->tgt_mode = 0; return 0; } queue_work(dev->wq, &dev->tg_work); return 0; } static int pn533_tm_send(struct nfc_dev *nfc_dev, struct sk_buff *skb) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); int rc; /* let's split in multiple chunks if size's too big */ if (skb->len > PN533_CMD_DATAEXCH_DATA_MAXLEN) { rc = pn533_fill_fragment_skbs(dev, skb); if (rc < 0) goto error; /* get the first skb */ skb = skb_dequeue(&dev->fragment_skb); if (!skb) { rc = -EIO; goto error; } rc = pn533_send_data_async(dev, PN533_CMD_TG_SET_META_DATA, skb, pn533_tm_send_complete, NULL); } else { /* Send th skb */ rc = pn533_send_data_async(dev, PN533_CMD_TG_SET_DATA, skb, pn533_tm_send_complete, NULL); } error: if (rc < 0) { dev_kfree_skb(skb); skb_queue_purge(&dev->fragment_skb); } return rc; } static void pn533_wq_mi_recv(struct work_struct *work) { struct pn533 *dev = container_of(work, struct pn533, mi_rx_work); struct sk_buff *skb; int rc; skb = pn533_alloc_skb(dev, PN533_CMD_DATAEXCH_HEAD_LEN); if (!skb) goto error; switch (dev->device_type) { case PN533_DEVICE_PASORI: if (dev->tgt_active_prot == NFC_PROTO_FELICA) { rc = pn533_send_cmd_direct_async(dev, PN533_CMD_IN_COMM_THRU, skb, pn533_data_exchange_complete, dev->cmd_complete_mi_arg); break; } fallthrough; default: skb_put_u8(skb, 1); /*TG*/ rc = pn533_send_cmd_direct_async(dev, PN533_CMD_IN_DATA_EXCHANGE, skb, pn533_data_exchange_complete, dev->cmd_complete_mi_arg); break; } if (rc == 0) /* success */ return; nfc_err(dev->dev, "Error %d when trying to perform data_exchange\n", rc); dev_kfree_skb(skb); kfree(dev->cmd_complete_mi_arg); error: dev->phy_ops->send_ack(dev, GFP_KERNEL); queue_work(dev->wq, &dev->cmd_work); } static void pn533_wq_mi_send(struct work_struct *work) { struct pn533 *dev = container_of(work, struct pn533, mi_tx_work); struct sk_buff *skb; int rc; /* Grab the first skb in the queue */ skb = 
skb_dequeue(&dev->fragment_skb); if (skb == NULL) { /* No more data */ /* Reset the queue for future use */ skb_queue_head_init(&dev->fragment_skb); goto error; } switch (dev->device_type) { case PN533_DEVICE_PASORI: if (dev->tgt_active_prot != NFC_PROTO_FELICA) { rc = -EIO; break; } rc = pn533_send_cmd_direct_async(dev, PN533_CMD_IN_COMM_THRU, skb, pn533_data_exchange_complete, dev->cmd_complete_dep_arg); break; default: /* Still some fragments? */ rc = pn533_send_cmd_direct_async(dev, PN533_CMD_IN_DATA_EXCHANGE, skb, pn533_data_exchange_complete, dev->cmd_complete_dep_arg); break; } if (rc == 0) /* success */ return; nfc_err(dev->dev, "Error %d when trying to perform data_exchange\n", rc); dev_kfree_skb(skb); kfree(dev->cmd_complete_dep_arg); error: dev->phy_ops->send_ack(dev, GFP_KERNEL); queue_work(dev->wq, &dev->cmd_work); } static int pn533_set_configuration(struct pn533 *dev, u8 cfgitem, u8 *cfgdata, u8 cfgdata_len) { struct sk_buff *skb; struct sk_buff *resp; int skb_len; skb_len = sizeof(cfgitem) + cfgdata_len; /* cfgitem + cfgdata */ skb = pn533_alloc_skb(dev, skb_len); if (!skb) return -ENOMEM; skb_put_u8(skb, cfgitem); skb_put_data(skb, cfgdata, cfgdata_len); resp = pn533_send_cmd_sync(dev, PN533_CMD_RF_CONFIGURATION, skb); if (IS_ERR(resp)) return PTR_ERR(resp); dev_kfree_skb(resp); return 0; } static int pn533_get_firmware_version(struct pn533 *dev, struct pn533_fw_version *fv) { struct sk_buff *skb; struct sk_buff *resp; skb = pn533_alloc_skb(dev, 0); if (!skb) return -ENOMEM; resp = pn533_send_cmd_sync(dev, PN533_CMD_GET_FIRMWARE_VERSION, skb); if (IS_ERR(resp)) return PTR_ERR(resp); fv->ic = resp->data[0]; fv->ver = resp->data[1]; fv->rev = resp->data[2]; fv->support = resp->data[3]; dev_kfree_skb(resp); return 0; } static int pn533_pasori_fw_reset(struct pn533 *dev) { struct sk_buff *skb; struct sk_buff *resp; skb = pn533_alloc_skb(dev, sizeof(u8)); if (!skb) return -ENOMEM; skb_put_u8(skb, 0x1); resp = pn533_send_cmd_sync(dev, 0x18, skb); if (IS_ERR(resp)) return PTR_ERR(resp); dev_kfree_skb(resp); return 0; } static int pn533_rf_field(struct nfc_dev *nfc_dev, u8 rf) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); u8 rf_field = !!rf; int rc; rf_field |= PN533_CFGITEM_RF_FIELD_AUTO_RFCA; rc = pn533_set_configuration(dev, PN533_CFGITEM_RF_FIELD, (u8 *)&rf_field, 1); if (rc) { nfc_err(dev->dev, "Error on setting RF field\n"); return rc; } return 0; } static int pn532_sam_configuration(struct nfc_dev *nfc_dev) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); struct sk_buff *skb; struct sk_buff *resp; skb = pn533_alloc_skb(dev, 1); if (!skb) return -ENOMEM; skb_put_u8(skb, 0x01); resp = pn533_send_cmd_sync(dev, PN533_CMD_SAM_CONFIGURATION, skb); if (IS_ERR(resp)) return PTR_ERR(resp); dev_kfree_skb(resp); return 0; } static int pn533_dev_up(struct nfc_dev *nfc_dev) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); int rc; if (dev->phy_ops->dev_up) { rc = dev->phy_ops->dev_up(dev); if (rc) return rc; } if ((dev->device_type == PN533_DEVICE_PN532) || (dev->device_type == PN533_DEVICE_PN532_AUTOPOLL)) { rc = pn532_sam_configuration(nfc_dev); if (rc) return rc; } return pn533_rf_field(nfc_dev, 1); } static int pn533_dev_down(struct nfc_dev *nfc_dev) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); int ret; ret = pn533_rf_field(nfc_dev, 0); if (dev->phy_ops->dev_down && !ret) ret = dev->phy_ops->dev_down(dev); return ret; } static const struct nfc_ops pn533_nfc_ops = { .dev_up = pn533_dev_up, .dev_down = pn533_dev_down, .dep_link_up = pn533_dep_link_up, .dep_link_down = 
pn533_dep_link_down, .start_poll = pn533_start_poll, .stop_poll = pn533_stop_poll, .activate_target = pn533_activate_target, .deactivate_target = pn533_deactivate_target, .im_transceive = pn533_transceive, .tm_send = pn533_tm_send, }; static int pn533_setup(struct pn533 *dev) { struct pn533_config_max_retries max_retries; struct pn533_config_timing timing; u8 pasori_cfg[3] = {0x08, 0x01, 0x08}; int rc; switch (dev->device_type) { case PN533_DEVICE_STD: case PN533_DEVICE_PASORI: case PN533_DEVICE_ACR122U: case PN533_DEVICE_PN532: case PN533_DEVICE_PN532_AUTOPOLL: max_retries.mx_rty_atr = 0x2; max_retries.mx_rty_psl = 0x1; max_retries.mx_rty_passive_act = PN533_CONFIG_MAX_RETRIES_NO_RETRY; timing.rfu = PN533_CONFIG_TIMING_102; timing.atr_res_timeout = PN533_CONFIG_TIMING_102; timing.dep_timeout = PN533_CONFIG_TIMING_204; break; default: nfc_err(dev->dev, "Unknown device type %d\n", dev->device_type); return -EINVAL; } rc = pn533_set_configuration(dev, PN533_CFGITEM_MAX_RETRIES, (u8 *)&max_retries, sizeof(max_retries)); if (rc) { nfc_err(dev->dev, "Error on setting MAX_RETRIES config\n"); return rc; } rc = pn533_set_configuration(dev, PN533_CFGITEM_TIMING, (u8 *)&timing, sizeof(timing)); if (rc) { nfc_err(dev->dev, "Error on setting RF timings\n"); return rc; } switch (dev->device_type) { case PN533_DEVICE_STD: case PN533_DEVICE_PN532: case PN533_DEVICE_PN532_AUTOPOLL: break; case PN533_DEVICE_PASORI: pn533_pasori_fw_reset(dev); rc = pn533_set_configuration(dev, PN533_CFGITEM_PASORI, pasori_cfg, 3); if (rc) { nfc_err(dev->dev, "Error while settings PASORI config\n"); return rc; } pn533_pasori_fw_reset(dev); break; } return 0; } int pn533_finalize_setup(struct pn533 *dev) { struct pn533_fw_version fw_ver; int rc; memset(&fw_ver, 0, sizeof(fw_ver)); rc = pn533_get_firmware_version(dev, &fw_ver); if (rc) { nfc_err(dev->dev, "Unable to get FW version\n"); return rc; } nfc_info(dev->dev, "NXP PN5%02X firmware ver %d.%d now attached\n", fw_ver.ic, fw_ver.ver, fw_ver.rev); rc = pn533_setup(dev); if (rc) return rc; return 0; } EXPORT_SYMBOL_GPL(pn533_finalize_setup); struct pn533 *pn53x_common_init(u32 device_type, enum pn533_protocol_type protocol_type, void *phy, const struct pn533_phy_ops *phy_ops, struct pn533_frame_ops *fops, struct device *dev) { struct pn533 *priv; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return ERR_PTR(-ENOMEM); priv->phy = phy; priv->phy_ops = phy_ops; priv->dev = dev; if (fops != NULL) priv->ops = fops; else priv->ops = &pn533_std_frame_ops; priv->protocol_type = protocol_type; priv->device_type = device_type; mutex_init(&priv->cmd_lock); INIT_WORK(&priv->cmd_work, pn533_wq_cmd); INIT_WORK(&priv->cmd_complete_work, pn533_wq_cmd_complete); INIT_WORK(&priv->mi_rx_work, pn533_wq_mi_recv); INIT_WORK(&priv->mi_tx_work, pn533_wq_mi_send); INIT_WORK(&priv->tg_work, pn533_wq_tg_get_data); INIT_WORK(&priv->mi_tm_rx_work, pn533_wq_tm_mi_recv); INIT_WORK(&priv->mi_tm_tx_work, pn533_wq_tm_mi_send); INIT_DELAYED_WORK(&priv->poll_work, pn533_wq_poll); INIT_WORK(&priv->rf_work, pn533_wq_rf); priv->wq = alloc_ordered_workqueue("pn533", 0); if (priv->wq == NULL) goto error; timer_setup(&priv->listen_timer, pn533_listen_mode_timer, 0); skb_queue_head_init(&priv->resp_q); skb_queue_head_init(&priv->fragment_skb); INIT_LIST_HEAD(&priv->cmd_queue); return priv; error: kfree(priv); return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL_GPL(pn53x_common_init); void pn53x_common_clean(struct pn533 *priv) { struct pn533_cmd *cmd, *n; /* delete the timer before cleanup the worker */ 
timer_shutdown_sync(&priv->listen_timer); flush_delayed_work(&priv->poll_work); destroy_workqueue(priv->wq); skb_queue_purge(&priv->resp_q); list_for_each_entry_safe(cmd, n, &priv->cmd_queue, queue) { list_del(&cmd->queue); kfree(cmd); } kfree(priv); } EXPORT_SYMBOL_GPL(pn53x_common_clean); int pn532_i2c_nfc_alloc(struct pn533 *priv, u32 protocols, struct device *parent) { priv->nfc_dev = nfc_allocate_device(&pn533_nfc_ops, protocols, priv->ops->tx_header_len + PN533_CMD_DATAEXCH_HEAD_LEN, priv->ops->tx_tail_len); if (!priv->nfc_dev) return -ENOMEM; nfc_set_parent_dev(priv->nfc_dev, parent); nfc_set_drvdata(priv->nfc_dev, priv); return 0; } EXPORT_SYMBOL_GPL(pn532_i2c_nfc_alloc); int pn53x_register_nfc(struct pn533 *priv, u32 protocols, struct device *parent) { int rc; rc = pn532_i2c_nfc_alloc(priv, protocols, parent); if (rc) return rc; rc = nfc_register_device(priv->nfc_dev); if (rc) nfc_free_device(priv->nfc_dev); return rc; } EXPORT_SYMBOL_GPL(pn53x_register_nfc); void pn53x_unregister_nfc(struct pn533 *priv) { nfc_unregister_device(priv->nfc_dev); nfc_free_device(priv->nfc_dev); } EXPORT_SYMBOL_GPL(pn53x_unregister_nfc); MODULE_AUTHOR("Lauro Ramos Venancio <lauro.venancio@openbossa.org>"); MODULE_AUTHOR("Aloisio Almeida Jr <aloisio.almeida@openbossa.org>"); MODULE_AUTHOR("Waldemar Rymarkiewicz <waldemar.rymarkiewicz@tieto.com>"); MODULE_DESCRIPTION("PN533 driver ver " VERSION); MODULE_VERSION(VERSION); MODULE_LICENSE("GPL");
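/*
 * Illustrative sketch (not part of the driver above, and using stand-in
 * constants rather than the kernel's definitions): pn533_fill_fragment_skbs()
 * splits an outgoing payload into frames of at most PN533_CMD_DATAFRAME_MAXLEN
 * bytes and, in initiator mode, prefixes each frame with a TG byte; a
 * full-sized fragment also has the "more information" (MI) bit set in that
 * byte so the chip chains the fragments together.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define EX_FRAME_MAXLEN 240   /* stand-in for PN533_CMD_DATAFRAME_MAXLEN */
#define EX_MI_FLAG      0x40  /* stand-in for PN533_CMD_MI_MASK */
#define EX_TG           0x01  /* logical target number used by the driver */

/* Build one on-the-wire fragment: prefix byte (TG, plus MI when requested)
 * followed by the data chunk.  Returns the fragment length. */
static size_t emit_fragment(const uint8_t *data, size_t len, int mi,
			    uint8_t *frame)
{
	frame[0] = mi ? (EX_MI_FLAG | EX_TG) : EX_TG;
	memcpy(frame + 1, data, len);
	return len + 1;
}

int main(void)
{
	uint8_t payload[600];
	uint8_t frame[1 + EX_FRAME_MAXLEN];
	size_t off = 0, nframes = 0;

	memset(payload, 0xAB, sizeof(payload));

	while (off < sizeof(payload)) {
		size_t chunk = sizeof(payload) - off;

		if (chunk > EX_FRAME_MAXLEN)
			chunk = EX_FRAME_MAXLEN;

		/* Mirror the driver: MI is set on every full-sized fragment. */
		size_t flen = emit_fragment(payload + off, chunk,
					    chunk == EX_FRAME_MAXLEN, frame);
		printf("fragment %zu: %zu bytes, MI=%d\n",
		       ++nframes, flen, (frame[0] & EX_MI_FLAG) != 0);
		off += chunk;
	}
	return 0;
}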
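/*
 * Illustrative sketch of the userspace side of the gadgetfs code that follows
 * (see the "ENDPOINT INITIALIZATION" comment in inode.c below): once ep0 has
 * been configured and the gadget has enumerated, each /dev/gadget/$ENDPOINT
 * file is initialized by writing a host-order u32 tag of 1 followed by the
 * 7-byte full/low speed endpoint descriptor and, optionally, a high speed
 * descriptor; after that, plain write() queues IN data.  The endpoint file
 * name and the descriptor values here are examples only (real endpoint names
 * are UDC-specific), not something mandated by the driver.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* Bulk IN endpoint 1: bLength, bDescriptorType (5 = ENDPOINT),
	 * bEndpointAddress, bmAttributes (2 = bulk), wMaxPacketSize (LE),
	 * bInterval. */
	static const uint8_t fs_desc[7] = { 7, 0x05, 0x81, 0x02, 64, 0x00, 0 };
	static const uint8_t hs_desc[7] = { 7, 0x05, 0x81, 0x02, 0x00, 0x02, 0 };
	uint8_t cfg[4 + sizeof(fs_desc) + sizeof(hs_desc)];
	uint32_t tag = 1;	/* message format checked by ep_config() */
	const char msg[] = "hello host";
	int fd;

	fd = open("/dev/gadget/ep1in", O_RDWR);	/* example name; UDC-specific */
	if (fd < 0) {
		perror("open endpoint");
		return 1;
	}

	memcpy(cfg, &tag, 4);
	memcpy(cfg + 4, fs_desc, sizeof(fs_desc));
	memcpy(cfg + 4 + sizeof(fs_desc), hs_desc, sizeof(hs_desc));

	/* This write runs ep_config(): the descriptors are validated and the
	 * endpoint is enabled at whatever speed the gadget enumerated at. */
	if (write(fd, cfg, sizeof(cfg)) < 0) {
		perror("configure endpoint");
		close(fd);
		return 1;
	}

	/* From here on, ordinary write() submits IN transfers to the host. */
	if (write(fd, msg, sizeof(msg) - 1) < 0)
		perror("write IN data");

	close(fd);
	return 0;
}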
// SPDX-License-Identifier: GPL-2.0+ /* * inode.c -- user mode filesystem api for usb gadget controllers * * Copyright (C) 2003-2004 David Brownell * Copyright (C) 2003 Agilent Technologies */ /* #define VERBOSE_DEBUG */ #include <linux/init.h> #include <linux/module.h> #include <linux/fs.h> #include <linux/fs_context.h> #include <linux/pagemap.h> #include <linux/uts.h> #include <linux/wait.h> #include <linux/compiler.h> #include <linux/uaccess.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/string_choices.h> #include <linux/poll.h> #include <linux/kthread.h> #include <linux/aio.h> #include <linux/uio.h> #include <linux/refcount.h> #include <linux/delay.h> #include <linux/device.h> #include <linux/moduleparam.h> #include <linux/usb/gadgetfs.h> #include <linux/usb/gadget.h> #include <linux/usb/composite.h> /* for USB_GADGET_DELAYED_STATUS */ /* Undef helpers from linux/usb/composite.h as gadgetfs redefines them */ #undef DBG #undef ERROR #undef INFO /* * The gadgetfs API maps each
endpoint to a file descriptor so that you * can use standard synchronous read/write calls for I/O. There's some * O_NONBLOCK and O_ASYNC/FASYNC style i/o support. Example usermode * drivers show how this works in practice. You can also use AIO to * eliminate I/O gaps between requests, to help when streaming data. * * Key parts that must be USB-specific are protocols defining how the * read/write operations relate to the hardware state machines. There * are two types of files. One type is for the device, implementing ep0. * The other type is for each IN or OUT endpoint. In both cases, the * user mode driver must configure the hardware before using it. * * - First, dev_config() is called when /dev/gadget/$CHIP is configured * (by writing configuration and device descriptors). Afterwards it * may serve as a source of device events, used to handle all control * requests other than basic enumeration. * * - Then, after a SET_CONFIGURATION control request, ep_config() is * called when each /dev/gadget/ep* file is configured (by writing * endpoint descriptors). Afterwards these files are used to write() * IN data or to read() OUT data. To halt the endpoint, a "wrong * direction" request is issued (like reading an IN endpoint). * * Unlike "usbfs" the only ioctl()s are for things that are rare, and maybe * not possible on all hardware. For example, precise fault handling with * respect to data left in endpoint fifos after aborted operations; or * selective clearing of endpoint halts, to implement SET_INTERFACE. */ #define DRIVER_DESC "USB Gadget filesystem" #define DRIVER_VERSION "24 Aug 2004" static const char driver_desc [] = DRIVER_DESC; static const char shortname [] = "gadgetfs"; MODULE_DESCRIPTION (DRIVER_DESC); MODULE_AUTHOR ("David Brownell"); MODULE_LICENSE ("GPL"); static int ep_open(struct inode *, struct file *); /*----------------------------------------------------------------------*/ #define GADGETFS_MAGIC 0xaee71ee7 /* /dev/gadget/$CHIP represents ep0 and the whole device */ enum ep0_state { /* DISABLED is the initial state. */ STATE_DEV_DISABLED = 0, /* Only one open() of /dev/gadget/$CHIP; only one file tracks * ep0/device i/o modes and binding to the controller. Driver * must always write descriptors to initialize the device, then * the device becomes UNCONNECTED until enumeration. */ STATE_DEV_OPENED, /* From then on, ep0 fd is in either of two basic modes: * - (UN)CONNECTED: read usb_gadgetfs_event(s) from it * - SETUP: read/write will transfer control data and succeed; * or if "wrong direction", performs protocol stall */ STATE_DEV_UNCONNECTED, STATE_DEV_CONNECTED, STATE_DEV_SETUP, /* UNBOUND means the driver closed ep0, so the device won't be * accessible again (DEV_DISABLED) until all fds are closed. */ STATE_DEV_UNBOUND, }; /* enough for the whole queue: most events invalidate others */ #define N_EVENT 5 #define RBUF_SIZE 256 struct dev_data { spinlock_t lock; refcount_t count; int udc_usage; enum ep0_state state; /* P: lock */ struct usb_gadgetfs_event event [N_EVENT]; unsigned ev_next; struct fasync_struct *fasync; u8 current_config; /* drivers reading ep0 MUST handle control requests (SETUP) * reported that way; else the host will time out. 
*/ unsigned usermode_setup : 1, setup_in : 1, setup_can_stall : 1, setup_out_ready : 1, setup_out_error : 1, setup_abort : 1, gadget_registered : 1; unsigned setup_wLength; /* the rest is basically write-once */ struct usb_config_descriptor *config, *hs_config; struct usb_device_descriptor *dev; struct usb_request *req; struct usb_gadget *gadget; struct list_head epfiles; void *buf; wait_queue_head_t wait; struct super_block *sb; struct dentry *dentry; /* except this scratch i/o buffer for ep0 */ u8 rbuf[RBUF_SIZE]; }; static inline void get_dev (struct dev_data *data) { refcount_inc (&data->count); } static void put_dev (struct dev_data *data) { if (likely (!refcount_dec_and_test (&data->count))) return; /* needs no more cleanup */ BUG_ON (waitqueue_active (&data->wait)); kfree (data); } static struct dev_data *dev_new (void) { struct dev_data *dev; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return NULL; dev->state = STATE_DEV_DISABLED; refcount_set (&dev->count, 1); spin_lock_init (&dev->lock); INIT_LIST_HEAD (&dev->epfiles); init_waitqueue_head (&dev->wait); return dev; } /*----------------------------------------------------------------------*/ /* other /dev/gadget/$ENDPOINT files represent endpoints */ enum ep_state { STATE_EP_DISABLED = 0, STATE_EP_READY, STATE_EP_ENABLED, STATE_EP_UNBOUND, }; struct ep_data { struct mutex lock; enum ep_state state; refcount_t count; struct dev_data *dev; /* must hold dev->lock before accessing ep or req */ struct usb_ep *ep; struct usb_request *req; ssize_t status; char name [16]; struct usb_endpoint_descriptor desc, hs_desc; struct list_head epfiles; wait_queue_head_t wait; struct dentry *dentry; }; static inline void get_ep (struct ep_data *data) { refcount_inc (&data->count); } static void put_ep (struct ep_data *data) { if (likely (!refcount_dec_and_test (&data->count))) return; put_dev (data->dev); /* needs no more cleanup */ BUG_ON (!list_empty (&data->epfiles)); BUG_ON (waitqueue_active (&data->wait)); kfree (data); } /*----------------------------------------------------------------------*/ /* most "how to use the hardware" policy choices are in userspace: * mapping endpoint roles (which the driver needs) to the capabilities * which the usb controller has. most of those capabilities are exposed * implicitly, starting with the driver name and then endpoint names. */ static const char *CHIP; static DEFINE_MUTEX(sb_mutex); /* Serialize superblock operations */ /*----------------------------------------------------------------------*/ /* NOTE: don't use dev_printk calls before binding to the gadget * at the end of ep0 configuration, or after unbind. */ /* too wordy: dev_printk(level , &(d)->gadget->dev , fmt , ## args) */ #define xprintk(d,level,fmt,args...) \ printk(level "%s: " fmt , shortname , ## args) #ifdef DEBUG #define DBG(dev,fmt,args...) \ xprintk(dev , KERN_DEBUG , fmt , ## args) #else #define DBG(dev,fmt,args...) \ do { } while (0) #endif /* DEBUG */ #ifdef VERBOSE_DEBUG #define VDEBUG DBG #else #define VDEBUG(dev,fmt,args...) \ do { } while (0) #endif /* DEBUG */ #define ERROR(dev,fmt,args...) \ xprintk(dev , KERN_ERR , fmt , ## args) #define INFO(dev,fmt,args...) \ xprintk(dev , KERN_INFO , fmt , ## args) /*----------------------------------------------------------------------*/ /* SYNCHRONOUS ENDPOINT OPERATIONS (bulk/intr/iso) * * After opening, configure non-control endpoints. Then use normal * stream read() and write() requests; and maybe ioctl() to get more * precise FIFO status when recovering from cancellation. 
*/ static void epio_complete (struct usb_ep *ep, struct usb_request *req) { struct ep_data *epdata = ep->driver_data; if (!req->context) return; if (req->status) epdata->status = req->status; else epdata->status = req->actual; complete ((struct completion *)req->context); } /* tasklock endpoint, returning when it's connected. * still need dev->lock to use epdata->ep. */ static int get_ready_ep (unsigned f_flags, struct ep_data *epdata, bool is_write) { int val; if (f_flags & O_NONBLOCK) { if (!mutex_trylock(&epdata->lock)) goto nonblock; if (epdata->state != STATE_EP_ENABLED && (!is_write || epdata->state != STATE_EP_READY)) { mutex_unlock(&epdata->lock); nonblock: val = -EAGAIN; } else val = 0; return val; } val = mutex_lock_interruptible(&epdata->lock); if (val < 0) return val; switch (epdata->state) { case STATE_EP_ENABLED: return 0; case STATE_EP_READY: /* not configured yet */ if (is_write) return 0; fallthrough; case STATE_EP_UNBOUND: /* clean disconnect */ break; // case STATE_EP_DISABLED: /* "can't happen" */ default: /* error! */ pr_debug ("%s: ep %p not available, state %d\n", shortname, epdata, epdata->state); } mutex_unlock(&epdata->lock); return -ENODEV; } static ssize_t ep_io (struct ep_data *epdata, void *buf, unsigned len) { DECLARE_COMPLETION_ONSTACK (done); int value; spin_lock_irq (&epdata->dev->lock); if (likely (epdata->ep != NULL)) { struct usb_request *req = epdata->req; req->context = &done; req->complete = epio_complete; req->buf = buf; req->length = len; value = usb_ep_queue (epdata->ep, req, GFP_ATOMIC); } else value = -ENODEV; spin_unlock_irq (&epdata->dev->lock); if (likely (value == 0)) { value = wait_for_completion_interruptible(&done); if (value != 0) { spin_lock_irq (&epdata->dev->lock); if (likely (epdata->ep != NULL)) { DBG (epdata->dev, "%s i/o interrupted\n", epdata->name); usb_ep_dequeue (epdata->ep, epdata->req); spin_unlock_irq (&epdata->dev->lock); wait_for_completion(&done); if (epdata->status == -ECONNRESET) epdata->status = -EINTR; } else { spin_unlock_irq (&epdata->dev->lock); DBG (epdata->dev, "endpoint gone\n"); wait_for_completion(&done); epdata->status = -ENODEV; } } return epdata->status; } return value; } static int ep_release (struct inode *inode, struct file *fd) { struct ep_data *data = fd->private_data; int value; value = mutex_lock_interruptible(&data->lock); if (value < 0) return value; /* clean up if this can be reopened */ if (data->state != STATE_EP_UNBOUND) { data->state = STATE_EP_DISABLED; data->desc.bDescriptorType = 0; data->hs_desc.bDescriptorType = 0; usb_ep_disable(data->ep); } mutex_unlock(&data->lock); put_ep (data); return 0; } static long ep_ioctl(struct file *fd, unsigned code, unsigned long value) { struct ep_data *data = fd->private_data; int status; if ((status = get_ready_ep (fd->f_flags, data, false)) < 0) return status; spin_lock_irq (&data->dev->lock); if (likely (data->ep != NULL)) { switch (code) { case GADGETFS_FIFO_STATUS: status = usb_ep_fifo_status (data->ep); break; case GADGETFS_FIFO_FLUSH: usb_ep_fifo_flush (data->ep); break; case GADGETFS_CLEAR_HALT: status = usb_ep_clear_halt (data->ep); break; default: status = -ENOTTY; } } else status = -ENODEV; spin_unlock_irq (&data->dev->lock); mutex_unlock(&data->lock); return status; } /*----------------------------------------------------------------------*/ /* ASYNCHRONOUS ENDPOINT I/O OPERATIONS (bulk/intr/iso) */ struct kiocb_priv { struct usb_request *req; struct ep_data *epdata; struct kiocb *iocb; struct mm_struct *mm; struct work_struct work; void 
*buf; struct iov_iter to; const void *to_free; unsigned actual; }; static int ep_aio_cancel(struct kiocb *iocb) { struct kiocb_priv *priv = iocb->private; struct ep_data *epdata; int value; local_irq_disable(); epdata = priv->epdata; // spin_lock(&epdata->dev->lock); if (likely(epdata && epdata->ep && priv->req)) value = usb_ep_dequeue (epdata->ep, priv->req); else value = -EINVAL; // spin_unlock(&epdata->dev->lock); local_irq_enable(); return value; } static void ep_user_copy_worker(struct work_struct *work) { struct kiocb_priv *priv = container_of(work, struct kiocb_priv, work); struct mm_struct *mm = priv->mm; struct kiocb *iocb = priv->iocb; size_t ret; kthread_use_mm(mm); ret = copy_to_iter(priv->buf, priv->actual, &priv->to); kthread_unuse_mm(mm); if (!ret) ret = -EFAULT; /* completing the iocb can drop the ctx and mm, don't touch mm after */ iocb->ki_complete(iocb, ret); kfree(priv->buf); kfree(priv->to_free); kfree(priv); } static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req) { struct kiocb *iocb = req->context; struct kiocb_priv *priv = iocb->private; struct ep_data *epdata = priv->epdata; /* lock against disconnect (and ideally, cancel) */ spin_lock(&epdata->dev->lock); priv->req = NULL; priv->epdata = NULL; /* if this was a write or a read returning no data then we * don't need to copy anything to userspace, so we can * complete the aio request immediately. */ if (priv->to_free == NULL || unlikely(req->actual == 0)) { kfree(req->buf); kfree(priv->to_free); kfree(priv); iocb->private = NULL; iocb->ki_complete(iocb, req->actual ? req->actual : (long)req->status); } else { /* ep_copy_to_user() won't report both; we hide some faults */ if (unlikely(0 != req->status)) DBG(epdata->dev, "%s fault %d len %d\n", ep->name, req->status, req->actual); priv->buf = req->buf; priv->actual = req->actual; INIT_WORK(&priv->work, ep_user_copy_worker); schedule_work(&priv->work); } usb_ep_free_request(ep, req); spin_unlock(&epdata->dev->lock); put_ep(epdata); } static ssize_t ep_aio(struct kiocb *iocb, struct kiocb_priv *priv, struct ep_data *epdata, char *buf, size_t len) { struct usb_request *req; ssize_t value; iocb->private = priv; priv->iocb = iocb; kiocb_set_cancel_fn(iocb, ep_aio_cancel); get_ep(epdata); priv->epdata = epdata; priv->actual = 0; priv->mm = current->mm; /* mm teardown waits for iocbs in exit_aio() */ /* each kiocb is coupled to one usb_request, but we can't * allocate or submit those if the host disconnected. 
*/ spin_lock_irq(&epdata->dev->lock); value = -ENODEV; if (unlikely(epdata->ep == NULL)) goto fail; req = usb_ep_alloc_request(epdata->ep, GFP_ATOMIC); value = -ENOMEM; if (unlikely(!req)) goto fail; priv->req = req; req->buf = buf; req->length = len; req->complete = ep_aio_complete; req->context = iocb; value = usb_ep_queue(epdata->ep, req, GFP_ATOMIC); if (unlikely(0 != value)) { usb_ep_free_request(epdata->ep, req); goto fail; } spin_unlock_irq(&epdata->dev->lock); return -EIOCBQUEUED; fail: spin_unlock_irq(&epdata->dev->lock); kfree(priv->to_free); kfree(priv); put_ep(epdata); return value; } static ssize_t ep_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct ep_data *epdata = file->private_data; size_t len = iov_iter_count(to); ssize_t value; char *buf; if ((value = get_ready_ep(file->f_flags, epdata, false)) < 0) return value; /* halt any endpoint by doing a "wrong direction" i/o call */ if (usb_endpoint_dir_in(&epdata->desc)) { if (usb_endpoint_xfer_isoc(&epdata->desc) || !is_sync_kiocb(iocb)) { mutex_unlock(&epdata->lock); return -EINVAL; } DBG (epdata->dev, "%s halt\n", epdata->name); spin_lock_irq(&epdata->dev->lock); if (likely(epdata->ep != NULL)) usb_ep_set_halt(epdata->ep); spin_unlock_irq(&epdata->dev->lock); mutex_unlock(&epdata->lock); return -EBADMSG; } buf = kmalloc(len, GFP_KERNEL); if (unlikely(!buf)) { mutex_unlock(&epdata->lock); return -ENOMEM; } if (is_sync_kiocb(iocb)) { value = ep_io(epdata, buf, len); if (value >= 0 && (copy_to_iter(buf, value, to) != value)) value = -EFAULT; } else { struct kiocb_priv *priv = kzalloc(sizeof *priv, GFP_KERNEL); value = -ENOMEM; if (!priv) goto fail; priv->to_free = dup_iter(&priv->to, to, GFP_KERNEL); if (!iter_is_ubuf(&priv->to) && !priv->to_free) { kfree(priv); goto fail; } value = ep_aio(iocb, priv, epdata, buf, len); if (value == -EIOCBQUEUED) buf = NULL; } fail: kfree(buf); mutex_unlock(&epdata->lock); return value; } static ssize_t ep_config(struct ep_data *, const char *, size_t); static ssize_t ep_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct ep_data *epdata = file->private_data; size_t len = iov_iter_count(from); bool configured; ssize_t value; char *buf; if ((value = get_ready_ep(file->f_flags, epdata, true)) < 0) return value; configured = epdata->state == STATE_EP_ENABLED; /* halt any endpoint by doing a "wrong direction" i/o call */ if (configured && !usb_endpoint_dir_in(&epdata->desc)) { if (usb_endpoint_xfer_isoc(&epdata->desc) || !is_sync_kiocb(iocb)) { mutex_unlock(&epdata->lock); return -EINVAL; } DBG (epdata->dev, "%s halt\n", epdata->name); spin_lock_irq(&epdata->dev->lock); if (likely(epdata->ep != NULL)) usb_ep_set_halt(epdata->ep); spin_unlock_irq(&epdata->dev->lock); mutex_unlock(&epdata->lock); return -EBADMSG; } buf = kmalloc(len, GFP_KERNEL); if (unlikely(!buf)) { mutex_unlock(&epdata->lock); return -ENOMEM; } if (unlikely(!copy_from_iter_full(buf, len, from))) { value = -EFAULT; goto out; } if (unlikely(!configured)) { value = ep_config(epdata, buf, len); } else if (is_sync_kiocb(iocb)) { value = ep_io(epdata, buf, len); } else { struct kiocb_priv *priv = kzalloc(sizeof *priv, GFP_KERNEL); value = -ENOMEM; if (priv) { value = ep_aio(iocb, priv, epdata, buf, len); if (value == -EIOCBQUEUED) buf = NULL; } } out: kfree(buf); mutex_unlock(&epdata->lock); return value; } /*----------------------------------------------------------------------*/ /* used after endpoint configuration */ static const struct 
file_operations ep_io_operations = { .owner = THIS_MODULE, .open = ep_open, .release = ep_release, .unlocked_ioctl = ep_ioctl, .read_iter = ep_read_iter, .write_iter = ep_write_iter, }; /* ENDPOINT INITIALIZATION * * fd = open ("/dev/gadget/$ENDPOINT", O_RDWR) * status = write (fd, descriptors, sizeof descriptors) * * That write establishes the endpoint configuration, configuring * the controller to process bulk, interrupt, or isochronous transfers * at the right maxpacket size, and so on. * * The descriptors are message type 1, identified by a host order u32 * at the beginning of what's written. Descriptor order is: full/low * speed descriptor, then optional high speed descriptor. */ static ssize_t ep_config (struct ep_data *data, const char *buf, size_t len) { struct usb_ep *ep; u32 tag; int value, length = len; if (data->state != STATE_EP_READY) { value = -EL2HLT; goto fail; } value = len; if (len < USB_DT_ENDPOINT_SIZE + 4) goto fail0; /* we might need to change message format someday */ memcpy(&tag, buf, 4); if (tag != 1) { DBG(data->dev, "config %s, bad tag %d\n", data->name, tag); goto fail0; } buf += 4; len -= 4; /* NOTE: audio endpoint extensions not accepted here; * just don't include the extra bytes. */ /* full/low speed descriptor, then high speed */ memcpy(&data->desc, buf, USB_DT_ENDPOINT_SIZE); if (data->desc.bLength != USB_DT_ENDPOINT_SIZE || data->desc.bDescriptorType != USB_DT_ENDPOINT) goto fail0; if (len != USB_DT_ENDPOINT_SIZE) { if (len != 2 * USB_DT_ENDPOINT_SIZE) goto fail0; memcpy(&data->hs_desc, buf + USB_DT_ENDPOINT_SIZE, USB_DT_ENDPOINT_SIZE); if (data->hs_desc.bLength != USB_DT_ENDPOINT_SIZE || data->hs_desc.bDescriptorType != USB_DT_ENDPOINT) { DBG(data->dev, "config %s, bad hs length or type\n", data->name); goto fail0; } } spin_lock_irq (&data->dev->lock); if (data->dev->state == STATE_DEV_UNBOUND) { value = -ENOENT; goto gone; } else { ep = data->ep; if (ep == NULL) { value = -ENODEV; goto gone; } } switch (data->dev->gadget->speed) { case USB_SPEED_LOW: case USB_SPEED_FULL: ep->desc = &data->desc; break; case USB_SPEED_HIGH: /* fails if caller didn't provide that descriptor... */ ep->desc = &data->hs_desc; break; default: DBG(data->dev, "unconnected, %s init abandoned\n", data->name); value = -EINVAL; goto gone; } value = usb_ep_enable(ep); if (value == 0) { data->state = STATE_EP_ENABLED; value = length; } gone: spin_unlock_irq (&data->dev->lock); if (value < 0) { fail: data->desc.bDescriptorType = 0; data->hs_desc.bDescriptorType = 0; } return value; fail0: value = -EINVAL; goto fail; } static int ep_open (struct inode *inode, struct file *fd) { struct ep_data *data = inode->i_private; int value = -EBUSY; if (mutex_lock_interruptible(&data->lock) != 0) return -EINTR; spin_lock_irq (&data->dev->lock); if (data->dev->state == STATE_DEV_UNBOUND) value = -ENOENT; else if (data->state == STATE_EP_DISABLED) { value = 0; data->state = STATE_EP_READY; get_ep (data); fd->private_data = data; VDEBUG (data->dev, "%s ready\n", data->name); } else DBG (data->dev, "%s state %d\n", data->name, data->state); spin_unlock_irq (&data->dev->lock); mutex_unlock(&data->lock); return value; } /*----------------------------------------------------------------------*/ /* EP0 IMPLEMENTATION can be partly in userspace. * * Drivers that use this facility receive various events, including * control requests the kernel doesn't handle. Drivers that don't * use this facility may be too simple-minded for real applications. 
*/ static inline void ep0_readable (struct dev_data *dev) { wake_up (&dev->wait); kill_fasync (&dev->fasync, SIGIO, POLL_IN); } static void clean_req (struct usb_ep *ep, struct usb_request *req) { struct dev_data *dev = ep->driver_data; if (req->buf != dev->rbuf) { kfree(req->buf); req->buf = dev->rbuf; } req->complete = epio_complete; dev->setup_out_ready = 0; } static void ep0_complete (struct usb_ep *ep, struct usb_request *req) { struct dev_data *dev = ep->driver_data; unsigned long flags; int free = 1; /* for control OUT, data must still get to userspace */ spin_lock_irqsave(&dev->lock, flags); if (!dev->setup_in) { dev->setup_out_error = (req->status != 0); if (!dev->setup_out_error) free = 0; dev->setup_out_ready = 1; ep0_readable (dev); } /* clean up as appropriate */ if (free && req->buf != &dev->rbuf) clean_req (ep, req); req->complete = epio_complete; spin_unlock_irqrestore(&dev->lock, flags); } static int setup_req (struct usb_ep *ep, struct usb_request *req, u16 len) { struct dev_data *dev = ep->driver_data; if (dev->setup_out_ready) { DBG (dev, "ep0 request busy!\n"); return -EBUSY; } if (len > sizeof (dev->rbuf)) req->buf = kmalloc(len, GFP_ATOMIC); if (req->buf == NULL) { req->buf = dev->rbuf; return -ENOMEM; } req->complete = ep0_complete; req->length = len; req->zero = 0; return 0; } static ssize_t ep0_read (struct file *fd, char __user *buf, size_t len, loff_t *ptr) { struct dev_data *dev = fd->private_data; ssize_t retval; enum ep0_state state; spin_lock_irq (&dev->lock); if (dev->state <= STATE_DEV_OPENED) { retval = -EINVAL; goto done; } /* report fd mode change before acting on it */ if (dev->setup_abort) { dev->setup_abort = 0; retval = -EIDRM; goto done; } /* control DATA stage */ if ((state = dev->state) == STATE_DEV_SETUP) { if (dev->setup_in) { /* stall IN */ VDEBUG(dev, "ep0in stall\n"); (void) usb_ep_set_halt (dev->gadget->ep0); retval = -EL2HLT; dev->state = STATE_DEV_CONNECTED; } else if (len == 0) { /* ack SET_CONFIGURATION etc */ struct usb_ep *ep = dev->gadget->ep0; struct usb_request *req = dev->req; if ((retval = setup_req (ep, req, 0)) == 0) { ++dev->udc_usage; spin_unlock_irq (&dev->lock); retval = usb_ep_queue (ep, req, GFP_KERNEL); spin_lock_irq (&dev->lock); --dev->udc_usage; } dev->state = STATE_DEV_CONNECTED; /* assume that was SET_CONFIGURATION */ if (dev->current_config) { unsigned power; if (gadget_is_dualspeed(dev->gadget) && (dev->gadget->speed == USB_SPEED_HIGH)) power = dev->hs_config->bMaxPower; else power = dev->config->bMaxPower; usb_gadget_vbus_draw(dev->gadget, 2 * power); } } else { /* collect OUT data */ if ((fd->f_flags & O_NONBLOCK) != 0 && !dev->setup_out_ready) { retval = -EAGAIN; goto done; } spin_unlock_irq (&dev->lock); retval = wait_event_interruptible (dev->wait, dev->setup_out_ready != 0); /* FIXME state could change from under us */ spin_lock_irq (&dev->lock); if (retval) goto done; if (dev->state != STATE_DEV_SETUP) { retval = -ECANCELED; goto done; } dev->state = STATE_DEV_CONNECTED; if (dev->setup_out_error) retval = -EIO; else { len = min (len, (size_t)dev->req->actual); ++dev->udc_usage; spin_unlock_irq(&dev->lock); if (copy_to_user (buf, dev->req->buf, len)) retval = -EFAULT; else retval = len; spin_lock_irq(&dev->lock); --dev->udc_usage; clean_req (dev->gadget->ep0, dev->req); /* NOTE userspace can't yet choose to stall */ } } goto done; } /* else normal: return event data */ if (len < sizeof dev->event [0]) { retval = -EINVAL; goto done; } len -= len % sizeof (struct usb_gadgetfs_event); dev->usermode_setup = 1; 
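/* scan: hand back up to n queued events; a queued GADGETFS_SETUP event
 * switches ep0 into STATE_DEV_SETUP and truncates the batch at that event. */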
scan: /* return queued events right away */ if (dev->ev_next != 0) { unsigned i, n; n = len / sizeof (struct usb_gadgetfs_event); if (dev->ev_next < n) n = dev->ev_next; /* ep0 i/o has special semantics during STATE_DEV_SETUP */ for (i = 0; i < n; i++) { if (dev->event [i].type == GADGETFS_SETUP) { dev->state = STATE_DEV_SETUP; n = i + 1; break; } } spin_unlock_irq (&dev->lock); len = n * sizeof (struct usb_gadgetfs_event); if (copy_to_user (buf, &dev->event, len)) retval = -EFAULT; else retval = len; if (len > 0) { /* NOTE this doesn't guard against broken drivers; * concurrent ep0 readers may lose events. */ spin_lock_irq (&dev->lock); if (dev->ev_next > n) { memmove(&dev->event[0], &dev->event[n], sizeof (struct usb_gadgetfs_event) * (dev->ev_next - n)); } dev->ev_next -= n; spin_unlock_irq (&dev->lock); } return retval; } if (fd->f_flags & O_NONBLOCK) { retval = -EAGAIN; goto done; } switch (state) { default: DBG (dev, "fail %s, state %d\n", __func__, state); retval = -ESRCH; break; case STATE_DEV_UNCONNECTED: case STATE_DEV_CONNECTED: spin_unlock_irq (&dev->lock); DBG (dev, "%s wait\n", __func__); /* wait for events */ retval = wait_event_interruptible (dev->wait, dev->ev_next != 0); if (retval < 0) return retval; spin_lock_irq (&dev->lock); goto scan; } done: spin_unlock_irq (&dev->lock); return retval; } static struct usb_gadgetfs_event * next_event (struct dev_data *dev, enum usb_gadgetfs_event_type type) { struct usb_gadgetfs_event *event; unsigned i; switch (type) { /* these events purge the queue */ case GADGETFS_DISCONNECT: if (dev->state == STATE_DEV_SETUP) dev->setup_abort = 1; fallthrough; case GADGETFS_CONNECT: dev->ev_next = 0; break; case GADGETFS_SETUP: /* previous request timed out */ case GADGETFS_SUSPEND: /* same effect */ /* these events can't be repeated */ for (i = 0; i != dev->ev_next; i++) { if (dev->event [i].type != type) continue; DBG(dev, "discard old event[%d] %d\n", i, type); dev->ev_next--; if (i == dev->ev_next) break; /* indices start at zero, for simplicity */ memmove (&dev->event [i], &dev->event [i + 1], sizeof (struct usb_gadgetfs_event) * (dev->ev_next - i)); } break; default: BUG (); } VDEBUG(dev, "event[%d] = %d\n", dev->ev_next, type); event = &dev->event [dev->ev_next++]; BUG_ON (dev->ev_next > N_EVENT); memset (event, 0, sizeof *event); event->type = type; return event; } static ssize_t ep0_write (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) { struct dev_data *dev = fd->private_data; ssize_t retval = -ESRCH; /* report fd mode change before acting on it */ if (dev->setup_abort) { dev->setup_abort = 0; retval = -EIDRM; /* data and/or status stage for control request */ } else if (dev->state == STATE_DEV_SETUP) { len = min_t(size_t, len, dev->setup_wLength); if (dev->setup_in) { retval = setup_req (dev->gadget->ep0, dev->req, len); if (retval == 0) { dev->state = STATE_DEV_CONNECTED; ++dev->udc_usage; spin_unlock_irq (&dev->lock); if (copy_from_user (dev->req->buf, buf, len)) retval = -EFAULT; else { if (len < dev->setup_wLength) dev->req->zero = 1; retval = usb_ep_queue ( dev->gadget->ep0, dev->req, GFP_KERNEL); } spin_lock_irq(&dev->lock); --dev->udc_usage; if (retval < 0) { clean_req (dev->gadget->ep0, dev->req); } else retval = len; return retval; } /* can stall some OUT transfers */ } else if (dev->setup_can_stall) { VDEBUG(dev, "ep0out stall\n"); (void) usb_ep_set_halt (dev->gadget->ep0); retval = -EL2HLT; dev->state = STATE_DEV_CONNECTED; } else { DBG(dev, "bogus ep0out stall!\n"); } } else DBG (dev, "fail %s, state 
%d\n", __func__, dev->state); return retval; } static int ep0_fasync (int f, struct file *fd, int on) { struct dev_data *dev = fd->private_data; // caller must F_SETOWN before signal delivery happens VDEBUG(dev, "%s %s\n", __func__, str_on_off(on)); return fasync_helper (f, fd, on, &dev->fasync); } static struct usb_gadget_driver gadgetfs_driver; static int dev_release (struct inode *inode, struct file *fd) { struct dev_data *dev = fd->private_data; /* closing ep0 === shutdown all */ if (dev->gadget_registered) { usb_gadget_unregister_driver (&gadgetfs_driver); dev->gadget_registered = false; } /* at this point "good" hardware has disconnected the * device from USB; the host won't see it any more. * alternatively, all host requests will time out. */ kfree (dev->buf); dev->buf = NULL; /* other endpoints were all decoupled from this device */ spin_lock_irq(&dev->lock); dev->state = STATE_DEV_DISABLED; spin_unlock_irq(&dev->lock); put_dev (dev); return 0; } static __poll_t ep0_poll (struct file *fd, poll_table *wait) { struct dev_data *dev = fd->private_data; __poll_t mask = 0; if (dev->state <= STATE_DEV_OPENED) return DEFAULT_POLLMASK; poll_wait(fd, &dev->wait, wait); spin_lock_irq(&dev->lock); /* report fd mode change before acting on it */ if (dev->setup_abort) { dev->setup_abort = 0; mask = EPOLLHUP; goto out; } if (dev->state == STATE_DEV_SETUP) { if (dev->setup_in || dev->setup_can_stall) mask = EPOLLOUT; } else { if (dev->ev_next != 0) mask = EPOLLIN; } out: spin_unlock_irq(&dev->lock); return mask; } static long gadget_dev_ioctl (struct file *fd, unsigned code, unsigned long value) { struct dev_data *dev = fd->private_data; struct usb_gadget *gadget = dev->gadget; long ret = -ENOTTY; spin_lock_irq(&dev->lock); if (dev->state == STATE_DEV_OPENED || dev->state == STATE_DEV_UNBOUND) { /* Not bound to a UDC */ } else if (gadget->ops->ioctl) { ++dev->udc_usage; spin_unlock_irq(&dev->lock); ret = gadget->ops->ioctl (gadget, code, value); spin_lock_irq(&dev->lock); --dev->udc_usage; } spin_unlock_irq(&dev->lock); return ret; } /*----------------------------------------------------------------------*/ /* The in-kernel gadget driver handles most ep0 issues, in particular * enumerating the single configuration (as provided from user space). * * Unrecognized ep0 requests may be handled in user space. */ static void make_qualifier (struct dev_data *dev) { struct usb_qualifier_descriptor qual; struct usb_device_descriptor *desc; qual.bLength = sizeof qual; qual.bDescriptorType = USB_DT_DEVICE_QUALIFIER; qual.bcdUSB = cpu_to_le16 (0x0200); desc = dev->dev; qual.bDeviceClass = desc->bDeviceClass; qual.bDeviceSubClass = desc->bDeviceSubClass; qual.bDeviceProtocol = desc->bDeviceProtocol; /* assumes ep0 uses the same value for both speeds ... 
*/ qual.bMaxPacketSize0 = dev->gadget->ep0->maxpacket; qual.bNumConfigurations = 1; qual.bRESERVED = 0; memcpy (dev->rbuf, &qual, sizeof qual); } static int config_buf (struct dev_data *dev, u8 type, unsigned index) { int len; int hs = 0; /* only one configuration */ if (index > 0) return -EINVAL; if (gadget_is_dualspeed(dev->gadget)) { hs = (dev->gadget->speed == USB_SPEED_HIGH); if (type == USB_DT_OTHER_SPEED_CONFIG) hs = !hs; } if (hs) { dev->req->buf = dev->hs_config; len = le16_to_cpu(dev->hs_config->wTotalLength); } else { dev->req->buf = dev->config; len = le16_to_cpu(dev->config->wTotalLength); } ((u8 *)dev->req->buf) [1] = type; return len; } static int gadgetfs_setup (struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) { struct dev_data *dev = get_gadget_data (gadget); struct usb_request *req = dev->req; int value = -EOPNOTSUPP; struct usb_gadgetfs_event *event; u16 w_value = le16_to_cpu(ctrl->wValue); u16 w_length = le16_to_cpu(ctrl->wLength); if (w_length > RBUF_SIZE) { if (ctrl->bRequestType & USB_DIR_IN) { /* Cast away the const, we are going to overwrite on purpose. */ __le16 *temp = (__le16 *)&ctrl->wLength; *temp = cpu_to_le16(RBUF_SIZE); w_length = RBUF_SIZE; } else { return value; } } spin_lock (&dev->lock); dev->setup_abort = 0; if (dev->state == STATE_DEV_UNCONNECTED) { if (gadget_is_dualspeed(gadget) && gadget->speed == USB_SPEED_HIGH && dev->hs_config == NULL) { spin_unlock(&dev->lock); ERROR (dev, "no high speed config??\n"); return -EINVAL; } dev->state = STATE_DEV_CONNECTED; INFO (dev, "connected\n"); event = next_event (dev, GADGETFS_CONNECT); event->u.speed = gadget->speed; ep0_readable (dev); /* host may have given up waiting for response. we can miss control * requests handled lower down (device/endpoint status and features); * then ep0_{read,write} will report the wrong status. controller * driver will have aborted pending i/o. */ } else if (dev->state == STATE_DEV_SETUP) dev->setup_abort = 1; req->buf = dev->rbuf; req->context = NULL; switch (ctrl->bRequest) { case USB_REQ_GET_DESCRIPTOR: if (ctrl->bRequestType != USB_DIR_IN) goto unrecognized; switch (w_value >> 8) { case USB_DT_DEVICE: value = min (w_length, (u16) sizeof *dev->dev); dev->dev->bMaxPacketSize0 = dev->gadget->ep0->maxpacket; req->buf = dev->dev; break; case USB_DT_DEVICE_QUALIFIER: if (!dev->hs_config) break; value = min (w_length, (u16) sizeof (struct usb_qualifier_descriptor)); make_qualifier (dev); break; case USB_DT_OTHER_SPEED_CONFIG: case USB_DT_CONFIG: value = config_buf (dev, w_value >> 8, w_value & 0xff); if (value >= 0) value = min (w_length, (u16) value); break; case USB_DT_STRING: goto unrecognized; default: // all others are errors break; } break; /* currently one config, two speeds */ case USB_REQ_SET_CONFIGURATION: if (ctrl->bRequestType != 0) goto unrecognized; if (0 == (u8) w_value) { value = 0; dev->current_config = 0; usb_gadget_vbus_draw(gadget, 8 /* mA */ ); // user mode expected to disable endpoints } else { u8 config, power; if (gadget_is_dualspeed(gadget) && gadget->speed == USB_SPEED_HIGH) { config = dev->hs_config->bConfigurationValue; power = dev->hs_config->bMaxPower; } else { config = dev->config->bConfigurationValue; power = dev->config->bMaxPower; } if (config == (u8) w_value) { value = 0; dev->current_config = config; usb_gadget_vbus_draw(gadget, 2 * power); } } /* report SET_CONFIGURATION like any other control request, * except that usermode may not stall this. 
the next * request mustn't be allowed start until this finishes: * endpoints and threads set up, etc. * * NOTE: older PXA hardware (before PXA 255: without UDCCFR) * has bad/racey automagic that prevents synchronizing here. * even kernel mode drivers often miss them. */ if (value == 0) { INFO (dev, "configuration #%d\n", dev->current_config); usb_gadget_set_state(gadget, USB_STATE_CONFIGURED); if (dev->usermode_setup) { dev->setup_can_stall = 0; goto delegate; } } break; #ifndef CONFIG_USB_PXA25X /* PXA automagically handles this request too */ case USB_REQ_GET_CONFIGURATION: if (ctrl->bRequestType != 0x80) goto unrecognized; *(u8 *)req->buf = dev->current_config; value = min (w_length, (u16) 1); break; #endif default: unrecognized: VDEBUG (dev, "%s req%02x.%02x v%04x i%04x l%d\n", dev->usermode_setup ? "delegate" : "fail", ctrl->bRequestType, ctrl->bRequest, w_value, le16_to_cpu(ctrl->wIndex), w_length); /* if there's an ep0 reader, don't stall */ if (dev->usermode_setup) { dev->setup_can_stall = 1; delegate: dev->setup_in = (ctrl->bRequestType & USB_DIR_IN) ? 1 : 0; dev->setup_wLength = w_length; dev->setup_out_ready = 0; dev->setup_out_error = 0; /* read DATA stage for OUT right away */ if (unlikely (!dev->setup_in && w_length)) { value = setup_req (gadget->ep0, dev->req, w_length); if (value < 0) break; ++dev->udc_usage; spin_unlock (&dev->lock); value = usb_ep_queue (gadget->ep0, dev->req, GFP_KERNEL); spin_lock (&dev->lock); --dev->udc_usage; if (value < 0) { clean_req (gadget->ep0, dev->req); break; } /* we can't currently stall these */ dev->setup_can_stall = 0; } /* state changes when reader collects event */ event = next_event (dev, GADGETFS_SETUP); event->u.setup = *ctrl; ep0_readable (dev); spin_unlock (&dev->lock); /* * Return USB_GADGET_DELAYED_STATUS as a workaround to * stop some UDC drivers (e.g. dwc3) from automatically * proceeding with the status stage for 0-length * transfers. * Should be removed once all UDC drivers are fixed to * always delay the status stage until a response is * queued to EP0. */ return w_length == 0 ? USB_GADGET_DELAYED_STATUS : 0; } } /* proceed with data transfer and status phases? 
*/ if (value >= 0 && dev->state != STATE_DEV_SETUP) { req->length = value; req->zero = value < w_length; ++dev->udc_usage; spin_unlock (&dev->lock); value = usb_ep_queue (gadget->ep0, req, GFP_KERNEL); spin_lock(&dev->lock); --dev->udc_usage; spin_unlock(&dev->lock); if (value < 0) { DBG (dev, "ep_queue --> %d\n", value); req->status = 0; } return value; } /* device stalls when value < 0 */ spin_unlock (&dev->lock); return value; } static void destroy_ep_files (struct dev_data *dev) { DBG (dev, "%s %d\n", __func__, dev->state); /* dev->state must prevent interference */ spin_lock_irq (&dev->lock); while (!list_empty(&dev->epfiles)) { struct ep_data *ep; struct inode *parent; struct dentry *dentry; /* break link to FS */ ep = list_first_entry (&dev->epfiles, struct ep_data, epfiles); list_del_init (&ep->epfiles); spin_unlock_irq (&dev->lock); dentry = ep->dentry; ep->dentry = NULL; parent = d_inode(dentry->d_parent); /* break link to controller */ mutex_lock(&ep->lock); if (ep->state == STATE_EP_ENABLED) (void) usb_ep_disable (ep->ep); ep->state = STATE_EP_UNBOUND; usb_ep_free_request (ep->ep, ep->req); ep->ep = NULL; mutex_unlock(&ep->lock); wake_up (&ep->wait); put_ep (ep); /* break link to dcache */ inode_lock(parent); d_delete (dentry); dput (dentry); inode_unlock(parent); spin_lock_irq (&dev->lock); } spin_unlock_irq (&dev->lock); } static struct dentry * gadgetfs_create_file (struct super_block *sb, char const *name, void *data, const struct file_operations *fops); static int activate_ep_files (struct dev_data *dev) { struct usb_ep *ep; struct ep_data *data; gadget_for_each_ep (ep, dev->gadget) { data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) goto enomem0; data->state = STATE_EP_DISABLED; mutex_init(&data->lock); init_waitqueue_head (&data->wait); strncpy (data->name, ep->name, sizeof (data->name) - 1); refcount_set (&data->count, 1); data->dev = dev; get_dev (dev); data->ep = ep; ep->driver_data = data; data->req = usb_ep_alloc_request (ep, GFP_KERNEL); if (!data->req) goto enomem1; data->dentry = gadgetfs_create_file (dev->sb, data->name, data, &ep_io_operations); if (!data->dentry) goto enomem2; list_add_tail (&data->epfiles, &dev->epfiles); } return 0; enomem2: usb_ep_free_request (ep, data->req); enomem1: put_dev (dev); kfree (data); enomem0: DBG (dev, "%s enomem\n", __func__); destroy_ep_files (dev); return -ENOMEM; } static void gadgetfs_unbind (struct usb_gadget *gadget) { struct dev_data *dev = get_gadget_data (gadget); DBG (dev, "%s\n", __func__); spin_lock_irq (&dev->lock); dev->state = STATE_DEV_UNBOUND; while (dev->udc_usage > 0) { spin_unlock_irq(&dev->lock); usleep_range(1000, 2000); spin_lock_irq(&dev->lock); } spin_unlock_irq (&dev->lock); destroy_ep_files (dev); gadget->ep0->driver_data = NULL; set_gadget_data (gadget, NULL); /* we've already been disconnected ... 
no i/o is active */ if (dev->req) usb_ep_free_request (gadget->ep0, dev->req); DBG (dev, "%s done\n", __func__); put_dev (dev); } static struct dev_data *the_device; static int gadgetfs_bind(struct usb_gadget *gadget, struct usb_gadget_driver *driver) { struct dev_data *dev = the_device; if (!dev) return -ESRCH; if (0 != strcmp (CHIP, gadget->name)) { pr_err("%s expected %s controller not %s\n", shortname, CHIP, gadget->name); return -ENODEV; } set_gadget_data (gadget, dev); dev->gadget = gadget; gadget->ep0->driver_data = dev; /* preallocate control response and buffer */ dev->req = usb_ep_alloc_request (gadget->ep0, GFP_KERNEL); if (!dev->req) goto enomem; dev->req->context = NULL; dev->req->complete = epio_complete; if (activate_ep_files (dev) < 0) goto enomem; INFO (dev, "bound to %s driver\n", gadget->name); spin_lock_irq(&dev->lock); dev->state = STATE_DEV_UNCONNECTED; spin_unlock_irq(&dev->lock); get_dev (dev); return 0; enomem: gadgetfs_unbind (gadget); return -ENOMEM; } static void gadgetfs_disconnect (struct usb_gadget *gadget) { struct dev_data *dev = get_gadget_data (gadget); unsigned long flags; spin_lock_irqsave (&dev->lock, flags); if (dev->state == STATE_DEV_UNCONNECTED) goto exit; dev->state = STATE_DEV_UNCONNECTED; INFO (dev, "disconnected\n"); next_event (dev, GADGETFS_DISCONNECT); ep0_readable (dev); exit: spin_unlock_irqrestore (&dev->lock, flags); } static void gadgetfs_suspend (struct usb_gadget *gadget) { struct dev_data *dev = get_gadget_data (gadget); unsigned long flags; INFO (dev, "suspended from state %d\n", dev->state); spin_lock_irqsave(&dev->lock, flags); switch (dev->state) { case STATE_DEV_SETUP: // VERY odd... host died?? case STATE_DEV_CONNECTED: case STATE_DEV_UNCONNECTED: next_event (dev, GADGETFS_SUSPEND); ep0_readable (dev); fallthrough; default: break; } spin_unlock_irqrestore(&dev->lock, flags); } static struct usb_gadget_driver gadgetfs_driver = { .function = (char *) driver_desc, .bind = gadgetfs_bind, .unbind = gadgetfs_unbind, .setup = gadgetfs_setup, .reset = gadgetfs_disconnect, .disconnect = gadgetfs_disconnect, .suspend = gadgetfs_suspend, .driver = { .name = shortname, }, }; /*----------------------------------------------------------------------*/ /* DEVICE INITIALIZATION * * fd = open ("/dev/gadget/$CHIP", O_RDWR) * status = write (fd, descriptors, sizeof descriptors) * * That write establishes the device configuration, so the kernel can * bind to the controller ... guaranteeing it can handle enumeration * at all necessary speeds. Descriptor order is: * * . message tag (u32, host order) ... for now, must be zero; it * would change to support features like multi-config devices * . full/low speed config ... all wTotalLength bytes (with interface, * class, altsetting, endpoint, and other descriptors) * . high speed config ... all descriptors, for high speed operation; * this one's optional except for high-speed hardware * . device descriptor * * Endpoints are not yet enabled. Drivers must wait until device * configuration and interface altsetting changes create * the need to configure (or unconfigure) them. * * After initialization, the device stays active for as long as that * $CHIP file is open. Events must then be read from that descriptor, * such as configuration notifications. 
*/ static int is_valid_config(struct usb_config_descriptor *config, unsigned int total) { return config->bDescriptorType == USB_DT_CONFIG && config->bLength == USB_DT_CONFIG_SIZE && total >= USB_DT_CONFIG_SIZE && config->bConfigurationValue != 0 && (config->bmAttributes & USB_CONFIG_ATT_ONE) != 0 && (config->bmAttributes & USB_CONFIG_ATT_WAKEUP) == 0; /* FIXME if gadget->is_otg, _must_ include an otg descriptor */ /* FIXME check lengths: walk to end */ } static ssize_t dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) { struct dev_data *dev = fd->private_data; ssize_t value, length = len; unsigned total; u32 tag; char *kbuf; spin_lock_irq(&dev->lock); if (dev->state > STATE_DEV_OPENED) { value = ep0_write(fd, buf, len, ptr); spin_unlock_irq(&dev->lock); return value; } spin_unlock_irq(&dev->lock); if ((len < (USB_DT_CONFIG_SIZE + USB_DT_DEVICE_SIZE + 4)) || (len > PAGE_SIZE * 4)) return -EINVAL; /* we might need to change message format someday */ if (copy_from_user (&tag, buf, 4)) return -EFAULT; if (tag != 0) return -EINVAL; buf += 4; length -= 4; kbuf = memdup_user(buf, length); if (IS_ERR(kbuf)) return PTR_ERR(kbuf); spin_lock_irq (&dev->lock); value = -EINVAL; if (dev->buf) { spin_unlock_irq(&dev->lock); kfree(kbuf); return value; } dev->buf = kbuf; /* full or low speed config */ dev->config = (void *) kbuf; total = le16_to_cpu(dev->config->wTotalLength); if (!is_valid_config(dev->config, total) || total > length - USB_DT_DEVICE_SIZE) goto fail; kbuf += total; length -= total; /* optional high speed config */ if (kbuf [1] == USB_DT_CONFIG) { dev->hs_config = (void *) kbuf; total = le16_to_cpu(dev->hs_config->wTotalLength); if (!is_valid_config(dev->hs_config, total) || total > length - USB_DT_DEVICE_SIZE) goto fail; kbuf += total; length -= total; } else { dev->hs_config = NULL; } /* could support multiple configs, using another encoding! */ /* device descriptor (tweaked for paranoia) */ if (length != USB_DT_DEVICE_SIZE) goto fail; dev->dev = (void *)kbuf; if (dev->dev->bLength != USB_DT_DEVICE_SIZE || dev->dev->bDescriptorType != USB_DT_DEVICE || dev->dev->bNumConfigurations != 1) goto fail; dev->dev->bcdUSB = cpu_to_le16 (0x0200); /* triggers gadgetfs_bind(); then we can enumerate. */ spin_unlock_irq (&dev->lock); if (dev->hs_config) gadgetfs_driver.max_speed = USB_SPEED_HIGH; else gadgetfs_driver.max_speed = USB_SPEED_FULL; value = usb_gadget_register_driver(&gadgetfs_driver); if (value != 0) { spin_lock_irq(&dev->lock); goto fail; } else { /* at this point "good" hardware has for the first time * let the USB the host see us. alternatively, if users * unplug/replug that will clear all the error state. * * note: everything running before here was guaranteed * to choke driver model style diagnostics. from here * on, they can work ... except in cleanup paths that * kick in after the ep0 descriptor is closed. 
*/ value = len; dev->gadget_registered = true; } return value; fail: dev->config = NULL; dev->hs_config = NULL; dev->dev = NULL; spin_unlock_irq (&dev->lock); pr_debug ("%s: %s fail %zd, %p\n", shortname, __func__, value, dev); kfree (dev->buf); dev->buf = NULL; return value; } static int gadget_dev_open (struct inode *inode, struct file *fd) { struct dev_data *dev = inode->i_private; int value = -EBUSY; spin_lock_irq(&dev->lock); if (dev->state == STATE_DEV_DISABLED) { dev->ev_next = 0; dev->state = STATE_DEV_OPENED; fd->private_data = dev; get_dev (dev); value = 0; } spin_unlock_irq(&dev->lock); return value; } static const struct file_operations ep0_operations = { .open = gadget_dev_open, .read = ep0_read, .write = dev_config, .fasync = ep0_fasync, .poll = ep0_poll, .unlocked_ioctl = gadget_dev_ioctl, .release = dev_release, }; /*----------------------------------------------------------------------*/ /* FILESYSTEM AND SUPERBLOCK OPERATIONS * * Mounting the filesystem creates a controller file, used first for * device configuration then later for event monitoring. */ /* FIXME PAM etc could set this security policy without mount options * if epfiles inherited ownership and permissons from ep0 ... */ static unsigned default_uid; static unsigned default_gid; static unsigned default_perm = S_IRUSR | S_IWUSR; module_param (default_uid, uint, 0644); module_param (default_gid, uint, 0644); module_param (default_perm, uint, 0644); static struct inode * gadgetfs_make_inode (struct super_block *sb, void *data, const struct file_operations *fops, int mode) { struct inode *inode = new_inode (sb); if (inode) { inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = make_kuid(&init_user_ns, default_uid); inode->i_gid = make_kgid(&init_user_ns, default_gid); simple_inode_init_ts(inode); inode->i_private = data; inode->i_fop = fops; } return inode; } /* creates in fs root directory, so non-renamable and non-linkable. * so inode and dentry are paired, until device reconfig. */ static struct dentry * gadgetfs_create_file (struct super_block *sb, char const *name, void *data, const struct file_operations *fops) { struct dentry *dentry; struct inode *inode; dentry = d_alloc_name(sb->s_root, name); if (!dentry) return NULL; inode = gadgetfs_make_inode (sb, data, fops, S_IFREG | (default_perm & S_IRWXUGO)); if (!inode) { dput(dentry); return NULL; } d_add (dentry, inode); return dentry; } static const struct super_operations gadget_fs_operations = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, }; static int gadgetfs_fill_super (struct super_block *sb, struct fs_context *fc) { struct inode *inode; struct dev_data *dev; int rc; mutex_lock(&sb_mutex); if (the_device) { rc = -ESRCH; goto Done; } CHIP = usb_get_gadget_udc_name(); if (!CHIP) { rc = -ENODEV; goto Done; } /* superblock */ sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = GADGETFS_MAGIC; sb->s_op = &gadget_fs_operations; sb->s_time_gran = 1; /* root inode */ inode = gadgetfs_make_inode (sb, NULL, &simple_dir_operations, S_IFDIR | S_IRUGO | S_IXUGO); if (!inode) goto Enomem; inode->i_op = &simple_dir_inode_operations; if (!(sb->s_root = d_make_root (inode))) goto Enomem; /* the ep0 file is named after the controller we expect; * user mode code can use it for sanity checks, like we do. 
*/ dev = dev_new (); if (!dev) goto Enomem; dev->sb = sb; dev->dentry = gadgetfs_create_file(sb, CHIP, dev, &ep0_operations); if (!dev->dentry) { put_dev(dev); goto Enomem; } /* other endpoint files are available after hardware setup, * from binding to a controller. */ the_device = dev; rc = 0; goto Done; Enomem: kfree(CHIP); CHIP = NULL; rc = -ENOMEM; Done: mutex_unlock(&sb_mutex); return rc; } /* "mount -t gadgetfs path /dev/gadget" ends up here */ static int gadgetfs_get_tree(struct fs_context *fc) { return get_tree_single(fc, gadgetfs_fill_super); } static const struct fs_context_operations gadgetfs_context_ops = { .get_tree = gadgetfs_get_tree, }; static int gadgetfs_init_fs_context(struct fs_context *fc) { fc->ops = &gadgetfs_context_ops; return 0; } static void gadgetfs_kill_sb (struct super_block *sb) { mutex_lock(&sb_mutex); kill_litter_super (sb); if (the_device) { put_dev (the_device); the_device = NULL; } kfree(CHIP); CHIP = NULL; mutex_unlock(&sb_mutex); } /*----------------------------------------------------------------------*/ static struct file_system_type gadgetfs_type = { .owner = THIS_MODULE, .name = shortname, .init_fs_context = gadgetfs_init_fs_context, .kill_sb = gadgetfs_kill_sb, }; MODULE_ALIAS_FS("gadgetfs"); /*----------------------------------------------------------------------*/ static int __init gadgetfs_init (void) { int status; status = register_filesystem (&gadgetfs_type); if (status == 0) pr_info ("%s: %s, version " DRIVER_VERSION "\n", shortname, driver_desc); return status; } module_init (gadgetfs_init); static void __exit gadgetfs_cleanup (void) { pr_debug ("unregister %s\n", shortname); unregister_filesystem (&gadgetfs_type); } module_exit (gadgetfs_cleanup);
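The "DEVICE INITIALIZATION" comment above describes the userspace side of this interface: open the ep0 file, write a zero tag followed by the configuration and device descriptors, then read events from the same file descriptor. Below is a minimal, hedged userspace sketch of that sequence; gadgetfs_configure(), ep0_path and the descriptors blob are illustrative names rather than part of gadgetfs, and error handling is abbreviated.

#include <fcntl.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <linux/usb/gadgetfs.h>

static int gadgetfs_configure(const char *ep0_path,
			      const void *descriptors, size_t desc_len)
{
	struct usb_gadgetfs_event event;
	uint8_t buf[4096];
	uint32_t tag = 0;	/* message tag: must currently be zero */
	int fd;

	if (desc_len > sizeof(buf) - sizeof(tag))
		return -1;

	fd = open(ep0_path, O_RDWR);	/* e.g. /dev/gadget/<controller> */
	if (fd < 0)
		return -1;

	/* one write: tag, full/low speed config, optional high speed
	 * config, then the device descriptor (see dev_config() above) */
	memcpy(buf, &tag, sizeof(tag));
	memcpy(buf + sizeof(tag), descriptors, desc_len);
	if (write(fd, buf, sizeof(tag) + desc_len) < 0) {
		close(fd);
		return -1;
	}

	/* the same fd now delivers struct usb_gadgetfs_event records */
	if (read(fd, &event, sizeof(event)) == sizeof(event))
		printf("first event type %d\n", event.type);

	return fd;
}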
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_E820_API_H
#define _ASM_E820_API_H

#include <asm/e820/types.h>

extern struct e820_table *e820_table;
extern struct e820_table *e820_table_kexec;
extern struct e820_table *e820_table_firmware;

extern unsigned long pci_mem_start;

extern bool e820__mapped_raw_any(u64 start, u64 end, enum e820_type type);
extern bool e820__mapped_any(u64 start, u64 end, enum e820_type type);
extern bool e820__mapped_all(u64 start, u64 end, enum e820_type type);

extern void e820__range_add (u64 start, u64 size, enum e820_type type);
extern u64 e820__range_update(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type);
extern u64 e820__range_remove(u64 start, u64 size, enum e820_type old_type, bool check_type);
extern u64 e820__range_update_table(struct e820_table *t, u64 start, u64 size, enum e820_type old_type, enum e820_type new_type);

extern void e820__print_table(char *who);
extern int e820__update_table(struct e820_table *table);
extern void e820__update_table_print(void);

extern unsigned long e820__end_of_ram_pfn(void);
extern unsigned long e820__end_of_low_ram_pfn(void);

extern u64 e820__memblock_alloc_reserved(u64 size, u64 align);
extern void e820__memblock_setup(void);

extern void e820__reserve_setup_data(void);
extern void e820__finish_early_params(void);
extern void e820__reserve_resources(void);
extern void e820__reserve_resources_late(void);

extern void e820__memory_setup(void);
extern void e820__memory_setup_extended(u64 phys_addr, u32 data_len);
extern char *e820__memory_setup_default(void);
extern void e820__setup_pci_gap(void);

extern void e820__reallocate_tables(void);

extern void e820__register_nosave_regions(unsigned long limit_pfn);

extern int e820__get_entry_type(u64 start, u64 end);

/*
 * Returns true iff the specified range [start,end) is completely contained inside
 * the ISA region.
 */
static inline bool is_ISA_range(u64 start, u64 end)
{
	return start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS;
}

#endif /* _ASM_E820_API_H */
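A hedged sketch of using the query helpers declared above; range_is_usable_ram() and range_touches_reserved() are made-up wrappers for illustration, not part of the e820 API.

#include <linux/types.h>
#include <asm/e820/api.h>

/* true only if every byte of [start, end) is described as E820_TYPE_RAM */
static bool range_is_usable_ram(u64 start, u64 end)
{
	return e820__mapped_all(start, end, E820_TYPE_RAM);
}

/* true if any part of [start, end) overlaps a reserved e820 entry */
static bool range_touches_reserved(u64 start, u64 end)
{
	return e820__mapped_any(start, end, E820_TYPE_RESERVED);
}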
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_rmap.h"
#include "xfs_alloc_btree.h"
#include "xfs_alloc.h"
#include "xfs_extent_busy.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
#include "xfs_ag.h"
#include "xfs_ag_resv.h"
#include "xfs_bmap.h"
#include "xfs_health.h"
#include "xfs_extfree_item.h"

struct kmem_cache	*xfs_extfree_item_cache;

struct workqueue_struct *xfs_alloc_wq;

#define XFS_ABSDIFF(a,b)	(((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))

#define	XFSA_FIXUP_BNO_OK	1
#define	XFSA_FIXUP_CNT_OK	2

/*
 * Size of the AGFL.
For CRC-enabled filesystes we steal a couple of slots in * the beginning of the block for a proper header with the location information * and CRC. */ unsigned int xfs_agfl_size( struct xfs_mount *mp) { unsigned int size = mp->m_sb.sb_sectsize; if (xfs_has_crc(mp)) size -= sizeof(struct xfs_agfl); return size / sizeof(xfs_agblock_t); } unsigned int xfs_refc_block( struct xfs_mount *mp) { if (xfs_has_rmapbt(mp)) return XFS_RMAP_BLOCK(mp) + 1; if (xfs_has_finobt(mp)) return XFS_FIBT_BLOCK(mp) + 1; return XFS_IBT_BLOCK(mp) + 1; } xfs_extlen_t xfs_prealloc_blocks( struct xfs_mount *mp) { if (xfs_has_reflink(mp)) return xfs_refc_block(mp) + 1; if (xfs_has_rmapbt(mp)) return XFS_RMAP_BLOCK(mp) + 1; if (xfs_has_finobt(mp)) return XFS_FIBT_BLOCK(mp) + 1; return XFS_IBT_BLOCK(mp) + 1; } /* * The number of blocks per AG that we withhold from xfs_dec_fdblocks to * guarantee that we can refill the AGFL prior to allocating space in a nearly * full AG. Although the space described by the free space btrees, the * blocks used by the freesp btrees themselves, and the blocks owned by the * AGFL are counted in the ondisk fdblocks, it's a mistake to let the ondisk * free space in the AG drop so low that the free space btrees cannot refill an * empty AGFL up to the minimum level. Rather than grind through empty AGs * until the fs goes down, we subtract this many AG blocks from the incore * fdblocks to ensure user allocation does not overcommit the space the * filesystem needs for the AGFLs. The rmap btree uses a per-AG reservation to * withhold space from xfs_dec_fdblocks, so we do not account for that here. */ #define XFS_ALLOCBT_AGFL_RESERVE 4 /* * Compute the number of blocks that we set aside to guarantee the ability to * refill the AGFL and handle a full bmap btree split. * * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of * AGF buffer (PV 947395), we place constraints on the relationship among * actual allocations for data blocks, freelist blocks, and potential file data * bmap btree blocks. However, these restrictions may result in no actual space * allocated for a delayed extent, for example, a data block in a certain AG is * allocated but there is no additional block for the additional bmap btree * block due to a split of the bmap btree of the file. The result of this may * lead to an infinite loop when the file gets flushed to disk and all delayed * extents need to be actually allocated. To get around this, we explicitly set * aside a few blocks which will not be reserved in delayed allocation. * * For each AG, we need to reserve enough blocks to replenish a totally empty * AGFL and 4 more to handle a potential split of the file's bmap btree. */ unsigned int xfs_alloc_set_aside( struct xfs_mount *mp) { return mp->m_sb.sb_agcount * (XFS_ALLOCBT_AGFL_RESERVE + 4); } /* * When deciding how much space to allocate out of an AG, we limit the * allocation maximum size to the size the AG. However, we cannot use all the * blocks in the AG - some are permanently used by metadata. These * blocks are generally: * - the AG superblock, AGF, AGI and AGFL * - the AGF (bno and cnt) and AGI btree root blocks, and optionally * the AGI free inode and rmap btree root blocks. * - blocks on the AGFL according to xfs_alloc_set_aside() limits * - the rmapbt root block * * The AG headers are sector sized, so the amount of space they take up is * dependent on filesystem geometry. The others are all single blocks. 
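 *
 * As a worked illustration (not taken from the source): with 512-byte sectors
 * and 4096-byte blocks, the four sector-sized AG headers round up to a single
 * filesystem block, so with the finobt, rmapbt and reflink features enabled
 * the deduction below is 1 + XFS_ALLOCBT_AGFL_RESERVE (4) + 3 btree roots
 * + 1 + 1 + 1 = 11 blocks per AG.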
*/ unsigned int xfs_alloc_ag_max_usable( struct xfs_mount *mp) { unsigned int blocks; blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */ blocks += XFS_ALLOCBT_AGFL_RESERVE; blocks += 3; /* AGF, AGI btree root blocks */ if (xfs_has_finobt(mp)) blocks++; /* finobt root block */ if (xfs_has_rmapbt(mp)) blocks++; /* rmap root block */ if (xfs_has_reflink(mp)) blocks++; /* refcount root block */ return mp->m_sb.sb_agblocks - blocks; } static int xfs_alloc_lookup( struct xfs_btree_cur *cur, xfs_lookup_t dir, xfs_agblock_t bno, xfs_extlen_t len, int *stat) { int error; cur->bc_rec.a.ar_startblock = bno; cur->bc_rec.a.ar_blockcount = len; error = xfs_btree_lookup(cur, dir, stat); if (*stat == 1) cur->bc_flags |= XFS_BTREE_ALLOCBT_ACTIVE; else cur->bc_flags &= ~XFS_BTREE_ALLOCBT_ACTIVE; return error; } /* * Lookup the record equal to [bno, len] in the btree given by cur. */ static inline int /* error */ xfs_alloc_lookup_eq( struct xfs_btree_cur *cur, /* btree cursor */ xfs_agblock_t bno, /* starting block of extent */ xfs_extlen_t len, /* length of extent */ int *stat) /* success/failure */ { return xfs_alloc_lookup(cur, XFS_LOOKUP_EQ, bno, len, stat); } /* * Lookup the first record greater than or equal to [bno, len] * in the btree given by cur. */ int /* error */ xfs_alloc_lookup_ge( struct xfs_btree_cur *cur, /* btree cursor */ xfs_agblock_t bno, /* starting block of extent */ xfs_extlen_t len, /* length of extent */ int *stat) /* success/failure */ { return xfs_alloc_lookup(cur, XFS_LOOKUP_GE, bno, len, stat); } /* * Lookup the first record less than or equal to [bno, len] * in the btree given by cur. */ int /* error */ xfs_alloc_lookup_le( struct xfs_btree_cur *cur, /* btree cursor */ xfs_agblock_t bno, /* starting block of extent */ xfs_extlen_t len, /* length of extent */ int *stat) /* success/failure */ { return xfs_alloc_lookup(cur, XFS_LOOKUP_LE, bno, len, stat); } static inline bool xfs_alloc_cur_active( struct xfs_btree_cur *cur) { return cur && (cur->bc_flags & XFS_BTREE_ALLOCBT_ACTIVE); } /* * Update the record referred to by cur to the value given * by [bno, len]. * This either works (return 0) or gets an EFSCORRUPTED error. */ STATIC int /* error */ xfs_alloc_update( struct xfs_btree_cur *cur, /* btree cursor */ xfs_agblock_t bno, /* starting block of extent */ xfs_extlen_t len) /* length of extent */ { union xfs_btree_rec rec; rec.alloc.ar_startblock = cpu_to_be32(bno); rec.alloc.ar_blockcount = cpu_to_be32(len); return xfs_btree_update(cur, &rec); } /* Convert the ondisk btree record to its incore representation. */ void xfs_alloc_btrec_to_irec( const union xfs_btree_rec *rec, struct xfs_alloc_rec_incore *irec) { irec->ar_startblock = be32_to_cpu(rec->alloc.ar_startblock); irec->ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount); } /* Simple checks for free space records. 
*/ xfs_failaddr_t xfs_alloc_check_irec( struct xfs_perag *pag, const struct xfs_alloc_rec_incore *irec) { if (irec->ar_blockcount == 0) return __this_address; /* check for valid extent range, including overflow */ if (!xfs_verify_agbext(pag, irec->ar_startblock, irec->ar_blockcount)) return __this_address; return NULL; } static inline int xfs_alloc_complain_bad_rec( struct xfs_btree_cur *cur, xfs_failaddr_t fa, const struct xfs_alloc_rec_incore *irec) { struct xfs_mount *mp = cur->bc_mp; xfs_warn(mp, "%sbt record corruption in AG %d detected at %pS!", cur->bc_ops->name, cur->bc_group->xg_gno, fa); xfs_warn(mp, "start block 0x%x block count 0x%x", irec->ar_startblock, irec->ar_blockcount); xfs_btree_mark_sick(cur); return -EFSCORRUPTED; } /* * Get the data from the pointed-to record. */ int /* error */ xfs_alloc_get_rec( struct xfs_btree_cur *cur, /* btree cursor */ xfs_agblock_t *bno, /* output: starting block of extent */ xfs_extlen_t *len, /* output: length of extent */ int *stat) /* output: success/failure */ { struct xfs_alloc_rec_incore irec; union xfs_btree_rec *rec; xfs_failaddr_t fa; int error; error = xfs_btree_get_rec(cur, &rec, stat); if (error || !(*stat)) return error; xfs_alloc_btrec_to_irec(rec, &irec); fa = xfs_alloc_check_irec(to_perag(cur->bc_group), &irec); if (fa) return xfs_alloc_complain_bad_rec(cur, fa, &irec); *bno = irec.ar_startblock; *len = irec.ar_blockcount; return 0; } /* * Compute aligned version of the found extent. * Takes alignment and min length into account. */ STATIC bool xfs_alloc_compute_aligned( xfs_alloc_arg_t *args, /* allocation argument structure */ xfs_agblock_t foundbno, /* starting block in found extent */ xfs_extlen_t foundlen, /* length in found extent */ xfs_agblock_t *resbno, /* result block number */ xfs_extlen_t *reslen, /* result length */ unsigned *busy_gen) { xfs_agblock_t bno = foundbno; xfs_extlen_t len = foundlen; xfs_extlen_t diff; bool busy; /* Trim busy sections out of found extent */ busy = xfs_extent_busy_trim(pag_group(args->pag), args->minlen, args->maxlen, &bno, &len, busy_gen); /* * If we have a largish extent that happens to start before min_agbno, * see if we can shift it into range... */ if (bno < args->min_agbno && bno + len > args->min_agbno) { diff = args->min_agbno - bno; if (len > diff) { bno += diff; len -= diff; } } if (args->alignment > 1 && len >= args->minlen) { xfs_agblock_t aligned_bno = roundup(bno, args->alignment); diff = aligned_bno - bno; *resbno = aligned_bno; *reslen = diff >= len ? 0 : len - diff; } else { *resbno = bno; *reslen = len; } return busy; } /* * Compute best start block and diff for "near" allocations. * freelen >= wantlen already checked by caller. */ STATIC xfs_extlen_t /* difference value (absolute) */ xfs_alloc_compute_diff( xfs_agblock_t wantbno, /* target starting block */ xfs_extlen_t wantlen, /* target length */ xfs_extlen_t alignment, /* target alignment */ int datatype, /* are we allocating data? 
*/ xfs_agblock_t freebno, /* freespace's starting block */ xfs_extlen_t freelen, /* freespace's length */ xfs_agblock_t *newbnop) /* result: best start block from free */ { xfs_agblock_t freeend; /* end of freespace extent */ xfs_agblock_t newbno1; /* return block number */ xfs_agblock_t newbno2; /* other new block number */ xfs_extlen_t newlen1=0; /* length with newbno1 */ xfs_extlen_t newlen2=0; /* length with newbno2 */ xfs_agblock_t wantend; /* end of target extent */ bool userdata = datatype & XFS_ALLOC_USERDATA; ASSERT(freelen >= wantlen); freeend = freebno + freelen; wantend = wantbno + wantlen; /* * We want to allocate from the start of a free extent if it is past * the desired block or if we are allocating user data and the free * extent is before desired block. The second case is there to allow * for contiguous allocation from the remaining free space if the file * grows in the short term. */ if (freebno >= wantbno || (userdata && freeend < wantend)) { if ((newbno1 = roundup(freebno, alignment)) >= freeend) newbno1 = NULLAGBLOCK; } else if (freeend >= wantend && alignment > 1) { newbno1 = roundup(wantbno, alignment); newbno2 = newbno1 - alignment; if (newbno1 >= freeend) newbno1 = NULLAGBLOCK; else newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1); if (newbno2 < freebno) newbno2 = NULLAGBLOCK; else newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2); if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) { if (newlen1 < newlen2 || (newlen1 == newlen2 && XFS_ABSDIFF(newbno1, wantbno) > XFS_ABSDIFF(newbno2, wantbno))) newbno1 = newbno2; } else if (newbno2 != NULLAGBLOCK) newbno1 = newbno2; } else if (freeend >= wantend) { newbno1 = wantbno; } else if (alignment > 1) { newbno1 = roundup(freeend - wantlen, alignment); if (newbno1 > freeend - wantlen && newbno1 - alignment >= freebno) newbno1 -= alignment; else if (newbno1 >= freeend) newbno1 = NULLAGBLOCK; } else newbno1 = freeend - wantlen; *newbnop = newbno1; return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno); } /* * Fix up the length, based on mod and prod. * len should be k * prod + mod for some k. * If len is too small it is returned unchanged. * If len hits maxlen it is left alone. */ STATIC void xfs_alloc_fix_len( xfs_alloc_arg_t *args) /* allocation argument structure */ { xfs_extlen_t k; xfs_extlen_t rlen; ASSERT(args->mod < args->prod); rlen = args->len; ASSERT(rlen >= args->minlen); ASSERT(rlen <= args->maxlen); if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen || (args->mod == 0 && rlen < args->prod)) return; k = rlen % args->prod; if (k == args->mod) return; if (k > args->mod) rlen = rlen - (k - args->mod); else rlen = rlen - args->prod + (args->mod - k); /* casts to (int) catch length underflows */ if ((int)rlen < (int)args->minlen) return; ASSERT(rlen >= args->minlen && rlen <= args->maxlen); ASSERT(rlen % args->prod == args->mod); ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >= rlen + args->minleft); args->len = rlen; } /* * Determine if the cursor points to the block that contains the right-most * block of records in the by-count btree. This block contains the largest * contiguous free extent in the AG, so if we modify a record in this block we * need to call xfs_alloc_fixup_longest() once the modifications are done to * ensure the agf->agf_longest field is kept up to date with the longest free * extent tracked by the by-count btree. 
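 *
 * A sketch of the expected pairing, roughly as xfs_alloc_fixup_trees() below
 * does it:
 *
 *	if (xfs_alloc_cursor_at_lastrec(cnt_cur))
 *		fixup_longest = true;
 *	... insert, update or delete the record ...
 *	if (fixup_longest)
 *		error = xfs_alloc_fixup_longest(cnt_cur);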
*/ static bool xfs_alloc_cursor_at_lastrec( struct xfs_btree_cur *cnt_cur) { struct xfs_btree_block *block; union xfs_btree_ptr ptr; struct xfs_buf *bp; block = xfs_btree_get_block(cnt_cur, 0, &bp); xfs_btree_get_sibling(cnt_cur, block, &ptr, XFS_BB_RIGHTSIB); return xfs_btree_ptr_is_null(cnt_cur, &ptr); } /* * Find the rightmost record of the cntbt, and return the longest free space * recorded in it. Simply set both the block number and the length to their * maximum values before searching. */ static int xfs_cntbt_longest( struct xfs_btree_cur *cnt_cur, xfs_extlen_t *longest) { struct xfs_alloc_rec_incore irec; union xfs_btree_rec *rec; int stat = 0; int error; memset(&cnt_cur->bc_rec, 0xFF, sizeof(cnt_cur->bc_rec)); error = xfs_btree_lookup(cnt_cur, XFS_LOOKUP_LE, &stat); if (error) return error; if (!stat) { /* totally empty tree */ *longest = 0; return 0; } error = xfs_btree_get_rec(cnt_cur, &rec, &stat); if (error) return error; if (XFS_IS_CORRUPT(cnt_cur->bc_mp, !stat)) { xfs_btree_mark_sick(cnt_cur); return -EFSCORRUPTED; } xfs_alloc_btrec_to_irec(rec, &irec); *longest = irec.ar_blockcount; return 0; } /* * Update the longest contiguous free extent in the AG from the by-count cursor * that is passed to us. This should be done at the end of any allocation or * freeing operation that touches the longest extent in the btree. * * Needing to update the longest extent can be determined by calling * xfs_alloc_cursor_at_lastrec() after the cursor is positioned for record * modification but before the modification begins. */ static int xfs_alloc_fixup_longest( struct xfs_btree_cur *cnt_cur) { struct xfs_perag *pag = to_perag(cnt_cur->bc_group); struct xfs_buf *bp = cnt_cur->bc_ag.agbp; struct xfs_agf *agf = bp->b_addr; xfs_extlen_t longest = 0; int error; /* Lookup last rec in order to update AGF. */ error = xfs_cntbt_longest(cnt_cur, &longest); if (error) return error; pag->pagf_longest = longest; agf->agf_longest = cpu_to_be32(pag->pagf_longest); xfs_alloc_log_agf(cnt_cur->bc_tp, bp, XFS_AGF_LONGEST); return 0; } /* * Update the two btrees, logically removing from freespace the extent * starting at rbno, rlen blocks. The extent is contained within the * actual (current) free extent fbno for flen blocks. * Flags are passed in indicating whether the cursors are set to the * relevant records. */ STATIC int /* error code */ xfs_alloc_fixup_trees( struct xfs_btree_cur *cnt_cur, /* cursor for by-size btree */ struct xfs_btree_cur *bno_cur, /* cursor for by-block btree */ xfs_agblock_t fbno, /* starting block of free extent */ xfs_extlen_t flen, /* length of free extent */ xfs_agblock_t rbno, /* starting block of returned extent */ xfs_extlen_t rlen, /* length of returned extent */ int flags) /* flags, XFSA_FIXUP_... */ { int error; /* error code */ int i; /* operation results */ xfs_agblock_t nfbno1; /* first new free startblock */ xfs_agblock_t nfbno2; /* second new free startblock */ xfs_extlen_t nflen1=0; /* first new free length */ xfs_extlen_t nflen2=0; /* second new free length */ struct xfs_mount *mp; bool fixup_longest = false; mp = cnt_cur->bc_mp; /* * Look up the record in the by-size tree if necessary. 
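 * (XFSA_FIXUP_CNT_OK and XFSA_FIXUP_BNO_OK mean the corresponding cursor is
 * already positioned at the (fbno, flen) record, so only a debug-time sanity
 * check is done; otherwise an exact lookup is performed first. The exact-bno
 * allocation path passes XFSA_FIXUP_BNO_OK, the by-size path passes
 * XFSA_FIXUP_CNT_OK, and xfs_alloc_cur_finish() passes neither.)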
*/ if (flags & XFSA_FIXUP_CNT_OK) { #ifdef DEBUG if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i))) return error; if (XFS_IS_CORRUPT(mp, i != 1 || nfbno1 != fbno || nflen1 != flen)) { xfs_btree_mark_sick(cnt_cur); return -EFSCORRUPTED; } #endif } else { if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i))) return error; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cnt_cur); return -EFSCORRUPTED; } } /* * Look up the record in the by-block tree if necessary. */ if (flags & XFSA_FIXUP_BNO_OK) { #ifdef DEBUG if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i))) return error; if (XFS_IS_CORRUPT(mp, i != 1 || nfbno1 != fbno || nflen1 != flen)) { xfs_btree_mark_sick(bno_cur); return -EFSCORRUPTED; } #endif } else { if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i))) return error; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(bno_cur); return -EFSCORRUPTED; } } #ifdef DEBUG if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) { struct xfs_btree_block *bnoblock; struct xfs_btree_block *cntblock; bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_levels[0].bp); cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_levels[0].bp); if (XFS_IS_CORRUPT(mp, bnoblock->bb_numrecs != cntblock->bb_numrecs)) { xfs_btree_mark_sick(bno_cur); return -EFSCORRUPTED; } } #endif /* * Deal with all four cases: the allocated record is contained * within the freespace record, so we can have new freespace * at either (or both) end, or no freespace remaining. */ if (rbno == fbno && rlen == flen) nfbno1 = nfbno2 = NULLAGBLOCK; else if (rbno == fbno) { nfbno1 = rbno + rlen; nflen1 = flen - rlen; nfbno2 = NULLAGBLOCK; } else if (rbno + rlen == fbno + flen) { nfbno1 = fbno; nflen1 = flen - rlen; nfbno2 = NULLAGBLOCK; } else { nfbno1 = fbno; nflen1 = rbno - fbno; nfbno2 = rbno + rlen; nflen2 = (fbno + flen) - nfbno2; } if (xfs_alloc_cursor_at_lastrec(cnt_cur)) fixup_longest = true; /* * Delete the entry from the by-size btree. */ if ((error = xfs_btree_delete(cnt_cur, &i))) return error; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cnt_cur); return -EFSCORRUPTED; } /* * Add new by-size btree entry(s). */ if (nfbno1 != NULLAGBLOCK) { if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i))) return error; if (XFS_IS_CORRUPT(mp, i != 0)) { xfs_btree_mark_sick(cnt_cur); return -EFSCORRUPTED; } if ((error = xfs_btree_insert(cnt_cur, &i))) return error; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cnt_cur); return -EFSCORRUPTED; } } if (nfbno2 != NULLAGBLOCK) { if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i))) return error; if (XFS_IS_CORRUPT(mp, i != 0)) { xfs_btree_mark_sick(cnt_cur); return -EFSCORRUPTED; } if ((error = xfs_btree_insert(cnt_cur, &i))) return error; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cnt_cur); return -EFSCORRUPTED; } } /* * Fix up the by-block btree entry(s). */ if (nfbno1 == NULLAGBLOCK) { /* * No remaining freespace, just delete the by-block tree entry. */ if ((error = xfs_btree_delete(bno_cur, &i))) return error; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(bno_cur); return -EFSCORRUPTED; } } else { /* * Update the by-block entry to start later|be shorter. */ if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1))) return error; } if (nfbno2 != NULLAGBLOCK) { /* * 2 resulting free entries, need to add one. 
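 * For example, with a free extent of (fbno 100, flen 50) and a returned
 * extent of (rbno 110, rlen 20), the first remaining piece is (100, 10),
 * handled by the update above, and the second is (130, 20), which was added
 * to the by-size tree earlier and is inserted into the by-block tree here.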
*/ if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i))) return error; if (XFS_IS_CORRUPT(mp, i != 0)) { xfs_btree_mark_sick(bno_cur); return -EFSCORRUPTED; } if ((error = xfs_btree_insert(bno_cur, &i))) return error; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(bno_cur); return -EFSCORRUPTED; } } if (fixup_longest) return xfs_alloc_fixup_longest(cnt_cur); return 0; } /* * We do not verify the AGFL contents against AGF-based index counters here, * even though we may have access to the perag that contains shadow copies. We * don't know if the AGF based counters have been checked, and if they have they * still may be inconsistent because they haven't yet been reset on the first * allocation after the AGF has been read in. * * This means we can only check that all agfl entries contain valid or null * values because we can't reliably determine the active range to exclude * NULLAGBNO as a valid value. * * However, we can't even do that for v4 format filesystems because there are * old versions of mkfs out there that do not initialise the AGFL to known, * verifiable values. Hence we can't tell the difference between an AGFL block * allocated by mkfs and a corrupted AGFL block here on v4 filesystems. * * As a result, we can only fully validate AGFL block numbers when we pull them * from the freelist in xfs_alloc_get_freelist(). */ static xfs_failaddr_t xfs_agfl_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_mount; struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); __be32 *agfl_bno = xfs_buf_to_agfl_bno(bp); int i; if (!xfs_has_crc(mp)) return NULL; if (!xfs_verify_magic(bp, agfl->agfl_magicnum)) return __this_address; if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid)) return __this_address; /* * during growfs operations, the perag is not fully initialised, * so we can't use it for any useful checking. growfs ensures we can't * use it by using uncached buffers that don't have the perag attached * so we can detect and avoid this problem. */ if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != pag_agno((bp->b_pag))) return __this_address; for (i = 0; i < xfs_agfl_size(mp); i++) { if (be32_to_cpu(agfl_bno[i]) != NULLAGBLOCK && be32_to_cpu(agfl_bno[i]) >= mp->m_sb.sb_agblocks) return __this_address; } if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn))) return __this_address; return NULL; } static void xfs_agfl_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; /* * There is no verification of non-crc AGFLs because mkfs does not * initialise the AGFL to zero or NULL. Hence the only valid part of the * AGFL is what the AGF says is active. We can't get to the AGF, so we * can't verify just those entries are valid.
*/ if (!xfs_has_crc(mp)) return; if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF)) xfs_verifier_error(bp, -EFSBADCRC, __this_address); else { fa = xfs_agfl_verify(bp); if (fa) xfs_verifier_error(bp, -EFSCORRUPTED, fa); } } static void xfs_agfl_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; xfs_failaddr_t fa; /* no verification of non-crc AGFLs */ if (!xfs_has_crc(mp)) return; fa = xfs_agfl_verify(bp); if (fa) { xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } if (bip) XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn); xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF); } const struct xfs_buf_ops xfs_agfl_buf_ops = { .name = "xfs_agfl", .magic = { cpu_to_be32(XFS_AGFL_MAGIC), cpu_to_be32(XFS_AGFL_MAGIC) }, .verify_read = xfs_agfl_read_verify, .verify_write = xfs_agfl_write_verify, .verify_struct = xfs_agfl_verify, }; /* * Read in the allocation group free block array. */ int xfs_alloc_read_agfl( struct xfs_perag *pag, struct xfs_trans *tp, struct xfs_buf **bpp) { struct xfs_mount *mp = pag_mount(pag); struct xfs_buf *bp; int error; error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, XFS_AG_DADDR(mp, pag_agno(pag), XFS_AGFL_DADDR(mp)), XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops); if (xfs_metadata_is_sick(error)) xfs_ag_mark_sick(pag, XFS_SICK_AG_AGFL); if (error) return error; xfs_buf_set_ref(bp, XFS_AGFL_REF); *bpp = bp; return 0; } STATIC int xfs_alloc_update_counters( struct xfs_trans *tp, struct xfs_buf *agbp, long len) { struct xfs_agf *agf = agbp->b_addr; agbp->b_pag->pagf_freeblks += len; be32_add_cpu(&agf->agf_freeblks, len); if (unlikely(be32_to_cpu(agf->agf_freeblks) > be32_to_cpu(agf->agf_length))) { xfs_buf_mark_corrupt(agbp); xfs_ag_mark_sick(agbp->b_pag, XFS_SICK_AG_AGF); return -EFSCORRUPTED; } xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); return 0; } /* * Block allocation algorithm and data structures. */ struct xfs_alloc_cur { struct xfs_btree_cur *cnt; /* btree cursors */ struct xfs_btree_cur *bnolt; struct xfs_btree_cur *bnogt; xfs_extlen_t cur_len;/* current search length */ xfs_agblock_t rec_bno;/* extent startblock */ xfs_extlen_t rec_len;/* extent length */ xfs_agblock_t bno; /* alloc bno */ xfs_extlen_t len; /* alloc len */ xfs_extlen_t diff; /* diff from search bno */ unsigned int busy_gen;/* busy state */ bool busy; }; /* * Set up cursors, etc. in the extent allocation cursor. This function can be * called multiple times to reset an initialized structure without having to * reallocate cursors. */ static int xfs_alloc_cur_setup( struct xfs_alloc_arg *args, struct xfs_alloc_cur *acur) { int error; int i; acur->cur_len = args->maxlen; acur->rec_bno = 0; acur->rec_len = 0; acur->bno = 0; acur->len = 0; acur->diff = -1; acur->busy = false; acur->busy_gen = 0; /* * Perform an initial cntbt lookup to check for availability of maxlen * extents. If this fails, we'll return -ENOSPC to signal the caller to * attempt a small allocation. */ if (!acur->cnt) acur->cnt = xfs_cntbt_init_cursor(args->mp, args->tp, args->agbp, args->pag); error = xfs_alloc_lookup_ge(acur->cnt, 0, args->maxlen, &i); if (error) return error; /* * Allocate the bnobt left and right search cursors. */ if (!acur->bnolt) acur->bnolt = xfs_bnobt_init_cursor(args->mp, args->tp, args->agbp, args->pag); if (!acur->bnogt) acur->bnogt = xfs_bnobt_init_cursor(args->mp, args->tp, args->agbp, args->pag); return i == 1 ? 
0 : -ENOSPC; } static void xfs_alloc_cur_close( struct xfs_alloc_cur *acur, bool error) { int cur_error = XFS_BTREE_NOERROR; if (error) cur_error = XFS_BTREE_ERROR; if (acur->cnt) xfs_btree_del_cursor(acur->cnt, cur_error); if (acur->bnolt) xfs_btree_del_cursor(acur->bnolt, cur_error); if (acur->bnogt) xfs_btree_del_cursor(acur->bnogt, cur_error); acur->cnt = acur->bnolt = acur->bnogt = NULL; } /* * Check an extent for allocation and track the best available candidate in the * allocation structure. The cursor is deactivated if it has entered an out of * range state based on allocation arguments. Optionally return the extent * extent geometry and allocation status if requested by the caller. */ static int xfs_alloc_cur_check( struct xfs_alloc_arg *args, struct xfs_alloc_cur *acur, struct xfs_btree_cur *cur, int *new) { int error, i; xfs_agblock_t bno, bnoa, bnew; xfs_extlen_t len, lena, diff = -1; bool busy; unsigned busy_gen = 0; bool deactivate = false; bool isbnobt = xfs_btree_is_bno(cur->bc_ops); *new = 0; error = xfs_alloc_get_rec(cur, &bno, &len, &i); if (error) return error; if (XFS_IS_CORRUPT(args->mp, i != 1)) { xfs_btree_mark_sick(cur); return -EFSCORRUPTED; } /* * Check minlen and deactivate a cntbt cursor if out of acceptable size * range (i.e., walking backwards looking for a minlen extent). */ if (len < args->minlen) { deactivate = !isbnobt; goto out; } busy = xfs_alloc_compute_aligned(args, bno, len, &bnoa, &lena, &busy_gen); acur->busy |= busy; if (busy) acur->busy_gen = busy_gen; /* deactivate a bnobt cursor outside of locality range */ if (bnoa < args->min_agbno || bnoa > args->max_agbno) { deactivate = isbnobt; goto out; } if (lena < args->minlen) goto out; args->len = XFS_EXTLEN_MIN(lena, args->maxlen); xfs_alloc_fix_len(args); ASSERT(args->len >= args->minlen); if (args->len < acur->len) goto out; /* * We have an aligned record that satisfies minlen and beats or matches * the candidate extent size. Compare locality for near allocation mode. */ diff = xfs_alloc_compute_diff(args->agbno, args->len, args->alignment, args->datatype, bnoa, lena, &bnew); if (bnew == NULLAGBLOCK) goto out; /* * Deactivate a bnobt cursor with worse locality than the current best. */ if (diff > acur->diff) { deactivate = isbnobt; goto out; } ASSERT(args->len > acur->len || (args->len == acur->len && diff <= acur->diff)); acur->rec_bno = bno; acur->rec_len = len; acur->bno = bnew; acur->len = args->len; acur->diff = diff; *new = 1; /* * We're done if we found a perfect allocation. This only deactivates * the current cursor, but this is just an optimization to terminate a * cntbt search that otherwise runs to the edge of the tree. */ if (acur->diff == 0 && acur->len == args->maxlen) deactivate = true; out: if (deactivate) cur->bc_flags &= ~XFS_BTREE_ALLOCBT_ACTIVE; trace_xfs_alloc_cur_check(cur, bno, len, diff, *new); return 0; } /* * Complete an allocation of a candidate extent. Remove the extent from both * trees and update the args structure. 
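 *
 * At this point acur->rec_bno/rec_len describe the original free space
 * record and acur->bno/len the portion chosen from it, which is why both
 * pairs are passed to xfs_alloc_fixup_trees() below.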
*/ STATIC int xfs_alloc_cur_finish( struct xfs_alloc_arg *args, struct xfs_alloc_cur *acur) { int error; ASSERT(acur->cnt && acur->bnolt); ASSERT(acur->bno >= acur->rec_bno); ASSERT(acur->bno + acur->len <= acur->rec_bno + acur->rec_len); ASSERT(xfs_verify_agbext(args->pag, acur->rec_bno, acur->rec_len)); error = xfs_alloc_fixup_trees(acur->cnt, acur->bnolt, acur->rec_bno, acur->rec_len, acur->bno, acur->len, 0); if (error) return error; args->agbno = acur->bno; args->len = acur->len; args->wasfromfl = 0; trace_xfs_alloc_cur(args); return 0; } /* * Locality allocation lookup algorithm. This expects a cntbt cursor and uses * bno optimized lookup to search for extents with ideal size and locality. */ STATIC int xfs_alloc_cntbt_iter( struct xfs_alloc_arg *args, struct xfs_alloc_cur *acur) { struct xfs_btree_cur *cur = acur->cnt; xfs_agblock_t bno; xfs_extlen_t len, cur_len; int error; int i; if (!xfs_alloc_cur_active(cur)) return 0; /* locality optimized lookup */ cur_len = acur->cur_len; error = xfs_alloc_lookup_ge(cur, args->agbno, cur_len, &i); if (error) return error; if (i == 0) return 0; error = xfs_alloc_get_rec(cur, &bno, &len, &i); if (error) return error; /* check the current record and update search length from it */ error = xfs_alloc_cur_check(args, acur, cur, &i); if (error) return error; ASSERT(len >= acur->cur_len); acur->cur_len = len; /* * We looked up the first record >= [agbno, len] above. The agbno is a * secondary key and so the current record may lie just before or after * agbno. If it is past agbno, check the previous record too so long as * the length matches as it may be closer. Don't check a smaller record * because that could deactivate our cursor. */ if (bno > args->agbno) { error = xfs_btree_decrement(cur, 0, &i); if (!error && i) { error = xfs_alloc_get_rec(cur, &bno, &len, &i); if (!error && i && len == acur->cur_len) error = xfs_alloc_cur_check(args, acur, cur, &i); } if (error) return error; } /* * Increment the search key until we find at least one allocation * candidate or if the extent we found was larger. Otherwise, double the * search key to optimize the search. Efficiency is more important here * than absolute best locality. */ cur_len <<= 1; if (!acur->len || acur->cur_len >= cur_len) acur->cur_len++; else acur->cur_len = cur_len; return error; } /* * Deal with the case where only small freespaces remain. Either return the * contents of the last freespace record, or allocate space from the freelist if * there is nothing in the tree. */ STATIC int /* error */ xfs_alloc_ag_vextent_small( struct xfs_alloc_arg *args, /* allocation argument structure */ struct xfs_btree_cur *ccur, /* optional by-size cursor */ xfs_agblock_t *fbnop, /* result block number */ xfs_extlen_t *flenp, /* result length */ int *stat) /* status: 0-freelist, 1-normal/none */ { struct xfs_agf *agf = args->agbp->b_addr; int error = 0; xfs_agblock_t fbno = NULLAGBLOCK; xfs_extlen_t flen = 0; int i = 0; /* * If a cntbt cursor is provided, try to allocate the largest record in * the tree. Try the AGFL if the cntbt is empty, otherwise fail the * allocation. Make sure to respect minleft even when pulling from the * freelist. 
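 *
 * The *stat result follows the convention noted in the argument list above:
 * 0 means the single block came straight off the AGFL and no btree fixup is
 * needed, 1 means either a normal by-size record was returned or nothing
 * usable was found and args->agbno is set to NULLAGBLOCK.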
*/ if (ccur) error = xfs_btree_decrement(ccur, 0, &i); if (error) goto error; if (i) { error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i); if (error) goto error; if (XFS_IS_CORRUPT(args->mp, i != 1)) { xfs_btree_mark_sick(ccur); error = -EFSCORRUPTED; goto error; } goto out; } if (args->minlen != 1 || args->alignment != 1 || args->resv == XFS_AG_RESV_AGFL || be32_to_cpu(agf->agf_flcount) <= args->minleft) goto out; error = xfs_alloc_get_freelist(args->pag, args->tp, args->agbp, &fbno, 0); if (error) goto error; if (fbno == NULLAGBLOCK) goto out; xfs_extent_busy_reuse(pag_group(args->pag), fbno, 1, (args->datatype & XFS_ALLOC_NOBUSY)); if (args->datatype & XFS_ALLOC_USERDATA) { struct xfs_buf *bp; error = xfs_trans_get_buf(args->tp, args->mp->m_ddev_targp, xfs_agbno_to_daddr(args->pag, fbno), args->mp->m_bsize, 0, &bp); if (error) goto error; xfs_trans_binval(args->tp, bp); } *fbnop = args->agbno = fbno; *flenp = args->len = 1; if (XFS_IS_CORRUPT(args->mp, fbno >= be32_to_cpu(agf->agf_length))) { xfs_btree_mark_sick(ccur); error = -EFSCORRUPTED; goto error; } args->wasfromfl = 1; trace_xfs_alloc_small_freelist(args); /* * If we're feeding an AGFL block to something that doesn't live in the * free space, we need to clear out the OWN_AG rmap. */ error = xfs_rmap_free(args->tp, args->agbp, args->pag, fbno, 1, &XFS_RMAP_OINFO_AG); if (error) goto error; *stat = 0; return 0; out: /* * Can't do the allocation, give up. */ if (flen < args->minlen) { args->agbno = NULLAGBLOCK; trace_xfs_alloc_small_notenough(args); flen = 0; } *fbnop = fbno; *flenp = flen; *stat = 1; trace_xfs_alloc_small_done(args); return 0; error: trace_xfs_alloc_small_error(args); return error; } /* * Allocate a variable extent at exactly agno/bno. * Extent's length (returned in *len) will be between minlen and maxlen, * and of the form k * prod + mod unless there's nothing that large. * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it. */ STATIC int /* error */ xfs_alloc_ag_vextent_exact( xfs_alloc_arg_t *args) /* allocation argument structure */ { struct xfs_btree_cur *bno_cur;/* by block-number btree cursor */ struct xfs_btree_cur *cnt_cur;/* by count btree cursor */ int error; xfs_agblock_t fbno; /* start block of found extent */ xfs_extlen_t flen; /* length of found extent */ xfs_agblock_t tbno; /* start block of busy extent */ xfs_extlen_t tlen; /* length of busy extent */ xfs_agblock_t tend; /* end block of busy extent */ int i; /* success/failure of operation */ unsigned busy_gen; ASSERT(args->alignment == 1); /* * Allocate/initialize a cursor for the by-number freespace btree. */ bno_cur = xfs_bnobt_init_cursor(args->mp, args->tp, args->agbp, args->pag); /* * Lookup bno and minlen in the btree (minlen is irrelevant, really). * Look for the closest free block <= bno, it must contain bno * if any free block does. */ error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i); if (error) goto error0; if (!i) goto not_found; /* * Grab the freespace record. */ error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i); if (error) goto error0; if (XFS_IS_CORRUPT(args->mp, i != 1)) { xfs_btree_mark_sick(bno_cur); error = -EFSCORRUPTED; goto error0; } ASSERT(fbno <= args->agbno); /* * Check for overlapping busy extents. */ tbno = fbno; tlen = flen; xfs_extent_busy_trim(pag_group(args->pag), args->minlen, args->maxlen, &tbno, &tlen, &busy_gen); /* * Give up if the start of the extent is busy, or the freespace isn't * long enough for the minimum request. 
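 * For example, if args->agbno is 120 and busy trimming moved the usable
 * start (tbno) up to 125, the fixed-bno request cannot be satisfied and we
 * return NULLAGBLOCK rather than allocating elsewhere.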
*/ if (tbno > args->agbno) goto not_found; if (tlen < args->minlen) goto not_found; tend = tbno + tlen; if (tend < args->agbno + args->minlen) goto not_found; /* * End of extent will be smaller of the freespace end and the * maximal requested end. * * Fix the length according to mod and prod if given. */ args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen) - args->agbno; xfs_alloc_fix_len(args); ASSERT(args->agbno + args->len <= tend); /* * We are allocating agbno for args->len * Allocate/initialize a cursor for the by-size btree. */ cnt_cur = xfs_cntbt_init_cursor(args->mp, args->tp, args->agbp, args->pag); ASSERT(xfs_verify_agbext(args->pag, args->agbno, args->len)); error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno, args->len, XFSA_FIXUP_BNO_OK); if (error) { xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); goto error0; } xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); args->wasfromfl = 0; trace_xfs_alloc_exact_done(args); return 0; not_found: /* Didn't find it, return null. */ xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); args->agbno = NULLAGBLOCK; trace_xfs_alloc_exact_notfound(args); return 0; error0: xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); trace_xfs_alloc_exact_error(args); return error; } /* * Search a given number of btree records in a given direction. Check each * record against the good extent we've already found. */ STATIC int xfs_alloc_walk_iter( struct xfs_alloc_arg *args, struct xfs_alloc_cur *acur, struct xfs_btree_cur *cur, bool increment, bool find_one, /* quit on first candidate */ int count, /* rec count (-1 for infinite) */ int *stat) { int error; int i; *stat = 0; /* * Search so long as the cursor is active or we find a better extent. * The cursor is deactivated if it extends beyond the range of the * current allocation candidate. */ while (xfs_alloc_cur_active(cur) && count) { error = xfs_alloc_cur_check(args, acur, cur, &i); if (error) return error; if (i == 1) { *stat = 1; if (find_one) break; } if (!xfs_alloc_cur_active(cur)) break; if (increment) error = xfs_btree_increment(cur, 0, &i); else error = xfs_btree_decrement(cur, 0, &i); if (error) return error; if (i == 0) cur->bc_flags &= ~XFS_BTREE_ALLOCBT_ACTIVE; if (count > 0) count--; } return 0; } /* * Search the by-bno and by-size btrees in parallel in search of an extent with * ideal locality based on the NEAR mode ->agbno locality hint. */ STATIC int xfs_alloc_ag_vextent_locality( struct xfs_alloc_arg *args, struct xfs_alloc_cur *acur, int *stat) { struct xfs_btree_cur *fbcur = NULL; int error; int i; bool fbinc; ASSERT(acur->len == 0); *stat = 0; error = xfs_alloc_lookup_ge(acur->cnt, args->agbno, acur->cur_len, &i); if (error) return error; error = xfs_alloc_lookup_le(acur->bnolt, args->agbno, 0, &i); if (error) return error; error = xfs_alloc_lookup_ge(acur->bnogt, args->agbno, 0, &i); if (error) return error; /* * Search the bnobt and cntbt in parallel. Search the bnobt left and * right and lookup the closest extent to the locality hint for each * extent size key in the cntbt. The entire search terminates * immediately on a bnobt hit because that means we've found best case * locality. Otherwise the search continues until the cntbt cursor runs * off the end of the tree. If no allocation candidate is found at this * point, give up on locality, walk backwards from the end of the cntbt * and take the first available extent. 
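 *
 * The three lookups above set this search up: the cntbt cursor points at
 * the first record with length >= cur_len (agbno acting as the secondary
 * key), the bnolt cursor at the last extent starting at or before
 * args->agbno, and the bnogt cursor at the first extent starting at or
 * after it.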
* * The parallel tree searches balance each other out to provide fairly * consistent performance for various situations. The bnobt search can * have pathological behavior in the worst case scenario of larger * allocation requests and fragmented free space. On the other hand, the * bnobt is able to satisfy most smaller allocation requests much more * quickly than the cntbt. The cntbt search can sift through fragmented * free space and sets of free extents for larger allocation requests * more quickly than the bnobt. Since the locality hint is just a hint * and we don't want to scan the entire bnobt for perfect locality, the * cntbt search essentially bounds the bnobt search such that we can * find good enough locality at reasonable performance in most cases. */ while (xfs_alloc_cur_active(acur->bnolt) || xfs_alloc_cur_active(acur->bnogt) || xfs_alloc_cur_active(acur->cnt)) { trace_xfs_alloc_cur_lookup(args); /* * Search the bnobt left and right. In the case of a hit, finish * the search in the opposite direction and we're done. */ error = xfs_alloc_walk_iter(args, acur, acur->bnolt, false, true, 1, &i); if (error) return error; if (i == 1) { trace_xfs_alloc_cur_left(args); fbcur = acur->bnogt; fbinc = true; break; } error = xfs_alloc_walk_iter(args, acur, acur->bnogt, true, true, 1, &i); if (error) return error; if (i == 1) { trace_xfs_alloc_cur_right(args); fbcur = acur->bnolt; fbinc = false; break; } /* * Check the extent with best locality based on the current * extent size search key and keep track of the best candidate. */ error = xfs_alloc_cntbt_iter(args, acur); if (error) return error; if (!xfs_alloc_cur_active(acur->cnt)) { trace_xfs_alloc_cur_lookup_done(args); break; } } /* * If we failed to find anything due to busy extents, return empty * handed so the caller can flush and retry. If no busy extents were * found, walk backwards from the end of the cntbt as a last resort. */ if (!xfs_alloc_cur_active(acur->cnt) && !acur->len && !acur->busy) { error = xfs_btree_decrement(acur->cnt, 0, &i); if (error) return error; if (i) { acur->cnt->bc_flags |= XFS_BTREE_ALLOCBT_ACTIVE; fbcur = acur->cnt; fbinc = false; } } /* * Search in the opposite direction for a better entry in the case of * a bnobt hit or walk backwards from the end of the cntbt. */ if (fbcur) { error = xfs_alloc_walk_iter(args, acur, fbcur, fbinc, true, -1, &i); if (error) return error; } if (acur->len) *stat = 1; return 0; } /* Check the last block of the cnt btree for allocations. */ static int xfs_alloc_ag_vextent_lastblock( struct xfs_alloc_arg *args, struct xfs_alloc_cur *acur, xfs_agblock_t *bno, xfs_extlen_t *len, bool *allocated) { int error; int i; #ifdef DEBUG /* Randomly don't execute the first algorithm. */ if (get_random_u32_below(2)) return 0; #endif /* * Start from the entry that lookup found, sequence through all larger * free blocks. If we're actually pointing at a record smaller than * maxlen, go to the start of this block, and skip all those smaller * than minlen. */ if (*len || args->alignment > 1) { acur->cnt->bc_levels[0].ptr = 1; do { error = xfs_alloc_get_rec(acur->cnt, bno, len, &i); if (error) return error; if (XFS_IS_CORRUPT(args->mp, i != 1)) { xfs_btree_mark_sick(acur->cnt); return -EFSCORRUPTED; } if (*len >= args->minlen) break; error = xfs_btree_increment(acur->cnt, 0, &i); if (error) return error; } while (i); ASSERT(*len >= args->minlen); if (!i) return 0; } error = xfs_alloc_walk_iter(args, acur, acur->cnt, true, false, -1, &i); if (error) return error; /* * It didn't work. 
We COULD be in a case where there's a good record * somewhere, so try again. */ if (acur->len == 0) return 0; trace_xfs_alloc_near_first(args); *allocated = true; return 0; } /* * Allocate a variable extent near bno in the allocation group agno. * Extent's length (returned in len) will be between minlen and maxlen, * and of the form k * prod + mod unless there's nothing that large. * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. */ STATIC int xfs_alloc_ag_vextent_near( struct xfs_alloc_arg *args, uint32_t alloc_flags) { struct xfs_alloc_cur acur = {}; int error; /* error code */ int i; /* result code, temporary */ xfs_agblock_t bno; xfs_extlen_t len; /* handle uninitialized agbno range so caller doesn't have to */ if (!args->min_agbno && !args->max_agbno) args->max_agbno = args->mp->m_sb.sb_agblocks - 1; ASSERT(args->min_agbno <= args->max_agbno); /* clamp agbno to the range if it's outside */ if (args->agbno < args->min_agbno) args->agbno = args->min_agbno; if (args->agbno > args->max_agbno) args->agbno = args->max_agbno; /* Retry once quickly if we find busy extents before blocking. */ alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH; restart: len = 0; /* * Set up cursors and see if there are any free extents as big as * maxlen. If not, pick the last entry in the tree unless the tree is * empty. */ error = xfs_alloc_cur_setup(args, &acur); if (error == -ENOSPC) { error = xfs_alloc_ag_vextent_small(args, acur.cnt, &bno, &len, &i); if (error) goto out; if (i == 0 || len == 0) { trace_xfs_alloc_near_noentry(args); goto out; } ASSERT(i == 1); } else if (error) { goto out; } /* * First algorithm. * If the requested extent is large wrt the freespaces available * in this a.g., then the cursor will be pointing to a btree entry * near the right edge of the tree. If it's in the last btree leaf * block, then we just examine all the entries in that block * that are big enough, and pick the best one. */ if (xfs_btree_islastblock(acur.cnt, 0)) { bool allocated = false; error = xfs_alloc_ag_vextent_lastblock(args, &acur, &bno, &len, &allocated); if (error) goto out; if (allocated) goto alloc_finish; } /* * Second algorithm. Combined cntbt and bnobt search to find ideal * locality. */ error = xfs_alloc_ag_vextent_locality(args, &acur, &i); if (error) goto out; /* * If we couldn't get anything, give up. */ if (!acur.len) { if (acur.busy) { /* * Our only valid extents must have been busy. Flush and * retry the allocation again. If we get an -EAGAIN * error, we're being told that a deadlock was avoided * and the current transaction needs committing before * the allocation can be retried. */ trace_xfs_alloc_near_busy(args); error = xfs_extent_busy_flush(args->tp, pag_group(args->pag), acur.busy_gen, alloc_flags); if (error) goto out; alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH; goto restart; } trace_xfs_alloc_size_neither(args); args->agbno = NULLAGBLOCK; goto out; } alloc_finish: /* fix up btrees on a successful allocation */ error = xfs_alloc_cur_finish(args, &acur); out: xfs_alloc_cur_close(&acur, error); return error; } /* * Allocate a variable extent anywhere in the allocation group agno. * Extent's length (returned in len) will be between minlen and maxlen, * and of the form k * prod + mod unless there's nothing that large. * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. 
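 *
 * For example, with prod = 16 and mod = 4 the acceptable lengths are 4, 20,
 * 36, ...; a 25 block candidate is trimmed by xfs_alloc_fix_len() to 20,
 * since 25 % 16 = 9 and the excess 9 - 4 = 5 blocks are dropped.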
*/ static int xfs_alloc_ag_vextent_size( struct xfs_alloc_arg *args, uint32_t alloc_flags) { struct xfs_agf *agf = args->agbp->b_addr; struct xfs_btree_cur *bno_cur; struct xfs_btree_cur *cnt_cur; xfs_agblock_t fbno; /* start of found freespace */ xfs_extlen_t flen; /* length of found freespace */ xfs_agblock_t rbno; /* returned block number */ xfs_extlen_t rlen; /* length of returned extent */ bool busy; unsigned busy_gen; int error; int i; /* Retry once quickly if we find busy extents before blocking. */ alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH; restart: /* * Allocate and initialize a cursor for the by-size btree. */ cnt_cur = xfs_cntbt_init_cursor(args->mp, args->tp, args->agbp, args->pag); bno_cur = NULL; /* * Look for an entry >= maxlen+alignment-1 blocks. */ if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen + args->alignment - 1, &i))) goto error0; /* * If none then we have to settle for a smaller extent. In the case that * there are no large extents, this will return the last entry in the * tree unless the tree is empty. In the case that there are only busy * large extents, this will return the largest small extent unless there * are no smaller extents available. */ if (!i) { error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno, &flen, &i); if (error) goto error0; if (i == 0 || flen == 0) { xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); trace_xfs_alloc_size_noentry(args); return 0; } ASSERT(i == 1); busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen, &busy_gen); } else { /* * Search for a non-busy extent that is large enough. */ for (;;) { error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i); if (error) goto error0; if (XFS_IS_CORRUPT(args->mp, i != 1)) { xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen, &busy_gen); if (rlen >= args->maxlen) break; error = xfs_btree_increment(cnt_cur, 0, &i); if (error) goto error0; if (i) continue; /* * Our only valid extents must have been busy. Flush and * retry the allocation again. If we get an -EAGAIN * error, we're being told that a deadlock was avoided * and the current transaction needs committing before * the allocation can be retried. */ trace_xfs_alloc_size_busy(args); error = xfs_extent_busy_flush(args->tp, pag_group(args->pag), busy_gen, alloc_flags); if (error) goto error0; alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH; xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); goto restart; } } /* * In the first case above, we got the last entry in the * by-size btree. Now we check to see if the space hits maxlen * once aligned; if not, we search left for something better. * This can't happen in the second case above. 
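 *
 * "Search left" means walking back through progressively smaller by-size
 * records, remembering the best aligned candidate found so far
 * (bestrbno/bestrlen), and stopping once the raw record length can no
 * longer beat it or a maxlen-sized aligned candidate turns up.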
*/ rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); if (XFS_IS_CORRUPT(args->mp, rlen != 0 && (rlen > flen || rbno + rlen > fbno + flen))) { xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } if (rlen < args->maxlen) { xfs_agblock_t bestfbno; xfs_extlen_t bestflen; xfs_agblock_t bestrbno; xfs_extlen_t bestrlen; bestrlen = rlen; bestrbno = rbno; bestflen = flen; bestfbno = fbno; for (;;) { if ((error = xfs_btree_decrement(cnt_cur, 0, &i))) goto error0; if (i == 0) break; if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i))) goto error0; if (XFS_IS_CORRUPT(args->mp, i != 1)) { xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } if (flen <= bestrlen) break; busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen, &busy_gen); rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); if (XFS_IS_CORRUPT(args->mp, rlen != 0 && (rlen > flen || rbno + rlen > fbno + flen))) { xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } if (rlen > bestrlen) { bestrlen = rlen; bestrbno = rbno; bestflen = flen; bestfbno = fbno; if (rlen == args->maxlen) break; } } if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen, &i))) goto error0; if (XFS_IS_CORRUPT(args->mp, i != 1)) { xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } rlen = bestrlen; rbno = bestrbno; flen = bestflen; fbno = bestfbno; } args->wasfromfl = 0; /* * Fix up the length. */ args->len = rlen; if (rlen < args->minlen) { if (busy) { /* * Our only valid extents must have been busy. Flush and * retry the allocation again. If we get an -EAGAIN * error, we're being told that a deadlock was avoided * and the current transaction needs committing before * the allocation can be retried. */ trace_xfs_alloc_size_busy(args); error = xfs_extent_busy_flush(args->tp, pag_group(args->pag), busy_gen, alloc_flags); if (error) goto error0; alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH; xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); goto restart; } goto out_nominleft; } xfs_alloc_fix_len(args); rlen = args->len; if (XFS_IS_CORRUPT(args->mp, rlen > flen)) { xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } /* * Allocate and initialize a cursor for the by-block tree. */ bno_cur = xfs_bnobt_init_cursor(args->mp, args->tp, args->agbp, args->pag); if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, rbno, rlen, XFSA_FIXUP_CNT_OK))) goto error0; xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); cnt_cur = bno_cur = NULL; args->len = rlen; args->agbno = rbno; if (XFS_IS_CORRUPT(args->mp, args->agbno + args->len > be32_to_cpu(agf->agf_length))) { xfs_ag_mark_sick(args->pag, XFS_SICK_AG_BNOBT); error = -EFSCORRUPTED; goto error0; } trace_xfs_alloc_size_done(args); return 0; error0: trace_xfs_alloc_size_error(args); if (cnt_cur) xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); if (bno_cur) xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); return error; out_nominleft: xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); trace_xfs_alloc_size_nominleft(args); args->agbno = NULLAGBLOCK; return 0; } /* * Free the extent starting at agno/bno for length. 
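 *
 * The freed extent is merged with physically contiguous free space on
 * either side. For example, freeing (bno 200, len 10) while (190, 10) and
 * (210, 5) are already free leaves a single by-block record of (190, 25).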
*/ int xfs_free_ag_extent( struct xfs_trans *tp, struct xfs_buf *agbp, xfs_agblock_t bno, xfs_extlen_t len, const struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type) { struct xfs_mount *mp; struct xfs_btree_cur *bno_cur; struct xfs_btree_cur *cnt_cur; xfs_agblock_t gtbno; /* start of right neighbor */ xfs_extlen_t gtlen; /* length of right neighbor */ xfs_agblock_t ltbno; /* start of left neighbor */ xfs_extlen_t ltlen; /* length of left neighbor */ xfs_agblock_t nbno; /* new starting block of freesp */ xfs_extlen_t nlen; /* new length of freespace */ int haveleft; /* have a left neighbor */ int haveright; /* have a right neighbor */ int i; int error; struct xfs_perag *pag = agbp->b_pag; bool fixup_longest = false; bno_cur = cnt_cur = NULL; mp = tp->t_mountp; if (!xfs_rmap_should_skip_owner_update(oinfo)) { error = xfs_rmap_free(tp, agbp, pag, bno, len, oinfo); if (error) goto error0; } /* * Allocate and initialize a cursor for the by-block btree. */ bno_cur = xfs_bnobt_init_cursor(mp, tp, agbp, pag); /* * Look for a neighboring block on the left (lower block numbers) * that is contiguous with this space. */ if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft))) goto error0; if (haveleft) { /* * There is a block to our left. */ if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(bno_cur); error = -EFSCORRUPTED; goto error0; } /* * It's not contiguous, though. */ if (ltbno + ltlen < bno) haveleft = 0; else { /* * If this failure happens the request to free this * space was invalid, it's (partly) already free. * Very bad. */ if (XFS_IS_CORRUPT(mp, ltbno + ltlen > bno)) { xfs_btree_mark_sick(bno_cur); error = -EFSCORRUPTED; goto error0; } } } /* * Look for a neighboring block on the right (higher block numbers) * that is contiguous with this space. */ if ((error = xfs_btree_increment(bno_cur, 0, &haveright))) goto error0; if (haveright) { /* * There is a block to our right. */ if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(bno_cur); error = -EFSCORRUPTED; goto error0; } /* * It's not contiguous, though. */ if (bno + len < gtbno) haveright = 0; else { /* * If this failure happens the request to free this * space was invalid, it's (partly) already free. * Very bad. */ if (XFS_IS_CORRUPT(mp, bno + len > gtbno)) { xfs_btree_mark_sick(bno_cur); error = -EFSCORRUPTED; goto error0; } } } /* * Now allocate and initialize a cursor for the by-size tree. */ cnt_cur = xfs_cntbt_init_cursor(mp, tp, agbp, pag); /* * Have both left and right contiguous neighbors. * Merge all three into a single free block. */ if (haveleft && haveright) { /* * Delete the old by-size entry on the left. */ if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } if ((error = xfs_btree_delete(cnt_cur, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } /* * Delete the old by-size entry on the right. 
*/ if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } if ((error = xfs_btree_delete(cnt_cur, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } /* * Delete the old by-block entry for the right block. */ if ((error = xfs_btree_delete(bno_cur, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(bno_cur); error = -EFSCORRUPTED; goto error0; } /* * Move the by-block cursor back to the left neighbor. */ if ((error = xfs_btree_decrement(bno_cur, 0, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(bno_cur); error = -EFSCORRUPTED; goto error0; } #ifdef DEBUG /* * Check that this is the right record: delete didn't * mangle the cursor. */ { xfs_agblock_t xxbno; xfs_extlen_t xxlen; if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1 || xxbno != ltbno || xxlen != ltlen)) { xfs_btree_mark_sick(bno_cur); error = -EFSCORRUPTED; goto error0; } } #endif /* * Update remaining by-block entry to the new, joined block. */ nbno = ltbno; nlen = len + ltlen + gtlen; if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) goto error0; } /* * Have only a left contiguous neighbor. * Merge it together with the new freespace. */ else if (haveleft) { /* * Delete the old by-size entry on the left. */ if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } if ((error = xfs_btree_delete(cnt_cur, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } /* * Back up the by-block cursor to the left neighbor, and * update its length. */ if ((error = xfs_btree_decrement(bno_cur, 0, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(bno_cur); error = -EFSCORRUPTED; goto error0; } nbno = ltbno; nlen = len + ltlen; if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) goto error0; } /* * Have only a right contiguous neighbor. * Merge it together with the new freespace. */ else if (haveright) { /* * Delete the old by-size entry on the right. */ if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } if ((error = xfs_btree_delete(cnt_cur, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } /* * Update the starting block and length of the right * neighbor in the by-block tree. */ nbno = bno; nlen = len + gtlen; if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) goto error0; } /* * No contiguous neighbors. * Insert the new freespace into the by-block tree. */ else { nbno = bno; nlen = len; if ((error = xfs_btree_insert(bno_cur, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(bno_cur); error = -EFSCORRUPTED; goto error0; } } xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); bno_cur = NULL; /* * In all cases we need to insert the new freespace in the by-size tree. * * If this new freespace is being inserted in the block that contains * the largest free space in the btree, make sure we also fix up the * agf->agf-longest tracker field. 
*/ if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 0)) { xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } if (xfs_alloc_cursor_at_lastrec(cnt_cur)) fixup_longest = true; if ((error = xfs_btree_insert(cnt_cur, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } if (fixup_longest) { error = xfs_alloc_fixup_longest(cnt_cur); if (error) goto error0; } xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); cnt_cur = NULL; /* * Update the freespace totals in the ag and superblock. */ error = xfs_alloc_update_counters(tp, agbp, len); xfs_ag_resv_free_extent(pag, type, tp, len); if (error) goto error0; XFS_STATS_INC(mp, xs_freex); XFS_STATS_ADD(mp, xs_freeb, len); trace_xfs_free_extent(pag, bno, len, type, haveleft, haveright); return 0; error0: trace_xfs_free_extent(pag, bno, len, type, -1, -1); if (bno_cur) xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); if (cnt_cur) xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); return error; } /* * Visible (exported) allocation/free functions. * Some of these are used just by xfs_alloc_btree.c and this file. */ /* * Compute and fill in value of m_alloc_maxlevels. */ void xfs_alloc_compute_maxlevels( xfs_mount_t *mp) /* file system mount structure */ { mp->m_alloc_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr, (mp->m_sb.sb_agblocks + 1) / 2); ASSERT(mp->m_alloc_maxlevels <= xfs_allocbt_maxlevels_ondisk()); } /* * Find the length of the longest extent in an AG. The 'need' parameter * specifies how much space we're going to need for the AGFL and the * 'reserved' parameter tells us how many blocks in this AG are reserved for * other callers. */ xfs_extlen_t xfs_alloc_longest_free_extent( struct xfs_perag *pag, xfs_extlen_t need, xfs_extlen_t reserved) { xfs_extlen_t delta = 0; /* * If the AGFL needs a recharge, we'll have to subtract that from the * longest extent. */ if (need > pag->pagf_flcount) delta = need - pag->pagf_flcount; /* * If we cannot maintain others' reservations with space from the * not-longest freesp extents, we'll have to subtract /that/ from * the longest extent too. */ if (pag->pagf_freeblks - pag->pagf_longest < reserved) delta += reserved - (pag->pagf_freeblks - pag->pagf_longest); /* * If the longest extent is long enough to satisfy all the * reservations and AGFL rules in place, we can return this extent. */ if (pag->pagf_longest > delta) return min_t(xfs_extlen_t, pag_mount(pag)->m_ag_max_usable, pag->pagf_longest - delta); /* Otherwise, let the caller try for 1 block if there's space. */ return pag->pagf_flcount > 0 || pag->pagf_longest > 0; } /* * Compute the minimum length of the AGFL in the given AG. If @pag is NULL, * return the largest possible minimum length. */ unsigned int xfs_alloc_min_freelist( struct xfs_mount *mp, struct xfs_perag *pag) { /* AG btrees have at least 1 level. */ const unsigned int bno_level = pag ? pag->pagf_bno_level : 1; const unsigned int cnt_level = pag ? pag->pagf_cnt_level : 1; const unsigned int rmap_level = pag ? pag->pagf_rmap_level : 1; unsigned int min_free; ASSERT(mp->m_alloc_maxlevels > 0); /* * For a btree shorter than the maximum height, the worst case is that * every level gets split and a new level is added, then while inserting * another entry to refill the AGFL, every level under the old root gets * split again. 
This is: * * (full height split reservation) + (AGFL refill split height) * = (current height + 1) + (current height - 1) * = (new height) + (new height - 2) * = 2 * new height - 2 * * For a btree of maximum height, the worst case is that every level * under the root gets split, then while inserting another entry to * refill the AGFL, every level under the root gets split again. This is * also: * * 2 * (current height - 1) * = 2 * (new height - 1) * = 2 * new height - 2 */ /* space needed by-bno freespace btree */ min_free = min(bno_level + 1, mp->m_alloc_maxlevels) * 2 - 2; /* space needed by-size freespace btree */ min_free += min(cnt_level + 1, mp->m_alloc_maxlevels) * 2 - 2; /* space needed reverse mapping used space btree */ if (xfs_has_rmapbt(mp)) min_free += min(rmap_level + 1, mp->m_rmap_maxlevels) * 2 - 2; return min_free; } /* * Check if the operation we are fixing up the freelist for should go ahead or * not. If we are freeing blocks, we always allow it, otherwise the allocation * is dependent on whether the size and shape of free space available will * permit the requested allocation to take place. */ static bool xfs_alloc_space_available( struct xfs_alloc_arg *args, xfs_extlen_t min_free, int flags) { struct xfs_perag *pag = args->pag; xfs_extlen_t alloc_len, longest; xfs_extlen_t reservation; /* blocks that are still reserved */ int available; xfs_extlen_t agflcount; if (flags & XFS_ALLOC_FLAG_FREEING) return true; reservation = xfs_ag_resv_needed(pag, args->resv); /* do we have enough contiguous free space for the allocation? */ alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop; longest = xfs_alloc_longest_free_extent(pag, min_free, reservation); if (longest < alloc_len) return false; /* * Do we have enough free space remaining for the allocation? Don't * account extra agfl blocks because we are about to defer free them, * making them unavailable until the current transaction commits. */ agflcount = min_t(xfs_extlen_t, pag->pagf_flcount, min_free); available = (int)(pag->pagf_freeblks + agflcount - reservation - min_free - args->minleft); if (available < (int)max(args->total, alloc_len)) return false; /* * Clamp maxlen to the amount of free space available for the actual * extent allocation. */ if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) { args->maxlen = available; ASSERT(args->maxlen > 0); ASSERT(args->maxlen >= args->minlen); } return true; } /* * Check the agfl fields of the agf for inconsistency or corruption. * * The original purpose was to detect an agfl header padding mismatch between * current and early v5 kernels. This problem manifests as a 1-slot size * difference between the on-disk flcount and the active [first, last] range of * a wrapped agfl. * * However, we need to use these same checks to catch agfl count corruptions * unrelated to padding. This could occur on any v4 or v5 filesystem, so either * way, we need to reset the agfl and warn the user. * * Return true if a reset is required before the agfl can be used, false * otherwise. */ static bool xfs_agfl_needs_reset( struct xfs_mount *mp, struct xfs_agf *agf) { uint32_t f = be32_to_cpu(agf->agf_flfirst); uint32_t l = be32_to_cpu(agf->agf_fllast); uint32_t c = be32_to_cpu(agf->agf_flcount); int agfl_size = xfs_agfl_size(mp); int active; /* * The agf read verifier catches severe corruption of these fields. * Repeat some sanity checks to cover a packed -> unpacked mismatch if * the verifier allows it. 
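 *
 * For example, with a hypothetical xfs_agfl_size() of 119 and a wrapped
 * list where flfirst = 117 and fllast = 2, the active range computed below
 * is 119 - 117 + 2 + 1 = 5 entries; an on-disk flcount of 4 would expose
 * the one-slot padding mismatch and force a reset.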
*/ if (f >= agfl_size || l >= agfl_size) return true; if (c > agfl_size) return true; /* * Check consistency between the on-disk count and the active range. An * agfl padding mismatch manifests as an inconsistent flcount. */ if (c && l >= f) active = l - f + 1; else if (c) active = agfl_size - f + l + 1; else active = 0; return active != c; } /* * Reset the agfl to an empty state. Ignore/drop any existing blocks since the * agfl content cannot be trusted. Warn the user that a repair is required to * recover leaked blocks. * * The purpose of this mechanism is to handle filesystems affected by the agfl * header padding mismatch problem. A reset keeps the filesystem online with a * relatively minor free space accounting inconsistency rather than suffer the * inevitable crash from use of an invalid agfl block. */ static void xfs_agfl_reset( struct xfs_trans *tp, struct xfs_buf *agbp, struct xfs_perag *pag) { struct xfs_mount *mp = tp->t_mountp; struct xfs_agf *agf = agbp->b_addr; ASSERT(xfs_perag_agfl_needs_reset(pag)); trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_); xfs_warn(mp, "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. " "Please unmount and run xfs_repair.", pag_agno(pag), pag->pagf_flcount); agf->agf_flfirst = 0; agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1); agf->agf_flcount = 0; xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST | XFS_AGF_FLCOUNT); pag->pagf_flcount = 0; clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate); } /* * Add the extent to the list of extents to be free at transaction end. * The list is maintained sorted (by block number). */ static int xfs_defer_extent_free( struct xfs_trans *tp, xfs_fsblock_t bno, xfs_filblks_t len, const struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type, unsigned int free_flags, struct xfs_defer_pending **dfpp) { struct xfs_extent_free_item *xefi; struct xfs_mount *mp = tp->t_mountp; ASSERT(len <= XFS_MAX_BMBT_EXTLEN); ASSERT(!isnullstartblock(bno)); ASSERT(!(free_flags & ~XFS_FREE_EXTENT_ALL_FLAGS)); if (free_flags & XFS_FREE_EXTENT_REALTIME) { if (type != XFS_AG_RESV_NONE) { ASSERT(type == XFS_AG_RESV_NONE); return -EFSCORRUPTED; } if (XFS_IS_CORRUPT(mp, !xfs_verify_rtbext(mp, bno, len))) return -EFSCORRUPTED; } else { if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len))) return -EFSCORRUPTED; } xefi = kmem_cache_zalloc(xfs_extfree_item_cache, GFP_KERNEL | __GFP_NOFAIL); xefi->xefi_startblock = bno; xefi->xefi_blockcount = (xfs_extlen_t)len; xefi->xefi_agresv = type; if (free_flags & XFS_FREE_EXTENT_SKIP_DISCARD) xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD; if (free_flags & XFS_FREE_EXTENT_REALTIME) xefi->xefi_flags |= XFS_EFI_REALTIME; if (oinfo) { ASSERT(oinfo->oi_offset == 0); if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK) xefi->xefi_flags |= XFS_EFI_ATTR_FORK; if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK) xefi->xefi_flags |= XFS_EFI_BMBT_BLOCK; xefi->xefi_owner = oinfo->oi_owner; } else { xefi->xefi_owner = XFS_RMAP_OWN_NULL; } xfs_extent_free_defer_add(tp, xefi, dfpp); return 0; } int xfs_free_extent_later( struct xfs_trans *tp, xfs_fsblock_t bno, xfs_filblks_t len, const struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type, unsigned int free_flags) { struct xfs_defer_pending *dontcare = NULL; return xfs_defer_extent_free(tp, bno, len, oinfo, type, free_flags, &dontcare); } /* * Set up automatic freeing of unwritten space in the filesystem. * * This function attached a paused deferred extent free item to the * transaction. 
Pausing means that the EFI will be logged in the next * transaction commit, but the pending EFI will not be finished until the * pending item is unpaused. * * If the system goes down after the EFI has been persisted to the log but * before the pending item is unpaused, log recovery will find the EFI, fail to * find the EFD, and free the space. * * If the pending item is unpaused, the next transaction commit will log an EFD * without freeing the space. * * Caller must ensure that the tp, fsbno, len, oinfo, and resv flags of the * @args structure are set to the relevant values. */ int xfs_alloc_schedule_autoreap( const struct xfs_alloc_arg *args, unsigned int free_flags, struct xfs_alloc_autoreap *aarp) { int error; error = xfs_defer_extent_free(args->tp, args->fsbno, args->len, &args->oinfo, args->resv, free_flags, &aarp->dfp); if (error) return error; xfs_defer_item_pause(args->tp, aarp->dfp); return 0; } /* * Cancel automatic freeing of unwritten space in the filesystem. * * Earlier, we created a paused deferred extent free item and attached it to * this transaction so that we could automatically roll back a new space * allocation if the system went down. Now we want to cancel the paused work * item by marking the EFI stale so we don't actually free the space, unpausing * the pending item and logging an EFD. * * The caller generally should have already mapped the space into the ondisk * filesystem. If the reserved space was partially used, the caller must call * xfs_free_extent_later to create a new EFI to free the unused space. */ void xfs_alloc_cancel_autoreap( struct xfs_trans *tp, struct xfs_alloc_autoreap *aarp) { struct xfs_defer_pending *dfp = aarp->dfp; struct xfs_extent_free_item *xefi; if (!dfp) return; list_for_each_entry(xefi, &dfp->dfp_work, xefi_list) xefi->xefi_flags |= XFS_EFI_CANCELLED; xfs_defer_item_unpause(tp, dfp); } /* * Commit automatic freeing of unwritten space in the filesystem. * * This unpauses an earlier _schedule_autoreap and commits to freeing the * allocated space. Call this if none of the reserved space was used. */ void xfs_alloc_commit_autoreap( struct xfs_trans *tp, struct xfs_alloc_autoreap *aarp) { if (aarp->dfp) xfs_defer_item_unpause(tp, aarp->dfp); } /* * Check if an AGF has a free extent record whose length is equal to * args->minlen. */ STATIC int xfs_exact_minlen_extent_available( struct xfs_alloc_arg *args, struct xfs_buf *agbp, int *stat) { struct xfs_btree_cur *cnt_cur; xfs_agblock_t fbno; xfs_extlen_t flen; int error = 0; cnt_cur = xfs_cntbt_init_cursor(args->mp, args->tp, agbp, args->pag); error = xfs_alloc_lookup_ge(cnt_cur, 0, args->minlen, stat); if (error) goto out; if (*stat == 0) { xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto out; } error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, stat); if (error) goto out; if (*stat == 1 && flen != args->minlen) *stat = 0; out: xfs_btree_del_cursor(cnt_cur, error); return error; } /* * Decide whether to use this allocation group for this allocation. * If so, fix up the btree freelist's size. 
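 *
 * "Fixing up" the freelist means bringing pagf_flcount in line with
 * xfs_alloc_min_freelist(): excess AGFL blocks are handed to deferred
 * extent frees, while a shortfall is made up by allocating blocks from this
 * AG with xfs_alloc_ag_vextent_size() and putting them on the freelist.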
*/ int /* error */ xfs_alloc_fix_freelist( struct xfs_alloc_arg *args, /* allocation argument structure */ uint32_t alloc_flags) { struct xfs_mount *mp = args->mp; struct xfs_perag *pag = args->pag; struct xfs_trans *tp = args->tp; struct xfs_buf *agbp = NULL; struct xfs_buf *agflbp = NULL; struct xfs_alloc_arg targs; /* local allocation arguments */ xfs_agblock_t bno; /* freelist block */ xfs_extlen_t need; /* total blocks needed in freelist */ int error = 0; /* deferred ops (AGFL block frees) require permanent transactions */ ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); if (!xfs_perag_initialised_agf(pag)) { error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp); if (error) { /* Couldn't lock the AGF so skip this AG. */ if (error == -EAGAIN) error = 0; goto out_no_agbp; } } /* * If this is a metadata preferred pag and we are user data then try * somewhere else if we are not being asked to try harder at this * point */ if (xfs_perag_prefers_metadata(pag) && (args->datatype & XFS_ALLOC_USERDATA) && (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)) { ASSERT(!(alloc_flags & XFS_ALLOC_FLAG_FREEING)); goto out_agbp_relse; } need = xfs_alloc_min_freelist(mp, pag); if (!xfs_alloc_space_available(args, need, alloc_flags | XFS_ALLOC_FLAG_CHECK)) goto out_agbp_relse; /* * Get the a.g. freespace buffer. * Can fail if we're not blocking on locks, and it's held. */ if (!agbp) { error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp); if (error) { /* Couldn't lock the AGF so skip this AG. */ if (error == -EAGAIN) error = 0; goto out_no_agbp; } } /* reset a padding mismatched agfl before final free space check */ if (xfs_perag_agfl_needs_reset(pag)) xfs_agfl_reset(tp, agbp, pag); /* If there isn't enough total space or single-extent, reject it. */ need = xfs_alloc_min_freelist(mp, pag); if (!xfs_alloc_space_available(args, need, alloc_flags)) goto out_agbp_relse; if (IS_ENABLED(CONFIG_XFS_DEBUG) && args->alloc_minlen_only) { int stat; error = xfs_exact_minlen_extent_available(args, agbp, &stat); if (error || !stat) goto out_agbp_relse; } /* * Make the freelist shorter if it's too long. * * Note that from this point onwards, we will always release the agf and * agfl buffers on error. This handles the case where we error out and * the buffers are clean or may not have been joined to the transaction * and hence need to be released manually. If they have been joined to * the transaction, then xfs_trans_brelse() will handle them * appropriately based on the recursion count and dirty state of the * buffer. * * XXX (dgc): When we have lots of free space, does this buy us * anything other than extra overhead when we need to put more blocks * back on the free list? Maybe we should only do this when space is * getting low or the AGFL is more than half full? * * The NOSHRINK flag prevents the AGFL from being shrunk if it's too * big; the NORMAP flag prevents AGFL expand/shrink operations from * updating the rmapbt. Both flags are used in xfs_repair while we're * rebuilding the rmapbt, and neither are used by the kernel. They're * both required to ensure that rmaps are correctly recorded for the * regenerated AGFL, bnobt, and cntbt. See repair/phase5.c and * repair/rmap.c in xfsprogs for details. 
*/ memset(&targs, 0, sizeof(targs)); /* struct copy below */ if (alloc_flags & XFS_ALLOC_FLAG_NORMAP) targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE; else targs.oinfo = XFS_RMAP_OINFO_AG; while (!(alloc_flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) { error = xfs_alloc_get_freelist(pag, tp, agbp, &bno, 0); if (error) goto out_agbp_relse; /* * Defer the AGFL block free. * * This helps to prevent log reservation overruns due to too * many allocation operations in a transaction. AGFL frees are * prone to this problem because for one they are always freed * one at a time. Further, an immediate AGFL block free can * cause a btree join and require another block free before the * real allocation can proceed. * Deferring the free disconnects freeing up the AGFL slot from * freeing the block. */ error = xfs_free_extent_later(tp, xfs_agbno_to_fsb(pag, bno), 1, &targs.oinfo, XFS_AG_RESV_AGFL, 0); if (error) goto out_agbp_relse; } targs.tp = tp; targs.mp = mp; targs.agbp = agbp; targs.agno = args->agno; targs.alignment = targs.minlen = targs.prod = 1; targs.pag = pag; error = xfs_alloc_read_agfl(pag, tp, &agflbp); if (error) goto out_agbp_relse; /* Make the freelist longer if it's too short. */ while (pag->pagf_flcount < need) { targs.agbno = 0; targs.maxlen = need - pag->pagf_flcount; targs.resv = XFS_AG_RESV_AGFL; /* Allocate as many blocks as possible at once. */ error = xfs_alloc_ag_vextent_size(&targs, alloc_flags); if (error) goto out_agflbp_relse; /* * Stop if we run out. Won't happen if callers are obeying * the restrictions correctly. Can happen for free calls * on a completely full ag. */ if (targs.agbno == NULLAGBLOCK) { if (alloc_flags & XFS_ALLOC_FLAG_FREEING) break; goto out_agflbp_relse; } if (!xfs_rmap_should_skip_owner_update(&targs.oinfo)) { error = xfs_rmap_alloc(tp, agbp, pag, targs.agbno, targs.len, &targs.oinfo); if (error) goto out_agflbp_relse; } error = xfs_alloc_update_counters(tp, agbp, -((long)(targs.len))); if (error) goto out_agflbp_relse; /* * Put each allocated block on the list. */ for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) { error = xfs_alloc_put_freelist(pag, tp, agbp, agflbp, bno, 0); if (error) goto out_agflbp_relse; } } xfs_trans_brelse(tp, agflbp); args->agbp = agbp; return 0; out_agflbp_relse: xfs_trans_brelse(tp, agflbp); out_agbp_relse: if (agbp) xfs_trans_brelse(tp, agbp); out_no_agbp: args->agbp = NULL; return error; } /* * Get a block from the freelist. * Returns with the buffer for the block gotten. */ int xfs_alloc_get_freelist( struct xfs_perag *pag, struct xfs_trans *tp, struct xfs_buf *agbp, xfs_agblock_t *bnop, int btreeblk) { struct xfs_agf *agf = agbp->b_addr; struct xfs_buf *agflbp; xfs_agblock_t bno; __be32 *agfl_bno; int error; uint32_t logflags; struct xfs_mount *mp = tp->t_mountp; /* * Freelist is empty, give up. */ if (!agf->agf_flcount) { *bnop = NULLAGBLOCK; return 0; } /* * Read the array of free blocks. */ error = xfs_alloc_read_agfl(pag, tp, &agflbp); if (error) return error; /* * Get the block number and update the data structures. 
*/ agfl_bno = xfs_buf_to_agfl_bno(agflbp); bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]); if (XFS_IS_CORRUPT(tp->t_mountp, !xfs_verify_agbno(pag, bno))) return -EFSCORRUPTED; be32_add_cpu(&agf->agf_flfirst, 1); xfs_trans_brelse(tp, agflbp); if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp)) agf->agf_flfirst = 0; ASSERT(!xfs_perag_agfl_needs_reset(pag)); be32_add_cpu(&agf->agf_flcount, -1); pag->pagf_flcount--; logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT; if (btreeblk) { be32_add_cpu(&agf->agf_btreeblks, 1); pag->pagf_btreeblks++; logflags |= XFS_AGF_BTREEBLKS; } xfs_alloc_log_agf(tp, agbp, logflags); *bnop = bno; return 0; } /* * Log the given fields from the agf structure. */ void xfs_alloc_log_agf( struct xfs_trans *tp, struct xfs_buf *bp, uint32_t fields) { int first; /* first byte offset */ int last; /* last byte offset */ static const short offsets[] = { offsetof(xfs_agf_t, agf_magicnum), offsetof(xfs_agf_t, agf_versionnum), offsetof(xfs_agf_t, agf_seqno), offsetof(xfs_agf_t, agf_length), offsetof(xfs_agf_t, agf_bno_root), /* also cnt/rmap root */ offsetof(xfs_agf_t, agf_bno_level), /* also cnt/rmap levels */ offsetof(xfs_agf_t, agf_flfirst), offsetof(xfs_agf_t, agf_fllast), offsetof(xfs_agf_t, agf_flcount), offsetof(xfs_agf_t, agf_freeblks), offsetof(xfs_agf_t, agf_longest), offsetof(xfs_agf_t, agf_btreeblks), offsetof(xfs_agf_t, agf_uuid), offsetof(xfs_agf_t, agf_rmap_blocks), offsetof(xfs_agf_t, agf_refcount_blocks), offsetof(xfs_agf_t, agf_refcount_root), offsetof(xfs_agf_t, agf_refcount_level), /* needed so that we don't log the whole rest of the structure: */ offsetof(xfs_agf_t, agf_spare64), sizeof(xfs_agf_t) }; trace_xfs_agf(tp->t_mountp, bp->b_addr, fields, _RET_IP_); xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF); xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last); xfs_trans_log_buf(tp, bp, (uint)first, (uint)last); } /* * Put the block on the freelist for the allocation group. */ int xfs_alloc_put_freelist( struct xfs_perag *pag, struct xfs_trans *tp, struct xfs_buf *agbp, struct xfs_buf *agflbp, xfs_agblock_t bno, int btreeblk) { struct xfs_mount *mp = tp->t_mountp; struct xfs_agf *agf = agbp->b_addr; __be32 *blockp; int error; uint32_t logflags; __be32 *agfl_bno; int startoff; if (!agflbp) { error = xfs_alloc_read_agfl(pag, tp, &agflbp); if (error) return error; } be32_add_cpu(&agf->agf_fllast, 1); if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp)) agf->agf_fllast = 0; ASSERT(!xfs_perag_agfl_needs_reset(pag)); be32_add_cpu(&agf->agf_flcount, 1); pag->pagf_flcount++; logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT; if (btreeblk) { be32_add_cpu(&agf->agf_btreeblks, -1); pag->pagf_btreeblks--; logflags |= XFS_AGF_BTREEBLKS; } ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)); agfl_bno = xfs_buf_to_agfl_bno(agflbp); blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)]; *blockp = cpu_to_be32(bno); startoff = (char *)blockp - (char *)agflbp->b_addr; xfs_alloc_log_agf(tp, agbp, logflags); xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF); xfs_trans_log_buf(tp, agflbp, startoff, startoff + sizeof(xfs_agblock_t) - 1); return 0; } /* * Check that this AGF/AGI header's sequence number and length matches the AG * number and size in fsblocks. */ xfs_failaddr_t xfs_validate_ag_length( struct xfs_buf *bp, uint32_t seqno, uint32_t length) { struct xfs_mount *mp = bp->b_mount; /* * During growfs operations, the perag is not fully initialised, * so we can't use it for any useful checking. 
growfs ensures we can't * use it by using uncached buffers that don't have the perag attached * so we can detect and avoid this problem. */ if (bp->b_pag && seqno != pag_agno(bp->b_pag)) return __this_address; /* * Only the last AG in the filesystem is allowed to be shorter * than the AG size recorded in the superblock. */ if (length != mp->m_sb.sb_agblocks) { /* * During growfs, the new last AG can get here before we * have updated the superblock. Give it a pass on the seqno * check. */ if (bp->b_pag && seqno != mp->m_sb.sb_agcount - 1) return __this_address; if (length < XFS_MIN_AG_BLOCKS) return __this_address; if (length > mp->m_sb.sb_agblocks) return __this_address; } return NULL; } /* * Verify the AGF is consistent. * * We do not verify the AGFL indexes in the AGF are fully consistent here * because of issues with variable on-disk structure sizes. Instead, we check * the agfl indexes for consistency when we initialise the perag from the AGF * information after a read completes. * * If the index is inconsistent, then we mark the perag as needing an AGFL * reset. The first AGFL update performed then resets the AGFL indexes and * refills the AGFL with known good free blocks, allowing the filesystem to * continue operating normally at the cost of a few leaked free space blocks. */ static xfs_failaddr_t xfs_agf_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_mount; struct xfs_agf *agf = bp->b_addr; xfs_failaddr_t fa; uint32_t agf_seqno = be32_to_cpu(agf->agf_seqno); uint32_t agf_length = be32_to_cpu(agf->agf_length); if (xfs_has_crc(mp)) { if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid)) return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(agf->agf_lsn))) return __this_address; } if (!xfs_verify_magic(bp, agf->agf_magicnum)) return __this_address; if (!XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum))) return __this_address; /* * Both agf_seqno and agf_length need to validated before anything else * block number related in the AGF or AGFL can be checked. 
*/ fa = xfs_validate_ag_length(bp, agf_seqno, agf_length); if (fa) return fa; if (be32_to_cpu(agf->agf_flfirst) >= xfs_agfl_size(mp)) return __this_address; if (be32_to_cpu(agf->agf_fllast) >= xfs_agfl_size(mp)) return __this_address; if (be32_to_cpu(agf->agf_flcount) > xfs_agfl_size(mp)) return __this_address; if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) || be32_to_cpu(agf->agf_freeblks) > agf_length) return __this_address; if (be32_to_cpu(agf->agf_bno_level) < 1 || be32_to_cpu(agf->agf_cnt_level) < 1 || be32_to_cpu(agf->agf_bno_level) > mp->m_alloc_maxlevels || be32_to_cpu(agf->agf_cnt_level) > mp->m_alloc_maxlevels) return __this_address; if (xfs_has_lazysbcount(mp) && be32_to_cpu(agf->agf_btreeblks) > agf_length) return __this_address; if (xfs_has_rmapbt(mp)) { if (be32_to_cpu(agf->agf_rmap_blocks) > agf_length) return __this_address; if (be32_to_cpu(agf->agf_rmap_level) < 1 || be32_to_cpu(agf->agf_rmap_level) > mp->m_rmap_maxlevels) return __this_address; } if (xfs_has_reflink(mp)) { if (be32_to_cpu(agf->agf_refcount_blocks) > agf_length) return __this_address; if (be32_to_cpu(agf->agf_refcount_level) < 1 || be32_to_cpu(agf->agf_refcount_level) > mp->m_refc_maxlevels) return __this_address; } return NULL; } static void xfs_agf_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; if (xfs_has_crc(mp) && !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) xfs_verifier_error(bp, -EFSBADCRC, __this_address); else { fa = xfs_agf_verify(bp); if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF)) xfs_verifier_error(bp, -EFSCORRUPTED, fa); } } static void xfs_agf_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_agf *agf = bp->b_addr; xfs_failaddr_t fa; fa = xfs_agf_verify(bp); if (fa) { xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } if (!xfs_has_crc(mp)) return; if (bip) agf->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn); xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF); } const struct xfs_buf_ops xfs_agf_buf_ops = { .name = "xfs_agf", .magic = { cpu_to_be32(XFS_AGF_MAGIC), cpu_to_be32(XFS_AGF_MAGIC) }, .verify_read = xfs_agf_read_verify, .verify_write = xfs_agf_write_verify, .verify_struct = xfs_agf_verify, }; /* * Read in the allocation group header (free/alloc section). */ int xfs_read_agf( struct xfs_perag *pag, struct xfs_trans *tp, int flags, struct xfs_buf **agfbpp) { struct xfs_mount *mp = pag_mount(pag); int error; trace_xfs_read_agf(pag); error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, XFS_AG_DADDR(mp, pag_agno(pag), XFS_AGF_DADDR(mp)), XFS_FSS_TO_BB(mp, 1), flags, agfbpp, &xfs_agf_buf_ops); if (xfs_metadata_is_sick(error)) xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF); if (error) return error; xfs_buf_set_ref(*agfbpp, XFS_AGF_REF); return 0; } /* * Read in the allocation group header (free/alloc section) and initialise the * perag structure if necessary. If the caller provides @agfbpp, then return the * locked buffer to the caller, otherwise free it. */ int xfs_alloc_read_agf( struct xfs_perag *pag, struct xfs_trans *tp, int flags, struct xfs_buf **agfbpp) { struct xfs_mount *mp = pag_mount(pag); struct xfs_buf *agfbp; struct xfs_agf *agf; int error; int allocbt_blks; trace_xfs_alloc_read_agf(pag); /* We don't support trylock when freeing. */ ASSERT((flags & (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK)) != (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK)); error = xfs_read_agf(pag, tp, (flags & XFS_ALLOC_FLAG_TRYLOCK) ? 
XBF_TRYLOCK : 0, &agfbp); if (error) return error; agf = agfbp->b_addr; if (!xfs_perag_initialised_agf(pag)) { pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks); pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks); pag->pagf_flcount = be32_to_cpu(agf->agf_flcount); pag->pagf_longest = be32_to_cpu(agf->agf_longest); pag->pagf_bno_level = be32_to_cpu(agf->agf_bno_level); pag->pagf_cnt_level = be32_to_cpu(agf->agf_cnt_level); pag->pagf_rmap_level = be32_to_cpu(agf->agf_rmap_level); pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level); if (xfs_agfl_needs_reset(mp, agf)) set_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate); else clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate); /* * Update the in-core allocbt counter. Filter out the rmapbt * subset of the btreeblks counter because the rmapbt is managed * by perag reservation. Subtract one for the rmapbt root block * because the rmap counter includes it while the btreeblks * counter only tracks non-root blocks. */ allocbt_blks = pag->pagf_btreeblks; if (xfs_has_rmapbt(mp)) allocbt_blks -= be32_to_cpu(agf->agf_rmap_blocks) - 1; if (allocbt_blks > 0) atomic64_add(allocbt_blks, &mp->m_allocbt_blks); set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate); } #ifdef DEBUG else if (!xfs_is_shutdown(mp)) { ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks)); ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks)); ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount)); ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest)); ASSERT(pag->pagf_bno_level == be32_to_cpu(agf->agf_bno_level)); ASSERT(pag->pagf_cnt_level == be32_to_cpu(agf->agf_cnt_level)); } #endif if (agfbpp) *agfbpp = agfbp; else xfs_trans_brelse(tp, agfbp); return 0; } /* * Pre-proces allocation arguments to set initial state that we don't require * callers to set up correctly, as well as bounds check the allocation args * that are set up. */ static int xfs_alloc_vextent_check_args( struct xfs_alloc_arg *args, xfs_fsblock_t target, xfs_agnumber_t *minimum_agno) { struct xfs_mount *mp = args->mp; xfs_agblock_t agsize; args->fsbno = NULLFSBLOCK; *minimum_agno = 0; if (args->tp->t_highest_agno != NULLAGNUMBER) *minimum_agno = args->tp->t_highest_agno; /* * Just fix this up, for the case where the last a.g. is shorter * (or there's only one a.g.) and the caller couldn't easily figure * that out (xfs_bmap_alloc). */ agsize = mp->m_sb.sb_agblocks; if (args->maxlen > agsize) args->maxlen = agsize; if (args->alignment == 0) args->alignment = 1; ASSERT(args->minlen > 0); ASSERT(args->maxlen > 0); ASSERT(args->alignment > 0); ASSERT(args->resv != XFS_AG_RESV_AGFL); ASSERT(XFS_FSB_TO_AGNO(mp, target) < mp->m_sb.sb_agcount); ASSERT(XFS_FSB_TO_AGBNO(mp, target) < agsize); ASSERT(args->minlen <= args->maxlen); ASSERT(args->minlen <= agsize); ASSERT(args->mod < args->prod); if (XFS_FSB_TO_AGNO(mp, target) >= mp->m_sb.sb_agcount || XFS_FSB_TO_AGBNO(mp, target) >= agsize || args->minlen > args->maxlen || args->minlen > agsize || args->mod >= args->prod) { trace_xfs_alloc_vextent_badargs(args); return -ENOSPC; } if (args->agno != NULLAGNUMBER && *minimum_agno > args->agno) { trace_xfs_alloc_vextent_skip_deadlock(args); return -ENOSPC; } return 0; } /* * Prepare an AG for allocation. If the AG is not prepared to accept the * allocation, return failure. * * XXX(dgc): The complexity of "need_pag" will go away as all caller paths are * modified to hold their own perag references. 
*/ static int xfs_alloc_vextent_prepare_ag( struct xfs_alloc_arg *args, uint32_t alloc_flags) { bool need_pag = !args->pag; int error; if (need_pag) args->pag = xfs_perag_get(args->mp, args->agno); args->agbp = NULL; error = xfs_alloc_fix_freelist(args, alloc_flags); if (error) { trace_xfs_alloc_vextent_nofix(args); if (need_pag) xfs_perag_put(args->pag); args->agbno = NULLAGBLOCK; return error; } if (!args->agbp) { /* cannot allocate in this AG at all */ trace_xfs_alloc_vextent_noagbp(args); args->agbno = NULLAGBLOCK; return 0; } args->wasfromfl = 0; return 0; } /* * Post-process allocation results to account for the allocation if it succeed * and set the allocated block number correctly for the caller. * * XXX: we should really be returning ENOSPC for ENOSPC, not * hiding it behind a "successful" NULLFSBLOCK allocation. */ static int xfs_alloc_vextent_finish( struct xfs_alloc_arg *args, xfs_agnumber_t minimum_agno, int alloc_error, bool drop_perag) { struct xfs_mount *mp = args->mp; int error = 0; /* * We can end up here with a locked AGF. If we failed, the caller is * likely going to try to allocate again with different parameters, and * that can widen the AGs that are searched for free space. If we have * to do BMBT block allocation, we have to do a new allocation. * * Hence leaving this function with the AGF locked opens up potential * ABBA AGF deadlocks because a future allocation attempt in this * transaction may attempt to lock a lower number AGF. * * We can't release the AGF until the transaction is commited, so at * this point we must update the "first allocation" tracker to point at * this AG if the tracker is empty or points to a lower AG. This allows * the next allocation attempt to be modified appropriately to avoid * deadlocks. */ if (args->agbp && (args->tp->t_highest_agno == NULLAGNUMBER || args->agno > minimum_agno)) args->tp->t_highest_agno = args->agno; /* * If the allocation failed with an error or we had an ENOSPC result, * preserve the returned error whilst also marking the allocation result * as "no extent allocated". This ensures that callers that fail to * capture the error will still treat it as a failed allocation. */ if (alloc_error || args->agbno == NULLAGBLOCK) { args->fsbno = NULLFSBLOCK; error = alloc_error; goto out_drop_perag; } args->fsbno = xfs_agbno_to_fsb(args->pag, args->agbno); ASSERT(args->len >= args->minlen); ASSERT(args->len <= args->maxlen); ASSERT(args->agbno % args->alignment == 0); XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), args->len); /* if not file data, insert new block into the reverse map btree */ if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) { error = xfs_rmap_alloc(args->tp, args->agbp, args->pag, args->agbno, args->len, &args->oinfo); if (error) goto out_drop_perag; } if (!args->wasfromfl) { error = xfs_alloc_update_counters(args->tp, args->agbp, -((long)(args->len))); if (error) goto out_drop_perag; ASSERT(!xfs_extent_busy_search(pag_group(args->pag), args->agbno, args->len)); } xfs_ag_resv_alloc_extent(args->pag, args->resv, args); XFS_STATS_INC(mp, xs_allocx); XFS_STATS_ADD(mp, xs_allocb, args->len); trace_xfs_alloc_vextent_finish(args); out_drop_perag: if (drop_perag && args->pag) { xfs_perag_rele(args->pag); args->pag = NULL; } return error; } /* * Allocate within a single AG only. This uses a best-fit length algorithm so if * you need an exact sized allocation without locality constraints, this is the * fastest way to do it. * * Caller is expected to hold a perag reference in args->pag. 
*/ int xfs_alloc_vextent_this_ag( struct xfs_alloc_arg *args, xfs_agnumber_t agno) { xfs_agnumber_t minimum_agno; uint32_t alloc_flags = 0; int error; ASSERT(args->pag != NULL); ASSERT(pag_agno(args->pag) == agno); args->agno = agno; args->agbno = 0; trace_xfs_alloc_vextent_this_ag(args); error = xfs_alloc_vextent_check_args(args, xfs_agbno_to_fsb(args->pag, 0), &minimum_agno); if (error) { if (error == -ENOSPC) return 0; return error; } error = xfs_alloc_vextent_prepare_ag(args, alloc_flags); if (!error && args->agbp) error = xfs_alloc_ag_vextent_size(args, alloc_flags); return xfs_alloc_vextent_finish(args, minimum_agno, error, false); } /* * Iterate all AGs trying to allocate an extent starting from @start_ag. * * If the incoming allocation type is XFS_ALLOCTYPE_NEAR_BNO, it means the * allocation attempts in @start_agno have locality information. If we fail to * allocate in that AG, then we revert to anywhere-in-AG for all the other AGs * we attempt to allocation in as there is no locality optimisation possible for * those allocations. * * On return, args->pag may be left referenced if we finish before the "all * failed" return point. The allocation finish still needs the perag, and * so the caller will release it once they've finished the allocation. * * When we wrap the AG iteration at the end of the filesystem, we have to be * careful not to wrap into AGs below ones we already have locked in the * transaction if we are doing a blocking iteration. This will result in an * out-of-order locking of AGFs and hence can cause deadlocks. */ static int xfs_alloc_vextent_iterate_ags( struct xfs_alloc_arg *args, xfs_agnumber_t minimum_agno, xfs_agnumber_t start_agno, xfs_agblock_t target_agbno, uint32_t alloc_flags) { struct xfs_mount *mp = args->mp; xfs_agnumber_t restart_agno = minimum_agno; xfs_agnumber_t agno; int error = 0; if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK) restart_agno = 0; restart: for_each_perag_wrap_range(mp, start_agno, restart_agno, mp->m_sb.sb_agcount, agno, args->pag) { args->agno = agno; error = xfs_alloc_vextent_prepare_ag(args, alloc_flags); if (error) break; if (!args->agbp) { trace_xfs_alloc_vextent_loopfailed(args); continue; } /* * Allocation is supposed to succeed now, so break out of the * loop regardless of whether we succeed or not. */ if (args->agno == start_agno && target_agbno) { args->agbno = target_agbno; error = xfs_alloc_ag_vextent_near(args, alloc_flags); } else { args->agbno = 0; error = xfs_alloc_ag_vextent_size(args, alloc_flags); } break; } if (error) { xfs_perag_rele(args->pag); args->pag = NULL; return error; } if (args->agbp) return 0; /* * We didn't find an AG we can alloation from. If we were given * constraining flags by the caller, drop them and retry the allocation * without any constraints being set. */ if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK) { alloc_flags &= ~XFS_ALLOC_FLAG_TRYLOCK; restart_agno = minimum_agno; goto restart; } ASSERT(args->pag == NULL); trace_xfs_alloc_vextent_allfailed(args); return 0; } /* * Iterate from the AGs from the start AG to the end of the filesystem, trying * to allocate blocks. It starts with a near allocation attempt in the initial * AG, then falls back to anywhere-in-ag after the first AG fails. It will wrap * back to zero if allowed by previous allocations in this transaction, * otherwise will wrap back to the start AG and run a second blocking pass to * the end of the filesystem. 
*/ int xfs_alloc_vextent_start_ag( struct xfs_alloc_arg *args, xfs_fsblock_t target) { struct xfs_mount *mp = args->mp; xfs_agnumber_t minimum_agno; xfs_agnumber_t start_agno; xfs_agnumber_t rotorstep = xfs_rotorstep; bool bump_rotor = false; uint32_t alloc_flags = XFS_ALLOC_FLAG_TRYLOCK; int error; ASSERT(args->pag == NULL); args->agno = NULLAGNUMBER; args->agbno = NULLAGBLOCK; trace_xfs_alloc_vextent_start_ag(args); error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); if (error) { if (error == -ENOSPC) return 0; return error; } if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) && xfs_is_inode32(mp)) { target = XFS_AGB_TO_FSB(mp, ((mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount), 0); bump_rotor = 1; } start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target)); error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno, XFS_FSB_TO_AGBNO(mp, target), alloc_flags); if (bump_rotor) { if (args->agno == start_agno) mp->m_agfrotor = (mp->m_agfrotor + 1) % (mp->m_sb.sb_agcount * rotorstep); else mp->m_agfrotor = (args->agno * rotorstep + 1) % (mp->m_sb.sb_agcount * rotorstep); } return xfs_alloc_vextent_finish(args, minimum_agno, error, true); } /* * Iterate from the agno indicated via @target through to the end of the * filesystem attempting blocking allocation. This does not wrap or try a second * pass, so will not recurse into AGs lower than indicated by the target. */ int xfs_alloc_vextent_first_ag( struct xfs_alloc_arg *args, xfs_fsblock_t target) { struct xfs_mount *mp = args->mp; xfs_agnumber_t minimum_agno; xfs_agnumber_t start_agno; uint32_t alloc_flags = XFS_ALLOC_FLAG_TRYLOCK; int error; ASSERT(args->pag == NULL); args->agno = NULLAGNUMBER; args->agbno = NULLAGBLOCK; trace_xfs_alloc_vextent_first_ag(args); error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); if (error) { if (error == -ENOSPC) return 0; return error; } start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target)); error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno, XFS_FSB_TO_AGBNO(mp, target), alloc_flags); return xfs_alloc_vextent_finish(args, minimum_agno, error, true); } /* * Allocate at the exact block target or fail. Caller is expected to hold a * perag reference in args->pag. */ int xfs_alloc_vextent_exact_bno( struct xfs_alloc_arg *args, xfs_fsblock_t target) { struct xfs_mount *mp = args->mp; xfs_agnumber_t minimum_agno; int error; ASSERT(args->pag != NULL); ASSERT(pag_agno(args->pag) == XFS_FSB_TO_AGNO(mp, target)); args->agno = XFS_FSB_TO_AGNO(mp, target); args->agbno = XFS_FSB_TO_AGBNO(mp, target); trace_xfs_alloc_vextent_exact_bno(args); error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); if (error) { if (error == -ENOSPC) return 0; return error; } error = xfs_alloc_vextent_prepare_ag(args, 0); if (!error && args->agbp) error = xfs_alloc_ag_vextent_exact(args); return xfs_alloc_vextent_finish(args, minimum_agno, error, false); } /* * Allocate an extent as close to the target as possible. If there are not * viable candidates in the AG, then fail the allocation. * * Caller may or may not have a per-ag reference in args->pag. 
*/ int xfs_alloc_vextent_near_bno( struct xfs_alloc_arg *args, xfs_fsblock_t target) { struct xfs_mount *mp = args->mp; xfs_agnumber_t minimum_agno; bool needs_perag = args->pag == NULL; uint32_t alloc_flags = 0; int error; if (!needs_perag) ASSERT(pag_agno(args->pag) == XFS_FSB_TO_AGNO(mp, target)); args->agno = XFS_FSB_TO_AGNO(mp, target); args->agbno = XFS_FSB_TO_AGBNO(mp, target); trace_xfs_alloc_vextent_near_bno(args); error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); if (error) { if (error == -ENOSPC) return 0; return error; } if (needs_perag) args->pag = xfs_perag_grab(mp, args->agno); error = xfs_alloc_vextent_prepare_ag(args, alloc_flags); if (!error && args->agbp) error = xfs_alloc_ag_vextent_near(args, alloc_flags); return xfs_alloc_vextent_finish(args, minimum_agno, error, needs_perag); } /* Ensure that the freelist is at full capacity. */ int xfs_free_extent_fix_freelist( struct xfs_trans *tp, struct xfs_perag *pag, struct xfs_buf **agbp) { struct xfs_alloc_arg args; int error; memset(&args, 0, sizeof(struct xfs_alloc_arg)); args.tp = tp; args.mp = tp->t_mountp; args.agno = pag_agno(pag); args.pag = pag; /* * validate that the block number is legal - the enables us to detect * and handle a silent filesystem corruption rather than crashing. */ if (args.agno >= args.mp->m_sb.sb_agcount) return -EFSCORRUPTED; error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING); if (error) return error; *agbp = args.agbp; return 0; } /* * Free an extent. * Just break up the extent address and hand off to xfs_free_ag_extent * after fixing up the freelist. */ int __xfs_free_extent( struct xfs_trans *tp, struct xfs_perag *pag, xfs_agblock_t agbno, xfs_extlen_t len, const struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type, bool skip_discard) { struct xfs_mount *mp = tp->t_mountp; struct xfs_buf *agbp; struct xfs_agf *agf; int error; unsigned int busy_flags = 0; ASSERT(len != 0); ASSERT(type != XFS_AG_RESV_AGFL); if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_FREE_EXTENT)) return -EIO; error = xfs_free_extent_fix_freelist(tp, pag, &agbp); if (error) { if (xfs_metadata_is_sick(error)) xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT); return error; } agf = agbp->b_addr; if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) { xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT); error = -EFSCORRUPTED; goto err_release; } /* validate the extent size is legal now we have the agf locked */ if (XFS_IS_CORRUPT(mp, agbno + len > be32_to_cpu(agf->agf_length))) { xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT); error = -EFSCORRUPTED; goto err_release; } error = xfs_free_ag_extent(tp, agbp, agbno, len, oinfo, type); if (error) goto err_release; if (skip_discard) busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD; xfs_extent_busy_insert(tp, pag_group(pag), agbno, len, busy_flags); return 0; err_release: xfs_trans_brelse(tp, agbp); return error; } struct xfs_alloc_query_range_info { xfs_alloc_query_range_fn fn; void *priv; }; /* Format btree record and pass to our callback. */ STATIC int xfs_alloc_query_range_helper( struct xfs_btree_cur *cur, const union xfs_btree_rec *rec, void *priv) { struct xfs_alloc_query_range_info *query = priv; struct xfs_alloc_rec_incore irec; xfs_failaddr_t fa; xfs_alloc_btrec_to_irec(rec, &irec); fa = xfs_alloc_check_irec(to_perag(cur->bc_group), &irec); if (fa) return xfs_alloc_complain_bad_rec(cur, fa, &irec); return query->fn(cur, &irec, query->priv); } /* Find all free space within a given range of blocks. 
*/ int xfs_alloc_query_range( struct xfs_btree_cur *cur, const struct xfs_alloc_rec_incore *low_rec, const struct xfs_alloc_rec_incore *high_rec, xfs_alloc_query_range_fn fn, void *priv) { union xfs_btree_irec low_brec = { .a = *low_rec }; union xfs_btree_irec high_brec = { .a = *high_rec }; struct xfs_alloc_query_range_info query = { .priv = priv, .fn = fn }; ASSERT(xfs_btree_is_bno(cur->bc_ops)); return xfs_btree_query_range(cur, &low_brec, &high_brec, xfs_alloc_query_range_helper, &query); } /* Find all free space records. */ int xfs_alloc_query_all( struct xfs_btree_cur *cur, xfs_alloc_query_range_fn fn, void *priv) { struct xfs_alloc_query_range_info query; ASSERT(xfs_btree_is_bno(cur->bc_ops)); query.priv = priv; query.fn = fn; return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query); } /* * Scan part of the keyspace of the free space and tell us if the area has no * records, is fully mapped by records, or is partially filled. */ int xfs_alloc_has_records( struct xfs_btree_cur *cur, xfs_agblock_t bno, xfs_extlen_t len, enum xbtree_recpacking *outcome) { union xfs_btree_irec low; union xfs_btree_irec high; memset(&low, 0, sizeof(low)); low.a.ar_startblock = bno; memset(&high, 0xFF, sizeof(high)); high.a.ar_startblock = bno + len - 1; return xfs_btree_has_records(cur, &low, &high, NULL, outcome); } /* * Walk all the blocks in the AGFL. The @walk_fn can return any negative * error code or XFS_ITER_*. */ int xfs_agfl_walk( struct xfs_mount *mp, struct xfs_agf *agf, struct xfs_buf *agflbp, xfs_agfl_walk_fn walk_fn, void *priv) { __be32 *agfl_bno; unsigned int i; int error; agfl_bno = xfs_buf_to_agfl_bno(agflbp); i = be32_to_cpu(agf->agf_flfirst); /* Nothing to walk in an empty AGFL. */ if (agf->agf_flcount == cpu_to_be32(0)) return 0; /* Otherwise, walk from first to last, wrapping as needed. */ for (;;) { error = walk_fn(mp, be32_to_cpu(agfl_bno[i]), priv); if (error) return error; if (i == be32_to_cpu(agf->agf_fllast)) break; if (++i == xfs_agfl_size(mp)) i = 0; } return 0; } int __init xfs_extfree_intent_init_cache(void) { xfs_extfree_item_cache = kmem_cache_create("xfs_extfree_intent", sizeof(struct xfs_extent_free_item), 0, 0, NULL); return xfs_extfree_item_cache != NULL ? 0 : -ENOMEM; } void xfs_extfree_intent_destroy_cache(void) { kmem_cache_destroy(xfs_extfree_item_cache); xfs_extfree_item_cache = NULL; }
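/*
 * Illustrative sketch, not part of the kernel source above: a standalone,
 * userspace-only model of the AGFL "active range" arithmetic used by the
 * xfs_agfl_needs_reset() check earlier in this file. The function and
 * variable names here (agfl_active_count, main, the sample sizes) are
 * invented for the example; only the wrap-around formula mirrors the code
 * above. A mismatch between this computed count and the on-disk flcount is
 * what flags the perag for an AGFL reset.
 */
#include <stdio.h>
#include <stdint.h>

/* Number of active AGFL slots between flfirst (f) and fllast (l), inclusive. */
static uint32_t agfl_active_count(uint32_t f, uint32_t l, uint32_t c,
				  uint32_t agfl_size)
{
	if (!c)
		return 0;		/* empty list: no active range */
	if (l >= f)
		return l - f + 1;	/* no wrap: contiguous slots f..l */
	return agfl_size - f + l + 1;	/* wrapped past the end of the array */
}

int main(void)
{
	/* unwrapped: first=2, last=5 -> 4 active entries */
	printf("%u\n", agfl_active_count(2, 5, 4, 118));
	/* wrapped: first=116, last=1 on a 118-slot AGFL -> also 4 entries */
	printf("%u\n", agfl_active_count(116, 1, 4, 118));
	return 0;
}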
// SPDX-License-Identifier: MIT #include <linux/fb.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_shmem.h> #include <drm/drm_fb_helper.h> #include <drm/drm_framebuffer.h> #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_gem_shmem_helper.h> /* * struct fb_ops */ static int drm_fbdev_shmem_fb_open(struct fb_info *info, int user) { struct drm_fb_helper *fb_helper = info->par; /* No need to take a ref for fbcon because it unbinds on unregister */ if (user && !try_module_get(fb_helper->dev->driver->fops->owner)) return -ENODEV; return 0; } static int drm_fbdev_shmem_fb_release(struct fb_info *info, int user) { struct drm_fb_helper *fb_helper = info->par; if (user) module_put(fb_helper->dev->driver->fops->owner); return 0; } FB_GEN_DEFAULT_DEFERRED_SYSMEM_OPS(drm_fbdev_shmem, drm_fb_helper_damage_range, drm_fb_helper_damage_area); static int drm_fbdev_shmem_fb_mmap(struct fb_info *info, struct vm_area_struct *vma) { struct drm_fb_helper *fb_helper = info->par; struct drm_framebuffer *fb = fb_helper->fb; struct drm_gem_object *obj = drm_gem_fb_get_obj(fb, 0); struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); if (shmem->map_wc) vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); return fb_deferred_io_mmap(info, vma); } static void drm_fbdev_shmem_fb_destroy(struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; if (!fb_helper->dev) return; fb_deferred_io_cleanup(info); drm_fb_helper_fini(fb_helper); drm_client_buffer_vunmap(fb_helper->buffer); drm_client_framebuffer_delete(fb_helper->buffer); drm_client_release(&fb_helper->client); drm_fb_helper_unprepare(fb_helper); kfree(fb_helper); } static const struct fb_ops drm_fbdev_shmem_fb_ops = { .owner = THIS_MODULE, .fb_open = drm_fbdev_shmem_fb_open, .fb_release = drm_fbdev_shmem_fb_release, __FB_DEFAULT_DEFERRED_OPS_RDWR(drm_fbdev_shmem), DRM_FB_HELPER_DEFAULT_OPS, __FB_DEFAULT_DEFERRED_OPS_DRAW(drm_fbdev_shmem), .fb_mmap = drm_fbdev_shmem_fb_mmap, .fb_destroy = drm_fbdev_shmem_fb_destroy, }; static struct page *drm_fbdev_shmem_get_page(struct fb_info *info, unsigned long offset) { struct drm_fb_helper *fb_helper = info->par; struct drm_framebuffer *fb = fb_helper->fb; struct drm_gem_object *obj = drm_gem_fb_get_obj(fb, 0); struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); unsigned int i = offset >> PAGE_SHIFT; struct page *page; if (fb_WARN_ON_ONCE(info, offset > obj->size)) return NULL; page = shmem->pages[i]; // protected by active vmap if (page) get_page(page); fb_WARN_ON_ONCE(info, !page); return page; } /* * struct drm_fb_helper */ static int drm_fbdev_shmem_helper_fb_dirty(struct drm_fb_helper *helper, struct drm_clip_rect *clip) { struct drm_device *dev = helper->dev; int ret; /* Call damage handlers only if necessary */ if
(!(clip->x1 < clip->x2 && clip->y1 < clip->y2)) return 0; if (helper->fb->funcs->dirty) { ret = helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, clip, 1); if (drm_WARN_ONCE(dev, ret, "Dirty helper failed: ret=%d\n", ret)) return ret; } return 0; } static const struct drm_fb_helper_funcs drm_fbdev_shmem_helper_funcs = { .fb_dirty = drm_fbdev_shmem_helper_fb_dirty, }; /* * struct drm_driver */ int drm_fbdev_shmem_driver_fbdev_probe(struct drm_fb_helper *fb_helper, struct drm_fb_helper_surface_size *sizes) { struct drm_client_dev *client = &fb_helper->client; struct drm_device *dev = fb_helper->dev; struct drm_client_buffer *buffer; struct drm_gem_shmem_object *shmem; struct drm_framebuffer *fb; struct fb_info *info; u32 format; struct iosys_map map; int ret; drm_dbg_kms(dev, "surface width(%d), height(%d) and bpp(%d)\n", sizes->surface_width, sizes->surface_height, sizes->surface_bpp); format = drm_driver_legacy_fb_format(dev, sizes->surface_bpp, sizes->surface_depth); buffer = drm_client_framebuffer_create(client, sizes->surface_width, sizes->surface_height, format); if (IS_ERR(buffer)) return PTR_ERR(buffer); shmem = to_drm_gem_shmem_obj(buffer->gem); fb = buffer->fb; ret = drm_client_buffer_vmap(buffer, &map); if (ret) { goto err_drm_client_buffer_delete; } else if (drm_WARN_ON(dev, map.is_iomem)) { ret = -ENODEV; /* I/O memory not supported; use generic emulation */ goto err_drm_client_buffer_delete; } fb_helper->funcs = &drm_fbdev_shmem_helper_funcs; fb_helper->buffer = buffer; fb_helper->fb = fb; info = drm_fb_helper_alloc_info(fb_helper); if (IS_ERR(info)) { ret = PTR_ERR(info); goto err_drm_client_buffer_vunmap; } drm_fb_helper_fill_info(info, fb_helper, sizes); info->fbops = &drm_fbdev_shmem_fb_ops; /* screen */ info->flags |= FBINFO_VIRTFB; /* system memory */ if (!shmem->map_wc) info->flags |= FBINFO_READS_FAST; /* signal caching */ info->screen_size = sizes->surface_height * fb->pitches[0]; info->screen_buffer = map.vaddr; info->fix.smem_len = info->screen_size; /* deferred I/O */ fb_helper->fbdefio.delay = HZ / 20; fb_helper->fbdefio.get_page = drm_fbdev_shmem_get_page; fb_helper->fbdefio.deferred_io = drm_fb_helper_deferred_io; info->fbdefio = &fb_helper->fbdefio; ret = fb_deferred_io_init(info); if (ret) goto err_drm_fb_helper_release_info; return 0; err_drm_fb_helper_release_info: drm_fb_helper_release_info(fb_helper); err_drm_client_buffer_vunmap: fb_helper->fb = NULL; fb_helper->buffer = NULL; drm_client_buffer_vunmap(buffer); err_drm_client_buffer_delete: drm_client_framebuffer_delete(buffer); return ret; } EXPORT_SYMBOL(drm_fbdev_shmem_driver_fbdev_probe);
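/*
 * Illustrative sketch, not part of the driver source above: a standalone,
 * userspace-only model of two small calculations used by the fbdev-shmem
 * helpers. drm_fbdev_shmem_get_page() turns a byte offset into the
 * framebuffer memory into a page index with offset >> PAGE_SHIFT, and the
 * dirty helper only calls the framebuffer's ->dirty() hook for a non-empty
 * clip rectangle. The struct and function names below are invented for this
 * example; only the arithmetic mirrors the code above.
 */
#include <stdbool.h>
#include <stdio.h>

#define EXAMPLE_PAGE_SHIFT 12	/* 4 KiB pages, as on most architectures */

struct example_clip {
	int x1, y1;	/* top-left corner of the damaged area */
	int x2, y2;	/* bottom-right corner (exclusive in this model) */
};

/* Page index covering a byte offset into the linearly mapped buffer. */
static unsigned long example_page_index(unsigned long offset)
{
	return offset >> EXAMPLE_PAGE_SHIFT;
}

/* A clip is worth flushing only if it spans at least one pixel. */
static bool example_clip_nonempty(const struct example_clip *clip)
{
	return clip->x1 < clip->x2 && clip->y1 < clip->y2;
}

int main(void)
{
	struct example_clip empty = { 10, 10, 10, 10 };
	struct example_clip dirty = { 0, 0, 64, 32 };

	printf("offset 0x5000 -> page %lu\n", example_page_index(0x5000));
	printf("empty clip needs flush? %d\n", example_clip_nonempty(&empty));
	printf("dirty clip needs flush? %d\n", example_clip_nonempty(&dirty));
	return 0;
}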
// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/act_mirred.c packet mirroring and redirect actions * * Authors: Jamal Hadi Salim (2002-4) * * TODO: Add ingress support (and socket redirect support) */ #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/module.h> #include <linux/init.h> #include <linux/gfp.h> #include <linux/if_arp.h> #include <net/net_namespace.h> #include <net/netlink.h> #include <net/dst.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> #include <linux/tc_act/tc_mirred.h> #include <net/tc_act/tc_mirred.h> #include <net/tc_wrapper.h> static LIST_HEAD(mirred_list); static DEFINE_SPINLOCK(mirred_list_lock); #define
MIRRED_NEST_LIMIT 4 static DEFINE_PER_CPU(unsigned int, mirred_nest_level); static bool tcf_mirred_is_act_redirect(int action) { return action == TCA_EGRESS_REDIR || action == TCA_INGRESS_REDIR; } static bool tcf_mirred_act_wants_ingress(int action) { switch (action) { case TCA_EGRESS_REDIR: case TCA_EGRESS_MIRROR: return false; case TCA_INGRESS_REDIR: case TCA_INGRESS_MIRROR: return true; default: BUG(); } } static bool tcf_mirred_can_reinsert(int action) { switch (action) { case TC_ACT_SHOT: case TC_ACT_STOLEN: case TC_ACT_QUEUED: case TC_ACT_TRAP: return true; } return false; } static struct net_device *tcf_mirred_dev_dereference(struct tcf_mirred *m) { return rcu_dereference_protected(m->tcfm_dev, lockdep_is_held(&m->tcf_lock)); } static void tcf_mirred_release(struct tc_action *a) { struct tcf_mirred *m = to_mirred(a); struct net_device *dev; spin_lock(&mirred_list_lock); list_del(&m->tcfm_list); spin_unlock(&mirred_list_lock); /* last reference to action, no need to lock */ dev = rcu_dereference_protected(m->tcfm_dev, 1); netdev_put(dev, &m->tcfm_dev_tracker); } static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { [TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) }, [TCA_MIRRED_BLOCKID] = NLA_POLICY_MIN(NLA_U32, 1), }; static struct tc_action_ops act_mirred_ops; static void tcf_mirred_replace_dev(struct tcf_mirred *m, struct net_device *ndev) { struct net_device *odev; odev = rcu_replace_pointer(m->tcfm_dev, ndev, lockdep_is_held(&m->tcf_lock)); netdev_put(odev, &m->tcfm_dev_tracker); } static int tcf_mirred_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, act_mirred_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_MIRRED_MAX + 1]; struct tcf_chain *goto_ch = NULL; bool mac_header_xmit = false; struct tc_mirred *parm; struct tcf_mirred *m; bool exists = false; int ret, err; u32 index; if (!nla) { NL_SET_ERR_MSG_MOD(extack, "Mirred requires attributes to be passed"); return -EINVAL; } ret = nla_parse_nested_deprecated(tb, TCA_MIRRED_MAX, nla, mirred_policy, extack); if (ret < 0) return ret; if (!tb[TCA_MIRRED_PARMS]) { NL_SET_ERR_MSG_MOD(extack, "Missing required mirred parameters"); return -EINVAL; } parm = nla_data(tb[TCA_MIRRED_PARMS]); index = parm->index; err = tcf_idr_check_alloc(tn, &index, a, bind); if (err < 0) return err; exists = err; if (exists && bind) return ACT_P_BOUND; if (tb[TCA_MIRRED_BLOCKID] && parm->ifindex) { NL_SET_ERR_MSG_MOD(extack, "Cannot specify Block ID and dev simultaneously"); if (exists) tcf_idr_release(*a, bind); else tcf_idr_cleanup(tn, index); return -EINVAL; } switch (parm->eaction) { case TCA_EGRESS_MIRROR: case TCA_EGRESS_REDIR: case TCA_INGRESS_REDIR: case TCA_INGRESS_MIRROR: break; default: if (exists) tcf_idr_release(*a, bind); else tcf_idr_cleanup(tn, index); NL_SET_ERR_MSG_MOD(extack, "Unknown mirred option"); return -EINVAL; } if (!exists) { if (!parm->ifindex && !tb[TCA_MIRRED_BLOCKID]) { tcf_idr_cleanup(tn, index); NL_SET_ERR_MSG_MOD(extack, "Must specify device or block"); return -EINVAL; } ret = tcf_idr_create_from_flags(tn, index, est, a, &act_mirred_ops, bind, flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; } ret = ACT_P_CREATED; } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } m = to_mirred(*a); if (ret == ACT_P_CREATED) INIT_LIST_HEAD(&m->tcfm_list); err = 
tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); if (err < 0) goto release_idr; spin_lock_bh(&m->tcf_lock); if (parm->ifindex) { struct net_device *ndev; ndev = dev_get_by_index(net, parm->ifindex); if (!ndev) { spin_unlock_bh(&m->tcf_lock); err = -ENODEV; goto put_chain; } mac_header_xmit = dev_is_mac_header_xmit(ndev); tcf_mirred_replace_dev(m, ndev); netdev_tracker_alloc(ndev, &m->tcfm_dev_tracker, GFP_ATOMIC); m->tcfm_mac_header_xmit = mac_header_xmit; m->tcfm_blockid = 0; } else if (tb[TCA_MIRRED_BLOCKID]) { tcf_mirred_replace_dev(m, NULL); m->tcfm_mac_header_xmit = false; m->tcfm_blockid = nla_get_u32(tb[TCA_MIRRED_BLOCKID]); } goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); m->tcfm_eaction = parm->eaction; spin_unlock_bh(&m->tcf_lock); if (goto_ch) tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) { spin_lock(&mirred_list_lock); list_add(&m->tcfm_list, &mirred_list); spin_unlock(&mirred_list_lock); } return ret; put_chain: if (goto_ch) tcf_chain_put_by_act(goto_ch); release_idr: tcf_idr_release(*a, bind); return err; } static int tcf_mirred_forward(bool at_ingress, bool want_ingress, struct sk_buff *skb) { int err; if (!want_ingress) err = tcf_dev_queue_xmit(skb, dev_queue_xmit); else if (!at_ingress) err = netif_rx(skb); else err = netif_receive_skb(skb); return err; } static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m, struct net_device *dev, const bool m_mac_header_xmit, int m_eaction, int retval) { struct sk_buff *skb_to_send = skb; bool want_ingress; bool is_redirect; bool expects_nh; bool at_ingress; bool dont_clone; int mac_len; bool at_nh; int err; is_redirect = tcf_mirred_is_act_redirect(m_eaction); if (unlikely(!(dev->flags & IFF_UP)) || !netif_carrier_ok(dev)) { net_notice_ratelimited("tc mirred to Houston: device %s is down\n", dev->name); goto err_cant_do; } /* we could easily avoid the clone only if called by ingress and clsact; * since we can't easily detect the clsact caller, skip clone only for * ingress - that covers the TC S/W datapath. */ at_ingress = skb_at_tc_ingress(skb); dont_clone = skb_at_tc_ingress(skb) && is_redirect && tcf_mirred_can_reinsert(retval); if (!dont_clone) { skb_to_send = skb_clone(skb, GFP_ATOMIC); if (!skb_to_send) goto err_cant_do; } want_ingress = tcf_mirred_act_wants_ingress(m_eaction); /* All mirred/redirected skbs should clear previous ct info */ nf_reset_ct(skb_to_send); if (want_ingress && !at_ingress) /* drop dst for egress -> ingress */ skb_dst_drop(skb_to_send); expects_nh = want_ingress || !m_mac_header_xmit; at_nh = skb->data == skb_network_header(skb); if (at_nh != expects_nh) { mac_len = at_ingress ? 
skb->mac_len : skb_network_offset(skb); if (expects_nh) { /* target device/action expect data at nh */ skb_pull_rcsum(skb_to_send, mac_len); } else { /* target device/action expect data at mac */ skb_push_rcsum(skb_to_send, mac_len); } } skb_to_send->skb_iif = skb->dev->ifindex; skb_to_send->dev = dev; if (is_redirect) { if (skb == skb_to_send) retval = TC_ACT_CONSUMED; skb_set_redirected(skb_to_send, skb_to_send->tc_at_ingress); err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send); } else { err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send); } if (err) tcf_action_inc_overlimit_qstats(&m->common); return retval; err_cant_do: if (is_redirect) retval = TC_ACT_SHOT; tcf_action_inc_overlimit_qstats(&m->common); return retval; } static int tcf_blockcast_redir(struct sk_buff *skb, struct tcf_mirred *m, struct tcf_block *block, int m_eaction, const u32 exception_ifindex, int retval) { struct net_device *dev_prev = NULL; struct net_device *dev = NULL; unsigned long index; int mirred_eaction; mirred_eaction = tcf_mirred_act_wants_ingress(m_eaction) ? TCA_INGRESS_MIRROR : TCA_EGRESS_MIRROR; xa_for_each(&block->ports, index, dev) { if (index == exception_ifindex) continue; if (!dev_prev) goto assign_prev; tcf_mirred_to_dev(skb, m, dev_prev, dev_is_mac_header_xmit(dev), mirred_eaction, retval); assign_prev: dev_prev = dev; } if (dev_prev) return tcf_mirred_to_dev(skb, m, dev_prev, dev_is_mac_header_xmit(dev_prev), m_eaction, retval); return retval; } static int tcf_blockcast_mirror(struct sk_buff *skb, struct tcf_mirred *m, struct tcf_block *block, int m_eaction, const u32 exception_ifindex, int retval) { struct net_device *dev = NULL; unsigned long index; xa_for_each(&block->ports, index, dev) { if (index == exception_ifindex) continue; tcf_mirred_to_dev(skb, m, dev, dev_is_mac_header_xmit(dev), m_eaction, retval); } return retval; } static int tcf_blockcast(struct sk_buff *skb, struct tcf_mirred *m, const u32 blockid, struct tcf_result *res, int retval) { const u32 exception_ifindex = skb->dev->ifindex; struct tcf_block *block; bool is_redirect; int m_eaction; m_eaction = READ_ONCE(m->tcfm_eaction); is_redirect = tcf_mirred_is_act_redirect(m_eaction); /* we are already under rcu protection, so can call block lookup * directly. 
*/ block = tcf_block_lookup(dev_net(skb->dev), blockid); if (!block || xa_empty(&block->ports)) { tcf_action_inc_overlimit_qstats(&m->common); return retval; } if (is_redirect) return tcf_blockcast_redir(skb, m, block, m_eaction, exception_ifindex, retval); /* If it's not redirect, it is mirror */ return tcf_blockcast_mirror(skb, m, block, m_eaction, exception_ifindex, retval); } TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_mirred *m = to_mirred(a); int retval = READ_ONCE(m->tcf_action); unsigned int nest_level; bool m_mac_header_xmit; struct net_device *dev; int m_eaction; u32 blockid; nest_level = __this_cpu_inc_return(mirred_nest_level); if (unlikely(nest_level > MIRRED_NEST_LIMIT)) { net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n", netdev_name(skb->dev)); retval = TC_ACT_SHOT; goto dec_nest_level; } tcf_lastuse_update(&m->tcf_tm); tcf_action_update_bstats(&m->common, skb); blockid = READ_ONCE(m->tcfm_blockid); if (blockid) { retval = tcf_blockcast(skb, m, blockid, res, retval); goto dec_nest_level; } dev = rcu_dereference_bh(m->tcfm_dev); if (unlikely(!dev)) { pr_notice_once("tc mirred: target device is gone\n"); tcf_action_inc_overlimit_qstats(&m->common); goto dec_nest_level; } m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit); m_eaction = READ_ONCE(m->tcfm_eaction); retval = tcf_mirred_to_dev(skb, m, dev, m_mac_header_xmit, m_eaction, retval); dec_nest_level: __this_cpu_dec(mirred_nest_level); return retval; } static void tcf_stats_update(struct tc_action *a, u64 bytes, u64 packets, u64 drops, u64 lastuse, bool hw) { struct tcf_mirred *m = to_mirred(a); struct tcf_t *tm = &m->tcf_tm; tcf_action_update_stats(a, bytes, packets, drops, hw); tm->lastuse = max_t(u64, tm->lastuse, lastuse); } static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_mirred *m = to_mirred(a); struct tc_mirred opt = { .index = m->tcf_index, .refcnt = refcount_read(&m->tcf_refcnt) - ref, .bindcnt = atomic_read(&m->tcf_bindcnt) - bind, }; struct net_device *dev; struct tcf_t t; u32 blockid; spin_lock_bh(&m->tcf_lock); opt.action = m->tcf_action; opt.eaction = m->tcfm_eaction; dev = tcf_mirred_dev_dereference(m); if (dev) opt.ifindex = dev->ifindex; if (nla_put(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt)) goto nla_put_failure; blockid = m->tcfm_blockid; if (blockid && nla_put_u32(skb, TCA_MIRRED_BLOCKID, blockid)) goto nla_put_failure; tcf_tm_dump(&t, &m->tcf_tm); if (nla_put_64bit(skb, TCA_MIRRED_TM, sizeof(t), &t, TCA_MIRRED_PAD)) goto nla_put_failure; spin_unlock_bh(&m->tcf_lock); return skb->len; nla_put_failure: spin_unlock_bh(&m->tcf_lock); nlmsg_trim(skb, b); return -1; } static int mirred_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct tcf_mirred *m; ASSERT_RTNL(); if (event == NETDEV_UNREGISTER) { spin_lock(&mirred_list_lock); list_for_each_entry(m, &mirred_list, tcfm_list) { spin_lock_bh(&m->tcf_lock); if (tcf_mirred_dev_dereference(m) == dev) { netdev_put(dev, &m->tcfm_dev_tracker); /* Note : no rcu grace period necessary, as * net_device are already rcu protected. 
*/ RCU_INIT_POINTER(m->tcfm_dev, NULL); } spin_unlock_bh(&m->tcf_lock); } spin_unlock(&mirred_list_lock); } return NOTIFY_DONE; } static struct notifier_block mirred_device_notifier = { .notifier_call = mirred_device_event, }; static void tcf_mirred_dev_put(void *priv) { struct net_device *dev = priv; dev_put(dev); } static struct net_device * tcf_mirred_get_dev(const struct tc_action *a, tc_action_priv_destructor *destructor) { struct tcf_mirred *m = to_mirred(a); struct net_device *dev; rcu_read_lock(); dev = rcu_dereference(m->tcfm_dev); if (dev) { dev_hold(dev); *destructor = tcf_mirred_dev_put; } rcu_read_unlock(); return dev; } static size_t tcf_mirred_get_fill_size(const struct tc_action *act) { return nla_total_size(sizeof(struct tc_mirred)); } static void tcf_offload_mirred_get_dev(struct flow_action_entry *entry, const struct tc_action *act) { entry->dev = act->ops->get_dev(act, &entry->destructor); if (!entry->dev) return; entry->destructor_priv = entry->dev; } static int tcf_mirred_offload_act_setup(struct tc_action *act, void *entry_data, u32 *index_inc, bool bind, struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; if (is_tcf_mirred_egress_redirect(act)) { entry->id = FLOW_ACTION_REDIRECT; tcf_offload_mirred_get_dev(entry, act); } else if (is_tcf_mirred_egress_mirror(act)) { entry->id = FLOW_ACTION_MIRRED; tcf_offload_mirred_get_dev(entry, act); } else if (is_tcf_mirred_ingress_redirect(act)) { entry->id = FLOW_ACTION_REDIRECT_INGRESS; tcf_offload_mirred_get_dev(entry, act); } else if (is_tcf_mirred_ingress_mirror(act)) { entry->id = FLOW_ACTION_MIRRED_INGRESS; tcf_offload_mirred_get_dev(entry, act); } else { NL_SET_ERR_MSG_MOD(extack, "Unsupported mirred offload"); return -EOPNOTSUPP; } *index_inc = 1; } else { struct flow_offload_action *fl_action = entry_data; if (is_tcf_mirred_egress_redirect(act)) fl_action->id = FLOW_ACTION_REDIRECT; else if (is_tcf_mirred_egress_mirror(act)) fl_action->id = FLOW_ACTION_MIRRED; else if (is_tcf_mirred_ingress_redirect(act)) fl_action->id = FLOW_ACTION_REDIRECT_INGRESS; else if (is_tcf_mirred_ingress_mirror(act)) fl_action->id = FLOW_ACTION_MIRRED_INGRESS; else return -EOPNOTSUPP; } return 0; } static struct tc_action_ops act_mirred_ops = { .kind = "mirred", .id = TCA_ID_MIRRED, .owner = THIS_MODULE, .act = tcf_mirred_act, .stats_update = tcf_stats_update, .dump = tcf_mirred_dump, .cleanup = tcf_mirred_release, .init = tcf_mirred_init, .get_fill_size = tcf_mirred_get_fill_size, .offload_act_setup = tcf_mirred_offload_act_setup, .size = sizeof(struct tcf_mirred), .get_dev = tcf_mirred_get_dev, }; MODULE_ALIAS_NET_ACT("mirred"); static __net_init int mirred_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, act_mirred_ops.net_id); return tc_action_net_init(net, tn, &act_mirred_ops); } static void __net_exit mirred_exit_net(struct list_head *net_list) { tc_action_net_exit(net_list, act_mirred_ops.net_id); } static struct pernet_operations mirred_net_ops = { .init = mirred_init_net, .exit_batch = mirred_exit_net, .id = &act_mirred_ops.net_id, .size = sizeof(struct tc_action_net), }; MODULE_AUTHOR("Jamal Hadi Salim(2002)"); MODULE_DESCRIPTION("Device Mirror/redirect actions"); MODULE_LICENSE("GPL"); static int __init mirred_init_module(void) { int err = register_netdevice_notifier(&mirred_device_notifier); if (err) return err; pr_info("Mirror/redirect action on\n"); err = tcf_register_action(&act_mirred_ops, &mirred_net_ops); if (err) 
unregister_netdevice_notifier(&mirred_device_notifier); return err; } static void __exit mirred_cleanup_module(void) { tcf_unregister_action(&act_mirred_ops, &mirred_net_ops); unregister_netdevice_notifier(&mirred_device_notifier); } module_init(mirred_init_module); module_exit(mirred_cleanup_module);
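The mirred action above guards against redirect loops with a per-CPU nesting counter (mirred_nest_level) that is bumped on entry to tcf_mirred_act(), checked against MIRRED_NEST_LIMIT, and dropped on exit. The standalone sketch below mirrors that pattern in userspace with a thread-local counter; MIRROR_NEST_LIMIT, process_packet() and the depth argument are illustrative names only, not kernel interfaces.

/*
 * Minimal userspace sketch of the recursion guard used by tcf_mirred_act():
 * bump a per-thread nesting counter on entry, refuse to go deeper than a
 * fixed limit, and always drop the counter on the way out.
 */
#include <stdio.h>

#define MIRROR_NEST_LIMIT 4

static _Thread_local unsigned int nest_level;

/* Pretend that "redirecting" a packet re-enters the same handler. */
static int process_packet(int depth_wanted)
{
	int ret = 0;

	if (++nest_level > MIRROR_NEST_LIMIT) {
		fprintf(stderr, "recursion limit hit at depth %u, dropping\n",
			nest_level);
		ret = -1;		/* analogous to TC_ACT_SHOT */
		goto out;
	}

	if (depth_wanted > 1)
		ret = process_packet(depth_wanted - 1);
out:
	nest_level--;
	return ret;
}

int main(void)
{
	printf("shallow chain: %d\n", process_packet(2));	/* succeeds */
	printf("deep chain:    %d\n", process_packet(10));	/* hits the limit */
	return 0;
}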
862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 // SPDX-License-Identifier: GPL-2.0-only /* * binfmt_misc.c * * Copyright (C) 1997 Richard Günther * * binfmt_misc detects binaries via a magic or filename extension and invokes * a specified wrapper. See Documentation/admin-guide/binfmt-misc.rst for more details. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> #include <linux/sched/mm.h> #include <linux/magic.h> #include <linux/binfmts.h> #include <linux/slab.h> #include <linux/ctype.h> #include <linux/string_helpers.h> #include <linux/file.h> #include <linux/pagemap.h> #include <linux/namei.h> #include <linux/mount.h> #include <linux/fs_context.h> #include <linux/syscalls.h> #include <linux/fs.h> #include <linux/uaccess.h> #include "internal.h" #ifdef DEBUG # define USE_DEBUG 1 #else # define USE_DEBUG 0 #endif enum { VERBOSE_STATUS = 1 /* make it zero to save 400 bytes kernel memory */ }; enum {Enabled, Magic}; #define MISC_FMT_PRESERVE_ARGV0 (1UL << 31) #define MISC_FMT_OPEN_BINARY (1UL << 30) #define MISC_FMT_CREDENTIALS (1UL << 29) #define MISC_FMT_OPEN_FILE (1UL << 28) typedef struct { struct list_head list; unsigned long flags; /* type, status, etc. */ int offset; /* offset of magic */ int size; /* size of magic/mask */ char *magic; /* magic or filename extension */ char *mask; /* mask, NULL for exact match */ const char *interpreter; /* filename of interpreter */ char *name; struct dentry *dentry; struct file *interp_file; refcount_t users; /* sync removal with load_misc_binary() */ } Node; static struct file_system_type bm_fs_type; /* * Max length of the register string. Determined by: * - 7 delimiters * - name: ~50 bytes * - type: 1 byte * - offset: 3 bytes (has to be smaller than BINPRM_BUF_SIZE) * - magic: 128 bytes (512 in escaped form) * - mask: 128 bytes (512 in escaped form) * - interp: ~50 bytes * - flags: 5 bytes * Round that up a bit, and then back off to hold the internal data * (like struct Node). */ #define MAX_REGISTER_LENGTH 1920 /** * search_binfmt_handler - search for a binary handler for @bprm * @misc: handle to binfmt_misc instance * @bprm: binary for which we are looking for a handler * * Search for a binary type handler for @bprm in the list of registered binary * type handlers. * * Return: binary type list entry on success, NULL on failure */ static Node *search_binfmt_handler(struct binfmt_misc *misc, struct linux_binprm *bprm) { char *p = strrchr(bprm->interp, '.'); Node *e; /* Walk all the registered handlers. 
*/ list_for_each_entry(e, &misc->entries, list) { char *s; int j; /* Make sure this one is currently enabled. */ if (!test_bit(Enabled, &e->flags)) continue; /* Do matching based on extension if applicable. */ if (!test_bit(Magic, &e->flags)) { if (p && !strcmp(e->magic, p + 1)) return e; continue; } /* Do matching based on magic & mask. */ s = bprm->buf + e->offset; if (e->mask) { for (j = 0; j < e->size; j++) if ((*s++ ^ e->magic[j]) & e->mask[j]) break; } else { for (j = 0; j < e->size; j++) if ((*s++ ^ e->magic[j])) break; } if (j == e->size) return e; } return NULL; } /** * get_binfmt_handler - try to find a binary type handler * @misc: handle to binfmt_misc instance * @bprm: binary for which we are looking for a handler * * Try to find a binfmt handler for the binary type. If one is found take a * reference to protect against removal via bm_{entry,status}_write(). * * Return: binary type list entry on success, NULL on failure */ static Node *get_binfmt_handler(struct binfmt_misc *misc, struct linux_binprm *bprm) { Node *e; read_lock(&misc->entries_lock); e = search_binfmt_handler(misc, bprm); if (e) refcount_inc(&e->users); read_unlock(&misc->entries_lock); return e; } /** * put_binfmt_handler - put binary handler node * @e: node to put * * Free node syncing with load_misc_binary() and defer final free to * load_misc_binary() in case it is using the binary type handler we were * requested to remove. */ static void put_binfmt_handler(Node *e) { if (refcount_dec_and_test(&e->users)) { if (e->flags & MISC_FMT_OPEN_FILE) filp_close(e->interp_file, NULL); kfree(e); } } /** * load_binfmt_misc - load the binfmt_misc of the caller's user namespace * * To be called in load_misc_binary() to load the relevant struct binfmt_misc. * If a user namespace doesn't have its own binfmt_misc mount it can make use * of its ancestor's binfmt_misc handlers. This mimicks the behavior of * pre-namespaced binfmt_misc where all registered binfmt_misc handlers where * available to all user and user namespaces on the system. * * Return: the binfmt_misc instance of the caller's user namespace */ static struct binfmt_misc *load_binfmt_misc(void) { const struct user_namespace *user_ns; struct binfmt_misc *misc; user_ns = current_user_ns(); while (user_ns) { /* Pairs with smp_store_release() in bm_fill_super(). */ misc = smp_load_acquire(&user_ns->binfmt_misc); if (misc) return misc; user_ns = user_ns->parent; } return &init_binfmt_misc; } /* * the loader itself */ static int load_misc_binary(struct linux_binprm *bprm) { Node *fmt; struct file *interp_file = NULL; int retval = -ENOEXEC; struct binfmt_misc *misc; misc = load_binfmt_misc(); if (!misc->enabled) return retval; fmt = get_binfmt_handler(misc, bprm); if (!fmt) return retval; /* Need to be able to load the file after exec */ retval = -ENOENT; if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE) goto ret; if (fmt->flags & MISC_FMT_PRESERVE_ARGV0) { bprm->interp_flags |= BINPRM_FLAGS_PRESERVE_ARGV0; } else { retval = remove_arg_zero(bprm); if (retval) goto ret; } if (fmt->flags & MISC_FMT_OPEN_BINARY) bprm->have_execfd = 1; /* make argv[1] be the path to the binary */ retval = copy_string_kernel(bprm->interp, bprm); if (retval < 0) goto ret; bprm->argc++; /* add the interp as argv[0] */ retval = copy_string_kernel(fmt->interpreter, bprm); if (retval < 0) goto ret; bprm->argc++; /* Update interp in case binfmt_script needs it. 
*/ retval = bprm_change_interp(fmt->interpreter, bprm); if (retval < 0) goto ret; if (fmt->flags & MISC_FMT_OPEN_FILE) { interp_file = file_clone_open(fmt->interp_file); if (!IS_ERR(interp_file)) deny_write_access(interp_file); } else { interp_file = open_exec(fmt->interpreter); } retval = PTR_ERR(interp_file); if (IS_ERR(interp_file)) goto ret; bprm->interpreter = interp_file; if (fmt->flags & MISC_FMT_CREDENTIALS) bprm->execfd_creds = 1; retval = 0; ret: /* * If we actually put the node here all concurrent calls to * load_misc_binary() will have finished. We also know * that for the refcount to be zero someone must have concurently * removed the binary type handler from the list and it's our job to * free it. */ put_binfmt_handler(fmt); return retval; } /* Command parsers */ /* * parses and copies one argument enclosed in del from *sp to *dp, * recognising the \x special. * returns pointer to the copied argument or NULL in case of an * error (and sets err) or null argument length. */ static char *scanarg(char *s, char del) { char c; while ((c = *s++) != del) { if (c == '\\' && *s == 'x') { s++; if (!isxdigit(*s++)) return NULL; if (!isxdigit(*s++)) return NULL; } } s[-1] ='\0'; return s; } static char *check_special_flags(char *sfs, Node *e) { char *p = sfs; int cont = 1; /* special flags */ while (cont) { switch (*p) { case 'P': pr_debug("register: flag: P (preserve argv0)\n"); p++; e->flags |= MISC_FMT_PRESERVE_ARGV0; break; case 'O': pr_debug("register: flag: O (open binary)\n"); p++; e->flags |= MISC_FMT_OPEN_BINARY; break; case 'C': pr_debug("register: flag: C (preserve creds)\n"); p++; /* this flags also implies the open-binary flag */ e->flags |= (MISC_FMT_CREDENTIALS | MISC_FMT_OPEN_BINARY); break; case 'F': pr_debug("register: flag: F: open interpreter file now\n"); p++; e->flags |= MISC_FMT_OPEN_FILE; break; default: cont = 0; } } return p; } /* * This registers a new binary format, it recognises the syntax * ':name:type:offset:magic:mask:interpreter:flags' * where the ':' is the IFS, that can be chosen with the first char */ static Node *create_entry(const char __user *buffer, size_t count) { Node *e; int memsize, err; char *buf, *p; char del; pr_debug("register: received %zu bytes\n", count); /* some sanity checks */ err = -EINVAL; if ((count < 11) || (count > MAX_REGISTER_LENGTH)) goto out; err = -ENOMEM; memsize = sizeof(Node) + count + 8; e = kmalloc(memsize, GFP_KERNEL_ACCOUNT); if (!e) goto out; p = buf = (char *)e + sizeof(Node); memset(e, 0, sizeof(Node)); if (copy_from_user(buf, buffer, count)) goto efault; del = *p++; /* delimeter */ pr_debug("register: delim: %#x {%c}\n", del, del); /* Pad the buffer with the delim to simplify parsing below. */ memset(buf + count, del, 8); /* Parse the 'name' field. */ e->name = p; p = strchr(p, del); if (!p) goto einval; *p++ = '\0'; if (!e->name[0] || !strcmp(e->name, ".") || !strcmp(e->name, "..") || strchr(e->name, '/')) goto einval; pr_debug("register: name: {%s}\n", e->name); /* Parse the 'type' field. */ switch (*p++) { case 'E': pr_debug("register: type: E (extension)\n"); e->flags = 1 << Enabled; break; case 'M': pr_debug("register: type: M (magic)\n"); e->flags = (1 << Enabled) | (1 << Magic); break; default: goto einval; } if (*p++ != del) goto einval; if (test_bit(Magic, &e->flags)) { /* Handle the 'M' (magic) format. */ char *s; /* Parse the 'offset' field. 
*/ s = strchr(p, del); if (!s) goto einval; *s = '\0'; if (p != s) { int r = kstrtoint(p, 10, &e->offset); if (r != 0 || e->offset < 0) goto einval; } p = s; if (*p++) goto einval; pr_debug("register: offset: %#x\n", e->offset); /* Parse the 'magic' field. */ e->magic = p; p = scanarg(p, del); if (!p) goto einval; if (!e->magic[0]) goto einval; if (USE_DEBUG) print_hex_dump_bytes( KBUILD_MODNAME ": register: magic[raw]: ", DUMP_PREFIX_NONE, e->magic, p - e->magic); /* Parse the 'mask' field. */ e->mask = p; p = scanarg(p, del); if (!p) goto einval; if (!e->mask[0]) { e->mask = NULL; pr_debug("register: mask[raw]: none\n"); } else if (USE_DEBUG) print_hex_dump_bytes( KBUILD_MODNAME ": register: mask[raw]: ", DUMP_PREFIX_NONE, e->mask, p - e->mask); /* * Decode the magic & mask fields. * Note: while we might have accepted embedded NUL bytes from * above, the unescape helpers here will stop at the first one * it encounters. */ e->size = string_unescape_inplace(e->magic, UNESCAPE_HEX); if (e->mask && string_unescape_inplace(e->mask, UNESCAPE_HEX) != e->size) goto einval; if (e->size > BINPRM_BUF_SIZE || BINPRM_BUF_SIZE - e->size < e->offset) goto einval; pr_debug("register: magic/mask length: %i\n", e->size); if (USE_DEBUG) { print_hex_dump_bytes( KBUILD_MODNAME ": register: magic[decoded]: ", DUMP_PREFIX_NONE, e->magic, e->size); if (e->mask) { int i; char *masked = kmalloc(e->size, GFP_KERNEL_ACCOUNT); print_hex_dump_bytes( KBUILD_MODNAME ": register: mask[decoded]: ", DUMP_PREFIX_NONE, e->mask, e->size); if (masked) { for (i = 0; i < e->size; ++i) masked[i] = e->magic[i] & e->mask[i]; print_hex_dump_bytes( KBUILD_MODNAME ": register: magic[masked]: ", DUMP_PREFIX_NONE, masked, e->size); kfree(masked); } } } } else { /* Handle the 'E' (extension) format. */ /* Skip the 'offset' field. */ p = strchr(p, del); if (!p) goto einval; *p++ = '\0'; /* Parse the 'magic' field. */ e->magic = p; p = strchr(p, del); if (!p) goto einval; *p++ = '\0'; if (!e->magic[0] || strchr(e->magic, '/')) goto einval; pr_debug("register: extension: {%s}\n", e->magic); /* Skip the 'mask' field. */ p = strchr(p, del); if (!p) goto einval; *p++ = '\0'; } /* Parse the 'interpreter' field. */ e->interpreter = p; p = strchr(p, del); if (!p) goto einval; *p++ = '\0'; if (!e->interpreter[0]) goto einval; pr_debug("register: interpreter: {%s}\n", e->interpreter); /* Parse the 'flags' field. 
*/ p = check_special_flags(p, e); if (*p == '\n') p++; if (p != buf + count) goto einval; return e; out: return ERR_PTR(err); efault: kfree(e); return ERR_PTR(-EFAULT); einval: kfree(e); return ERR_PTR(-EINVAL); } /* * Set status of entry/binfmt_misc: * '1' enables, '0' disables and '-1' clears entry/binfmt_misc */ static int parse_command(const char __user *buffer, size_t count) { char s[4]; if (count > 3) return -EINVAL; if (copy_from_user(s, buffer, count)) return -EFAULT; if (!count) return 0; if (s[count - 1] == '\n') count--; if (count == 1 && s[0] == '0') return 1; if (count == 1 && s[0] == '1') return 2; if (count == 2 && s[0] == '-' && s[1] == '1') return 3; return -EINVAL; } /* generic stuff */ static void entry_status(Node *e, char *page) { char *dp = page; const char *status = "disabled"; if (test_bit(Enabled, &e->flags)) status = "enabled"; if (!VERBOSE_STATUS) { sprintf(page, "%s\n", status); return; } dp += sprintf(dp, "%s\ninterpreter %s\n", status, e->interpreter); /* print the special flags */ dp += sprintf(dp, "flags: "); if (e->flags & MISC_FMT_PRESERVE_ARGV0) *dp++ = 'P'; if (e->flags & MISC_FMT_OPEN_BINARY) *dp++ = 'O'; if (e->flags & MISC_FMT_CREDENTIALS) *dp++ = 'C'; if (e->flags & MISC_FMT_OPEN_FILE) *dp++ = 'F'; *dp++ = '\n'; if (!test_bit(Magic, &e->flags)) { sprintf(dp, "extension .%s\n", e->magic); } else { dp += sprintf(dp, "offset %i\nmagic ", e->offset); dp = bin2hex(dp, e->magic, e->size); if (e->mask) { dp += sprintf(dp, "\nmask "); dp = bin2hex(dp, e->mask, e->size); } *dp++ = '\n'; *dp = '\0'; } } static struct inode *bm_get_inode(struct super_block *sb, int mode) { struct inode *inode = new_inode(sb); if (inode) { inode->i_ino = get_next_ino(); inode->i_mode = mode; simple_inode_init_ts(inode); } return inode; } /** * i_binfmt_misc - retrieve struct binfmt_misc from a binfmt_misc inode * @inode: inode of the relevant binfmt_misc instance * * This helper retrieves struct binfmt_misc from a binfmt_misc inode. This can * be done without any memory barriers because we are guaranteed that * user_ns->binfmt_misc is fully initialized. It was fully initialized when the * binfmt_misc mount was first created. * * Return: struct binfmt_misc of the relevant binfmt_misc instance */ static struct binfmt_misc *i_binfmt_misc(struct inode *inode) { return inode->i_sb->s_user_ns->binfmt_misc; } /** * bm_evict_inode - cleanup data associated with @inode * @inode: inode to which the data is attached * * Cleanup the binary type handler data associated with @inode if a binary type * entry is removed or the filesystem is unmounted and the super block is * shutdown. * * If the ->evict call was not caused by a super block shutdown but by a write * to remove the entry or all entries via bm_{entry,status}_write() the entry * will have already been removed from the list. We keep the list_empty() check * to make that explicit. */ static void bm_evict_inode(struct inode *inode) { Node *e = inode->i_private; clear_inode(inode); if (e) { struct binfmt_misc *misc; misc = i_binfmt_misc(inode); write_lock(&misc->entries_lock); if (!list_empty(&e->list)) list_del_init(&e->list); write_unlock(&misc->entries_lock); put_binfmt_handler(e); } } /** * unlink_binfmt_dentry - remove the dentry for the binary type handler * @dentry: dentry associated with the binary type handler * * Do the actual filesystem work to remove a dentry for a registered binary * type handler. 
Since binfmt_misc only allows simple files to be created * directly under the root dentry of the filesystem we ensure that we are * indeed passed a dentry directly beneath the root dentry, that the inode * associated with the root dentry is locked, and that it is a regular file we * are asked to remove. */ static void unlink_binfmt_dentry(struct dentry *dentry) { struct dentry *parent = dentry->d_parent; struct inode *inode, *parent_inode; /* All entries are immediate descendants of the root dentry. */ if (WARN_ON_ONCE(dentry->d_sb->s_root != parent)) return; /* We only expect to be called on regular files. */ inode = d_inode(dentry); if (WARN_ON_ONCE(!S_ISREG(inode->i_mode))) return; /* The parent inode must be locked. */ parent_inode = d_inode(parent); if (WARN_ON_ONCE(!inode_is_locked(parent_inode))) return; if (simple_positive(dentry)) { dget(dentry); simple_unlink(parent_inode, dentry); d_delete(dentry); dput(dentry); } } /** * remove_binfmt_handler - remove a binary type handler * @misc: handle to binfmt_misc instance * @e: binary type handler to remove * * Remove a binary type handler from the list of binary type handlers and * remove its associated dentry. This is called from * binfmt_{entry,status}_write(). In the future, we might want to think about * adding a proper ->unlink() method to binfmt_misc instead of forcing caller's * to use writes to files in order to delete binary type handlers. But it has * worked for so long that it's not a pressing issue. */ static void remove_binfmt_handler(struct binfmt_misc *misc, Node *e) { write_lock(&misc->entries_lock); list_del_init(&e->list); write_unlock(&misc->entries_lock); unlink_binfmt_dentry(e->dentry); } /* /<entry> */ static ssize_t bm_entry_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { Node *e = file_inode(file)->i_private; ssize_t res; char *page; page = (char *) __get_free_page(GFP_KERNEL); if (!page) return -ENOMEM; entry_status(e, page); res = simple_read_from_buffer(buf, nbytes, ppos, page, strlen(page)); free_page((unsigned long) page); return res; } static ssize_t bm_entry_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { struct inode *inode = file_inode(file); Node *e = inode->i_private; int res = parse_command(buffer, count); switch (res) { case 1: /* Disable this handler. */ clear_bit(Enabled, &e->flags); break; case 2: /* Enable this handler. */ set_bit(Enabled, &e->flags); break; case 3: /* Delete this handler. */ inode = d_inode(inode->i_sb->s_root); inode_lock(inode); /* * In order to add new element or remove elements from the list * via bm_{entry,register,status}_write() inode_lock() on the * root inode must be held. * The lock is exclusive ensuring that the list can't be * modified. Only load_misc_binary() can access but does so * read-only. So we only need to take the write lock when we * actually remove the entry from the list. 
*/ if (!list_empty(&e->list)) remove_binfmt_handler(i_binfmt_misc(inode), e); inode_unlock(inode); break; default: return res; } return count; } static const struct file_operations bm_entry_operations = { .read = bm_entry_read, .write = bm_entry_write, .llseek = default_llseek, }; /* /register */ static ssize_t bm_register_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { Node *e; struct inode *inode; struct super_block *sb = file_inode(file)->i_sb; struct dentry *root = sb->s_root, *dentry; struct binfmt_misc *misc; int err = 0; struct file *f = NULL; e = create_entry(buffer, count); if (IS_ERR(e)) return PTR_ERR(e); if (e->flags & MISC_FMT_OPEN_FILE) { const struct cred *old_cred; /* * Now that we support unprivileged binfmt_misc mounts make * sure we use the credentials that the register @file was * opened with to also open the interpreter. Before that this * didn't matter much as only a privileged process could open * the register file. */ old_cred = override_creds(file->f_cred); f = open_exec(e->interpreter); revert_creds(old_cred); if (IS_ERR(f)) { pr_notice("register: failed to install interpreter file %s\n", e->interpreter); kfree(e); return PTR_ERR(f); } e->interp_file = f; } inode_lock(d_inode(root)); dentry = lookup_one_len(e->name, root, strlen(e->name)); err = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out; err = -EEXIST; if (d_really_is_positive(dentry)) goto out2; inode = bm_get_inode(sb, S_IFREG | 0644); err = -ENOMEM; if (!inode) goto out2; refcount_set(&e->users, 1); e->dentry = dget(dentry); inode->i_private = e; inode->i_fop = &bm_entry_operations; d_instantiate(dentry, inode); misc = i_binfmt_misc(inode); write_lock(&misc->entries_lock); list_add(&e->list, &misc->entries); write_unlock(&misc->entries_lock); err = 0; out2: dput(dentry); out: inode_unlock(d_inode(root)); if (err) { if (f) filp_close(f, NULL); kfree(e); return err; } return count; } static const struct file_operations bm_register_operations = { .write = bm_register_write, .llseek = noop_llseek, }; /* /status */ static ssize_t bm_status_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { struct binfmt_misc *misc; char *s; misc = i_binfmt_misc(file_inode(file)); s = misc->enabled ? "enabled\n" : "disabled\n"; return simple_read_from_buffer(buf, nbytes, ppos, s, strlen(s)); } static ssize_t bm_status_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { struct binfmt_misc *misc; int res = parse_command(buffer, count); Node *e, *next; struct inode *inode; misc = i_binfmt_misc(file_inode(file)); switch (res) { case 1: /* Disable all handlers. */ misc->enabled = false; break; case 2: /* Enable all handlers. */ misc->enabled = true; break; case 3: /* Delete all handlers. */ inode = d_inode(file_inode(file)->i_sb->s_root); inode_lock(inode); /* * In order to add new element or remove elements from the list * via bm_{entry,register,status}_write() inode_lock() on the * root inode must be held. * The lock is exclusive ensuring that the list can't be * modified. Only load_misc_binary() can access but does so * read-only. So we only need to take the write lock when we * actually remove the entry from the list. 
*/ list_for_each_entry_safe(e, next, &misc->entries, list) remove_binfmt_handler(misc, e); inode_unlock(inode); break; default: return res; } return count; } static const struct file_operations bm_status_operations = { .read = bm_status_read, .write = bm_status_write, .llseek = default_llseek, }; /* Superblock handling */ static void bm_put_super(struct super_block *sb) { struct user_namespace *user_ns = sb->s_fs_info; sb->s_fs_info = NULL; put_user_ns(user_ns); } static const struct super_operations s_ops = { .statfs = simple_statfs, .evict_inode = bm_evict_inode, .put_super = bm_put_super, }; static int bm_fill_super(struct super_block *sb, struct fs_context *fc) { int err; struct user_namespace *user_ns = sb->s_user_ns; struct binfmt_misc *misc; static const struct tree_descr bm_files[] = { [2] = {"status", &bm_status_operations, S_IWUSR|S_IRUGO}, [3] = {"register", &bm_register_operations, S_IWUSR}, /* last one */ {""} }; if (WARN_ON(user_ns != current_user_ns())) return -EINVAL; /* * Lazily allocate a new binfmt_misc instance for this namespace, i.e. * do it here during the first mount of binfmt_misc. We don't need to * waste memory for every user namespace allocation. It's likely much * more common to not mount a separate binfmt_misc instance than it is * to mount one. * * While multiple superblocks can exist they are keyed by userns in * s_fs_info for binfmt_misc. Hence, the vfs guarantees that * bm_fill_super() is called exactly once whenever a binfmt_misc * superblock for a userns is created. This in turn lets us conclude * that when a binfmt_misc superblock is created for the first time for * a userns there's no one racing us. Therefore we don't need any * barriers when we dereference binfmt_misc. */ misc = user_ns->binfmt_misc; if (!misc) { /* * If it turns out that most user namespaces actually want to * register their own binary type handler and therefore all * create their own separate binfmt_misc mounts we should * consider turning this into a kmem cache. */ misc = kzalloc(sizeof(struct binfmt_misc), GFP_KERNEL); if (!misc) return -ENOMEM; INIT_LIST_HEAD(&misc->entries); rwlock_init(&misc->entries_lock); /* Pairs with smp_load_acquire() in load_binfmt_misc(). */ smp_store_release(&user_ns->binfmt_misc, misc); } /* * When the binfmt_misc superblock for this userns is shutdown * ->enabled might have been set to false and we don't reinitialize * ->enabled again in put_super() as someone might already be mounting * binfmt_misc again. It also would be pointless since by the time * ->put_super() is called we know that the binary type list for this * bintfmt_misc mount is empty making load_misc_binary() return * -ENOEXEC independent of whether ->enabled is true. Instead, if * someone mounts binfmt_misc for the first time or again we simply * reset ->enabled to true. 
*/ misc->enabled = true; err = simple_fill_super(sb, BINFMTFS_MAGIC, bm_files); if (!err) sb->s_op = &s_ops; return err; } static void bm_free(struct fs_context *fc) { if (fc->s_fs_info) put_user_ns(fc->s_fs_info); } static int bm_get_tree(struct fs_context *fc) { return get_tree_keyed(fc, bm_fill_super, get_user_ns(fc->user_ns)); } static const struct fs_context_operations bm_context_ops = { .free = bm_free, .get_tree = bm_get_tree, }; static int bm_init_fs_context(struct fs_context *fc) { fc->ops = &bm_context_ops; return 0; } static struct linux_binfmt misc_format = { .module = THIS_MODULE, .load_binary = load_misc_binary, }; static struct file_system_type bm_fs_type = { .owner = THIS_MODULE, .name = "binfmt_misc", .init_fs_context = bm_init_fs_context, .fs_flags = FS_USERNS_MOUNT, .kill_sb = kill_litter_super, }; MODULE_ALIAS_FS("binfmt_misc"); static int __init init_misc_binfmt(void) { int err = register_filesystem(&bm_fs_type); if (!err) insert_binfmt(&misc_format); return err; } static void __exit exit_misc_binfmt(void) { unregister_binfmt(&misc_format); unregister_filesystem(&bm_fs_type); } core_initcall(init_misc_binfmt); module_exit(exit_misc_binfmt); MODULE_DESCRIPTION("Kernel support for miscellaneous binaries"); MODULE_LICENSE("GPL");
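create_entry() above parses registration strings of the form ':name:type:offset:magic:mask:interpreter:flags'. As a usage illustration, the sketch below registers an extension ('E') handler from userspace. It assumes binfmt_misc is mounted at the conventional /proc/sys/fs/binfmt_misc, that the caller is privileged, and that the interpreter path exists; the entry name "demo-py" and /usr/bin/python3 are purely illustrative.

/*
 * Register a binfmt_misc handler by writing the string that create_entry()
 * parses. 'E' type: match on the ".py" filename extension, so the offset,
 * magic and mask fields stay empty.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	static const char rule[] = ":demo-py:E::py::/usr/bin/python3:";
	int fd;

	fd = open("/proc/sys/fs/binfmt_misc/register", O_WRONLY);
	if (fd < 0) {
		perror("open register");
		return 1;
	}
	if (write(fd, rule, strlen(rule)) != (ssize_t)strlen(rule)) {
		perror("write rule");
		close(fd);
		return 1;
	}
	close(fd);

	/*
	 * The new entry appears as /proc/sys/fs/binfmt_misc/demo-py;
	 * writing "-1" to it later removes the handler again, which is
	 * the path handled by bm_entry_write() above.
	 */
	puts("registered demo-py handler");
	return 0;
}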
7 2 5 20 8 2 2 1 1 11 1 10 8 10 7 5 5 9 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2002 * Portions Copyright (C) Christoph Hellwig, 2001-2002 */ #include <linux/mm.h> #include <linux/fs.h> #include <linux/posix_acl.h> #include <linux/quotaops.h> #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_dmap.h" #include "jfs_txnmgr.h" #include "jfs_xattr.h" #include "jfs_acl.h" #include "jfs_debug.h" int jfs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; int rc = 0; rc = file_write_and_wait_range(file, start, end); if (rc) return rc; inode_lock(inode); if (!(inode->i_state & I_DIRTY_ALL) || (datasync && !(inode->i_state & I_DIRTY_DATASYNC))) { /* Make sure committed changes hit the disk */ jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1); inode_unlock(inode); return rc; } rc |= jfs_commit_inode(inode, 1); inode_unlock(inode); return rc ? -EIO : 0; } static int jfs_open(struct inode *inode, struct file *file) { int rc; if ((rc = dquot_file_open(inode, file))) return rc; /* * We attempt to allow only one "active" file open per aggregate * group. Otherwise, appending to files in parallel can cause * fragmentation within the files. * * If the file is empty, it was probably just created and going * to be written to. If it has a size, we'll hold off until the * file is actually grown. 
*/ if (S_ISREG(inode->i_mode) && file->f_mode & FMODE_WRITE && (inode->i_size == 0)) { struct jfs_inode_info *ji = JFS_IP(inode); spin_lock_irq(&ji->ag_lock); if (ji->active_ag == -1) { struct jfs_sb_info *jfs_sb = JFS_SBI(inode->i_sb); ji->active_ag = BLKTOAG(addressPXD(&ji->ixpxd), jfs_sb); atomic_inc(&jfs_sb->bmap->db_active[ji->active_ag]); } spin_unlock_irq(&ji->ag_lock); } return 0; } static int jfs_release(struct inode *inode, struct file *file) { struct jfs_inode_info *ji = JFS_IP(inode); spin_lock_irq(&ji->ag_lock); if (ji->active_ag != -1) { struct bmap *bmap = JFS_SBI(inode->i_sb)->bmap; atomic_dec(&bmap->db_active[ji->active_ag]); ji->active_ag = -1; } spin_unlock_irq(&ji->ag_lock); return 0; } int jfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { struct inode *inode = d_inode(dentry); int rc; rc = setattr_prepare(&nop_mnt_idmap, dentry, iattr); if (rc) return rc; if (is_quota_modification(&nop_mnt_idmap, inode, iattr)) { rc = dquot_initialize(inode); if (rc) return rc; } if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) || (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) { rc = dquot_transfer(&nop_mnt_idmap, inode, iattr); if (rc) return rc; } if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size != i_size_read(inode)) { inode_dio_wait(inode); rc = inode_newsize_ok(inode, iattr->ia_size); if (rc) return rc; truncate_setsize(inode, iattr->ia_size); jfs_truncate(inode); } setattr_copy(&nop_mnt_idmap, inode, iattr); mark_inode_dirty(inode); if (iattr->ia_valid & ATTR_MODE) rc = posix_acl_chmod(&nop_mnt_idmap, dentry, inode->i_mode); return rc; } const struct inode_operations jfs_file_inode_operations = { .listxattr = jfs_listxattr, .setattr = jfs_setattr, .fileattr_get = jfs_fileattr_get, .fileattr_set = jfs_fileattr_set, #ifdef CONFIG_JFS_POSIX_ACL .get_inode_acl = jfs_get_acl, .set_acl = jfs_set_acl, #endif }; const struct file_operations jfs_file_operations = { .open = jfs_open, .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .splice_read = filemap_splice_read, .splice_write = iter_file_splice_write, .fsync = jfs_fsync, .release = jfs_release, .unlocked_ioctl = jfs_ioctl, .compat_ioctl = compat_ptr_ioctl, };
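jfs_fsync() above skips the inode commit when the caller asked for a data-only sync and nothing beyond non-essential metadata is dirty. From userspace that distinction is driven by fdatasync() versus fsync(); the minimal sketch below exercises both, with an illustrative file name and deliberately thin error handling.

/*
 * Userspace view of the path that ends in jfs_fsync(): fdatasync() lets the
 * filesystem skip committing inode metadata that is not needed to retrieve
 * the written data (e.g. timestamps), while fsync() always requires the
 * full inode commit.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char msg[] = "append-only log record\n";
	int fd = open("demo.log", O_WRONLY | O_CREAT | O_APPEND, 0644);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, msg, strlen(msg)) < 0) {
		perror("write");
		close(fd);
		return 1;
	}
	/* Data (and any size change) must be durable; mtime may lag. */
	if (fdatasync(fd) < 0)
		perror("fdatasync");
	/* Full barrier: data plus all inode metadata. */
	if (fsync(fd) < 0)
		perror("fsync");

	close(fd);
	return 0;
}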
2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 // SPDX-License-Identifier: GPL-2.0-or-later /* * HID driver for multitouch panels * * Copyright (c) 2010-2012 Stephane Chatty <chatty@enac.fr> * Copyright (c) 2010-2013 Benjamin Tissoires <benjamin.tissoires@gmail.com> * Copyright (c) 2010-2012 Ecole Nationale de l'Aviation Civile, France * Copyright (c) 2012-2013 Red Hat, Inc * * This code is partly based on hid-egalax.c: * * Copyright (c) 2010 Stephane Chatty <chatty@enac.fr> * Copyright (c) 2010 Henrik Rydberg <rydberg@euromail.se> * Copyright (c) 2010 Canonical, Ltd. * * This code is partly based on hid-3m-pct.c: * * Copyright (c) 2009-2010 Stephane Chatty <chatty@enac.fr> * Copyright (c) 2010 Henrik Rydberg <rydberg@euromail.se> * Copyright (c) 2010 Canonical, Ltd. */ /* */ /* * This driver is regularly tested thanks to the test suite in hid-tools[1]. * Please run these regression tests before patching this module so that * your patch won't break existing known devices. * * [1] https://gitlab.freedesktop.org/libevdev/hid-tools */ #include <linux/bits.h> #include <linux/device.h> #include <linux/hid.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/input/mt.h> #include <linux/jiffies.h> #include <linux/string.h> #include <linux/timer.h> MODULE_AUTHOR("Stephane Chatty <chatty@enac.fr>"); MODULE_AUTHOR("Benjamin Tissoires <benjamin.tissoires@gmail.com>"); MODULE_DESCRIPTION("HID multitouch panels"); MODULE_LICENSE("GPL"); #include "hid-ids.h" /* quirks to control the device */ #define MT_QUIRK_NOT_SEEN_MEANS_UP BIT(0) #define MT_QUIRK_SLOT_IS_CONTACTID BIT(1) #define MT_QUIRK_CYPRESS BIT(2) #define MT_QUIRK_SLOT_IS_CONTACTNUMBER BIT(3) #define MT_QUIRK_ALWAYS_VALID BIT(4) #define MT_QUIRK_VALID_IS_INRANGE BIT(5) #define MT_QUIRK_VALID_IS_CONFIDENCE BIT(6) #define MT_QUIRK_CONFIDENCE BIT(7) #define MT_QUIRK_SLOT_IS_CONTACTID_MINUS_ONE BIT(8) #define MT_QUIRK_NO_AREA BIT(9) #define MT_QUIRK_IGNORE_DUPLICATES BIT(10) #define MT_QUIRK_HOVERING BIT(11) #define MT_QUIRK_CONTACT_CNT_ACCURATE BIT(12) #define MT_QUIRK_FORCE_GET_FEATURE BIT(13) #define MT_QUIRK_FIX_CONST_CONTACT_ID BIT(14) #define MT_QUIRK_TOUCH_SIZE_SCALING BIT(15) #define MT_QUIRK_STICKY_FINGERS BIT(16) #define MT_QUIRK_ASUS_CUSTOM_UP BIT(17) #define MT_QUIRK_WIN8_PTP_BUTTONS BIT(18) #define MT_QUIRK_SEPARATE_APP_REPORT BIT(19) #define MT_QUIRK_FORCE_MULTI_INPUT BIT(20) #define MT_QUIRK_DISABLE_WAKEUP BIT(21) #define MT_QUIRK_ORIENTATION_INVERT BIT(22) #define MT_INPUTMODE_TOUCHSCREEN 0x02 #define MT_INPUTMODE_TOUCHPAD 0x03 #define MT_BUTTONTYPE_CLICKPAD 0 enum latency_mode { HID_LATENCY_NORMAL = 0, HID_LATENCY_HIGH = 1, }; enum report_mode { TOUCHPAD_REPORT_NONE = 0, TOUCHPAD_REPORT_BUTTONS = BIT(0), TOUCHPAD_REPORT_CONTACTS = BIT(1), TOUCHPAD_REPORT_ALL = TOUCHPAD_REPORT_BUTTONS | TOUCHPAD_REPORT_CONTACTS, }; #define MT_IO_FLAGS_RUNNING 0 #define MT_IO_FLAGS_ACTIVE_SLOTS 1 #define MT_IO_FLAGS_PENDING_SLOTS 2 static const bool mtrue = true; /* default for true */ static const bool mfalse; /* default for false */ static const __s32 mzero; /* default for 0 */ #define DEFAULT_TRUE ((void *)&mtrue) #define DEFAULT_FALSE ((void *)&mfalse) #define DEFAULT_ZERO ((void *)&mzero) struct mt_usages { struct list_head list; __s32 *x, *y, *cx, *cy, *p, *w, *h, *a; __s32 *contactid; /* the device ContactID assigned to this slot */ bool *tip_state; /* is the touch valid? */ bool *inrange_state; /* is the finger in proximity of the sensor? */ bool *confidence_state; /* is the touch made by a finger? 
*/ }; struct mt_application { struct list_head list; unsigned int application; unsigned int report_id; struct list_head mt_usages; /* mt usages list */ __s32 quirks; __s32 *scantime; /* scantime reported */ __s32 scantime_logical_max; /* max value for raw scantime */ __s32 *raw_cc; /* contact count in the report */ int left_button_state; /* left button state */ unsigned int mt_flags; /* flags to pass to input-mt */ unsigned long *pending_palm_slots; /* slots where we reported palm * and need to release */ __u8 num_received; /* how many contacts we received */ __u8 num_expected; /* expected last contact index */ __u8 buttons_count; /* number of physical buttons per touchpad */ __u8 touches_by_report; /* how many touches are present in one report: * 1 means we should use a serial protocol * > 1 means hybrid (multitouch) protocol */ unsigned long jiffies; /* the frame's jiffies */ int timestamp; /* the timestamp to be sent */ int prev_scantime; /* scantime reported previously */ bool have_contact_count; }; struct mt_class { __s32 name; /* MT_CLS */ __s32 quirks; __s32 sn_move; /* Signal/noise ratio for move events */ __s32 sn_width; /* Signal/noise ratio for width events */ __s32 sn_height; /* Signal/noise ratio for height events */ __s32 sn_pressure; /* Signal/noise ratio for pressure events */ __u8 maxcontacts; bool is_indirect; /* true for touchpads */ bool export_all_inputs; /* do not ignore mouse, keyboards, etc... */ }; struct mt_report_data { struct list_head list; struct hid_report *report; struct mt_application *application; bool is_mt_collection; }; struct mt_device { struct mt_class mtclass; /* our mt device class */ struct timer_list release_timer; /* to release sticky fingers */ struct hid_device *hdev; /* hid_device we're attached to */ unsigned long mt_io_flags; /* mt flags (MT_IO_FLAGS_*) */ __u8 inputmode_value; /* InputMode HID feature value */ __u8 maxcontacts; bool is_buttonpad; /* is this device a button pad? 
*/ bool serial_maybe; /* need to check for serial protocol */ struct list_head applications; struct list_head reports; }; static void mt_post_parse_default_settings(struct mt_device *td, struct mt_application *app); static void mt_post_parse(struct mt_device *td, struct mt_application *app); /* classes of device behavior */ #define MT_CLS_DEFAULT 0x0001 #define MT_CLS_SERIAL 0x0002 #define MT_CLS_CONFIDENCE 0x0003 #define MT_CLS_CONFIDENCE_CONTACT_ID 0x0004 #define MT_CLS_CONFIDENCE_MINUS_ONE 0x0005 #define MT_CLS_DUAL_INRANGE_CONTACTID 0x0006 #define MT_CLS_DUAL_INRANGE_CONTACTNUMBER 0x0007 /* reserved 0x0008 */ #define MT_CLS_INRANGE_CONTACTNUMBER 0x0009 #define MT_CLS_NSMU 0x000a /* reserved 0x0010 */ /* reserved 0x0011 */ #define MT_CLS_WIN_8 0x0012 #define MT_CLS_EXPORT_ALL_INPUTS 0x0013 /* reserved 0x0014 */ #define MT_CLS_WIN_8_FORCE_MULTI_INPUT 0x0015 #define MT_CLS_WIN_8_DISABLE_WAKEUP 0x0016 #define MT_CLS_WIN_8_NO_STICKY_FINGERS 0x0017 #define MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU 0x0018 /* vendor specific classes */ #define MT_CLS_3M 0x0101 /* reserved 0x0102 */ #define MT_CLS_EGALAX 0x0103 #define MT_CLS_EGALAX_SERIAL 0x0104 #define MT_CLS_TOPSEED 0x0105 #define MT_CLS_PANASONIC 0x0106 #define MT_CLS_FLATFROG 0x0107 #define MT_CLS_GENERALTOUCH_TWOFINGERS 0x0108 #define MT_CLS_GENERALTOUCH_PWT_TENFINGERS 0x0109 #define MT_CLS_LG 0x010a #define MT_CLS_ASUS 0x010b #define MT_CLS_VTL 0x0110 #define MT_CLS_GOOGLE 0x0111 #define MT_CLS_RAZER_BLADE_STEALTH 0x0112 #define MT_CLS_SMART_TECH 0x0113 #define MT_CLS_SIS 0x0457 #define MT_DEFAULT_MAXCONTACT 10 #define MT_MAX_MAXCONTACT 250 /* * Resync device and local timestamps after that many microseconds without * receiving data. */ #define MAX_TIMESTAMP_INTERVAL 1000000 #define MT_USB_DEVICE(v, p) HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH, v, p) #define MT_BT_DEVICE(v, p) HID_DEVICE(BUS_BLUETOOTH, HID_GROUP_MULTITOUCH, v, p) /* * these device-dependent functions determine what slot corresponds * to a valid contact that was just read. 
*/ static int cypress_compute_slot(struct mt_application *application, struct mt_usages *slot) { if (*slot->contactid != 0 || application->num_received == 0) return *slot->contactid; else return -1; } static const struct mt_class mt_classes[] = { { .name = MT_CLS_DEFAULT, .quirks = MT_QUIRK_ALWAYS_VALID | MT_QUIRK_CONTACT_CNT_ACCURATE }, { .name = MT_CLS_NSMU, .quirks = MT_QUIRK_NOT_SEEN_MEANS_UP }, { .name = MT_CLS_SERIAL, .quirks = MT_QUIRK_ALWAYS_VALID}, { .name = MT_CLS_CONFIDENCE, .quirks = MT_QUIRK_VALID_IS_CONFIDENCE }, { .name = MT_CLS_CONFIDENCE_CONTACT_ID, .quirks = MT_QUIRK_VALID_IS_CONFIDENCE | MT_QUIRK_SLOT_IS_CONTACTID }, { .name = MT_CLS_CONFIDENCE_MINUS_ONE, .quirks = MT_QUIRK_VALID_IS_CONFIDENCE | MT_QUIRK_SLOT_IS_CONTACTID_MINUS_ONE }, { .name = MT_CLS_DUAL_INRANGE_CONTACTID, .quirks = MT_QUIRK_VALID_IS_INRANGE | MT_QUIRK_SLOT_IS_CONTACTID, .maxcontacts = 2 }, { .name = MT_CLS_DUAL_INRANGE_CONTACTNUMBER, .quirks = MT_QUIRK_VALID_IS_INRANGE | MT_QUIRK_SLOT_IS_CONTACTNUMBER, .maxcontacts = 2 }, { .name = MT_CLS_INRANGE_CONTACTNUMBER, .quirks = MT_QUIRK_VALID_IS_INRANGE | MT_QUIRK_SLOT_IS_CONTACTNUMBER }, { .name = MT_CLS_WIN_8, .quirks = MT_QUIRK_ALWAYS_VALID | MT_QUIRK_IGNORE_DUPLICATES | MT_QUIRK_HOVERING | MT_QUIRK_CONTACT_CNT_ACCURATE | MT_QUIRK_STICKY_FINGERS | MT_QUIRK_WIN8_PTP_BUTTONS, .export_all_inputs = true }, { .name = MT_CLS_EXPORT_ALL_INPUTS, .quirks = MT_QUIRK_ALWAYS_VALID | MT_QUIRK_CONTACT_CNT_ACCURATE, .export_all_inputs = true }, { .name = MT_CLS_WIN_8_FORCE_MULTI_INPUT, .quirks = MT_QUIRK_ALWAYS_VALID | MT_QUIRK_IGNORE_DUPLICATES | MT_QUIRK_HOVERING | MT_QUIRK_CONTACT_CNT_ACCURATE | MT_QUIRK_STICKY_FINGERS | MT_QUIRK_WIN8_PTP_BUTTONS | MT_QUIRK_FORCE_MULTI_INPUT, .export_all_inputs = true }, { .name = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU, .quirks = MT_QUIRK_IGNORE_DUPLICATES | MT_QUIRK_HOVERING | MT_QUIRK_CONTACT_CNT_ACCURATE | MT_QUIRK_STICKY_FINGERS | MT_QUIRK_WIN8_PTP_BUTTONS | MT_QUIRK_FORCE_MULTI_INPUT | MT_QUIRK_NOT_SEEN_MEANS_UP, .export_all_inputs = true }, { .name = MT_CLS_WIN_8_DISABLE_WAKEUP, .quirks = MT_QUIRK_ALWAYS_VALID | MT_QUIRK_IGNORE_DUPLICATES | MT_QUIRK_HOVERING | MT_QUIRK_CONTACT_CNT_ACCURATE | MT_QUIRK_STICKY_FINGERS | MT_QUIRK_WIN8_PTP_BUTTONS | MT_QUIRK_DISABLE_WAKEUP, .export_all_inputs = true }, { .name = MT_CLS_WIN_8_NO_STICKY_FINGERS, .quirks = MT_QUIRK_ALWAYS_VALID | MT_QUIRK_IGNORE_DUPLICATES | MT_QUIRK_HOVERING | MT_QUIRK_CONTACT_CNT_ACCURATE | MT_QUIRK_WIN8_PTP_BUTTONS, .export_all_inputs = true }, /* * vendor specific classes */ { .name = MT_CLS_3M, .quirks = MT_QUIRK_VALID_IS_CONFIDENCE | MT_QUIRK_SLOT_IS_CONTACTID | MT_QUIRK_TOUCH_SIZE_SCALING, .sn_move = 2048, .sn_width = 128, .sn_height = 128, .maxcontacts = 60, }, { .name = MT_CLS_EGALAX, .quirks = MT_QUIRK_SLOT_IS_CONTACTID | MT_QUIRK_VALID_IS_INRANGE, .sn_move = 4096, .sn_pressure = 32, }, { .name = MT_CLS_EGALAX_SERIAL, .quirks = MT_QUIRK_SLOT_IS_CONTACTID | MT_QUIRK_ALWAYS_VALID, .sn_move = 4096, .sn_pressure = 32, }, { .name = MT_CLS_TOPSEED, .quirks = MT_QUIRK_ALWAYS_VALID, .is_indirect = true, .maxcontacts = 2, }, { .name = MT_CLS_PANASONIC, .quirks = MT_QUIRK_NOT_SEEN_MEANS_UP, .maxcontacts = 4 }, { .name = MT_CLS_GENERALTOUCH_TWOFINGERS, .quirks = MT_QUIRK_NOT_SEEN_MEANS_UP | MT_QUIRK_VALID_IS_INRANGE | MT_QUIRK_SLOT_IS_CONTACTID, .maxcontacts = 2 }, { .name = MT_CLS_GENERALTOUCH_PWT_TENFINGERS, .quirks = MT_QUIRK_NOT_SEEN_MEANS_UP | MT_QUIRK_SLOT_IS_CONTACTID }, { .name = MT_CLS_FLATFROG, .quirks = MT_QUIRK_NOT_SEEN_MEANS_UP | MT_QUIRK_NO_AREA, .sn_move = 
2048, .maxcontacts = 40, }, { .name = MT_CLS_LG, .quirks = MT_QUIRK_ALWAYS_VALID | MT_QUIRK_FIX_CONST_CONTACT_ID | MT_QUIRK_IGNORE_DUPLICATES | MT_QUIRK_HOVERING | MT_QUIRK_CONTACT_CNT_ACCURATE }, { .name = MT_CLS_ASUS, .quirks = MT_QUIRK_ALWAYS_VALID | MT_QUIRK_CONTACT_CNT_ACCURATE | MT_QUIRK_ASUS_CUSTOM_UP }, { .name = MT_CLS_VTL, .quirks = MT_QUIRK_ALWAYS_VALID | MT_QUIRK_CONTACT_CNT_ACCURATE | MT_QUIRK_FORCE_GET_FEATURE, }, { .name = MT_CLS_GOOGLE, .quirks = MT_QUIRK_ALWAYS_VALID | MT_QUIRK_CONTACT_CNT_ACCURATE | MT_QUIRK_SLOT_IS_CONTACTID | MT_QUIRK_HOVERING }, { .name = MT_CLS_RAZER_BLADE_STEALTH, .quirks = MT_QUIRK_ALWAYS_VALID | MT_QUIRK_IGNORE_DUPLICATES | MT_QUIRK_HOVERING | MT_QUIRK_CONTACT_CNT_ACCURATE | MT_QUIRK_WIN8_PTP_BUTTONS, }, { .name = MT_CLS_SMART_TECH, .quirks = MT_QUIRK_ALWAYS_VALID | MT_QUIRK_IGNORE_DUPLICATES | MT_QUIRK_CONTACT_CNT_ACCURATE | MT_QUIRK_SEPARATE_APP_REPORT, }, { .name = MT_CLS_SIS, .quirks = MT_QUIRK_NOT_SEEN_MEANS_UP | MT_QUIRK_ALWAYS_VALID | MT_QUIRK_CONTACT_CNT_ACCURATE, }, { } }; static ssize_t mt_show_quirks(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct mt_device *td = hid_get_drvdata(hdev); return sprintf(buf, "%u\n", td->mtclass.quirks); } static ssize_t mt_set_quirks(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct mt_device *td = hid_get_drvdata(hdev); struct mt_application *application; unsigned long val; if (kstrtoul(buf, 0, &val)) return -EINVAL; td->mtclass.quirks = val; list_for_each_entry(application, &td->applications, list) { application->quirks = val; if (!application->have_contact_count) application->quirks &= ~MT_QUIRK_CONTACT_CNT_ACCURATE; } return count; } static DEVICE_ATTR(quirks, S_IWUSR | S_IRUGO, mt_show_quirks, mt_set_quirks); static struct attribute *sysfs_attrs[] = { &dev_attr_quirks.attr, NULL }; static const struct attribute_group mt_attribute_group = { .attrs = sysfs_attrs }; static void mt_get_feature(struct hid_device *hdev, struct hid_report *report) { int ret; u32 size = hid_report_len(report); u8 *buf; /* * Do not fetch the feature report if the device has been explicitly * marked as non-capable. 
*/ if (hdev->quirks & HID_QUIRK_NO_INIT_REPORTS) return; buf = hid_alloc_report_buf(report, GFP_KERNEL); if (!buf) return; ret = hid_hw_raw_request(hdev, report->id, buf, size, HID_FEATURE_REPORT, HID_REQ_GET_REPORT); if (ret < 0) { dev_warn(&hdev->dev, "failed to fetch feature %d\n", report->id); } else { ret = hid_report_raw_event(hdev, HID_FEATURE_REPORT, buf, size, 0); if (ret) dev_warn(&hdev->dev, "failed to report feature\n"); } kfree(buf); } static void mt_feature_mapping(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage) { struct mt_device *td = hid_get_drvdata(hdev); switch (usage->hid) { case HID_DG_CONTACTMAX: mt_get_feature(hdev, field->report); td->maxcontacts = field->value[0]; if (!td->maxcontacts && field->logical_maximum <= MT_MAX_MAXCONTACT) td->maxcontacts = field->logical_maximum; if (td->mtclass.maxcontacts) /* check if the maxcontacts is given by the class */ td->maxcontacts = td->mtclass.maxcontacts; break; case HID_DG_BUTTONTYPE: if (usage->usage_index >= field->report_count) { dev_err(&hdev->dev, "HID_DG_BUTTONTYPE out of range\n"); break; } mt_get_feature(hdev, field->report); if (field->value[usage->usage_index] == MT_BUTTONTYPE_CLICKPAD) td->is_buttonpad = true; break; case 0xff0000c5: /* Retrieve the Win8 blob once to enable some devices */ if (usage->usage_index == 0) mt_get_feature(hdev, field->report); break; } } static void set_abs(struct input_dev *input, unsigned int code, struct hid_field *field, int snratio) { int fmin = field->logical_minimum; int fmax = field->logical_maximum; int fuzz = snratio ? (fmax - fmin) / snratio : 0; input_set_abs_params(input, code, fmin, fmax, fuzz, 0); input_abs_set_res(input, code, hidinput_calc_abs_res(field, code)); } static struct mt_usages *mt_allocate_usage(struct hid_device *hdev, struct mt_application *application) { struct mt_usages *usage; usage = devm_kzalloc(&hdev->dev, sizeof(*usage), GFP_KERNEL); if (!usage) return NULL; /* set some defaults so we do not need to check for null pointers */ usage->x = DEFAULT_ZERO; usage->y = DEFAULT_ZERO; usage->cx = DEFAULT_ZERO; usage->cy = DEFAULT_ZERO; usage->p = DEFAULT_ZERO; usage->w = DEFAULT_ZERO; usage->h = DEFAULT_ZERO; usage->a = DEFAULT_ZERO; usage->contactid = DEFAULT_ZERO; usage->tip_state = DEFAULT_FALSE; usage->inrange_state = DEFAULT_FALSE; usage->confidence_state = DEFAULT_TRUE; list_add_tail(&usage->list, &application->mt_usages); return usage; } static struct mt_application *mt_allocate_application(struct mt_device *td, struct hid_report *report) { unsigned int application = report->application; struct mt_application *mt_application; mt_application = devm_kzalloc(&td->hdev->dev, sizeof(*mt_application), GFP_KERNEL); if (!mt_application) return NULL; mt_application->application = application; INIT_LIST_HEAD(&mt_application->mt_usages); if (application == HID_DG_TOUCHSCREEN) mt_application->mt_flags |= INPUT_MT_DIRECT; /* * Model touchscreens providing buttons as touchpads. 
*/ if (application == HID_DG_TOUCHPAD) { mt_application->mt_flags |= INPUT_MT_POINTER; td->inputmode_value = MT_INPUTMODE_TOUCHPAD; } mt_application->scantime = DEFAULT_ZERO; mt_application->raw_cc = DEFAULT_ZERO; mt_application->quirks = td->mtclass.quirks; mt_application->report_id = report->id; list_add_tail(&mt_application->list, &td->applications); return mt_application; } static struct mt_application *mt_find_application(struct mt_device *td, struct hid_report *report) { unsigned int application = report->application; struct mt_application *tmp, *mt_application = NULL; list_for_each_entry(tmp, &td->applications, list) { if (application == tmp->application) { if (!(td->mtclass.quirks & MT_QUIRK_SEPARATE_APP_REPORT) || tmp->report_id == report->id) { mt_application = tmp; break; } } } if (!mt_application) mt_application = mt_allocate_application(td, report); return mt_application; } static struct mt_report_data *mt_allocate_report_data(struct mt_device *td, struct hid_report *report) { struct mt_report_data *rdata; struct hid_field *field; int r, n; rdata = devm_kzalloc(&td->hdev->dev, sizeof(*rdata), GFP_KERNEL); if (!rdata) return NULL; rdata->report = report; rdata->application = mt_find_application(td, report); if (!rdata->application) { devm_kfree(&td->hdev->dev, rdata); return NULL; } for (r = 0; r < report->maxfield; r++) { field = report->field[r]; if (!(HID_MAIN_ITEM_VARIABLE & field->flags)) continue; if (field->logical == HID_DG_FINGER || td->hdev->group != HID_GROUP_MULTITOUCH_WIN_8) { for (n = 0; n < field->report_count; n++) { if (field->usage[n].hid == HID_DG_CONTACTID) { rdata->is_mt_collection = true; break; } } } } list_add_tail(&rdata->list, &td->reports); return rdata; } static struct mt_report_data *mt_find_report_data(struct mt_device *td, struct hid_report *report) { struct mt_report_data *tmp, *rdata = NULL; list_for_each_entry(tmp, &td->reports, list) { if (report == tmp->report) { rdata = tmp; break; } } if (!rdata) rdata = mt_allocate_report_data(td, report); return rdata; } static void mt_store_field(struct hid_device *hdev, struct mt_application *application, __s32 *value, size_t offset) { struct mt_usages *usage; __s32 **target; if (list_empty(&application->mt_usages)) usage = mt_allocate_usage(hdev, application); else usage = list_last_entry(&application->mt_usages, struct mt_usages, list); if (!usage) return; target = (__s32 **)((char *)usage + offset); /* the value has already been filled, create a new slot */ if (*target != DEFAULT_TRUE && *target != DEFAULT_FALSE && *target != DEFAULT_ZERO) { if (usage->contactid == DEFAULT_ZERO || usage->x == DEFAULT_ZERO || usage->y == DEFAULT_ZERO) { hid_dbg(hdev, "ignoring duplicate usage on incomplete"); return; } usage = mt_allocate_usage(hdev, application); if (!usage) return; target = (__s32 **)((char *)usage + offset); } *target = value; } #define MT_STORE_FIELD(__name) \ mt_store_field(hdev, app, \ &field->value[usage->usage_index], \ offsetof(struct mt_usages, __name)) static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max, struct mt_application *app) { struct mt_device *td = hid_get_drvdata(hdev); struct mt_class *cls = &td->mtclass; int code; struct hid_usage *prev_usage = NULL; /* * Model touchscreens providing buttons as touchpads. 
*/ if (field->application == HID_DG_TOUCHSCREEN && (usage->hid & HID_USAGE_PAGE) == HID_UP_BUTTON) { app->mt_flags |= INPUT_MT_POINTER; td->inputmode_value = MT_INPUTMODE_TOUCHPAD; } /* count the buttons on touchpads */ if ((usage->hid & HID_USAGE_PAGE) == HID_UP_BUTTON) app->buttons_count++; if (usage->usage_index) prev_usage = &field->usage[usage->usage_index - 1]; switch (usage->hid & HID_USAGE_PAGE) { case HID_UP_GENDESK: switch (usage->hid) { case HID_GD_X: if (prev_usage && (prev_usage->hid == usage->hid)) { code = ABS_MT_TOOL_X; MT_STORE_FIELD(cx); } else { code = ABS_MT_POSITION_X; MT_STORE_FIELD(x); } set_abs(hi->input, code, field, cls->sn_move); /* * A system multi-axis that exports X and Y has a high * chance of being used directly on a surface */ if (field->application == HID_GD_SYSTEM_MULTIAXIS) { __set_bit(INPUT_PROP_DIRECT, hi->input->propbit); input_set_abs_params(hi->input, ABS_MT_TOOL_TYPE, MT_TOOL_DIAL, MT_TOOL_DIAL, 0, 0); } return 1; case HID_GD_Y: if (prev_usage && (prev_usage->hid == usage->hid)) { code = ABS_MT_TOOL_Y; MT_STORE_FIELD(cy); } else { code = ABS_MT_POSITION_Y; MT_STORE_FIELD(y); } set_abs(hi->input, code, field, cls->sn_move); return 1; } return 0; case HID_UP_DIGITIZER: switch (usage->hid) { case HID_DG_INRANGE: if (app->quirks & MT_QUIRK_HOVERING) { input_set_abs_params(hi->input, ABS_MT_DISTANCE, 0, 1, 0, 0); } MT_STORE_FIELD(inrange_state); return 1; case HID_DG_CONFIDENCE: if ((cls->name == MT_CLS_WIN_8 || cls->name == MT_CLS_WIN_8_FORCE_MULTI_INPUT || cls->name == MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU || cls->name == MT_CLS_WIN_8_DISABLE_WAKEUP) && (field->application == HID_DG_TOUCHPAD || field->application == HID_DG_TOUCHSCREEN)) app->quirks |= MT_QUIRK_CONFIDENCE; if (app->quirks & MT_QUIRK_CONFIDENCE) input_set_abs_params(hi->input, ABS_MT_TOOL_TYPE, MT_TOOL_FINGER, MT_TOOL_PALM, 0, 0); MT_STORE_FIELD(confidence_state); return 1; case HID_DG_TIPSWITCH: if (field->application != HID_GD_SYSTEM_MULTIAXIS) input_set_capability(hi->input, EV_KEY, BTN_TOUCH); MT_STORE_FIELD(tip_state); return 1; case HID_DG_CONTACTID: MT_STORE_FIELD(contactid); app->touches_by_report++; return 1; case HID_DG_WIDTH: if (!(app->quirks & MT_QUIRK_NO_AREA)) set_abs(hi->input, ABS_MT_TOUCH_MAJOR, field, cls->sn_width); MT_STORE_FIELD(w); return 1; case HID_DG_HEIGHT: if (!(app->quirks & MT_QUIRK_NO_AREA)) { set_abs(hi->input, ABS_MT_TOUCH_MINOR, field, cls->sn_height); /* * Only set ABS_MT_ORIENTATION if it is not * already set by the HID_DG_AZIMUTH usage. */ if (!test_bit(ABS_MT_ORIENTATION, hi->input->absbit)) input_set_abs_params(hi->input, ABS_MT_ORIENTATION, 0, 1, 0, 0); } MT_STORE_FIELD(h); return 1; case HID_DG_TIPPRESSURE: set_abs(hi->input, ABS_MT_PRESSURE, field, cls->sn_pressure); MT_STORE_FIELD(p); return 1; case HID_DG_SCANTIME: input_set_capability(hi->input, EV_MSC, MSC_TIMESTAMP); app->scantime = &field->value[usage->usage_index]; app->scantime_logical_max = field->logical_maximum; return 1; case HID_DG_CONTACTCOUNT: app->have_contact_count = true; app->raw_cc = &field->value[usage->usage_index]; return 1; case HID_DG_AZIMUTH: /* * Azimuth has the range of [0, MAX) representing a full * revolution. Set ABS_MT_ORIENTATION to a quarter of * MAX according the definition of ABS_MT_ORIENTATION */ input_set_abs_params(hi->input, ABS_MT_ORIENTATION, -field->logical_maximum / 4, field->logical_maximum / 4, cls->sn_move ? 
field->logical_maximum / cls->sn_move : 0, 0); MT_STORE_FIELD(a); return 1; case HID_DG_CONTACTMAX: /* contact max are global to the report */ return -1; case HID_DG_TOUCH: /* Legacy devices use TIPSWITCH and not TOUCH. * Let's just ignore this field. */ return -1; } /* let hid-input decide for the others */ return 0; case HID_UP_BUTTON: code = BTN_MOUSE + ((usage->hid - 1) & HID_USAGE); /* * MS PTP spec says that external buttons left and right have * usages 2 and 3. */ if ((app->quirks & MT_QUIRK_WIN8_PTP_BUTTONS) && field->application == HID_DG_TOUCHPAD && (usage->hid & HID_USAGE) > 1) code--; if (field->application == HID_GD_SYSTEM_MULTIAXIS) code = BTN_0 + ((usage->hid - 1) & HID_USAGE); hid_map_usage(hi, usage, bit, max, EV_KEY, code); if (!*bit) return -1; input_set_capability(hi->input, EV_KEY, code); return 1; case 0xff000000: /* we do not want to map these: no input-oriented meaning */ return -1; } return 0; } static int mt_compute_slot(struct mt_device *td, struct mt_application *app, struct mt_usages *slot, struct input_dev *input) { __s32 quirks = app->quirks; if (quirks & MT_QUIRK_SLOT_IS_CONTACTID) return *slot->contactid; if (quirks & MT_QUIRK_CYPRESS) return cypress_compute_slot(app, slot); if (quirks & MT_QUIRK_SLOT_IS_CONTACTNUMBER) return app->num_received; if (quirks & MT_QUIRK_SLOT_IS_CONTACTID_MINUS_ONE) return *slot->contactid - 1; return input_mt_get_slot_by_key(input, *slot->contactid); } static void mt_release_pending_palms(struct mt_device *td, struct mt_application *app, struct input_dev *input) { int slotnum; bool need_sync = false; for_each_set_bit(slotnum, app->pending_palm_slots, td->maxcontacts) { clear_bit(slotnum, app->pending_palm_slots); input_mt_slot(input, slotnum); input_mt_report_slot_inactive(input); need_sync = true; } if (need_sync) { input_mt_sync_frame(input); input_sync(input); } } /* * this function is called when a whole packet has been received and processed, * so that it can decide what to send to the input layer. */ static void mt_sync_frame(struct mt_device *td, struct mt_application *app, struct input_dev *input) { if (app->quirks & MT_QUIRK_WIN8_PTP_BUTTONS) input_event(input, EV_KEY, BTN_LEFT, app->left_button_state); input_mt_sync_frame(input); input_event(input, EV_MSC, MSC_TIMESTAMP, app->timestamp); input_sync(input); mt_release_pending_palms(td, app, input); app->num_received = 0; app->left_button_state = 0; if (test_bit(MT_IO_FLAGS_ACTIVE_SLOTS, &td->mt_io_flags)) set_bit(MT_IO_FLAGS_PENDING_SLOTS, &td->mt_io_flags); else clear_bit(MT_IO_FLAGS_PENDING_SLOTS, &td->mt_io_flags); clear_bit(MT_IO_FLAGS_ACTIVE_SLOTS, &td->mt_io_flags); } static int mt_compute_timestamp(struct mt_application *app, __s32 value) { long delta = value - app->prev_scantime; unsigned long jdelta = jiffies_to_usecs(jiffies - app->jiffies); app->jiffies = jiffies; if (delta < 0) delta += app->scantime_logical_max; /* HID_DG_SCANTIME is expressed in 100us, we want it in us. */ delta *= 100; if (jdelta > MAX_TIMESTAMP_INTERVAL) /* No data received for a while, resync the timestamp. 
*/ return 0; else return app->timestamp + delta; } static int mt_touch_event(struct hid_device *hid, struct hid_field *field, struct hid_usage *usage, __s32 value) { /* we will handle the hidinput part later, now remains hiddev */ if (hid->claimed & HID_CLAIMED_HIDDEV && hid->hiddev_hid_event) hid->hiddev_hid_event(hid, field, usage, value); return 1; } static int mt_process_slot(struct mt_device *td, struct input_dev *input, struct mt_application *app, struct mt_usages *slot) { struct input_mt *mt = input->mt; struct hid_device *hdev = td->hdev; __s32 quirks = app->quirks; bool valid = true; bool confidence_state = true; bool inrange_state = false; int active; int slotnum; int tool = MT_TOOL_FINGER; if (!slot) return -EINVAL; if ((quirks & MT_QUIRK_CONTACT_CNT_ACCURATE) && app->num_received >= app->num_expected) return -EAGAIN; if (!(quirks & MT_QUIRK_ALWAYS_VALID)) { if (quirks & MT_QUIRK_VALID_IS_INRANGE) valid = *slot->inrange_state; if (quirks & MT_QUIRK_NOT_SEEN_MEANS_UP) valid = *slot->tip_state; if (quirks & MT_QUIRK_VALID_IS_CONFIDENCE) valid = *slot->confidence_state; if (!valid) return 0; } slotnum = mt_compute_slot(td, app, slot, input); if (slotnum < 0 || slotnum >= td->maxcontacts) return 0; if ((quirks & MT_QUIRK_IGNORE_DUPLICATES) && mt) { struct input_mt_slot *i_slot = &mt->slots[slotnum]; if (input_mt_is_active(i_slot) && input_mt_is_used(mt, i_slot)) return -EAGAIN; } if (quirks & MT_QUIRK_CONFIDENCE) confidence_state = *slot->confidence_state; if (quirks & MT_QUIRK_HOVERING) inrange_state = *slot->inrange_state; active = *slot->tip_state || inrange_state; if (app->application == HID_GD_SYSTEM_MULTIAXIS) tool = MT_TOOL_DIAL; else if (unlikely(!confidence_state)) { tool = MT_TOOL_PALM; if (!active && mt && input_mt_is_active(&mt->slots[slotnum])) { /* * The non-confidence was reported for * previously valid contact that is also no * longer valid. We can't simply report * lift-off as userspace will not be aware * of non-confidence, so we need to split * it into 2 events: active MT_TOOL_PALM * and a separate liftoff. */ active = true; set_bit(slotnum, app->pending_palm_slots); } } input_mt_slot(input, slotnum); input_mt_report_slot_state(input, tool, active); if (active) { /* this finger is in proximity of the sensor */ int wide = (*slot->w > *slot->h); int major = max(*slot->w, *slot->h); int minor = min(*slot->w, *slot->h); int orientation = wide; int max_azimuth; int azimuth; int x; int y; int cx; int cy; if (slot->a != DEFAULT_ZERO) { /* * Azimuth is counter-clockwise and ranges from [0, MAX) * (a full revolution). Convert it to clockwise ranging * [-MAX/2, MAX/2]. * * Note that ABS_MT_ORIENTATION require us to report * the limit of [-MAX/4, MAX/4], but the value can go * out of range to [-MAX/2, MAX/2] to report an upside * down ellipsis. */ azimuth = *slot->a; max_azimuth = input_abs_get_max(input, ABS_MT_ORIENTATION); if (azimuth > max_azimuth * 2) azimuth -= max_azimuth * 4; orientation = -azimuth; if (quirks & MT_QUIRK_ORIENTATION_INVERT) orientation = -orientation; } if (quirks & MT_QUIRK_TOUCH_SIZE_SCALING) { /* * divided by two to match visual scale of touch * for devices with this quirk */ major = major >> 1; minor = minor >> 1; } x = hdev->quirks & HID_QUIRK_X_INVERT ? input_abs_get_max(input, ABS_MT_POSITION_X) - *slot->x : *slot->x; y = hdev->quirks & HID_QUIRK_Y_INVERT ? input_abs_get_max(input, ABS_MT_POSITION_Y) - *slot->y : *slot->y; cx = hdev->quirks & HID_QUIRK_X_INVERT ? 
input_abs_get_max(input, ABS_MT_POSITION_X) - *slot->cx : *slot->cx; cy = hdev->quirks & HID_QUIRK_Y_INVERT ? input_abs_get_max(input, ABS_MT_POSITION_Y) - *slot->cy : *slot->cy; input_event(input, EV_ABS, ABS_MT_POSITION_X, x); input_event(input, EV_ABS, ABS_MT_POSITION_Y, y); input_event(input, EV_ABS, ABS_MT_TOOL_X, cx); input_event(input, EV_ABS, ABS_MT_TOOL_Y, cy); input_event(input, EV_ABS, ABS_MT_DISTANCE, !*slot->tip_state); input_event(input, EV_ABS, ABS_MT_ORIENTATION, orientation); input_event(input, EV_ABS, ABS_MT_PRESSURE, *slot->p); input_event(input, EV_ABS, ABS_MT_TOUCH_MAJOR, major); input_event(input, EV_ABS, ABS_MT_TOUCH_MINOR, minor); set_bit(MT_IO_FLAGS_ACTIVE_SLOTS, &td->mt_io_flags); } return 0; } static void mt_process_mt_event(struct hid_device *hid, struct mt_application *app, struct hid_field *field, struct hid_usage *usage, __s32 value, bool first_packet) { __s32 quirks = app->quirks; struct input_dev *input = field->hidinput->input; if (!usage->type || !(hid->claimed & HID_CLAIMED_INPUT)) return; if (quirks & MT_QUIRK_WIN8_PTP_BUTTONS) { /* * For Win8 PTP touchpads we should only look at * non finger/touch events in the first_packet of a * (possible) multi-packet frame. */ if (!first_packet) return; /* * For Win8 PTP touchpads we map both the clickpad click * and any "external" left buttons to BTN_LEFT if a * device claims to have both we need to report 1 for * BTN_LEFT if either is pressed, so we or all values * together and report the result in mt_sync_frame(). */ if (usage->type == EV_KEY && usage->code == BTN_LEFT) { app->left_button_state |= value; return; } } input_event(input, usage->type, usage->code, value); } static void mt_touch_report(struct hid_device *hid, struct mt_report_data *rdata) { struct mt_device *td = hid_get_drvdata(hid); struct hid_report *report = rdata->report; struct mt_application *app = rdata->application; struct hid_field *field; struct input_dev *input; struct mt_usages *slot; bool first_packet; unsigned count; int r, n; int scantime = 0; int contact_count = -1; /* sticky fingers release in progress, abort */ if (test_and_set_bit_lock(MT_IO_FLAGS_RUNNING, &td->mt_io_flags)) return; scantime = *app->scantime; app->timestamp = mt_compute_timestamp(app, scantime); if (app->raw_cc != DEFAULT_ZERO) contact_count = *app->raw_cc; /* * Includes multi-packet support where subsequent * packets are sent with zero contactcount. */ if (contact_count >= 0) { /* * For Win8 PTPs the first packet (td->num_received == 0) may * have a contactcount of 0 if there only is a button event. * We double check that this is not a continuation packet * of a possible multi-packet frame be checking that the * timestamp has changed. 
*/ if ((app->quirks & MT_QUIRK_WIN8_PTP_BUTTONS) && app->num_received == 0 && app->prev_scantime != scantime) app->num_expected = contact_count; /* A non 0 contact count always indicates a first packet */ else if (contact_count) app->num_expected = contact_count; } app->prev_scantime = scantime; first_packet = app->num_received == 0; input = report->field[0]->hidinput->input; list_for_each_entry(slot, &app->mt_usages, list) { if (!mt_process_slot(td, input, app, slot)) app->num_received++; } for (r = 0; r < report->maxfield; r++) { field = report->field[r]; count = field->report_count; if (!(HID_MAIN_ITEM_VARIABLE & field->flags)) continue; for (n = 0; n < count; n++) mt_process_mt_event(hid, app, field, &field->usage[n], field->value[n], first_packet); } if (app->num_received >= app->num_expected) mt_sync_frame(td, app, input); /* * Windows 8 specs says 2 things: * - once a contact has been reported, it has to be reported in each * subsequent report * - the report rate when fingers are present has to be at least * the refresh rate of the screen, 60 or 120 Hz * * I interprete this that the specification forces a report rate of * at least 60 Hz for a touchscreen to be certified. * Which means that if we do not get a report whithin 16 ms, either * something wrong happens, either the touchscreen forgets to send * a release. Taking a reasonable margin allows to remove issues * with USB communication or the load of the machine. * * Given that Win 8 devices are forced to send a release, this will * only affect laggish machines and the ones that have a firmware * defect. */ if (app->quirks & MT_QUIRK_STICKY_FINGERS) { if (test_bit(MT_IO_FLAGS_PENDING_SLOTS, &td->mt_io_flags)) mod_timer(&td->release_timer, jiffies + msecs_to_jiffies(100)); else del_timer(&td->release_timer); } clear_bit_unlock(MT_IO_FLAGS_RUNNING, &td->mt_io_flags); } static int mt_touch_input_configured(struct hid_device *hdev, struct hid_input *hi, struct mt_application *app) { struct mt_device *td = hid_get_drvdata(hdev); struct mt_class *cls = &td->mtclass; struct input_dev *input = hi->input; int ret; if (!td->maxcontacts) td->maxcontacts = MT_DEFAULT_MAXCONTACT; mt_post_parse(td, app); if (td->serial_maybe) mt_post_parse_default_settings(td, app); if (cls->is_indirect) app->mt_flags |= INPUT_MT_POINTER; if (app->quirks & MT_QUIRK_NOT_SEEN_MEANS_UP) app->mt_flags |= INPUT_MT_DROP_UNUSED; /* check for clickpads */ if ((app->mt_flags & INPUT_MT_POINTER) && (app->buttons_count == 1)) td->is_buttonpad = true; if (td->is_buttonpad) __set_bit(INPUT_PROP_BUTTONPAD, input->propbit); app->pending_palm_slots = devm_kcalloc(&hi->input->dev, BITS_TO_LONGS(td->maxcontacts), sizeof(long), GFP_KERNEL); if (!app->pending_palm_slots) return -ENOMEM; ret = input_mt_init_slots(input, td->maxcontacts, app->mt_flags); if (ret) return ret; app->mt_flags = 0; return 0; } #define mt_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, \ max, EV_KEY, (c)) static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { struct mt_device *td = hid_get_drvdata(hdev); struct mt_application *application; struct mt_report_data *rdata; rdata = mt_find_report_data(td, field->report); if (!rdata) { hid_err(hdev, "failed to allocate data for report\n"); return 0; } application = rdata->application; /* * If mtclass.export_all_inputs is not set, only map fields from * TouchScreen or TouchPad collections. 
We need to ignore fields * that belong to other collections such as Mouse that might have * the same GenericDesktop usages. */ if (!td->mtclass.export_all_inputs && field->application != HID_DG_TOUCHSCREEN && field->application != HID_DG_PEN && field->application != HID_DG_TOUCHPAD && field->application != HID_GD_KEYBOARD && field->application != HID_GD_SYSTEM_CONTROL && field->application != HID_CP_CONSUMER_CONTROL && field->application != HID_GD_WIRELESS_RADIO_CTLS && field->application != HID_GD_SYSTEM_MULTIAXIS && !(field->application == HID_VD_ASUS_CUSTOM_MEDIA_KEYS && application->quirks & MT_QUIRK_ASUS_CUSTOM_UP)) return -1; /* * Some Asus keyboard+touchpad devices have the hotkeys defined in the * touchpad report descriptor. We need to treat these as an array to * map usages to input keys. */ if (field->application == HID_VD_ASUS_CUSTOM_MEDIA_KEYS && application->quirks & MT_QUIRK_ASUS_CUSTOM_UP && (usage->hid & HID_USAGE_PAGE) == HID_UP_CUSTOM) { set_bit(EV_REP, hi->input->evbit); if (field->flags & HID_MAIN_ITEM_VARIABLE) field->flags &= ~HID_MAIN_ITEM_VARIABLE; switch (usage->hid & HID_USAGE) { case 0x10: mt_map_key_clear(KEY_BRIGHTNESSDOWN); break; case 0x20: mt_map_key_clear(KEY_BRIGHTNESSUP); break; case 0x35: mt_map_key_clear(KEY_DISPLAY_OFF); break; case 0x6b: mt_map_key_clear(KEY_F21); break; case 0x6c: mt_map_key_clear(KEY_SLEEP); break; default: return -1; } return 1; } if (rdata->is_mt_collection) return mt_touch_input_mapping(hdev, hi, field, usage, bit, max, application); /* * some egalax touchscreens have "application == DG_TOUCHSCREEN" * for the stylus. Overwrite the hid_input application */ if (field->physical == HID_DG_STYLUS) hi->application = HID_DG_STYLUS; /* let hid-core decide for the others */ return 0; } static int mt_input_mapped(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { struct mt_device *td = hid_get_drvdata(hdev); struct mt_report_data *rdata; rdata = mt_find_report_data(td, field->report); if (rdata && rdata->is_mt_collection) { /* We own these mappings, tell hid-input to ignore them */ return -1; } /* let hid-core decide for the others */ return 0; } static int mt_event(struct hid_device *hid, struct hid_field *field, struct hid_usage *usage, __s32 value) { struct mt_device *td = hid_get_drvdata(hid); struct mt_report_data *rdata; rdata = mt_find_report_data(td, field->report); if (rdata && rdata->is_mt_collection) return mt_touch_event(hid, field, usage, value); return 0; } static const __u8 *mt_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *size) { if (hdev->vendor == I2C_VENDOR_ID_GOODIX && (hdev->product == I2C_DEVICE_ID_GOODIX_01E8 || hdev->product == I2C_DEVICE_ID_GOODIX_01E9)) { if (rdesc[607] == 0x15) { rdesc[607] = 0x25; dev_info( &hdev->dev, "GT7868Q report descriptor fixup is applied.\n"); } else { dev_info( &hdev->dev, "The byte is not expected for fixing the report descriptor. \ It's possible that the touchpad firmware is not suitable for applying the fix. 
\ got: %x\n", rdesc[607]); } } return rdesc; } static void mt_report(struct hid_device *hid, struct hid_report *report) { struct mt_device *td = hid_get_drvdata(hid); struct hid_field *field = report->field[0]; struct mt_report_data *rdata; if (!(hid->claimed & HID_CLAIMED_INPUT)) return; rdata = mt_find_report_data(td, report); if (rdata && rdata->is_mt_collection) return mt_touch_report(hid, rdata); if (field && field->hidinput && field->hidinput->input) input_sync(field->hidinput->input); } static bool mt_need_to_apply_feature(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, enum latency_mode latency, enum report_mode report_mode, bool *inputmode_found) { struct mt_device *td = hid_get_drvdata(hdev); struct mt_class *cls = &td->mtclass; struct hid_report *report = field->report; unsigned int index = usage->usage_index; char *buf; u32 report_len; int max; switch (usage->hid) { case HID_DG_INPUTMODE: /* * Some elan panels wrongly declare 2 input mode features, * and silently ignore when we set the value in the second * field. Skip the second feature and hope for the best. */ if (*inputmode_found) return false; if (cls->quirks & MT_QUIRK_FORCE_GET_FEATURE) { report_len = hid_report_len(report); buf = hid_alloc_report_buf(report, GFP_KERNEL); if (!buf) { hid_err(hdev, "failed to allocate buffer for report\n"); return false; } hid_hw_raw_request(hdev, report->id, buf, report_len, HID_FEATURE_REPORT, HID_REQ_GET_REPORT); kfree(buf); } field->value[index] = td->inputmode_value; *inputmode_found = true; return true; case HID_DG_CONTACTMAX: if (cls->maxcontacts) { max = min_t(int, field->logical_maximum, cls->maxcontacts); if (field->value[index] != max) { field->value[index] = max; return true; } } break; case HID_DG_LATENCYMODE: field->value[index] = latency; return true; case HID_DG_SURFACESWITCH: field->value[index] = !!(report_mode & TOUCHPAD_REPORT_CONTACTS); return true; case HID_DG_BUTTONSWITCH: field->value[index] = !!(report_mode & TOUCHPAD_REPORT_BUTTONS); return true; } return false; /* no need to update the report */ } static void mt_set_modes(struct hid_device *hdev, enum latency_mode latency, enum report_mode report_mode) { struct hid_report_enum *rep_enum; struct hid_report *rep; struct hid_usage *usage; int i, j; bool update_report; bool inputmode_found = false; rep_enum = &hdev->report_enum[HID_FEATURE_REPORT]; list_for_each_entry(rep, &rep_enum->report_list, list) { update_report = false; for (i = 0; i < rep->maxfield; i++) { /* Ignore if report count is out of bounds. 
*/ if (rep->field[i]->report_count < 1) continue; for (j = 0; j < rep->field[i]->maxusage; j++) { usage = &rep->field[i]->usage[j]; if (mt_need_to_apply_feature(hdev, rep->field[i], usage, latency, report_mode, &inputmode_found)) update_report = true; } } if (update_report) hid_hw_request(hdev, rep, HID_REQ_SET_REPORT); } } static void mt_post_parse_default_settings(struct mt_device *td, struct mt_application *app) { __s32 quirks = app->quirks; /* unknown serial device needs special quirks */ if (list_is_singular(&app->mt_usages)) { quirks |= MT_QUIRK_ALWAYS_VALID; quirks &= ~MT_QUIRK_NOT_SEEN_MEANS_UP; quirks &= ~MT_QUIRK_VALID_IS_INRANGE; quirks &= ~MT_QUIRK_VALID_IS_CONFIDENCE; quirks &= ~MT_QUIRK_CONTACT_CNT_ACCURATE; } app->quirks = quirks; } static void mt_post_parse(struct mt_device *td, struct mt_application *app) { if (!app->have_contact_count) app->quirks &= ~MT_QUIRK_CONTACT_CNT_ACCURATE; } static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi) { struct mt_device *td = hid_get_drvdata(hdev); const char *suffix = NULL; struct mt_report_data *rdata; struct mt_application *mt_application = NULL; struct hid_report *report; int ret; list_for_each_entry(report, &hi->reports, hidinput_list) { rdata = mt_find_report_data(td, report); if (!rdata) { hid_err(hdev, "failed to allocate data for report\n"); return -ENOMEM; } mt_application = rdata->application; if (rdata->is_mt_collection) { ret = mt_touch_input_configured(hdev, hi, mt_application); if (ret) return ret; } } switch (hi->application) { case HID_GD_KEYBOARD: case HID_GD_KEYPAD: case HID_GD_MOUSE: case HID_DG_TOUCHPAD: case HID_GD_SYSTEM_CONTROL: case HID_CP_CONSUMER_CONTROL: case HID_GD_WIRELESS_RADIO_CTLS: case HID_GD_SYSTEM_MULTIAXIS: /* already handled by hid core */ break; case HID_DG_TOUCHSCREEN: /* we do not set suffix = "Touchscreen" */ hi->input->name = hdev->name; break; case HID_VD_ASUS_CUSTOM_MEDIA_KEYS: suffix = "Custom Media Keys"; break; case HID_DG_STYLUS: /* force BTN_STYLUS to allow tablet matching in udev */ __set_bit(BTN_STYLUS, hi->input->keybit); break; default: suffix = "UNKNOWN"; break; } if (suffix) { hi->input->name = devm_kasprintf(&hdev->dev, GFP_KERNEL, "%s %s", hdev->name, suffix); if (!hi->input->name) return -ENOMEM; } return 0; } static void mt_fix_const_field(struct hid_field *field, unsigned int usage) { if (field->usage[0].hid != usage || !(field->flags & HID_MAIN_ITEM_CONSTANT)) return; field->flags &= ~HID_MAIN_ITEM_CONSTANT; field->flags |= HID_MAIN_ITEM_VARIABLE; } static void mt_fix_const_fields(struct hid_device *hdev, unsigned int usage) { struct hid_report *report; int i; list_for_each_entry(report, &hdev->report_enum[HID_INPUT_REPORT].report_list, list) { if (!report->maxfield) continue; for (i = 0; i < report->maxfield; i++) if (report->field[i]->maxusage >= 1) mt_fix_const_field(report->field[i], usage); } } static void mt_release_contacts(struct hid_device *hid) { struct hid_input *hidinput; struct mt_application *application; struct mt_device *td = hid_get_drvdata(hid); list_for_each_entry(hidinput, &hid->inputs, list) { struct input_dev *input_dev = hidinput->input; struct input_mt *mt = input_dev->mt; int i; if (mt) { for (i = 0; i < mt->num_slots; i++) { input_mt_slot(input_dev, i); input_mt_report_slot_inactive(input_dev); } input_mt_sync_frame(input_dev); input_sync(input_dev); } } list_for_each_entry(application, &td->applications, list) { application->num_received = 0; } } static void mt_expired_timeout(struct timer_list *t) { struct mt_device *td = 
from_timer(td, t, release_timer); struct hid_device *hdev = td->hdev; /* * An input report came in just before we release the sticky fingers, * it will take care of the sticky fingers. */ if (test_and_set_bit_lock(MT_IO_FLAGS_RUNNING, &td->mt_io_flags)) return; if (test_bit(MT_IO_FLAGS_PENDING_SLOTS, &td->mt_io_flags)) mt_release_contacts(hdev); clear_bit_unlock(MT_IO_FLAGS_RUNNING, &td->mt_io_flags); } static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret, i; struct mt_device *td; const struct mt_class *mtclass = mt_classes; /* MT_CLS_DEFAULT */ for (i = 0; mt_classes[i].name ; i++) { if (id->driver_data == mt_classes[i].name) { mtclass = &(mt_classes[i]); break; } } td = devm_kzalloc(&hdev->dev, sizeof(struct mt_device), GFP_KERNEL); if (!td) { dev_err(&hdev->dev, "cannot allocate multitouch data\n"); return -ENOMEM; } td->hdev = hdev; td->mtclass = *mtclass; td->inputmode_value = MT_INPUTMODE_TOUCHSCREEN; hid_set_drvdata(hdev, td); INIT_LIST_HEAD(&td->applications); INIT_LIST_HEAD(&td->reports); if (id->vendor == HID_ANY_ID && id->product == HID_ANY_ID) td->serial_maybe = true; /* Orientation is inverted if the X or Y axes are * flipped, but normalized if both are inverted. */ if (hdev->quirks & (HID_QUIRK_X_INVERT | HID_QUIRK_Y_INVERT) && !((hdev->quirks & HID_QUIRK_X_INVERT) && (hdev->quirks & HID_QUIRK_Y_INVERT))) td->mtclass.quirks = MT_QUIRK_ORIENTATION_INVERT; /* This allows the driver to correctly support devices * that emit events over several HID messages. */ hdev->quirks |= HID_QUIRK_NO_INPUT_SYNC; /* * This allows the driver to handle different input sensors * that emits events through different applications on the same HID * device. */ hdev->quirks |= HID_QUIRK_INPUT_PER_APP; if (id->group != HID_GROUP_MULTITOUCH_WIN_8) hdev->quirks |= HID_QUIRK_MULTI_INPUT; if (mtclass->quirks & MT_QUIRK_FORCE_MULTI_INPUT) { hdev->quirks &= ~HID_QUIRK_INPUT_PER_APP; hdev->quirks |= HID_QUIRK_MULTI_INPUT; } timer_setup(&td->release_timer, mt_expired_timeout, 0); ret = hid_parse(hdev); if (ret != 0) return ret; if (mtclass->quirks & MT_QUIRK_FIX_CONST_CONTACT_ID) mt_fix_const_fields(hdev, HID_DG_CONTACTID); if (hdev->vendor == USB_VENDOR_ID_SIS_TOUCH) hdev->quirks |= HID_QUIRK_NOGET; ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (ret) return ret; ret = sysfs_create_group(&hdev->dev.kobj, &mt_attribute_group); if (ret) dev_warn(&hdev->dev, "Cannot allocate sysfs group for %s\n", hdev->name); mt_set_modes(hdev, HID_LATENCY_NORMAL, TOUCHPAD_REPORT_ALL); return 0; } static int mt_suspend(struct hid_device *hdev, pm_message_t state) { struct mt_device *td = hid_get_drvdata(hdev); /* High latency is desirable for power savings during S3/S0ix */ if ((td->mtclass.quirks & MT_QUIRK_DISABLE_WAKEUP) || !hid_hw_may_wakeup(hdev)) mt_set_modes(hdev, HID_LATENCY_HIGH, TOUCHPAD_REPORT_NONE); else mt_set_modes(hdev, HID_LATENCY_HIGH, TOUCHPAD_REPORT_ALL); return 0; } static int mt_reset_resume(struct hid_device *hdev) { mt_release_contacts(hdev); mt_set_modes(hdev, HID_LATENCY_NORMAL, TOUCHPAD_REPORT_ALL); return 0; } static int mt_resume(struct hid_device *hdev) { /* Some Elan legacy devices require SET_IDLE to be set on resume. * It should be safe to send it to other devices too. * Tested on 3M, Stantum, Cypress, Zytronic, eGalax, and Elan panels. 
*/ hid_hw_idle(hdev, 0, 0, HID_REQ_SET_IDLE); mt_set_modes(hdev, HID_LATENCY_NORMAL, TOUCHPAD_REPORT_ALL); return 0; } static void mt_remove(struct hid_device *hdev) { struct mt_device *td = hid_get_drvdata(hdev); del_timer_sync(&td->release_timer); sysfs_remove_group(&hdev->dev.kobj, &mt_attribute_group); hid_hw_stop(hdev); } /* * This list contains only: * - VID/PID of products not working with the default multitouch handling * - 2 generic rules. * So there is no point in adding here any device with MT_CLS_DEFAULT. */ static const struct hid_device_id mt_devices[] = { /* 3M panels */ { .driver_data = MT_CLS_3M, MT_USB_DEVICE(USB_VENDOR_ID_3M, USB_DEVICE_ID_3M1968) }, { .driver_data = MT_CLS_3M, MT_USB_DEVICE(USB_VENDOR_ID_3M, USB_DEVICE_ID_3M2256) }, { .driver_data = MT_CLS_3M, MT_USB_DEVICE(USB_VENDOR_ID_3M, USB_DEVICE_ID_3M3266) }, /* Anton devices */ { .driver_data = MT_CLS_EXPORT_ALL_INPUTS, MT_USB_DEVICE(USB_VENDOR_ID_ANTON, USB_DEVICE_ID_ANTON_TOUCH_PAD) }, /* Asus T101HA */ { .driver_data = MT_CLS_WIN_8_DISABLE_WAKEUP, HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_T101HA_KEYBOARD) }, /* Asus T304UA */ { .driver_data = MT_CLS_ASUS, HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_T304_KEYBOARD) }, /* Atmel panels */ { .driver_data = MT_CLS_SERIAL, MT_USB_DEVICE(USB_VENDOR_ID_ATMEL, USB_DEVICE_ID_ATMEL_MXT_DIGITIZER) }, /* Baanto multitouch devices */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_BAANTO, USB_DEVICE_ID_BAANTO_MT_190W2) }, /* Cando panels */ { .driver_data = MT_CLS_DUAL_INRANGE_CONTACTNUMBER, MT_USB_DEVICE(USB_VENDOR_ID_CANDO, USB_DEVICE_ID_CANDO_MULTI_TOUCH) }, { .driver_data = MT_CLS_DUAL_INRANGE_CONTACTNUMBER, MT_USB_DEVICE(USB_VENDOR_ID_CANDO, USB_DEVICE_ID_CANDO_MULTI_TOUCH_15_6) }, /* Chunghwa Telecom touch panels */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_CHUNGHWAT, USB_DEVICE_ID_CHUNGHWAT_MULTITOUCH) }, /* CJTouch panels */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_CJTOUCH, USB_DEVICE_ID_CJTOUCH_MULTI_TOUCH_0020) }, { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_CJTOUCH, USB_DEVICE_ID_CJTOUCH_MULTI_TOUCH_0040) }, /* CVTouch panels */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_CVTOUCH, USB_DEVICE_ID_CVTOUCH_SCREEN) }, /* eGalax devices (SAW) */ { .driver_data = MT_CLS_EXPORT_ALL_INPUTS, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_EGALAX_TOUCHCONTROLLER) }, /* eGalax devices (resistive) */ { .driver_data = MT_CLS_EGALAX, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_480D) }, { .driver_data = MT_CLS_EGALAX, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_480E) }, /* eGalax devices (capacitive) */ { .driver_data = MT_CLS_EGALAX_SERIAL, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_7207) }, { .driver_data = MT_CLS_EGALAX, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_720C) }, { .driver_data = MT_CLS_EGALAX_SERIAL, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_7224) }, { .driver_data = MT_CLS_EGALAX_SERIAL, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_722A) }, { .driver_data = MT_CLS_EGALAX_SERIAL, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_725E) }, { .driver_data = MT_CLS_EGALAX_SERIAL, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_7262) }, { .driver_data = MT_CLS_EGALAX, 
MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_726B) }, { .driver_data = MT_CLS_EGALAX, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_72A1) }, { .driver_data = MT_CLS_EGALAX_SERIAL, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_72AA) }, { .driver_data = MT_CLS_EGALAX, HID_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_72C4) }, { .driver_data = MT_CLS_EGALAX, HID_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_72D0) }, { .driver_data = MT_CLS_EGALAX, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_72FA) }, { .driver_data = MT_CLS_EGALAX, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_7302) }, { .driver_data = MT_CLS_EGALAX_SERIAL, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_7349) }, { .driver_data = MT_CLS_EGALAX_SERIAL, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_73F7) }, { .driver_data = MT_CLS_EGALAX_SERIAL, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_A001) }, { .driver_data = MT_CLS_EGALAX, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_C002) }, /* Elan devices */ { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_ELAN, 0x313a) }, { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_ELAN, 0x3148) }, { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU, HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_ELAN, 0x32ae) }, /* Elitegroup panel */ { .driver_data = MT_CLS_SERIAL, MT_USB_DEVICE(USB_VENDOR_ID_ELITEGROUP, USB_DEVICE_ID_ELITEGROUP_05D8) }, /* Flatfrog Panels */ { .driver_data = MT_CLS_FLATFROG, MT_USB_DEVICE(USB_VENDOR_ID_FLATFROG, USB_DEVICE_ID_MULTITOUCH_3200) }, /* FocalTech Panels */ { .driver_data = MT_CLS_SERIAL, MT_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_FOCALTECH_FTXXXX_MULTITOUCH) }, /* GeneralTouch panel */ { .driver_data = MT_CLS_GENERALTOUCH_TWOFINGERS, MT_USB_DEVICE(USB_VENDOR_ID_GENERAL_TOUCH, USB_DEVICE_ID_GENERAL_TOUCH_WIN7_TWOFINGERS) }, { .driver_data = MT_CLS_GENERALTOUCH_PWT_TENFINGERS, MT_USB_DEVICE(USB_VENDOR_ID_GENERAL_TOUCH, USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PWT_TENFINGERS) }, { .driver_data = MT_CLS_GENERALTOUCH_TWOFINGERS, MT_USB_DEVICE(USB_VENDOR_ID_GENERAL_TOUCH, USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PIT_0101) }, { .driver_data = MT_CLS_GENERALTOUCH_PWT_TENFINGERS, MT_USB_DEVICE(USB_VENDOR_ID_GENERAL_TOUCH, USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PIT_0102) }, { .driver_data = MT_CLS_GENERALTOUCH_PWT_TENFINGERS, MT_USB_DEVICE(USB_VENDOR_ID_GENERAL_TOUCH, USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PIT_0106) }, { .driver_data = MT_CLS_GENERALTOUCH_PWT_TENFINGERS, MT_USB_DEVICE(USB_VENDOR_ID_GENERAL_TOUCH, USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PIT_010A) }, { .driver_data = MT_CLS_GENERALTOUCH_PWT_TENFINGERS, MT_USB_DEVICE(USB_VENDOR_ID_GENERAL_TOUCH, USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PIT_E100) }, /* Gametel game controller */ { .driver_data = MT_CLS_NSMU, MT_BT_DEVICE(USB_VENDOR_ID_FRUCTEL, USB_DEVICE_ID_GAMETEL_MT_MODE) }, /* Goodix GT7868Q devices */ { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU, HID_DEVICE(BUS_I2C, HID_GROUP_ANY, I2C_VENDOR_ID_GOODIX, I2C_DEVICE_ID_GOODIX_01E8) }, { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU, HID_DEVICE(BUS_I2C, HID_GROUP_ANY, I2C_VENDOR_ID_GOODIX, I2C_DEVICE_ID_GOODIX_01E9) }, /* GoodTouch panels */ { .driver_data = MT_CLS_NSMU, 
MT_USB_DEVICE(USB_VENDOR_ID_GOODTOUCH, USB_DEVICE_ID_GOODTOUCH_000f) }, /* Hanvon panels */ { .driver_data = MT_CLS_DUAL_INRANGE_CONTACTID, MT_USB_DEVICE(USB_VENDOR_ID_HANVON_ALT, USB_DEVICE_ID_HANVON_ALT_MULTITOUCH) }, /* HONOR GLO-GXXX panel */ { .driver_data = MT_CLS_VTL, HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, 0x347d, 0x7853) }, /* HONOR MagicBook Art 14 touchpad */ { .driver_data = MT_CLS_VTL, HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, 0x35cc, 0x0104) }, /* Ilitek dual touch panel */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_ILITEK, USB_DEVICE_ID_ILITEK_MULTITOUCH) }, /* LG Melfas panel */ { .driver_data = MT_CLS_LG, HID_USB_DEVICE(USB_VENDOR_ID_LG, USB_DEVICE_ID_LG_MELFAS_MT) }, { .driver_data = MT_CLS_LG, HID_DEVICE(BUS_I2C, HID_GROUP_GENERIC, USB_VENDOR_ID_LG, I2C_DEVICE_ID_LG_7010) }, /* Lenovo X1 TAB Gen 2 */ { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_TAB) }, /* Lenovo X1 TAB Gen 3 */ { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_TAB3) }, /* Lenovo X12 TAB Gen 1 */ { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU, HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X12_TAB) }, /* Lenovo X12 TAB Gen 2 */ { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU, HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X12_TAB2) }, /* Logitech devices */ { .driver_data = MT_CLS_NSMU, HID_DEVICE(BUS_BLUETOOTH, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_CASA_TOUCHPAD) }, { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU, HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_BOLT_RECEIVER) }, /* MosArt panels */ { .driver_data = MT_CLS_CONFIDENCE_MINUS_ONE, MT_USB_DEVICE(USB_VENDOR_ID_ASUS, USB_DEVICE_ID_ASUS_T91MT)}, { .driver_data = MT_CLS_CONFIDENCE_MINUS_ONE, MT_USB_DEVICE(USB_VENDOR_ID_ASUS, USB_DEVICE_ID_ASUSTEK_MULTITOUCH_YFO) }, { .driver_data = MT_CLS_CONFIDENCE_MINUS_ONE, MT_USB_DEVICE(USB_VENDOR_ID_TURBOX, USB_DEVICE_ID_TURBOX_TOUCHSCREEN_MOSART) }, /* Novatek Panel */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_NOVATEK, USB_DEVICE_ID_NOVATEK_PCT) }, /* Ntrig Panel */ { .driver_data = MT_CLS_NSMU, HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_NTRIG, 0x1b05) }, /* Panasonic panels */ { .driver_data = MT_CLS_PANASONIC, MT_USB_DEVICE(USB_VENDOR_ID_PANASONIC, USB_DEVICE_ID_PANABOARD_UBT780) }, { .driver_data = MT_CLS_PANASONIC, MT_USB_DEVICE(USB_VENDOR_ID_PANASONIC, USB_DEVICE_ID_PANABOARD_UBT880) }, /* PixArt optical touch screen */ { .driver_data = MT_CLS_INRANGE_CONTACTNUMBER, MT_USB_DEVICE(USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN) }, { .driver_data = MT_CLS_INRANGE_CONTACTNUMBER, MT_USB_DEVICE(USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN1) }, { .driver_data = MT_CLS_INRANGE_CONTACTNUMBER, MT_USB_DEVICE(USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN2) }, /* PixCir-based panels */ { .driver_data = MT_CLS_DUAL_INRANGE_CONTACTID, MT_USB_DEVICE(USB_VENDOR_ID_CANDO, USB_DEVICE_ID_CANDO_PIXCIR_MULTI_TOUCH) }, /* Quanta-based panels */ { .driver_data = MT_CLS_CONFIDENCE_CONTACT_ID, MT_USB_DEVICE(USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3001) }, /* Razer touchpads */ { .driver_data = MT_CLS_RAZER_BLADE_STEALTH, 
HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_SYNAPTICS, 0x8323) }, /* Smart Tech panels */ { .driver_data = MT_CLS_SMART_TECH, MT_USB_DEVICE(0x0b8c, 0x0092)}, /* Stantum panels */ { .driver_data = MT_CLS_CONFIDENCE, MT_USB_DEVICE(USB_VENDOR_ID_STANTUM_STM, USB_DEVICE_ID_MTP_STM)}, /* Synaptics devices */ { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_SYNAPTICS, 0xcd7e) }, { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_SYNAPTICS, 0xcddc) }, { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_SYNAPTICS, 0xce08) }, { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_SYNAPTICS, 0xce09) }, /* TopSeed panels */ { .driver_data = MT_CLS_TOPSEED, MT_USB_DEVICE(USB_VENDOR_ID_TOPSEED2, USB_DEVICE_ID_TOPSEED2_PERIPAD_701) }, /* Touch International panels */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_TOUCH_INTL, USB_DEVICE_ID_TOUCH_INTL_MULTI_TOUCH) }, /* Unitec panels */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_UNITEC, USB_DEVICE_ID_UNITEC_USB_TOUCH_0709) }, { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_UNITEC, USB_DEVICE_ID_UNITEC_USB_TOUCH_0A19) }, /* VTL panels */ { .driver_data = MT_CLS_VTL, MT_USB_DEVICE(USB_VENDOR_ID_VTL, USB_DEVICE_ID_VTL_MULTITOUCH_FF3F) }, /* Winbond Electronics Corp. */ { .driver_data = MT_CLS_WIN_8_NO_STICKY_FINGERS, HID_DEVICE(HID_BUS_ANY, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_WINBOND, USB_DEVICE_ID_TSTP_MTOUCH) }, /* Wistron panels */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_WISTRON, USB_DEVICE_ID_WISTRON_OPTICAL_TOUCH) }, /* XAT */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_XAT, USB_DEVICE_ID_XAT_CSR) }, /* Xiroku */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_XIROKU, USB_DEVICE_ID_XIROKU_SPX) }, { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_XIROKU, USB_DEVICE_ID_XIROKU_MPX) }, { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_XIROKU, USB_DEVICE_ID_XIROKU_CSR) }, { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_XIROKU, USB_DEVICE_ID_XIROKU_SPX1) }, { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_XIROKU, USB_DEVICE_ID_XIROKU_MPX1) }, { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_XIROKU, USB_DEVICE_ID_XIROKU_CSR1) }, { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_XIROKU, USB_DEVICE_ID_XIROKU_SPX2) }, { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_XIROKU, USB_DEVICE_ID_XIROKU_MPX2) }, { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_XIROKU, USB_DEVICE_ID_XIROKU_CSR2) }, /* Google MT devices */ { .driver_data = MT_CLS_GOOGLE, HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_TOUCH_ROSE) }, { .driver_data = MT_CLS_GOOGLE, HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_WHISKERS) }, /* sis */ { .driver_data = MT_CLS_SIS, HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_SIS_TOUCH, HID_ANY_ID) }, /* Hantick */ { .driver_data = MT_CLS_NSMU, HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, I2C_VENDOR_ID_HANTICK, I2C_PRODUCT_ID_HANTICK_5288) }, /* Generic MT device */ { HID_DEVICE(HID_BUS_ANY, HID_GROUP_MULTITOUCH, HID_ANY_ID, HID_ANY_ID) }, /* Generic Win 8 certified MT device */ { .driver_data = MT_CLS_WIN_8, HID_DEVICE(HID_BUS_ANY, HID_GROUP_MULTITOUCH_WIN_8, HID_ANY_ID, 
HID_ANY_ID) }, { } }; MODULE_DEVICE_TABLE(hid, mt_devices); static const struct hid_usage_id mt_grabbed_usages[] = { { HID_ANY_ID, HID_ANY_ID, HID_ANY_ID }, { HID_ANY_ID - 1, HID_ANY_ID - 1, HID_ANY_ID - 1} }; static struct hid_driver mt_driver = { .name = "hid-multitouch", .id_table = mt_devices, .probe = mt_probe, .remove = mt_remove, .input_mapping = mt_input_mapping, .input_mapped = mt_input_mapped, .input_configured = mt_input_configured, .feature_mapping = mt_feature_mapping, .usage_table = mt_grabbed_usages, .event = mt_event, .report_fixup = mt_report_fixup, .report = mt_report, .suspend = pm_ptr(mt_suspend), .reset_resume = pm_ptr(mt_reset_resume), .resume = pm_ptr(mt_resume), }; module_hid_driver(mt_driver);
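/*
 * A minimal userspace sketch of the scan-time handling described in
 * mt_compute_timestamp() above: HID_DG_SCANTIME ticks are 100us units and
 * the raw counter wraps at its logical maximum, so a negative delta is
 * folded back into range before being accumulated. The 16-bit maximum and
 * the sample values below are illustrative assumptions, not taken from any
 * particular device, and the resync-on-idle path (MAX_TIMESTAMP_INTERVAL)
 * is omitted.
 */
#include <stdio.h>

static long prev_scantime;	/* last raw scan-time value seen */

static long advance_timestamp(long timestamp_us, long value, long logical_max)
{
	long delta = value - prev_scantime;

	prev_scantime = value;
	if (delta < 0)			/* raw counter wrapped around */
		delta += logical_max;
	return timestamp_us + delta * 100;	/* 100us ticks -> microseconds */
}

int main(void)
{
	long ts = 0;

	prev_scantime = 0xfff0;
	ts = advance_timestamp(ts, 0x0010, 0xffff);
	printf("timestamp after wrap: %ld us\n", ts);	/* prints 3100 */
	return 0;
}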
// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C)2003,2004 USAGI/WIDE Project * * Authors Mitsuru KANDA <mk@linux-ipv6.org> * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> * * Based on net/ipv4/xfrm4_tunnel.c */ #include <linux/module.h> #include <linux/xfrm.h> #include <linux/slab.h> #include <linux/rculist.h> #include <net/ip.h> #include <net/xfrm.h> #include <net/ipv6.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/mutex.h> #include <net/netns/generic.h> #define XFRM6_TUNNEL_SPI_BYADDR_HSIZE 256 #define XFRM6_TUNNEL_SPI_BYSPI_HSIZE 256 #define XFRM6_TUNNEL_SPI_MIN 1 #define XFRM6_TUNNEL_SPI_MAX 0xffffffff struct xfrm6_tunnel_net { struct hlist_head spi_byaddr[XFRM6_TUNNEL_SPI_BYADDR_HSIZE]; struct hlist_head spi_byspi[XFRM6_TUNNEL_SPI_BYSPI_HSIZE]; u32 spi; }; static unsigned int xfrm6_tunnel_net_id __read_mostly; static inline struct xfrm6_tunnel_net *xfrm6_tunnel_pernet(struct net *net) { return net_generic(net, xfrm6_tunnel_net_id); } /* * xfrm_tunnel_spi things are for allocating unique id ("spi") * per xfrm_address_t. 
*/ struct xfrm6_tunnel_spi { struct hlist_node list_byaddr; struct hlist_node list_byspi; xfrm_address_t addr; u32 spi; refcount_t refcnt; struct rcu_head rcu_head; }; static DEFINE_SPINLOCK(xfrm6_tunnel_spi_lock); static struct kmem_cache *xfrm6_tunnel_spi_kmem __read_mostly; static inline unsigned int xfrm6_tunnel_spi_hash_byaddr(const xfrm_address_t *addr) { unsigned int h; h = ipv6_addr_hash((const struct in6_addr *)addr); h ^= h >> 16; h ^= h >> 8; h &= XFRM6_TUNNEL_SPI_BYADDR_HSIZE - 1; return h; } static inline unsigned int xfrm6_tunnel_spi_hash_byspi(u32 spi) { return spi % XFRM6_TUNNEL_SPI_BYSPI_HSIZE; } static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr) { struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); struct xfrm6_tunnel_spi *x6spi; hlist_for_each_entry_rcu(x6spi, &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], list_byaddr, lockdep_is_held(&xfrm6_tunnel_spi_lock)) { if (xfrm6_addr_equal(&x6spi->addr, saddr)) return x6spi; } return NULL; } __be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr) { struct xfrm6_tunnel_spi *x6spi; u32 spi; rcu_read_lock_bh(); x6spi = __xfrm6_tunnel_spi_lookup(net, saddr); spi = x6spi ? x6spi->spi : 0; rcu_read_unlock_bh(); return htonl(spi); } EXPORT_SYMBOL(xfrm6_tunnel_spi_lookup); static int __xfrm6_tunnel_spi_check(struct net *net, u32 spi) { struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); struct xfrm6_tunnel_spi *x6spi; int index = xfrm6_tunnel_spi_hash_byspi(spi); hlist_for_each_entry(x6spi, &xfrm6_tn->spi_byspi[index], list_byspi) { if (x6spi->spi == spi) return -1; } return index; } static u32 __xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr) { struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); u32 spi; struct xfrm6_tunnel_spi *x6spi; int index; if (xfrm6_tn->spi < XFRM6_TUNNEL_SPI_MIN || xfrm6_tn->spi >= XFRM6_TUNNEL_SPI_MAX) xfrm6_tn->spi = XFRM6_TUNNEL_SPI_MIN; else xfrm6_tn->spi++; for (spi = xfrm6_tn->spi; spi <= XFRM6_TUNNEL_SPI_MAX; spi++) { index = __xfrm6_tunnel_spi_check(net, spi); if (index >= 0) goto alloc_spi; if (spi == XFRM6_TUNNEL_SPI_MAX) break; } for (spi = XFRM6_TUNNEL_SPI_MIN; spi < xfrm6_tn->spi; spi++) { index = __xfrm6_tunnel_spi_check(net, spi); if (index >= 0) goto alloc_spi; } spi = 0; goto out; alloc_spi: xfrm6_tn->spi = spi; x6spi = kmem_cache_alloc(xfrm6_tunnel_spi_kmem, GFP_ATOMIC); if (!x6spi) goto out; memcpy(&x6spi->addr, saddr, sizeof(x6spi->addr)); x6spi->spi = spi; refcount_set(&x6spi->refcnt, 1); hlist_add_head_rcu(&x6spi->list_byspi, &xfrm6_tn->spi_byspi[index]); index = xfrm6_tunnel_spi_hash_byaddr(saddr); hlist_add_head_rcu(&x6spi->list_byaddr, &xfrm6_tn->spi_byaddr[index]); out: return spi; } __be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr) { struct xfrm6_tunnel_spi *x6spi; u32 spi; spin_lock_bh(&xfrm6_tunnel_spi_lock); x6spi = __xfrm6_tunnel_spi_lookup(net, saddr); if (x6spi) { refcount_inc(&x6spi->refcnt); spi = x6spi->spi; } else spi = __xfrm6_tunnel_alloc_spi(net, saddr); spin_unlock_bh(&xfrm6_tunnel_spi_lock); return htonl(spi); } EXPORT_SYMBOL(xfrm6_tunnel_alloc_spi); static void x6spi_destroy_rcu(struct rcu_head *head) { kmem_cache_free(xfrm6_tunnel_spi_kmem, container_of(head, struct xfrm6_tunnel_spi, rcu_head)); } static void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr) { struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); struct xfrm6_tunnel_spi *x6spi; struct hlist_node *n; 
spin_lock_bh(&xfrm6_tunnel_spi_lock); hlist_for_each_entry_safe(x6spi, n, &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], list_byaddr) { if (xfrm6_addr_equal(&x6spi->addr, saddr)) { if (refcount_dec_and_test(&x6spi->refcnt)) { hlist_del_rcu(&x6spi->list_byaddr); hlist_del_rcu(&x6spi->list_byspi); call_rcu(&x6spi->rcu_head, x6spi_destroy_rcu); break; } } } spin_unlock_bh(&xfrm6_tunnel_spi_lock); } static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) { skb_push(skb, -skb_network_offset(skb)); return 0; } static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { return skb_network_header(skb)[IP6CB(skb)->nhoff]; } static int xfrm6_tunnel_rcv(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); const struct ipv6hdr *iph = ipv6_hdr(skb); __be32 spi; spi = xfrm6_tunnel_spi_lookup(net, (const xfrm_address_t *)&iph->saddr); return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi, NULL); } static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { /* xfrm6_tunnel native err handling */ switch (type) { case ICMPV6_DEST_UNREACH: switch (code) { case ICMPV6_NOROUTE: case ICMPV6_ADM_PROHIBITED: case ICMPV6_NOT_NEIGHBOUR: case ICMPV6_ADDR_UNREACH: case ICMPV6_PORT_UNREACH: default: break; } break; case ICMPV6_PKT_TOOBIG: break; case ICMPV6_TIME_EXCEED: switch (code) { case ICMPV6_EXC_HOPLIMIT: break; case ICMPV6_EXC_FRAGTIME: default: break; } break; case ICMPV6_PARAMPROB: switch (code) { case ICMPV6_HDR_FIELD: break; case ICMPV6_UNK_NEXTHDR: break; case ICMPV6_UNK_OPTION: break; } break; default: break; } return 0; } static int xfrm6_tunnel_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { if (x->props.mode != XFRM_MODE_TUNNEL) { NL_SET_ERR_MSG(extack, "IPv6 tunnel can only be used with tunnel mode"); return -EINVAL; } if (x->encap) { NL_SET_ERR_MSG(extack, "IPv6 tunnel is not compatible with encapsulation"); return -EINVAL; } x->props.header_len = sizeof(struct ipv6hdr); return 0; } static void xfrm6_tunnel_destroy(struct xfrm_state *x) { struct net *net = xs_net(x); xfrm6_tunnel_free_spi(net, (xfrm_address_t *)&x->props.saddr); } static const struct xfrm_type xfrm6_tunnel_type = { .owner = THIS_MODULE, .proto = IPPROTO_IPV6, .init_state = xfrm6_tunnel_init_state, .destructor = xfrm6_tunnel_destroy, .input = xfrm6_tunnel_input, .output = xfrm6_tunnel_output, }; static struct xfrm6_tunnel xfrm6_tunnel_handler __read_mostly = { .handler = xfrm6_tunnel_rcv, .err_handler = xfrm6_tunnel_err, .priority = 3, }; static struct xfrm6_tunnel xfrm46_tunnel_handler __read_mostly = { .handler = xfrm6_tunnel_rcv, .err_handler = xfrm6_tunnel_err, .priority = 3, }; static int __net_init xfrm6_tunnel_net_init(struct net *net) { struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); unsigned int i; for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) INIT_HLIST_HEAD(&xfrm6_tn->spi_byaddr[i]); for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++) INIT_HLIST_HEAD(&xfrm6_tn->spi_byspi[i]); xfrm6_tn->spi = 0; return 0; } static void __net_exit xfrm6_tunnel_net_exit(struct net *net) { struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); unsigned int i; xfrm_flush_gc(); xfrm_state_flush(net, 0, false, true); for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i])); for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++) WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byspi[i])); } static struct pernet_operations xfrm6_tunnel_net_ops = { .init = 
xfrm6_tunnel_net_init, .exit = xfrm6_tunnel_net_exit, .id = &xfrm6_tunnel_net_id, .size = sizeof(struct xfrm6_tunnel_net), }; static int __init xfrm6_tunnel_init(void) { int rv; xfrm6_tunnel_spi_kmem = KMEM_CACHE(xfrm6_tunnel_spi, SLAB_HWCACHE_ALIGN); if (!xfrm6_tunnel_spi_kmem) return -ENOMEM; rv = register_pernet_subsys(&xfrm6_tunnel_net_ops); if (rv < 0) goto out_pernet; rv = xfrm_register_type(&xfrm6_tunnel_type, AF_INET6); if (rv < 0) goto out_type; rv = xfrm6_tunnel_register(&xfrm6_tunnel_handler, AF_INET6); if (rv < 0) goto out_xfrm6; rv = xfrm6_tunnel_register(&xfrm46_tunnel_handler, AF_INET); if (rv < 0) goto out_xfrm46; return 0; out_xfrm46: xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6); out_xfrm6: xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6); out_type: unregister_pernet_subsys(&xfrm6_tunnel_net_ops); out_pernet: kmem_cache_destroy(xfrm6_tunnel_spi_kmem); return rv; } static void __exit xfrm6_tunnel_fini(void) { xfrm6_tunnel_deregister(&xfrm46_tunnel_handler, AF_INET); xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6); xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6); unregister_pernet_subsys(&xfrm6_tunnel_net_ops); /* Someone maybe has gotten the xfrm6_tunnel_spi. * So need to wait it. */ rcu_barrier(); kmem_cache_destroy(xfrm6_tunnel_spi_kmem); } module_init(xfrm6_tunnel_init); module_exit(xfrm6_tunnel_fini); MODULE_DESCRIPTION("IPv6 XFRM tunnel driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_IPV6);
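/*
 * A minimal userspace sketch of the wrap-around search performed by
 * __xfrm6_tunnel_alloc_spi(): hand out the next unused value in
 * [SPI_MIN, SPI_MAX], starting just after the last one allocated, and
 * wrap back to SPI_MIN when the top of the range is reached.  The
 * used[] array stands in for the per-net by-SPI hash table, and the
 * tiny range is only so the wrap is easy to see; none of these names
 * are kernel API.
 */
#include <stdbool.h>
#include <stdio.h>

#define SPI_MIN 1u
#define SPI_MAX 8u

static bool used[SPI_MAX + 1];
static unsigned int cursor;		/* models xfrm6_tn->spi */

static unsigned int alloc_spi(void)
{
	unsigned int start, spi;

	/* advance the cursor, wrapping to SPI_MIN like the kernel does */
	if (cursor < SPI_MIN || cursor >= SPI_MAX)
		cursor = SPI_MIN;
	else
		cursor++;
	start = cursor;

	for (spi = start; spi <= SPI_MAX; spi++)
		if (!used[spi])
			goto found;
	for (spi = SPI_MIN; spi < start; spi++)
		if (!used[spi])
			goto found;
	return 0;			/* range exhausted: 0 means "no SPI" */
found:
	cursor = spi;
	used[spi] = true;
	return spi;
}

int main(void)
{
	used[2] = used[3] = true;	/* pretend two SPIs are already taken */
	for (int i = 0; i < 4; i++)
		printf("allocated spi %u\n", alloc_spi());
	return 0;			/* prints 1, 4, 5, 6 */
}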
8 176 179 178 296 295 297 224 17 78 131 163 25 56 83 26 68 70 34 3 74 9 176 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _XFRM_HASH_H #define _XFRM_HASH_H #include <linux/xfrm.h> #include <linux/socket.h> #include <linux/jhash.h> static inline unsigned int __xfrm4_addr_hash(const xfrm_address_t *addr) { return ntohl(addr->a4); } static inline unsigned int __xfrm6_addr_hash(const xfrm_address_t *addr) { return jhash2((__force u32 *)addr->a6, 4, 0); } static inline unsigned int __xfrm4_daddr_saddr_hash(const xfrm_address_t *daddr, const xfrm_address_t *saddr) { u32 sum = (__force u32)daddr->a4 + (__force u32)saddr->a4; return ntohl((__force __be32)sum); } static inline unsigned int __xfrm6_daddr_saddr_hash(const xfrm_address_t *daddr, const xfrm_address_t *saddr) { return __xfrm6_addr_hash(daddr) ^ __xfrm6_addr_hash(saddr); } static inline u32 __bits2mask32(__u8 bits) { u32 mask32 = 0xffffffff; if (bits == 0) mask32 = 0; else if (bits < 32) mask32 <<= (32 - bits); return mask32; } static inline unsigned int __xfrm4_dpref_spref_hash(const xfrm_address_t *daddr, const xfrm_address_t *saddr, __u8 dbits, __u8 sbits) { return jhash_2words(ntohl(daddr->a4) & __bits2mask32(dbits), ntohl(saddr->a4) & __bits2mask32(sbits), 0); } static inline unsigned int __xfrm6_pref_hash(const xfrm_address_t *addr, __u8 prefixlen) { unsigned int pdw; unsigned int pbi; u32 initval = 0; pdw = prefixlen >> 5; /* num of whole u32 in prefix */ pbi = prefixlen & 0x1f; /* num of bits in incomplete u32 in prefix */ if (pbi) { __be32 mask; mask = htonl((0xffffffff) << (32 - pbi)); initval = (__force u32)(addr->a6[pdw] & mask); } return jhash2((__force u32 *)addr->a6, pdw, initval); } static inline unsigned int __xfrm6_dpref_spref_hash(const xfrm_address_t *daddr, const xfrm_address_t *saddr, __u8 dbits, __u8 sbits) { return __xfrm6_pref_hash(daddr, dbits) ^ __xfrm6_pref_hash(saddr, sbits); } static inline unsigned int __xfrm_dst_hash(const xfrm_address_t *daddr, const xfrm_address_t *saddr, u32 reqid, unsigned short family, unsigned int hmask) { unsigned int h = family ^ reqid; switch (family) { case AF_INET: h ^= __xfrm4_daddr_saddr_hash(daddr, saddr); break; case AF_INET6: h ^= __xfrm6_daddr_saddr_hash(daddr, saddr); break; } return (h ^ (h >> 16)) & hmask; } static inline unsigned int __xfrm_src_hash(const xfrm_address_t *daddr, const xfrm_address_t *saddr, unsigned short family, unsigned int hmask) { unsigned int h = family; switch (family) { case AF_INET: h ^= __xfrm4_daddr_saddr_hash(daddr, saddr); break; case AF_INET6: h ^= __xfrm6_daddr_saddr_hash(daddr, saddr); break; } return (h ^ (h >> 16)) & hmask; } static inline unsigned int __xfrm_spi_hash(const xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family, unsigned int hmask) { unsigned int h = (__force u32)spi ^ proto; switch (family) { case AF_INET: h ^= 
__xfrm4_addr_hash(daddr); break; case AF_INET6: h ^= __xfrm6_addr_hash(daddr); break; } return (h ^ (h >> 10) ^ (h >> 20)) & hmask; } static inline unsigned int __xfrm_seq_hash(u32 seq, unsigned int hmask) { unsigned int h = seq; return (h ^ (h >> 10) ^ (h >> 20)) & hmask; } static inline unsigned int __idx_hash(u32 index, unsigned int hmask) { return (index ^ (index >> 8)) & hmask; } static inline unsigned int __sel_hash(const struct xfrm_selector *sel, unsigned short family, unsigned int hmask, u8 dbits, u8 sbits) { const xfrm_address_t *daddr = &sel->daddr; const xfrm_address_t *saddr = &sel->saddr; unsigned int h = 0; switch (family) { case AF_INET: if (sel->prefixlen_d < dbits || sel->prefixlen_s < sbits) return hmask + 1; h = __xfrm4_dpref_spref_hash(daddr, saddr, dbits, sbits); break; case AF_INET6: if (sel->prefixlen_d < dbits || sel->prefixlen_s < sbits) return hmask + 1; h = __xfrm6_dpref_spref_hash(daddr, saddr, dbits, sbits); break; } h ^= (h >> 16); return h & hmask; } static inline unsigned int __addr_hash(const xfrm_address_t *daddr, const xfrm_address_t *saddr, unsigned short family, unsigned int hmask, u8 dbits, u8 sbits) { unsigned int h = 0; switch (family) { case AF_INET: h = __xfrm4_dpref_spref_hash(daddr, saddr, dbits, sbits); break; case AF_INET6: h = __xfrm6_dpref_spref_hash(daddr, saddr, dbits, sbits); break; } h ^= (h >> 16); return h & hmask; } struct hlist_head *xfrm_hash_alloc(unsigned int sz); void xfrm_hash_free(struct hlist_head *n, unsigned int sz); #endif /* _XFRM_HASH_H */
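/*
 * A small userspace sketch of the prefix-mask helper feeding
 * __xfrm4_dpref_spref_hash() above: __bits2mask32() turns a prefix
 * length (0..32) into the corresponding network mask, which is ANDed
 * with the address before hashing.  The jhash_2words() step is omitted
 * here; only the mask construction is demonstrated, and the function
 * below is a local copy for illustration, not the kernel symbol.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t bits2mask32(uint8_t bits)
{
	uint32_t mask32 = 0xffffffff;

	if (bits == 0)
		mask32 = 0;
	else if (bits < 32)
		mask32 <<= (32 - bits);	/* keep only the top 'bits' bits */
	return mask32;
}

int main(void)
{
	uint8_t lens[] = { 0, 8, 24, 32 };

	for (unsigned int i = 0; i < sizeof(lens) / sizeof(lens[0]); i++)
		printf("/%-2u -> 0x%08x\n", (unsigned int)lens[i],
		       (unsigned int)bits2mask32(lens[i]));
	/* expected: /0 -> 0x00000000, /8 -> 0xff000000,
	 *           /24 -> 0xffffff00, /32 -> 0xffffffff */
	return 0;
}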
7 1 6 22 1 22 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 // SPDX-License-Identifier: GPL-2.0-only /* * TTL modification target for IP tables * (C) 2000,2005 by Harald Welte <laforge@netfilter.org> * * Hop Limit modification target for ip6tables * Maciej Soltysiak <solt@dns.toxicfilms.tv> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <net/checksum.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ipt_TTL.h> #include <linux/netfilter_ipv6/ip6t_HL.h> MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>"); MODULE_DESCRIPTION("Xtables: Hoplimit/TTL Limit field modification target"); MODULE_LICENSE("GPL"); static unsigned int ttl_tg(struct sk_buff *skb, const struct xt_action_param *par) { struct iphdr *iph; const struct ipt_TTL_info *info = par->targinfo; int new_ttl; if (skb_ensure_writable(skb, sizeof(*iph))) return NF_DROP; iph = ip_hdr(skb); switch (info->mode) { case IPT_TTL_SET: new_ttl = info->ttl; break; case IPT_TTL_INC: new_ttl = iph->ttl + info->ttl; if (new_ttl > 255) new_ttl = 255; break; case IPT_TTL_DEC: new_ttl = iph->ttl - info->ttl; if (new_ttl < 0) new_ttl = 0; break; default: new_ttl = iph->ttl; break; } if (new_ttl != iph->ttl) { csum_replace2(&iph->check, htons(iph->ttl << 8), htons(new_ttl << 8)); iph->ttl = new_ttl; } return XT_CONTINUE; } static unsigned int hl_tg6(struct sk_buff *skb, const struct xt_action_param *par) { struct ipv6hdr *ip6h; const struct ip6t_HL_info *info = par->targinfo; int new_hl; if (skb_ensure_writable(skb, sizeof(*ip6h))) return NF_DROP; ip6h = ipv6_hdr(skb); switch (info->mode) { case IP6T_HL_SET: new_hl = info->hop_limit; break; case IP6T_HL_INC: new_hl = ip6h->hop_limit + info->hop_limit; if (new_hl > 255) new_hl = 255; break; case IP6T_HL_DEC: new_hl = ip6h->hop_limit - info->hop_limit; if (new_hl < 0) new_hl = 0; break; default: new_hl = ip6h->hop_limit; break; } ip6h->hop_limit = new_hl; return XT_CONTINUE; } static int ttl_tg_check(const struct xt_tgchk_param *par) { const struct ipt_TTL_info *info = par->targinfo; if (info->mode > IPT_TTL_MAXMODE) return -EINVAL; if (info->mode != IPT_TTL_SET && info->ttl == 0) return -EINVAL; return 0; } static int hl_tg6_check(const struct xt_tgchk_param *par) { const struct ip6t_HL_info *info = par->targinfo; if (info->mode > IP6T_HL_MAXMODE) return -EINVAL; if (info->mode != IP6T_HL_SET && info->hop_limit == 0) return -EINVAL; return 0; } static struct xt_target hl_tg_reg[] __read_mostly = { { .name = "TTL", .revision = 0, .family = NFPROTO_IPV4, .target = ttl_tg, .targetsize = sizeof(struct ipt_TTL_info), .table = "mangle", .checkentry = ttl_tg_check, .me = THIS_MODULE, }, { .name = "HL", .revision = 0, .family = NFPROTO_IPV6, .target = hl_tg6, .targetsize = sizeof(struct ip6t_HL_info), .table = "mangle", .checkentry = hl_tg6_check, .me = THIS_MODULE, }, }; static int __init hl_tg_init(void) { return xt_register_targets(hl_tg_reg, 
				   ARRAY_SIZE(hl_tg_reg));
}

static void __exit hl_tg_exit(void)
{
	xt_unregister_targets(hl_tg_reg, ARRAY_SIZE(hl_tg_reg));
}

module_init(hl_tg_init);
module_exit(hl_tg_exit);
MODULE_ALIAS("ipt_TTL");
MODULE_ALIAS("ip6t_HL");
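/*
 * A userspace sketch of the incremental checksum update the TTL target
 * relies on: rather than recomputing the whole IPv4 header checksum
 * after rewriting the TTL, csum_replace2() folds the old 16-bit word
 * out and the new one in (RFC 1624, eqn. 3).  The helpers and the
 * sample header below are local to this demo; the cross-check against
 * a full recomputation shows the two agree.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static uint16_t csum16_add(uint16_t a, uint16_t b)
{
	uint32_t s = (uint32_t)a + b;

	return (uint16_t)((s & 0xffff) + (s >> 16));
}

/* check' = ~(~check + ~old + new), all one's-complement (RFC 1624) */
static uint16_t csum_replace2(uint16_t check, uint16_t old_word, uint16_t new_word)
{
	return (uint16_t)~csum16_add(csum16_add((uint16_t)~check,
						(uint16_t)~old_word), new_word);
}

/* full checksum over the header, for cross-checking */
static uint16_t ip_checksum(const uint8_t *hdr, size_t len)
{
	uint32_t sum = 0;

	for (size_t i = 0; i < len; i += 2)
		sum += (uint32_t)(hdr[i] << 8 | hdr[i + 1]);
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	/* minimal 20-byte IPv4 header, checksum field (bytes 10-11) left zero */
	uint8_t hdr[20] = { 0x45, 0, 0, 20, 0, 0, 0, 0, 64, 6,
			    0, 0, 10, 0, 0, 1, 10, 0, 0, 2 };
	uint16_t check = ip_checksum(hdr, sizeof(hdr));
	uint8_t old_ttl = hdr[8], new_ttl = old_ttl - 1;

	/* TTL is the high byte of header word 4, as in ttl_tg() above */
	uint16_t inc = csum_replace2(check,
				     (uint16_t)(old_ttl << 8 | hdr[9]),
				     (uint16_t)(new_ttl << 8 | hdr[9]));

	hdr[8] = new_ttl;
	printf("incremental 0x%04x, recomputed 0x%04x -> %s\n",
	       (unsigned int)inc, (unsigned int)ip_checksum(hdr, sizeof(hdr)),
	       inc == ip_checksum(hdr, sizeof(hdr)) ? "match" : "MISMATCH");
	return 0;
}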
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_PATH_H
#define _LINUX_PATH_H

struct dentry;
struct vfsmount;

struct path {
	struct vfsmount *mnt;
	struct dentry *dentry;
} __randomize_layout;

extern void path_get(const struct path *);
extern void path_put(const struct path *);

static inline int path_equal(const struct path *path1, const struct path *path2)
{
	return path1->mnt == path2->mnt && path1->dentry == path2->dentry;
}

/*
 * Cleanup macro for use with __free(path_put). Avoids dereference and
 * copying @path unlike DEFINE_FREE(). path_put() will handle the empty
 * path correctly just ensure @path is initialized:
 *
 *	struct path path __free(path_put) = {};
 */
#define __free_path_put path_put

#endif  /* _LINUX_PATH_H */
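/*
 * A tiny userspace illustration of why path_equal() compares both
 * members: the same dentry reached through two different mounts (for
 * example via a bind mount) is a different path.  The structs here are
 * stand-ins for the kernel types, trimmed to what the comparison needs.
 */
#include <stdio.h>

struct vfsmount { int id; };
struct dentry { int id; };

struct path {
	struct vfsmount *mnt;
	struct dentry *dentry;
};

static int path_equal(const struct path *a, const struct path *b)
{
	return a->mnt == b->mnt && a->dentry == b->dentry;
}

int main(void)
{
	struct vfsmount m1 = { 1 }, m2 = { 2 };	/* m2: think "bind mount of m1" */
	struct dentry d = { 42 };		/* same underlying object */
	struct path p1 = { &m1, &d }, p2 = { &m2, &d }, p3 = { &m1, &d };

	printf("same dentry, different mount: %d\n", path_equal(&p1, &p2));
	printf("same mount and dentry:        %d\n", path_equal(&p1, &p3));
	return 0;
}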
2 35 37 37 1 1 1 1 1 2 27 5 29 3 29 3 6 1 2 3 25 1 1 5 21 12 12 12 8 12 6 12 8 8 2 1 1 25 9 25 25 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 /* * linux/fs/hfs/mdb.c * * Copyright (C) 1995-1997 Paul H. Hargrove * (C) 2003 Ardis Technologies <roman@ardistech.com> * This file may be distributed under the terms of the GNU General Public License. * * This file contains functions for reading/writing the MDB. */ #include <linux/cdrom.h> #include <linux/blkdev.h> #include <linux/nls.h> #include <linux/slab.h> #include "hfs_fs.h" #include "btree.h" /*================ File-local data types ================*/ /* * The HFS Master Directory Block (MDB). * * Also known as the Volume Information Block (VIB), this structure is * the HFS equivalent of a superblock. * * Reference: _Inside Macintosh: Files_ pages 2-59 through 2-62 * * modified for HFS Extended */ static int hfs_get_last_session(struct super_block *sb, sector_t *start, sector_t *size) { struct cdrom_device_info *cdi = disk_to_cdi(sb->s_bdev->bd_disk); /* default values */ *start = 0; *size = bdev_nr_sectors(sb->s_bdev); if (HFS_SB(sb)->session >= 0) { struct cdrom_tocentry te; if (!cdi) return -EINVAL; te.cdte_track = HFS_SB(sb)->session; te.cdte_format = CDROM_LBA; if (cdrom_read_tocentry(cdi, &te) || (te.cdte_ctrl & CDROM_DATA_TRACK) != 4) { pr_err("invalid session number or type of track\n"); return -EINVAL; } *start = (sector_t)te.cdte_addr.lba << 2; } else if (cdi) { struct cdrom_multisession ms_info; ms_info.addr_format = CDROM_LBA; if (cdrom_multisession(cdi, &ms_info) == 0 && ms_info.xa_flag) *start = (sector_t)ms_info.addr.lba << 2; } return 0; } /* * hfs_mdb_get() * * Build the in-core MDB for a filesystem, including * the B-trees and the volume bitmap. 
*/ int hfs_mdb_get(struct super_block *sb) { struct buffer_head *bh; struct hfs_mdb *mdb, *mdb2; unsigned int block; char *ptr; int off2, len, size, sect; sector_t part_start, part_size; loff_t off; __be16 attrib; /* set the device driver to 512-byte blocks */ size = sb_min_blocksize(sb, HFS_SECTOR_SIZE); if (!size) return -EINVAL; if (hfs_get_last_session(sb, &part_start, &part_size)) return -EINVAL; while (1) { /* See if this is an HFS filesystem */ bh = sb_bread512(sb, part_start + HFS_MDB_BLK, mdb); if (!bh) goto out; if (mdb->drSigWord == cpu_to_be16(HFS_SUPER_MAGIC)) break; brelse(bh); /* check for a partition block * (should do this only for cdrom/loop though) */ if (hfs_part_find(sb, &part_start, &part_size)) goto out; } HFS_SB(sb)->alloc_blksz = size = be32_to_cpu(mdb->drAlBlkSiz); if (!size || (size & (HFS_SECTOR_SIZE - 1))) { pr_err("bad allocation block size %d\n", size); goto out_bh; } size = min(HFS_SB(sb)->alloc_blksz, (u32)PAGE_SIZE); /* size must be a multiple of 512 */ while (size & (size - 1)) size -= HFS_SECTOR_SIZE; sect = be16_to_cpu(mdb->drAlBlSt) + part_start; /* align block size to first sector */ while (sect & ((size - 1) >> HFS_SECTOR_SIZE_BITS)) size >>= 1; /* align block size to weird alloc size */ while (HFS_SB(sb)->alloc_blksz & (size - 1)) size >>= 1; brelse(bh); if (!sb_set_blocksize(sb, size)) { pr_err("unable to set blocksize to %u\n", size); goto out; } bh = sb_bread512(sb, part_start + HFS_MDB_BLK, mdb); if (!bh) goto out; if (mdb->drSigWord != cpu_to_be16(HFS_SUPER_MAGIC)) goto out_bh; HFS_SB(sb)->mdb_bh = bh; HFS_SB(sb)->mdb = mdb; /* These parameters are read from the MDB, and never written */ HFS_SB(sb)->part_start = part_start; HFS_SB(sb)->fs_ablocks = be16_to_cpu(mdb->drNmAlBlks); HFS_SB(sb)->fs_div = HFS_SB(sb)->alloc_blksz >> sb->s_blocksize_bits; HFS_SB(sb)->clumpablks = be32_to_cpu(mdb->drClpSiz) / HFS_SB(sb)->alloc_blksz; if (!HFS_SB(sb)->clumpablks) HFS_SB(sb)->clumpablks = 1; HFS_SB(sb)->fs_start = (be16_to_cpu(mdb->drAlBlSt) + part_start) >> (sb->s_blocksize_bits - HFS_SECTOR_SIZE_BITS); /* These parameters are read from and written to the MDB */ HFS_SB(sb)->free_ablocks = be16_to_cpu(mdb->drFreeBks); HFS_SB(sb)->next_id = be32_to_cpu(mdb->drNxtCNID); HFS_SB(sb)->root_files = be16_to_cpu(mdb->drNmFls); HFS_SB(sb)->root_dirs = be16_to_cpu(mdb->drNmRtDirs); HFS_SB(sb)->file_count = be32_to_cpu(mdb->drFilCnt); HFS_SB(sb)->folder_count = be32_to_cpu(mdb->drDirCnt); /* TRY to get the alternate (backup) MDB. 
*/ sect = part_start + part_size - 2; bh = sb_bread512(sb, sect, mdb2); if (bh) { if (mdb2->drSigWord == cpu_to_be16(HFS_SUPER_MAGIC)) { HFS_SB(sb)->alt_mdb_bh = bh; HFS_SB(sb)->alt_mdb = mdb2; } else brelse(bh); } if (!HFS_SB(sb)->alt_mdb) { pr_warn("unable to locate alternate MDB\n"); pr_warn("continuing without an alternate MDB\n"); } HFS_SB(sb)->bitmap = kmalloc(8192, GFP_KERNEL); if (!HFS_SB(sb)->bitmap) goto out; /* read in the bitmap */ block = be16_to_cpu(mdb->drVBMSt) + part_start; off = (loff_t)block << HFS_SECTOR_SIZE_BITS; size = (HFS_SB(sb)->fs_ablocks + 8) / 8; ptr = (u8 *)HFS_SB(sb)->bitmap; while (size) { bh = sb_bread(sb, off >> sb->s_blocksize_bits); if (!bh) { pr_err("unable to read volume bitmap\n"); goto out; } off2 = off & (sb->s_blocksize - 1); len = min((int)sb->s_blocksize - off2, size); memcpy(ptr, bh->b_data + off2, len); brelse(bh); ptr += len; off += len; size -= len; } HFS_SB(sb)->ext_tree = hfs_btree_open(sb, HFS_EXT_CNID, hfs_ext_keycmp); if (!HFS_SB(sb)->ext_tree) { pr_err("unable to open extent tree\n"); goto out; } HFS_SB(sb)->cat_tree = hfs_btree_open(sb, HFS_CAT_CNID, hfs_cat_keycmp); if (!HFS_SB(sb)->cat_tree) { pr_err("unable to open catalog tree\n"); goto out; } attrib = mdb->drAtrb; if (!(attrib & cpu_to_be16(HFS_SB_ATTRIB_UNMNT))) { pr_warn("filesystem was not cleanly unmounted, running fsck.hfs is recommended. mounting read-only.\n"); sb->s_flags |= SB_RDONLY; } if ((attrib & cpu_to_be16(HFS_SB_ATTRIB_SLOCK))) { pr_warn("filesystem is marked locked, mounting read-only.\n"); sb->s_flags |= SB_RDONLY; } if (!sb_rdonly(sb)) { /* Mark the volume uncleanly unmounted in case we crash */ attrib &= cpu_to_be16(~HFS_SB_ATTRIB_UNMNT); attrib |= cpu_to_be16(HFS_SB_ATTRIB_INCNSTNT); mdb->drAtrb = attrib; be32_add_cpu(&mdb->drWrCnt, 1); mdb->drLsMod = hfs_mtime(); mark_buffer_dirty(HFS_SB(sb)->mdb_bh); sync_dirty_buffer(HFS_SB(sb)->mdb_bh); } return 0; out_bh: brelse(bh); out: hfs_mdb_put(sb); return -EIO; } /* * hfs_mdb_commit() * * Description: * This updates the MDB on disk. * It does not check, if the superblock has been modified, or * if the filesystem has been mounted read-only. It is mainly * called by hfs_sync_fs() and flush_mdb(). * Input Variable(s): * struct hfs_mdb *mdb: Pointer to the hfs MDB * int backup; * Output Variable(s): * NONE * Returns: * void * Preconditions: * 'mdb' points to a "valid" (struct hfs_mdb). * Postconditions: * The HFS MDB and on disk will be updated, by copying the possibly * modified fields from the in memory MDB (in native byte order) to * the disk block buffer. * If 'backup' is non-zero then the alternate MDB is also written * and the function doesn't return until it is actually on disk. */ void hfs_mdb_commit(struct super_block *sb) { struct hfs_mdb *mdb = HFS_SB(sb)->mdb; if (sb_rdonly(sb)) return; lock_buffer(HFS_SB(sb)->mdb_bh); if (test_and_clear_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags)) { /* These parameters may have been modified, so write them back */ mdb->drLsMod = hfs_mtime(); mdb->drFreeBks = cpu_to_be16(HFS_SB(sb)->free_ablocks); mdb->drNxtCNID = cpu_to_be32(HFS_SB(sb)->next_id); mdb->drNmFls = cpu_to_be16(HFS_SB(sb)->root_files); mdb->drNmRtDirs = cpu_to_be16(HFS_SB(sb)->root_dirs); mdb->drFilCnt = cpu_to_be32(HFS_SB(sb)->file_count); mdb->drDirCnt = cpu_to_be32(HFS_SB(sb)->folder_count); /* write MDB to disk */ mark_buffer_dirty(HFS_SB(sb)->mdb_bh); } /* write the backup MDB, not returning until it is written. * we only do this when either the catalog or extents overflow * files grow. 
*/ if (test_and_clear_bit(HFS_FLG_ALT_MDB_DIRTY, &HFS_SB(sb)->flags) && HFS_SB(sb)->alt_mdb) { hfs_inode_write_fork(HFS_SB(sb)->ext_tree->inode, mdb->drXTExtRec, &mdb->drXTFlSize, NULL); hfs_inode_write_fork(HFS_SB(sb)->cat_tree->inode, mdb->drCTExtRec, &mdb->drCTFlSize, NULL); lock_buffer(HFS_SB(sb)->alt_mdb_bh); memcpy(HFS_SB(sb)->alt_mdb, HFS_SB(sb)->mdb, HFS_SECTOR_SIZE); HFS_SB(sb)->alt_mdb->drAtrb |= cpu_to_be16(HFS_SB_ATTRIB_UNMNT); HFS_SB(sb)->alt_mdb->drAtrb &= cpu_to_be16(~HFS_SB_ATTRIB_INCNSTNT); unlock_buffer(HFS_SB(sb)->alt_mdb_bh); mark_buffer_dirty(HFS_SB(sb)->alt_mdb_bh); sync_dirty_buffer(HFS_SB(sb)->alt_mdb_bh); } if (test_and_clear_bit(HFS_FLG_BITMAP_DIRTY, &HFS_SB(sb)->flags)) { struct buffer_head *bh; sector_t block; char *ptr; int off, size, len; block = be16_to_cpu(HFS_SB(sb)->mdb->drVBMSt) + HFS_SB(sb)->part_start; off = (block << HFS_SECTOR_SIZE_BITS) & (sb->s_blocksize - 1); block >>= sb->s_blocksize_bits - HFS_SECTOR_SIZE_BITS; size = (HFS_SB(sb)->fs_ablocks + 7) / 8; ptr = (u8 *)HFS_SB(sb)->bitmap; while (size) { bh = sb_bread(sb, block); if (!bh) { pr_err("unable to read volume bitmap\n"); break; } len = min((int)sb->s_blocksize - off, size); lock_buffer(bh); memcpy(bh->b_data + off, ptr, len); unlock_buffer(bh); mark_buffer_dirty(bh); brelse(bh); block++; off = 0; ptr += len; size -= len; } } unlock_buffer(HFS_SB(sb)->mdb_bh); } void hfs_mdb_close(struct super_block *sb) { /* update volume attributes */ if (sb_rdonly(sb)) return; HFS_SB(sb)->mdb->drAtrb |= cpu_to_be16(HFS_SB_ATTRIB_UNMNT); HFS_SB(sb)->mdb->drAtrb &= cpu_to_be16(~HFS_SB_ATTRIB_INCNSTNT); mark_buffer_dirty(HFS_SB(sb)->mdb_bh); } /* * hfs_mdb_put() * * Release the resources associated with the in-core MDB. */ void hfs_mdb_put(struct super_block *sb) { if (!HFS_SB(sb)) return; /* free the B-trees */ hfs_btree_close(HFS_SB(sb)->ext_tree); hfs_btree_close(HFS_SB(sb)->cat_tree); /* free the buffers holding the primary and alternate MDBs */ brelse(HFS_SB(sb)->mdb_bh); brelse(HFS_SB(sb)->alt_mdb_bh); unload_nls(HFS_SB(sb)->nls_io); unload_nls(HFS_SB(sb)->nls_disk); kfree(HFS_SB(sb)->bitmap); kfree(HFS_SB(sb)); sb->s_fs_info = NULL; }
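/*
 * A userspace sketch of the chunked copy pattern hfs_mdb_get() uses to
 * read the volume bitmap: a byte range starting at an arbitrary offset
 * is pulled in one block at a time, and only the first block needs an
 * intra-block offset.  read_block() stands in for sb_bread(), and the
 * block size and range are made up for the demo.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BLOCKSIZE 4096

/* pretend block device: block n is filled with the byte value n */
static void read_block(uint64_t blk, uint8_t *buf)
{
	memset(buf, (int)(blk & 0xff), BLOCKSIZE);
}

static void copy_range(uint64_t off, int size, uint8_t *dst)
{
	uint8_t blkbuf[BLOCKSIZE];

	while (size) {
		int off2 = (int)(off & (BLOCKSIZE - 1));	/* offset inside this block */
		int len = BLOCKSIZE - off2;

		if (len > size)
			len = size;
		read_block(off / BLOCKSIZE, blkbuf);
		memcpy(dst, blkbuf + off2, (size_t)len);
		dst += len;
		off += (uint64_t)len;
		size -= len;
	}
}

int main(void)
{
	static uint8_t out[10000];

	copy_range(4000, sizeof(out), out);	/* starts 96 bytes before a block edge */
	printf("first byte came from block %u, last from block %u\n",
	       (unsigned int)out[0], (unsigned int)out[sizeof(out) - 1]);
	return 0;
}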
1 2 2 1 1 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 // SPDX-License-Identifier: GPL-2.0-only /* * Sally Floyd's High Speed TCP (RFC 3649) congestion control * * See https://www.icir.org/floyd/hstcp.html * * John Heffner <jheffner@psc.edu> */ #include <linux/module.h> #include <net/tcp.h> /* From AIMD tables from RFC 3649 appendix B, * with fixed-point MD scaled <<8. */ static const struct hstcp_aimd_val { unsigned int cwnd; unsigned int md; } hstcp_aimd_vals[] = { { 38, 128, /* 0.50 */ }, { 118, 112, /* 0.44 */ }, { 221, 104, /* 0.41 */ }, { 347, 98, /* 0.38 */ }, { 495, 93, /* 0.37 */ }, { 663, 89, /* 0.35 */ }, { 851, 86, /* 0.34 */ }, { 1058, 83, /* 0.33 */ }, { 1284, 81, /* 0.32 */ }, { 1529, 78, /* 0.31 */ }, { 1793, 76, /* 0.30 */ }, { 2076, 74, /* 0.29 */ }, { 2378, 72, /* 0.28 */ }, { 2699, 71, /* 0.28 */ }, { 3039, 69, /* 0.27 */ }, { 3399, 68, /* 0.27 */ }, { 3778, 66, /* 0.26 */ }, { 4177, 65, /* 0.26 */ }, { 4596, 64, /* 0.25 */ }, { 5036, 62, /* 0.25 */ }, { 5497, 61, /* 0.24 */ }, { 5979, 60, /* 0.24 */ }, { 6483, 59, /* 0.23 */ }, { 7009, 58, /* 0.23 */ }, { 7558, 57, /* 0.22 */ }, { 8130, 56, /* 0.22 */ }, { 8726, 55, /* 0.22 */ }, { 9346, 54, /* 0.21 */ }, { 9991, 53, /* 0.21 */ }, { 10661, 52, /* 0.21 */ }, { 11358, 52, /* 0.20 */ }, { 12082, 51, /* 0.20 */ }, { 12834, 50, /* 0.20 */ }, { 13614, 49, /* 0.19 */ }, { 14424, 48, /* 0.19 */ }, { 15265, 48, /* 0.19 */ }, { 16137, 47, /* 0.19 */ }, { 17042, 46, /* 0.18 */ }, { 17981, 45, /* 0.18 */ }, { 18955, 45, /* 0.18 */ }, { 19965, 44, /* 0.17 */ }, { 21013, 43, /* 0.17 */ }, { 22101, 43, /* 0.17 */ }, { 23230, 42, /* 0.17 */ }, { 24402, 41, /* 0.16 */ }, { 25618, 41, /* 0.16 */ }, { 26881, 40, /* 0.16 */ }, { 28193, 39, /* 0.16 */ }, { 29557, 39, /* 0.15 */ }, { 30975, 38, /* 0.15 */ }, { 32450, 38, /* 0.15 */ }, { 33986, 37, /* 0.15 */ }, { 35586, 36, /* 0.14 */ }, { 37253, 36, /* 0.14 */ }, { 38992, 35, /* 0.14 */ }, { 40808, 35, /* 0.14 */ }, { 42707, 34, /* 0.13 */ }, { 44694, 33, /* 0.13 */ }, { 46776, 33, /* 0.13 */ }, { 48961, 32, /* 0.13 */ }, { 51258, 32, /* 0.13 */ }, { 53677, 31, /* 0.12 */ }, { 56230, 30, /* 0.12 */ }, { 58932, 30, /* 0.12 */ }, { 61799, 29, /* 0.12 */ }, { 64851, 28, /* 0.11 */ }, { 68113, 28, /* 0.11 */ }, { 71617, 27, /* 0.11 */ }, { 75401, 26, /* 0.10 */ }, { 79517, 26, /* 0.10 */ }, { 84035, 25, /* 0.10 */ }, { 89053, 24, /* 0.10 */ }, }; #define HSTCP_AIMD_MAX ARRAY_SIZE(hstcp_aimd_vals) struct hstcp { u32 ai; }; static void hstcp_init(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct hstcp *ca = inet_csk_ca(sk); ca->ai = 0; /* Ensure the MD arithmetic works. This is somewhat pedantic, * since I don't think we will see a cwnd this large. 
:) */ tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); } static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct hstcp *ca = inet_csk_ca(sk); if (!tcp_is_cwnd_limited(sk)) return; if (tcp_in_slow_start(tp)) tcp_slow_start(tp, acked); else { /* Update AIMD parameters. * * We want to guarantee that: * hstcp_aimd_vals[ca->ai-1].cwnd < * snd_cwnd <= * hstcp_aimd_vals[ca->ai].cwnd */ if (tcp_snd_cwnd(tp) > hstcp_aimd_vals[ca->ai].cwnd) { while (tcp_snd_cwnd(tp) > hstcp_aimd_vals[ca->ai].cwnd && ca->ai < HSTCP_AIMD_MAX - 1) ca->ai++; } else if (ca->ai && tcp_snd_cwnd(tp) <= hstcp_aimd_vals[ca->ai-1].cwnd) { while (ca->ai && tcp_snd_cwnd(tp) <= hstcp_aimd_vals[ca->ai-1].cwnd) ca->ai--; } /* Do additive increase */ if (tcp_snd_cwnd(tp) < tp->snd_cwnd_clamp) { /* cwnd = cwnd + a(w) / cwnd */ tp->snd_cwnd_cnt += ca->ai + 1; if (tp->snd_cwnd_cnt >= tcp_snd_cwnd(tp)) { tp->snd_cwnd_cnt -= tcp_snd_cwnd(tp); tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1); } } } } static u32 hstcp_ssthresh(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); struct hstcp *ca = inet_csk_ca(sk); /* Do multiplicative decrease */ return max(tcp_snd_cwnd(tp) - ((tcp_snd_cwnd(tp) * hstcp_aimd_vals[ca->ai].md) >> 8), 2U); } static struct tcp_congestion_ops tcp_highspeed __read_mostly = { .init = hstcp_init, .ssthresh = hstcp_ssthresh, .undo_cwnd = tcp_reno_undo_cwnd, .cong_avoid = hstcp_cong_avoid, .owner = THIS_MODULE, .name = "highspeed" }; static int __init hstcp_register(void) { BUILD_BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_highspeed); } static void __exit hstcp_unregister(void) { tcp_unregister_congestion_control(&tcp_highspeed); } module_init(hstcp_register); module_exit(hstcp_unregister); MODULE_AUTHOR("John Heffner"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("High Speed TCP");
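/*
 * A userspace sketch of how the HSTCP table above is consulted.  Only
 * the first few rows of the RFC 3649 table are copied here; given a
 * cwnd, the index is walked up or down the same way hstcp_cong_avoid()
 * does, and the loss response mirrors hstcp_ssthresh() (md is a
 * fixed-point fraction scaled by 256).  Names are local to the demo.
 */
#include <stdint.h>
#include <stdio.h>

static const struct { unsigned int cwnd, md; } aimd[] = {
	{ 38, 128 }, { 118, 112 }, { 221, 104 }, { 347, 98 }, { 495, 93 },
};
#define AIMD_MAX (sizeof(aimd) / sizeof(aimd[0]))

static unsigned int ai;			/* current table index, like struct hstcp */

static void update_ai(uint32_t cwnd)
{
	if (cwnd > aimd[ai].cwnd) {
		while (cwnd > aimd[ai].cwnd && ai < AIMD_MAX - 1)
			ai++;
	} else if (ai && cwnd <= aimd[ai - 1].cwnd) {
		while (ai && cwnd <= aimd[ai - 1].cwnd)
			ai--;
	}
}

static uint32_t ssthresh(uint32_t cwnd)
{
	uint32_t dec = (cwnd * aimd[ai].md) >> 8;	/* cwnd * md / 256 */

	return cwnd - dec > 2 ? cwnd - dec : 2;
}

int main(void)
{
	uint32_t cwnds[] = { 30, 200, 400, 100 };

	for (unsigned int i = 0; i < sizeof(cwnds) / sizeof(cwnds[0]); i++) {
		update_ai(cwnds[i]);
		printf("cwnd %4u -> ai %u, ssthresh on loss %u\n",
		       (unsigned int)cwnds[i], ai,
		       (unsigned int)ssthresh(cwnds[i]));
	}
	return 0;
}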
114 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Cryptographic API for algorithms (i.e., low-level API). * * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> */ #ifndef _CRYPTO_ALGAPI_H #define _CRYPTO_ALGAPI_H #include <crypto/utils.h> #include <linux/align.h> #include <linux/cache.h> #include <linux/crypto.h> #include <linux/types.h> #include <linux/workqueue.h> /* * Maximum values for blocksize and alignmask, used to allocate * static buffers that are big enough for any combination of * algs and architectures. Ciphers have a lower maximum size. */ #define MAX_ALGAPI_BLOCKSIZE 160 #define MAX_ALGAPI_ALIGNMASK 127 #define MAX_CIPHER_BLOCKSIZE 16 #define MAX_CIPHER_ALIGNMASK 15 #ifdef ARCH_DMA_MINALIGN #define CRYPTO_DMA_ALIGN ARCH_DMA_MINALIGN #else #define CRYPTO_DMA_ALIGN CRYPTO_MINALIGN #endif #define CRYPTO_DMA_PADDING ((CRYPTO_DMA_ALIGN - 1) & ~(CRYPTO_MINALIGN - 1)) /* * Autoloaded crypto modules should only use a prefixed name to avoid allowing * arbitrary modules to be loaded. Loading from userspace may still need the * unprefixed names, so retains those aliases as well. * This uses __MODULE_INFO directly instead of MODULE_ALIAS because pre-4.3 * gcc (e.g. avr32 toolchain) uses __LINE__ for uniqueness, and this macro * expands twice on the same line. Instead, use a separate base name for the * alias. */ #define MODULE_ALIAS_CRYPTO(name) \ __MODULE_INFO(alias, alias_userspace, name); \ __MODULE_INFO(alias, alias_crypto, "crypto-" name) struct crypto_aead; struct crypto_instance; struct module; struct notifier_block; struct rtattr; struct scatterlist; struct seq_file; struct sk_buff; struct crypto_type { unsigned int (*ctxsize)(struct crypto_alg *alg, u32 type, u32 mask); unsigned int (*extsize)(struct crypto_alg *alg); int (*init_tfm)(struct crypto_tfm *tfm); void (*show)(struct seq_file *m, struct crypto_alg *alg); int (*report)(struct sk_buff *skb, struct crypto_alg *alg); void (*free)(struct crypto_instance *inst); unsigned int type; unsigned int maskclear; unsigned int maskset; unsigned int tfmsize; }; struct crypto_instance { struct crypto_alg alg; struct crypto_template *tmpl; union { /* Node in list of instances after registration. */ struct hlist_node list; /* List of attached spawns before registration. 
*/ struct crypto_spawn *spawns; }; struct work_struct free_work; void *__ctx[] CRYPTO_MINALIGN_ATTR; }; struct crypto_template { struct list_head list; struct hlist_head instances; struct module *module; int (*create)(struct crypto_template *tmpl, struct rtattr **tb); char name[CRYPTO_MAX_ALG_NAME]; }; struct crypto_spawn { struct list_head list; struct crypto_alg *alg; union { /* Back pointer to instance after registration.*/ struct crypto_instance *inst; /* Spawn list pointer prior to registration. */ struct crypto_spawn *next; }; const struct crypto_type *frontend; u32 mask; bool dead; bool registered; }; struct crypto_queue { struct list_head list; struct list_head *backlog; unsigned int qlen; unsigned int max_qlen; }; struct scatter_walk { struct scatterlist *sg; unsigned int offset; }; struct crypto_attr_alg { char name[CRYPTO_MAX_ALG_NAME]; }; struct crypto_attr_type { u32 type; u32 mask; }; /* * Algorithm registration interface. */ int crypto_register_alg(struct crypto_alg *alg); void crypto_unregister_alg(struct crypto_alg *alg); int crypto_register_algs(struct crypto_alg *algs, int count); void crypto_unregister_algs(struct crypto_alg *algs, int count); void crypto_mod_put(struct crypto_alg *alg); int crypto_register_template(struct crypto_template *tmpl); int crypto_register_templates(struct crypto_template *tmpls, int count); void crypto_unregister_template(struct crypto_template *tmpl); void crypto_unregister_templates(struct crypto_template *tmpls, int count); struct crypto_template *crypto_lookup_template(const char *name); int crypto_register_instance(struct crypto_template *tmpl, struct crypto_instance *inst); void crypto_unregister_instance(struct crypto_instance *inst); int crypto_grab_spawn(struct crypto_spawn *spawn, struct crypto_instance *inst, const char *name, u32 type, u32 mask); void crypto_drop_spawn(struct crypto_spawn *spawn); struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn, u32 type, u32 mask); void *crypto_spawn_tfm2(struct crypto_spawn *spawn); struct crypto_attr_type *crypto_get_attr_type(struct rtattr **tb); int crypto_check_attr_type(struct rtattr **tb, u32 type, u32 *mask_ret); const char *crypto_attr_alg_name(struct rtattr *rta); int crypto_inst_setname(struct crypto_instance *inst, const char *name, struct crypto_alg *alg); void crypto_init_queue(struct crypto_queue *queue, unsigned int max_qlen); int crypto_enqueue_request(struct crypto_queue *queue, struct crypto_async_request *request); void crypto_enqueue_request_head(struct crypto_queue *queue, struct crypto_async_request *request); struct crypto_async_request *crypto_dequeue_request(struct crypto_queue *queue); static inline unsigned int crypto_queue_len(struct crypto_queue *queue) { return queue->qlen; } void crypto_inc(u8 *a, unsigned int size); static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm) { return tfm->__crt_ctx; } static inline void *crypto_tfm_ctx_align(struct crypto_tfm *tfm, unsigned int align) { if (align <= crypto_tfm_ctx_alignment()) align = 1; return PTR_ALIGN(crypto_tfm_ctx(tfm), align); } static inline unsigned int crypto_dma_align(void) { return CRYPTO_DMA_ALIGN; } static inline unsigned int crypto_dma_padding(void) { return (crypto_dma_align() - 1) & ~(crypto_tfm_ctx_alignment() - 1); } static inline void *crypto_tfm_ctx_dma(struct crypto_tfm *tfm) { return crypto_tfm_ctx_align(tfm, crypto_dma_align()); } static inline struct crypto_instance *crypto_tfm_alg_instance( struct crypto_tfm *tfm) { return container_of(tfm->__crt_alg, struct crypto_instance, 
alg); } static inline void *crypto_instance_ctx(struct crypto_instance *inst) { return inst->__ctx; } static inline struct crypto_async_request *crypto_get_backlog( struct crypto_queue *queue) { return queue->backlog == &queue->list ? NULL : container_of(queue->backlog, struct crypto_async_request, list); } static inline u32 crypto_requires_off(struct crypto_attr_type *algt, u32 off) { return (algt->type ^ off) & algt->mask & off; } /* * When an algorithm uses another algorithm (e.g., if it's an instance of a * template), these are the flags that should always be set on the "outer" * algorithm if any "inner" algorithm has them set. */ #define CRYPTO_ALG_INHERITED_FLAGS \ (CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK | \ CRYPTO_ALG_ALLOCATES_MEMORY) /* * Given the type and mask that specify the flags restrictions on a template * instance being created, return the mask that should be passed to * crypto_grab_*() (along with type=0) to honor any request the user made to * have any of the CRYPTO_ALG_INHERITED_FLAGS clear. */ static inline u32 crypto_algt_inherited_mask(struct crypto_attr_type *algt) { return crypto_requires_off(algt, CRYPTO_ALG_INHERITED_FLAGS); } int crypto_register_notifier(struct notifier_block *nb); int crypto_unregister_notifier(struct notifier_block *nb); /* Crypto notification events. */ enum { CRYPTO_MSG_ALG_REQUEST, CRYPTO_MSG_ALG_REGISTER, CRYPTO_MSG_ALG_LOADED, }; static inline void crypto_request_complete(struct crypto_async_request *req, int err) { req->complete(req->data, err); } static inline u32 crypto_tfm_alg_type(struct crypto_tfm *tfm) { return tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK; } #endif /* _CRYPTO_ALGAPI_H */
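/*
 * A userspace sketch of the bit trick behind crypto_requires_off() and
 * crypto_algt_inherited_mask() above: a flag appears in the returned
 * mask only when the caller's (type, mask) pair asked for that flag to
 * be clear.  The DEMO_* flag values are stand-ins chosen for this demo,
 * not the real CRYPTO_ALG_* constants.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_ALG_ASYNC			0x080
#define DEMO_ALG_ALLOCATES_MEMORY	0x400
#define DEMO_INHERITED_FLAGS	(DEMO_ALG_ASYNC | DEMO_ALG_ALLOCATES_MEMORY)

struct attr_type { uint32_t type, mask; };

static uint32_t requires_off(const struct attr_type *algt, uint32_t off)
{
	/* a bit survives iff it is in 'off', covered by the caller's mask,
	 * and requested as zero in the caller's type */
	return (algt->type ^ off) & algt->mask & off;
}

int main(void)
{
	/* "give me a synchronous algorithm": ASYNC requested clear */
	struct attr_type sync_only = { .type = 0, .mask = DEMO_ALG_ASYNC };
	/* no constraints at all */
	struct attr_type anything = { .type = 0, .mask = 0 };

	printf("sync_only -> inherit-mask 0x%03x\n",
	       (unsigned int)requires_off(&sync_only, DEMO_INHERITED_FLAGS));
	printf("anything  -> inherit-mask 0x%03x\n",
	       (unsigned int)requires_off(&anything, DEMO_INHERITED_FLAGS));
	return 0;
}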
1 37 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _BCACHEFS_CHECKSUM_H #define _BCACHEFS_CHECKSUM_H #include "bcachefs.h" #include "extents_types.h" #include "super-io.h" #include <linux/crc64.h> #include <crypto/chacha.h> static inline bool bch2_checksum_mergeable(unsigned type) { switch (type) { case BCH_CSUM_none: case BCH_CSUM_crc32c: case BCH_CSUM_crc64: return true; default: return false; } } struct bch_csum bch2_checksum_merge(unsigned, struct bch_csum, struct bch_csum, size_t); #define BCH_NONCE_EXTENT cpu_to_le32(1 << 28) #define BCH_NONCE_BTREE cpu_to_le32(2 << 28) #define BCH_NONCE_JOURNAL cpu_to_le32(3 << 28) #define BCH_NONCE_PRIO cpu_to_le32(4 << 28) #define BCH_NONCE_POLY cpu_to_le32(1 << 31) struct bch_csum bch2_checksum(struct bch_fs *, unsigned, struct nonce, const void *, size_t); /* * This is used for various on disk data structures - bch_sb, prio_set, bset, * jset: The checksum is _always_ the first field of these structs */ #define csum_vstruct(_c, _type, _nonce, _i) \ ({ \ const void *_start = ((const void *) (_i)) + sizeof((_i)->csum);\ \ bch2_checksum(_c, _type, _nonce, _start, vstruct_end(_i) - _start);\ }) static inline void bch2_csum_to_text(struct printbuf *out, enum bch_csum_type type, struct bch_csum csum) { const u8 *p = (u8 *) &csum; unsigned bytes = type < BCH_CSUM_NR ? bch_crc_bytes[type] : 16; for (unsigned i = 0; i < bytes; i++) prt_hex_byte(out, p[i]); } static inline void bch2_csum_err_msg(struct printbuf *out, enum bch_csum_type type, struct bch_csum expected, struct bch_csum got) { prt_str(out, "checksum error, type "); bch2_prt_csum_type(out, type); prt_str(out, ": got "); bch2_csum_to_text(out, type, got); prt_str(out, " should be "); bch2_csum_to_text(out, type, expected); } int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t); int bch2_request_key(struct bch_sb *, struct bch_key *); #ifndef __KERNEL__ int bch2_revoke_key(struct bch_sb *); #endif int bch2_encrypt(struct bch_fs *, unsigned, struct nonce, void *data, size_t); struct bch_csum bch2_checksum_bio(struct bch_fs *, unsigned, struct nonce, struct bio *); int bch2_rechecksum_bio(struct bch_fs *, struct bio *, struct bversion, struct bch_extent_crc_unpacked, struct bch_extent_crc_unpacked *, struct bch_extent_crc_unpacked *, unsigned, unsigned, unsigned); int __bch2_encrypt_bio(struct bch_fs *, unsigned, struct nonce, struct bio *); static inline int bch2_encrypt_bio(struct bch_fs *c, unsigned type, struct nonce nonce, struct bio *bio) { return bch2_csum_type_is_encryption(type) ? 
__bch2_encrypt_bio(c, type, nonce, bio) : 0; } extern const struct bch_sb_field_ops bch_sb_field_ops_crypt; int bch2_decrypt_sb_key(struct bch_fs *, struct bch_sb_field_crypt *, struct bch_key *); int bch2_disable_encryption(struct bch_fs *); int bch2_enable_encryption(struct bch_fs *, bool); void bch2_fs_encryption_exit(struct bch_fs *); int bch2_fs_encryption_init(struct bch_fs *); static inline enum bch_csum_type bch2_csum_opt_to_type(enum bch_csum_opt type, bool data) { switch (type) { case BCH_CSUM_OPT_none: return BCH_CSUM_none; case BCH_CSUM_OPT_crc32c: return data ? BCH_CSUM_crc32c : BCH_CSUM_crc32c_nonzero; case BCH_CSUM_OPT_crc64: return data ? BCH_CSUM_crc64 : BCH_CSUM_crc64_nonzero; case BCH_CSUM_OPT_xxhash: return BCH_CSUM_xxhash; default: BUG(); } } static inline enum bch_csum_type bch2_data_checksum_type(struct bch_fs *c, struct bch_io_opts opts) { if (opts.nocow) return 0; if (c->sb.encryption_type) return c->opts.wide_macs ? BCH_CSUM_chacha20_poly1305_128 : BCH_CSUM_chacha20_poly1305_80; return bch2_csum_opt_to_type(opts.data_checksum, true); } static inline enum bch_csum_type bch2_meta_checksum_type(struct bch_fs *c) { if (c->sb.encryption_type) return BCH_CSUM_chacha20_poly1305_128; return bch2_csum_opt_to_type(c->opts.metadata_checksum, false); } static inline bool bch2_checksum_type_valid(const struct bch_fs *c, unsigned type) { if (type >= BCH_CSUM_NR) return false; if (bch2_csum_type_is_encryption(type) && !c->chacha20) return false; return true; } /* returns true if not equal */ static inline bool bch2_crc_cmp(struct bch_csum l, struct bch_csum r) { /* * XXX: need some way of preventing the compiler from optimizing this * into a form that isn't constant time.. */ return ((l.lo ^ r.lo) | (l.hi ^ r.hi)) != 0; } /* for skipping ahead and encrypting/decrypting at an offset: */ static inline struct nonce nonce_add(struct nonce nonce, unsigned offset) { EBUG_ON(offset & (CHACHA_BLOCK_SIZE - 1)); le32_add_cpu(&nonce.d[0], offset / CHACHA_BLOCK_SIZE); return nonce; } static inline struct nonce null_nonce(void) { struct nonce ret; memset(&ret, 0, sizeof(ret)); return ret; } static inline struct nonce extent_nonce(struct bversion version, struct bch_extent_crc_unpacked crc) { unsigned compression_type = crc_is_compressed(crc) ? crc.compression_type : 0; unsigned size = compression_type ? crc.uncompressed_size : 0; struct nonce nonce = (struct nonce) {{ [0] = cpu_to_le32(size << 22), [1] = cpu_to_le32(version.lo), [2] = cpu_to_le32(version.lo >> 32), [3] = cpu_to_le32(version.hi| (compression_type << 24))^BCH_NONCE_EXTENT, }}; return nonce_add(nonce, crc.nonce << 9); } static inline bool bch2_key_is_encrypted(struct bch_encrypted_key *key) { return le64_to_cpu(key->magic) != BCH_KEY_MAGIC; } static inline struct nonce __bch2_sb_key_nonce(struct bch_sb *sb) { __le64 magic = __bch2_sb_magic(sb); return (struct nonce) {{ [0] = 0, [1] = 0, [2] = ((__le32 *) &magic)[0], [3] = ((__le32 *) &magic)[1], }}; } static inline struct nonce bch2_sb_key_nonce(struct bch_fs *c) { __le64 magic = bch2_sb_magic(c); return (struct nonce) {{ [0] = 0, [1] = 0, [2] = ((__le32 *) &magic)[0], [3] = ((__le32 *) &magic)[1], }}; } #endif /* _BCACHEFS_CHECKSUM_H */
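/*
 * A userspace sketch of the branch-free comparison in bch2_crc_cmp()
 * above: XOR each half and OR the results, so there is no early exit
 * that leaks where the first differing bit is (the header's own
 * comment notes a compiler could still transform this).  struct
 * csum128 is a stand-in for struct bch_csum.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct csum128 { uint64_t lo, hi; };

/* returns true if NOT equal, mirroring bch2_crc_cmp() */
static bool csum_cmp(struct csum128 l, struct csum128 r)
{
	return ((l.lo ^ r.lo) | (l.hi ^ r.hi)) != 0;
}

int main(void)
{
	struct csum128 a = { 0x0123456789abcdefULL, 0xfedcba9876543210ULL };
	struct csum128 b = a;
	struct csum128 c = { a.lo ^ 1, a.hi };	/* single-bit difference */

	printf("a vs b: %s\n", csum_cmp(a, b) ? "differ" : "equal");
	printf("a vs c: %s\n", csum_cmp(a, c) ? "differ" : "equal");
	return 0;
}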
155 158 1285 3 1284 10 3 1271 17 486 9 151 444 62 72 19 28 10 326 527 216 323 524 601 12 175 527 68 71 17 30 408 656 260 403 650 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/readdir.c * * Copyright (C) 1995 Linus Torvalds */ #include <linux/stddef.h> #include <linux/kernel.h> #include <linux/export.h> #include <linux/time.h> #include <linux/mm.h> #include <linux/errno.h> #include <linux/stat.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/fsnotify.h> #include <linux/dirent.h> #include <linux/security.h> #include <linux/syscalls.h> #include <linux/unistd.h> #include <linux/compat.h> #include <linux/uaccess.h> /* * Some filesystems were never converted to '->iterate_shared()' * and their directory iterators want the inode lock held for * writing. This wrapper allows for converting from the shared * semantics to the exclusive inode use. */ int wrap_directory_iterator(struct file *file, struct dir_context *ctx, int (*iter)(struct file *, struct dir_context *)) { struct inode *inode = file_inode(file); int ret; /* * We'd love to have an 'inode_upgrade_trylock()' operation, * see the comment in mmap_upgrade_trylock() in mm/memory.c. * * But considering this is for "filesystems that never got * converted", it really doesn't matter. 
* * Also note that since we have to return with the lock held * for reading, we can't use the "killable()" locking here, * since we do need to get the lock even if we're dying. * * We could do the write part killably and then get the read * lock unconditionally if it mattered, but see above on why * this does the very simplistic conversion. */ up_read(&inode->i_rwsem); down_write(&inode->i_rwsem); /* * Since we dropped the inode lock, we should do the * DEADDIR test again. See 'iterate_dir()' below. * * Note that we don't need to re-do the f_pos games, * since the file must be locked wrt f_pos anyway. */ ret = -ENOENT; if (!IS_DEADDIR(inode)) ret = iter(file, ctx); downgrade_write(&inode->i_rwsem); return ret; } EXPORT_SYMBOL(wrap_directory_iterator); /* * Note the "unsafe_put_user()" semantics: we goto a * label for errors. */ #define unsafe_copy_dirent_name(_dst, _src, _len, label) do { \ char __user *dst = (_dst); \ const char *src = (_src); \ size_t len = (_len); \ unsafe_put_user(0, dst+len, label); \ unsafe_copy_to_user(dst, src, len, label); \ } while (0) int iterate_dir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); int res = -ENOTDIR; if (!file->f_op->iterate_shared) goto out; res = security_file_permission(file, MAY_READ); if (res) goto out; res = fsnotify_file_perm(file, MAY_READ); if (res) goto out; res = down_read_killable(&inode->i_rwsem); if (res) goto out; res = -ENOENT; if (!IS_DEADDIR(inode)) { ctx->pos = file->f_pos; res = file->f_op->iterate_shared(file, ctx); file->f_pos = ctx->pos; fsnotify_access(file); file_accessed(file); } inode_unlock_shared(inode); out: return res; } EXPORT_SYMBOL(iterate_dir); /* * POSIX says that a dirent name cannot contain NULL or a '/'. * * It's not 100% clear what we should really do in this case. * The filesystem is clearly corrupted, but returning a hard * error means that you now don't see any of the other names * either, so that isn't a perfect alternative. * * And if you return an error, what error do you use? Several * filesystems seem to have decided on EUCLEAN being the error * code for EFSCORRUPTED, and that may be the error to use. Or * just EIO, which is perhaps more obvious to users. * * In order to see the other file names in the directory, the * caller might want to make this a "soft" error: skip the * entry, and return the error at the end instead. * * Note that this should likely do a "memchr(name, 0, len)" * check too, since that would be filesystem corruption as * well. However, that case can't actually confuse user space, * which has to do a strlen() on the name anyway to find the * filename length, and the above "soft error" worry means * that it's probably better left alone until we have that * issue clarified. * * Note the PATH_MAX check - it's arbitrary but the real * kernel limit on a possible path component, not NAME_MAX, * which is the technical standard limit. */ static int verify_dirent_name(const char *name, int len) { if (len <= 0 || len >= PATH_MAX) return -EIO; if (memchr(name, '/', len)) return -EIO; return 0; } /* * Traditional linux readdir() handling.. * * "count=1" is a special case, meaning that the buffer is one * dirent-structure in size and that the code can't handle more * anyway. Thus the special "fillonedir()" function for that * case (the low-level handlers don't need to care about this). 
*/ #ifdef __ARCH_WANT_OLD_READDIR struct old_linux_dirent { unsigned long d_ino; unsigned long d_offset; unsigned short d_namlen; char d_name[]; }; struct readdir_callback { struct dir_context ctx; struct old_linux_dirent __user * dirent; int result; }; static bool fillonedir(struct dir_context *ctx, const char *name, int namlen, loff_t offset, u64 ino, unsigned int d_type) { struct readdir_callback *buf = container_of(ctx, struct readdir_callback, ctx); struct old_linux_dirent __user * dirent; unsigned long d_ino; if (buf->result) return false; buf->result = verify_dirent_name(name, namlen); if (buf->result) return false; d_ino = ino; if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { buf->result = -EOVERFLOW; return false; } buf->result++; dirent = buf->dirent; if (!user_write_access_begin(dirent, (unsigned long)(dirent->d_name + namlen + 1) - (unsigned long)dirent)) goto efault; unsafe_put_user(d_ino, &dirent->d_ino, efault_end); unsafe_put_user(offset, &dirent->d_offset, efault_end); unsafe_put_user(namlen, &dirent->d_namlen, efault_end); unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); user_write_access_end(); return true; efault_end: user_write_access_end(); efault: buf->result = -EFAULT; return false; } SYSCALL_DEFINE3(old_readdir, unsigned int, fd, struct old_linux_dirent __user *, dirent, unsigned int, count) { int error; CLASS(fd_pos, f)(fd); struct readdir_callback buf = { .ctx.actor = fillonedir, .dirent = dirent }; if (fd_empty(f)) return -EBADF; error = iterate_dir(fd_file(f), &buf.ctx); if (buf.result) error = buf.result; return error; } #endif /* __ARCH_WANT_OLD_READDIR */ /* * New, all-improved, singing, dancing, iBCS2-compliant getdents() * interface. */ struct linux_dirent { unsigned long d_ino; unsigned long d_off; unsigned short d_reclen; char d_name[]; }; struct getdents_callback { struct dir_context ctx; struct linux_dirent __user * current_dir; int prev_reclen; int count; int error; }; static bool filldir(struct dir_context *ctx, const char *name, int namlen, loff_t offset, u64 ino, unsigned int d_type) { struct linux_dirent __user *dirent, *prev; struct getdents_callback *buf = container_of(ctx, struct getdents_callback, ctx); unsigned long d_ino; int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2, sizeof(long)); int prev_reclen; buf->error = verify_dirent_name(name, namlen); if (unlikely(buf->error)) return false; buf->error = -EINVAL; /* only used if we fail.. 
*/ if (reclen > buf->count) return false; d_ino = ino; if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { buf->error = -EOVERFLOW; return false; } prev_reclen = buf->prev_reclen; if (prev_reclen && signal_pending(current)) return false; dirent = buf->current_dir; prev = (void __user *) dirent - prev_reclen; if (!user_write_access_begin(prev, reclen + prev_reclen)) goto efault; /* This might be 'dirent->d_off', but if so it will get overwritten */ unsafe_put_user(offset, &prev->d_off, efault_end); unsafe_put_user(d_ino, &dirent->d_ino, efault_end); unsafe_put_user(reclen, &dirent->d_reclen, efault_end); unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end); unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); user_write_access_end(); buf->current_dir = (void __user *)dirent + reclen; buf->prev_reclen = reclen; buf->count -= reclen; return true; efault_end: user_write_access_end(); efault: buf->error = -EFAULT; return false; } SYSCALL_DEFINE3(getdents, unsigned int, fd, struct linux_dirent __user *, dirent, unsigned int, count) { CLASS(fd_pos, f)(fd); struct getdents_callback buf = { .ctx.actor = filldir, .count = count, .current_dir = dirent }; int error; if (fd_empty(f)) return -EBADF; error = iterate_dir(fd_file(f), &buf.ctx); if (error >= 0) error = buf.error; if (buf.prev_reclen) { struct linux_dirent __user * lastdirent; lastdirent = (void __user *)buf.current_dir - buf.prev_reclen; if (put_user(buf.ctx.pos, &lastdirent->d_off)) error = -EFAULT; else error = count - buf.count; } return error; } struct getdents_callback64 { struct dir_context ctx; struct linux_dirent64 __user * current_dir; int prev_reclen; int count; int error; }; static bool filldir64(struct dir_context *ctx, const char *name, int namlen, loff_t offset, u64 ino, unsigned int d_type) { struct linux_dirent64 __user *dirent, *prev; struct getdents_callback64 *buf = container_of(ctx, struct getdents_callback64, ctx); int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, sizeof(u64)); int prev_reclen; buf->error = verify_dirent_name(name, namlen); if (unlikely(buf->error)) return false; buf->error = -EINVAL; /* only used if we fail.. 
*/ if (reclen > buf->count) return false; prev_reclen = buf->prev_reclen; if (prev_reclen && signal_pending(current)) return false; dirent = buf->current_dir; prev = (void __user *)dirent - prev_reclen; if (!user_write_access_begin(prev, reclen + prev_reclen)) goto efault; /* This might be 'dirent->d_off', but if so it will get overwritten */ unsafe_put_user(offset, &prev->d_off, efault_end); unsafe_put_user(ino, &dirent->d_ino, efault_end); unsafe_put_user(reclen, &dirent->d_reclen, efault_end); unsafe_put_user(d_type, &dirent->d_type, efault_end); unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); user_write_access_end(); buf->prev_reclen = reclen; buf->current_dir = (void __user *)dirent + reclen; buf->count -= reclen; return true; efault_end: user_write_access_end(); efault: buf->error = -EFAULT; return false; } SYSCALL_DEFINE3(getdents64, unsigned int, fd, struct linux_dirent64 __user *, dirent, unsigned int, count) { CLASS(fd_pos, f)(fd); struct getdents_callback64 buf = { .ctx.actor = filldir64, .count = count, .current_dir = dirent }; int error; if (fd_empty(f)) return -EBADF; error = iterate_dir(fd_file(f), &buf.ctx); if (error >= 0) error = buf.error; if (buf.prev_reclen) { struct linux_dirent64 __user * lastdirent; typeof(lastdirent->d_off) d_off = buf.ctx.pos; lastdirent = (void __user *) buf.current_dir - buf.prev_reclen; if (put_user(d_off, &lastdirent->d_off)) error = -EFAULT; else error = count - buf.count; } return error; } #ifdef CONFIG_COMPAT struct compat_old_linux_dirent { compat_ulong_t d_ino; compat_ulong_t d_offset; unsigned short d_namlen; char d_name[]; }; struct compat_readdir_callback { struct dir_context ctx; struct compat_old_linux_dirent __user *dirent; int result; }; static bool compat_fillonedir(struct dir_context *ctx, const char *name, int namlen, loff_t offset, u64 ino, unsigned int d_type) { struct compat_readdir_callback *buf = container_of(ctx, struct compat_readdir_callback, ctx); struct compat_old_linux_dirent __user *dirent; compat_ulong_t d_ino; if (buf->result) return false; buf->result = verify_dirent_name(name, namlen); if (buf->result) return false; d_ino = ino; if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { buf->result = -EOVERFLOW; return false; } buf->result++; dirent = buf->dirent; if (!user_write_access_begin(dirent, (unsigned long)(dirent->d_name + namlen + 1) - (unsigned long)dirent)) goto efault; unsafe_put_user(d_ino, &dirent->d_ino, efault_end); unsafe_put_user(offset, &dirent->d_offset, efault_end); unsafe_put_user(namlen, &dirent->d_namlen, efault_end); unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); user_write_access_end(); return true; efault_end: user_write_access_end(); efault: buf->result = -EFAULT; return false; } COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd, struct compat_old_linux_dirent __user *, dirent, unsigned int, count) { int error; CLASS(fd_pos, f)(fd); struct compat_readdir_callback buf = { .ctx.actor = compat_fillonedir, .dirent = dirent }; if (fd_empty(f)) return -EBADF; error = iterate_dir(fd_file(f), &buf.ctx); if (buf.result) error = buf.result; return error; } struct compat_linux_dirent { compat_ulong_t d_ino; compat_ulong_t d_off; unsigned short d_reclen; char d_name[]; }; struct compat_getdents_callback { struct dir_context ctx; struct compat_linux_dirent __user *current_dir; int prev_reclen; int count; int error; }; static bool compat_filldir(struct dir_context *ctx, const char *name, int namlen, loff_t offset, u64 ino, unsigned int d_type) { struct 
compat_linux_dirent __user *dirent, *prev; struct compat_getdents_callback *buf = container_of(ctx, struct compat_getdents_callback, ctx); compat_ulong_t d_ino; int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) + namlen + 2, sizeof(compat_long_t)); int prev_reclen; buf->error = verify_dirent_name(name, namlen); if (unlikely(buf->error)) return false; buf->error = -EINVAL; /* only used if we fail.. */ if (reclen > buf->count) return false; d_ino = ino; if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { buf->error = -EOVERFLOW; return false; } prev_reclen = buf->prev_reclen; if (prev_reclen && signal_pending(current)) return false; dirent = buf->current_dir; prev = (void __user *) dirent - prev_reclen; if (!user_write_access_begin(prev, reclen + prev_reclen)) goto efault; unsafe_put_user(offset, &prev->d_off, efault_end); unsafe_put_user(d_ino, &dirent->d_ino, efault_end); unsafe_put_user(reclen, &dirent->d_reclen, efault_end); unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end); unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); user_write_access_end(); buf->prev_reclen = reclen; buf->current_dir = (void __user *)dirent + reclen; buf->count -= reclen; return true; efault_end: user_write_access_end(); efault: buf->error = -EFAULT; return false; } COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd, struct compat_linux_dirent __user *, dirent, unsigned int, count) { CLASS(fd_pos, f)(fd); struct compat_getdents_callback buf = { .ctx.actor = compat_filldir, .current_dir = dirent, .count = count }; int error; if (fd_empty(f)) return -EBADF; error = iterate_dir(fd_file(f), &buf.ctx); if (error >= 0) error = buf.error; if (buf.prev_reclen) { struct compat_linux_dirent __user * lastdirent; lastdirent = (void __user *)buf.current_dir - buf.prev_reclen; if (put_user(buf.ctx.pos, &lastdirent->d_off)) error = -EFAULT; else error = count - buf.count; } return error; } #endif
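The filldir64()/getdents64() path above packs struct linux_dirent64 records back to back, each advanced by d_reclen, with the previous record's d_off patched in before the next record is written. A minimal user-space sketch of consuming that layout directly through syscall(SYS_getdents64) follows; the mirrored struct definition and the 32 KiB buffer size are illustrative assumptions, not part of the kernel code above.

/* Hedged sketch: walk raw getdents64 records as laid out by filldir64(). */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

struct linux_dirent64_user {            /* mirrors the kernel record layout */
	uint64_t       d_ino;           /* inode number */
	int64_t        d_off;           /* cookie for the next record (patched d_off) */
	unsigned short d_reclen;        /* total record length, u64-aligned */
	unsigned char  d_type;          /* DT_* value written by the kernel */
	char           d_name[];        /* NUL-terminated file name */
};

int main(int argc, char **argv)
{
	char buf[32768];                /* arbitrary buffer size for illustration */
	int fd = open(argc > 1 ? argv[1] : ".", O_RDONLY | O_DIRECTORY);

	if (fd < 0)
		return 1;
	for (;;) {
		long n = syscall(SYS_getdents64, fd, buf, sizeof(buf));

		if (n <= 0)
			break;          /* 0: end of directory, <0: error */
		long pos = 0;
		while (pos < n) {
			struct linux_dirent64_user *d =
				(struct linux_dirent64_user *)(buf + pos);

			printf("%8llu %3u %s\n", (unsigned long long)d->d_ino,
			       (unsigned)d->d_type, d->d_name);
			pos += d->d_reclen;   /* records are walked by d_reclen */
		}
	}
	close(fd);
	return 0;
}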
// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext4/xattr_security.c * Handler for storing security labels as extended attributes. */ #include <linux/string.h> #include <linux/fs.h> #include <linux/security.h> #include <linux/slab.h> #include "ext4_jbd2.h" #include "ext4.h" #include "xattr.h" static int ext4_xattr_security_get(const struct xattr_handler *handler, struct dentry *unused, struct inode *inode, const char *name, void *buffer, size_t size) { return ext4_xattr_get(inode, EXT4_XATTR_INDEX_SECURITY, name, buffer, size); } static int ext4_xattr_security_set(const struct xattr_handler *handler, struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) { return ext4_xattr_set(inode, EXT4_XATTR_INDEX_SECURITY, name, value, size, flags); } static int ext4_initxattrs(struct inode *inode, const struct xattr *xattr_array, void *fs_info) { const struct xattr *xattr; handle_t *handle = fs_info; int err = 0; for (xattr = xattr_array; xattr->name != NULL; xattr++) { err = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_SECURITY, xattr->name, xattr->value, xattr->value_len, XATTR_CREATE); if (err < 0) break; } return err; } int ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir, const struct qstr *qstr) { return security_inode_init_security(inode, dir, qstr, &ext4_initxattrs, handle); } const struct xattr_handler ext4_xattr_security_handler = { .prefix = XATTR_SECURITY_PREFIX, .get = ext4_xattr_security_get, .set = ext4_xattr_security_set, };
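The ext4_xattr_security_handler above hooks the "security." name prefix up to the EXT4_XATTR_INDEX_SECURITY namespace, and ext4_init_security() lets the active LSM seed those attributes at inode creation time. As a hedged illustration, a user-space program can read such a label through the generic xattr syscalls; the attribute name "security.selinux" is just the common SELinux example and depends on which LSM is in use.

/* Hedged sketch: read a "security.*" xattr; the VFS routes the
 * "security." prefix to per-filesystem handlers like the ext4 one above. */
#include <stdio.h>
#include <sys/types.h>
#include <sys/xattr.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "/";
	char value[256];
	/* lgetxattr() reads a symlink's own label rather than its target's */
	ssize_t len = lgetxattr(path, "security.selinux", value, sizeof(value) - 1);

	if (len < 0) {
		perror("lgetxattr");    /* e.g. ENODATA if no label is set */
		return 1;
	}
	value[len] = '\0';
	printf("%s: %s\n", path, value);
	return 0;
}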
// SPDX-License-Identifier: GPL-2.0-or-later /* A driver for the D-Link DSB-R100 USB radio and Gemtek USB Radio 21. * The device plugs into both the USB and an analog audio input, so this thing * only deals with initialisation and frequency setting; the * audio data has to be handled by a sound driver. * * Major issue: I can't find out where the device reports the signal * strength, and indeed the windows software apparently just looks * at the stereo indicator as well. So, scanning will only find * stereo stations. Sad, but I can't help it. * * Also, the windows program sends oodles of messages over to the * device, and I couldn't figure out their meaning. My suspicion * is that they don't have any:-) * * You might find some interesting stuff about this module at * http://unimut.fsk.uni-heidelberg.de/unimut/demi/dsbr * * Fully tested with the Keene USB FM Transmitter and the v4l2-compliance tool. * * Copyright (c) 2000 Markus Demleitner <msdemlei@cl.uni-heidelberg.de> */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/input.h> #include <linux/videodev2.h> #include <linux/usb.h> #include <media/v4l2-device.h> #include <media/v4l2-ioctl.h> #include <media/v4l2-ctrls.h> #include <media/v4l2-event.h> /* * Version Information */ MODULE_AUTHOR("Markus Demleitner <msdemlei@tucana.harvard.edu>"); MODULE_DESCRIPTION("D-Link DSB-R100 USB FM radio driver"); MODULE_LICENSE("GPL"); MODULE_VERSION("1.1.0"); #define DSB100_VENDOR 0x04b4 #define DSB100_PRODUCT 0x1002 /* Commands the device appears to understand */ #define DSB100_TUNE 1 #define DSB100_ONOFF 2 #define TB_LEN 16 /* Frequency limits in MHz -- these are European values. For Japanese devices, that would be 76 and 91.
*/ #define FREQ_MIN 87.5 #define FREQ_MAX 108.0 #define FREQ_MUL 16000 #define v4l2_dev_to_radio(d) container_of(d, struct dsbr100_device, v4l2_dev) static int radio_nr = -1; module_param(radio_nr, int, 0); /* Data for one (physical) device */ struct dsbr100_device { struct usb_device *usbdev; struct video_device videodev; struct v4l2_device v4l2_dev; struct v4l2_ctrl_handler hdl; u8 *transfer_buffer; struct mutex v4l2_lock; int curfreq; bool stereo; bool muted; }; /* Low-level device interface begins here */ /* set a frequency, freq is defined by v4l's TUNER_LOW, i.e. 1/16th kHz */ static int dsbr100_setfreq(struct dsbr100_device *radio, unsigned freq) { unsigned f = (freq / 16 * 80) / 1000 + 856; int retval = 0; if (!radio->muted) { retval = usb_control_msg(radio->usbdev, usb_rcvctrlpipe(radio->usbdev, 0), DSB100_TUNE, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, (f >> 8) & 0x00ff, f & 0xff, radio->transfer_buffer, 8, 300); if (retval >= 0) mdelay(1); } if (retval >= 0) { radio->curfreq = freq; return 0; } dev_err(&radio->usbdev->dev, "%s - usb_control_msg returned %i, request %i\n", __func__, retval, DSB100_TUNE); return retval; } /* switch on radio */ static int dsbr100_start(struct dsbr100_device *radio) { int retval = usb_control_msg(radio->usbdev, usb_rcvctrlpipe(radio->usbdev, 0), DSB100_ONOFF, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, 0x01, 0x00, radio->transfer_buffer, 8, 300); if (retval >= 0) return dsbr100_setfreq(radio, radio->curfreq); dev_err(&radio->usbdev->dev, "%s - usb_control_msg returned %i, request %i\n", __func__, retval, DSB100_ONOFF); return retval; } /* switch off radio */ static int dsbr100_stop(struct dsbr100_device *radio) { int retval = usb_control_msg(radio->usbdev, usb_rcvctrlpipe(radio->usbdev, 0), DSB100_ONOFF, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, 0x00, 0x00, radio->transfer_buffer, 8, 300); if (retval >= 0) return 0; dev_err(&radio->usbdev->dev, "%s - usb_control_msg returned %i, request %i\n", __func__, retval, DSB100_ONOFF); return retval; } /* return the device status. This is, in effect, just whether it sees a stereo signal or not. Pity. */ static void dsbr100_getstat(struct dsbr100_device *radio) { int retval = usb_control_msg(radio->usbdev, usb_rcvctrlpipe(radio->usbdev, 0), USB_REQ_GET_STATUS, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, 0x00, 0x24, radio->transfer_buffer, 8, 300); if (retval < 0) { radio->stereo = false; dev_err(&radio->usbdev->dev, "%s - usb_control_msg returned %i, request %i\n", __func__, retval, USB_REQ_GET_STATUS); } else { radio->stereo = !(radio->transfer_buffer[0] & 0x01); } } static int vidioc_querycap(struct file *file, void *priv, struct v4l2_capability *v) { struct dsbr100_device *radio = video_drvdata(file); strscpy(v->driver, "dsbr100", sizeof(v->driver)); strscpy(v->card, "D-Link R-100 USB FM Radio", sizeof(v->card)); usb_make_path(radio->usbdev, v->bus_info, sizeof(v->bus_info)); return 0; } static int vidioc_g_tuner(struct file *file, void *priv, struct v4l2_tuner *v) { struct dsbr100_device *radio = video_drvdata(file); if (v->index > 0) return -EINVAL; dsbr100_getstat(radio); strscpy(v->name, "FM", sizeof(v->name)); v->type = V4L2_TUNER_RADIO; v->rangelow = FREQ_MIN * FREQ_MUL; v->rangehigh = FREQ_MAX * FREQ_MUL; v->rxsubchans = radio->stereo ? V4L2_TUNER_SUB_STEREO : V4L2_TUNER_SUB_MONO; v->capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_STEREO; v->audmode = V4L2_TUNER_MODE_STEREO; v->signal = radio->stereo ? 
0xffff : 0; /* We can't get the signal strength */ return 0; } static int vidioc_s_tuner(struct file *file, void *priv, const struct v4l2_tuner *v) { return v->index ? -EINVAL : 0; } static int vidioc_s_frequency(struct file *file, void *priv, const struct v4l2_frequency *f) { struct dsbr100_device *radio = video_drvdata(file); if (f->tuner != 0 || f->type != V4L2_TUNER_RADIO) return -EINVAL; return dsbr100_setfreq(radio, clamp_t(unsigned, f->frequency, FREQ_MIN * FREQ_MUL, FREQ_MAX * FREQ_MUL)); } static int vidioc_g_frequency(struct file *file, void *priv, struct v4l2_frequency *f) { struct dsbr100_device *radio = video_drvdata(file); if (f->tuner) return -EINVAL; f->type = V4L2_TUNER_RADIO; f->frequency = radio->curfreq; return 0; } static int usb_dsbr100_s_ctrl(struct v4l2_ctrl *ctrl) { struct dsbr100_device *radio = container_of(ctrl->handler, struct dsbr100_device, hdl); switch (ctrl->id) { case V4L2_CID_AUDIO_MUTE: radio->muted = ctrl->val; return radio->muted ? dsbr100_stop(radio) : dsbr100_start(radio); } return -EINVAL; } /* USB subsystem interface begins here */ /* * Handle unplugging of the device. * We call video_unregister_device in any case. * The last function called in this procedure is * usb_dsbr100_video_device_release */ static void usb_dsbr100_disconnect(struct usb_interface *intf) { struct dsbr100_device *radio = usb_get_intfdata(intf); mutex_lock(&radio->v4l2_lock); /* * Disconnect is also called on unload, and in that case we need to * mute the device. This call will silently fail if it is called * after a physical disconnect. */ usb_control_msg(radio->usbdev, usb_rcvctrlpipe(radio->usbdev, 0), DSB100_ONOFF, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, 0x00, 0x00, radio->transfer_buffer, 8, 300); usb_set_intfdata(intf, NULL); video_unregister_device(&radio->videodev); v4l2_device_disconnect(&radio->v4l2_dev); mutex_unlock(&radio->v4l2_lock); v4l2_device_put(&radio->v4l2_dev); } /* Suspend device - stop device. */ static int usb_dsbr100_suspend(struct usb_interface *intf, pm_message_t message) { struct dsbr100_device *radio = usb_get_intfdata(intf); mutex_lock(&radio->v4l2_lock); if (!radio->muted && dsbr100_stop(radio) < 0) dev_warn(&intf->dev, "dsbr100_stop failed\n"); mutex_unlock(&radio->v4l2_lock); dev_info(&intf->dev, "going into suspend..\n"); return 0; } /* Resume device - start device. 
*/ static int usb_dsbr100_resume(struct usb_interface *intf) { struct dsbr100_device *radio = usb_get_intfdata(intf); mutex_lock(&radio->v4l2_lock); if (!radio->muted && dsbr100_start(radio) < 0) dev_warn(&intf->dev, "dsbr100_start failed\n"); mutex_unlock(&radio->v4l2_lock); dev_info(&intf->dev, "coming out of suspend..\n"); return 0; } /* free data structures */ static void usb_dsbr100_release(struct v4l2_device *v4l2_dev) { struct dsbr100_device *radio = v4l2_dev_to_radio(v4l2_dev); v4l2_ctrl_handler_free(&radio->hdl); v4l2_device_unregister(&radio->v4l2_dev); kfree(radio->transfer_buffer); kfree(radio); } static const struct v4l2_ctrl_ops usb_dsbr100_ctrl_ops = { .s_ctrl = usb_dsbr100_s_ctrl, }; /* File system interface */ static const struct v4l2_file_operations usb_dsbr100_fops = { .owner = THIS_MODULE, .unlocked_ioctl = video_ioctl2, .open = v4l2_fh_open, .release = v4l2_fh_release, .poll = v4l2_ctrl_poll, }; static const struct v4l2_ioctl_ops usb_dsbr100_ioctl_ops = { .vidioc_querycap = vidioc_querycap, .vidioc_g_tuner = vidioc_g_tuner, .vidioc_s_tuner = vidioc_s_tuner, .vidioc_g_frequency = vidioc_g_frequency, .vidioc_s_frequency = vidioc_s_frequency, .vidioc_log_status = v4l2_ctrl_log_status, .vidioc_subscribe_event = v4l2_ctrl_subscribe_event, .vidioc_unsubscribe_event = v4l2_event_unsubscribe, }; /* check if the device is present and register with v4l and usb if it is */ static int usb_dsbr100_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct dsbr100_device *radio; struct v4l2_device *v4l2_dev; int retval; radio = kzalloc(sizeof(struct dsbr100_device), GFP_KERNEL); if (!radio) return -ENOMEM; radio->transfer_buffer = kmalloc(TB_LEN, GFP_KERNEL); if (!(radio->transfer_buffer)) { kfree(radio); return -ENOMEM; } v4l2_dev = &radio->v4l2_dev; v4l2_dev->release = usb_dsbr100_release; retval = v4l2_device_register(&intf->dev, v4l2_dev); if (retval < 0) { v4l2_err(v4l2_dev, "couldn't register v4l2_device\n"); goto err_reg_dev; } v4l2_ctrl_handler_init(&radio->hdl, 1); v4l2_ctrl_new_std(&radio->hdl, &usb_dsbr100_ctrl_ops, V4L2_CID_AUDIO_MUTE, 0, 1, 1, 1); if (radio->hdl.error) { retval = radio->hdl.error; v4l2_err(v4l2_dev, "couldn't register control\n"); goto err_reg_ctrl; } mutex_init(&radio->v4l2_lock); strscpy(radio->videodev.name, v4l2_dev->name, sizeof(radio->videodev.name)); radio->videodev.v4l2_dev = v4l2_dev; radio->videodev.fops = &usb_dsbr100_fops; radio->videodev.ioctl_ops = &usb_dsbr100_ioctl_ops; radio->videodev.release = video_device_release_empty; radio->videodev.lock = &radio->v4l2_lock; radio->videodev.ctrl_handler = &radio->hdl; radio->videodev.device_caps = V4L2_CAP_RADIO | V4L2_CAP_TUNER; radio->usbdev = interface_to_usbdev(intf); radio->curfreq = FREQ_MIN * FREQ_MUL; radio->muted = true; video_set_drvdata(&radio->videodev, radio); usb_set_intfdata(intf, radio); retval = video_register_device(&radio->videodev, VFL_TYPE_RADIO, radio_nr); if (retval == 0) return 0; v4l2_err(v4l2_dev, "couldn't register video device\n"); err_reg_ctrl: v4l2_ctrl_handler_free(&radio->hdl); v4l2_device_unregister(v4l2_dev); err_reg_dev: kfree(radio->transfer_buffer); kfree(radio); return retval; } static const struct usb_device_id usb_dsbr100_device_table[] = { { USB_DEVICE(DSB100_VENDOR, DSB100_PRODUCT) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, usb_dsbr100_device_table); /* USB subsystem interface */ static struct usb_driver usb_dsbr100_driver = { .name = "dsbr100", .probe = usb_dsbr100_probe, .disconnect = usb_dsbr100_disconnect, .id_table = 
usb_dsbr100_device_table, .suspend = usb_dsbr100_suspend, .resume = usb_dsbr100_resume, .reset_resume = usb_dsbr100_resume, }; module_usb_driver(usb_dsbr100_driver);
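dsbr100_setfreq() above derives the device's 16-bit tuning word from a V4L2 frequency given in 1/16 kHz units (V4L2_TUNER_CAP_LOW) as f = (freq / 16 * 80) / 1000 + 856, then splits it into the wValue/wIndex halves of the DSB100_TUNE control message. The stand-alone sketch below just reproduces that arithmetic for a few in-range frequencies; the helper name and sample values are illustrative, not part of the driver.

/* Hedged sketch of the dsbr100 tuning-word arithmetic shown above. */
#include <stdio.h>

static unsigned dsbr100_tuning_word(unsigned freq_16th_khz)
{
	/* same integer expression as dsbr100_setfreq() */
	return (freq_16th_khz / 16 * 80) / 1000 + 856;
}

int main(void)
{
	/* MHz values inside the driver's European limits (87.5 .. 108.0) */
	const double mhz[] = { 87.5, 100.0, 108.0 };

	for (unsigned i = 0; i < sizeof(mhz) / sizeof(mhz[0]); i++) {
		unsigned freq = (unsigned)(mhz[i] * 16000);   /* FREQ_MUL == 16000 */
		unsigned f = dsbr100_tuning_word(freq);

		printf("%6.1f MHz -> freq=%u f=0x%04x (wValue=0x%02x wIndex=0x%02x)\n",
		       mhz[i], freq, f, (f >> 8) & 0xff, f & 0xff);
	}
	return 0;
}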
// SPDX-License-Identifier: GPL-2.0-or-later /* * Squashfs - a compressed read only filesystem for Linux * * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 * Phillip Lougher <phillip@squashfs.org.uk> * * namei.c */ /* * This file implements code to do filename lookup in directories. * * Like inodes, directories are packed into compressed metadata blocks, stored * in a directory table. Directories are accessed using the start address of * the metablock containing the directory and the offset into the * decompressed block (<block, offset>). * * Directories are organised in a slightly complex way, and are not simply * a list of file names. The organisation takes advantage of the * fact that (in most cases) the inodes of the files will be in the same * compressed metadata block, and therefore, can share the start block. * Directories are therefore organised in a two level list, a directory * header containing the shared start block value, and a sequence of directory * entries, each of which share the shared start block. A new directory header * is written once/if the inode start block changes. The directory * header/directory entry list is repeated as many times as necessary. * * Directories are sorted, and can contain a directory index to speed up * file lookup. Directory indexes store one entry per metablock, each entry * storing the index/filename mapping to the first directory header * in each metadata block. Directories are sorted in alphabetical order, * and at lookup the index is scanned linearly looking for the first filename * alphabetically larger than the filename being looked up. At this point the * location of the metadata block the filename is in has been found. * The general idea of the index is to ensure only one metadata block needs to be * decompressed to do a lookup irrespective of the length of the directory. * This scheme has the advantage that it doesn't require extra memory overhead * and doesn't require much extra storage on disk. */ #include <linux/fs.h> #include <linux/vfs.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/dcache.h> #include <linux/xattr.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" #include "squashfs_fs_i.h" #include "squashfs.h" #include "xattr.h" /* * Lookup name in the directory index, returning the location of the metadata * block containing it, and the directory index this represents. * * If we get an error reading the index then return the part of the index * (if any) we have managed to read - the index isn't essential, just * quicker.
*/ static int get_dir_index_using_name(struct super_block *sb, u64 *next_block, int *next_offset, u64 index_start, int index_offset, int i_count, const char *name) { struct squashfs_sb_info *msblk = sb->s_fs_info; int i, length = 0, err; unsigned int size; struct squashfs_dir_index *index; TRACE("Entered get_dir_index_using_name, i_count %d\n", i_count); index = kmalloc(sizeof(*index) + SQUASHFS_NAME_LEN + 1, GFP_KERNEL); if (index == NULL) { ERROR("Failed to allocate squashfs_dir_index\n"); goto out; } for (i = 0; i < i_count; i++) { err = squashfs_read_metadata(sb, index, &index_start, &index_offset, sizeof(*index)); if (err < 0) break; size = le32_to_cpu(index->size) + 1; if (size > SQUASHFS_NAME_LEN) break; err = squashfs_read_metadata(sb, index->name, &index_start, &index_offset, size); if (err < 0) break; index->name[size] = '\0'; if (strcmp(index->name, name) > 0) break; length = le32_to_cpu(index->index); *next_block = le32_to_cpu(index->start_block) + msblk->directory_table; } *next_offset = (length + *next_offset) % SQUASHFS_METADATA_SIZE; kfree(index); out: /* * Return index (f_pos) of the looked up metadata block. Translate * from internal f_pos to external f_pos which is offset by 3 because * we invent "." and ".." entries which are not actually stored in the * directory. */ return length + 3; } static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { const unsigned char *name = dentry->d_name.name; int len = dentry->d_name.len; struct inode *inode = NULL; struct squashfs_sb_info *msblk = dir->i_sb->s_fs_info; struct squashfs_dir_header dirh; struct squashfs_dir_entry *dire; u64 block = squashfs_i(dir)->start + msblk->directory_table; int offset = squashfs_i(dir)->offset; int err, length; unsigned int dir_count, size; TRACE("Entered squashfs_lookup [%llx:%x]\n", block, offset); dire = kmalloc(sizeof(*dire) + SQUASHFS_NAME_LEN + 1, GFP_KERNEL); if (dire == NULL) { ERROR("Failed to allocate squashfs_dir_entry\n"); return ERR_PTR(-ENOMEM); } if (len > SQUASHFS_NAME_LEN) { err = -ENAMETOOLONG; goto failed; } length = get_dir_index_using_name(dir->i_sb, &block, &offset, squashfs_i(dir)->dir_idx_start, squashfs_i(dir)->dir_idx_offset, squashfs_i(dir)->dir_idx_cnt, name); while (length < i_size_read(dir)) { /* * Read directory header. */ err = squashfs_read_metadata(dir->i_sb, &dirh, &block, &offset, sizeof(dirh)); if (err < 0) goto read_failure; length += sizeof(dirh); dir_count = le32_to_cpu(dirh.count) + 1; if (dir_count > SQUASHFS_DIR_COUNT) goto data_error; while (dir_count--) { /* * Read directory entry. 
*/ err = squashfs_read_metadata(dir->i_sb, dire, &block, &offset, sizeof(*dire)); if (err < 0) goto read_failure; size = le16_to_cpu(dire->size) + 1; /* size should never be larger than SQUASHFS_NAME_LEN */ if (size > SQUASHFS_NAME_LEN) goto data_error; err = squashfs_read_metadata(dir->i_sb, dire->name, &block, &offset, size); if (err < 0) goto read_failure; length += sizeof(*dire) + size; if (name[0] < dire->name[0]) goto exit_lookup; if (len == size && !strncmp(name, dire->name, len)) { unsigned int blk, off, ino_num; long long ino; blk = le32_to_cpu(dirh.start_block); off = le16_to_cpu(dire->offset); ino_num = le32_to_cpu(dirh.inode_number) + (short) le16_to_cpu(dire->inode_number); ino = SQUASHFS_MKINODE(blk, off); TRACE("calling squashfs_iget for directory " "entry %s, inode %x:%x, %d\n", name, blk, off, ino_num); inode = squashfs_iget(dir->i_sb, ino, ino_num); goto exit_lookup; } } } exit_lookup: kfree(dire); return d_splice_alias(inode, dentry); data_error: err = -EIO; read_failure: ERROR("Unable to read directory block [%llx:%x]\n", squashfs_i(dir)->start + msblk->directory_table, squashfs_i(dir)->offset); failed: kfree(dire); return ERR_PTR(err); } const struct inode_operations squashfs_dir_inode_ops = { .lookup = squashfs_lookup, .listxattr = squashfs_listxattr };
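get_dir_index_using_name() above scans the sorted index entries in order and stops at the first indexed name that compares greater than the one being looked up, so the lookup itself only ever decompresses one directory metadata block. Below is a hedged, self-contained sketch of that scan over an in-memory table; the index_entry type and the sample data are invented for illustration and are not the on-disk squashfs format.

/* Hedged sketch of the squashfs directory-index scan described above:
 * keep the last sorted entry whose first name is <= the target and stop
 * at the first one that is greater. Types and data here are invented. */
#include <stdio.h>
#include <string.h>

struct index_entry {
	const char *first_name;   /* first name in that metadata block */
	unsigned    start_block;  /* metadata block holding the directory header */
	unsigned    f_pos;        /* directory offset of that header */
};

static const struct index_entry sample_index[] = {
	{ "aardvark", 0x0000, 0 },
	{ "kernel",   0x2000, 8192 },
	{ "squash",   0x4000, 16384 },
};

static void scan_index(const struct index_entry *idx, unsigned count,
		       const char *name, unsigned *block, unsigned *f_pos)
{
	*block = 0;               /* defaults: start at the very beginning */
	*f_pos = 0;
	for (unsigned i = 0; i < count; i++) {
		if (strcmp(idx[i].first_name, name) > 0)
			break;    /* first larger name: previous block wins */
		*block = idx[i].start_block;
		*f_pos = idx[i].f_pos;
	}
}

int main(void)
{
	unsigned block, f_pos;

	scan_index(sample_index, 3, "namei.c", &block, &f_pos);
	printf("lookup starts in metadata block 0x%x at directory offset %u\n",
	       block, f_pos);
	return 0;
}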
// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/nfs/fs_context.c * * Copyright (C) 1992 Rick Sladkey * Conversion to new mount api Copyright (C) David Howells * * NFS mount handling. * * Split from fs/nfs/super.c by David Howells <dhowells@redhat.com> */ #include <linux/compat.h> #include <linux/module.h> #include <linux/fs.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> #include <linux/nfs_fs.h> #include <linux/nfs_mount.h> #include <linux/nfs4_mount.h> #include <net/handshake.h> #include "nfs.h" #include "internal.h" #include "nfstrace.h" #define NFSDBG_FACILITY NFSDBG_MOUNT #if IS_ENABLED(CONFIG_NFS_V3) #define NFS_DEFAULT_VERSION 3 #else #define NFS_DEFAULT_VERSION 2 #endif #define NFS_MAX_CONNECTIONS 16 enum nfs_param { Opt_ac, Opt_acdirmax, Opt_acdirmin, Opt_acl, Opt_acregmax, Opt_acregmin, Opt_actimeo, Opt_addr, Opt_bg, Opt_bsize, Opt_clientaddr, Opt_cto, Opt_alignwrite, Opt_fg, Opt_fscache, Opt_fscache_flag, Opt_hard, Opt_intr, Opt_local_lock, Opt_lock, Opt_lookupcache, Opt_migration, Opt_minorversion, Opt_mountaddr, Opt_mounthost, Opt_mountport, Opt_mountproto, Opt_mountvers, Opt_namelen, Opt_nconnect, Opt_max_connect, Opt_port, Opt_posix, Opt_proto, Opt_rdirplus, Opt_rdma, Opt_resvport, Opt_retrans, Opt_retry, Opt_rsize, Opt_sec, Opt_sharecache, Opt_sloppy, Opt_soft, Opt_softerr, Opt_softreval, Opt_source, Opt_tcp, Opt_timeo, Opt_trunkdiscovery, Opt_udp, Opt_v, Opt_vers, Opt_wsize, Opt_write, Opt_xprtsec, }; enum { Opt_local_lock_all, Opt_local_lock_flock, Opt_local_lock_none, Opt_local_lock_posix, }; static const struct constant_table nfs_param_enums_local_lock[] = { { "all", Opt_local_lock_all }, { "flock", Opt_local_lock_flock }, { "posix", Opt_local_lock_posix }, { "none", Opt_local_lock_none }, {} }; enum { Opt_lookupcache_all, Opt_lookupcache_none, Opt_lookupcache_positive, }; static const struct constant_table nfs_param_enums_lookupcache[] = { { "all", Opt_lookupcache_all }, { "none", Opt_lookupcache_none }, { "pos", Opt_lookupcache_positive }, { "positive", Opt_lookupcache_positive }, {} }; enum { Opt_write_lazy, Opt_write_eager, Opt_write_wait, }; static const struct constant_table nfs_param_enums_write[] = { { "lazy", Opt_write_lazy }, { "eager", Opt_write_eager }, { "wait", Opt_write_wait }, {} }; static const struct fs_parameter_spec nfs_fs_parameters[] = { fsparam_flag_no("ac", Opt_ac), fsparam_u32 ("acdirmax", Opt_acdirmax), fsparam_u32 ("acdirmin", Opt_acdirmin), fsparam_flag_no("acl", Opt_acl), fsparam_u32 ("acregmax", Opt_acregmax), fsparam_u32 ("acregmin", Opt_acregmin), fsparam_u32 ("actimeo", Opt_actimeo), fsparam_string("addr", Opt_addr), fsparam_flag ("bg", Opt_bg), fsparam_u32 ("bsize", Opt_bsize), fsparam_string("clientaddr", Opt_clientaddr), fsparam_flag_no("cto", Opt_cto), fsparam_flag_no("alignwrite", Opt_alignwrite), fsparam_flag ("fg", Opt_fg), fsparam_flag_no("fsc", Opt_fscache_flag), fsparam_string("fsc", Opt_fscache), fsparam_flag ("hard", Opt_hard), __fsparam(NULL, "intr", Opt_intr, fs_param_neg_with_no|fs_param_deprecated, NULL), fsparam_enum ("local_lock", Opt_local_lock, nfs_param_enums_local_lock), fsparam_flag_no("lock", Opt_lock), fsparam_enum
("lookupcache", Opt_lookupcache, nfs_param_enums_lookupcache), fsparam_flag_no("migration", Opt_migration), fsparam_u32 ("minorversion", Opt_minorversion), fsparam_string("mountaddr", Opt_mountaddr), fsparam_string("mounthost", Opt_mounthost), fsparam_u32 ("mountport", Opt_mountport), fsparam_string("mountproto", Opt_mountproto), fsparam_u32 ("mountvers", Opt_mountvers), fsparam_u32 ("namlen", Opt_namelen), fsparam_u32 ("nconnect", Opt_nconnect), fsparam_u32 ("max_connect", Opt_max_connect), fsparam_string("nfsvers", Opt_vers), fsparam_u32 ("port", Opt_port), fsparam_flag_no("posix", Opt_posix), fsparam_string("proto", Opt_proto), fsparam_flag_no("rdirplus", Opt_rdirplus), fsparam_flag ("rdma", Opt_rdma), fsparam_flag_no("resvport", Opt_resvport), fsparam_u32 ("retrans", Opt_retrans), fsparam_string("retry", Opt_retry), fsparam_u32 ("rsize", Opt_rsize), fsparam_string("sec", Opt_sec), fsparam_flag_no("sharecache", Opt_sharecache), fsparam_flag ("sloppy", Opt_sloppy), fsparam_flag ("soft", Opt_soft), fsparam_flag ("softerr", Opt_softerr), fsparam_flag ("softreval", Opt_softreval), fsparam_string("source", Opt_source), fsparam_flag ("tcp", Opt_tcp), fsparam_u32 ("timeo", Opt_timeo), fsparam_flag_no("trunkdiscovery", Opt_trunkdiscovery), fsparam_flag ("udp", Opt_udp), fsparam_flag ("v2", Opt_v), fsparam_flag ("v3", Opt_v), fsparam_flag ("v4", Opt_v), fsparam_flag ("v4.0", Opt_v), fsparam_flag ("v4.1", Opt_v), fsparam_flag ("v4.2", Opt_v), fsparam_string("vers", Opt_vers), fsparam_enum ("write", Opt_write, nfs_param_enums_write), fsparam_u32 ("wsize", Opt_wsize), fsparam_string("xprtsec", Opt_xprtsec), {} }; enum { Opt_vers_2, Opt_vers_3, Opt_vers_4, Opt_vers_4_0, Opt_vers_4_1, Opt_vers_4_2, }; static const struct constant_table nfs_vers_tokens[] = { { "2", Opt_vers_2 }, { "3", Opt_vers_3 }, { "4", Opt_vers_4 }, { "4.0", Opt_vers_4_0 }, { "4.1", Opt_vers_4_1 }, { "4.2", Opt_vers_4_2 }, {} }; enum { Opt_xprt_rdma, Opt_xprt_rdma6, Opt_xprt_tcp, Opt_xprt_tcp6, Opt_xprt_udp, Opt_xprt_udp6, nr__Opt_xprt }; static const struct constant_table nfs_xprt_protocol_tokens[] = { { "rdma", Opt_xprt_rdma }, { "rdma6", Opt_xprt_rdma6 }, { "tcp", Opt_xprt_tcp }, { "tcp6", Opt_xprt_tcp6 }, { "udp", Opt_xprt_udp }, { "udp6", Opt_xprt_udp6 }, {} }; enum { Opt_sec_krb5, Opt_sec_krb5i, Opt_sec_krb5p, Opt_sec_lkey, Opt_sec_lkeyi, Opt_sec_lkeyp, Opt_sec_none, Opt_sec_spkm, Opt_sec_spkmi, Opt_sec_spkmp, Opt_sec_sys, nr__Opt_sec }; static const struct constant_table nfs_secflavor_tokens[] = { { "krb5", Opt_sec_krb5 }, { "krb5i", Opt_sec_krb5i }, { "krb5p", Opt_sec_krb5p }, { "lkey", Opt_sec_lkey }, { "lkeyi", Opt_sec_lkeyi }, { "lkeyp", Opt_sec_lkeyp }, { "none", Opt_sec_none }, { "null", Opt_sec_none }, { "spkm3", Opt_sec_spkm }, { "spkm3i", Opt_sec_spkmi }, { "spkm3p", Opt_sec_spkmp }, { "sys", Opt_sec_sys }, {} }; enum { Opt_xprtsec_none, Opt_xprtsec_tls, Opt_xprtsec_mtls, nr__Opt_xprtsec }; static const struct constant_table nfs_xprtsec_policies[] = { { "none", Opt_xprtsec_none }, { "tls", Opt_xprtsec_tls }, { "mtls", Opt_xprtsec_mtls }, {} }; /* * Sanity-check a server address provided by the mount command. * * Address family must be initialized, and address must not be * the ANY address for that family. 
*/ static int nfs_verify_server_address(struct sockaddr_storage *addr) { switch (addr->ss_family) { case AF_INET: { struct sockaddr_in *sa = (struct sockaddr_in *)addr; return sa->sin_addr.s_addr != htonl(INADDR_ANY); } case AF_INET6: { struct in6_addr *sa = &((struct sockaddr_in6 *)addr)->sin6_addr; return !ipv6_addr_any(sa); } } return 0; } #ifdef CONFIG_NFS_DISABLE_UDP_SUPPORT static bool nfs_server_transport_udp_invalid(const struct nfs_fs_context *ctx) { return true; } #else static bool nfs_server_transport_udp_invalid(const struct nfs_fs_context *ctx) { if (ctx->version == 4) return true; return false; } #endif /* * Sanity check the NFS transport protocol. */ static int nfs_validate_transport_protocol(struct fs_context *fc, struct nfs_fs_context *ctx) { switch (ctx->nfs_server.protocol) { case XPRT_TRANSPORT_UDP: if (nfs_server_transport_udp_invalid(ctx)) goto out_invalid_transport_udp; break; case XPRT_TRANSPORT_TCP: case XPRT_TRANSPORT_RDMA: break; default: ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; } if (ctx->xprtsec.policy != RPC_XPRTSEC_NONE) switch (ctx->nfs_server.protocol) { case XPRT_TRANSPORT_TCP: ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP_TLS; break; default: goto out_invalid_xprtsec_policy; } return 0; out_invalid_transport_udp: return nfs_invalf(fc, "NFS: Unsupported transport protocol udp"); out_invalid_xprtsec_policy: return nfs_invalf(fc, "NFS: Transport does not support xprtsec"); } /* * For text based NFSv2/v3 mounts, the mount protocol transport default * settings should depend upon the specified NFS transport. */ static void nfs_set_mount_transport_protocol(struct nfs_fs_context *ctx) { if (ctx->mount_server.protocol == XPRT_TRANSPORT_UDP || ctx->mount_server.protocol == XPRT_TRANSPORT_TCP) return; switch (ctx->nfs_server.protocol) { case XPRT_TRANSPORT_UDP: ctx->mount_server.protocol = XPRT_TRANSPORT_UDP; break; case XPRT_TRANSPORT_TCP: case XPRT_TRANSPORT_RDMA: ctx->mount_server.protocol = XPRT_TRANSPORT_TCP; } } /* * Add 'flavor' to 'auth_info' if not already present. * Returns true if 'flavor' ends up in the list, false otherwise */ static int nfs_auth_info_add(struct fs_context *fc, struct nfs_auth_info *auth_info, rpc_authflavor_t flavor) { unsigned int i; unsigned int max_flavor_len = ARRAY_SIZE(auth_info->flavors); /* make sure this flavor isn't already in the list */ for (i = 0; i < auth_info->flavor_len; i++) { if (flavor == auth_info->flavors[i]) return 0; } if (auth_info->flavor_len + 1 >= max_flavor_len) return nfs_invalf(fc, "NFS: too many sec= flavors"); auth_info->flavors[auth_info->flavor_len++] = flavor; return 0; } /* * Parse the value of the 'sec=' option. 
*/ static int nfs_parse_security_flavors(struct fs_context *fc, struct fs_parameter *param) { struct nfs_fs_context *ctx = nfs_fc2context(fc); rpc_authflavor_t pseudoflavor; char *string = param->string, *p; int ret; trace_nfs_mount_assign(param->key, string); while ((p = strsep(&string, ":")) != NULL) { if (!*p) continue; switch (lookup_constant(nfs_secflavor_tokens, p, -1)) { case Opt_sec_none: pseudoflavor = RPC_AUTH_NULL; break; case Opt_sec_sys: pseudoflavor = RPC_AUTH_UNIX; break; case Opt_sec_krb5: pseudoflavor = RPC_AUTH_GSS_KRB5; break; case Opt_sec_krb5i: pseudoflavor = RPC_AUTH_GSS_KRB5I; break; case Opt_sec_krb5p: pseudoflavor = RPC_AUTH_GSS_KRB5P; break; case Opt_sec_lkey: pseudoflavor = RPC_AUTH_GSS_LKEY; break; case Opt_sec_lkeyi: pseudoflavor = RPC_AUTH_GSS_LKEYI; break; case Opt_sec_lkeyp: pseudoflavor = RPC_AUTH_GSS_LKEYP; break; case Opt_sec_spkm: pseudoflavor = RPC_AUTH_GSS_SPKM; break; case Opt_sec_spkmi: pseudoflavor = RPC_AUTH_GSS_SPKMI; break; case Opt_sec_spkmp: pseudoflavor = RPC_AUTH_GSS_SPKMP; break; default: return nfs_invalf(fc, "NFS: sec=%s option not recognized", p); } ret = nfs_auth_info_add(fc, &ctx->auth_info, pseudoflavor); if (ret < 0) return ret; } return 0; } static int nfs_parse_xprtsec_policy(struct fs_context *fc, struct fs_parameter *param) { struct nfs_fs_context *ctx = nfs_fc2context(fc); trace_nfs_mount_assign(param->key, param->string); switch (lookup_constant(nfs_xprtsec_policies, param->string, -1)) { case Opt_xprtsec_none: ctx->xprtsec.policy = RPC_XPRTSEC_NONE; break; case Opt_xprtsec_tls: ctx->xprtsec.policy = RPC_XPRTSEC_TLS_ANON; break; case Opt_xprtsec_mtls: ctx->xprtsec.policy = RPC_XPRTSEC_TLS_X509; break; default: return nfs_invalf(fc, "NFS: Unrecognized transport security policy"); } return 0; } static int nfs_parse_version_string(struct fs_context *fc, const char *string) { struct nfs_fs_context *ctx = nfs_fc2context(fc); ctx->flags &= ~NFS_MOUNT_VER3; switch (lookup_constant(nfs_vers_tokens, string, -1)) { case Opt_vers_2: ctx->version = 2; break; case Opt_vers_3: ctx->flags |= NFS_MOUNT_VER3; ctx->version = 3; break; case Opt_vers_4: /* Backward compatibility option. In future, * the mount program should always supply * a NFSv4 minor version number. */ ctx->version = 4; break; case Opt_vers_4_0: ctx->version = 4; ctx->minorversion = 0; break; case Opt_vers_4_1: ctx->version = 4; ctx->minorversion = 1; break; case Opt_vers_4_2: ctx->version = 4; ctx->minorversion = 2; break; default: return nfs_invalf(fc, "NFS: Unsupported NFS version"); } return 0; } /* * Parse a single mount parameter. */ static int nfs_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct fs_parse_result result; struct nfs_fs_context *ctx = nfs_fc2context(fc); unsigned short protofamily, mountfamily; unsigned int len; int ret, opt; trace_nfs_mount_option(param); opt = fs_parse(fc, nfs_fs_parameters, param, &result); if (opt < 0) return (opt == -ENOPARAM && ctx->sloppy) ? 
1 : opt; if (fc->security) ctx->has_sec_mnt_opts = 1; switch (opt) { case Opt_source: if (fc->source) return nfs_invalf(fc, "NFS: Multiple sources not supported"); fc->source = param->string; param->string = NULL; break; /* * boolean options: foo/nofoo */ case Opt_soft: ctx->flags |= NFS_MOUNT_SOFT; ctx->flags &= ~NFS_MOUNT_SOFTERR; break; case Opt_softerr: ctx->flags |= NFS_MOUNT_SOFTERR | NFS_MOUNT_SOFTREVAL; ctx->flags &= ~NFS_MOUNT_SOFT; break; case Opt_hard: ctx->flags &= ~(NFS_MOUNT_SOFT | NFS_MOUNT_SOFTERR | NFS_MOUNT_SOFTREVAL); break; case Opt_softreval: if (result.negated) ctx->flags &= ~NFS_MOUNT_SOFTREVAL; else ctx->flags |= NFS_MOUNT_SOFTREVAL; break; case Opt_posix: if (result.negated) ctx->flags &= ~NFS_MOUNT_POSIX; else ctx->flags |= NFS_MOUNT_POSIX; break; case Opt_cto: if (result.negated) ctx->flags |= NFS_MOUNT_NOCTO; else ctx->flags &= ~NFS_MOUNT_NOCTO; break; case Opt_trunkdiscovery: if (result.negated) ctx->flags &= ~NFS_MOUNT_TRUNK_DISCOVERY; else ctx->flags |= NFS_MOUNT_TRUNK_DISCOVERY; break; case Opt_alignwrite: if (result.negated) ctx->flags |= NFS_MOUNT_NO_ALIGNWRITE; else ctx->flags &= ~NFS_MOUNT_NO_ALIGNWRITE; break; case Opt_ac: if (result.negated) ctx->flags |= NFS_MOUNT_NOAC; else ctx->flags &= ~NFS_MOUNT_NOAC; break; case Opt_lock: if (result.negated) { ctx->lock_status = NFS_LOCK_NOLOCK; ctx->flags |= NFS_MOUNT_NONLM; ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); } else { ctx->lock_status = NFS_LOCK_LOCK; ctx->flags &= ~NFS_MOUNT_NONLM; ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); } break; case Opt_udp: ctx->flags &= ~NFS_MOUNT_TCP; ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP; break; case Opt_tcp: case Opt_rdma: ctx->flags |= NFS_MOUNT_TCP; /* for side protocols */ ret = xprt_find_transport_ident(param->key); if (ret < 0) goto out_bad_transport; ctx->nfs_server.protocol = ret; break; case Opt_acl: if (result.negated) ctx->flags |= NFS_MOUNT_NOACL; else ctx->flags &= ~NFS_MOUNT_NOACL; break; case Opt_rdirplus: if (result.negated) ctx->flags |= NFS_MOUNT_NORDIRPLUS; else ctx->flags &= ~NFS_MOUNT_NORDIRPLUS; break; case Opt_sharecache: if (result.negated) ctx->flags |= NFS_MOUNT_UNSHARED; else ctx->flags &= ~NFS_MOUNT_UNSHARED; break; case Opt_resvport: if (result.negated) ctx->flags |= NFS_MOUNT_NORESVPORT; else ctx->flags &= ~NFS_MOUNT_NORESVPORT; break; case Opt_fscache_flag: if (result.negated) ctx->options &= ~NFS_OPTION_FSCACHE; else ctx->options |= NFS_OPTION_FSCACHE; kfree(ctx->fscache_uniq); ctx->fscache_uniq = NULL; break; case Opt_fscache: trace_nfs_mount_assign(param->key, param->string); ctx->options |= NFS_OPTION_FSCACHE; kfree(ctx->fscache_uniq); ctx->fscache_uniq = param->string; param->string = NULL; break; case Opt_migration: if (result.negated) ctx->options &= ~NFS_OPTION_MIGRATION; else ctx->options |= NFS_OPTION_MIGRATION; break; /* * options that take numeric values */ case Opt_port: if (result.uint_32 > USHRT_MAX) goto out_of_bounds; ctx->nfs_server.port = result.uint_32; break; case Opt_rsize: ctx->rsize = result.uint_32; break; case Opt_wsize: ctx->wsize = result.uint_32; break; case Opt_bsize: ctx->bsize = result.uint_32; break; case Opt_timeo: if (result.uint_32 < 1 || result.uint_32 > INT_MAX) goto out_of_bounds; ctx->timeo = result.uint_32; break; case Opt_retrans: if (result.uint_32 > INT_MAX) goto out_of_bounds; ctx->retrans = result.uint_32; break; case Opt_acregmin: ctx->acregmin = result.uint_32; break; case Opt_acregmax: ctx->acregmax = result.uint_32; break; case Opt_acdirmin: 
ctx->acdirmin = result.uint_32; break; case Opt_acdirmax: ctx->acdirmax = result.uint_32; break; case Opt_actimeo: ctx->acregmin = result.uint_32; ctx->acregmax = result.uint_32; ctx->acdirmin = result.uint_32; ctx->acdirmax = result.uint_32; break; case Opt_namelen: ctx->namlen = result.uint_32; break; case Opt_mountport: if (result.uint_32 > USHRT_MAX) goto out_of_bounds; ctx->mount_server.port = result.uint_32; break; case Opt_mountvers: if (result.uint_32 < NFS_MNT_VERSION || result.uint_32 > NFS_MNT3_VERSION) goto out_of_bounds; ctx->mount_server.version = result.uint_32; break; case Opt_minorversion: if (result.uint_32 > NFS4_MAX_MINOR_VERSION) goto out_of_bounds; ctx->minorversion = result.uint_32; break; /* * options that take text values */ case Opt_v: ret = nfs_parse_version_string(fc, param->key + 1); if (ret < 0) return ret; break; case Opt_vers: if (!param->string) goto out_invalid_value; trace_nfs_mount_assign(param->key, param->string); ret = nfs_parse_version_string(fc, param->string); if (ret < 0) return ret; break; case Opt_sec: ret = nfs_parse_security_flavors(fc, param); if (ret < 0) return ret; break; case Opt_xprtsec: ret = nfs_parse_xprtsec_policy(fc, param); if (ret < 0) return ret; break; case Opt_proto: if (!param->string) goto out_invalid_value; trace_nfs_mount_assign(param->key, param->string); protofamily = AF_INET; switch (lookup_constant(nfs_xprt_protocol_tokens, param->string, -1)) { case Opt_xprt_udp6: protofamily = AF_INET6; fallthrough; case Opt_xprt_udp: ctx->flags &= ~NFS_MOUNT_TCP; ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP; break; case Opt_xprt_tcp6: protofamily = AF_INET6; fallthrough; case Opt_xprt_tcp: ctx->flags |= NFS_MOUNT_TCP; ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; break; case Opt_xprt_rdma6: protofamily = AF_INET6; fallthrough; case Opt_xprt_rdma: /* vector side protocols to TCP */ ctx->flags |= NFS_MOUNT_TCP; ret = xprt_find_transport_ident(param->string); if (ret < 0) goto out_bad_transport; ctx->nfs_server.protocol = ret; break; default: goto out_bad_transport; } ctx->protofamily = protofamily; break; case Opt_mountproto: if (!param->string) goto out_invalid_value; trace_nfs_mount_assign(param->key, param->string); mountfamily = AF_INET; switch (lookup_constant(nfs_xprt_protocol_tokens, param->string, -1)) { case Opt_xprt_udp6: mountfamily = AF_INET6; fallthrough; case Opt_xprt_udp: ctx->mount_server.protocol = XPRT_TRANSPORT_UDP; break; case Opt_xprt_tcp6: mountfamily = AF_INET6; fallthrough; case Opt_xprt_tcp: ctx->mount_server.protocol = XPRT_TRANSPORT_TCP; break; case Opt_xprt_rdma: /* not used for side protocols */ default: goto out_bad_transport; } ctx->mountfamily = mountfamily; break; case Opt_addr: trace_nfs_mount_assign(param->key, param->string); len = rpc_pton(fc->net_ns, param->string, param->size, &ctx->nfs_server.address, sizeof(ctx->nfs_server._address)); if (len == 0) goto out_invalid_address; ctx->nfs_server.addrlen = len; break; case Opt_clientaddr: trace_nfs_mount_assign(param->key, param->string); kfree(ctx->client_address); ctx->client_address = param->string; param->string = NULL; break; case Opt_mounthost: trace_nfs_mount_assign(param->key, param->string); kfree(ctx->mount_server.hostname); ctx->mount_server.hostname = param->string; param->string = NULL; break; case Opt_mountaddr: trace_nfs_mount_assign(param->key, param->string); len = rpc_pton(fc->net_ns, param->string, param->size, &ctx->mount_server.address, sizeof(ctx->mount_server._address)); if (len == 0) goto out_invalid_address; 
ctx->mount_server.addrlen = len; break; case Opt_nconnect: trace_nfs_mount_assign(param->key, param->string); if (result.uint_32 < 1 || result.uint_32 > NFS_MAX_CONNECTIONS) goto out_of_bounds; ctx->nfs_server.nconnect = result.uint_32; break; case Opt_max_connect: trace_nfs_mount_assign(param->key, param->string); if (result.uint_32 < 1 || result.uint_32 > NFS_MAX_TRANSPORTS) goto out_of_bounds; ctx->nfs_server.max_connect = result.uint_32; break; case Opt_lookupcache: trace_nfs_mount_assign(param->key, param->string); switch (result.uint_32) { case Opt_lookupcache_all: ctx->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE); break; case Opt_lookupcache_positive: ctx->flags &= ~NFS_MOUNT_LOOKUP_CACHE_NONE; ctx->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG; break; case Opt_lookupcache_none: ctx->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE; break; default: goto out_invalid_value; } break; case Opt_local_lock: trace_nfs_mount_assign(param->key, param->string); switch (result.uint_32) { case Opt_local_lock_all: ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); break; case Opt_local_lock_flock: ctx->flags |= NFS_MOUNT_LOCAL_FLOCK; break; case Opt_local_lock_posix: ctx->flags |= NFS_MOUNT_LOCAL_FCNTL; break; case Opt_local_lock_none: ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); break; default: goto out_invalid_value; } break; case Opt_write: trace_nfs_mount_assign(param->key, param->string); switch (result.uint_32) { case Opt_write_lazy: ctx->flags &= ~(NFS_MOUNT_WRITE_EAGER | NFS_MOUNT_WRITE_WAIT); break; case Opt_write_eager: ctx->flags |= NFS_MOUNT_WRITE_EAGER; ctx->flags &= ~NFS_MOUNT_WRITE_WAIT; break; case Opt_write_wait: ctx->flags |= NFS_MOUNT_WRITE_EAGER | NFS_MOUNT_WRITE_WAIT; break; default: goto out_invalid_value; } break; /* * Special options */ case Opt_sloppy: ctx->sloppy = true; break; } return 0; out_invalid_value: return nfs_invalf(fc, "NFS: Bad mount option value specified"); out_invalid_address: return nfs_invalf(fc, "NFS: Bad IP address specified"); out_of_bounds: return nfs_invalf(fc, "NFS: Value for '%s' out of range", param->key); out_bad_transport: return nfs_invalf(fc, "NFS: Unrecognized transport protocol"); } /* * Split fc->source into "hostname:export_path". * * The leftmost colon demarks the split between the server's hostname * and the export path. If the hostname starts with a left square * bracket, then it may contain colons. * * Note: caller frees hostname and export path, even on error. */ static int nfs_parse_source(struct fs_context *fc, size_t maxnamlen, size_t maxpathlen) { struct nfs_fs_context *ctx = nfs_fc2context(fc); const char *dev_name = fc->source; size_t len; const char *end; if (unlikely(!dev_name || !*dev_name)) return -EINVAL; /* Is the host name protected with square brackets? */ if (*dev_name == '[') { end = strchr(++dev_name, ']'); if (end == NULL || end[1] != ':') goto out_bad_devname; len = end - dev_name; end++; } else { const char *comma; end = strchr(dev_name, ':'); if (end == NULL) goto out_bad_devname; len = end - dev_name; /* kill possible hostname list: not supported */ comma = memchr(dev_name, ',', len); if (comma) len = comma - dev_name; } if (len > maxnamlen) goto out_hostname; kfree(ctx->nfs_server.hostname); /* N.B.
caller will free nfs_server.hostname in all cases */ ctx->nfs_server.hostname = kmemdup_nul(dev_name, len, GFP_KERNEL); if (!ctx->nfs_server.hostname) goto out_nomem; len = strlen(++end); if (len > maxpathlen) goto out_path; ctx->nfs_server.export_path = kmemdup_nul(end, len, GFP_KERNEL); if (!ctx->nfs_server.export_path) goto out_nomem; trace_nfs_mount_path(ctx->nfs_server.export_path); return 0; out_bad_devname: return nfs_invalf(fc, "NFS: device name not in host:path format"); out_nomem: nfs_errorf(fc, "NFS: not enough memory to parse device name"); return -ENOMEM; out_hostname: nfs_errorf(fc, "NFS: server hostname too long"); return -ENAMETOOLONG; out_path: nfs_errorf(fc, "NFS: export pathname too long"); return -ENAMETOOLONG; } static inline bool is_remount_fc(struct fs_context *fc) { return fc->root != NULL; } /* * Parse monolithic NFS2/NFS3 mount data * - fills in the mount root filehandle * * For option strings, user space handles the following behaviors: * * + DNS: mapping server host name to IP address ("addr=" option) * * + failure mode: how to behave if a mount request can't be handled * immediately ("fg/bg" option) * * + retry: how often to retry a mount request ("retry=" option) * * + breaking back: trying proto=udp after proto=tcp, v2 after v3, * mountproto=tcp after mountproto=udp, and so on */ static int nfs23_parse_monolithic(struct fs_context *fc, struct nfs_mount_data *data) { struct nfs_fs_context *ctx = nfs_fc2context(fc); struct nfs_fh *mntfh = ctx->mntfh; struct sockaddr_storage *sap = &ctx->nfs_server._address; int extra_flags = NFS_MOUNT_LEGACY_INTERFACE; int ret; if (data == NULL) goto out_no_data; ctx->version = NFS_DEFAULT_VERSION; switch (data->version) { case 1: data->namlen = 0; fallthrough; case 2: data->bsize = 0; fallthrough; case 3: if (data->flags & NFS_MOUNT_VER3) goto out_no_v3; data->root.size = NFS2_FHSIZE; memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); /* Turn off security negotiation */ extra_flags |= NFS_MOUNT_SECFLAVOUR; fallthrough; case 4: if (data->flags & NFS_MOUNT_SECFLAVOUR) goto out_no_sec; fallthrough; case 5: memset(data->context, 0, sizeof(data->context)); fallthrough; case 6: if (data->flags & NFS_MOUNT_VER3) { if (data->root.size > NFS3_FHSIZE || data->root.size == 0) goto out_invalid_fh; mntfh->size = data->root.size; ctx->version = 3; } else { mntfh->size = NFS2_FHSIZE; ctx->version = 2; } memcpy(mntfh->data, data->root.data, mntfh->size); if (mntfh->size < sizeof(mntfh->data)) memset(mntfh->data + mntfh->size, 0, sizeof(mntfh->data) - mntfh->size); /* * for proto == XPRT_TRANSPORT_UDP, which is what uses * to_exponential, implying shift: limit the shift value * to BITS_PER_LONG (majortimeo is unsigned long) */ if (!(data->flags & NFS_MOUNT_TCP)) /* this will be UDP */ if (data->retrans >= 64) /* shift value is too large */ goto out_invalid_data; /* * Translate to nfs_fs_context, which nfs_fill_super * can deal with. 
*/ ctx->flags = data->flags & NFS_MOUNT_FLAGMASK; ctx->flags |= extra_flags; ctx->rsize = data->rsize; ctx->wsize = data->wsize; ctx->timeo = data->timeo; ctx->retrans = data->retrans; ctx->acregmin = data->acregmin; ctx->acregmax = data->acregmax; ctx->acdirmin = data->acdirmin; ctx->acdirmax = data->acdirmax; ctx->need_mount = false; if (!is_remount_fc(fc)) { memcpy(sap, &data->addr, sizeof(data->addr)); ctx->nfs_server.addrlen = sizeof(data->addr); ctx->nfs_server.port = ntohs(data->addr.sin_port); } if (sap->ss_family != AF_INET || !nfs_verify_server_address(sap)) goto out_no_address; if (!(data->flags & NFS_MOUNT_TCP)) ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP; /* N.B. caller will free nfs_server.hostname in all cases */ ctx->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL); if (!ctx->nfs_server.hostname) goto out_nomem; ctx->namlen = data->namlen; ctx->bsize = data->bsize; if (data->flags & NFS_MOUNT_SECFLAVOUR) ctx->selected_flavor = data->pseudoflavor; else ctx->selected_flavor = RPC_AUTH_UNIX; if (!(data->flags & NFS_MOUNT_NONLM)) ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK| NFS_MOUNT_LOCAL_FCNTL); else ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK| NFS_MOUNT_LOCAL_FCNTL); /* * The legacy version 6 binary mount data from userspace has a * field used only to transport selinux information into the * kernel. To continue to support that functionality we * have a touch of selinux knowledge here in the NFS code. The * userspace code converted context=blah to just blah so we are * converting back to the full string selinux understands. */ if (data->context[0]){ #ifdef CONFIG_SECURITY_SELINUX int ret; data->context[NFS_MAX_CONTEXT_LEN] = '\0'; ret = vfs_parse_fs_string(fc, "context", data->context, strlen(data->context)); if (ret < 0) return ret; #else return -EINVAL; #endif } break; default: goto generic; } ret = nfs_validate_transport_protocol(fc, ctx); if (ret) return ret; ctx->skip_reconfig_option_check = true; return 0; generic: return generic_parse_monolithic(fc, data); out_no_data: if (is_remount_fc(fc)) { ctx->skip_reconfig_option_check = true; return 0; } return nfs_invalf(fc, "NFS: mount program didn't pass any mount data"); out_no_v3: return nfs_invalf(fc, "NFS: nfs_mount_data version does not support v3"); out_no_sec: return nfs_invalf(fc, "NFS: nfs_mount_data version supports only AUTH_SYS"); out_nomem: return -ENOMEM; out_no_address: return nfs_invalf(fc, "NFS: mount program didn't pass remote address"); out_invalid_fh: return nfs_invalf(fc, "NFS: invalid root filehandle"); out_invalid_data: return nfs_invalf(fc, "NFS: invalid binary mount data"); } #if IS_ENABLED(CONFIG_NFS_V4) struct compat_nfs_string { compat_uint_t len; compat_uptr_t data; }; static inline void compat_nfs_string(struct nfs_string *dst, struct compat_nfs_string *src) { dst->data = compat_ptr(src->data); dst->len = src->len; } struct compat_nfs4_mount_data_v1 { compat_int_t version; compat_int_t flags; compat_int_t rsize; compat_int_t wsize; compat_int_t timeo; compat_int_t retrans; compat_int_t acregmin; compat_int_t acregmax; compat_int_t acdirmin; compat_int_t acdirmax; struct compat_nfs_string client_addr; struct compat_nfs_string mnt_path; struct compat_nfs_string hostname; compat_uint_t host_addrlen; compat_uptr_t host_addr; compat_int_t proto; compat_int_t auth_flavourlen; compat_uptr_t auth_flavours; }; static void nfs4_compat_mount_data_conv(struct nfs4_mount_data *data) { struct compat_nfs4_mount_data_v1 *compat = (struct compat_nfs4_mount_data_v1 *)data; /* copy the fields backwards */ 
data->auth_flavours = compat_ptr(compat->auth_flavours); data->auth_flavourlen = compat->auth_flavourlen; data->proto = compat->proto; data->host_addr = compat_ptr(compat->host_addr); data->host_addrlen = compat->host_addrlen; compat_nfs_string(&data->hostname, &compat->hostname); compat_nfs_string(&data->mnt_path, &compat->mnt_path); compat_nfs_string(&data->client_addr, &compat->client_addr); data->acdirmax = compat->acdirmax; data->acdirmin = compat->acdirmin; data->acregmax = compat->acregmax; data->acregmin = compat->acregmin; data->retrans = compat->retrans; data->timeo = compat->timeo; data->wsize = compat->wsize; data->rsize = compat->rsize; data->flags = compat->flags; data->version = compat->version; } /* * Validate NFSv4 mount options */ static int nfs4_parse_monolithic(struct fs_context *fc, struct nfs4_mount_data *data) { struct nfs_fs_context *ctx = nfs_fc2context(fc); struct sockaddr_storage *sap = &ctx->nfs_server._address; int ret; char *c; if (!data) { if (is_remount_fc(fc)) goto done; return nfs_invalf(fc, "NFS4: mount program didn't pass any mount data"); } ctx->version = 4; if (data->version != 1) return generic_parse_monolithic(fc, data); if (in_compat_syscall()) nfs4_compat_mount_data_conv(data); if (data->host_addrlen > sizeof(ctx->nfs_server.address)) goto out_no_address; if (data->host_addrlen == 0) goto out_no_address; ctx->nfs_server.addrlen = data->host_addrlen; if (copy_from_user(sap, data->host_addr, data->host_addrlen)) return -EFAULT; if (!nfs_verify_server_address(sap)) goto out_no_address; ctx->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port); if (data->auth_flavourlen) { rpc_authflavor_t pseudoflavor; if (data->auth_flavourlen > 1) goto out_inval_auth; if (copy_from_user(&pseudoflavor, data->auth_flavours, sizeof(pseudoflavor))) return -EFAULT; ctx->selected_flavor = pseudoflavor; } else { ctx->selected_flavor = RPC_AUTH_UNIX; } c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); if (IS_ERR(c)) return PTR_ERR(c); ctx->nfs_server.hostname = c; c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN); if (IS_ERR(c)) return PTR_ERR(c); ctx->nfs_server.export_path = c; trace_nfs_mount_path(c); c = strndup_user(data->client_addr.data, 16); if (IS_ERR(c)) return PTR_ERR(c); ctx->client_address = c; /* * Translate to nfs_fs_context, which nfs_fill_super * can deal with. */ ctx->flags = data->flags & NFS4_MOUNT_FLAGMASK; ctx->rsize = data->rsize; ctx->wsize = data->wsize; ctx->timeo = data->timeo; ctx->retrans = data->retrans; ctx->acregmin = data->acregmin; ctx->acregmax = data->acregmax; ctx->acdirmin = data->acdirmin; ctx->acdirmax = data->acdirmax; ctx->nfs_server.protocol = data->proto; ret = nfs_validate_transport_protocol(fc, ctx); if (ret) return ret; done: ctx->skip_reconfig_option_check = true; return 0; out_inval_auth: return nfs_invalf(fc, "NFS4: Invalid number of RPC auth flavours %d", data->auth_flavourlen); out_no_address: return nfs_invalf(fc, "NFS4: mount program didn't pass remote address"); } #endif /* * Parse a monolithic block of data from sys_mount(). */ static int nfs_fs_context_parse_monolithic(struct fs_context *fc, void *data) { if (fc->fs_type == &nfs_fs_type) return nfs23_parse_monolithic(fc, data); #if IS_ENABLED(CONFIG_NFS_V4) if (fc->fs_type == &nfs4_fs_type) return nfs4_parse_monolithic(fc, data); #endif return nfs_invalf(fc, "NFS: Unsupported monolithic data version"); } /* * Validate the preparsed information in the config. 
*/ static int nfs_fs_context_validate(struct fs_context *fc) { struct nfs_fs_context *ctx = nfs_fc2context(fc); struct nfs_subversion *nfs_mod; struct sockaddr_storage *sap = &ctx->nfs_server._address; int max_namelen = PAGE_SIZE; int max_pathlen = NFS_MAXPATHLEN; int port = 0; int ret; if (!fc->source) goto out_no_device_name; /* Check for sanity first. */ if (ctx->minorversion && ctx->version != 4) goto out_minorversion_mismatch; if (ctx->options & NFS_OPTION_MIGRATION && (ctx->version != 4 || ctx->minorversion != 0)) goto out_migration_misuse; /* Verify that any proto=/mountproto= options match the address * families in the addr=/mountaddr= options. */ if (ctx->protofamily != AF_UNSPEC && ctx->protofamily != ctx->nfs_server.address.sa_family) goto out_proto_mismatch; if (ctx->mountfamily != AF_UNSPEC) { if (ctx->mount_server.addrlen) { if (ctx->mountfamily != ctx->mount_server.address.sa_family) goto out_mountproto_mismatch; } else { if (ctx->mountfamily != ctx->nfs_server.address.sa_family) goto out_mountproto_mismatch; } } if (!nfs_verify_server_address(sap)) goto out_no_address; ret = nfs_validate_transport_protocol(fc, ctx); if (ret) return ret; if (ctx->version == 4) { if (IS_ENABLED(CONFIG_NFS_V4)) { if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA) port = NFS_RDMA_PORT; else port = NFS_PORT; max_namelen = NFS4_MAXNAMLEN; max_pathlen = NFS4_MAXPATHLEN; ctx->flags &= ~(NFS_MOUNT_NONLM | NFS_MOUNT_NOACL | NFS_MOUNT_VER3 | NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); } else { goto out_v4_not_compiled; } } else { nfs_set_mount_transport_protocol(ctx); if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA) port = NFS_RDMA_PORT; } nfs_set_port(sap, &ctx->nfs_server.port, port); ret = nfs_parse_source(fc, max_namelen, max_pathlen); if (ret < 0) return ret; /* Load the NFS protocol module if we haven't done so yet */ if (!ctx->nfs_mod) { nfs_mod = find_nfs_version(ctx->version); if (IS_ERR(nfs_mod)) { ret = PTR_ERR(nfs_mod); goto out_version_unavailable; } ctx->nfs_mod = nfs_mod; } /* Ensure the filesystem context has the correct fs_type */ if (fc->fs_type != ctx->nfs_mod->nfs_fs) { module_put(fc->fs_type->owner); __module_get(ctx->nfs_mod->nfs_fs->owner); fc->fs_type = ctx->nfs_mod->nfs_fs; } return 0; out_no_device_name: return nfs_invalf(fc, "NFS: Device name not specified"); out_v4_not_compiled: nfs_errorf(fc, "NFS: NFSv4 is not compiled into kernel"); return -EPROTONOSUPPORT; out_no_address: return nfs_invalf(fc, "NFS: mount program didn't pass remote address"); out_mountproto_mismatch: return nfs_invalf(fc, "NFS: Mount server address does not match mountproto= option"); out_proto_mismatch: return nfs_invalf(fc, "NFS: Server address does not match proto= option"); out_minorversion_mismatch: return nfs_invalf(fc, "NFS: Mount option vers=%u does not support minorversion=%u", ctx->version, ctx->minorversion); out_migration_misuse: return nfs_invalf(fc, "NFS: 'Migration' not supported for this NFS version"); out_version_unavailable: nfs_errorf(fc, "NFS: Version unavailable"); return ret; } /* * Create an NFS superblock by the appropriate method. */ static int nfs_get_tree(struct fs_context *fc) { struct nfs_fs_context *ctx = nfs_fc2context(fc); int err = nfs_fs_context_validate(fc); if (err) return err; if (!ctx->internal) return ctx->nfs_mod->rpc_ops->try_get_tree(fc); else return nfs_get_tree_common(fc); } /* * Handle duplication of a configuration. The caller copied *src into *sc, but * it can't deal with resource pointers in the filesystem context, so we have * to do that. 
We need to clear pointers, copy data or get extra refs as * appropriate. */ static int nfs_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc) { struct nfs_fs_context *src = nfs_fc2context(src_fc), *ctx; ctx = kmemdup(src, sizeof(struct nfs_fs_context), GFP_KERNEL); if (!ctx) return -ENOMEM; ctx->mntfh = nfs_alloc_fhandle(); if (!ctx->mntfh) { kfree(ctx); return -ENOMEM; } nfs_copy_fh(ctx->mntfh, src->mntfh); get_nfs_version(ctx->nfs_mod); ctx->client_address = NULL; ctx->mount_server.hostname = NULL; ctx->nfs_server.export_path = NULL; ctx->nfs_server.hostname = NULL; ctx->fscache_uniq = NULL; ctx->clone_data.fattr = NULL; fc->fs_private = ctx; return 0; } static void nfs_fs_context_free(struct fs_context *fc) { struct nfs_fs_context *ctx = nfs_fc2context(fc); if (ctx) { if (ctx->server) nfs_free_server(ctx->server); if (ctx->nfs_mod) put_nfs_version(ctx->nfs_mod); kfree(ctx->client_address); kfree(ctx->mount_server.hostname); kfree(ctx->nfs_server.export_path); kfree(ctx->nfs_server.hostname); kfree(ctx->fscache_uniq); nfs_free_fhandle(ctx->mntfh); nfs_free_fattr(ctx->clone_data.fattr); kfree(ctx); } } static const struct fs_context_operations nfs_fs_context_ops = { .free = nfs_fs_context_free, .dup = nfs_fs_context_dup, .parse_param = nfs_fs_context_parse_param, .parse_monolithic = nfs_fs_context_parse_monolithic, .get_tree = nfs_get_tree, .reconfigure = nfs_reconfigure, }; /* * Prepare superblock configuration. We use the namespaces attached to the * context. This may be the current process's namespaces, or it may be a * container's namespaces. */ static int nfs_init_fs_context(struct fs_context *fc) { struct nfs_fs_context *ctx; ctx = kzalloc(sizeof(struct nfs_fs_context), GFP_KERNEL); if (unlikely(!ctx)) return -ENOMEM; ctx->mntfh = nfs_alloc_fhandle(); if (unlikely(!ctx->mntfh)) { kfree(ctx); return -ENOMEM; } ctx->protofamily = AF_UNSPEC; ctx->mountfamily = AF_UNSPEC; ctx->mount_server.port = NFS_UNSPEC_PORT; if (fc->root) { /* reconfigure, start with the current config */ struct nfs_server *nfss = fc->root->d_sb->s_fs_info; struct net *net = nfss->nfs_client->cl_net; ctx->flags = nfss->flags; ctx->rsize = nfss->rsize; ctx->wsize = nfss->wsize; ctx->retrans = nfss->client->cl_timeout->to_retries; ctx->selected_flavor = nfss->client->cl_auth->au_flavor; ctx->acregmin = nfss->acregmin / HZ; ctx->acregmax = nfss->acregmax / HZ; ctx->acdirmin = nfss->acdirmin / HZ; ctx->acdirmax = nfss->acdirmax / HZ; ctx->timeo = 10U * nfss->client->cl_timeout->to_initval / HZ; ctx->nfs_server.port = nfss->port; ctx->nfs_server.addrlen = nfss->nfs_client->cl_addrlen; ctx->version = nfss->nfs_client->rpc_ops->version; ctx->minorversion = nfss->nfs_client->cl_minorversion; memcpy(&ctx->nfs_server._address, &nfss->nfs_client->cl_addr, ctx->nfs_server.addrlen); if (fc->net_ns != net) { put_net(fc->net_ns); fc->net_ns = get_net(net); } ctx->nfs_mod = nfss->nfs_client->cl_nfs_mod; get_nfs_version(ctx->nfs_mod); } else { /* defaults */ ctx->timeo = NFS_UNSPEC_TIMEO; ctx->retrans = NFS_UNSPEC_RETRANS; ctx->acregmin = NFS_DEF_ACREGMIN; ctx->acregmax = NFS_DEF_ACREGMAX; ctx->acdirmin = NFS_DEF_ACDIRMIN; ctx->acdirmax = NFS_DEF_ACDIRMAX; ctx->nfs_server.port = NFS_UNSPEC_PORT; ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; ctx->selected_flavor = RPC_AUTH_MAXFLAVOR; ctx->minorversion = 0; ctx->need_mount = true; ctx->xprtsec.policy = RPC_XPRTSEC_NONE; ctx->xprtsec.cert_serial = TLS_NO_CERT; ctx->xprtsec.privkey_serial = TLS_NO_PRIVKEY; fc->s_iflags |= SB_I_STABLE_WRITES; } fc->fs_private = ctx; 
fc->ops = &nfs_fs_context_ops; return 0; } struct file_system_type nfs_fs_type = { .owner = THIS_MODULE, .name = "nfs", .init_fs_context = nfs_init_fs_context, .parameters = nfs_fs_parameters, .kill_sb = nfs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, }; MODULE_ALIAS_FS("nfs"); EXPORT_SYMBOL_GPL(nfs_fs_type); #if IS_ENABLED(CONFIG_NFS_V4) struct file_system_type nfs4_fs_type = { .owner = THIS_MODULE, .name = "nfs4", .init_fs_context = nfs_init_fs_context, .parameters = nfs_fs_parameters, .kill_sb = nfs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, }; MODULE_ALIAS_FS("nfs4"); MODULE_ALIAS("nfs4"); EXPORT_SYMBOL_GPL(nfs4_fs_type); #endif /* CONFIG_NFS_V4 */
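The file above is the kernel half of the NFS mount path: nfs_init_fs_context() sets up the context, nfs_fs_context_parse_param()/nfs23_parse_monolithic()/nfs4_parse_monolithic() fill it in, nfs_fs_context_validate() checks it, and nfs_get_tree() builds the superblock. For orientation, here is an illustrative userspace sketch (not part of the file) of driving that path through the new mount API; the syscall numbers and FSCONFIG_*/MOVE_MOUNT_* constants are assumed to come from recent kernel and libc headers, the server name and mount point are made up, and error handling is omitted.

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/mount.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	/* fsopen("nfs") allocates an fs_context via nfs_init_fs_context() */
	int fsfd = syscall(SYS_fsopen, "nfs", 0);

	/* "source" is later split into hostname and export path by nfs_parse_source() */
	syscall(SYS_fsconfig, fsfd, FSCONFIG_SET_STRING, "source",
		"server.example.com:/export", 0);
	/* string options are handled by nfs_fs_context_parse_param() */
	syscall(SYS_fsconfig, fsfd, FSCONFIG_SET_STRING, "vers", "4.2", 0);

	/* FSCONFIG_CMD_CREATE runs nfs_fs_context_validate() and nfs_get_tree() */
	syscall(SYS_fsconfig, fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);

	int mfd = syscall(SYS_fsmount, fsfd, 0, 0);
	syscall(SYS_move_mount, mfd, "", AT_FDCWD, "/mnt",
		MOVE_MOUNT_F_EMPTY_PATH);
	return 0;
}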
907 908 909 910 911 912 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) by Jaroslav Kysela <perex@perex.cz> * Takashi Iwai <tiwai@suse.de> * * Generic memory allocators */ #include <linux/slab.h> #include <linux/mm.h> #include <linux/dma-mapping.h> #include <linux/dma-map-ops.h> #include <linux/genalloc.h> #include <linux/highmem.h> #include <linux/vmalloc.h> #ifdef CONFIG_X86 #include <asm/set_memory.h> #endif #include <sound/memalloc.h> struct snd_malloc_ops { void *(*alloc)(struct snd_dma_buffer *dmab, size_t size); void (*free)(struct snd_dma_buffer *dmab); dma_addr_t (*get_addr)(struct snd_dma_buffer *dmab, size_t offset); struct page *(*get_page)(struct snd_dma_buffer *dmab, size_t offset); unsigned int (*get_chunk_size)(struct snd_dma_buffer *dmab, unsigned int ofs, unsigned int size); int (*mmap)(struct snd_dma_buffer *dmab, struct vm_area_struct *area); void (*sync)(struct snd_dma_buffer *dmab, enum snd_dma_sync_mode mode); }; #define DEFAULT_GFP \ (GFP_KERNEL | \ __GFP_RETRY_MAYFAIL | /* don't trigger OOM-killer */ \ __GFP_NOWARN) /* no stack trace print - this call is non-critical */ static const struct snd_malloc_ops *snd_dma_get_ops(struct snd_dma_buffer *dmab); static void *__snd_dma_alloc_pages(struct snd_dma_buffer *dmab, size_t size) { const struct snd_malloc_ops *ops = snd_dma_get_ops(dmab); if (WARN_ON_ONCE(!ops || !ops->alloc)) return NULL; return ops->alloc(dmab, size); } /** * snd_dma_alloc_dir_pages - allocate the buffer area according to the given * type and direction * @type: the DMA buffer type * @device: the device pointer * @dir: DMA direction * @size: the buffer size to allocate * @dmab: buffer allocation record to store the allocated data * * Calls the memory-allocator function for the corresponding * buffer type. * * Return: Zero if the buffer with the given size is allocated successfully, * otherwise a negative value on error. */ int snd_dma_alloc_dir_pages(int type, struct device *device, enum dma_data_direction dir, size_t size, struct snd_dma_buffer *dmab) { if (WARN_ON(!size)) return -ENXIO; if (WARN_ON(!dmab)) return -ENXIO; size = PAGE_ALIGN(size); dmab->dev.type = type; dmab->dev.dev = device; dmab->dev.dir = dir; dmab->bytes = 0; dmab->addr = 0; dmab->private_data = NULL; dmab->area = __snd_dma_alloc_pages(dmab, size); if (!dmab->area) return -ENOMEM; dmab->bytes = size; return 0; } EXPORT_SYMBOL(snd_dma_alloc_dir_pages); /** * snd_dma_alloc_pages_fallback - allocate the buffer area according to the given type with fallback * @type: the DMA buffer type * @device: the device pointer * @size: the buffer size to allocate * @dmab: buffer allocation record to store the allocated data * * Calls the memory-allocator function for the corresponding * buffer type. When no space is left, this function reduces the size and * tries to allocate again. The size actually allocated is stored in * res_size argument. * * Return: Zero if the buffer with the given size is allocated successfully, * otherwise a negative value on error. */ int snd_dma_alloc_pages_fallback(int type, struct device *device, size_t size, struct snd_dma_buffer *dmab) { int err; while ((err = snd_dma_alloc_pages(type, device, size, dmab)) < 0) { if (err != -ENOMEM) return err; if (size <= PAGE_SIZE) return -ENOMEM; size >>= 1; size = PAGE_SIZE << get_order(size); } if (! 
dmab->area) return -ENOMEM; return 0; } EXPORT_SYMBOL(snd_dma_alloc_pages_fallback); /** * snd_dma_free_pages - release the allocated buffer * @dmab: the buffer allocation record to release * * Releases the allocated buffer via snd_dma_alloc_pages(). */ void snd_dma_free_pages(struct snd_dma_buffer *dmab) { const struct snd_malloc_ops *ops = snd_dma_get_ops(dmab); if (ops && ops->free) ops->free(dmab); } EXPORT_SYMBOL(snd_dma_free_pages); /* called by devres */ static void __snd_release_pages(struct device *dev, void *res) { snd_dma_free_pages(res); } /** * snd_devm_alloc_dir_pages - allocate the buffer and manage with devres * @dev: the device pointer * @type: the DMA buffer type * @dir: DMA direction * @size: the buffer size to allocate * * Allocate buffer pages depending on the given type and manage using devres. * The pages will be released automatically at the device removal. * * Unlike snd_dma_alloc_pages(), this function requires the real device pointer, * hence it can't work with SNDRV_DMA_TYPE_CONTINUOUS or * SNDRV_DMA_TYPE_VMALLOC type. * * Return: the snd_dma_buffer object at success, or NULL if failed */ struct snd_dma_buffer * snd_devm_alloc_dir_pages(struct device *dev, int type, enum dma_data_direction dir, size_t size) { struct snd_dma_buffer *dmab; int err; if (WARN_ON(type == SNDRV_DMA_TYPE_CONTINUOUS || type == SNDRV_DMA_TYPE_VMALLOC)) return NULL; dmab = devres_alloc(__snd_release_pages, sizeof(*dmab), GFP_KERNEL); if (!dmab) return NULL; err = snd_dma_alloc_dir_pages(type, dev, dir, size, dmab); if (err < 0) { devres_free(dmab); return NULL; } devres_add(dev, dmab); return dmab; } EXPORT_SYMBOL_GPL(snd_devm_alloc_dir_pages); /** * snd_dma_buffer_mmap - perform mmap of the given DMA buffer * @dmab: buffer allocation information * @area: VM area information * * Return: zero if successful, or a negative error code */ int snd_dma_buffer_mmap(struct snd_dma_buffer *dmab, struct vm_area_struct *area) { const struct snd_malloc_ops *ops; if (!dmab) return -ENOENT; ops = snd_dma_get_ops(dmab); if (ops && ops->mmap) return ops->mmap(dmab, area); else return -ENOENT; } EXPORT_SYMBOL(snd_dma_buffer_mmap); #ifdef CONFIG_HAS_DMA /** * snd_dma_buffer_sync - sync DMA buffer between CPU and device * @dmab: buffer allocation information * @mode: sync mode */ void snd_dma_buffer_sync(struct snd_dma_buffer *dmab, enum snd_dma_sync_mode mode) { const struct snd_malloc_ops *ops; if (!dmab || !dmab->dev.need_sync) return; ops = snd_dma_get_ops(dmab); if (ops && ops->sync) ops->sync(dmab, mode); } EXPORT_SYMBOL_GPL(snd_dma_buffer_sync); #endif /* CONFIG_HAS_DMA */ /** * snd_sgbuf_get_addr - return the physical address at the corresponding offset * @dmab: buffer allocation information * @offset: offset in the ring buffer * * Return: the physical address */ dma_addr_t snd_sgbuf_get_addr(struct snd_dma_buffer *dmab, size_t offset) { const struct snd_malloc_ops *ops = snd_dma_get_ops(dmab); if (ops && ops->get_addr) return ops->get_addr(dmab, offset); else return dmab->addr + offset; } EXPORT_SYMBOL(snd_sgbuf_get_addr); /** * snd_sgbuf_get_page - return the physical page at the corresponding offset * @dmab: buffer allocation information * @offset: offset in the ring buffer * * Return: the page pointer */ struct page *snd_sgbuf_get_page(struct snd_dma_buffer *dmab, size_t offset) { const struct snd_malloc_ops *ops = snd_dma_get_ops(dmab); if (ops && ops->get_page) return ops->get_page(dmab, offset); else return virt_to_page(dmab->area + offset); } EXPORT_SYMBOL(snd_sgbuf_get_page); /** * 
snd_sgbuf_get_chunk_size - compute the max chunk size with continuous pages * on sg-buffer * @dmab: buffer allocation information * @ofs: offset in the ring buffer * @size: the requested size * * Return: the chunk size */ unsigned int snd_sgbuf_get_chunk_size(struct snd_dma_buffer *dmab, unsigned int ofs, unsigned int size) { const struct snd_malloc_ops *ops = snd_dma_get_ops(dmab); if (ops && ops->get_chunk_size) return ops->get_chunk_size(dmab, ofs, size); else return size; } EXPORT_SYMBOL(snd_sgbuf_get_chunk_size); /* * Continuous pages allocator */ static void *do_alloc_pages(struct device *dev, size_t size, dma_addr_t *addr, bool wc) { void *p; gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; again: p = alloc_pages_exact(size, gfp); if (!p) return NULL; *addr = page_to_phys(virt_to_page(p)); if (!dev) return p; if ((*addr + size - 1) & ~dev->coherent_dma_mask) { if (IS_ENABLED(CONFIG_ZONE_DMA32) && !(gfp & GFP_DMA32)) { gfp |= GFP_DMA32; goto again; } if (IS_ENABLED(CONFIG_ZONE_DMA) && !(gfp & GFP_DMA)) { gfp = (gfp & ~GFP_DMA32) | GFP_DMA; goto again; } } #ifdef CONFIG_X86 if (wc) set_memory_wc((unsigned long)(p), size >> PAGE_SHIFT); #endif return p; } static void do_free_pages(void *p, size_t size, bool wc) { #ifdef CONFIG_X86 if (wc) set_memory_wb((unsigned long)(p), size >> PAGE_SHIFT); #endif free_pages_exact(p, size); } static void *snd_dma_continuous_alloc(struct snd_dma_buffer *dmab, size_t size) { return do_alloc_pages(dmab->dev.dev, size, &dmab->addr, false); } static void snd_dma_continuous_free(struct snd_dma_buffer *dmab) { do_free_pages(dmab->area, dmab->bytes, false); } static int snd_dma_continuous_mmap(struct snd_dma_buffer *dmab, struct vm_area_struct *area) { return remap_pfn_range(area, area->vm_start, dmab->addr >> PAGE_SHIFT, area->vm_end - area->vm_start, area->vm_page_prot); } static const struct snd_malloc_ops snd_dma_continuous_ops = { .alloc = snd_dma_continuous_alloc, .free = snd_dma_continuous_free, .mmap = snd_dma_continuous_mmap, }; /* * VMALLOC allocator */ static void *snd_dma_vmalloc_alloc(struct snd_dma_buffer *dmab, size_t size) { return vmalloc(size); } static void snd_dma_vmalloc_free(struct snd_dma_buffer *dmab) { vfree(dmab->area); } static int snd_dma_vmalloc_mmap(struct snd_dma_buffer *dmab, struct vm_area_struct *area) { return remap_vmalloc_range(area, dmab->area, 0); } #define get_vmalloc_page_addr(dmab, offset) \ page_to_phys(vmalloc_to_page((dmab)->area + (offset))) static dma_addr_t snd_dma_vmalloc_get_addr(struct snd_dma_buffer *dmab, size_t offset) { return get_vmalloc_page_addr(dmab, offset) + offset % PAGE_SIZE; } static struct page *snd_dma_vmalloc_get_page(struct snd_dma_buffer *dmab, size_t offset) { return vmalloc_to_page(dmab->area + offset); } static unsigned int snd_dma_vmalloc_get_chunk_size(struct snd_dma_buffer *dmab, unsigned int ofs, unsigned int size) { unsigned int start, end; unsigned long addr; start = ALIGN_DOWN(ofs, PAGE_SIZE); end = ofs + size - 1; /* the last byte address */ /* check page continuity */ addr = get_vmalloc_page_addr(dmab, start); for (;;) { start += PAGE_SIZE; if (start > end) break; addr += PAGE_SIZE; if (get_vmalloc_page_addr(dmab, start) != addr) return start - ofs; } /* ok, all on continuous pages */ return size; } static const struct snd_malloc_ops snd_dma_vmalloc_ops = { .alloc = snd_dma_vmalloc_alloc, .free = snd_dma_vmalloc_free, .mmap = snd_dma_vmalloc_mmap, .get_addr = snd_dma_vmalloc_get_addr, .get_page = snd_dma_vmalloc_get_page, .get_chunk_size = snd_dma_vmalloc_get_chunk_size, 
}; #ifdef CONFIG_HAS_DMA /* * IRAM allocator */ #ifdef CONFIG_GENERIC_ALLOCATOR static void *snd_dma_iram_alloc(struct snd_dma_buffer *dmab, size_t size) { struct device *dev = dmab->dev.dev; struct gen_pool *pool; void *p; if (dev->of_node) { pool = of_gen_pool_get(dev->of_node, "iram", 0); /* Assign the pool into private_data field */ dmab->private_data = pool; p = gen_pool_dma_alloc_align(pool, size, &dmab->addr, PAGE_SIZE); if (p) return p; } /* Internal memory might have limited size and no enough space, * so if we fail to malloc, try to fetch memory traditionally. */ dmab->dev.type = SNDRV_DMA_TYPE_DEV; return __snd_dma_alloc_pages(dmab, size); } static void snd_dma_iram_free(struct snd_dma_buffer *dmab) { struct gen_pool *pool = dmab->private_data; if (pool && dmab->area) gen_pool_free(pool, (unsigned long)dmab->area, dmab->bytes); } static int snd_dma_iram_mmap(struct snd_dma_buffer *dmab, struct vm_area_struct *area) { area->vm_page_prot = pgprot_writecombine(area->vm_page_prot); return remap_pfn_range(area, area->vm_start, dmab->addr >> PAGE_SHIFT, area->vm_end - area->vm_start, area->vm_page_prot); } static const struct snd_malloc_ops snd_dma_iram_ops = { .alloc = snd_dma_iram_alloc, .free = snd_dma_iram_free, .mmap = snd_dma_iram_mmap, }; #endif /* CONFIG_GENERIC_ALLOCATOR */ /* * Coherent device pages allocator */ static void *snd_dma_dev_alloc(struct snd_dma_buffer *dmab, size_t size) { return dma_alloc_coherent(dmab->dev.dev, size, &dmab->addr, DEFAULT_GFP); } static void snd_dma_dev_free(struct snd_dma_buffer *dmab) { dma_free_coherent(dmab->dev.dev, dmab->bytes, dmab->area, dmab->addr); } static int snd_dma_dev_mmap(struct snd_dma_buffer *dmab, struct vm_area_struct *area) { return dma_mmap_coherent(dmab->dev.dev, area, dmab->area, dmab->addr, dmab->bytes); } static const struct snd_malloc_ops snd_dma_dev_ops = { .alloc = snd_dma_dev_alloc, .free = snd_dma_dev_free, .mmap = snd_dma_dev_mmap, }; /* * Write-combined pages */ #ifdef CONFIG_SND_DMA_SGBUF /* x86-specific allocations */ static void *snd_dma_wc_alloc(struct snd_dma_buffer *dmab, size_t size) { void *p = do_alloc_pages(dmab->dev.dev, size, &dmab->addr, true); if (!p) return NULL; dmab->addr = dma_map_single(dmab->dev.dev, p, size, DMA_BIDIRECTIONAL); if (dma_mapping_error(dmab->dev.dev, dmab->addr)) { do_free_pages(dmab->area, size, true); return NULL; } return p; } static void snd_dma_wc_free(struct snd_dma_buffer *dmab) { dma_unmap_single(dmab->dev.dev, dmab->addr, dmab->bytes, DMA_BIDIRECTIONAL); do_free_pages(dmab->area, dmab->bytes, true); } static int snd_dma_wc_mmap(struct snd_dma_buffer *dmab, struct vm_area_struct *area) { area->vm_page_prot = pgprot_writecombine(area->vm_page_prot); return dma_mmap_coherent(dmab->dev.dev, area, dmab->area, dmab->addr, dmab->bytes); } #else static void *snd_dma_wc_alloc(struct snd_dma_buffer *dmab, size_t size) { return dma_alloc_wc(dmab->dev.dev, size, &dmab->addr, DEFAULT_GFP); } static void snd_dma_wc_free(struct snd_dma_buffer *dmab) { dma_free_wc(dmab->dev.dev, dmab->bytes, dmab->area, dmab->addr); } static int snd_dma_wc_mmap(struct snd_dma_buffer *dmab, struct vm_area_struct *area) { return dma_mmap_wc(dmab->dev.dev, area, dmab->area, dmab->addr, dmab->bytes); } #endif static const struct snd_malloc_ops snd_dma_wc_ops = { .alloc = snd_dma_wc_alloc, .free = snd_dma_wc_free, .mmap = snd_dma_wc_mmap, }; /* * Non-contiguous pages allocator */ static void *snd_dma_noncontig_alloc(struct snd_dma_buffer *dmab, size_t size) { struct sg_table *sgt; void *p; sgt = 
dma_alloc_noncontiguous(dmab->dev.dev, size, dmab->dev.dir, DEFAULT_GFP, 0); if (!sgt) return NULL; dmab->dev.need_sync = dma_need_sync(dmab->dev.dev, sg_dma_address(sgt->sgl)); p = dma_vmap_noncontiguous(dmab->dev.dev, size, sgt); if (p) { dmab->private_data = sgt; /* store the first page address for convenience */ dmab->addr = snd_sgbuf_get_addr(dmab, 0); } else { dma_free_noncontiguous(dmab->dev.dev, size, sgt, dmab->dev.dir); } return p; } static void snd_dma_noncontig_free(struct snd_dma_buffer *dmab) { dma_vunmap_noncontiguous(dmab->dev.dev, dmab->area); dma_free_noncontiguous(dmab->dev.dev, dmab->bytes, dmab->private_data, dmab->dev.dir); } static int snd_dma_noncontig_mmap(struct snd_dma_buffer *dmab, struct vm_area_struct *area) { return dma_mmap_noncontiguous(dmab->dev.dev, area, dmab->bytes, dmab->private_data); } static void snd_dma_noncontig_sync(struct snd_dma_buffer *dmab, enum snd_dma_sync_mode mode) { if (mode == SNDRV_DMA_SYNC_CPU) { if (dmab->dev.dir == DMA_TO_DEVICE) return; invalidate_kernel_vmap_range(dmab->area, dmab->bytes); dma_sync_sgtable_for_cpu(dmab->dev.dev, dmab->private_data, dmab->dev.dir); } else { if (dmab->dev.dir == DMA_FROM_DEVICE) return; flush_kernel_vmap_range(dmab->area, dmab->bytes); dma_sync_sgtable_for_device(dmab->dev.dev, dmab->private_data, dmab->dev.dir); } } static inline void snd_dma_noncontig_iter_set(struct snd_dma_buffer *dmab, struct sg_page_iter *piter, size_t offset) { struct sg_table *sgt = dmab->private_data; __sg_page_iter_start(piter, sgt->sgl, sgt->orig_nents, offset >> PAGE_SHIFT); } static dma_addr_t snd_dma_noncontig_get_addr(struct snd_dma_buffer *dmab, size_t offset) { struct sg_dma_page_iter iter; snd_dma_noncontig_iter_set(dmab, &iter.base, offset); __sg_page_iter_dma_next(&iter); return sg_page_iter_dma_address(&iter) + offset % PAGE_SIZE; } static struct page *snd_dma_noncontig_get_page(struct snd_dma_buffer *dmab, size_t offset) { struct sg_page_iter iter; snd_dma_noncontig_iter_set(dmab, &iter, offset); __sg_page_iter_next(&iter); return sg_page_iter_page(&iter); } static unsigned int snd_dma_noncontig_get_chunk_size(struct snd_dma_buffer *dmab, unsigned int ofs, unsigned int size) { struct sg_dma_page_iter iter; unsigned int start, end; unsigned long addr; start = ALIGN_DOWN(ofs, PAGE_SIZE); end = ofs + size - 1; /* the last byte address */ snd_dma_noncontig_iter_set(dmab, &iter.base, start); if (!__sg_page_iter_dma_next(&iter)) return 0; /* check page continuity */ addr = sg_page_iter_dma_address(&iter); for (;;) { start += PAGE_SIZE; if (start > end) break; addr += PAGE_SIZE; if (!__sg_page_iter_dma_next(&iter) || sg_page_iter_dma_address(&iter) != addr) return start - ofs; } /* ok, all on continuous pages */ return size; } static const struct snd_malloc_ops snd_dma_noncontig_ops = { .alloc = snd_dma_noncontig_alloc, .free = snd_dma_noncontig_free, .mmap = snd_dma_noncontig_mmap, .sync = snd_dma_noncontig_sync, .get_addr = snd_dma_noncontig_get_addr, .get_page = snd_dma_noncontig_get_page, .get_chunk_size = snd_dma_noncontig_get_chunk_size, }; #ifdef CONFIG_SND_DMA_SGBUF /* Fallback SG-buffer allocations for x86 */ struct snd_dma_sg_fallback { struct sg_table sgt; /* used by get_addr - must be the first item */ size_t count; struct page **pages; unsigned int *npages; }; static void __snd_dma_sg_fallback_free(struct snd_dma_buffer *dmab, struct snd_dma_sg_fallback *sgbuf) { bool wc = dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG; size_t i, size; if (sgbuf->pages && sgbuf->npages) { i = 0; while (i < sgbuf->count) { 
size = sgbuf->npages[i]; if (!size) break; do_free_pages(page_address(sgbuf->pages[i]), size << PAGE_SHIFT, wc); i += size; } } kvfree(sgbuf->pages); kvfree(sgbuf->npages); kfree(sgbuf); } /* fallback manual S/G buffer allocations */ static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size) { bool wc = dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG; struct snd_dma_sg_fallback *sgbuf; struct page **pagep, *curp; size_t chunk; dma_addr_t addr; unsigned int idx, npages; void *p; sgbuf = kzalloc(sizeof(*sgbuf), GFP_KERNEL); if (!sgbuf) return NULL; size = PAGE_ALIGN(size); sgbuf->count = size >> PAGE_SHIFT; sgbuf->pages = kvcalloc(sgbuf->count, sizeof(*sgbuf->pages), GFP_KERNEL); sgbuf->npages = kvcalloc(sgbuf->count, sizeof(*sgbuf->npages), GFP_KERNEL); if (!sgbuf->pages || !sgbuf->npages) goto error; pagep = sgbuf->pages; chunk = size; idx = 0; while (size > 0) { chunk = min(size, chunk); p = do_alloc_pages(dmab->dev.dev, chunk, &addr, wc); if (!p) { if (chunk <= PAGE_SIZE) goto error; chunk >>= 1; chunk = PAGE_SIZE << get_order(chunk); continue; } size -= chunk; /* fill pages */ npages = chunk >> PAGE_SHIFT; sgbuf->npages[idx] = npages; idx += npages; curp = virt_to_page(p); while (npages--) *pagep++ = curp++; } if (sg_alloc_table_from_pages(&sgbuf->sgt, sgbuf->pages, sgbuf->count, 0, sgbuf->count << PAGE_SHIFT, GFP_KERNEL)) goto error; if (dma_map_sgtable(dmab->dev.dev, &sgbuf->sgt, DMA_BIDIRECTIONAL, 0)) goto error_dma_map; p = vmap(sgbuf->pages, sgbuf->count, VM_MAP, PAGE_KERNEL); if (!p) goto error_vmap; dmab->private_data = sgbuf; /* store the first page address for convenience */ dmab->addr = snd_sgbuf_get_addr(dmab, 0); return p; error_vmap: dma_unmap_sgtable(dmab->dev.dev, &sgbuf->sgt, DMA_BIDIRECTIONAL, 0); error_dma_map: sg_free_table(&sgbuf->sgt); error: __snd_dma_sg_fallback_free(dmab, sgbuf); return NULL; } static void snd_dma_sg_fallback_free(struct snd_dma_buffer *dmab) { struct snd_dma_sg_fallback *sgbuf = dmab->private_data; vunmap(dmab->area); dma_unmap_sgtable(dmab->dev.dev, &sgbuf->sgt, DMA_BIDIRECTIONAL, 0); sg_free_table(&sgbuf->sgt); __snd_dma_sg_fallback_free(dmab, dmab->private_data); } static int snd_dma_sg_fallback_mmap(struct snd_dma_buffer *dmab, struct vm_area_struct *area) { struct snd_dma_sg_fallback *sgbuf = dmab->private_data; if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG) area->vm_page_prot = pgprot_writecombine(area->vm_page_prot); return vm_map_pages(area, sgbuf->pages, sgbuf->count); } static void *snd_dma_sg_alloc(struct snd_dma_buffer *dmab, size_t size) { int type = dmab->dev.type; void *p; /* try the standard DMA API allocation at first */ if (type == SNDRV_DMA_TYPE_DEV_WC_SG) dmab->dev.type = SNDRV_DMA_TYPE_DEV_WC; else dmab->dev.type = SNDRV_DMA_TYPE_DEV; p = __snd_dma_alloc_pages(dmab, size); if (p) return p; dmab->dev.type = type; /* restore the type */ return snd_dma_sg_fallback_alloc(dmab, size); } static const struct snd_malloc_ops snd_dma_sg_ops = { .alloc = snd_dma_sg_alloc, .free = snd_dma_sg_fallback_free, .mmap = snd_dma_sg_fallback_mmap, /* reuse noncontig helper */ .get_addr = snd_dma_noncontig_get_addr, /* reuse vmalloc helpers */ .get_page = snd_dma_vmalloc_get_page, .get_chunk_size = snd_dma_vmalloc_get_chunk_size, }; #endif /* CONFIG_SND_DMA_SGBUF */ /* * Non-coherent pages allocator */ static void *snd_dma_noncoherent_alloc(struct snd_dma_buffer *dmab, size_t size) { void *p; p = dma_alloc_noncoherent(dmab->dev.dev, size, &dmab->addr, dmab->dev.dir, DEFAULT_GFP); if (p) dmab->dev.need_sync = 
dma_need_sync(dmab->dev.dev, dmab->addr); return p; } static void snd_dma_noncoherent_free(struct snd_dma_buffer *dmab) { dma_free_noncoherent(dmab->dev.dev, dmab->bytes, dmab->area, dmab->addr, dmab->dev.dir); } static int snd_dma_noncoherent_mmap(struct snd_dma_buffer *dmab, struct vm_area_struct *area) { area->vm_page_prot = vm_get_page_prot(area->vm_flags); return dma_mmap_pages(dmab->dev.dev, area, area->vm_end - area->vm_start, virt_to_page(dmab->area)); } static void snd_dma_noncoherent_sync(struct snd_dma_buffer *dmab, enum snd_dma_sync_mode mode) { if (mode == SNDRV_DMA_SYNC_CPU) { if (dmab->dev.dir != DMA_TO_DEVICE) dma_sync_single_for_cpu(dmab->dev.dev, dmab->addr, dmab->bytes, dmab->dev.dir); } else { if (dmab->dev.dir != DMA_FROM_DEVICE) dma_sync_single_for_device(dmab->dev.dev, dmab->addr, dmab->bytes, dmab->dev.dir); } } static const struct snd_malloc_ops snd_dma_noncoherent_ops = { .alloc = snd_dma_noncoherent_alloc, .free = snd_dma_noncoherent_free, .mmap = snd_dma_noncoherent_mmap, .sync = snd_dma_noncoherent_sync, }; #endif /* CONFIG_HAS_DMA */ /* * Entry points */ static const struct snd_malloc_ops *snd_dma_ops[] = { [SNDRV_DMA_TYPE_CONTINUOUS] = &snd_dma_continuous_ops, [SNDRV_DMA_TYPE_VMALLOC] = &snd_dma_vmalloc_ops, #ifdef CONFIG_HAS_DMA [SNDRV_DMA_TYPE_DEV] = &snd_dma_dev_ops, [SNDRV_DMA_TYPE_DEV_WC] = &snd_dma_wc_ops, [SNDRV_DMA_TYPE_NONCONTIG] = &snd_dma_noncontig_ops, [SNDRV_DMA_TYPE_NONCOHERENT] = &snd_dma_noncoherent_ops, #ifdef CONFIG_SND_DMA_SGBUF [SNDRV_DMA_TYPE_DEV_SG] = &snd_dma_sg_ops, [SNDRV_DMA_TYPE_DEV_WC_SG] = &snd_dma_sg_ops, #endif #ifdef CONFIG_GENERIC_ALLOCATOR [SNDRV_DMA_TYPE_DEV_IRAM] = &snd_dma_iram_ops, #endif /* CONFIG_GENERIC_ALLOCATOR */ #endif /* CONFIG_HAS_DMA */ }; static const struct snd_malloc_ops *snd_dma_get_ops(struct snd_dma_buffer *dmab) { if (WARN_ON_ONCE(!dmab)) return NULL; if (WARN_ON_ONCE(dmab->dev.type <= SNDRV_DMA_TYPE_UNKNOWN || dmab->dev.type >= ARRAY_SIZE(snd_dma_ops))) return NULL; return snd_dma_ops[dmab->dev.type]; }
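snd_dma_alloc_dir_pages() and snd_dma_free_pages() above are the front end that ALSA drivers call; the buffer type selects one of the snd_dma_ops entries at the bottom of the file. Below is a minimal driver-side sketch based only on the signatures shown above — the device pointer, the 64 KiB size and the example_* function names are assumptions, and real drivers typically use the devres variant or do fuller error handling.

#include <linux/dma-mapping.h>
#include <linux/string.h>
#include <sound/memalloc.h>

/* hypothetical helpers, for illustration only */
static int example_alloc_ring(struct device *dev, struct snd_dma_buffer *dmab)
{
	int err;

	/* size is PAGE_ALIGN()ed; SNDRV_DMA_TYPE_DEV selects snd_dma_dev_ops */
	err = snd_dma_alloc_dir_pages(SNDRV_DMA_TYPE_DEV, dev,
				      DMA_BIDIRECTIONAL, 64 * 1024, dmab);
	if (err < 0)
		return err;

	/* dmab->area is the CPU mapping, dmab->addr the DMA address */
	memset(dmab->area, 0, dmab->bytes);
	return 0;
}

static void example_free_ring(struct snd_dma_buffer *dmab)
{
	snd_dma_free_pages(dmab);	/* dispatches to ops->free() */
}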
// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/pnode.c * * (C) Copyright IBM Corporation 2005.
* Author : Ram Pai (linuxram@us.ibm.com) */ #include <linux/mnt_namespace.h> #include <linux/mount.h> #include <linux/fs.h> #include <linux/nsproxy.h> #include <uapi/linux/mount.h> #include "internal.h" #include "pnode.h" /* return the next shared peer mount of @p */ static inline struct mount *next_peer(struct mount *p) { return list_entry(p->mnt_share.next, struct mount, mnt_share); } static inline struct mount *first_slave(struct mount *p) { return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave); } static inline struct mount *last_slave(struct mount *p) { return list_entry(p->mnt_slave_list.prev, struct mount, mnt_slave); } static inline struct mount *next_slave(struct mount *p) { return list_entry(p->mnt_slave.next, struct mount, mnt_slave); } static struct mount *get_peer_under_root(struct mount *mnt, struct mnt_namespace *ns, const struct path *root) { struct mount *m = mnt; do { /* Check the namespace first for optimization */ if (m->mnt_ns == ns && is_path_reachable(m, m->mnt.mnt_root, root)) return m; m = next_peer(m); } while (m != mnt); return NULL; } /* * Get ID of closest dominating peer group having a representative * under the given root. * * Caller must hold namespace_sem */ int get_dominating_id(struct mount *mnt, const struct path *root) { struct mount *m; for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) { struct mount *d = get_peer_under_root(m, mnt->mnt_ns, root); if (d) return d->mnt_group_id; } return 0; } static int do_make_slave(struct mount *mnt) { struct mount *master, *slave_mnt; if (list_empty(&mnt->mnt_share)) { if (IS_MNT_SHARED(mnt)) { mnt_release_group_id(mnt); CLEAR_MNT_SHARED(mnt); } master = mnt->mnt_master; if (!master) { struct list_head *p = &mnt->mnt_slave_list; while (!list_empty(p)) { slave_mnt = list_first_entry(p, struct mount, mnt_slave); list_del_init(&slave_mnt->mnt_slave); slave_mnt->mnt_master = NULL; } return 0; } } else { struct mount *m; /* * slave 'mnt' to a peer mount that has the * same root dentry. If none is available then * slave it to anything that is available. */ for (m = master = next_peer(mnt); m != mnt; m = next_peer(m)) { if (m->mnt.mnt_root == mnt->mnt.mnt_root) { master = m; break; } } list_del_init(&mnt->mnt_share); mnt->mnt_group_id = 0; CLEAR_MNT_SHARED(mnt); } list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave) slave_mnt->mnt_master = master; list_move(&mnt->mnt_slave, &master->mnt_slave_list); list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev); INIT_LIST_HEAD(&mnt->mnt_slave_list); mnt->mnt_master = master; return 0; } /* * vfsmount lock must be held for write */ void change_mnt_propagation(struct mount *mnt, int type) { if (type == MS_SHARED) { set_mnt_shared(mnt); return; } do_make_slave(mnt); if (type != MS_SLAVE) { list_del_init(&mnt->mnt_slave); mnt->mnt_master = NULL; if (type == MS_UNBINDABLE) mnt->mnt.mnt_flags |= MNT_UNBINDABLE; else mnt->mnt.mnt_flags &= ~MNT_UNBINDABLE; } } /* * get the next mount in the propagation tree. * @m: the mount seen last * @origin: the original mount from where the tree walk initiated * * Note that peer groups form contiguous segments of slave lists. * We rely on that in get_source() to be able to find out if * vfsmount found while iterating with propagation_next() is * a peer of one we'd found earlier. */ static struct mount *propagation_next(struct mount *m, struct mount *origin) { /* are there any slaves of this mount? 
*/ if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list)) return first_slave(m); while (1) { struct mount *master = m->mnt_master; if (master == origin->mnt_master) { struct mount *next = next_peer(m); return (next == origin) ? NULL : next; } else if (m->mnt_slave.next != &master->mnt_slave_list) return next_slave(m); /* back at master */ m = master; } } static struct mount *skip_propagation_subtree(struct mount *m, struct mount *origin) { /* * Advance m such that propagation_next will not return * the slaves of m. */ if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list)) m = last_slave(m); return m; } static struct mount *next_group(struct mount *m, struct mount *origin) { while (1) { while (1) { struct mount *next; if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list)) return first_slave(m); next = next_peer(m); if (m->mnt_group_id == origin->mnt_group_id) { if (next == origin) return NULL; } else if (m->mnt_slave.next != &next->mnt_slave) break; m = next; } /* m is the last peer */ while (1) { struct mount *master = m->mnt_master; if (m->mnt_slave.next != &master->mnt_slave_list) return next_slave(m); m = next_peer(master); if (master->mnt_group_id == origin->mnt_group_id) break; if (master->mnt_slave.next == &m->mnt_slave) break; m = master; } if (m == origin) return NULL; } } /* all accesses are serialized by namespace_sem */ static struct mount *last_dest, *first_source, *last_source, *dest_master; static struct hlist_head *list; static inline bool peers(const struct mount *m1, const struct mount *m2) { return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id; } static int propagate_one(struct mount *m, struct mountpoint *dest_mp) { struct mount *child; int type; /* skip ones added by this propagate_mnt() */ if (IS_MNT_NEW(m)) return 0; /* skip if mountpoint isn't covered by it */ if (!is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) return 0; if (peers(m, last_dest)) { type = CL_MAKE_SHARED; } else { struct mount *n, *p; bool done; for (n = m; ; n = p) { p = n->mnt_master; if (p == dest_master || IS_MNT_MARKED(p)) break; } do { struct mount *parent = last_source->mnt_parent; if (peers(last_source, first_source)) break; done = parent->mnt_master == p; if (done && peers(n, parent)) break; last_source = last_source->mnt_master; } while (!done); type = CL_SLAVE; /* beginning of peer group among the slaves? */ if (IS_MNT_SHARED(m)) type |= CL_MAKE_SHARED; } child = copy_tree(last_source, last_source->mnt.mnt_root, type); if (IS_ERR(child)) return PTR_ERR(child); read_seqlock_excl(&mount_lock); mnt_set_mountpoint(m, dest_mp, child); if (m->mnt_master != dest_master) SET_MNT_MARK(m->mnt_master); read_sequnlock_excl(&mount_lock); last_dest = m; last_source = child; hlist_add_head(&child->mnt_hash, list); return count_mounts(m->mnt_ns, child); } /* * mount 'source_mnt' under the destination 'dest_mnt' at * dentry 'dest_dentry'. And propagate that mount to * all the peer and slave mounts of 'dest_mnt'. * Link all the new mounts into a propagation tree headed at * source_mnt. Also link all the new mounts using ->mnt_list * headed at source_mnt's ->mnt_list * * @dest_mnt: destination mount. * @dest_dentry: destination dentry. * @source_mnt: source mount. * @tree_list : list of heads of trees to be attached. 
*/ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp, struct mount *source_mnt, struct hlist_head *tree_list) { struct mount *m, *n; int ret = 0; /* * we don't want to bother passing tons of arguments to * propagate_one(); everything is serialized by namespace_sem, * so globals will do just fine. */ last_dest = dest_mnt; first_source = source_mnt; last_source = source_mnt; list = tree_list; dest_master = dest_mnt->mnt_master; /* all peers of dest_mnt, except dest_mnt itself */ for (n = next_peer(dest_mnt); n != dest_mnt; n = next_peer(n)) { ret = propagate_one(n, dest_mp); if (ret) goto out; } /* all slave groups */ for (m = next_group(dest_mnt, dest_mnt); m; m = next_group(m, dest_mnt)) { /* everything in that slave group */ n = m; do { ret = propagate_one(n, dest_mp); if (ret) goto out; n = next_peer(n); } while (n != m); } out: read_seqlock_excl(&mount_lock); hlist_for_each_entry(n, tree_list, mnt_hash) { m = n->mnt_parent; if (m->mnt_master != dest_mnt->mnt_master) CLEAR_MNT_MARK(m->mnt_master); } read_sequnlock_excl(&mount_lock); return ret; } static struct mount *find_topper(struct mount *mnt) { /* If there is exactly one mount covering mnt completely return it. */ struct mount *child; if (!list_is_singular(&mnt->mnt_mounts)) return NULL; child = list_first_entry(&mnt->mnt_mounts, struct mount, mnt_child); if (child->mnt_mountpoint != mnt->mnt.mnt_root) return NULL; return child; } /* * return true if the refcount is greater than count */ static inline int do_refcount_check(struct mount *mnt, int count) { return mnt_get_count(mnt) > count; } /** * propagation_would_overmount - check whether propagation from @from * would overmount @to * @from: shared mount * @to: mount to check * @mp: future mountpoint of @to on @from * * If @from propagates mounts to @to, @from and @to must either be peers * or one of the masters in the hierarchy of masters of @to must be a * peer of @from. * * If the root of the @to mount is equal to the future mountpoint @mp of * the @to mount on @from then @to will be overmounted by whatever is * propagated to it. * * Context: This function expects namespace_lock() to be held and that * @mp is stable. * Return: If @from overmounts @to, true is returned, false if not. */ bool propagation_would_overmount(const struct mount *from, const struct mount *to, const struct mountpoint *mp) { if (!IS_MNT_SHARED(from)) return false; if (IS_MNT_NEW(to)) return false; if (to->mnt.mnt_root != mp->m_dentry) return false; for (const struct mount *m = to; m; m = m->mnt_master) { if (peers(from, m)) return true; } return false; } /* * check if the mount 'mnt' can be unmounted successfully. * @mnt: the mount to be checked for unmount * NOTE: unmounting 'mnt' would naturally propagate to all * other mounts its parent propagates to. * Check if any of these mounts that **do not have submounts** * have more references than 'refcnt'. If so return busy. * * vfsmount lock must be held for write */ int propagate_mount_busy(struct mount *mnt, int refcnt) { struct mount *m, *child, *topper; struct mount *parent = mnt->mnt_parent; if (mnt == parent) return do_refcount_check(mnt, refcnt); /* * quickly check if the current mount can be unmounted. 
* If not, we don't have to go checking for all other * mounts */ if (!list_empty(&mnt->mnt_mounts) || do_refcount_check(mnt, refcnt)) return 1; for (m = propagation_next(parent, parent); m; m = propagation_next(m, parent)) { int count = 1; child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint); if (!child) continue; /* Is there exactly one mount on the child that covers * it completely whose reference should be ignored? */ topper = find_topper(child); if (topper) count += 1; else if (!list_empty(&child->mnt_mounts)) continue; if (do_refcount_check(child, count)) return 1; } return 0; } /* * Clear MNT_LOCKED when it can be shown to be safe. * * mount_lock lock must be held for write */ void propagate_mount_unlock(struct mount *mnt) { struct mount *parent = mnt->mnt_parent; struct mount *m, *child; BUG_ON(parent == mnt); for (m = propagation_next(parent, parent); m; m = propagation_next(m, parent)) { child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint); if (child) child->mnt.mnt_flags &= ~MNT_LOCKED; } } static void umount_one(struct mount *mnt, struct list_head *to_umount) { CLEAR_MNT_MARK(mnt); mnt->mnt.mnt_flags |= MNT_UMOUNT; list_del_init(&mnt->mnt_child); list_del_init(&mnt->mnt_umounting); move_from_ns(mnt, to_umount); } /* * NOTE: unmounting 'mnt' naturally propagates to all other mounts its * parent propagates to. */ static bool __propagate_umount(struct mount *mnt, struct list_head *to_umount, struct list_head *to_restore) { bool progress = false; struct mount *child; /* * The state of the parent won't change if this mount is * already unmounted or marked as without children. */ if (mnt->mnt.mnt_flags & (MNT_UMOUNT | MNT_MARKED)) goto out; /* Verify topper is the only grandchild that has not been * speculatively unmounted. */ list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { if (child->mnt_mountpoint == mnt->mnt.mnt_root) continue; if (!list_empty(&child->mnt_umounting) && IS_MNT_MARKED(child)) continue; /* Found a mounted child */ goto children; } /* Mark mounts that can be unmounted if not locked */ SET_MNT_MARK(mnt); progress = true; /* If a mount is without children and not locked umount it. */ if (!IS_MNT_LOCKED(mnt)) { umount_one(mnt, to_umount); } else { children: list_move_tail(&mnt->mnt_umounting, to_restore); } out: return progress; } static void umount_list(struct list_head *to_umount, struct list_head *to_restore) { struct mount *mnt, *child, *tmp; list_for_each_entry(mnt, to_umount, mnt_list) { list_for_each_entry_safe(child, tmp, &mnt->mnt_mounts, mnt_child) { /* topper? */ if (child->mnt_mountpoint == mnt->mnt.mnt_root) list_move_tail(&child->mnt_umounting, to_restore); else umount_one(child, to_umount); } } } static void restore_mounts(struct list_head *to_restore) { /* Restore mounts to a clean working state */ while (!list_empty(to_restore)) { struct mount *mnt, *parent; struct mountpoint *mp; mnt = list_first_entry(to_restore, struct mount, mnt_umounting); CLEAR_MNT_MARK(mnt); list_del_init(&mnt->mnt_umounting); /* Should this mount be reparented? 
*/ mp = mnt->mnt_mp; parent = mnt->mnt_parent; while (parent->mnt.mnt_flags & MNT_UMOUNT) { mp = parent->mnt_mp; parent = parent->mnt_parent; } if (parent != mnt->mnt_parent) mnt_change_mountpoint(parent, mp, mnt); } } static void cleanup_umount_visitations(struct list_head *visited) { while (!list_empty(visited)) { struct mount *mnt = list_first_entry(visited, struct mount, mnt_umounting); list_del_init(&mnt->mnt_umounting); } } /* * collect all mounts that receive propagation from the mount in @list, * and return these additional mounts in the same list. * @list: the list of mounts to be unmounted. * * vfsmount lock must be held for write */ int propagate_umount(struct list_head *list) { struct mount *mnt; LIST_HEAD(to_restore); LIST_HEAD(to_umount); LIST_HEAD(visited); /* Find candidates for unmounting */ list_for_each_entry_reverse(mnt, list, mnt_list) { struct mount *parent = mnt->mnt_parent; struct mount *m; /* * If this mount has already been visited it is known that it's * entire peer group and all of their slaves in the propagation * tree for the mountpoint has already been visited and there is * no need to visit them again. */ if (!list_empty(&mnt->mnt_umounting)) continue; list_add_tail(&mnt->mnt_umounting, &visited); for (m = propagation_next(parent, parent); m; m = propagation_next(m, parent)) { struct mount *child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint); if (!child) continue; if (!list_empty(&child->mnt_umounting)) { /* * If the child has already been visited it is * know that it's entire peer group and all of * their slaves in the propgation tree for the * mountpoint has already been visited and there * is no need to visit this subtree again. */ m = skip_propagation_subtree(m, parent); continue; } else if (child->mnt.mnt_flags & MNT_UMOUNT) { /* * We have come across a partially unmounted * mount in a list that has not been visited * yet. Remember it has been visited and * continue about our merry way. */ list_add_tail(&child->mnt_umounting, &visited); continue; } /* Check the child and parents while progress is made */ while (__propagate_umount(child, &to_umount, &to_restore)) { /* Is the parent a umount candidate? */ child = child->mnt_parent; if (list_empty(&child->mnt_umounting)) break; } } } umount_list(&to_umount, &to_restore); restore_mounts(&to_restore); cleanup_umount_visitations(&visited); list_splice_tail(&to_umount, list); return 0; }
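change_mnt_propagation() above is what ultimately services a propagation-type change requested from userspace. An illustrative userspace sketch of that side, assuming /mnt is an existing mount point and with error handling reduced to perror():

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* make /mnt shared: ends up in set_mnt_shared() */
	if (mount(NULL, "/mnt", NULL, MS_SHARED, NULL) < 0)
		perror("MS_SHARED");

	/* demote it to a slave of its former peer group: do_make_slave() */
	if (mount(NULL, "/mnt", NULL, MS_SLAVE, NULL) < 0)
		perror("MS_SLAVE");

	/* MS_REC applies the change to the whole subtree under /mnt */
	if (mount(NULL, "/mnt", NULL, MS_PRIVATE | MS_REC, NULL) < 0)
		perror("MS_PRIVATE");

	return 0;
}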
// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/minix/namei.c * * Copyright (C) 1991, 1992 Linus Torvalds */ #include "minix.h" static int add_nondir(struct dentry *dentry, struct inode *inode) { int err = minix_add_link(dentry, inode); if (!err) { d_instantiate(dentry, inode); return 0; } inode_dec_link_count(inode); iput(inode); return err; } static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, unsigned int flags) { struct inode * inode = NULL; ino_t ino; if (dentry->d_name.len > minix_sb(dir->i_sb)->s_namelen) return ERR_PTR(-ENAMETOOLONG); ino = minix_inode_by_name(dentry); if (ino) inode = minix_iget(dir->i_sb, ino); return d_splice_alias(inode, dentry); } static int minix_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct inode *inode; if (!old_valid_dev(rdev)) return -EINVAL; inode = minix_new_inode(dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); minix_set_inode(inode, rdev); mark_inode_dirty(inode); return add_nondir(dentry, inode); } static int minix_tmpfile(struct mnt_idmap *idmap, struct inode *dir, struct file *file, umode_t mode) { struct inode *inode = minix_new_inode(dir, mode); if (IS_ERR(inode)) return finish_open_simple(file, PTR_ERR(inode)); minix_set_inode(inode, 0); mark_inode_dirty(inode); d_tmpfile(file, inode); return finish_open_simple(file, 0); } static int minix_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { return minix_mknod(&nop_mnt_idmap, dir, dentry, mode, 0); } static int minix_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { int i = strlen(symname)+1; struct inode * inode; int err; if (i > dir->i_sb->s_blocksize) return -ENAMETOOLONG; inode = minix_new_inode(dir, S_IFLNK | 0777); if (IS_ERR(inode)) return PTR_ERR(inode); minix_set_inode(inode, 0); err = page_symlink(inode, symname, i); if (unlikely(err)) { inode_dec_link_count(inode); iput(inode); return err; } return add_nondir(dentry, inode); } static int minix_link(struct dentry * old_dentry, struct inode * dir, struct dentry *dentry) { struct inode *inode = d_inode(old_dentry); inode_set_ctime_current(inode); inode_inc_link_count(inode); ihold(inode); return add_nondir(dentry, inode); } static int minix_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct inode * inode; int err; inode = minix_new_inode(dir, S_IFDIR | mode); if
(IS_ERR(inode)) return PTR_ERR(inode); inode_inc_link_count(dir); minix_set_inode(inode, 0); inode_inc_link_count(inode); err = minix_make_empty(inode, dir); if (err) goto out_fail; err = minix_add_link(dentry, inode); if (err) goto out_fail; d_instantiate(dentry, inode); out: return err; out_fail: inode_dec_link_count(inode); inode_dec_link_count(inode); iput(inode); inode_dec_link_count(dir); goto out; } static int minix_unlink(struct inode * dir, struct dentry *dentry) { struct inode * inode = d_inode(dentry); struct folio *folio; struct minix_dir_entry * de; int err; de = minix_find_entry(dentry, &folio); if (!de) return -ENOENT; err = minix_delete_entry(de, folio); folio_release_kmap(folio, de); if (err) return err; inode_set_ctime_to_ts(inode, inode_get_ctime(dir)); inode_dec_link_count(inode); return 0; } static int minix_rmdir(struct inode * dir, struct dentry *dentry) { struct inode * inode = d_inode(dentry); int err = -ENOTEMPTY; if (minix_empty_dir(inode)) { err = minix_unlink(dir, dentry); if (!err) { inode_dec_link_count(dir); inode_dec_link_count(inode); } } return err; } static int minix_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { struct inode * old_inode = d_inode(old_dentry); struct inode * new_inode = d_inode(new_dentry); struct folio * dir_folio = NULL; struct minix_dir_entry * dir_de = NULL; struct folio *old_folio; struct minix_dir_entry * old_de; int err = -ENOENT; if (flags & ~RENAME_NOREPLACE) return -EINVAL; old_de = minix_find_entry(old_dentry, &old_folio); if (!old_de) goto out; if (S_ISDIR(old_inode->i_mode)) { err = -EIO; dir_de = minix_dotdot(old_inode, &dir_folio); if (!dir_de) goto out_old; } if (new_inode) { struct folio *new_folio; struct minix_dir_entry * new_de; err = -ENOTEMPTY; if (dir_de && !minix_empty_dir(new_inode)) goto out_dir; err = -ENOENT; new_de = minix_find_entry(new_dentry, &new_folio); if (!new_de) goto out_dir; err = minix_set_link(new_de, new_folio, old_inode); folio_release_kmap(new_folio, new_de); if (err) goto out_dir; inode_set_ctime_current(new_inode); if (dir_de) drop_nlink(new_inode); inode_dec_link_count(new_inode); } else { err = minix_add_link(new_dentry, old_inode); if (err) goto out_dir; if (dir_de) inode_inc_link_count(new_dir); } err = minix_delete_entry(old_de, old_folio); if (err) goto out_dir; mark_inode_dirty(old_inode); if (dir_de) { err = minix_set_link(dir_de, dir_folio, new_dir); if (!err) inode_dec_link_count(old_dir); } out_dir: if (dir_de) folio_release_kmap(dir_folio, dir_de); out_old: folio_release_kmap(old_folio, old_de); out: return err; } /* * directories can handle most operations... */ const struct inode_operations minix_dir_inode_operations = { .create = minix_create, .lookup = minix_lookup, .link = minix_link, .unlink = minix_unlink, .symlink = minix_symlink, .mkdir = minix_mkdir, .rmdir = minix_rmdir, .mknod = minix_mknod, .rename = minix_rename, .getattr = minix_getattr, .tmpfile = minix_tmpfile, };
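minix_rename() above rejects every rename flag other than RENAME_NOREPLACE, so from userspace only the "fail instead of replacing the target" variant reaches this filesystem. A minimal sketch of the visible behaviour, assuming glibc 2.28+ (which exposes renameat2() and the RENAME_* flags via <stdio.h> with _GNU_SOURCE) and hypothetical file names on a mounted minix filesystem:

#define _GNU_SOURCE
#include <fcntl.h>   /* AT_FDCWD */
#include <stdio.h>   /* renameat2(), RENAME_NOREPLACE, RENAME_EXCHANGE */
#include <errno.h>
#include <string.h>

int main(void)
{
	/* Succeeds only if "new" does not already exist (else EEXIST). */
	if (renameat2(AT_FDCWD, "old", AT_FDCWD, "new", RENAME_NOREPLACE) < 0)
		fprintf(stderr, "renameat2 noreplace: %s\n", strerror(errno));

	/* RENAME_EXCHANGE is rejected with EINVAL by the flags check in
	 * minix_rename() above. */
	if (renameat2(AT_FDCWD, "old", AT_FDCWD, "new", RENAME_EXCHANGE) < 0)
		fprintf(stderr, "renameat2 exchange: %s\n", strerror(errno));
	return 0;
}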
// SPDX-License-Identifier: GPL-2.0
/*
 * io_misc.c - fallocate, fpunch, truncate:
 */

#include "bcachefs.h"
#include "alloc_foreground.h"
#include "bkey_buf.h"
#include "btree_update.h"
#include "buckets.h"
#include "clock.h"
#include "error.h"
#include "extents.h"
#include "extent_update.h"
#include "inode.h"
#include "io_misc.h"
#include "io_write.h"
#include "logged_ops.h"
#include "rebalance.h"
#include "subvolume.h"

/* Overwrites whatever was present with zeroes: */
int bch2_extent_fallocate(struct btree_trans *trans,
			  subvol_inum inum,
			  struct btree_iter *iter,
			  u64 sectors,
			  struct bch_io_opts opts,
			  s64 *i_sectors_delta,
			  struct write_point_specifier write_point)
{
	struct bch_fs *c = trans->c;
	struct disk_reservation disk_res = { 0 };
	struct closure cl;
	struct open_buckets open_buckets = { 0 };
	struct bkey_s_c k;
	struct bkey_buf old, new;
	unsigned sectors_allocated = 0, new_replicas;
	bool unwritten = opts.nocow &&
	    c->sb.version >= bcachefs_metadata_version_unwritten_extents;
	int ret;

	bch2_bkey_buf_init(&old);
	bch2_bkey_buf_init(&new);
	closure_init_stack(&cl);

	k = bch2_btree_iter_peek_slot(iter);
	ret = bkey_err(k);
	if (ret)
		return ret;

	sectors = min_t(u64, sectors, k.k->p.offset - iter->pos.offset);

	new_replicas = max(0, (int) opts.data_replicas -
			   (int) bch2_bkey_nr_ptrs_fully_allocated(k));

	/*
	 * Get a disk reservation before (in the nocow case) calling
	 * into the allocator:
	 */
	ret = bch2_disk_reservation_get(c, &disk_res, sectors, new_replicas, 0);
	if
(unlikely(ret)) goto err_noprint; bch2_bkey_buf_reassemble(&old, c, k); if (!unwritten) { struct bkey_i_reservation *reservation; bch2_bkey_buf_realloc(&new, c, sizeof(*reservation) / sizeof(u64)); reservation = bkey_reservation_init(new.k); reservation->k.p = iter->pos; bch2_key_resize(&reservation->k, sectors); reservation->v.nr_replicas = opts.data_replicas; } else { struct bkey_i_extent *e; struct bch_devs_list devs_have; struct write_point *wp; devs_have.nr = 0; bch2_bkey_buf_realloc(&new, c, BKEY_EXTENT_U64s_MAX); e = bkey_extent_init(new.k); e->k.p = iter->pos; ret = bch2_alloc_sectors_start_trans(trans, opts.foreground_target, false, write_point, &devs_have, opts.data_replicas, opts.data_replicas, BCH_WATERMARK_normal, 0, &cl, &wp); if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) ret = -BCH_ERR_transaction_restart_nested; if (ret) goto err; sectors = min_t(u64, sectors, wp->sectors_free); sectors_allocated = sectors; bch2_key_resize(&e->k, sectors); bch2_open_bucket_get(c, wp, &open_buckets); bch2_alloc_sectors_append_ptrs(c, wp, &e->k_i, sectors, false); bch2_alloc_sectors_done(c, wp); extent_for_each_ptr(extent_i_to_s(e), ptr) ptr->unwritten = true; } ret = bch2_extent_update(trans, inum, iter, new.k, &disk_res, 0, i_sectors_delta, true); err: if (!ret && sectors_allocated) bch2_increment_clock(c, sectors_allocated, WRITE); if (should_print_err(ret)) { struct printbuf buf = PRINTBUF; bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter->pos.offset << 9); prt_printf(&buf, "fallocate error: %s", bch2_err_str(ret)); bch_err_ratelimited(c, "%s", buf.buf); printbuf_exit(&buf); } err_noprint: bch2_open_buckets_put(c, &open_buckets); bch2_disk_reservation_put(c, &disk_res); bch2_bkey_buf_exit(&new, c); bch2_bkey_buf_exit(&old, c); if (closure_nr_remaining(&cl) != 1) { bch2_trans_unlock_long(trans); bch2_wait_on_allocator(c, &cl); } return ret; } /* * Returns -BCH_ERR_transacton_restart if we had to drop locks: */ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, subvol_inum inum, u64 end, s64 *i_sectors_delta) { struct bch_fs *c = trans->c; unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits); struct bpos end_pos = POS(inum.inum, end); struct bkey_s_c k; int ret = 0, ret2 = 0; u32 snapshot; while (!ret || bch2_err_matches(ret, BCH_ERR_transaction_restart)) { struct disk_reservation disk_res = bch2_disk_reservation_init(c, 0); struct bkey_i delete; if (ret) ret2 = ret; bch2_trans_begin(trans); ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); if (ret) continue; bch2_btree_iter_set_snapshot(iter, snapshot); /* * peek_max() doesn't have ideal semantics for extents: */ k = bch2_btree_iter_peek_max(iter, end_pos); if (!k.k) break; ret = bkey_err(k); if (ret) continue; bkey_init(&delete.k); delete.k.p = iter->pos; /* create the biggest key we can */ bch2_key_resize(&delete.k, max_sectors); bch2_cut_back(end_pos, &delete); ret = bch2_extent_update(trans, inum, iter, &delete, &disk_res, 0, i_sectors_delta, false); bch2_disk_reservation_put(c, &disk_res); } return ret ?: ret2; } int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end, s64 *i_sectors_delta) { struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; int ret; bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, POS(inum.inum, start), BTREE_ITER_intent); ret = bch2_fpunch_at(trans, &iter, inum, end, i_sectors_delta); bch2_trans_iter_exit(trans, &iter); bch2_trans_put(trans); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) ret = 0; return ret; } 
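bch2_fpunch() and bch2_fpunch_at() above delete the extents backing a byte range; the userspace operation that typically lands here is fallocate() with FALLOC_FL_PUNCH_HOLE, which must be combined with FALLOC_FL_KEEP_SIZE. A minimal sketch with a hypothetical file name and offsets, assuming glibc exposes the FALLOC_FL_* flags via <fcntl.h> when _GNU_SOURCE is defined:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	int fd = open("testfile", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Drop the blocks backing bytes [1 MiB, 2 MiB); i_size is unchanged. */
	if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
		      1 << 20, 1 << 20) < 0)
		fprintf(stderr, "fallocate: %s\n", strerror(errno));

	close(fd);
	return 0;
}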
/* truncate: */ void bch2_logged_op_truncate_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { struct bkey_s_c_logged_op_truncate op = bkey_s_c_to_logged_op_truncate(k); prt_printf(out, "subvol=%u", le32_to_cpu(op.v->subvol)); prt_printf(out, " inum=%llu", le64_to_cpu(op.v->inum)); prt_printf(out, " new_i_size=%llu", le64_to_cpu(op.v->new_i_size)); } static int truncate_set_isize(struct btree_trans *trans, subvol_inum inum, u64 new_i_size, bool warn) { struct btree_iter iter = { NULL }; struct bch_inode_unpacked inode_u; int ret; ret = __bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent, warn) ?: (inode_u.bi_size = new_i_size, 0) ?: bch2_inode_write(trans, &iter, &inode_u); bch2_trans_iter_exit(trans, &iter); return ret; } static int __bch2_resume_logged_op_truncate(struct btree_trans *trans, struct bkey_i *op_k, u64 *i_sectors_delta) { struct bch_fs *c = trans->c; struct btree_iter fpunch_iter; struct bkey_i_logged_op_truncate *op = bkey_i_to_logged_op_truncate(op_k); subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) }; u64 new_i_size = le64_to_cpu(op->v.new_i_size); bool warn_errors = i_sectors_delta != NULL; int ret; ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, truncate_set_isize(trans, inum, new_i_size, i_sectors_delta != NULL)); if (ret) goto err; bch2_trans_iter_init(trans, &fpunch_iter, BTREE_ID_extents, POS(inum.inum, round_up(new_i_size, block_bytes(c)) >> 9), BTREE_ITER_intent); ret = bch2_fpunch_at(trans, &fpunch_iter, inum, U64_MAX, i_sectors_delta); bch2_trans_iter_exit(trans, &fpunch_iter); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) ret = 0; err: if (warn_errors) bch_err_fn(c, ret); return ret; } int bch2_resume_logged_op_truncate(struct btree_trans *trans, struct bkey_i *op_k) { return __bch2_resume_logged_op_truncate(trans, op_k, NULL); } int bch2_truncate(struct bch_fs *c, subvol_inum inum, u64 new_i_size, u64 *i_sectors_delta) { struct bkey_i_logged_op_truncate op; bkey_logged_op_truncate_init(&op.k_i); op.v.subvol = cpu_to_le32(inum.subvol); op.v.inum = cpu_to_le64(inum.inum); op.v.new_i_size = cpu_to_le64(new_i_size); /* * Logged ops aren't atomic w.r.t. 
snapshot creation: creating a * snapshot while they're in progress, then crashing, will result in the * resume only proceeding in one of the snapshots */ down_read(&c->snapshot_create_lock); struct btree_trans *trans = bch2_trans_get(c); int ret = bch2_logged_op_start(trans, &op.k_i); if (ret) goto out; ret = __bch2_resume_logged_op_truncate(trans, &op.k_i, i_sectors_delta); ret = bch2_logged_op_finish(trans, &op.k_i) ?: ret; out: bch2_trans_put(trans); up_read(&c->snapshot_create_lock); return ret; } /* finsert/fcollapse: */ void bch2_logged_op_finsert_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { struct bkey_s_c_logged_op_finsert op = bkey_s_c_to_logged_op_finsert(k); prt_printf(out, "subvol=%u", le32_to_cpu(op.v->subvol)); prt_printf(out, " inum=%llu", le64_to_cpu(op.v->inum)); prt_printf(out, " dst_offset=%lli", le64_to_cpu(op.v->dst_offset)); prt_printf(out, " src_offset=%llu", le64_to_cpu(op.v->src_offset)); } static int adjust_i_size(struct btree_trans *trans, subvol_inum inum, u64 offset, s64 len, bool warn) { struct btree_iter iter; struct bch_inode_unpacked inode_u; int ret; offset <<= 9; len <<= 9; ret = __bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent, warn); if (ret) return ret; if (len > 0) { if (MAX_LFS_FILESIZE - inode_u.bi_size < len) { ret = -EFBIG; goto err; } if (offset >= inode_u.bi_size) { ret = -EINVAL; goto err; } } inode_u.bi_size += len; inode_u.bi_mtime = inode_u.bi_ctime = bch2_current_time(trans->c); ret = bch2_inode_write(trans, &iter, &inode_u); err: bch2_trans_iter_exit(trans, &iter); return ret; } static int __bch2_resume_logged_op_finsert(struct btree_trans *trans, struct bkey_i *op_k, u64 *i_sectors_delta) { struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_i_logged_op_finsert *op = bkey_i_to_logged_op_finsert(op_k); subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) }; struct bch_io_opts opts; u64 dst_offset = le64_to_cpu(op->v.dst_offset); u64 src_offset = le64_to_cpu(op->v.src_offset); s64 shift = dst_offset - src_offset; u64 len = abs(shift); u64 pos = le64_to_cpu(op->v.pos); bool insert = shift > 0; u32 snapshot; bool warn_errors = i_sectors_delta != NULL; int ret = 0; ret = bch2_inum_opts_get(trans, inum, &opts); if (ret) return ret; /* * check for missing subvolume before fpunch, as in resume we don't want * it to be a fatal error */ ret = lockrestart_do(trans, __bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot, warn_errors)); if (ret) return ret; bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, POS(inum.inum, 0), BTREE_ITER_intent); switch (op->v.state) { case LOGGED_OP_FINSERT_start: op->v.state = LOGGED_OP_FINSERT_shift_extents; if (insert) { ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, adjust_i_size(trans, inum, src_offset, len, warn_errors) ?: bch2_logged_op_update(trans, &op->k_i)); if (ret) goto err; } else { bch2_btree_iter_set_pos(&iter, POS(inum.inum, src_offset)); ret = bch2_fpunch_at(trans, &iter, inum, src_offset + len, i_sectors_delta); if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto err; ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_logged_op_update(trans, &op->k_i)); } fallthrough; case LOGGED_OP_FINSERT_shift_extents: while (1) { struct disk_reservation disk_res = bch2_disk_reservation_init(c, 0); struct bkey_i delete, *copy; struct bkey_s_c k; struct bpos src_pos = POS(inum.inum, src_offset); bch2_trans_begin(trans); ret = __bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot, 
warn_errors); if (ret) goto btree_err; bch2_btree_iter_set_snapshot(&iter, snapshot); bch2_btree_iter_set_pos(&iter, SPOS(inum.inum, pos, snapshot)); k = insert ? bch2_btree_iter_peek_prev_min(&iter, POS(inum.inum, 0)) : bch2_btree_iter_peek_max(&iter, POS(inum.inum, U64_MAX)); if ((ret = bkey_err(k))) goto btree_err; if (!k.k || k.k->p.inode != inum.inum || bkey_le(k.k->p, POS(inum.inum, src_offset))) break; copy = bch2_bkey_make_mut_noupdate(trans, k); if ((ret = PTR_ERR_OR_ZERO(copy))) goto btree_err; if (insert && bkey_lt(bkey_start_pos(k.k), src_pos)) { bch2_cut_front(src_pos, copy); /* Splitting compressed extent? */ bch2_disk_reservation_add(c, &disk_res, copy->k.size * bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy)), BCH_DISK_RESERVATION_NOFAIL); } bkey_init(&delete.k); delete.k.p = copy->k.p; delete.k.p.snapshot = snapshot; delete.k.size = copy->k.size; copy->k.p.offset += shift; copy->k.p.snapshot = snapshot; op->v.pos = cpu_to_le64(insert ? bkey_start_offset(&delete.k) : delete.k.p.offset); ret = bch2_bkey_set_needs_rebalance(c, &opts, copy) ?: bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?: bch2_btree_insert_trans(trans, BTREE_ID_extents, copy, 0) ?: bch2_logged_op_update(trans, &op->k_i) ?: bch2_trans_commit(trans, &disk_res, NULL, BCH_TRANS_COMMIT_no_enospc); btree_err: bch2_disk_reservation_put(c, &disk_res); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret) goto err; pos = le64_to_cpu(op->v.pos); } op->v.state = LOGGED_OP_FINSERT_finish; if (!insert) { ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, adjust_i_size(trans, inum, src_offset, shift, warn_errors) ?: bch2_logged_op_update(trans, &op->k_i)); } else { /* We need an inode update to update bi_journal_seq for fsync: */ ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, adjust_i_size(trans, inum, 0, 0, warn_errors) ?: bch2_logged_op_update(trans, &op->k_i)); } break; case LOGGED_OP_FINSERT_finish: break; } err: bch2_trans_iter_exit(trans, &iter); if (warn_errors) bch_err_fn(c, ret); return ret; } int bch2_resume_logged_op_finsert(struct btree_trans *trans, struct bkey_i *op_k) { return __bch2_resume_logged_op_finsert(trans, op_k, NULL); } int bch2_fcollapse_finsert(struct bch_fs *c, subvol_inum inum, u64 offset, u64 len, bool insert, s64 *i_sectors_delta) { struct bkey_i_logged_op_finsert op; s64 shift = insert ? len : -len; bkey_logged_op_finsert_init(&op.k_i); op.v.subvol = cpu_to_le32(inum.subvol); op.v.inum = cpu_to_le64(inum.inum); op.v.dst_offset = cpu_to_le64(offset + shift); op.v.src_offset = cpu_to_le64(offset); op.v.pos = cpu_to_le64(insert ? U64_MAX : offset); /* * Logged ops aren't atomic w.r.t. snapshot creation: creating a * snapshot while they're in progress, then crashing, will result in the * resume only proceeding in one of the snapshots */ down_read(&c->snapshot_create_lock); struct btree_trans *trans = bch2_trans_get(c); int ret = bch2_logged_op_start(trans, &op.k_i); if (ret) goto out; ret = __bch2_resume_logged_op_finsert(trans, &op.k_i, i_sectors_delta); ret = bch2_logged_op_finish(trans, &op.k_i) ?: ret; out: bch2_trans_put(trans); up_read(&c->snapshot_create_lock); return ret; }
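bch2_fcollapse_finsert() above handles both directions of the shift: insert == true opens a gap at the offset and insert == false removes the range and pulls the tail back down. The corresponding userspace calls are fallocate() with FALLOC_FL_INSERT_RANGE and FALLOC_FL_COLLAPSE_RANGE, both of which require offset and length to be multiples of the filesystem block size. A minimal sketch with hypothetical names, assuming the file is larger than 128 KiB and that 64 KiB is a multiple of the block size:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const off_t blk = 64 * 1024;	/* assumed multiple of the fs block size */
	int fd = open("testfile", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Insert a 64 KiB hole at offset 64 KiB, shifting existing data up. */
	if (fallocate(fd, FALLOC_FL_INSERT_RANGE, blk, blk) < 0)
		fprintf(stderr, "insert range: %s\n", strerror(errno));

	/* Remove that range again, shifting the tail back down. */
	if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, blk, blk) < 0)
		fprintf(stderr, "collapse range: %s\n", strerror(errno));

	close(fd);
	return 0;
}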
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Afatech AF9035 DVB USB driver
 *
 * Copyright (C) 2009 Antti Palosaari <crope@iki.fi>
 * Copyright (C) 2012 Antti Palosaari <crope@iki.fi>
 */

#include "af9035.h"

/* Max transfer size done by I2C transfer functions */
#define MAX_XFER_SIZE  64

DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr);

static u16 af9035_checksum(const u8 *buf, size_t len)
{
	size_t i;
	u16 checksum = 0;

	for (i = 1; i < len; i++) {
		if (i % 2)
			checksum += buf[i] << 8;
		else
			checksum += buf[i];
	}
	checksum = ~checksum;

	return checksum;
}

static int af9035_ctrl_msg(struct dvb_usb_device *d, struct usb_req *req)
{
#define REQ_HDR_LEN 4 /* send header size */
#define ACK_HDR_LEN 3 /* receive header size */
#define CHECKSUM_LEN
2 #define USB_TIMEOUT 2000 struct state *state = d_to_priv(d); struct usb_interface *intf = d->intf; int ret, wlen, rlen; u16 checksum, tmp_checksum; mutex_lock(&d->usb_mutex); /* buffer overflow check */ if (req->wlen > (BUF_LEN - REQ_HDR_LEN - CHECKSUM_LEN) || req->rlen > (BUF_LEN - ACK_HDR_LEN - CHECKSUM_LEN)) { dev_err(&intf->dev, "too much data wlen=%d rlen=%d\n", req->wlen, req->rlen); ret = -EINVAL; goto exit; } state->buf[0] = REQ_HDR_LEN + req->wlen + CHECKSUM_LEN - 1; state->buf[1] = req->mbox; state->buf[2] = req->cmd; state->buf[3] = state->seq++; memcpy(&state->buf[REQ_HDR_LEN], req->wbuf, req->wlen); wlen = REQ_HDR_LEN + req->wlen + CHECKSUM_LEN; rlen = ACK_HDR_LEN + req->rlen + CHECKSUM_LEN; /* calc and add checksum */ checksum = af9035_checksum(state->buf, state->buf[0] - 1); state->buf[state->buf[0] - 1] = (checksum >> 8); state->buf[state->buf[0] - 0] = (checksum & 0xff); /* no ack for these packets */ if (req->cmd == CMD_FW_DL) rlen = 0; ret = dvb_usbv2_generic_rw_locked(d, state->buf, wlen, state->buf, rlen); if (ret) goto exit; /* no ack for those packets */ if (req->cmd == CMD_FW_DL) goto exit; /* verify checksum */ checksum = af9035_checksum(state->buf, rlen - 2); tmp_checksum = (state->buf[rlen - 2] << 8) | state->buf[rlen - 1]; if (tmp_checksum != checksum) { dev_err(&intf->dev, "command=%02x checksum mismatch (%04x != %04x)\n", req->cmd, tmp_checksum, checksum); ret = -EIO; goto exit; } /* check status */ if (state->buf[2]) { /* fw returns status 1 when IR code was not received */ if (req->cmd == CMD_IR_GET || state->buf[2] == 1) { ret = 1; goto exit; } dev_dbg(&intf->dev, "command=%02x failed fw error=%d\n", req->cmd, state->buf[2]); ret = -EIO; goto exit; } /* read request, copy returned data to return buf */ if (req->rlen) memcpy(req->rbuf, &state->buf[ACK_HDR_LEN], req->rlen); exit: mutex_unlock(&d->usb_mutex); return ret; } /* write multiple registers */ static int af9035_wr_regs(struct dvb_usb_device *d, u32 reg, u8 *val, int len) { struct usb_interface *intf = d->intf; u8 wbuf[MAX_XFER_SIZE]; u8 mbox = (reg >> 16) & 0xff; struct usb_req req = { CMD_MEM_WR, mbox, 6 + len, wbuf, 0, NULL }; if (6 + len > sizeof(wbuf)) { dev_warn(&intf->dev, "i2c wr: len=%d is too big!\n", len); return -EOPNOTSUPP; } wbuf[0] = len; wbuf[1] = 2; wbuf[2] = 0; wbuf[3] = 0; wbuf[4] = (reg >> 8) & 0xff; wbuf[5] = (reg >> 0) & 0xff; memcpy(&wbuf[6], val, len); return af9035_ctrl_msg(d, &req); } /* read multiple registers */ static int af9035_rd_regs(struct dvb_usb_device *d, u32 reg, u8 *val, int len) { u8 wbuf[] = { len, 2, 0, 0, (reg >> 8) & 0xff, reg & 0xff }; u8 mbox = (reg >> 16) & 0xff; struct usb_req req = { CMD_MEM_RD, mbox, sizeof(wbuf), wbuf, len, val }; return af9035_ctrl_msg(d, &req); } /* write single register */ static int af9035_wr_reg(struct dvb_usb_device *d, u32 reg, u8 val) { return af9035_wr_regs(d, reg, &val, 1); } /* read single register */ static int af9035_rd_reg(struct dvb_usb_device *d, u32 reg, u8 *val) { return af9035_rd_regs(d, reg, val, 1); } /* write single register with mask */ static int af9035_wr_reg_mask(struct dvb_usb_device *d, u32 reg, u8 val, u8 mask) { int ret; u8 tmp; /* no need for read if whole reg is written */ if (mask != 0xff) { ret = af9035_rd_regs(d, reg, &tmp, 1); if (ret) return ret; val &= mask; tmp &= ~mask; val |= tmp; } return af9035_wr_regs(d, reg, &val, 1); } static int af9035_add_i2c_dev(struct dvb_usb_device *d, const char *type, u8 addr, void *platform_data, struct i2c_adapter *adapter) { int ret, num; struct state *state = 
d_to_priv(d); struct usb_interface *intf = d->intf; struct i2c_client *client; struct i2c_board_info board_info = { .addr = addr, .platform_data = platform_data, }; strscpy(board_info.type, type, I2C_NAME_SIZE); /* find first free client */ for (num = 0; num < AF9035_I2C_CLIENT_MAX; num++) { if (state->i2c_client[num] == NULL) break; } dev_dbg(&intf->dev, "num=%d\n", num); if (num == AF9035_I2C_CLIENT_MAX) { dev_err(&intf->dev, "I2C client out of index\n"); ret = -ENODEV; goto err; } request_module("%s", board_info.type); /* register I2C device */ client = i2c_new_client_device(adapter, &board_info); if (!i2c_client_has_driver(client)) { dev_err(&intf->dev, "failed to bind i2c device to %s driver\n", type); ret = -ENODEV; goto err; } /* increase I2C driver usage count */ if (!try_module_get(client->dev.driver->owner)) { i2c_unregister_device(client); ret = -ENODEV; goto err; } state->i2c_client[num] = client; return 0; err: dev_dbg(&intf->dev, "failed=%d\n", ret); return ret; } static void af9035_del_i2c_dev(struct dvb_usb_device *d) { int num; struct state *state = d_to_priv(d); struct usb_interface *intf = d->intf; struct i2c_client *client; /* find last used client */ num = AF9035_I2C_CLIENT_MAX; while (num--) { if (state->i2c_client[num] != NULL) break; } dev_dbg(&intf->dev, "num=%d\n", num); if (num == -1) { dev_err(&intf->dev, "I2C client out of index\n"); goto err; } client = state->i2c_client[num]; /* decrease I2C driver usage count */ module_put(client->dev.driver->owner); /* unregister I2C device */ i2c_unregister_device(client); state->i2c_client[num] = NULL; return; err: dev_dbg(&intf->dev, "failed\n"); } static int af9035_i2c_master_xfer(struct i2c_adapter *adap, struct i2c_msg msg[], int num) { struct dvb_usb_device *d = i2c_get_adapdata(adap); struct state *state = d_to_priv(d); int ret; if (mutex_lock_interruptible(&d->i2c_mutex) < 0) return -EAGAIN; /* * AF9035 I2C sub header is 5 bytes long. Meaning of those bytes are: * 0: data len * 1: I2C addr << 1 * 2: reg addr len * byte 3 and 4 can be used as reg addr * 3: reg addr MSB * used when reg addr len is set to 2 * 4: reg addr LSB * used when reg addr len is set to 1 or 2 * * For the simplify we do not use register addr at all. * NOTE: As a firmware knows tuner type there is very small possibility * there could be some tuner I2C hacks done by firmware and this may * lead problems if firmware expects those bytes are used. * * TODO: Here is few hacks. AF9035 chip integrates AF9033 demodulator. * IT9135 chip integrates AF9033 demodulator and RF tuner. For dual * tuner devices, there is also external AF9033 demodulator connected * via external I2C bus. All AF9033 demod I2C traffic, both single and * dual tuner configuration, is covered by firmware - actual USB IO * looks just like a memory access. * In case of IT913x chip, there is own tuner driver. It is implemented * currently as a I2C driver, even tuner IP block is likely build * directly into the demodulator memory space and there is no own I2C * bus. I2C subsystem does not allow register multiple devices to same * bus, having same slave address. Due to that we reuse demod address, * shifted by one bit, on that case. 
* * For IT930x we use a different command and the sub header is * different as well: * 0: data len * 1: I2C bus (0x03 seems to be only value used) * 2: I2C addr << 1 */ #define AF9035_IS_I2C_XFER_WRITE_READ(_msg, _num) \ (_num == 2 && !(_msg[0].flags & I2C_M_RD) && (_msg[1].flags & I2C_M_RD)) #define AF9035_IS_I2C_XFER_WRITE(_msg, _num) \ (_num == 1 && !(_msg[0].flags & I2C_M_RD)) #define AF9035_IS_I2C_XFER_READ(_msg, _num) \ (_num == 1 && (_msg[0].flags & I2C_M_RD)) if (AF9035_IS_I2C_XFER_WRITE_READ(msg, num)) { if (msg[0].len > 40 || msg[1].len > 40) { /* TODO: correct limits > 40 */ ret = -EOPNOTSUPP; } else if ((msg[0].addr == state->af9033_i2c_addr[0]) || (msg[0].addr == state->af9033_i2c_addr[1])) { /* demod access via firmware interface */ u32 reg; if (msg[0].len < 3 || msg[1].len < 1) { ret = -EOPNOTSUPP; goto unlock; } reg = msg[0].buf[0] << 16 | msg[0].buf[1] << 8 | msg[0].buf[2]; if (msg[0].addr == state->af9033_i2c_addr[1]) reg |= 0x100000; ret = af9035_rd_regs(d, reg, &msg[1].buf[0], msg[1].len); } else if (state->no_read) { memset(msg[1].buf, 0, msg[1].len); ret = 0; } else { /* I2C write + read */ u8 buf[MAX_XFER_SIZE]; struct usb_req req = { CMD_I2C_RD, 0, 5 + msg[0].len, buf, msg[1].len, msg[1].buf }; if (state->chip_type == 0x9306) { req.cmd = CMD_GENERIC_I2C_RD; req.wlen = 3 + msg[0].len; } req.mbox |= ((msg[0].addr & 0x80) >> 3); buf[0] = msg[1].len; if (state->chip_type == 0x9306) { buf[1] = 0x03; /* I2C bus */ buf[2] = msg[0].addr << 1; memcpy(&buf[3], msg[0].buf, msg[0].len); } else { buf[1] = msg[0].addr << 1; buf[3] = 0x00; /* reg addr MSB */ buf[4] = 0x00; /* reg addr LSB */ /* Keep prev behavior for write req len > 2*/ if (msg[0].len > 2) { buf[2] = 0x00; /* reg addr len */ memcpy(&buf[5], msg[0].buf, msg[0].len); /* Use reg addr fields if write req len <= 2 */ } else { req.wlen = 5; buf[2] = msg[0].len; if (msg[0].len == 2) { buf[3] = msg[0].buf[0]; buf[4] = msg[0].buf[1]; } else if (msg[0].len == 1) { buf[4] = msg[0].buf[0]; } } } ret = af9035_ctrl_msg(d, &req); } } else if (AF9035_IS_I2C_XFER_WRITE(msg, num)) { if (msg[0].len > 40) { /* TODO: correct limits > 40 */ ret = -EOPNOTSUPP; } else if ((msg[0].addr == state->af9033_i2c_addr[0]) || (msg[0].addr == state->af9033_i2c_addr[1])) { /* demod access via firmware interface */ u32 reg; if (msg[0].len < 3) { ret = -EOPNOTSUPP; goto unlock; } reg = msg[0].buf[0] << 16 | msg[0].buf[1] << 8 | msg[0].buf[2]; if (msg[0].addr == state->af9033_i2c_addr[1]) reg |= 0x100000; ret = af9035_wr_regs(d, reg, &msg[0].buf[3], msg[0].len - 3); } else { /* I2C write */ u8 buf[MAX_XFER_SIZE]; struct usb_req req = { CMD_I2C_WR, 0, 5 + msg[0].len, buf, 0, NULL }; if (state->chip_type == 0x9306) { req.cmd = CMD_GENERIC_I2C_WR; req.wlen = 3 + msg[0].len; } req.mbox |= ((msg[0].addr & 0x80) >> 3); buf[0] = msg[0].len; if (state->chip_type == 0x9306) { buf[1] = 0x03; /* I2C bus */ buf[2] = msg[0].addr << 1; memcpy(&buf[3], msg[0].buf, msg[0].len); } else { buf[1] = msg[0].addr << 1; buf[2] = 0x00; /* reg addr len */ buf[3] = 0x00; /* reg addr MSB */ buf[4] = 0x00; /* reg addr LSB */ memcpy(&buf[5], msg[0].buf, msg[0].len); } ret = af9035_ctrl_msg(d, &req); } } else if (AF9035_IS_I2C_XFER_READ(msg, num)) { if (msg[0].len > 40) { /* TODO: correct limits > 40 */ ret = -EOPNOTSUPP; } else if (state->no_read) { memset(msg[0].buf, 0, msg[0].len); ret = 0; } else { /* I2C read */ u8 buf[5]; struct usb_req req = { CMD_I2C_RD, 0, sizeof(buf), buf, msg[0].len, msg[0].buf }; if (state->chip_type == 0x9306) { req.cmd = CMD_GENERIC_I2C_RD; req.wlen 
= 3; } req.mbox |= ((msg[0].addr & 0x80) >> 3); buf[0] = msg[0].len; if (state->chip_type == 0x9306) { buf[1] = 0x03; /* I2C bus */ buf[2] = msg[0].addr << 1; } else { buf[1] = msg[0].addr << 1; buf[2] = 0x00; /* reg addr len */ buf[3] = 0x00; /* reg addr MSB */ buf[4] = 0x00; /* reg addr LSB */ } ret = af9035_ctrl_msg(d, &req); } } else { /* * We support only three kind of I2C transactions: * 1) 1 x write + 1 x read (repeated start) * 2) 1 x write * 3) 1 x read */ ret = -EOPNOTSUPP; } unlock: mutex_unlock(&d->i2c_mutex); if (ret < 0) return ret; else return num; } static u32 af9035_i2c_functionality(struct i2c_adapter *adapter) { return I2C_FUNC_I2C; } static struct i2c_algorithm af9035_i2c_algo = { .master_xfer = af9035_i2c_master_xfer, .functionality = af9035_i2c_functionality, }; static int af9035_identify_state(struct dvb_usb_device *d, const char **name) { struct state *state = d_to_priv(d); struct usb_interface *intf = d->intf; int ret, i, ts_mode_invalid; unsigned int utmp, eeprom_addr; u8 tmp; u8 wbuf[1] = { 1 }; u8 rbuf[4]; struct usb_req req = { CMD_FW_QUERYINFO, 0, sizeof(wbuf), wbuf, sizeof(rbuf), rbuf }; ret = af9035_rd_regs(d, 0x1222, rbuf, 3); if (ret < 0) goto err; state->chip_version = rbuf[0]; state->chip_type = rbuf[2] << 8 | rbuf[1] << 0; ret = af9035_rd_reg(d, 0x384f, &state->prechip_version); if (ret < 0) goto err; dev_info(&intf->dev, "prechip_version=%02x chip_version=%02x chip_type=%04x\n", state->prechip_version, state->chip_version, state->chip_type); if (state->chip_type == 0x9135) { if (state->chip_version == 0x02) { *name = AF9035_FIRMWARE_IT9135_V2; utmp = 0x00461d; } else { *name = AF9035_FIRMWARE_IT9135_V1; utmp = 0x00461b; } /* Check if eeprom exists */ ret = af9035_rd_reg(d, utmp, &tmp); if (ret < 0) goto err; if (tmp == 0x00) { dev_dbg(&intf->dev, "no eeprom\n"); state->no_eeprom = true; goto check_firmware_status; } eeprom_addr = EEPROM_BASE_IT9135; } else if (state->chip_type == 0x9306) { *name = AF9035_FIRMWARE_IT9303; state->no_eeprom = true; goto check_firmware_status; } else { *name = AF9035_FIRMWARE_AF9035; eeprom_addr = EEPROM_BASE_AF9035; } /* Read and store eeprom */ for (i = 0; i < 256; i += 32) { ret = af9035_rd_regs(d, eeprom_addr + i, &state->eeprom[i], 32); if (ret < 0) goto err; } dev_dbg(&intf->dev, "eeprom dump:\n"); for (i = 0; i < 256; i += 16) dev_dbg(&intf->dev, "%*ph\n", 16, &state->eeprom[i]); /* check for dual tuner mode */ tmp = state->eeprom[EEPROM_TS_MODE]; ts_mode_invalid = 0; switch (tmp) { case 0: break; case 1: case 3: state->dual_mode = true; break; case 5: if (state->chip_type != 0x9135 && state->chip_type != 0x9306) state->dual_mode = true; /* AF9035 */ else ts_mode_invalid = 1; break; default: ts_mode_invalid = 1; } dev_dbg(&intf->dev, "ts mode=%d dual mode=%d\n", tmp, state->dual_mode); if (ts_mode_invalid) dev_info(&intf->dev, "ts mode=%d not supported, defaulting to single tuner mode!", tmp); check_firmware_status: ret = af9035_ctrl_msg(d, &req); if (ret < 0) goto err; dev_dbg(&intf->dev, "reply=%*ph\n", 4, rbuf); if (rbuf[0] || rbuf[1] || rbuf[2] || rbuf[3]) ret = WARM; else ret = COLD; return ret; err: dev_dbg(&intf->dev, "failed=%d\n", ret); return ret; } static int af9035_download_firmware_old(struct dvb_usb_device *d, const struct firmware *fw) { struct usb_interface *intf = d->intf; int ret, i, j, len; u8 wbuf[1]; struct usb_req req = { 0, 0, 0, NULL, 0, NULL }; struct usb_req req_fw_dl = { CMD_FW_DL, 0, 0, wbuf, 0, NULL }; u8 hdr_core; u16 hdr_addr, hdr_data_len, hdr_checksum; #define MAX_DATA 58 #define 
HDR_SIZE 7 /* * Thanks to Daniel Glöckner <daniel-gl@gmx.net> about that info! * * byte 0: MCS 51 core * There are two inside the AF9035 (1=Link and 2=OFDM) with separate * address spaces * byte 1-2: Big endian destination address * byte 3-4: Big endian number of data bytes following the header * byte 5-6: Big endian header checksum, apparently ignored by the chip * Calculated as ~(h[0]*256+h[1]+h[2]*256+h[3]+h[4]*256) */ for (i = fw->size; i > HDR_SIZE;) { hdr_core = fw->data[fw->size - i + 0]; hdr_addr = fw->data[fw->size - i + 1] << 8; hdr_addr |= fw->data[fw->size - i + 2] << 0; hdr_data_len = fw->data[fw->size - i + 3] << 8; hdr_data_len |= fw->data[fw->size - i + 4] << 0; hdr_checksum = fw->data[fw->size - i + 5] << 8; hdr_checksum |= fw->data[fw->size - i + 6] << 0; dev_dbg(&intf->dev, "core=%d addr=%04x data_len=%d checksum=%04x\n", hdr_core, hdr_addr, hdr_data_len, hdr_checksum); if (((hdr_core != 1) && (hdr_core != 2)) || (hdr_data_len > i)) { dev_dbg(&intf->dev, "bad firmware\n"); break; } /* download begin packet */ req.cmd = CMD_FW_DL_BEGIN; ret = af9035_ctrl_msg(d, &req); if (ret < 0) goto err; /* download firmware packet(s) */ for (j = HDR_SIZE + hdr_data_len; j > 0; j -= MAX_DATA) { len = j; if (len > MAX_DATA) len = MAX_DATA; req_fw_dl.wlen = len; req_fw_dl.wbuf = (u8 *) &fw->data[fw->size - i + HDR_SIZE + hdr_data_len - j]; ret = af9035_ctrl_msg(d, &req_fw_dl); if (ret < 0) goto err; } /* download end packet */ req.cmd = CMD_FW_DL_END; ret = af9035_ctrl_msg(d, &req); if (ret < 0) goto err; i -= hdr_data_len + HDR_SIZE; dev_dbg(&intf->dev, "data uploaded=%zu\n", fw->size - i); } /* print warn if firmware is bad, continue and see what happens */ if (i) dev_warn(&intf->dev, "bad firmware\n"); return 0; err: dev_dbg(&intf->dev, "failed=%d\n", ret); return ret; } static int af9035_download_firmware_new(struct dvb_usb_device *d, const struct firmware *fw) { struct usb_interface *intf = d->intf; int ret, i, i_prev; struct usb_req req_fw_dl = { CMD_FW_SCATTER_WR, 0, 0, NULL, 0, NULL }; #define HDR_SIZE 7 /* * There seems to be following firmware header. Meaning of bytes 0-3 * is unknown. * * 0: 3 * 1: 0, 1 * 2: 0 * 3: 1, 2, 3 * 4: addr MSB * 5: addr LSB * 6: count of data bytes ? */ for (i = HDR_SIZE, i_prev = 0; i <= fw->size; i++) { if (i == fw->size || (fw->data[i + 0] == 0x03 && (fw->data[i + 1] == 0x00 || fw->data[i + 1] == 0x01) && fw->data[i + 2] == 0x00)) { req_fw_dl.wlen = i - i_prev; req_fw_dl.wbuf = (u8 *) &fw->data[i_prev]; i_prev = i; ret = af9035_ctrl_msg(d, &req_fw_dl); if (ret < 0) goto err; dev_dbg(&intf->dev, "data uploaded=%d\n", i); } } return 0; err: dev_dbg(&intf->dev, "failed=%d\n", ret); return ret; } static int af9035_download_firmware(struct dvb_usb_device *d, const struct firmware *fw) { struct usb_interface *intf = d->intf; struct state *state = d_to_priv(d); int ret; u8 wbuf[1]; u8 rbuf[4]; u8 tmp; struct usb_req req = { 0, 0, 0, NULL, 0, NULL }; struct usb_req req_fw_ver = { CMD_FW_QUERYINFO, 0, 1, wbuf, 4, rbuf }; dev_dbg(&intf->dev, "\n"); /* * In case of dual tuner configuration we need to do some extra * initialization in order to download firmware to slave demod too, * which is done by master demod. * Master feeds also clock and controls power via GPIO. 
*/ if (state->dual_mode) { /* configure gpioh1, reset & power slave demod */ ret = af9035_wr_reg_mask(d, 0x00d8b0, 0x01, 0x01); if (ret < 0) goto err; ret = af9035_wr_reg_mask(d, 0x00d8b1, 0x01, 0x01); if (ret < 0) goto err; ret = af9035_wr_reg_mask(d, 0x00d8af, 0x00, 0x01); if (ret < 0) goto err; usleep_range(10000, 50000); ret = af9035_wr_reg_mask(d, 0x00d8af, 0x01, 0x01); if (ret < 0) goto err; /* tell the slave I2C address */ tmp = state->eeprom[EEPROM_2ND_DEMOD_ADDR]; /* Use default I2C address if eeprom has no address set */ if (!tmp) tmp = 0x1d << 1; /* 8-bit format used by chip */ if ((state->chip_type == 0x9135) || (state->chip_type == 0x9306)) { ret = af9035_wr_reg(d, 0x004bfb, tmp); if (ret < 0) goto err; } else { ret = af9035_wr_reg(d, 0x00417f, tmp); if (ret < 0) goto err; /* enable clock out */ ret = af9035_wr_reg_mask(d, 0x00d81a, 0x01, 0x01); if (ret < 0) goto err; } } if (fw->data[0] == 0x01) ret = af9035_download_firmware_old(d, fw); else ret = af9035_download_firmware_new(d, fw); if (ret < 0) goto err; /* firmware loaded, request boot */ req.cmd = CMD_FW_BOOT; ret = af9035_ctrl_msg(d, &req); if (ret < 0) goto err; /* ensure firmware starts */ wbuf[0] = 1; ret = af9035_ctrl_msg(d, &req_fw_ver); if (ret < 0) goto err; if (!(rbuf[0] || rbuf[1] || rbuf[2] || rbuf[3])) { dev_err(&intf->dev, "firmware did not run\n"); ret = -ENODEV; goto err; } dev_info(&intf->dev, "firmware version=%d.%d.%d.%d", rbuf[0], rbuf[1], rbuf[2], rbuf[3]); return 0; err: dev_dbg(&intf->dev, "failed=%d\n", ret); return ret; } static int af9035_read_config(struct dvb_usb_device *d) { struct usb_interface *intf = d->intf; struct state *state = d_to_priv(d); int ret, i; u8 tmp; u16 tmp16; /* Demod I2C address */ state->af9033_i2c_addr[0] = 0x1c; state->af9033_i2c_addr[1] = 0x1d; state->af9033_config[0].adc_multiplier = AF9033_ADC_MULTIPLIER_2X; state->af9033_config[1].adc_multiplier = AF9033_ADC_MULTIPLIER_2X; state->af9033_config[0].ts_mode = AF9033_TS_MODE_USB; state->af9033_config[1].ts_mode = AF9033_TS_MODE_SERIAL; state->it930x_addresses = 0; if (state->chip_type == 0x9135) { /* feed clock for integrated RF tuner */ state->af9033_config[0].dyn0_clk = true; state->af9033_config[1].dyn0_clk = true; if (state->chip_version == 0x02) { state->af9033_config[0].tuner = AF9033_TUNER_IT9135_60; state->af9033_config[1].tuner = AF9033_TUNER_IT9135_60; } else { state->af9033_config[0].tuner = AF9033_TUNER_IT9135_38; state->af9033_config[1].tuner = AF9033_TUNER_IT9135_38; } if (state->no_eeprom) { /* Remote controller to NEC polling by default */ state->ir_mode = 0x05; state->ir_type = 0x00; goto skip_eeprom; } } else if (state->chip_type == 0x9306) { /* * IT930x is an USB bridge, only single demod-single tuner * configurations seen so far. */ if ((le16_to_cpu(d->udev->descriptor.idVendor) == USB_VID_AVERMEDIA) && (le16_to_cpu(d->udev->descriptor.idProduct) == USB_PID_AVERMEDIA_TD310)) { state->it930x_addresses = 1; /* TD310 RC works with NEC defaults */ state->ir_mode = 0x05; state->ir_type = 0x00; } return 0; } /* Remote controller */ state->ir_mode = state->eeprom[EEPROM_IR_MODE]; state->ir_type = state->eeprom[EEPROM_IR_TYPE]; if (state->dual_mode) { /* Read 2nd demodulator I2C address. 
8-bit format on eeprom */ tmp = state->eeprom[EEPROM_2ND_DEMOD_ADDR]; if (tmp) state->af9033_i2c_addr[1] = tmp >> 1; dev_dbg(&intf->dev, "2nd demod I2C addr=%02x\n", state->af9033_i2c_addr[1]); } for (i = 0; i < state->dual_mode + 1; i++) { unsigned int eeprom_offset = 0; /* tuner */ tmp = state->eeprom[EEPROM_1_TUNER_ID + eeprom_offset]; dev_dbg(&intf->dev, "[%d]tuner=%02x\n", i, tmp); /* tuner sanity check */ if (state->chip_type == 0x9135) { if (state->chip_version == 0x02) { /* IT9135 BX (v2) */ switch (tmp) { case AF9033_TUNER_IT9135_60: case AF9033_TUNER_IT9135_61: case AF9033_TUNER_IT9135_62: state->af9033_config[i].tuner = tmp; break; } } else { /* IT9135 AX (v1) */ switch (tmp) { case AF9033_TUNER_IT9135_38: case AF9033_TUNER_IT9135_51: case AF9033_TUNER_IT9135_52: state->af9033_config[i].tuner = tmp; break; } } } else { /* AF9035 */ state->af9033_config[i].tuner = tmp; } if (state->af9033_config[i].tuner != tmp) { dev_info(&intf->dev, "[%d] overriding tuner from %02x to %02x\n", i, tmp, state->af9033_config[i].tuner); } switch (state->af9033_config[i].tuner) { case AF9033_TUNER_TUA9001: case AF9033_TUNER_FC0011: case AF9033_TUNER_MXL5007T: case AF9033_TUNER_TDA18218: case AF9033_TUNER_FC2580: case AF9033_TUNER_FC0012: state->af9033_config[i].spec_inv = 1; break; case AF9033_TUNER_IT9135_38: case AF9033_TUNER_IT9135_51: case AF9033_TUNER_IT9135_52: case AF9033_TUNER_IT9135_60: case AF9033_TUNER_IT9135_61: case AF9033_TUNER_IT9135_62: break; default: dev_warn(&intf->dev, "tuner id=%02x not supported, please report!", tmp); } /* disable dual mode if driver does not support it */ if (i == 1) switch (state->af9033_config[i].tuner) { case AF9033_TUNER_FC0012: case AF9033_TUNER_IT9135_38: case AF9033_TUNER_IT9135_51: case AF9033_TUNER_IT9135_52: case AF9033_TUNER_IT9135_60: case AF9033_TUNER_IT9135_61: case AF9033_TUNER_IT9135_62: case AF9033_TUNER_MXL5007T: break; default: state->dual_mode = false; dev_info(&intf->dev, "driver does not support 2nd tuner and will disable it"); } /* tuner IF frequency */ tmp = state->eeprom[EEPROM_1_IF_L + eeprom_offset]; tmp16 = tmp << 0; tmp = state->eeprom[EEPROM_1_IF_H + eeprom_offset]; tmp16 |= tmp << 8; dev_dbg(&intf->dev, "[%d]IF=%d\n", i, tmp16); eeprom_offset += 0x10; /* shift for the 2nd tuner params */ } skip_eeprom: /* get demod clock */ ret = af9035_rd_reg(d, 0x00d800, &tmp); if (ret < 0) goto err; tmp = (tmp >> 0) & 0x0f; for (i = 0; i < ARRAY_SIZE(state->af9033_config); i++) { if (state->chip_type == 0x9135) state->af9033_config[i].clock = clock_lut_it9135[tmp]; else state->af9033_config[i].clock = clock_lut_af9035[tmp]; } state->no_read = false; /* Some MXL5007T devices cannot properly handle tuner I2C read ops. */ if (state->af9033_config[0].tuner == AF9033_TUNER_MXL5007T && le16_to_cpu(d->udev->descriptor.idVendor) == USB_VID_AVERMEDIA) switch (le16_to_cpu(d->udev->descriptor.idProduct)) { case USB_PID_AVERMEDIA_A867: case USB_PID_AVERMEDIA_TWINSTAR: dev_info(&intf->dev, "Device may have issues with I2C read operations. 
Enabling fix.\n"); state->no_read = true; break; } return 0; err: dev_dbg(&intf->dev, "failed=%d\n", ret); return ret; } static int af9035_tua9001_tuner_callback(struct dvb_usb_device *d, int cmd, int arg) { struct usb_interface *intf = d->intf; int ret; u8 val; dev_dbg(&intf->dev, "cmd=%d arg=%d\n", cmd, arg); /* * CEN always enabled by hardware wiring * RESETN GPIOT3 * RXEN GPIOT2 */ switch (cmd) { case TUA9001_CMD_RESETN: if (arg) val = 0x00; else val = 0x01; ret = af9035_wr_reg_mask(d, 0x00d8e7, val, 0x01); if (ret < 0) goto err; break; case TUA9001_CMD_RXEN: if (arg) val = 0x01; else val = 0x00; ret = af9035_wr_reg_mask(d, 0x00d8eb, val, 0x01); if (ret < 0) goto err; break; } return 0; err: dev_dbg(&intf->dev, "failed=%d\n", ret); return ret; } static int af9035_fc0011_tuner_callback(struct dvb_usb_device *d, int cmd, int arg) { struct usb_interface *intf = d->intf; int ret; switch (cmd) { case FC0011_FE_CALLBACK_POWER: /* Tuner enable */ ret = af9035_wr_reg_mask(d, 0xd8eb, 1, 1); if (ret < 0) goto err; ret = af9035_wr_reg_mask(d, 0xd8ec, 1, 1); if (ret < 0) goto err; ret = af9035_wr_reg_mask(d, 0xd8ed, 1, 1); if (ret < 0) goto err; /* LED */ ret = af9035_wr_reg_mask(d, 0xd8d0, 1, 1); if (ret < 0) goto err; ret = af9035_wr_reg_mask(d, 0xd8d1, 1, 1); if (ret < 0) goto err; usleep_range(10000, 50000); break; case FC0011_FE_CALLBACK_RESET: ret = af9035_wr_reg(d, 0xd8e9, 1); if (ret < 0) goto err; ret = af9035_wr_reg(d, 0xd8e8, 1); if (ret < 0) goto err; ret = af9035_wr_reg(d, 0xd8e7, 1); if (ret < 0) goto err; usleep_range(10000, 20000); ret = af9035_wr_reg(d, 0xd8e7, 0); if (ret < 0) goto err; usleep_range(10000, 20000); break; default: ret = -EINVAL; goto err; } return 0; err: dev_dbg(&intf->dev, "failed=%d\n", ret); return ret; } static int af9035_tuner_callback(struct dvb_usb_device *d, int cmd, int arg) { struct state *state = d_to_priv(d); switch (state->af9033_config[0].tuner) { case AF9033_TUNER_FC0011: return af9035_fc0011_tuner_callback(d, cmd, arg); case AF9033_TUNER_TUA9001: return af9035_tua9001_tuner_callback(d, cmd, arg); default: break; } return 0; } static int af9035_frontend_callback(void *adapter_priv, int component, int cmd, int arg) { struct i2c_adapter *adap = adapter_priv; struct dvb_usb_device *d = i2c_get_adapdata(adap); struct usb_interface *intf = d->intf; dev_dbg(&intf->dev, "component=%d cmd=%d arg=%d\n", component, cmd, arg); switch (component) { case DVB_FRONTEND_COMPONENT_TUNER: return af9035_tuner_callback(d, cmd, arg); default: break; } return 0; } static int af9035_get_adapter_count(struct dvb_usb_device *d) { struct state *state = d_to_priv(d); return state->dual_mode + 1; } static int af9035_frontend_attach(struct dvb_usb_adapter *adap) { struct state *state = adap_to_priv(adap); struct dvb_usb_device *d = adap_to_d(adap); struct usb_interface *intf = d->intf; int ret; dev_dbg(&intf->dev, "adap->id=%d\n", adap->id); if (!state->af9033_config[adap->id].tuner) { /* unsupported tuner */ ret = -ENODEV; goto err; } state->af9033_config[adap->id].fe = &adap->fe[0]; state->af9033_config[adap->id].ops = &state->ops; ret = af9035_add_i2c_dev(d, "af9033", state->af9033_i2c_addr[adap->id], &state->af9033_config[adap->id], &d->i2c_adap); if (ret) goto err; if (adap->fe[0] == NULL) { ret = -ENODEV; goto err; } /* disable I2C-gate */ adap->fe[0]->ops.i2c_gate_ctrl = NULL; adap->fe[0]->callback = af9035_frontend_callback; return 0; err: dev_dbg(&intf->dev, "failed=%d\n", ret); return ret; } /* * The I2C speed register is calculated with: * I2C speed register = 
(1000000000 / (24.4 * 16 * I2C_speed)) * * The default speed register for it930x is 7, with means a * speed of ~366 kbps */ #define I2C_SPEED_366K 7 static int it930x_frontend_attach(struct dvb_usb_adapter *adap) { struct state *state = adap_to_priv(adap); struct dvb_usb_device *d = adap_to_d(adap); struct usb_interface *intf = d->intf; int ret; struct si2168_config si2168_config; struct i2c_adapter *adapter; dev_dbg(&intf->dev, "adap->id=%d\n", adap->id); /* I2C master bus 2 clock speed 366k */ ret = af9035_wr_reg(d, 0x00f6a7, I2C_SPEED_366K); if (ret < 0) goto err; /* I2C master bus 1,3 clock speed 366k */ ret = af9035_wr_reg(d, 0x00f103, I2C_SPEED_366K); if (ret < 0) goto err; /* set gpio11 low */ ret = af9035_wr_reg_mask(d, 0xd8d4, 0x01, 0x01); if (ret < 0) goto err; ret = af9035_wr_reg_mask(d, 0xd8d5, 0x01, 0x01); if (ret < 0) goto err; ret = af9035_wr_reg_mask(d, 0xd8d3, 0x01, 0x01); if (ret < 0) goto err; /* Tuner enable using gpiot2_en, gpiot2_on and gpiot2_o (reset) */ ret = af9035_wr_reg_mask(d, 0xd8b8, 0x01, 0x01); if (ret < 0) goto err; ret = af9035_wr_reg_mask(d, 0xd8b9, 0x01, 0x01); if (ret < 0) goto err; ret = af9035_wr_reg_mask(d, 0xd8b7, 0x00, 0x01); if (ret < 0) goto err; msleep(200); ret = af9035_wr_reg_mask(d, 0xd8b7, 0x01, 0x01); if (ret < 0) goto err; memset(&si2168_config, 0, sizeof(si2168_config)); si2168_config.i2c_adapter = &adapter; si2168_config.fe = &adap->fe[0]; si2168_config.ts_mode = SI2168_TS_SERIAL; state->af9033_config[adap->id].fe = &adap->fe[0]; state->af9033_config[adap->id].ops = &state->ops; ret = af9035_add_i2c_dev(d, "si2168", it930x_addresses_table[state->it930x_addresses].frontend_i2c_addr, &si2168_config, &d->i2c_adap); if (ret) goto err; if (adap->fe[0] == NULL) { ret = -ENODEV; goto err; } state->i2c_adapter_demod = adapter; return 0; err: dev_dbg(&intf->dev, "failed=%d\n", ret); return ret; } static int af9035_frontend_detach(struct dvb_usb_adapter *adap) { struct state *state = adap_to_priv(adap); struct dvb_usb_device *d = adap_to_d(adap); struct usb_interface *intf = d->intf; dev_dbg(&intf->dev, "adap->id=%d\n", adap->id); if (adap->id == 1) { if (state->i2c_client[1]) af9035_del_i2c_dev(d); } else if (adap->id == 0) { if (state->i2c_client[0]) af9035_del_i2c_dev(d); } return 0; } static const struct fc0011_config af9035_fc0011_config = { .i2c_address = 0x60, }; static struct mxl5007t_config af9035_mxl5007t_config[] = { { .xtal_freq_hz = MxL_XTAL_24_MHZ, .if_freq_hz = MxL_IF_4_57_MHZ, .invert_if = 0, .loop_thru_enable = 0, .clk_out_enable = 0, .clk_out_amp = MxL_CLKOUT_AMP_0_94V, }, { .xtal_freq_hz = MxL_XTAL_24_MHZ, .if_freq_hz = MxL_IF_4_57_MHZ, .invert_if = 0, .loop_thru_enable = 1, .clk_out_enable = 1, .clk_out_amp = MxL_CLKOUT_AMP_0_94V, } }; static struct tda18218_config af9035_tda18218_config = { .i2c_address = 0x60, .i2c_wr_max = 21, }; static const struct fc0012_config af9035_fc0012_config[] = { { .i2c_address = 0x63, .xtal_freq = FC_XTAL_36_MHZ, .dual_master = true, .loop_through = true, .clock_out = true, }, { .i2c_address = 0x63 | 0x80, /* I2C bus select hack */ .xtal_freq = FC_XTAL_36_MHZ, .dual_master = true, } }; static int af9035_tuner_attach(struct dvb_usb_adapter *adap) { struct state *state = adap_to_priv(adap); struct dvb_usb_device *d = adap_to_d(adap); struct usb_interface *intf = d->intf; int ret; struct dvb_frontend *fe; struct i2c_msg msg[1]; u8 tuner_addr; dev_dbg(&intf->dev, "adap->id=%d\n", adap->id); /* * XXX: Hack used in that function: we abuse unused I2C address bit [7] * to carry info about used I2C bus for 
dual tuner configuration. */ switch (state->af9033_config[adap->id].tuner) { case AF9033_TUNER_TUA9001: { struct tua9001_platform_data tua9001_pdata = { .dvb_frontend = adap->fe[0], }; /* * AF9035 gpiot3 = TUA9001 RESETN * AF9035 gpiot2 = TUA9001 RXEN */ /* configure gpiot2 and gpiot2 as output */ ret = af9035_wr_reg_mask(d, 0x00d8ec, 0x01, 0x01); if (ret < 0) goto err; ret = af9035_wr_reg_mask(d, 0x00d8ed, 0x01, 0x01); if (ret < 0) goto err; ret = af9035_wr_reg_mask(d, 0x00d8e8, 0x01, 0x01); if (ret < 0) goto err; ret = af9035_wr_reg_mask(d, 0x00d8e9, 0x01, 0x01); if (ret < 0) goto err; /* attach tuner */ ret = af9035_add_i2c_dev(d, "tua9001", 0x60, &tua9001_pdata, &d->i2c_adap); if (ret) goto err; fe = adap->fe[0]; break; } case AF9033_TUNER_FC0011: fe = dvb_attach(fc0011_attach, adap->fe[0], &d->i2c_adap, &af9035_fc0011_config); break; case AF9033_TUNER_MXL5007T: if (adap->id == 0) { ret = af9035_wr_reg(d, 0x00d8e0, 1); if (ret < 0) goto err; ret = af9035_wr_reg(d, 0x00d8e1, 1); if (ret < 0) goto err; ret = af9035_wr_reg(d, 0x00d8df, 0); if (ret < 0) goto err; msleep(30); ret = af9035_wr_reg(d, 0x00d8df, 1); if (ret < 0) goto err; msleep(300); ret = af9035_wr_reg(d, 0x00d8c0, 1); if (ret < 0) goto err; ret = af9035_wr_reg(d, 0x00d8c1, 1); if (ret < 0) goto err; ret = af9035_wr_reg(d, 0x00d8bf, 0); if (ret < 0) goto err; ret = af9035_wr_reg(d, 0x00d8b4, 1); if (ret < 0) goto err; ret = af9035_wr_reg(d, 0x00d8b5, 1); if (ret < 0) goto err; ret = af9035_wr_reg(d, 0x00d8b3, 1); if (ret < 0) goto err; tuner_addr = 0x60; } else { tuner_addr = 0x60 | 0x80; /* I2C bus hack */ } /* attach tuner */ fe = dvb_attach(mxl5007t_attach, adap->fe[0], &d->i2c_adap, tuner_addr, &af9035_mxl5007t_config[adap->id]); break; case AF9033_TUNER_TDA18218: /* attach tuner */ fe = dvb_attach(tda18218_attach, adap->fe[0], &d->i2c_adap, &af9035_tda18218_config); break; case AF9033_TUNER_FC2580: { struct fc2580_platform_data fc2580_pdata = { .dvb_frontend = adap->fe[0], }; /* Tuner enable using gpiot2_o, gpiot2_en and gpiot2_on */ ret = af9035_wr_reg_mask(d, 0xd8eb, 0x01, 0x01); if (ret < 0) goto err; ret = af9035_wr_reg_mask(d, 0xd8ec, 0x01, 0x01); if (ret < 0) goto err; ret = af9035_wr_reg_mask(d, 0xd8ed, 0x01, 0x01); if (ret < 0) goto err; usleep_range(10000, 50000); /* attach tuner */ ret = af9035_add_i2c_dev(d, "fc2580", 0x56, &fc2580_pdata, &d->i2c_adap); if (ret) goto err; fe = adap->fe[0]; break; } case AF9033_TUNER_FC0012: /* * AF9035 gpiot2 = FC0012 enable * XXX: there seems to be something on gpioh8 too, but on my * test I didn't find any difference. */ if (adap->id == 0) { /* configure gpiot2 as output and high */ ret = af9035_wr_reg_mask(d, 0xd8eb, 0x01, 0x01); if (ret < 0) goto err; ret = af9035_wr_reg_mask(d, 0xd8ec, 0x01, 0x01); if (ret < 0) goto err; ret = af9035_wr_reg_mask(d, 0xd8ed, 0x01, 0x01); if (ret < 0) goto err; } else { /* * FIXME: That belongs for the FC0012 driver. * Write 02 to FC0012 master tuner register 0d directly * in order to make slave tuner working. 
*/ msg[0].addr = 0x63; msg[0].flags = 0; msg[0].len = 2; msg[0].buf = "\x0d\x02"; ret = i2c_transfer(&d->i2c_adap, msg, 1); if (ret < 0) goto err; } usleep_range(10000, 50000); fe = dvb_attach(fc0012_attach, adap->fe[0], &d->i2c_adap, &af9035_fc0012_config[adap->id]); break; case AF9033_TUNER_IT9135_38: case AF9033_TUNER_IT9135_51: case AF9033_TUNER_IT9135_52: case AF9033_TUNER_IT9135_60: case AF9033_TUNER_IT9135_61: case AF9033_TUNER_IT9135_62: { struct platform_device *pdev; const char *name; struct it913x_platform_data it913x_pdata = { .regmap = state->af9033_config[adap->id].regmap, .fe = adap->fe[0], }; switch (state->af9033_config[adap->id].tuner) { case AF9033_TUNER_IT9135_38: case AF9033_TUNER_IT9135_51: case AF9033_TUNER_IT9135_52: name = "it9133ax-tuner"; break; case AF9033_TUNER_IT9135_60: case AF9033_TUNER_IT9135_61: case AF9033_TUNER_IT9135_62: name = "it9133bx-tuner"; break; default: ret = -ENODEV; goto err; } if (state->dual_mode) { if (adap->id == 0) it913x_pdata.role = IT913X_ROLE_DUAL_MASTER; else it913x_pdata.role = IT913X_ROLE_DUAL_SLAVE; } else { it913x_pdata.role = IT913X_ROLE_SINGLE; } request_module("%s", "it913x"); pdev = platform_device_register_data(&d->intf->dev, name, PLATFORM_DEVID_AUTO, &it913x_pdata, sizeof(it913x_pdata)); if (IS_ERR(pdev) || !pdev->dev.driver) { ret = -ENODEV; goto err; } if (!try_module_get(pdev->dev.driver->owner)) { platform_device_unregister(pdev); ret = -ENODEV; goto err; } state->platform_device_tuner[adap->id] = pdev; fe = adap->fe[0]; break; } default: fe = NULL; } if (fe == NULL) { ret = -ENODEV; goto err; } return 0; err: dev_dbg(&intf->dev, "failed=%d\n", ret); return ret; } static int it930x_tuner_attach(struct dvb_usb_adapter *adap) { struct state *state = adap_to_priv(adap); struct dvb_usb_device *d = adap_to_d(adap); struct usb_interface *intf = d->intf; int ret; struct si2157_config si2157_config; dev_dbg(&intf->dev, "adap->id=%d\n", adap->id); memset(&si2157_config, 0, sizeof(si2157_config)); si2157_config.fe = adap->fe[0]; /* * HACK: The Logilink VG0022A and TerraTec TC2 Stick have * a bug: when the si2157 firmware that came with the device * is replaced by a new one, the I2C transfers to the tuner * will return just 0xff. * * Probably, the vendor firmware has some patch specifically * designed for this device. So, we can't replace by the * generic firmware. The right solution would be to extract * the si2157 firmware from the original driver and ask the * driver to load the specifically designed firmware, but, * while we don't have that, the next best solution is to just * keep the original firmware at the device. 
*/ if ((le16_to_cpu(d->udev->descriptor.idVendor) == USB_VID_DEXATEK && le16_to_cpu(d->udev->descriptor.idProduct) == 0x0100) || (le16_to_cpu(d->udev->descriptor.idVendor) == USB_VID_TERRATEC && le16_to_cpu(d->udev->descriptor.idProduct) == USB_PID_TERRATEC_CINERGY_TC2_STICK)) si2157_config.dont_load_firmware = true; si2157_config.if_port = it930x_addresses_table[state->it930x_addresses].tuner_if_port; ret = af9035_add_i2c_dev(d, "si2157", it930x_addresses_table[state->it930x_addresses].tuner_i2c_addr, &si2157_config, state->i2c_adapter_demod); if (ret) goto err; return 0; err: dev_dbg(&intf->dev, "failed=%d\n", ret); return ret; } static int it930x_tuner_detach(struct dvb_usb_adapter *adap) { struct state *state = adap_to_priv(adap); struct dvb_usb_device *d = adap_to_d(adap); struct usb_interface *intf = d->intf; dev_dbg(&intf->dev, "adap->id=%d\n", adap->id); if (adap->id == 1) { if (state->i2c_client[3]) af9035_del_i2c_dev(d); } else if (adap->id == 0) { if (state->i2c_client[1]) af9035_del_i2c_dev(d); } return 0; } static int af9035_tuner_detach(struct dvb_usb_adapter *adap) { struct state *state = adap_to_priv(adap); struct dvb_usb_device *d = adap_to_d(adap); struct usb_interface *intf = d->intf; dev_dbg(&intf->dev, "adap->id=%d\n", adap->id); switch (state->af9033_config[adap->id].tuner) { case AF9033_TUNER_TUA9001: case AF9033_TUNER_FC2580: if (adap->id == 1) { if (state->i2c_client[3]) af9035_del_i2c_dev(d); } else if (adap->id == 0) { if (state->i2c_client[1]) af9035_del_i2c_dev(d); } break; case AF9033_TUNER_IT9135_38: case AF9033_TUNER_IT9135_51: case AF9033_TUNER_IT9135_52: case AF9033_TUNER_IT9135_60: case AF9033_TUNER_IT9135_61: case AF9033_TUNER_IT9135_62: { struct platform_device *pdev; pdev = state->platform_device_tuner[adap->id]; if (pdev) { module_put(pdev->dev.driver->owner); platform_device_unregister(pdev); } break; } } return 0; } static int af9035_init(struct dvb_usb_device *d) { struct state *state = d_to_priv(d); struct usb_interface *intf = d->intf; int ret, i; u16 frame_size = (d->udev->speed == USB_SPEED_FULL ? 5 : 87) * 188 / 4; u8 packet_size = (d->udev->speed == USB_SPEED_FULL ? 64 : 512) / 4; struct reg_val_mask tab[] = { { 0x80f99d, 0x01, 0x01 }, { 0x80f9a4, 0x01, 0x01 }, { 0x00dd11, 0x00, 0x20 }, { 0x00dd11, 0x00, 0x40 }, { 0x00dd13, 0x00, 0x20 }, { 0x00dd13, 0x00, 0x40 }, { 0x00dd11, 0x20, 0x20 }, { 0x00dd88, (frame_size >> 0) & 0xff, 0xff}, { 0x00dd89, (frame_size >> 8) & 0xff, 0xff}, { 0x00dd0c, packet_size, 0xff}, { 0x00dd11, state->dual_mode << 6, 0x40 }, { 0x00dd8a, (frame_size >> 0) & 0xff, 0xff}, { 0x00dd8b, (frame_size >> 8) & 0xff, 0xff}, { 0x00dd0d, packet_size, 0xff }, { 0x80f9a3, state->dual_mode, 0x01 }, { 0x80f9cd, state->dual_mode, 0x01 }, { 0x80f99d, 0x00, 0x01 }, { 0x80f9a4, 0x00, 0x01 }, }; dev_dbg(&intf->dev, "USB speed=%d frame_size=%04x packet_size=%02x\n", d->udev->speed, frame_size, packet_size); /* init endpoints */ for (i = 0; i < ARRAY_SIZE(tab); i++) { ret = af9035_wr_reg_mask(d, tab[i].reg, tab[i].val, tab[i].mask); if (ret < 0) goto err; } return 0; err: dev_dbg(&intf->dev, "failed=%d\n", ret); return ret; } static int it930x_init(struct dvb_usb_device *d) { struct state *state = d_to_priv(d); struct usb_interface *intf = d->intf; int ret, i; u16 frame_size = (d->udev->speed == USB_SPEED_FULL ? 5 : 816) * 188 / 4; u8 packet_size = (d->udev->speed == USB_SPEED_FULL ? 
64 : 512) / 4; struct reg_val_mask tab[] = { { 0x00da1a, 0x00, 0x01 }, /* ignore_sync_byte */ { 0x00f41f, 0x04, 0x04 }, /* dvbt_inten */ { 0x00da10, 0x00, 0x01 }, /* mpeg_full_speed */ { 0x00f41a, 0x01, 0x01 }, /* dvbt_en */ { 0x00da1d, 0x01, 0x01 }, /* mp2_sw_rst, reset EP4 */ { 0x00dd11, 0x00, 0x20 }, /* ep4_tx_en, disable EP4 */ { 0x00dd13, 0x00, 0x20 }, /* ep4_tx_nak, disable EP4 NAK */ { 0x00dd11, 0x20, 0x20 }, /* ep4_tx_en, enable EP4 */ { 0x00dd11, 0x00, 0x40 }, /* ep5_tx_en, disable EP5 */ { 0x00dd13, 0x00, 0x40 }, /* ep5_tx_nak, disable EP5 NAK */ { 0x00dd11, state->dual_mode << 6, 0x40 }, /* enable EP5 */ { 0x00dd88, (frame_size >> 0) & 0xff, 0xff}, { 0x00dd89, (frame_size >> 8) & 0xff, 0xff}, { 0x00dd0c, packet_size, 0xff}, { 0x00dd8a, (frame_size >> 0) & 0xff, 0xff}, { 0x00dd8b, (frame_size >> 8) & 0xff, 0xff}, { 0x00dd0d, packet_size, 0xff }, { 0x00da1d, 0x00, 0x01 }, /* mp2_sw_rst, disable */ { 0x00d833, 0x01, 0xff }, /* slew rate ctrl: slew rate boosts */ { 0x00d830, 0x00, 0xff }, /* Bit 0 of output driving control */ { 0x00d831, 0x01, 0xff }, /* Bit 1 of output driving control */ { 0x00d832, 0x00, 0xff }, /* Bit 2 of output driving control */ /* suspend gpio1 for TS-C */ { 0x00d8b0, 0x01, 0xff }, /* gpio1 */ { 0x00d8b1, 0x01, 0xff }, /* gpio1 */ { 0x00d8af, 0x00, 0xff }, /* gpio1 */ /* suspend gpio7 for TS-D */ { 0x00d8c4, 0x01, 0xff }, /* gpio7 */ { 0x00d8c5, 0x01, 0xff }, /* gpio7 */ { 0x00d8c3, 0x00, 0xff }, /* gpio7 */ /* suspend gpio13 for TS-B */ { 0x00d8dc, 0x01, 0xff }, /* gpio13 */ { 0x00d8dd, 0x01, 0xff }, /* gpio13 */ { 0x00d8db, 0x00, 0xff }, /* gpio13 */ /* suspend gpio14 for TS-E */ { 0x00d8e4, 0x01, 0xff }, /* gpio14 */ { 0x00d8e5, 0x01, 0xff }, /* gpio14 */ { 0x00d8e3, 0x00, 0xff }, /* gpio14 */ /* suspend gpio15 for TS-A */ { 0x00d8e8, 0x01, 0xff }, /* gpio15 */ { 0x00d8e9, 0x01, 0xff }, /* gpio15 */ { 0x00d8e7, 0x00, 0xff }, /* gpio15 */ { 0x00da58, 0x00, 0x01 }, /* ts_in_src, serial */ { 0x00da73, 0x01, 0xff }, /* ts0_aggre_mode */ { 0x00da78, 0x47, 0xff }, /* ts0_sync_byte */ { 0x00da4c, 0x01, 0xff }, /* ts0_en */ { 0x00da5a, 0x1f, 0xff }, /* ts_fail_ignore */ }; dev_dbg(&intf->dev, "USB speed=%d frame_size=%04x packet_size=%02x\n", d->udev->speed, frame_size, packet_size); /* init endpoints */ for (i = 0; i < ARRAY_SIZE(tab); i++) { ret = af9035_wr_reg_mask(d, tab[i].reg, tab[i].val, tab[i].mask); if (ret < 0) goto err; } return 0; err: dev_dbg(&intf->dev, "failed=%d\n", ret); return ret; } #if IS_ENABLED(CONFIG_RC_CORE) static int af9035_rc_query(struct dvb_usb_device *d) { struct usb_interface *intf = d->intf; int ret; enum rc_proto proto; u32 key; u8 buf[4]; struct usb_req req = { CMD_IR_GET, 0, 0, NULL, 4, buf }; ret = af9035_ctrl_msg(d, &req); if (ret == 1) return 0; else if (ret < 0) goto err; if ((buf[2] + buf[3]) == 0xff) { if ((buf[0] + buf[1]) == 0xff) { /* NEC standard 16bit */ key = RC_SCANCODE_NEC(buf[0], buf[2]); proto = RC_PROTO_NEC; } else { /* NEC extended 24bit */ key = RC_SCANCODE_NECX(buf[0] << 8 | buf[1], buf[2]); proto = RC_PROTO_NECX; } } else { /* NEC full code 32bit */ key = RC_SCANCODE_NEC32(buf[0] << 24 | buf[1] << 16 | buf[2] << 8 | buf[3]); proto = RC_PROTO_NEC32; } dev_dbg(&intf->dev, "%*ph\n", 4, buf); rc_keydown(d->rc_dev, proto, key, 0); return 0; err: dev_dbg(&intf->dev, "failed=%d\n", ret); return ret; } static int af9035_get_rc_config(struct dvb_usb_device *d, struct dvb_usb_rc *rc) { struct state *state = d_to_priv(d); struct usb_interface *intf = d->intf; dev_dbg(&intf->dev, "ir_mode=%02x ir_type=%02x\n", 
state->ir_mode, state->ir_type); /* don't activate rc if in HID mode or if not available */ if (state->ir_mode == 0x05) { switch (state->ir_type) { case 0: /* NEC */ default: rc->allowed_protos = RC_PROTO_BIT_NEC | RC_PROTO_BIT_NECX | RC_PROTO_BIT_NEC32; break; case 1: /* RC6 */ rc->allowed_protos = RC_PROTO_BIT_RC6_MCE; break; } rc->query = af9035_rc_query; rc->interval = 500; /* load empty to enable rc */ if (!rc->map_name) rc->map_name = RC_MAP_EMPTY; } return 0; } #else #define af9035_get_rc_config NULL #endif static int af9035_get_stream_config(struct dvb_frontend *fe, u8 *ts_type, struct usb_data_stream_properties *stream) { struct dvb_usb_device *d = fe_to_d(fe); struct usb_interface *intf = d->intf; dev_dbg(&intf->dev, "adap=%d\n", fe_to_adap(fe)->id); if (d->udev->speed == USB_SPEED_FULL) stream->u.bulk.buffersize = 5 * 188; return 0; } static int af9035_pid_filter_ctrl(struct dvb_usb_adapter *adap, int onoff) { struct state *state = adap_to_priv(adap); return state->ops.pid_filter_ctrl(adap->fe[0], onoff); } static int af9035_pid_filter(struct dvb_usb_adapter *adap, int index, u16 pid, int onoff) { struct state *state = adap_to_priv(adap); return state->ops.pid_filter(adap->fe[0], index, pid, onoff); } static int af9035_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(intf); char manufacturer[sizeof("Afatech")]; memset(manufacturer, 0, sizeof(manufacturer)); usb_string(udev, udev->descriptor.iManufacturer, manufacturer, sizeof(manufacturer)); /* * There is two devices having same ID but different chipset. One uses * AF9015 and the other IT9135 chipset. Only difference seen on lsusb * is iManufacturer string. * * idVendor 0x0ccd TerraTec Electronic GmbH * idProduct 0x0099 * bcdDevice 2.00 * iManufacturer 1 Afatech * iProduct 2 DVB-T 2 * * idVendor 0x0ccd TerraTec Electronic GmbH * idProduct 0x0099 * bcdDevice 2.00 * iManufacturer 1 ITE Technologies, Inc. 
* iProduct 2 DVB-T TV Stick */ if ((le16_to_cpu(udev->descriptor.idVendor) == USB_VID_TERRATEC) && (le16_to_cpu(udev->descriptor.idProduct) == 0x0099)) { if (!strcmp("Afatech", manufacturer)) { dev_dbg(&udev->dev, "rejecting device\n"); return -ENODEV; } } return dvb_usbv2_probe(intf, id); } /* interface 0 is used by DVB-T receiver and interface 1 is for remote controller (HID) */ static const struct dvb_usb_device_properties af9035_props = { .driver_name = KBUILD_MODNAME, .owner = THIS_MODULE, .adapter_nr = adapter_nr, .size_of_priv = sizeof(struct state), .generic_bulk_ctrl_endpoint = 0x02, .generic_bulk_ctrl_endpoint_response = 0x81, .identify_state = af9035_identify_state, .download_firmware = af9035_download_firmware, .i2c_algo = &af9035_i2c_algo, .read_config = af9035_read_config, .frontend_attach = af9035_frontend_attach, .frontend_detach = af9035_frontend_detach, .tuner_attach = af9035_tuner_attach, .tuner_detach = af9035_tuner_detach, .init = af9035_init, .get_rc_config = af9035_get_rc_config, .get_stream_config = af9035_get_stream_config, .get_adapter_count = af9035_get_adapter_count, .adapter = { { .caps = DVB_USB_ADAP_HAS_PID_FILTER | DVB_USB_ADAP_PID_FILTER_CAN_BE_TURNED_OFF, .pid_filter_count = 32, .pid_filter_ctrl = af9035_pid_filter_ctrl, .pid_filter = af9035_pid_filter, .stream = DVB_USB_STREAM_BULK(0x84, 6, 87 * 188), }, { .caps = DVB_USB_ADAP_HAS_PID_FILTER | DVB_USB_ADAP_PID_FILTER_CAN_BE_TURNED_OFF, .pid_filter_count = 32, .pid_filter_ctrl = af9035_pid_filter_ctrl, .pid_filter = af9035_pid_filter, .stream = DVB_USB_STREAM_BULK(0x85, 6, 87 * 188), }, }, }; static const struct dvb_usb_device_properties it930x_props = { .driver_name = KBUILD_MODNAME, .owner = THIS_MODULE, .adapter_nr = adapter_nr, .size_of_priv = sizeof(struct state), .generic_bulk_ctrl_endpoint = 0x02, .generic_bulk_ctrl_endpoint_response = 0x81, .identify_state = af9035_identify_state, .download_firmware = af9035_download_firmware, .i2c_algo = &af9035_i2c_algo, .read_config = af9035_read_config, .frontend_attach = it930x_frontend_attach, .frontend_detach = af9035_frontend_detach, .tuner_attach = it930x_tuner_attach, .tuner_detach = it930x_tuner_detach, .init = it930x_init, /* * dvb_usbv2_remote_init() calls rc_config() only for those devices * which have non-empty rc_map, so it's safe to enable it for every IT930x */ .get_rc_config = af9035_get_rc_config, .get_stream_config = af9035_get_stream_config, .get_adapter_count = af9035_get_adapter_count, .adapter = { { .stream = DVB_USB_STREAM_BULK(0x84, 4, 816 * 188), }, { .stream = DVB_USB_STREAM_BULK(0x85, 4, 816 * 188), }, }, }; static const struct usb_device_id af9035_id_table[] = { /* AF9035 devices */ { DVB_USB_DEVICE(USB_VID_AFATECH, USB_PID_AFATECH_AF9035_9035, &af9035_props, "Afatech AF9035 reference design", NULL) }, { DVB_USB_DEVICE(USB_VID_AFATECH, USB_PID_AFATECH_AF9035_1000, &af9035_props, "Afatech AF9035 reference design", NULL) }, { DVB_USB_DEVICE(USB_VID_AFATECH, USB_PID_AFATECH_AF9035_1001, &af9035_props, "Afatech AF9035 reference design", NULL) }, { DVB_USB_DEVICE(USB_VID_AFATECH, USB_PID_AFATECH_AF9035_1002, &af9035_props, "Afatech AF9035 reference design", NULL) }, { DVB_USB_DEVICE(USB_VID_AFATECH, USB_PID_AFATECH_AF9035_1003, &af9035_props, "Afatech AF9035 reference design", NULL) }, { DVB_USB_DEVICE(USB_VID_TERRATEC, USB_PID_TERRATEC_CINERGY_T_STICK, &af9035_props, "TerraTec Cinergy T Stick", NULL) }, { DVB_USB_DEVICE(USB_VID_AVERMEDIA, USB_PID_AVERMEDIA_A835, &af9035_props, "AVerMedia AVerTV Volar HD/PRO (A835)", NULL) }, { 
DVB_USB_DEVICE(USB_VID_AVERMEDIA, USB_PID_AVERMEDIA_B835, &af9035_props, "AVerMedia AVerTV Volar HD/PRO (A835)", NULL) }, { DVB_USB_DEVICE(USB_VID_AVERMEDIA, USB_PID_AVERMEDIA_1867, &af9035_props, "AVerMedia HD Volar (A867)", NULL) }, { DVB_USB_DEVICE(USB_VID_AVERMEDIA, USB_PID_AVERMEDIA_A867, &af9035_props, "AVerMedia HD Volar (A867)", NULL) }, { DVB_USB_DEVICE(USB_VID_AVERMEDIA, USB_PID_AVERMEDIA_TWINSTAR, &af9035_props, "AVerMedia Twinstar (A825)", NULL) }, { DVB_USB_DEVICE(USB_VID_ASUS, USB_PID_ASUS_U3100MINI_PLUS, &af9035_props, "Asus U3100Mini Plus", NULL) }, { DVB_USB_DEVICE(USB_VID_TERRATEC, 0x00aa, &af9035_props, "TerraTec Cinergy T Stick (rev. 2)", NULL) }, { DVB_USB_DEVICE(USB_VID_AVERMEDIA, 0x0337, &af9035_props, "AVerMedia HD Volar (A867)", NULL) }, { DVB_USB_DEVICE(USB_VID_GTEK, USB_PID_EVOLVEO_XTRATV_STICK, &af9035_props, "EVOLVEO XtraTV stick", NULL) }, /* IT9135 devices */ { DVB_USB_DEVICE(USB_VID_ITETECH, USB_PID_ITETECH_IT9135, &af9035_props, "ITE 9135 Generic", RC_MAP_IT913X_V1) }, { DVB_USB_DEVICE(USB_VID_ITETECH, USB_PID_ITETECH_IT9135_9005, &af9035_props, "ITE 9135(9005) Generic", RC_MAP_IT913X_V2) }, { DVB_USB_DEVICE(USB_VID_ITETECH, USB_PID_ITETECH_IT9135_9006, &af9035_props, "ITE 9135(9006) Generic", RC_MAP_IT913X_V1) }, { DVB_USB_DEVICE(USB_VID_AVERMEDIA, USB_PID_AVERMEDIA_A835B_1835, &af9035_props, "Avermedia A835B(1835)", RC_MAP_IT913X_V2) }, { DVB_USB_DEVICE(USB_VID_AVERMEDIA, USB_PID_AVERMEDIA_A835B_2835, &af9035_props, "Avermedia A835B(2835)", RC_MAP_IT913X_V2) }, { DVB_USB_DEVICE(USB_VID_AVERMEDIA, USB_PID_AVERMEDIA_A835B_3835, &af9035_props, "Avermedia A835B(3835)", RC_MAP_IT913X_V2) }, { DVB_USB_DEVICE(USB_VID_AVERMEDIA, USB_PID_AVERMEDIA_A835B_4835, &af9035_props, "Avermedia A835B(4835)", RC_MAP_IT913X_V2) }, { DVB_USB_DEVICE(USB_VID_AVERMEDIA, USB_PID_AVERMEDIA_TD110, &af9035_props, "Avermedia AverTV Volar HD 2 (TD110)", RC_MAP_AVERMEDIA_RM_KS) }, { DVB_USB_DEVICE(USB_VID_AVERMEDIA, USB_PID_AVERMEDIA_H335, &af9035_props, "Avermedia H335", RC_MAP_IT913X_V2) }, { DVB_USB_DEVICE(USB_VID_KWORLD_2, USB_PID_KWORLD_UB499_2T_T09, &af9035_props, "Kworld UB499-2T T09", RC_MAP_IT913X_V1) }, { DVB_USB_DEVICE(USB_VID_KWORLD_2, USB_PID_SVEON_STV22_IT9137, &af9035_props, "Sveon STV22 Dual DVB-T HDTV", RC_MAP_IT913X_V1) }, { DVB_USB_DEVICE(USB_VID_KWORLD_2, USB_PID_CTVDIGDUAL_V2, &af9035_props, "Digital Dual TV Receiver CTVDIGDUAL_V2", RC_MAP_IT913X_V1) }, { DVB_USB_DEVICE(USB_VID_TERRATEC, USB_PID_TERRATEC_T1, &af9035_props, "TerraTec T1", RC_MAP_IT913X_V1) }, /* XXX: that same ID [0ccd:0099] is used by af9015 driver too */ { DVB_USB_DEVICE(USB_VID_TERRATEC, 0x0099, &af9035_props, "TerraTec Cinergy T Stick Dual RC (rev. 
2)", NULL) }, { DVB_USB_DEVICE(USB_VID_LEADTEK, 0x6a05, &af9035_props, "Leadtek WinFast DTV Dongle Dual", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xf900, &af9035_props, "Hauppauge WinTV-MiniStick 2", NULL) }, { DVB_USB_DEVICE(USB_VID_PCTV, USB_PID_PCTV_78E, &af9035_props, "PCTV AndroiDTV (78e)", RC_MAP_IT913X_V1) }, { DVB_USB_DEVICE(USB_VID_PCTV, USB_PID_PCTV_79E, &af9035_props, "PCTV microStick (79e)", RC_MAP_IT913X_V2) }, /* IT930x devices */ { DVB_USB_DEVICE(USB_VID_ITETECH, USB_PID_ITETECH_IT9303, &it930x_props, "ITE 9303 Generic", NULL) }, { DVB_USB_DEVICE(USB_VID_AVERMEDIA, USB_PID_AVERMEDIA_TD310, &it930x_props, "AVerMedia TD310 DVB-T2", RC_MAP_AVERMEDIA_RM_KS) }, { DVB_USB_DEVICE(USB_VID_DEXATEK, 0x0100, &it930x_props, "Logilink VG0022A", NULL) }, { DVB_USB_DEVICE(USB_VID_TERRATEC, USB_PID_TERRATEC_CINERGY_TC2_STICK, &it930x_props, "TerraTec Cinergy TC2 Stick", NULL) }, { } }; MODULE_DEVICE_TABLE(usb, af9035_id_table); static struct usb_driver af9035_usb_driver = { .name = KBUILD_MODNAME, .id_table = af9035_id_table, .probe = af9035_probe, .disconnect = dvb_usbv2_disconnect, .suspend = dvb_usbv2_suspend, .resume = dvb_usbv2_resume, .reset_resume = dvb_usbv2_reset_resume, .no_dynamic_id = 1, .soft_unbind = 1, }; module_usb_driver(af9035_usb_driver); MODULE_AUTHOR("Antti Palosaari <crope@iki.fi>"); MODULE_DESCRIPTION("Afatech AF9035 driver"); MODULE_LICENSE("GPL"); MODULE_FIRMWARE(AF9035_FIRMWARE_AF9035); MODULE_FIRMWARE(AF9035_FIRMWARE_IT9135_V1); MODULE_FIRMWARE(AF9035_FIRMWARE_IT9135_V2); MODULE_FIRMWARE(AF9035_FIRMWARE_IT9303);
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org>
 * (C) 2011 Intra2net AG <https://www.intra2net.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/nfnetlink_acct.h>
#include <linux/netfilter/xt_nfacct.h>

MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_DESCRIPTION("Xtables: match for the extended accounting infrastructure");
MODULE_LICENSE("GPL");
MODULE_ALIAS("ipt_nfacct");
MODULE_ALIAS("ip6t_nfacct");

static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
	int overquota;
	const struct xt_nfacct_match_info *info = par->targinfo;

	nfnl_acct_update(skb, info->nfacct);

	overquota = nfnl_acct_overquota(xt_net(par), info->nfacct);

	return overquota != NFACCT_UNDERQUOTA;
}

static int nfacct_mt_checkentry(const struct xt_mtchk_param *par)
{
	struct xt_nfacct_match_info *info = par->matchinfo;
	struct nf_acct *nfacct;

	nfacct = nfnl_acct_find_get(par->net, info->name);
	if (nfacct == NULL) {
		pr_info_ratelimited("accounting object `%s' does not exist\n",
				    info->name);
		return -ENOENT;
	}
	info->nfacct = nfacct;
	return 0;
}

static void nfacct_mt_destroy(const struct xt_mtdtor_param *par)
{
	const struct xt_nfacct_match_info *info = par->matchinfo;

	nfnl_acct_put(info->nfacct);
}

static struct xt_match nfacct_mt_reg[] __read_mostly = {
	{
		.name       = "nfacct",
		.revision   = 0,
		.family     = NFPROTO_UNSPEC,
		.checkentry = nfacct_mt_checkentry,
		.match      = nfacct_mt,
		.destroy    = nfacct_mt_destroy,
		.matchsize  = sizeof(struct xt_nfacct_match_info),
		.usersize   = offsetof(struct xt_nfacct_match_info, nfacct),
		.me         = THIS_MODULE,
	},
	{
		.name       = "nfacct",
		.revision   = 1,
		.family     = NFPROTO_UNSPEC,
		.checkentry = nfacct_mt_checkentry,
		.match      = nfacct_mt,
		.destroy    = nfacct_mt_destroy,
		.matchsize  = sizeof(struct xt_nfacct_match_info_v1),
		.usersize   = offsetof(struct xt_nfacct_match_info_v1, nfacct),
		.me         = THIS_MODULE,
	},
};

static int __init nfacct_mt_init(void)
{
	return xt_register_matches(nfacct_mt_reg, ARRAY_SIZE(nfacct_mt_reg));
}

static void __exit nfacct_mt_exit(void)
{
	xt_unregister_matches(nfacct_mt_reg, ARRAY_SIZE(nfacct_mt_reg));
}

module_init(nfacct_mt_init);
module_exit(nfacct_mt_exit);
1 1 2 2 2 1 1 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2007-2008 BalaBit IT Ltd. * Author: Krisztian Kovacs */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <net/tcp.h> #include <net/udp.h> #include <net/icmp.h> #include <net/sock.h> #include <net/inet_sock.h> #include <net/netfilter/nf_socket.h> #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <net/netfilter/nf_conntrack.h> #endif static int extract_icmp4_fields(const struct sk_buff *skb, u8 *protocol, __be32 *raddr, __be32 *laddr, __be16 *rport, __be16 *lport) { unsigned int outside_hdrlen = ip_hdrlen(skb); struct iphdr *inside_iph, _inside_iph; struct icmphdr *icmph, _icmph; __be16 *ports, _ports[2]; icmph = skb_header_pointer(skb, outside_hdrlen, sizeof(_icmph), &_icmph); if (icmph == NULL) return 1; if (!icmp_is_err(icmph->type)) return 1; inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(struct icmphdr), sizeof(_inside_iph), &_inside_iph); if (inside_iph == NULL) return 1; if (inside_iph->protocol != IPPROTO_TCP && inside_iph->protocol != IPPROTO_UDP) return 1; ports = skb_header_pointer(skb, outside_hdrlen + sizeof(struct icmphdr) + (inside_iph->ihl << 2), sizeof(_ports), &_ports); if (ports == NULL) return 1; /* the inside IP packet is the one quoted from our side, thus * its saddr is the local address */ *protocol = inside_iph->protocol; *laddr = inside_iph->saddr; *lport = ports[0]; *raddr = inside_iph->daddr; *rport = ports[1]; return 0; } static struct sock * nf_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff, const u8 protocol, const __be32 saddr, const __be32 daddr, const __be16 sport, const __be16 dport, const struct net_device *in) { switch (protocol) { case IPPROTO_TCP: return inet_lookup(net, net->ipv4.tcp_death_row.hashinfo, skb, doff, saddr, sport, daddr, dport, in->ifindex); case IPPROTO_UDP: return udp4_lib_lookup(net, saddr, sport, daddr, dport, in->ifindex); } return NULL; } struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb, const struct net_device *indev) { __be32 daddr, saddr; __be16 dport, sport; const struct iphdr *iph = ip_hdr(skb); struct sk_buff *data_skb = NULL; u8 protocol; #if IS_ENABLED(CONFIG_NF_CONNTRACK) enum ip_conntrack_info ctinfo; struct nf_conn const *ct; #endif int doff = 0; if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) { struct tcphdr _hdr; struct udphdr *hp; hp = skb_header_pointer(skb, ip_hdrlen(skb), iph->protocol == IPPROTO_UDP ? sizeof(*hp) : sizeof(_hdr), &_hdr); if (hp == NULL) return NULL; protocol = iph->protocol; saddr = iph->saddr; sport = hp->source; daddr = iph->daddr; dport = hp->dest; data_skb = (struct sk_buff *)skb; doff = iph->protocol == IPPROTO_TCP ? 
ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) : ip_hdrlen(skb) + sizeof(*hp); } else if (iph->protocol == IPPROTO_ICMP) { if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr, &sport, &dport)) return NULL; } else { return NULL; } #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Do the lookup with the original socket address in * case this is a reply packet of an established * SNAT-ted connection. */ ct = nf_ct_get(skb, &ctinfo); if (ct && ((iph->protocol != IPPROTO_ICMP && ctinfo == IP_CT_ESTABLISHED_REPLY) || (iph->protocol == IPPROTO_ICMP && ctinfo == IP_CT_RELATED_REPLY)) && (ct->status & IPS_SRC_NAT_DONE)) { daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; dport = (iph->protocol == IPPROTO_TCP) ? ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port : ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; } #endif return nf_socket_get_sock_v4(net, data_skb, doff, protocol, saddr, daddr, sport, dport, indev); } EXPORT_SYMBOL_GPL(nf_sk_lookup_slow_v4); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler"); MODULE_DESCRIPTION("Netfilter IPv4 socket lookup infrastructure");
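/*
 * Editor's illustrative sketch (not part of the kernel file above): the
 * offset arithmetic behind extract_icmp4_fields(), shown on a plain byte
 * buffer instead of an skb.  The quoted (inner) IPv4 header begins after the
 * outer IP header plus the 8-byte ICMP header, and the inner transport ports
 * follow the inner header's IHL.  Because the quoted packet is the one we
 * originally sent, its source address/port are the *local* ones.  The struct
 * and function names are local to this sketch; it assumes pkt points at the
 * outer IPv4 header of a linear buffer and that the ICMP type was already
 * verified to be an error (the kernel uses icmp_is_err() and
 * skb_header_pointer() for those checks).
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

struct quoted_tuple {
	uint32_t laddr, raddr;		/* network byte order */
	uint16_t lport, rport;		/* network byte order */
	uint8_t protocol;
};

bool parse_icmp_quoted(const uint8_t *pkt, size_t len, struct quoted_tuple *t)
{
	size_t outer_ihl = (pkt[0] & 0x0f) * 4;	/* outer IP header length */
	size_t inner_off = outer_ihl + 8;	/* + ICMP header */
	size_t ports_off;
	const uint8_t *inner;

	if (len < inner_off + 20)		/* minimal inner IPv4 header */
		return false;

	inner = pkt + inner_off;
	t->protocol = inner[9];
	if (t->protocol != 6 && t->protocol != 17)	/* TCP or UDP only */
		return false;

	ports_off = inner_off + (inner[0] & 0x0f) * 4;	/* + inner IP header */
	if (len < ports_off + 4)
		return false;

	/* inner saddr/sport are local, inner daddr/dport are remote */
	memcpy(&t->laddr, inner + 12, 4);
	memcpy(&t->raddr, inner + 16, 4);
	memcpy(&t->lport, pkt + ports_off, 2);
	memcpy(&t->rport, pkt + ports_off + 2, 2);
	return true;
}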
434 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_PIPE_FS_I_H #define _LINUX_PIPE_FS_I_H #define PIPE_DEF_BUFFERS 16 #define PIPE_BUF_FLAG_LRU 0x01 /* page is on the LRU */ #define PIPE_BUF_FLAG_ATOMIC 0x02 /* was atomically mapped */ #define PIPE_BUF_FLAG_GIFT 0x04 /* page is a gift */ #define PIPE_BUF_FLAG_PACKET 0x08 /* read() as a packet */ #define PIPE_BUF_FLAG_CAN_MERGE 0x10 /* can merge buffers */ #define PIPE_BUF_FLAG_WHOLE 0x20 /* read() must return entire buffer or error */ #ifdef CONFIG_WATCH_QUEUE #define PIPE_BUF_FLAG_LOSS 0x40 /* Message loss happened after this buffer */ #endif /** * struct pipe_buffer - a linux kernel pipe buffer * @page: the page containing the data for the pipe buffer * @offset: offset of data inside the @page * @len: length of data inside the @page * @ops: operations associated with this buffer. See @pipe_buf_operations. * @flags: pipe buffer flags. See above. * @private: private data owned by the ops. **/ struct pipe_buffer { struct page *page; unsigned int offset, len; const struct pipe_buf_operations *ops; unsigned int flags; unsigned long private; }; /** * struct pipe_inode_info - a linux kernel pipe * @mutex: mutex protecting the whole thing * @rd_wait: reader wait point in case of empty pipe * @wr_wait: writer wait point in case of full pipe * @head: The point of buffer production * @tail: The point of buffer consumption * @note_loss: The next read() should insert a data-lost message * @max_usage: The maximum number of slots that may be used in the ring * @ring_size: total number of buffers (should be a power of 2) * @nr_accounted: The amount this pipe accounts for in user->pipe_bufs * @tmp_page: cached released page * @readers: number of current readers of this pipe * @writers: number of current writers of this pipe * @files: number of struct file referring this pipe (protected by ->i_lock) * @r_counter: reader counter * @w_counter: writer counter * @poll_usage: is this pipe used for epoll, which has crazy wakeups? 
* @fasync_readers: reader side fasync * @fasync_writers: writer side fasync * @bufs: the circular array of pipe buffers * @user: the user who created this pipe * @watch_queue: If this pipe is a watch_queue, this is the stuff for that **/ struct pipe_inode_info { struct mutex mutex; wait_queue_head_t rd_wait, wr_wait; unsigned int head; unsigned int tail; unsigned int max_usage; unsigned int ring_size; unsigned int nr_accounted; unsigned int readers; unsigned int writers; unsigned int files; unsigned int r_counter; unsigned int w_counter; bool poll_usage; #ifdef CONFIG_WATCH_QUEUE bool note_loss; #endif struct page *tmp_page; struct fasync_struct *fasync_readers; struct fasync_struct *fasync_writers; struct pipe_buffer *bufs; struct user_struct *user; #ifdef CONFIG_WATCH_QUEUE struct watch_queue *watch_queue; #endif }; /* * Note on the nesting of these functions: * * ->confirm() * ->try_steal() * * That is, ->try_steal() must be called on a confirmed buffer. See below for * the meaning of each operation. Also see the kerneldoc in fs/pipe.c for the * pipe and generic variants of these hooks. */ struct pipe_buf_operations { /* * ->confirm() verifies that the data in the pipe buffer is there * and that the contents are good. If the pages in the pipe belong * to a file system, we may need to wait for IO completion in this * hook. Returns 0 for good, or a negative error value in case of * error. If not present all pages are considered good. */ int (*confirm)(struct pipe_inode_info *, struct pipe_buffer *); /* * When the contents of this pipe buffer has been completely * consumed by a reader, ->release() is called. */ void (*release)(struct pipe_inode_info *, struct pipe_buffer *); /* * Attempt to take ownership of the pipe buffer and its contents. * ->try_steal() returns %true for success, in which case the contents * of the pipe (the buf->page) is locked and now completely owned by the * caller. The page may then be transferred to a different mapping, the * most often used case is insertion into different file address space * cache. */ bool (*try_steal)(struct pipe_inode_info *, struct pipe_buffer *); /* * Get a reference to the pipe buffer. */ bool (*get)(struct pipe_inode_info *, struct pipe_buffer *); }; /** * pipe_has_watch_queue - Check whether the pipe is a watch_queue, * i.e. it was created with O_NOTIFICATION_PIPE * @pipe: The pipe to check * * Return: true if pipe is a watch queue, false otherwise. */ static inline bool pipe_has_watch_queue(const struct pipe_inode_info *pipe) { #ifdef CONFIG_WATCH_QUEUE return pipe->watch_queue != NULL; #else return false; #endif } /** * pipe_empty - Return true if the pipe is empty * @head: The pipe ring head pointer * @tail: The pipe ring tail pointer */ static inline bool pipe_empty(unsigned int head, unsigned int tail) { return head == tail; } /** * pipe_occupancy - Return number of slots used in the pipe * @head: The pipe ring head pointer * @tail: The pipe ring tail pointer */ static inline unsigned int pipe_occupancy(unsigned int head, unsigned int tail) { return head - tail; } /** * pipe_full - Return true if the pipe is full * @head: The pipe ring head pointer * @tail: The pipe ring tail pointer * @limit: The maximum amount of slots available. 
*/ static inline bool pipe_full(unsigned int head, unsigned int tail, unsigned int limit) { return pipe_occupancy(head, tail) >= limit; } /** * pipe_buf - Return the pipe buffer for the specified slot in the pipe ring * @pipe: The pipe to access * @slot: The slot of interest */ static inline struct pipe_buffer *pipe_buf(const struct pipe_inode_info *pipe, unsigned int slot) { return &pipe->bufs[slot & (pipe->ring_size - 1)]; } /** * pipe_head_buf - Return the pipe buffer at the head of the pipe ring * @pipe: The pipe to access */ static inline struct pipe_buffer *pipe_head_buf(const struct pipe_inode_info *pipe) { return pipe_buf(pipe, pipe->head); } /** * pipe_buf_get - get a reference to a pipe_buffer * @pipe: the pipe that the buffer belongs to * @buf: the buffer to get a reference to * * Return: %true if the reference was successfully obtained. */ static inline __must_check bool pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { return buf->ops->get(pipe, buf); } /** * pipe_buf_release - put a reference to a pipe_buffer * @pipe: the pipe that the buffer belongs to * @buf: the buffer to put a reference to */ static inline void pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { const struct pipe_buf_operations *ops = buf->ops; buf->ops = NULL; ops->release(pipe, buf); } /** * pipe_buf_confirm - verify contents of the pipe buffer * @pipe: the pipe that the buffer belongs to * @buf: the buffer to confirm */ static inline int pipe_buf_confirm(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { if (!buf->ops->confirm) return 0; return buf->ops->confirm(pipe, buf); } /** * pipe_buf_try_steal - attempt to take ownership of a pipe_buffer * @pipe: the pipe that the buffer belongs to * @buf: the buffer to attempt to steal */ static inline bool pipe_buf_try_steal(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { if (!buf->ops->try_steal) return false; return buf->ops->try_steal(pipe, buf); } static inline void pipe_discard_from(struct pipe_inode_info *pipe, unsigned int old_head) { unsigned int mask = pipe->ring_size - 1; while (pipe->head > old_head) pipe_buf_release(pipe, &pipe->bufs[--pipe->head & mask]); } /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual memory allocation, whereas PIPE_BUF makes atomicity guarantees. 
*/ #define PIPE_SIZE PAGE_SIZE /* Pipe lock and unlock operations */ void pipe_lock(struct pipe_inode_info *); void pipe_unlock(struct pipe_inode_info *); void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *); /* Wait for a pipe to be readable/writable while dropping the pipe lock */ void pipe_wait_readable(struct pipe_inode_info *); void pipe_wait_writable(struct pipe_inode_info *); struct pipe_inode_info *alloc_pipe_info(void); void free_pipe_info(struct pipe_inode_info *); /* Generic pipe buffer ops functions */ bool generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); bool generic_pipe_buf_try_steal(struct pipe_inode_info *, struct pipe_buffer *); void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); extern const struct pipe_buf_operations nosteal_pipe_buf_ops; unsigned long account_pipe_buffers(struct user_struct *user, unsigned long old, unsigned long new); bool too_many_pipe_buffers_soft(unsigned long user_bufs); bool too_many_pipe_buffers_hard(unsigned long user_bufs); bool pipe_is_unprivileged_user(void); /* for F_SETPIPE_SZ and F_GETPIPE_SZ */ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots); long pipe_fcntl(struct file *, unsigned int, unsigned int arg); struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice); int create_pipe_files(struct file **, int); unsigned int round_pipe_size(unsigned int size); #endif
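/*
 * Editor's illustrative sketch (not part of the header above): the ring
 * arithmetic used by pipe_occupancy(), pipe_full() and pipe_buf().  head and
 * tail are free-running unsigned counters, occupancy is simply their
 * difference (which stays correct across unsigned wraparound), and the slot
 * index is the counter masked with ring_size - 1, which is why ring_size
 * must be a power of two.  The concrete numbers are made up for the demo.
 */
#include <assert.h>
#include <limits.h>

int main(void)
{
	const unsigned int ring_size = 16;	/* power of two */
	const unsigned int mask = ring_size - 1;

	/* head has wrapped around UINT_MAX, tail has not yet caught up */
	unsigned int tail = UINT_MAX - 2;
	unsigned int head = tail + 6;		/* wraps around to 3 */

	assert(head == 3);
	assert(head - tail == 6);		/* pipe_occupancy() */
	assert(head - tail < ring_size);	/* i.e. not pipe_full() */
	assert((head & mask) == 3);		/* slot used by pipe_buf() */

	return 0;
}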
1 1 5 5 5 4 1 5 5 5 2 2 1 3 3 2 2 5 5 5 3 2 1 2 6 3 2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2017 Red Hat, Inc. */ #include "fuse_i.h" #include <linux/uio.h> #include <linux/compat.h> #include <linux/fileattr.h> #include <linux/fsverity.h> #define FUSE_VERITY_ENABLE_ARG_MAX_PAGES 256 static ssize_t fuse_send_ioctl(struct fuse_mount *fm, struct fuse_args *args, struct fuse_ioctl_out *outarg) { ssize_t ret; args->out_args[0].size = sizeof(*outarg); args->out_args[0].value = outarg; ret = fuse_simple_request(fm, args); /* Translate ENOSYS, which shouldn't be returned from fs */ if (ret == -ENOSYS) ret = -ENOTTY; if (ret >= 0 && outarg->result == -ENOSYS) outarg->result = -ENOTTY; return ret; } /* * CUSE servers compiled on 32bit broke on 64bit kernels because the * ABI was defined to be 'struct iovec' which is different on 32bit * and 64bit. Fortunately we can determine which structure the server * used from the size of the reply. */ static int fuse_copy_ioctl_iovec_old(struct iovec *dst, void *src, size_t transferred, unsigned count, bool is_compat) { #ifdef CONFIG_COMPAT if (count * sizeof(struct compat_iovec) == transferred) { struct compat_iovec *ciov = src; unsigned i; /* * With this interface a 32bit server cannot support * non-compat (i.e. 
ones coming from 64bit apps) ioctl * requests */ if (!is_compat) return -EINVAL; for (i = 0; i < count; i++) { dst[i].iov_base = compat_ptr(ciov[i].iov_base); dst[i].iov_len = ciov[i].iov_len; } return 0; } #endif if (count * sizeof(struct iovec) != transferred) return -EIO; memcpy(dst, src, transferred); return 0; } /* Make sure iov_length() won't overflow */ static int fuse_verify_ioctl_iov(struct fuse_conn *fc, struct iovec *iov, size_t count) { size_t n; u32 max = fc->max_pages << PAGE_SHIFT; for (n = 0; n < count; n++, iov++) { if (iov->iov_len > (size_t) max) return -ENOMEM; max -= iov->iov_len; } return 0; } static int fuse_copy_ioctl_iovec(struct fuse_conn *fc, struct iovec *dst, void *src, size_t transferred, unsigned count, bool is_compat) { unsigned i; struct fuse_ioctl_iovec *fiov = src; if (fc->minor < 16) { return fuse_copy_ioctl_iovec_old(dst, src, transferred, count, is_compat); } if (count * sizeof(struct fuse_ioctl_iovec) != transferred) return -EIO; for (i = 0; i < count; i++) { /* Did the server supply an inappropriate value? */ if (fiov[i].base != (unsigned long) fiov[i].base || fiov[i].len != (unsigned long) fiov[i].len) return -EIO; dst[i].iov_base = (void __user *) (unsigned long) fiov[i].base; dst[i].iov_len = (size_t) fiov[i].len; #ifdef CONFIG_COMPAT if (is_compat && (ptr_to_compat(dst[i].iov_base) != fiov[i].base || (compat_size_t) dst[i].iov_len != fiov[i].len)) return -EIO; #endif } return 0; } /* For fs-verity, determine iov lengths from input */ static int fuse_setup_measure_verity(unsigned long arg, struct iovec *iov) { __u16 digest_size; struct fsverity_digest __user *uarg = (void __user *)arg; if (copy_from_user(&digest_size, &uarg->digest_size, sizeof(digest_size))) return -EFAULT; if (digest_size > SIZE_MAX - sizeof(struct fsverity_digest)) return -EINVAL; iov->iov_len = sizeof(struct fsverity_digest) + digest_size; return 0; } static int fuse_setup_enable_verity(unsigned long arg, struct iovec *iov, unsigned int *in_iovs) { struct fsverity_enable_arg enable; struct fsverity_enable_arg __user *uarg = (void __user *)arg; const __u32 max_buffer_len = FUSE_VERITY_ENABLE_ARG_MAX_PAGES * PAGE_SIZE; if (copy_from_user(&enable, uarg, sizeof(enable))) return -EFAULT; if (enable.salt_size > max_buffer_len || enable.sig_size > max_buffer_len) return -ENOMEM; if (enable.salt_size > 0) { iov++; (*in_iovs)++; iov->iov_base = u64_to_user_ptr(enable.salt_ptr); iov->iov_len = enable.salt_size; } if (enable.sig_size > 0) { iov++; (*in_iovs)++; iov->iov_base = u64_to_user_ptr(enable.sig_ptr); iov->iov_len = enable.sig_size; } return 0; } /* * For ioctls, there is no generic way to determine how much memory * needs to be read and/or written. Furthermore, ioctls are allowed * to dereference the passed pointer, so the parameter requires deep * copying but FUSE has no idea whatsoever about what to copy in or * out. * * This is solved by allowing FUSE server to retry ioctl with * necessary in/out iovecs. Let's assume the ioctl implementation * needs to read in the following structure. * * struct a { * char *buf; * size_t buflen; * } * * On the first callout to FUSE server, inarg->in_size and * inarg->out_size will be NULL; then, the server completes the ioctl * with FUSE_IOCTL_RETRY set in out->flags, out->in_iovs set to 1 and * the actual iov array to * * { { .iov_base = inarg.arg, .iov_len = sizeof(struct a) } } * * which tells FUSE to copy in the requested area and retry the ioctl. 
* On the second round, the server has access to the structure and * from that it can tell what to look for next, so on the invocation, * it sets FUSE_IOCTL_RETRY, out->in_iovs to 2 and iov array to * * { { .iov_base = inarg.arg, .iov_len = sizeof(struct a) }, * { .iov_base = a.buf, .iov_len = a.buflen } } * * FUSE will copy both struct a and the pointed buffer from the * process doing the ioctl and retry ioctl with both struct a and the * buffer. * * This time, FUSE server has everything it needs and completes ioctl * without FUSE_IOCTL_RETRY which finishes the ioctl call. * * Copying data out works the same way. * * Note that if FUSE_IOCTL_UNRESTRICTED is clear, the kernel * automatically initializes in and out iovs by decoding @cmd with * _IOC_* macros and the server is not allowed to request RETRY. This * limits ioctl data transfers to well-formed ioctls and is the forced * behavior for all FUSE servers. */ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, unsigned int flags) { struct fuse_file *ff = file->private_data; struct fuse_mount *fm = ff->fm; struct fuse_ioctl_in inarg = { .fh = ff->fh, .cmd = cmd, .arg = arg, .flags = flags }; struct fuse_ioctl_out outarg; struct iovec *iov_page = NULL; struct iovec *in_iov = NULL, *out_iov = NULL; unsigned int in_iovs = 0, out_iovs = 0, max_pages; size_t in_size, out_size, c; ssize_t transferred; int err, i; struct iov_iter ii; struct fuse_args_pages ap = {}; #if BITS_PER_LONG == 32 inarg.flags |= FUSE_IOCTL_32BIT; #else if (flags & FUSE_IOCTL_COMPAT) { inarg.flags |= FUSE_IOCTL_32BIT; #ifdef CONFIG_X86_X32_ABI if (in_x32_syscall()) inarg.flags |= FUSE_IOCTL_COMPAT_X32; #endif } #endif /* assume all the iovs returned by client always fits in a page */ BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); err = -ENOMEM; ap.folios = fuse_folios_alloc(fm->fc->max_pages, GFP_KERNEL, &ap.descs); iov_page = (struct iovec *) __get_free_page(GFP_KERNEL); if (!ap.folios || !iov_page) goto out; fuse_folio_descs_length_init(ap.descs, 0, fm->fc->max_pages); /* * If restricted, initialize IO parameters as encoded in @cmd. * RETRY from server is not allowed. */ if (!(flags & FUSE_IOCTL_UNRESTRICTED)) { struct iovec *iov = iov_page; iov->iov_base = (void __user *)arg; iov->iov_len = _IOC_SIZE(cmd); if (_IOC_DIR(cmd) & _IOC_WRITE) { in_iov = iov; in_iovs = 1; } if (_IOC_DIR(cmd) & _IOC_READ) { out_iov = iov; out_iovs = 1; } err = 0; switch (cmd) { case FS_IOC_MEASURE_VERITY: err = fuse_setup_measure_verity(arg, iov); break; case FS_IOC_ENABLE_VERITY: err = fuse_setup_enable_verity(arg, iov, &in_iovs); break; } if (err) goto out; } retry: inarg.in_size = in_size = iov_length(in_iov, in_iovs); inarg.out_size = out_size = iov_length(out_iov, out_iovs); /* * Out data can be used either for actual out data or iovs, * make sure there always is at least one page. 
*/ out_size = max_t(size_t, out_size, PAGE_SIZE); max_pages = DIV_ROUND_UP(max(in_size, out_size), PAGE_SIZE); /* make sure there are enough buffer pages and init request with them */ err = -ENOMEM; if (max_pages > fm->fc->max_pages) goto out; while (ap.num_folios < max_pages) { ap.folios[ap.num_folios] = folio_alloc(GFP_KERNEL | __GFP_HIGHMEM, 0); if (!ap.folios[ap.num_folios]) goto out; ap.num_folios++; } /* okay, let's send it to the client */ ap.args.opcode = FUSE_IOCTL; ap.args.nodeid = ff->nodeid; ap.args.in_numargs = 1; ap.args.in_args[0].size = sizeof(inarg); ap.args.in_args[0].value = &inarg; if (in_size) { ap.args.in_numargs++; ap.args.in_args[1].size = in_size; ap.args.in_pages = true; err = -EFAULT; iov_iter_init(&ii, ITER_SOURCE, in_iov, in_iovs, in_size); for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_folios); i++) { c = copy_folio_from_iter(ap.folios[i], 0, PAGE_SIZE, &ii); if (c != PAGE_SIZE && iov_iter_count(&ii)) goto out; } } ap.args.out_numargs = 2; ap.args.out_args[1].size = out_size; ap.args.out_pages = true; ap.args.out_argvar = true; transferred = fuse_send_ioctl(fm, &ap.args, &outarg); err = transferred; if (transferred < 0) goto out; /* did it ask for retry? */ if (outarg.flags & FUSE_IOCTL_RETRY) { void *vaddr; /* no retry if in restricted mode */ err = -EIO; if (!(flags & FUSE_IOCTL_UNRESTRICTED)) goto out; in_iovs = outarg.in_iovs; out_iovs = outarg.out_iovs; /* * Make sure things are in boundary, separate checks * are to protect against overflow. */ err = -ENOMEM; if (in_iovs > FUSE_IOCTL_MAX_IOV || out_iovs > FUSE_IOCTL_MAX_IOV || in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV) goto out; vaddr = kmap_local_folio(ap.folios[0], 0); err = fuse_copy_ioctl_iovec(fm->fc, iov_page, vaddr, transferred, in_iovs + out_iovs, (flags & FUSE_IOCTL_COMPAT) != 0); kunmap_local(vaddr); if (err) goto out; in_iov = iov_page; out_iov = in_iov + in_iovs; err = fuse_verify_ioctl_iov(fm->fc, in_iov, in_iovs); if (err) goto out; err = fuse_verify_ioctl_iov(fm->fc, out_iov, out_iovs); if (err) goto out; goto retry; } err = -EIO; if (transferred > inarg.out_size) goto out; err = -EFAULT; iov_iter_init(&ii, ITER_DEST, out_iov, out_iovs, transferred); for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_folios); i++) { c = copy_folio_to_iter(ap.folios[i], 0, PAGE_SIZE, &ii); if (c != PAGE_SIZE && iov_iter_count(&ii)) goto out; } err = 0; out: free_page((unsigned long) iov_page); while (ap.num_folios) folio_put(ap.folios[--ap.num_folios]); kfree(ap.folios); return err ? 
err : outarg.result; } EXPORT_SYMBOL_GPL(fuse_do_ioctl); long fuse_ioctl_common(struct file *file, unsigned int cmd, unsigned long arg, unsigned int flags) { struct inode *inode = file_inode(file); struct fuse_conn *fc = get_fuse_conn(inode); if (!fuse_allow_current_process(fc)) return -EACCES; if (fuse_is_bad(inode)) return -EIO; return fuse_do_ioctl(file, cmd, arg, flags); } long fuse_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { return fuse_ioctl_common(file, cmd, arg, 0); } long fuse_file_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT); } static int fuse_priv_ioctl(struct inode *inode, struct fuse_file *ff, unsigned int cmd, void *ptr, size_t size) { struct fuse_mount *fm = ff->fm; struct fuse_ioctl_in inarg; struct fuse_ioctl_out outarg; FUSE_ARGS(args); int err; memset(&inarg, 0, sizeof(inarg)); inarg.fh = ff->fh; inarg.cmd = cmd; #if BITS_PER_LONG == 32 inarg.flags |= FUSE_IOCTL_32BIT; #endif if (S_ISDIR(inode->i_mode)) inarg.flags |= FUSE_IOCTL_DIR; if (_IOC_DIR(cmd) & _IOC_READ) inarg.out_size = size; if (_IOC_DIR(cmd) & _IOC_WRITE) inarg.in_size = size; args.opcode = FUSE_IOCTL; args.nodeid = ff->nodeid; args.in_numargs = 2; args.in_args[0].size = sizeof(inarg); args.in_args[0].value = &inarg; args.in_args[1].size = inarg.in_size; args.in_args[1].value = ptr; args.out_numargs = 2; args.out_args[1].size = inarg.out_size; args.out_args[1].value = ptr; err = fuse_send_ioctl(fm, &args, &outarg); if (!err) { if (outarg.result < 0) err = outarg.result; else if (outarg.flags & FUSE_IOCTL_RETRY) err = -EIO; } return err; } static struct fuse_file *fuse_priv_ioctl_prepare(struct inode *inode) { struct fuse_mount *fm = get_fuse_mount(inode); bool isdir = S_ISDIR(inode->i_mode); if (!fuse_allow_current_process(fm->fc)) return ERR_PTR(-EACCES); if (fuse_is_bad(inode)) return ERR_PTR(-EIO); if (!S_ISREG(inode->i_mode) && !isdir) return ERR_PTR(-ENOTTY); return fuse_file_open(fm, get_node_id(inode), O_RDONLY, isdir); } static void fuse_priv_ioctl_cleanup(struct inode *inode, struct fuse_file *ff) { fuse_file_release(inode, ff, O_RDONLY, NULL, S_ISDIR(inode->i_mode)); } int fuse_fileattr_get(struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); struct fuse_file *ff; unsigned int flags; struct fsxattr xfa; int err; ff = fuse_priv_ioctl_prepare(inode); if (IS_ERR(ff)) return PTR_ERR(ff); if (fa->flags_valid) { err = fuse_priv_ioctl(inode, ff, FS_IOC_GETFLAGS, &flags, sizeof(flags)); if (err) goto cleanup; fileattr_fill_flags(fa, flags); } else { err = fuse_priv_ioctl(inode, ff, FS_IOC_FSGETXATTR, &xfa, sizeof(xfa)); if (err) goto cleanup; fileattr_fill_xflags(fa, xfa.fsx_xflags); fa->fsx_extsize = xfa.fsx_extsize; fa->fsx_nextents = xfa.fsx_nextents; fa->fsx_projid = xfa.fsx_projid; fa->fsx_cowextsize = xfa.fsx_cowextsize; } cleanup: fuse_priv_ioctl_cleanup(inode, ff); return err; } int fuse_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); struct fuse_file *ff; unsigned int flags = fa->flags; struct fsxattr xfa; int err; ff = fuse_priv_ioctl_prepare(inode); if (IS_ERR(ff)) return PTR_ERR(ff); if (fa->flags_valid) { err = fuse_priv_ioctl(inode, ff, FS_IOC_SETFLAGS, &flags, sizeof(flags)); if (err) goto cleanup; } else { memset(&xfa, 0, sizeof(xfa)); xfa.fsx_xflags = fa->fsx_xflags; xfa.fsx_extsize = fa->fsx_extsize; xfa.fsx_nextents = fa->fsx_nextents; xfa.fsx_projid = fa->fsx_projid; 
xfa.fsx_cowextsize = fa->fsx_cowextsize; err = fuse_priv_ioctl(inode, ff, FS_IOC_FSSETXATTR, &xfa, sizeof(xfa)); } cleanup: fuse_priv_ioctl_cleanup(inode, ff); return err; }
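/*
 * Editor's illustrative sketch (not part of the file above): the server-side
 * view of the FUSE_IOCTL_RETRY handshake described in the long comment above
 * fuse_do_ioctl(), for an ioctl whose argument is
 *
 *	struct a { char *buf; size_t buflen; };
 *
 * On the first retry the server only knows the caller's argument pointer and
 * asks the kernel to copy in struct a; on the second retry it also asks for
 * the buffer that struct a points to.  Only the iovec arrays handed back with
 * FUSE_IOCTL_RETRY are built here; the helper names and the surrounding
 * request/reply plumbing (e.g. a libfuse ioctl handler) are assumptions for
 * the sake of the example.
 */
#include <stddef.h>
#include <sys/uio.h>

struct a {
	char *buf;
	size_t buflen;
};

/* round 1: only the ioctl argument pointer (inarg.arg) is known */
size_t first_retry(void *arg, struct iovec in_iov[2])
{
	in_iov[0].iov_base = arg;
	in_iov[0].iov_len  = sizeof(struct a);
	return 1;			/* becomes out->in_iovs */
}

/* round 2: the kernel copied struct a in, so buf/buflen can be chased */
size_t second_retry(void *arg, const struct a *copied_in,
		    struct iovec in_iov[2])
{
	in_iov[0].iov_base = arg;
	in_iov[0].iov_len  = sizeof(struct a);
	in_iov[1].iov_base = copied_in->buf;
	in_iov[1].iov_len  = copied_in->buflen;
	return 2;			/* becomes out->in_iovs */
}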
// SPDX-License-Identifier: GPL-2.0-only
/*
 * The input core
 *
 * Copyright (c) 1999-2002 Vojtech Pavlik
 */

#define pr_fmt(fmt) KBUILD_BASENAME ": " fmt

#include <linux/init.h>
#include <linux/types.h>
#include <linux/idr.h>
#include <linux/input/mt.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/major.h>
#include <linux/proc_fs.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/pm.h>
#include <linux/poll.h> #include <linux/device.h> #include <linux/kstrtox.h> #include <linux/mutex.h> #include <linux/rcupdate.h> #include "input-compat.h" #include "input-core-private.h" #include "input-poller.h" MODULE_AUTHOR("Vojtech Pavlik <vojtech@suse.cz>"); MODULE_DESCRIPTION("Input core"); MODULE_LICENSE("GPL"); #define INPUT_MAX_CHAR_DEVICES 1024 #define INPUT_FIRST_DYNAMIC_DEV 256 static DEFINE_IDA(input_ida); static LIST_HEAD(input_dev_list); static LIST_HEAD(input_handler_list); /* * input_mutex protects access to both input_dev_list and input_handler_list. * This also causes input_[un]register_device and input_[un]register_handler * be mutually exclusive which simplifies locking in drivers implementing * input handlers. */ static DEFINE_MUTEX(input_mutex); static const struct input_value input_value_sync = { EV_SYN, SYN_REPORT, 1 }; static const unsigned int input_max_code[EV_CNT] = { [EV_KEY] = KEY_MAX, [EV_REL] = REL_MAX, [EV_ABS] = ABS_MAX, [EV_MSC] = MSC_MAX, [EV_SW] = SW_MAX, [EV_LED] = LED_MAX, [EV_SND] = SND_MAX, [EV_FF] = FF_MAX, }; static inline int is_event_supported(unsigned int code, unsigned long *bm, unsigned int max) { return code <= max && test_bit(code, bm); } static int input_defuzz_abs_event(int value, int old_val, int fuzz) { if (fuzz) { if (value > old_val - fuzz / 2 && value < old_val + fuzz / 2) return old_val; if (value > old_val - fuzz && value < old_val + fuzz) return (old_val * 3 + value) / 4; if (value > old_val - fuzz * 2 && value < old_val + fuzz * 2) return (old_val + value) / 2; } return value; } static void input_start_autorepeat(struct input_dev *dev, int code) { if (test_bit(EV_REP, dev->evbit) && dev->rep[REP_PERIOD] && dev->rep[REP_DELAY] && dev->timer.function) { dev->repeat_key = code; mod_timer(&dev->timer, jiffies + msecs_to_jiffies(dev->rep[REP_DELAY])); } } static void input_stop_autorepeat(struct input_dev *dev) { del_timer(&dev->timer); } /* * Pass values first through all filters and then, if event has not been * filtered out, through all open handles. This order is achieved by placing * filters at the head of the list of handles attached to the device, and * placing regular handles at the tail of the list. * * This function is called with dev->event_lock held and interrupts disabled. */ static void input_pass_values(struct input_dev *dev, struct input_value *vals, unsigned int count) { struct input_handle *handle; struct input_value *v; lockdep_assert_held(&dev->event_lock); scoped_guard(rcu) { handle = rcu_dereference(dev->grab); if (handle) { count = handle->handle_events(handle, vals, count); break; } list_for_each_entry_rcu(handle, &dev->h_list, d_node) { if (handle->open) { count = handle->handle_events(handle, vals, count); if (!count) break; } } } /* trigger auto repeat for key events */ if (test_bit(EV_REP, dev->evbit) && test_bit(EV_KEY, dev->evbit)) { for (v = vals; v != vals + count; v++) { if (v->type == EV_KEY && v->value != 2) { if (v->value) input_start_autorepeat(dev, v->code); else input_stop_autorepeat(dev); } } } } #define INPUT_IGNORE_EVENT 0 #define INPUT_PASS_TO_HANDLERS 1 #define INPUT_PASS_TO_DEVICE 2 #define INPUT_SLOT 4 #define INPUT_FLUSH 8 #define INPUT_PASS_TO_ALL (INPUT_PASS_TO_HANDLERS | INPUT_PASS_TO_DEVICE) static int input_handle_abs_event(struct input_dev *dev, unsigned int code, int *pval) { struct input_mt *mt = dev->mt; bool is_new_slot = false; bool is_mt_event; int *pold; if (code == ABS_MT_SLOT) { /* * "Stage" the event; we'll flush it later, when we * get actual touch data. 
*/ if (mt && *pval >= 0 && *pval < mt->num_slots) mt->slot = *pval; return INPUT_IGNORE_EVENT; } is_mt_event = input_is_mt_value(code); if (!is_mt_event) { pold = &dev->absinfo[code].value; } else if (mt) { pold = &mt->slots[mt->slot].abs[code - ABS_MT_FIRST]; is_new_slot = mt->slot != dev->absinfo[ABS_MT_SLOT].value; } else { /* * Bypass filtering for multi-touch events when * not employing slots. */ pold = NULL; } if (pold) { *pval = input_defuzz_abs_event(*pval, *pold, dev->absinfo[code].fuzz); if (*pold == *pval) return INPUT_IGNORE_EVENT; *pold = *pval; } /* Flush pending "slot" event */ if (is_new_slot) { dev->absinfo[ABS_MT_SLOT].value = mt->slot; return INPUT_PASS_TO_HANDLERS | INPUT_SLOT; } return INPUT_PASS_TO_HANDLERS; } static int input_get_disposition(struct input_dev *dev, unsigned int type, unsigned int code, int *pval) { int disposition = INPUT_IGNORE_EVENT; int value = *pval; /* filter-out events from inhibited devices */ if (dev->inhibited) return INPUT_IGNORE_EVENT; switch (type) { case EV_SYN: switch (code) { case SYN_CONFIG: disposition = INPUT_PASS_TO_ALL; break; case SYN_REPORT: disposition = INPUT_PASS_TO_HANDLERS | INPUT_FLUSH; break; case SYN_MT_REPORT: disposition = INPUT_PASS_TO_HANDLERS; break; } break; case EV_KEY: if (is_event_supported(code, dev->keybit, KEY_MAX)) { /* auto-repeat bypasses state updates */ if (value == 2) { disposition = INPUT_PASS_TO_HANDLERS; break; } if (!!test_bit(code, dev->key) != !!value) { __change_bit(code, dev->key); disposition = INPUT_PASS_TO_HANDLERS; } } break; case EV_SW: if (is_event_supported(code, dev->swbit, SW_MAX) && !!test_bit(code, dev->sw) != !!value) { __change_bit(code, dev->sw); disposition = INPUT_PASS_TO_HANDLERS; } break; case EV_ABS: if (is_event_supported(code, dev->absbit, ABS_MAX)) disposition = input_handle_abs_event(dev, code, &value); break; case EV_REL: if (is_event_supported(code, dev->relbit, REL_MAX) && value) disposition = INPUT_PASS_TO_HANDLERS; break; case EV_MSC: if (is_event_supported(code, dev->mscbit, MSC_MAX)) disposition = INPUT_PASS_TO_ALL; break; case EV_LED: if (is_event_supported(code, dev->ledbit, LED_MAX) && !!test_bit(code, dev->led) != !!value) { __change_bit(code, dev->led); disposition = INPUT_PASS_TO_ALL; } break; case EV_SND: if (is_event_supported(code, dev->sndbit, SND_MAX)) { if (!!test_bit(code, dev->snd) != !!value) __change_bit(code, dev->snd); disposition = INPUT_PASS_TO_ALL; } break; case EV_REP: if (code <= REP_MAX && value >= 0 && dev->rep[code] != value) { dev->rep[code] = value; disposition = INPUT_PASS_TO_ALL; } break; case EV_FF: if (value >= 0) disposition = INPUT_PASS_TO_ALL; break; case EV_PWR: disposition = INPUT_PASS_TO_ALL; break; } *pval = value; return disposition; } static void input_event_dispose(struct input_dev *dev, int disposition, unsigned int type, unsigned int code, int value) { if ((disposition & INPUT_PASS_TO_DEVICE) && dev->event) dev->event(dev, type, code, value); if (disposition & INPUT_PASS_TO_HANDLERS) { struct input_value *v; if (disposition & INPUT_SLOT) { v = &dev->vals[dev->num_vals++]; v->type = EV_ABS; v->code = ABS_MT_SLOT; v->value = dev->mt->slot; } v = &dev->vals[dev->num_vals++]; v->type = type; v->code = code; v->value = value; } if (disposition & INPUT_FLUSH) { if (dev->num_vals >= 2) input_pass_values(dev, dev->vals, dev->num_vals); dev->num_vals = 0; /* * Reset the timestamp on flush so we won't end up * with a stale one. 
Note we only need to reset the * monolithic one as we use its presence when deciding * whether to generate a synthetic timestamp. */ dev->timestamp[INPUT_CLK_MONO] = ktime_set(0, 0); } else if (dev->num_vals >= dev->max_vals - 2) { dev->vals[dev->num_vals++] = input_value_sync; input_pass_values(dev, dev->vals, dev->num_vals); dev->num_vals = 0; } } void input_handle_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) { int disposition; lockdep_assert_held(&dev->event_lock); disposition = input_get_disposition(dev, type, code, &value); if (disposition != INPUT_IGNORE_EVENT) { if (type != EV_SYN) add_input_randomness(type, code, value); input_event_dispose(dev, disposition, type, code, value); } } /** * input_event() - report new input event * @dev: device that generated the event * @type: type of the event * @code: event code * @value: value of the event * * This function should be used by drivers implementing various input * devices to report input events. See also input_inject_event(). * * NOTE: input_event() may be safely used right after input device was * allocated with input_allocate_device(), even before it is registered * with input_register_device(), but the event will not reach any of the * input handlers. Such early invocation of input_event() may be used * to 'seed' initial state of a switch or initial position of absolute * axis, etc. */ void input_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) { if (is_event_supported(type, dev->evbit, EV_MAX)) { guard(spinlock_irqsave)(&dev->event_lock); input_handle_event(dev, type, code, value); } } EXPORT_SYMBOL(input_event); /** * input_inject_event() - send input event from input handler * @handle: input handle to send event through * @type: type of the event * @code: event code * @value: value of the event * * Similar to input_event() but will ignore event if device is * "grabbed" and handle injecting event is not the one that owns * the device. */ void input_inject_event(struct input_handle *handle, unsigned int type, unsigned int code, int value) { struct input_dev *dev = handle->dev; struct input_handle *grab; if (is_event_supported(type, dev->evbit, EV_MAX)) { guard(spinlock_irqsave)(&dev->event_lock); guard(rcu)(); grab = rcu_dereference(dev->grab); if (!grab || grab == handle) input_handle_event(dev, type, code, value); } } EXPORT_SYMBOL(input_inject_event); /** * input_alloc_absinfo - allocates array of input_absinfo structs * @dev: the input device emitting absolute events * * If the absinfo struct the caller asked for is already allocated, this * functions will not do anything. */ void input_alloc_absinfo(struct input_dev *dev) { if (dev->absinfo) return; dev->absinfo = kcalloc(ABS_CNT, sizeof(*dev->absinfo), GFP_KERNEL); if (!dev->absinfo) { dev_err(dev->dev.parent ?: &dev->dev, "%s: unable to allocate memory\n", __func__); /* * We will handle this allocation failure in * input_register_device() when we refuse to register input * device with ABS bits but without absinfo. 
*/ } } EXPORT_SYMBOL(input_alloc_absinfo); void input_set_abs_params(struct input_dev *dev, unsigned int axis, int min, int max, int fuzz, int flat) { struct input_absinfo *absinfo; __set_bit(EV_ABS, dev->evbit); __set_bit(axis, dev->absbit); input_alloc_absinfo(dev); if (!dev->absinfo) return; absinfo = &dev->absinfo[axis]; absinfo->minimum = min; absinfo->maximum = max; absinfo->fuzz = fuzz; absinfo->flat = flat; } EXPORT_SYMBOL(input_set_abs_params); /** * input_copy_abs - Copy absinfo from one input_dev to another * @dst: Destination input device to copy the abs settings to * @dst_axis: ABS_* value selecting the destination axis * @src: Source input device to copy the abs settings from * @src_axis: ABS_* value selecting the source axis * * Set absinfo for the selected destination axis by copying it from * the specified source input device's source axis. * This is useful to e.g. setup a pen/stylus input-device for combined * touchscreen/pen hardware where the pen uses the same coordinates as * the touchscreen. */ void input_copy_abs(struct input_dev *dst, unsigned int dst_axis, const struct input_dev *src, unsigned int src_axis) { /* src must have EV_ABS and src_axis set */ if (WARN_ON(!(test_bit(EV_ABS, src->evbit) && test_bit(src_axis, src->absbit)))) return; /* * input_alloc_absinfo() may have failed for the source. Our caller is * expected to catch this when registering the input devices, which may * happen after the input_copy_abs() call. */ if (!src->absinfo) return; input_set_capability(dst, EV_ABS, dst_axis); if (!dst->absinfo) return; dst->absinfo[dst_axis] = src->absinfo[src_axis]; } EXPORT_SYMBOL(input_copy_abs); /** * input_grab_device - grabs device for exclusive use * @handle: input handle that wants to own the device * * When a device is grabbed by an input handle all events generated by * the device are delivered only to this handle. Also events injected * by other input handles are ignored while device is grabbed. */ int input_grab_device(struct input_handle *handle) { struct input_dev *dev = handle->dev; scoped_cond_guard(mutex_intr, return -EINTR, &dev->mutex) { if (dev->grab) return -EBUSY; rcu_assign_pointer(dev->grab, handle); } return 0; } EXPORT_SYMBOL(input_grab_device); static void __input_release_device(struct input_handle *handle) { struct input_dev *dev = handle->dev; struct input_handle *grabber; grabber = rcu_dereference_protected(dev->grab, lockdep_is_held(&dev->mutex)); if (grabber == handle) { rcu_assign_pointer(dev->grab, NULL); /* Make sure input_pass_values() notices that grab is gone */ synchronize_rcu(); list_for_each_entry(handle, &dev->h_list, d_node) if (handle->open && handle->handler->start) handle->handler->start(handle); } } /** * input_release_device - release previously grabbed device * @handle: input handle that owns the device * * Releases previously grabbed device so that other input handles can * start receiving input events. Upon release all handlers attached * to the device have their start() method called so they have a change * to synchronize device state with the rest of the system. */ void input_release_device(struct input_handle *handle) { struct input_dev *dev = handle->dev; guard(mutex)(&dev->mutex); __input_release_device(handle); } EXPORT_SYMBOL(input_release_device); /** * input_open_device - open input device * @handle: handle through which device is being accessed * * This function should be called by input handlers when they * want to start receive events from given input device. 
*/ int input_open_device(struct input_handle *handle) { struct input_dev *dev = handle->dev; int error; scoped_cond_guard(mutex_intr, return -EINTR, &dev->mutex) { if (dev->going_away) return -ENODEV; handle->open++; if (handle->handler->passive_observer) return 0; if (dev->users++ || dev->inhibited) { /* * Device is already opened and/or inhibited, * so we can exit immediately and report success. */ return 0; } if (dev->open) { error = dev->open(dev); if (error) { dev->users--; handle->open--; /* * Make sure we are not delivering any more * events through this handle. */ synchronize_rcu(); return error; } } if (dev->poller) input_dev_poller_start(dev->poller); } return 0; } EXPORT_SYMBOL(input_open_device); int input_flush_device(struct input_handle *handle, struct file *file) { struct input_dev *dev = handle->dev; scoped_cond_guard(mutex_intr, return -EINTR, &dev->mutex) { if (dev->flush) return dev->flush(dev, file); } return 0; } EXPORT_SYMBOL(input_flush_device); /** * input_close_device - close input device * @handle: handle through which device is being accessed * * This function should be called by input handlers when they * want to stop receive events from given input device. */ void input_close_device(struct input_handle *handle) { struct input_dev *dev = handle->dev; guard(mutex)(&dev->mutex); __input_release_device(handle); if (!handle->handler->passive_observer) { if (!--dev->users && !dev->inhibited) { if (dev->poller) input_dev_poller_stop(dev->poller); if (dev->close) dev->close(dev); } } if (!--handle->open) { /* * synchronize_rcu() makes sure that input_pass_values() * completed and that no more input events are delivered * through this handle */ synchronize_rcu(); } } EXPORT_SYMBOL(input_close_device); /* * Simulate keyup events for all keys that are marked as pressed. * The function must be called with dev->event_lock held. */ static bool input_dev_release_keys(struct input_dev *dev) { bool need_sync = false; int code; lockdep_assert_held(&dev->event_lock); if (is_event_supported(EV_KEY, dev->evbit, EV_MAX)) { for_each_set_bit(code, dev->key, KEY_CNT) { input_handle_event(dev, EV_KEY, code, 0); need_sync = true; } } return need_sync; } /* * Prepare device for unregistering */ static void input_disconnect_device(struct input_dev *dev) { struct input_handle *handle; /* * Mark device as going away. Note that we take dev->mutex here * not to protect access to dev->going_away but rather to ensure * that there are no threads in the middle of input_open_device() */ scoped_guard(mutex, &dev->mutex) dev->going_away = true; guard(spinlock_irq)(&dev->event_lock); /* * Simulate keyup events for all pressed keys so that handlers * are not left with "stuck" keys. The driver may continue * generate events even after we done here but they will not * reach any handlers. */ if (input_dev_release_keys(dev)) input_handle_event(dev, EV_SYN, SYN_REPORT, 1); list_for_each_entry(handle, &dev->h_list, d_node) handle->open = 0; } /** * input_scancode_to_scalar() - converts scancode in &struct input_keymap_entry * @ke: keymap entry containing scancode to be converted. * @scancode: pointer to the location where converted scancode should * be stored. * * This function is used to convert scancode stored in &struct keymap_entry * into scalar form understood by legacy keymap handling methods. These * methods expect scancodes to be represented as 'unsigned int'. 
*/ int input_scancode_to_scalar(const struct input_keymap_entry *ke, unsigned int *scancode) { switch (ke->len) { case 1: *scancode = *((u8 *)ke->scancode); break; case 2: *scancode = *((u16 *)ke->scancode); break; case 4: *scancode = *((u32 *)ke->scancode); break; default: return -EINVAL; } return 0; } EXPORT_SYMBOL(input_scancode_to_scalar); /* * Those routines handle the default case where no [gs]etkeycode() is * defined. In this case, an array indexed by the scancode is used. */ static unsigned int input_fetch_keycode(struct input_dev *dev, unsigned int index) { switch (dev->keycodesize) { case 1: return ((u8 *)dev->keycode)[index]; case 2: return ((u16 *)dev->keycode)[index]; default: return ((u32 *)dev->keycode)[index]; } } static int input_default_getkeycode(struct input_dev *dev, struct input_keymap_entry *ke) { unsigned int index; int error; if (!dev->keycodesize) return -EINVAL; if (ke->flags & INPUT_KEYMAP_BY_INDEX) index = ke->index; else { error = input_scancode_to_scalar(ke, &index); if (error) return error; } if (index >= dev->keycodemax) return -EINVAL; ke->keycode = input_fetch_keycode(dev, index); ke->index = index; ke->len = sizeof(index); memcpy(ke->scancode, &index, sizeof(index)); return 0; } static int input_default_setkeycode(struct input_dev *dev, const struct input_keymap_entry *ke, unsigned int *old_keycode) { unsigned int index; int error; int i; if (!dev->keycodesize) return -EINVAL; if (ke->flags & INPUT_KEYMAP_BY_INDEX) { index = ke->index; } else { error = input_scancode_to_scalar(ke, &index); if (error) return error; } if (index >= dev->keycodemax) return -EINVAL; if (dev->keycodesize < sizeof(ke->keycode) && (ke->keycode >> (dev->keycodesize * 8))) return -EINVAL; switch (dev->keycodesize) { case 1: { u8 *k = (u8 *)dev->keycode; *old_keycode = k[index]; k[index] = ke->keycode; break; } case 2: { u16 *k = (u16 *)dev->keycode; *old_keycode = k[index]; k[index] = ke->keycode; break; } default: { u32 *k = (u32 *)dev->keycode; *old_keycode = k[index]; k[index] = ke->keycode; break; } } if (*old_keycode <= KEY_MAX) { __clear_bit(*old_keycode, dev->keybit); for (i = 0; i < dev->keycodemax; i++) { if (input_fetch_keycode(dev, i) == *old_keycode) { __set_bit(*old_keycode, dev->keybit); /* Setting the bit twice is useless, so break */ break; } } } __set_bit(ke->keycode, dev->keybit); return 0; } /** * input_get_keycode - retrieve keycode currently mapped to a given scancode * @dev: input device which keymap is being queried * @ke: keymap entry * * This function should be called by anyone interested in retrieving current * keymap. Presently evdev handlers use it. */ int input_get_keycode(struct input_dev *dev, struct input_keymap_entry *ke) { guard(spinlock_irqsave)(&dev->event_lock); return dev->getkeycode(dev, ke); } EXPORT_SYMBOL(input_get_keycode); /** * input_set_keycode - attribute a keycode to a given scancode * @dev: input device which keymap is being updated * @ke: new keymap entry * * This function should be called by anyone needing to update current * keymap. Presently keyboard and evdev handlers use it. */ int input_set_keycode(struct input_dev *dev, const struct input_keymap_entry *ke) { unsigned int old_keycode; int error; if (ke->keycode > KEY_MAX) return -EINVAL; guard(spinlock_irqsave)(&dev->event_lock); error = dev->setkeycode(dev, ke, &old_keycode); if (error) return error; /* Make sure KEY_RESERVED did not get enabled. 
*/ __clear_bit(KEY_RESERVED, dev->keybit); /* * Simulate keyup event if keycode is not present * in the keymap anymore */ if (old_keycode > KEY_MAX) { dev_warn(dev->dev.parent ?: &dev->dev, "%s: got too big old keycode %#x\n", __func__, old_keycode); } else if (test_bit(EV_KEY, dev->evbit) && !is_event_supported(old_keycode, dev->keybit, KEY_MAX) && __test_and_clear_bit(old_keycode, dev->key)) { /* * We have to use input_event_dispose() here directly instead * of input_handle_event() because the key we want to release * here is considered no longer supported by the device and * input_handle_event() will ignore it. */ input_event_dispose(dev, INPUT_PASS_TO_HANDLERS, EV_KEY, old_keycode, 0); input_event_dispose(dev, INPUT_PASS_TO_HANDLERS | INPUT_FLUSH, EV_SYN, SYN_REPORT, 1); } return 0; } EXPORT_SYMBOL(input_set_keycode); bool input_match_device_id(const struct input_dev *dev, const struct input_device_id *id) { if (id->flags & INPUT_DEVICE_ID_MATCH_BUS) if (id->bustype != dev->id.bustype) return false; if (id->flags & INPUT_DEVICE_ID_MATCH_VENDOR) if (id->vendor != dev->id.vendor) return false; if (id->flags & INPUT_DEVICE_ID_MATCH_PRODUCT) if (id->product != dev->id.product) return false; if (id->flags & INPUT_DEVICE_ID_MATCH_VERSION) if (id->version != dev->id.version) return false; if (!bitmap_subset(id->evbit, dev->evbit, EV_MAX) || !bitmap_subset(id->keybit, dev->keybit, KEY_MAX) || !bitmap_subset(id->relbit, dev->relbit, REL_MAX) || !bitmap_subset(id->absbit, dev->absbit, ABS_MAX) || !bitmap_subset(id->mscbit, dev->mscbit, MSC_MAX) || !bitmap_subset(id->ledbit, dev->ledbit, LED_MAX) || !bitmap_subset(id->sndbit, dev->sndbit, SND_MAX) || !bitmap_subset(id->ffbit, dev->ffbit, FF_MAX) || !bitmap_subset(id->swbit, dev->swbit, SW_MAX) || !bitmap_subset(id->propbit, dev->propbit, INPUT_PROP_MAX)) { return false; } return true; } EXPORT_SYMBOL(input_match_device_id); static const struct input_device_id *input_match_device(struct input_handler *handler, struct input_dev *dev) { const struct input_device_id *id; for (id = handler->id_table; id->flags || id->driver_info; id++) { if (input_match_device_id(dev, id) && (!handler->match || handler->match(handler, dev))) { return id; } } return NULL; } static int input_attach_handler(struct input_dev *dev, struct input_handler *handler) { const struct input_device_id *id; int error; id = input_match_device(handler, dev); if (!id) return -ENODEV; error = handler->connect(handler, dev, id); if (error && error != -ENODEV) pr_err("failed to attach handler %s to device %s, error: %d\n", handler->name, kobject_name(&dev->dev.kobj), error); return error; } #ifdef CONFIG_COMPAT static int input_bits_to_string(char *buf, int buf_size, unsigned long bits, bool skip_empty) { int len = 0; if (in_compat_syscall()) { u32 dword = bits >> 32; if (dword || !skip_empty) len += snprintf(buf, buf_size, "%x ", dword); dword = bits & 0xffffffffUL; if (dword || !skip_empty || len) len += snprintf(buf + len, max(buf_size - len, 0), "%x", dword); } else { if (bits || !skip_empty) len += snprintf(buf, buf_size, "%lx", bits); } return len; } #else /* !CONFIG_COMPAT */ static int input_bits_to_string(char *buf, int buf_size, unsigned long bits, bool skip_empty) { return bits || !skip_empty ? 
snprintf(buf, buf_size, "%lx", bits) : 0; } #endif #ifdef CONFIG_PROC_FS static struct proc_dir_entry *proc_bus_input_dir; static DECLARE_WAIT_QUEUE_HEAD(input_devices_poll_wait); static int input_devices_state; static inline void input_wakeup_procfs_readers(void) { input_devices_state++; wake_up(&input_devices_poll_wait); } struct input_seq_state { unsigned short pos; bool mutex_acquired; int input_devices_state; }; static __poll_t input_proc_devices_poll(struct file *file, poll_table *wait) { struct seq_file *seq = file->private_data; struct input_seq_state *state = seq->private; poll_wait(file, &input_devices_poll_wait, wait); if (state->input_devices_state != input_devices_state) { state->input_devices_state = input_devices_state; return EPOLLIN | EPOLLRDNORM; } return 0; } static void *input_devices_seq_start(struct seq_file *seq, loff_t *pos) { struct input_seq_state *state = seq->private; int error; error = mutex_lock_interruptible(&input_mutex); if (error) { state->mutex_acquired = false; return ERR_PTR(error); } state->mutex_acquired = true; return seq_list_start(&input_dev_list, *pos); } static void *input_devices_seq_next(struct seq_file *seq, void *v, loff_t *pos) { return seq_list_next(v, &input_dev_list, pos); } static void input_seq_stop(struct seq_file *seq, void *v) { struct input_seq_state *state = seq->private; if (state->mutex_acquired) mutex_unlock(&input_mutex); } static void input_seq_print_bitmap(struct seq_file *seq, const char *name, unsigned long *bitmap, int max) { int i; bool skip_empty = true; char buf[18]; seq_printf(seq, "B: %s=", name); for (i = BITS_TO_LONGS(max) - 1; i >= 0; i--) { if (input_bits_to_string(buf, sizeof(buf), bitmap[i], skip_empty)) { skip_empty = false; seq_printf(seq, "%s%s", buf, i > 0 ? " " : ""); } } /* * If no output was produced print a single 0. */ if (skip_empty) seq_putc(seq, '0'); seq_putc(seq, '\n'); } static int input_devices_seq_show(struct seq_file *seq, void *v) { struct input_dev *dev = container_of(v, struct input_dev, node); const char *path = kobject_get_path(&dev->dev.kobj, GFP_KERNEL); struct input_handle *handle; seq_printf(seq, "I: Bus=%04x Vendor=%04x Product=%04x Version=%04x\n", dev->id.bustype, dev->id.vendor, dev->id.product, dev->id.version); seq_printf(seq, "N: Name=\"%s\"\n", dev->name ? dev->name : ""); seq_printf(seq, "P: Phys=%s\n", dev->phys ? dev->phys : ""); seq_printf(seq, "S: Sysfs=%s\n", path ? path : ""); seq_printf(seq, "U: Uniq=%s\n", dev->uniq ? 
dev->uniq : ""); seq_puts(seq, "H: Handlers="); list_for_each_entry(handle, &dev->h_list, d_node) seq_printf(seq, "%s ", handle->name); seq_putc(seq, '\n'); input_seq_print_bitmap(seq, "PROP", dev->propbit, INPUT_PROP_MAX); input_seq_print_bitmap(seq, "EV", dev->evbit, EV_MAX); if (test_bit(EV_KEY, dev->evbit)) input_seq_print_bitmap(seq, "KEY", dev->keybit, KEY_MAX); if (test_bit(EV_REL, dev->evbit)) input_seq_print_bitmap(seq, "REL", dev->relbit, REL_MAX); if (test_bit(EV_ABS, dev->evbit)) input_seq_print_bitmap(seq, "ABS", dev->absbit, ABS_MAX); if (test_bit(EV_MSC, dev->evbit)) input_seq_print_bitmap(seq, "MSC", dev->mscbit, MSC_MAX); if (test_bit(EV_LED, dev->evbit)) input_seq_print_bitmap(seq, "LED", dev->ledbit, LED_MAX); if (test_bit(EV_SND, dev->evbit)) input_seq_print_bitmap(seq, "SND", dev->sndbit, SND_MAX); if (test_bit(EV_FF, dev->evbit)) input_seq_print_bitmap(seq, "FF", dev->ffbit, FF_MAX); if (test_bit(EV_SW, dev->evbit)) input_seq_print_bitmap(seq, "SW", dev->swbit, SW_MAX); seq_putc(seq, '\n'); kfree(path); return 0; } static const struct seq_operations input_devices_seq_ops = { .start = input_devices_seq_start, .next = input_devices_seq_next, .stop = input_seq_stop, .show = input_devices_seq_show, }; static int input_proc_devices_open(struct inode *inode, struct file *file) { return seq_open_private(file, &input_devices_seq_ops, sizeof(struct input_seq_state)); } static const struct proc_ops input_devices_proc_ops = { .proc_open = input_proc_devices_open, .proc_poll = input_proc_devices_poll, .proc_read = seq_read, .proc_lseek = seq_lseek, .proc_release = seq_release_private, }; static void *input_handlers_seq_start(struct seq_file *seq, loff_t *pos) { struct input_seq_state *state = seq->private; int error; error = mutex_lock_interruptible(&input_mutex); if (error) { state->mutex_acquired = false; return ERR_PTR(error); } state->mutex_acquired = true; state->pos = *pos; return seq_list_start(&input_handler_list, *pos); } static void *input_handlers_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct input_seq_state *state = seq->private; state->pos = *pos + 1; return seq_list_next(v, &input_handler_list, pos); } static int input_handlers_seq_show(struct seq_file *seq, void *v) { struct input_handler *handler = container_of(v, struct input_handler, node); struct input_seq_state *state = seq->private; seq_printf(seq, "N: Number=%u Name=%s", state->pos, handler->name); if (handler->filter) seq_puts(seq, " (filter)"); if (handler->legacy_minors) seq_printf(seq, " Minor=%d", handler->minor); seq_putc(seq, '\n'); return 0; } static const struct seq_operations input_handlers_seq_ops = { .start = input_handlers_seq_start, .next = input_handlers_seq_next, .stop = input_seq_stop, .show = input_handlers_seq_show, }; static int input_proc_handlers_open(struct inode *inode, struct file *file) { return seq_open_private(file, &input_handlers_seq_ops, sizeof(struct input_seq_state)); } static const struct proc_ops input_handlers_proc_ops = { .proc_open = input_proc_handlers_open, .proc_read = seq_read, .proc_lseek = seq_lseek, .proc_release = seq_release_private, }; static int __init input_proc_init(void) { struct proc_dir_entry *entry; proc_bus_input_dir = proc_mkdir("bus/input", NULL); if (!proc_bus_input_dir) return -ENOMEM; entry = proc_create("devices", 0, proc_bus_input_dir, &input_devices_proc_ops); if (!entry) goto fail1; entry = proc_create("handlers", 0, proc_bus_input_dir, &input_handlers_proc_ops); if (!entry) goto fail2; return 0; fail2: 
remove_proc_entry("devices", proc_bus_input_dir); fail1: remove_proc_entry("bus/input", NULL); return -ENOMEM; } static void input_proc_exit(void) { remove_proc_entry("devices", proc_bus_input_dir); remove_proc_entry("handlers", proc_bus_input_dir); remove_proc_entry("bus/input", NULL); } #else /* !CONFIG_PROC_FS */ static inline void input_wakeup_procfs_readers(void) { } static inline int input_proc_init(void) { return 0; } static inline void input_proc_exit(void) { } #endif #define INPUT_DEV_STRING_ATTR_SHOW(name) \ static ssize_t input_dev_show_##name(struct device *dev, \ struct device_attribute *attr, \ char *buf) \ { \ struct input_dev *input_dev = to_input_dev(dev); \ \ return sysfs_emit(buf, "%s\n", \ input_dev->name ? input_dev->name : ""); \ } \ static DEVICE_ATTR(name, S_IRUGO, input_dev_show_##name, NULL) INPUT_DEV_STRING_ATTR_SHOW(name); INPUT_DEV_STRING_ATTR_SHOW(phys); INPUT_DEV_STRING_ATTR_SHOW(uniq); static int input_print_modalias_bits(char *buf, int size, char name, const unsigned long *bm, unsigned int min_bit, unsigned int max_bit) { int bit = min_bit; int len = 0; len += snprintf(buf, max(size, 0), "%c", name); for_each_set_bit_from(bit, bm, max_bit) len += snprintf(buf + len, max(size - len, 0), "%X,", bit); return len; } static int input_print_modalias_parts(char *buf, int size, int full_len, const struct input_dev *id) { int len, klen, remainder, space; len = snprintf(buf, max(size, 0), "input:b%04Xv%04Xp%04Xe%04X-", id->id.bustype, id->id.vendor, id->id.product, id->id.version); len += input_print_modalias_bits(buf + len, size - len, 'e', id->evbit, 0, EV_MAX); /* * Calculate the remaining space in the buffer making sure we * have place for the terminating 0. */ space = max(size - (len + 1), 0); klen = input_print_modalias_bits(buf + len, size - len, 'k', id->keybit, KEY_MIN_INTERESTING, KEY_MAX); len += klen; /* * If we have more data than we can fit in the buffer, check * if we can trim key data to fit in the rest. We will indicate * that key data is incomplete by adding "+" sign at the end, like * this: * "k1,2,3,45,+,". * * Note that we shortest key info (if present) is "k+," so we * can only try to trim if key data is longer than that. */ if (full_len && size < full_len + 1 && klen > 3) { remainder = full_len - len; /* * We can only trim if we have space for the remainder * and also for at least "k+," which is 3 more characters. */ if (remainder <= space - 3) { /* * We are guaranteed to have 'k' in the buffer, so * we need at least 3 additional bytes for storing * "+," in addition to the remainder. 
*/ for (int i = size - 1 - remainder - 3; i >= 0; i--) { if (buf[i] == 'k' || buf[i] == ',') { strcpy(buf + i + 1, "+,"); len = i + 3; /* Not counting '\0' */ break; } } } } len += input_print_modalias_bits(buf + len, size - len, 'r', id->relbit, 0, REL_MAX); len += input_print_modalias_bits(buf + len, size - len, 'a', id->absbit, 0, ABS_MAX); len += input_print_modalias_bits(buf + len, size - len, 'm', id->mscbit, 0, MSC_MAX); len += input_print_modalias_bits(buf + len, size - len, 'l', id->ledbit, 0, LED_MAX); len += input_print_modalias_bits(buf + len, size - len, 's', id->sndbit, 0, SND_MAX); len += input_print_modalias_bits(buf + len, size - len, 'f', id->ffbit, 0, FF_MAX); len += input_print_modalias_bits(buf + len, size - len, 'w', id->swbit, 0, SW_MAX); return len; } static int input_print_modalias(char *buf, int size, const struct input_dev *id) { int full_len; /* * Printing is done in 2 passes: first one figures out total length * needed for the modalias string, second one will try to trim key * data in case when buffer is too small for the entire modalias. * If the buffer is too small regardless, it will fill as much as it * can (without trimming key data) into the buffer and leave it to * the caller to figure out what to do with the result. */ full_len = input_print_modalias_parts(NULL, 0, 0, id); return input_print_modalias_parts(buf, size, full_len, id); } static ssize_t input_dev_show_modalias(struct device *dev, struct device_attribute *attr, char *buf) { struct input_dev *id = to_input_dev(dev); ssize_t len; len = input_print_modalias(buf, PAGE_SIZE, id); if (len < PAGE_SIZE - 2) len += snprintf(buf + len, PAGE_SIZE - len, "\n"); return min_t(int, len, PAGE_SIZE); } static DEVICE_ATTR(modalias, S_IRUGO, input_dev_show_modalias, NULL); static int input_print_bitmap(char *buf, int buf_size, const unsigned long *bitmap, int max, int add_cr); static ssize_t input_dev_show_properties(struct device *dev, struct device_attribute *attr, char *buf) { struct input_dev *input_dev = to_input_dev(dev); int len = input_print_bitmap(buf, PAGE_SIZE, input_dev->propbit, INPUT_PROP_MAX, true); return min_t(int, len, PAGE_SIZE); } static DEVICE_ATTR(properties, S_IRUGO, input_dev_show_properties, NULL); static int input_inhibit_device(struct input_dev *dev); static int input_uninhibit_device(struct input_dev *dev); static ssize_t inhibited_show(struct device *dev, struct device_attribute *attr, char *buf) { struct input_dev *input_dev = to_input_dev(dev); return sysfs_emit(buf, "%d\n", input_dev->inhibited); } static ssize_t inhibited_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct input_dev *input_dev = to_input_dev(dev); ssize_t rv; bool inhibited; if (kstrtobool(buf, &inhibited)) return -EINVAL; if (inhibited) rv = input_inhibit_device(input_dev); else rv = input_uninhibit_device(input_dev); if (rv != 0) return rv; return len; } static DEVICE_ATTR_RW(inhibited); static struct attribute *input_dev_attrs[] = { &dev_attr_name.attr, &dev_attr_phys.attr, &dev_attr_uniq.attr, &dev_attr_modalias.attr, &dev_attr_properties.attr, &dev_attr_inhibited.attr, NULL }; static const struct attribute_group input_dev_attr_group = { .attrs = input_dev_attrs, }; #define INPUT_DEV_ID_ATTR(name) \ static ssize_t input_dev_show_id_##name(struct device *dev, \ struct device_attribute *attr, \ char *buf) \ { \ struct input_dev *input_dev = to_input_dev(dev); \ return sysfs_emit(buf, "%04x\n", input_dev->id.name); \ } \ static DEVICE_ATTR(name, S_IRUGO, 
input_dev_show_id_##name, NULL) INPUT_DEV_ID_ATTR(bustype); INPUT_DEV_ID_ATTR(vendor); INPUT_DEV_ID_ATTR(product); INPUT_DEV_ID_ATTR(version); static struct attribute *input_dev_id_attrs[] = { &dev_attr_bustype.attr, &dev_attr_vendor.attr, &dev_attr_product.attr, &dev_attr_version.attr, NULL }; static const struct attribute_group input_dev_id_attr_group = { .name = "id", .attrs = input_dev_id_attrs, }; static int input_print_bitmap(char *buf, int buf_size, const unsigned long *bitmap, int max, int add_cr) { int i; int len = 0; bool skip_empty = true; for (i = BITS_TO_LONGS(max) - 1; i >= 0; i--) { len += input_bits_to_string(buf + len, max(buf_size - len, 0), bitmap[i], skip_empty); if (len) { skip_empty = false; if (i > 0) len += snprintf(buf + len, max(buf_size - len, 0), " "); } } /* * If no output was produced print a single 0. */ if (len == 0) len = snprintf(buf, buf_size, "%d", 0); if (add_cr) len += snprintf(buf + len, max(buf_size - len, 0), "\n"); return len; } #define INPUT_DEV_CAP_ATTR(ev, bm) \ static ssize_t input_dev_show_cap_##bm(struct device *dev, \ struct device_attribute *attr, \ char *buf) \ { \ struct input_dev *input_dev = to_input_dev(dev); \ int len = input_print_bitmap(buf, PAGE_SIZE, \ input_dev->bm##bit, ev##_MAX, \ true); \ return min_t(int, len, PAGE_SIZE); \ } \ static DEVICE_ATTR(bm, S_IRUGO, input_dev_show_cap_##bm, NULL) INPUT_DEV_CAP_ATTR(EV, ev); INPUT_DEV_CAP_ATTR(KEY, key); INPUT_DEV_CAP_ATTR(REL, rel); INPUT_DEV_CAP_ATTR(ABS, abs); INPUT_DEV_CAP_ATTR(MSC, msc); INPUT_DEV_CAP_ATTR(LED, led); INPUT_DEV_CAP_ATTR(SND, snd); INPUT_DEV_CAP_ATTR(FF, ff); INPUT_DEV_CAP_ATTR(SW, sw); static struct attribute *input_dev_caps_attrs[] = { &dev_attr_ev.attr, &dev_attr_key.attr, &dev_attr_rel.attr, &dev_attr_abs.attr, &dev_attr_msc.attr, &dev_attr_led.attr, &dev_attr_snd.attr, &dev_attr_ff.attr, &dev_attr_sw.attr, NULL }; static const struct attribute_group input_dev_caps_attr_group = { .name = "capabilities", .attrs = input_dev_caps_attrs, }; static const struct attribute_group *input_dev_attr_groups[] = { &input_dev_attr_group, &input_dev_id_attr_group, &input_dev_caps_attr_group, &input_poller_attribute_group, NULL }; static void input_dev_release(struct device *device) { struct input_dev *dev = to_input_dev(device); input_ff_destroy(dev); input_mt_destroy_slots(dev); kfree(dev->poller); kfree(dev->absinfo); kfree(dev->vals); kfree(dev); module_put(THIS_MODULE); } /* * Input uevent interface - loading event handlers based on * device bitfields. */ static int input_add_uevent_bm_var(struct kobj_uevent_env *env, const char *name, const unsigned long *bitmap, int max) { int len; if (add_uevent_var(env, "%s", name)) return -ENOMEM; len = input_print_bitmap(&env->buf[env->buflen - 1], sizeof(env->buf) - env->buflen, bitmap, max, false); if (len >= (sizeof(env->buf) - env->buflen)) return -ENOMEM; env->buflen += len; return 0; } /* * This is a pretty gross hack. When building uevent data the driver core * may try adding more environment variables to kobj_uevent_env without * telling us, so we have no idea how much of the buffer we can use to * avoid overflows/-ENOMEM elsewhere. To work around this let's artificially * reduce amount of memory we will use for the modalias environment variable. * * The potential additions are: * * SEQNUM=18446744073709551615 - (%llu - 28 bytes) * HOME=/ (6 bytes) * PATH=/sbin:/bin:/usr/sbin:/usr/bin (34 bytes) * * 68 bytes total. 
Allow extra buffer - 96 bytes */ #define UEVENT_ENV_EXTRA_LEN 96 static int input_add_uevent_modalias_var(struct kobj_uevent_env *env, const struct input_dev *dev) { int len; if (add_uevent_var(env, "MODALIAS=")) return -ENOMEM; len = input_print_modalias(&env->buf[env->buflen - 1], (int)sizeof(env->buf) - env->buflen - UEVENT_ENV_EXTRA_LEN, dev); if (len >= ((int)sizeof(env->buf) - env->buflen - UEVENT_ENV_EXTRA_LEN)) return -ENOMEM; env->buflen += len; return 0; } #define INPUT_ADD_HOTPLUG_VAR(fmt, val...) \ do { \ int err = add_uevent_var(env, fmt, val); \ if (err) \ return err; \ } while (0) #define INPUT_ADD_HOTPLUG_BM_VAR(name, bm, max) \ do { \ int err = input_add_uevent_bm_var(env, name, bm, max); \ if (err) \ return err; \ } while (0) #define INPUT_ADD_HOTPLUG_MODALIAS_VAR(dev) \ do { \ int err = input_add_uevent_modalias_var(env, dev); \ if (err) \ return err; \ } while (0) static int input_dev_uevent(const struct device *device, struct kobj_uevent_env *env) { const struct input_dev *dev = to_input_dev(device); INPUT_ADD_HOTPLUG_VAR("PRODUCT=%x/%x/%x/%x", dev->id.bustype, dev->id.vendor, dev->id.product, dev->id.version); if (dev->name) INPUT_ADD_HOTPLUG_VAR("NAME=\"%s\"", dev->name); if (dev->phys) INPUT_ADD_HOTPLUG_VAR("PHYS=\"%s\"", dev->phys); if (dev->uniq) INPUT_ADD_HOTPLUG_VAR("UNIQ=\"%s\"", dev->uniq); INPUT_ADD_HOTPLUG_BM_VAR("PROP=", dev->propbit, INPUT_PROP_MAX); INPUT_ADD_HOTPLUG_BM_VAR("EV=", dev->evbit, EV_MAX); if (test_bit(EV_KEY, dev->evbit)) INPUT_ADD_HOTPLUG_BM_VAR("KEY=", dev->keybit, KEY_MAX); if (test_bit(EV_REL, dev->evbit)) INPUT_ADD_HOTPLUG_BM_VAR("REL=", dev->relbit, REL_MAX); if (test_bit(EV_ABS, dev->evbit)) INPUT_ADD_HOTPLUG_BM_VAR("ABS=", dev->absbit, ABS_MAX); if (test_bit(EV_MSC, dev->evbit)) INPUT_ADD_HOTPLUG_BM_VAR("MSC=", dev->mscbit, MSC_MAX); if (test_bit(EV_LED, dev->evbit)) INPUT_ADD_HOTPLUG_BM_VAR("LED=", dev->ledbit, LED_MAX); if (test_bit(EV_SND, dev->evbit)) INPUT_ADD_HOTPLUG_BM_VAR("SND=", dev->sndbit, SND_MAX); if (test_bit(EV_FF, dev->evbit)) INPUT_ADD_HOTPLUG_BM_VAR("FF=", dev->ffbit, FF_MAX); if (test_bit(EV_SW, dev->evbit)) INPUT_ADD_HOTPLUG_BM_VAR("SW=", dev->swbit, SW_MAX); INPUT_ADD_HOTPLUG_MODALIAS_VAR(dev); return 0; } #define INPUT_DO_TOGGLE(dev, type, bits, on) \ do { \ int i; \ bool active; \ \ if (!test_bit(EV_##type, dev->evbit)) \ break; \ \ for_each_set_bit(i, dev->bits##bit, type##_CNT) { \ active = test_bit(i, dev->bits); \ if (!active && !on) \ continue; \ \ dev->event(dev, EV_##type, i, on ? active : 0); \ } \ } while (0) static void input_dev_toggle(struct input_dev *dev, bool activate) { if (!dev->event) return; INPUT_DO_TOGGLE(dev, LED, led, activate); INPUT_DO_TOGGLE(dev, SND, snd, activate); if (activate && test_bit(EV_REP, dev->evbit)) { dev->event(dev, EV_REP, REP_PERIOD, dev->rep[REP_PERIOD]); dev->event(dev, EV_REP, REP_DELAY, dev->rep[REP_DELAY]); } } /** * input_reset_device() - reset/restore the state of input device * @dev: input device whose state needs to be reset * * This function tries to reset the state of an opened input device and * bring internal state and state if the hardware in sync with each other. * We mark all keys as released, restore LED state, repeat rate, etc. 
*/ void input_reset_device(struct input_dev *dev) { guard(mutex)(&dev->mutex); guard(spinlock_irqsave)(&dev->event_lock); input_dev_toggle(dev, true); if (input_dev_release_keys(dev)) input_handle_event(dev, EV_SYN, SYN_REPORT, 1); } EXPORT_SYMBOL(input_reset_device); static int input_inhibit_device(struct input_dev *dev) { guard(mutex)(&dev->mutex); if (dev->inhibited) return 0; if (dev->users) { if (dev->close) dev->close(dev); if (dev->poller) input_dev_poller_stop(dev->poller); } scoped_guard(spinlock_irq, &dev->event_lock) { input_mt_release_slots(dev); input_dev_release_keys(dev); input_handle_event(dev, EV_SYN, SYN_REPORT, 1); input_dev_toggle(dev, false); } dev->inhibited = true; return 0; } static int input_uninhibit_device(struct input_dev *dev) { int error; guard(mutex)(&dev->mutex); if (!dev->inhibited) return 0; if (dev->users) { if (dev->open) { error = dev->open(dev); if (error) return error; } if (dev->poller) input_dev_poller_start(dev->poller); } dev->inhibited = false; scoped_guard(spinlock_irq, &dev->event_lock) input_dev_toggle(dev, true); return 0; } static int input_dev_suspend(struct device *dev) { struct input_dev *input_dev = to_input_dev(dev); guard(spinlock_irq)(&input_dev->event_lock); /* * Keys that are pressed now are unlikely to be * still pressed when we resume. */ if (input_dev_release_keys(input_dev)) input_handle_event(input_dev, EV_SYN, SYN_REPORT, 1); /* Turn off LEDs and sounds, if any are active. */ input_dev_toggle(input_dev, false); return 0; } static int input_dev_resume(struct device *dev) { struct input_dev *input_dev = to_input_dev(dev); guard(spinlock_irq)(&input_dev->event_lock); /* Restore state of LEDs and sounds, if any were active. */ input_dev_toggle(input_dev, true); return 0; } static int input_dev_freeze(struct device *dev) { struct input_dev *input_dev = to_input_dev(dev); guard(spinlock_irq)(&input_dev->event_lock); /* * Keys that are pressed now are unlikely to be * still pressed when we resume. */ if (input_dev_release_keys(input_dev)) input_handle_event(input_dev, EV_SYN, SYN_REPORT, 1); return 0; } static int input_dev_poweroff(struct device *dev) { struct input_dev *input_dev = to_input_dev(dev); guard(spinlock_irq)(&input_dev->event_lock); /* Turn off LEDs and sounds, if any are active. */ input_dev_toggle(input_dev, false); return 0; } static const struct dev_pm_ops input_dev_pm_ops = { .suspend = input_dev_suspend, .resume = input_dev_resume, .freeze = input_dev_freeze, .poweroff = input_dev_poweroff, .restore = input_dev_resume, }; static const struct device_type input_dev_type = { .groups = input_dev_attr_groups, .release = input_dev_release, .uevent = input_dev_uevent, .pm = pm_sleep_ptr(&input_dev_pm_ops), }; static char *input_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "input/%s", dev_name(dev)); } const struct class input_class = { .name = "input", .devnode = input_devnode, }; EXPORT_SYMBOL_GPL(input_class); /** * input_allocate_device - allocate memory for new input device * * Returns prepared struct input_dev or %NULL. * * NOTE: Use input_free_device() to free devices that have not been * registered; input_unregister_device() should be used for already * registered devices. */ struct input_dev *input_allocate_device(void) { static atomic_t input_no = ATOMIC_INIT(-1); struct input_dev *dev; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return NULL; /* * Start with space for SYN_REPORT + 7 EV_KEY/EV_MSC events + 2 spare, * see input_estimate_events_per_packet(). 
We will tune the number * when we register the device. */ dev->max_vals = 10; dev->vals = kcalloc(dev->max_vals, sizeof(*dev->vals), GFP_KERNEL); if (!dev->vals) { kfree(dev); return NULL; } mutex_init(&dev->mutex); spin_lock_init(&dev->event_lock); timer_setup(&dev->timer, NULL, 0); INIT_LIST_HEAD(&dev->h_list); INIT_LIST_HEAD(&dev->node); dev->dev.type = &input_dev_type; dev->dev.class = &input_class; device_initialize(&dev->dev); /* * From this point on we can no longer simply "kfree(dev)", we need * to use input_free_device() so that device core properly frees its * resources associated with the input device. */ dev_set_name(&dev->dev, "input%lu", (unsigned long)atomic_inc_return(&input_no)); __module_get(THIS_MODULE); return dev; } EXPORT_SYMBOL(input_allocate_device); struct input_devres { struct input_dev *input; }; static int devm_input_device_match(struct device *dev, void *res, void *data) { struct input_devres *devres = res; return devres->input == data; } static void devm_input_device_release(struct device *dev, void *res) { struct input_devres *devres = res; struct input_dev *input = devres->input; dev_dbg(dev, "%s: dropping reference to %s\n", __func__, dev_name(&input->dev)); input_put_device(input); } /** * devm_input_allocate_device - allocate managed input device * @dev: device owning the input device being created * * Returns prepared struct input_dev or %NULL. * * Managed input devices do not need to be explicitly unregistered or * freed as it will be done automatically when owner device unbinds from * its driver (or binding fails). Once managed input device is allocated, * it is ready to be set up and registered in the same fashion as regular * input device. There are no special devm_input_device_[un]register() * variants, regular ones work with both managed and unmanaged devices, * should you need them. In most cases however, managed input device need * not be explicitly unregistered or freed. * * NOTE: the owner device is set up as parent of input device and users * should not override it. */ struct input_dev *devm_input_allocate_device(struct device *dev) { struct input_dev *input; struct input_devres *devres; devres = devres_alloc(devm_input_device_release, sizeof(*devres), GFP_KERNEL); if (!devres) return NULL; input = input_allocate_device(); if (!input) { devres_free(devres); return NULL; } input->dev.parent = dev; input->devres_managed = true; devres->input = input; devres_add(dev, devres); return input; } EXPORT_SYMBOL(devm_input_allocate_device); /** * input_free_device - free memory occupied by input_dev structure * @dev: input device to free * * This function should only be used if input_register_device() * was not called yet or if it failed. Once device was registered * use input_unregister_device() and memory will be freed once last * reference to the device is dropped. * * Device should be allocated by input_allocate_device(). * * NOTE: If there are references to the input device then memory * will not be freed until last reference is dropped. 
*/ void input_free_device(struct input_dev *dev) { if (dev) { if (dev->devres_managed) WARN_ON(devres_destroy(dev->dev.parent, devm_input_device_release, devm_input_device_match, dev)); input_put_device(dev); } } EXPORT_SYMBOL(input_free_device); /** * input_set_timestamp - set timestamp for input events * @dev: input device to set timestamp for * @timestamp: the time at which the event has occurred * in CLOCK_MONOTONIC * * This function is intended to provide to the input system a more * accurate time of when an event actually occurred. The driver should * call this function as soon as a timestamp is acquired ensuring * clock conversions in input_set_timestamp are done correctly. * * The system entering suspend state between timestamp acquisition and * calling input_set_timestamp can result in inaccurate conversions. */ void input_set_timestamp(struct input_dev *dev, ktime_t timestamp) { dev->timestamp[INPUT_CLK_MONO] = timestamp; dev->timestamp[INPUT_CLK_REAL] = ktime_mono_to_real(timestamp); dev->timestamp[INPUT_CLK_BOOT] = ktime_mono_to_any(timestamp, TK_OFFS_BOOT); } EXPORT_SYMBOL(input_set_timestamp); /** * input_get_timestamp - get timestamp for input events * @dev: input device to get timestamp from * * A valid timestamp is a timestamp of non-zero value. */ ktime_t *input_get_timestamp(struct input_dev *dev) { const ktime_t invalid_timestamp = ktime_set(0, 0); if (!ktime_compare(dev->timestamp[INPUT_CLK_MONO], invalid_timestamp)) input_set_timestamp(dev, ktime_get()); return dev->timestamp; } EXPORT_SYMBOL(input_get_timestamp); /** * input_set_capability - mark device as capable of a certain event * @dev: device that is capable of emitting or accepting event * @type: type of the event (EV_KEY, EV_REL, etc...) * @code: event code * * In addition to setting up corresponding bit in appropriate capability * bitmap the function also adjusts dev->evbit. */ void input_set_capability(struct input_dev *dev, unsigned int type, unsigned int code) { if (type < EV_CNT && input_max_code[type] && code > input_max_code[type]) { pr_err("%s: invalid code %u for type %u\n", __func__, code, type); dump_stack(); return; } switch (type) { case EV_KEY: __set_bit(code, dev->keybit); break; case EV_REL: __set_bit(code, dev->relbit); break; case EV_ABS: input_alloc_absinfo(dev); __set_bit(code, dev->absbit); break; case EV_MSC: __set_bit(code, dev->mscbit); break; case EV_SW: __set_bit(code, dev->swbit); break; case EV_LED: __set_bit(code, dev->ledbit); break; case EV_SND: __set_bit(code, dev->sndbit); break; case EV_FF: __set_bit(code, dev->ffbit); break; case EV_PWR: /* do nothing */ break; default: pr_err("%s: unknown type %u (code %u)\n", __func__, type, code); dump_stack(); return; } __set_bit(type, dev->evbit); } EXPORT_SYMBOL(input_set_capability); static unsigned int input_estimate_events_per_packet(struct input_dev *dev) { int mt_slots; int i; unsigned int events; if (dev->mt) { mt_slots = dev->mt->num_slots; } else if (test_bit(ABS_MT_TRACKING_ID, dev->absbit)) { mt_slots = dev->absinfo[ABS_MT_TRACKING_ID].maximum - dev->absinfo[ABS_MT_TRACKING_ID].minimum + 1; mt_slots = clamp(mt_slots, 2, 32); } else if (test_bit(ABS_MT_POSITION_X, dev->absbit)) { mt_slots = 2; } else { mt_slots = 0; } events = mt_slots + 1; /* count SYN_MT_REPORT and SYN_REPORT */ if (test_bit(EV_ABS, dev->evbit)) for_each_set_bit(i, dev->absbit, ABS_CNT) events += input_is_mt_axis(i) ? 
mt_slots : 1; if (test_bit(EV_REL, dev->evbit)) events += bitmap_weight(dev->relbit, REL_CNT); /* Make room for KEY and MSC events */ events += 7; return events; } #define INPUT_CLEANSE_BITMASK(dev, type, bits) \ do { \ if (!test_bit(EV_##type, dev->evbit)) \ memset(dev->bits##bit, 0, \ sizeof(dev->bits##bit)); \ } while (0) static void input_cleanse_bitmasks(struct input_dev *dev) { INPUT_CLEANSE_BITMASK(dev, KEY, key); INPUT_CLEANSE_BITMASK(dev, REL, rel); INPUT_CLEANSE_BITMASK(dev, ABS, abs); INPUT_CLEANSE_BITMASK(dev, MSC, msc); INPUT_CLEANSE_BITMASK(dev, LED, led); INPUT_CLEANSE_BITMASK(dev, SND, snd); INPUT_CLEANSE_BITMASK(dev, FF, ff); INPUT_CLEANSE_BITMASK(dev, SW, sw); } static void __input_unregister_device(struct input_dev *dev) { struct input_handle *handle, *next; input_disconnect_device(dev); scoped_guard(mutex, &input_mutex) { list_for_each_entry_safe(handle, next, &dev->h_list, d_node) handle->handler->disconnect(handle); WARN_ON(!list_empty(&dev->h_list)); del_timer_sync(&dev->timer); list_del_init(&dev->node); input_wakeup_procfs_readers(); } device_del(&dev->dev); } static void devm_input_device_unregister(struct device *dev, void *res) { struct input_devres *devres = res; struct input_dev *input = devres->input; dev_dbg(dev, "%s: unregistering device %s\n", __func__, dev_name(&input->dev)); __input_unregister_device(input); } /* * Generate software autorepeat event. Note that we take * dev->event_lock here to avoid racing with input_event * which may cause keys get "stuck". */ static void input_repeat_key(struct timer_list *t) { struct input_dev *dev = from_timer(dev, t, timer); guard(spinlock_irqsave)(&dev->event_lock); if (!dev->inhibited && test_bit(dev->repeat_key, dev->key) && is_event_supported(dev->repeat_key, dev->keybit, KEY_MAX)) { input_set_timestamp(dev, ktime_get()); input_handle_event(dev, EV_KEY, dev->repeat_key, 2); input_handle_event(dev, EV_SYN, SYN_REPORT, 1); if (dev->rep[REP_PERIOD]) mod_timer(&dev->timer, jiffies + msecs_to_jiffies(dev->rep[REP_PERIOD])); } } /** * input_enable_softrepeat - enable software autorepeat * @dev: input device * @delay: repeat delay * @period: repeat period * * Enable software autorepeat on the input device. */ void input_enable_softrepeat(struct input_dev *dev, int delay, int period) { dev->timer.function = input_repeat_key; dev->rep[REP_DELAY] = delay; dev->rep[REP_PERIOD] = period; } EXPORT_SYMBOL(input_enable_softrepeat); bool input_device_enabled(struct input_dev *dev) { lockdep_assert_held(&dev->mutex); return !dev->inhibited && dev->users > 0; } EXPORT_SYMBOL_GPL(input_device_enabled); static int input_device_tune_vals(struct input_dev *dev) { struct input_value *vals; unsigned int packet_size; unsigned int max_vals; packet_size = input_estimate_events_per_packet(dev); if (dev->hint_events_per_packet < packet_size) dev->hint_events_per_packet = packet_size; max_vals = dev->hint_events_per_packet + 2; if (dev->max_vals >= max_vals) return 0; vals = kcalloc(max_vals, sizeof(*vals), GFP_KERNEL); if (!vals) return -ENOMEM; scoped_guard(spinlock_irq, &dev->event_lock) { dev->max_vals = max_vals; swap(dev->vals, vals); } /* Because of swap() above, this frees the old vals memory */ kfree(vals); return 0; } /** * input_register_device - register device with input core * @dev: device to be registered * * This function registers device with input core. The device must be * allocated with input_allocate_device() and all it's capabilities * set up before registering. 
* If function fails the device must be freed with input_free_device(). * Once device has been successfully registered it can be unregistered * with input_unregister_device(); input_free_device() should not be * called in this case. * * Note that this function is also used to register managed input devices * (ones allocated with devm_input_allocate_device()). Such managed input * devices need not be explicitly unregistered or freed, their tear down * is controlled by the devres infrastructure. It is also worth noting * that tear down of managed input devices is internally a 2-step process: * registered managed input device is first unregistered, but stays in * memory and can still handle input_event() calls (although events will * not be delivered anywhere). The freeing of managed input device will * happen later, when devres stack is unwound to the point where device * allocation was made. */ int input_register_device(struct input_dev *dev) { struct input_devres *devres = NULL; struct input_handler *handler; const char *path; int error; if (test_bit(EV_ABS, dev->evbit) && !dev->absinfo) { dev_err(&dev->dev, "Absolute device without dev->absinfo, refusing to register\n"); return -EINVAL; } if (dev->devres_managed) { devres = devres_alloc(devm_input_device_unregister, sizeof(*devres), GFP_KERNEL); if (!devres) return -ENOMEM; devres->input = dev; } /* Every input device generates EV_SYN/SYN_REPORT events. */ __set_bit(EV_SYN, dev->evbit); /* KEY_RESERVED is not supposed to be transmitted to userspace. */ __clear_bit(KEY_RESERVED, dev->keybit); /* Make sure that bitmasks not mentioned in dev->evbit are clean. */ input_cleanse_bitmasks(dev); error = input_device_tune_vals(dev); if (error) goto err_devres_free; /* * If delay and period are pre-set by the driver, then autorepeating * is handled by the driver itself and we don't do it in input.c. */ if (!dev->rep[REP_DELAY] && !dev->rep[REP_PERIOD]) input_enable_softrepeat(dev, 250, 33); if (!dev->getkeycode) dev->getkeycode = input_default_getkeycode; if (!dev->setkeycode) dev->setkeycode = input_default_setkeycode; if (dev->poller) input_dev_poller_finalize(dev->poller); error = device_add(&dev->dev); if (error) goto err_devres_free; path = kobject_get_path(&dev->dev.kobj, GFP_KERNEL); pr_info("%s as %s\n", dev->name ? dev->name : "Unspecified device", path ? path : "N/A"); kfree(path); error = -EINTR; scoped_cond_guard(mutex_intr, goto err_device_del, &input_mutex) { list_add_tail(&dev->node, &input_dev_list); list_for_each_entry(handler, &input_handler_list, node) input_attach_handler(dev, handler); input_wakeup_procfs_readers(); } if (dev->devres_managed) { dev_dbg(dev->dev.parent, "%s: registering %s with devres.\n", __func__, dev_name(&dev->dev)); devres_add(dev->dev.parent, devres); } return 0; err_device_del: device_del(&dev->dev); err_devres_free: devres_free(devres); return error; } EXPORT_SYMBOL(input_register_device); /** * input_unregister_device - unregister previously registered device * @dev: device to be unregistered * * This function unregisters an input device. Once device is unregistered * the caller should not try to access it as it may get freed at any moment. */ void input_unregister_device(struct input_dev *dev) { if (dev->devres_managed) { WARN_ON(devres_destroy(dev->dev.parent, devm_input_device_unregister, devm_input_device_match, dev)); __input_unregister_device(dev); /* * We do not do input_put_device() here because it will be done * when 2nd devres fires up. 
*/ } else { __input_unregister_device(dev); input_put_device(dev); } } EXPORT_SYMBOL(input_unregister_device); static int input_handler_check_methods(const struct input_handler *handler) { int count = 0; if (handler->filter) count++; if (handler->events) count++; if (handler->event) count++; if (count > 1) { pr_err("%s: only one event processing method can be defined (%s)\n", __func__, handler->name); return -EINVAL; } return 0; } /** * input_register_handler - register a new input handler * @handler: handler to be registered * * This function registers a new input handler (interface) for input * devices in the system and attaches it to all input devices that * are compatible with the handler. */ int input_register_handler(struct input_handler *handler) { struct input_dev *dev; int error; error = input_handler_check_methods(handler); if (error) return error; scoped_cond_guard(mutex_intr, return -EINTR, &input_mutex) { INIT_LIST_HEAD(&handler->h_list); list_add_tail(&handler->node, &input_handler_list); list_for_each_entry(dev, &input_dev_list, node) input_attach_handler(dev, handler); input_wakeup_procfs_readers(); } return 0; } EXPORT_SYMBOL(input_register_handler); /** * input_unregister_handler - unregisters an input handler * @handler: handler to be unregistered * * This function disconnects a handler from its input devices and * removes it from lists of known handlers. */ void input_unregister_handler(struct input_handler *handler) { struct input_handle *handle, *next; guard(mutex)(&input_mutex); list_for_each_entry_safe(handle, next, &handler->h_list, h_node) handler->disconnect(handle); WARN_ON(!list_empty(&handler->h_list)); list_del_init(&handler->node); input_wakeup_procfs_readers(); } EXPORT_SYMBOL(input_unregister_handler); /** * input_handler_for_each_handle - handle iterator * @handler: input handler to iterate * @data: data for the callback * @fn: function to be called for each handle * * Iterate over @bus's list of devices, and call @fn for each, passing * it @data and stop when @fn returns a non-zero value. The function is * using RCU to traverse the list and therefore may be using in atomic * contexts. The @fn callback is invoked from RCU critical section and * thus must not sleep. */ int input_handler_for_each_handle(struct input_handler *handler, void *data, int (*fn)(struct input_handle *, void *)) { struct input_handle *handle; int retval; guard(rcu)(); list_for_each_entry_rcu(handle, &handler->h_list, h_node) { retval = fn(handle, data); if (retval) return retval; } return 0; } EXPORT_SYMBOL(input_handler_for_each_handle); /* * An implementation of input_handle's handle_events() method that simply * invokes handler->event() method for each event one by one. */ static unsigned int input_handle_events_default(struct input_handle *handle, struct input_value *vals, unsigned int count) { struct input_handler *handler = handle->handler; struct input_value *v; for (v = vals; v != vals + count; v++) handler->event(handle, v->type, v->code, v->value); return count; } /* * An implementation of input_handle's handle_events() method that invokes * handler->filter() method for each event one by one and removes events * that were filtered out from the "vals" array. 
*/ static unsigned int input_handle_events_filter(struct input_handle *handle, struct input_value *vals, unsigned int count) { struct input_handler *handler = handle->handler; struct input_value *end = vals; struct input_value *v; for (v = vals; v != vals + count; v++) { if (handler->filter(handle, v->type, v->code, v->value)) continue; if (end != v) *end = *v; end++; } return end - vals; } /* * An implementation of input_handle's handle_events() method that does nothing. */ static unsigned int input_handle_events_null(struct input_handle *handle, struct input_value *vals, unsigned int count) { return count; } /* * Sets up appropriate handle->event_handler based on the input_handler * associated with the handle. */ static void input_handle_setup_event_handler(struct input_handle *handle) { struct input_handler *handler = handle->handler; if (handler->filter) handle->handle_events = input_handle_events_filter; else if (handler->event) handle->handle_events = input_handle_events_default; else if (handler->events) handle->handle_events = handler->events; else handle->handle_events = input_handle_events_null; } /** * input_register_handle - register a new input handle * @handle: handle to register * * This function puts a new input handle onto device's * and handler's lists so that events can flow through * it once it is opened using input_open_device(). * * This function is supposed to be called from handler's * connect() method. */ int input_register_handle(struct input_handle *handle) { struct input_handler *handler = handle->handler; struct input_dev *dev = handle->dev; input_handle_setup_event_handler(handle); /* * We take dev->mutex here to prevent race with * input_release_device(). */ scoped_cond_guard(mutex_intr, return -EINTR, &dev->mutex) { /* * Filters go to the head of the list, normal handlers * to the tail. */ if (handler->filter) list_add_rcu(&handle->d_node, &dev->h_list); else list_add_tail_rcu(&handle->d_node, &dev->h_list); } /* * Since we are supposed to be called from ->connect() * which is mutually exclusive with ->disconnect() * we can't be racing with input_unregister_handle() * and so separate lock is not needed here. */ list_add_tail_rcu(&handle->h_node, &handler->h_list); if (handler->start) handler->start(handle); return 0; } EXPORT_SYMBOL(input_register_handle); /** * input_unregister_handle - unregister an input handle * @handle: handle to unregister * * This function removes input handle from device's * and handler's lists. * * This function is supposed to be called from handler's * disconnect() method. */ void input_unregister_handle(struct input_handle *handle) { struct input_dev *dev = handle->dev; list_del_rcu(&handle->h_node); /* * Take dev->mutex to prevent race with input_release_device(). */ scoped_guard(mutex, &dev->mutex) list_del_rcu(&handle->d_node); synchronize_rcu(); } EXPORT_SYMBOL(input_unregister_handle); /** * input_get_new_minor - allocates a new input minor number * @legacy_base: beginning or the legacy range to be searched * @legacy_num: size of legacy range * @allow_dynamic: whether we can also take ID from the dynamic range * * This function allocates a new device minor for from input major namespace. * Caller can request legacy minor by specifying @legacy_base and @legacy_num * parameters and whether ID can be allocated from dynamic range if there are * no free IDs in legacy range. 
*/ int input_get_new_minor(int legacy_base, unsigned int legacy_num, bool allow_dynamic) { /* * This function should be called from input handler's ->connect() * methods, which are serialized with input_mutex, so no additional * locking is needed here. */ if (legacy_base >= 0) { int minor = ida_alloc_range(&input_ida, legacy_base, legacy_base + legacy_num - 1, GFP_KERNEL); if (minor >= 0 || !allow_dynamic) return minor; } return ida_alloc_range(&input_ida, INPUT_FIRST_DYNAMIC_DEV, INPUT_MAX_CHAR_DEVICES - 1, GFP_KERNEL); } EXPORT_SYMBOL(input_get_new_minor); /** * input_free_minor - release previously allocated minor * @minor: minor to be released * * This function releases previously allocated input minor so that it can be * reused later. */ void input_free_minor(unsigned int minor) { ida_free(&input_ida, minor); } EXPORT_SYMBOL(input_free_minor); static int __init input_init(void) { int err; err = class_register(&input_class); if (err) { pr_err("unable to register input_dev class\n"); return err; } err = input_proc_init(); if (err) goto fail1; err = register_chrdev_region(MKDEV(INPUT_MAJOR, 0), INPUT_MAX_CHAR_DEVICES, "input"); if (err) { pr_err("unable to register char major %d", INPUT_MAJOR); goto fail2; } return 0; fail2: input_proc_exit(); fail1: class_unregister(&input_class); return err; } static void __exit input_exit(void) { input_proc_exit(); unregister_chrdev_region(MKDEV(INPUT_MAJOR, 0), INPUT_MAX_CHAR_DEVICES); class_unregister(&input_class); } subsys_initcall(input_init); module_exit(input_exit);
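/*
 * Illustrative sketch, not part of input.c: a minimal driver-side use of the
 * registration API documented above (devm_input_allocate_device(),
 * input_set_capability(), input_register_device()). The function and device
 * names here are hypothetical and error handling is abbreviated.
 */
static int example_button_probe(struct device *parent)
{
	struct input_dev *input;
	int error;

	input = devm_input_allocate_device(parent);
	if (!input)
		return -ENOMEM;

	input->name = "Example Button";
	input_set_capability(input, EV_KEY, KEY_POWER);

	/* Registration tunes dev->vals via input_estimate_events_per_packet(). */
	error = input_register_device(input);
	if (error)
		return error;

	/*
	 * The interrupt handler would then report state changes with
	 * input_report_key(input, KEY_POWER, pressed) followed by
	 * input_sync(input).
	 */
	return 0;
}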
// SPDX-License-Identifier: GPL-2.0-or-later /* client.c: NFS client sharing and management code * * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com) */ #include <linux/module.h> #include <linux/init.h> #include <linux/sched.h> #include <linux/time.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/string.h> #include <linux/stat.h> #include <linux/errno.h> #include <linux/unistd.h> #include <linux/sunrpc/addr.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/stats.h> #include <linux/sunrpc/metrics.h> #include <linux/sunrpc/xprtsock.h> #include <linux/sunrpc/xprtrdma.h> #include <linux/nfs_fs.h> #include <linux/nfs_mount.h> #include <linux/nfs4_mount.h> #include <linux/lockd/bind.h> #include <linux/seq_file.h> #include <linux/mount.h> #include <linux/vfs.h> #include <linux/inet.h> #include <linux/in6.h> #include <linux/slab.h> #include <linux/idr.h> #include <net/ipv6.h> #include <linux/nfs_xdr.h> #include <linux/sunrpc/bc_xprt.h> #include <linux/nsproxy.h> #include <linux/pid_namespace.h> #include <linux/nfslocalio.h> #include "nfs4_fs.h" #include "callback.h" #include "delegation.h" #include "iostat.h" #include "internal.h" #include "fscache.h" #include "pnfs.h" #include "nfs.h" #include "netns.h" #include "sysfs.h" #include "nfs42.h" #define NFSDBG_FACILITY NFSDBG_CLIENT static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); static DEFINE_RWLOCK(nfs_version_lock); static struct nfs_subversion *nfs_version_mods[5] = { [2] = NULL, [3] = NULL, [4] = NULL, }; /* * RPC cruft for NFS */ static const struct rpc_version *nfs_version[5] = { [2] = NULL, [3] = NULL, [4] = NULL, }; const struct rpc_program nfs_program = { .name = "nfs", .number = NFS_PROGRAM, .nrvers = ARRAY_SIZE(nfs_version), .version = nfs_version, .pipe_dir_name = NFS_PIPE_DIRNAME, }; static struct nfs_subversion *__find_nfs_version(unsigned int version) { struct nfs_subversion *nfs; read_lock(&nfs_version_lock); nfs = nfs_version_mods[version]; read_unlock(&nfs_version_lock); return nfs; } struct nfs_subversion *find_nfs_version(unsigned int version) { struct nfs_subversion *nfs = __find_nfs_version(version); if (!nfs && request_module("nfsv%d", version) == 0) nfs = __find_nfs_version(version); if (!nfs) return ERR_PTR(-EPROTONOSUPPORT); if (!get_nfs_version(nfs)) return ERR_PTR(-EAGAIN); return nfs; } int get_nfs_version(struct nfs_subversion *nfs) { return try_module_get(nfs->owner); } EXPORT_SYMBOL_GPL(get_nfs_version); void put_nfs_version(struct nfs_subversion *nfs) { module_put(nfs->owner); } void register_nfs_version(struct nfs_subversion *nfs) { write_lock(&nfs_version_lock); nfs_version_mods[nfs->rpc_ops->version] = nfs; nfs_version[nfs->rpc_ops->version] = nfs->rpc_vers; write_unlock(&nfs_version_lock); } EXPORT_SYMBOL_GPL(register_nfs_version); void unregister_nfs_version(struct nfs_subversion *nfs) { write_lock(&nfs_version_lock); nfs_version[nfs->rpc_ops->version] = NULL; nfs_version_mods[nfs->rpc_ops->version] = NULL; write_unlock(&nfs_version_lock); } EXPORT_SYMBOL_GPL(unregister_nfs_version); /* * Allocate a shared client record * * Since these are allocated/deallocated very rarely, we don't * bother putting them in a slab cache... 
*/ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) { struct nfs_client *clp; int err = -ENOMEM; if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) goto error_0; clp->cl_minorversion = cl_init->minorversion; clp->cl_nfs_mod = cl_init->nfs_mod; if (!get_nfs_version(clp->cl_nfs_mod)) goto error_dealloc; clp->rpc_ops = clp->cl_nfs_mod->rpc_ops; refcount_set(&clp->cl_count, 1); clp->cl_cons_state = NFS_CS_INITING; memcpy(&clp->cl_addr, cl_init->addr, cl_init->addrlen); clp->cl_addrlen = cl_init->addrlen; if (cl_init->hostname) { err = -ENOMEM; clp->cl_hostname = kstrdup(cl_init->hostname, GFP_KERNEL); if (!clp->cl_hostname) goto error_cleanup; } INIT_LIST_HEAD(&clp->cl_superblocks); clp->cl_rpcclient = ERR_PTR(-EINVAL); clp->cl_flags = cl_init->init_flags; clp->cl_proto = cl_init->proto; clp->cl_nconnect = cl_init->nconnect; clp->cl_max_connect = cl_init->max_connect ? cl_init->max_connect : 1; clp->cl_net = get_net(cl_init->net); #if IS_ENABLED(CONFIG_NFS_LOCALIO) seqlock_init(&clp->cl_boot_lock); ktime_get_real_ts64(&clp->cl_nfssvc_boot); nfs_uuid_init(&clp->cl_uuid); INIT_WORK(&clp->cl_local_probe_work, nfs_local_probe_async_work); #endif /* CONFIG_NFS_LOCALIO */ clp->cl_principal = "*"; clp->cl_xprtsec = cl_init->xprtsec; return clp; error_cleanup: put_nfs_version(clp->cl_nfs_mod); error_dealloc: kfree(clp); error_0: return ERR_PTR(err); } EXPORT_SYMBOL_GPL(nfs_alloc_client); #if IS_ENABLED(CONFIG_NFS_V4) static void nfs_cleanup_cb_ident_idr(struct net *net) { struct nfs_net *nn = net_generic(net, nfs_net_id); idr_destroy(&nn->cb_ident_idr); } /* nfs_client_lock held */ static void nfs_cb_idr_remove_locked(struct nfs_client *clp) { struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); if (clp->cl_cb_ident) idr_remove(&nn->cb_ident_idr, clp->cl_cb_ident); } static void pnfs_init_server(struct nfs_server *server) { rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC"); } #else static void nfs_cleanup_cb_ident_idr(struct net *net) { } static void nfs_cb_idr_remove_locked(struct nfs_client *clp) { } static void pnfs_init_server(struct nfs_server *server) { } #endif /* CONFIG_NFS_V4 */ /* * Destroy a shared client record */ void nfs_free_client(struct nfs_client *clp) { nfs_localio_disable_client(clp); /* -EIO all pending I/O */ if (!IS_ERR(clp->cl_rpcclient)) rpc_shutdown_client(clp->cl_rpcclient); put_net(clp->cl_net); put_nfs_version(clp->cl_nfs_mod); kfree(clp->cl_hostname); kfree(clp->cl_acceptor); kfree_rcu(clp, rcu); } EXPORT_SYMBOL_GPL(nfs_free_client); /* * Release a reference to a shared client record */ void nfs_put_client(struct nfs_client *clp) { struct nfs_net *nn; if (!clp) return; nn = net_generic(clp->cl_net, nfs_net_id); if (refcount_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) { list_del(&clp->cl_share_link); nfs_cb_idr_remove_locked(clp); spin_unlock(&nn->nfs_client_lock); WARN_ON_ONCE(!list_empty(&clp->cl_superblocks)); clp->rpc_ops->free_client(clp); } } EXPORT_SYMBOL_GPL(nfs_put_client); /* * Find an nfs_client on the list that matches the initialisation data * that is supplied. 
*/ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *data) { struct nfs_client *clp; const struct sockaddr *sap = (struct sockaddr *)data->addr; struct nfs_net *nn = net_generic(data->net, nfs_net_id); int error; again: list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; /* Don't match clients that failed to initialise properly */ if (clp->cl_cons_state < 0) continue; /* If a client is still initializing then we need to wait */ if (clp->cl_cons_state > NFS_CS_READY) { refcount_inc(&clp->cl_count); spin_unlock(&nn->nfs_client_lock); error = nfs_wait_client_init_complete(clp); nfs_put_client(clp); spin_lock(&nn->nfs_client_lock); if (error < 0) return ERR_PTR(error); goto again; } /* Different NFS versions cannot share the same nfs_client */ if (clp->rpc_ops != data->nfs_mod->rpc_ops) continue; if (clp->cl_proto != data->proto) continue; /* Match nfsv4 minorversion */ if (clp->cl_minorversion != data->minorversion) continue; /* Match request for a dedicated DS */ if (test_bit(NFS_CS_DS, &data->init_flags) != test_bit(NFS_CS_DS, &clp->cl_flags)) continue; /* Match the full socket address */ if (!rpc_cmp_addr_port(sap, clap)) /* Match all xprt_switch full socket addresses */ if (IS_ERR(clp->cl_rpcclient) || !rpc_clnt_xprt_switch_has_addr(clp->cl_rpcclient, sap)) continue; /* Match the xprt security policy */ if (clp->cl_xprtsec.policy != data->xprtsec.policy) continue; refcount_inc(&clp->cl_count); return clp; } return NULL; } /* * Return true if @clp is done initializing, false if still working on it. * * Use nfs_client_init_status to check if it was successful. */ bool nfs_client_init_is_complete(const struct nfs_client *clp) { return clp->cl_cons_state <= NFS_CS_READY; } EXPORT_SYMBOL_GPL(nfs_client_init_is_complete); /* * Return 0 if @clp was successfully initialized, -errno otherwise. * * This must be called *after* nfs_client_init_is_complete() returns true, * otherwise it will pop WARN_ON_ONCE and return -EINVAL */ int nfs_client_init_status(const struct nfs_client *clp) { /* called without checking nfs_client_init_is_complete */ if (clp->cl_cons_state > NFS_CS_READY) { WARN_ON_ONCE(1); return -EINVAL; } return clp->cl_cons_state; } EXPORT_SYMBOL_GPL(nfs_client_init_status); int nfs_wait_client_init_complete(const struct nfs_client *clp) { return wait_event_killable(nfs_client_active_wq, nfs_client_init_is_complete(clp)); } EXPORT_SYMBOL_GPL(nfs_wait_client_init_complete); /* * Found an existing client. Make sure it's ready before returning. 
*/ static struct nfs_client * nfs_found_client(const struct nfs_client_initdata *cl_init, struct nfs_client *clp) { int error; error = nfs_wait_client_init_complete(clp); if (error < 0) { nfs_put_client(clp); return ERR_PTR(-ERESTARTSYS); } if (clp->cl_cons_state < NFS_CS_READY) { error = clp->cl_cons_state; nfs_put_client(clp); return ERR_PTR(error); } smp_rmb(); return clp; } /* * Look up a client by IP address and protocol version * - creates a new record if one doesn't yet exist */ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init) { struct nfs_client *clp, *new = NULL; struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id); const struct nfs_rpc_ops *rpc_ops = cl_init->nfs_mod->rpc_ops; if (cl_init->hostname == NULL) { WARN_ON(1); return ERR_PTR(-EINVAL); } /* see if the client already exists */ do { spin_lock(&nn->nfs_client_lock); clp = nfs_match_client(cl_init); if (clp) { spin_unlock(&nn->nfs_client_lock); if (new) new->rpc_ops->free_client(new); if (IS_ERR(clp)) return clp; return nfs_found_client(cl_init, clp); } if (new) { list_add_tail(&new->cl_share_link, &nn->nfs_client_list); spin_unlock(&nn->nfs_client_lock); new = rpc_ops->init_client(new, cl_init); if (!IS_ERR(new)) nfs_local_probe(new); return new; } spin_unlock(&nn->nfs_client_lock); new = rpc_ops->alloc_client(cl_init); } while (!IS_ERR(new)); return new; } EXPORT_SYMBOL_GPL(nfs_get_client); /* * Mark a server as ready or failed */ void nfs_mark_client_ready(struct nfs_client *clp, int state) { smp_wmb(); clp->cl_cons_state = state; wake_up_all(&nfs_client_active_wq); } EXPORT_SYMBOL_GPL(nfs_mark_client_ready); /* * Initialise the timeout values for a connection */ void nfs_init_timeout_values(struct rpc_timeout *to, int proto, int timeo, int retrans) { to->to_initval = timeo * HZ / 10; to->to_retries = retrans; switch (proto) { case XPRT_TRANSPORT_TCP: case XPRT_TRANSPORT_TCP_TLS: case XPRT_TRANSPORT_RDMA: if (retrans == NFS_UNSPEC_RETRANS) to->to_retries = NFS_DEF_TCP_RETRANS; if (timeo == NFS_UNSPEC_TIMEO || to->to_initval == 0) to->to_initval = NFS_DEF_TCP_TIMEO * HZ / 10; if (to->to_initval > NFS_MAX_TCP_TIMEOUT) to->to_initval = NFS_MAX_TCP_TIMEOUT; to->to_increment = to->to_initval; to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); if (to->to_maxval > NFS_MAX_TCP_TIMEOUT) to->to_maxval = NFS_MAX_TCP_TIMEOUT; if (to->to_maxval < to->to_initval) to->to_maxval = to->to_initval; to->to_exponential = 0; break; case XPRT_TRANSPORT_UDP: if (retrans == NFS_UNSPEC_RETRANS) to->to_retries = NFS_DEF_UDP_RETRANS; if (timeo == NFS_UNSPEC_TIMEO || to->to_initval == 0) to->to_initval = NFS_DEF_UDP_TIMEO * HZ / 10; if (to->to_initval > NFS_MAX_UDP_TIMEOUT) to->to_initval = NFS_MAX_UDP_TIMEOUT; to->to_maxval = NFS_MAX_UDP_TIMEOUT; to->to_exponential = 1; break; default: BUG(); } } EXPORT_SYMBOL_GPL(nfs_init_timeout_values); /* * Create an RPC client handle */ int nfs_create_rpc_client(struct nfs_client *clp, const struct nfs_client_initdata *cl_init, rpc_authflavor_t flavor) { struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); struct rpc_clnt *clnt = NULL; struct rpc_create_args args = { .net = clp->cl_net, .protocol = clp->cl_proto, .nconnect = clp->cl_nconnect, .address = (struct sockaddr *)&clp->cl_addr, .addrsize = clp->cl_addrlen, .timeout = cl_init->timeparms, .servername = clp->cl_hostname, .nodename = cl_init->nodename, .program = &nfs_program, .stats = &nn->rpcstats, .version = clp->rpc_ops->version, .authflavor = flavor, .cred = cl_init->cred, .xprtsec = 
cl_init->xprtsec, .connect_timeout = cl_init->connect_timeout, .reconnect_timeout = cl_init->reconnect_timeout, }; if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags)) args.flags |= RPC_CLNT_CREATE_DISCRTRY; if (test_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags)) args.flags |= RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT; if (test_bit(NFS_CS_NORESVPORT, &clp->cl_flags)) args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; if (test_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags)) args.flags |= RPC_CLNT_CREATE_INFINITE_SLOTS; if (test_bit(NFS_CS_NOPING, &clp->cl_flags)) args.flags |= RPC_CLNT_CREATE_NOPING; if (test_bit(NFS_CS_REUSEPORT, &clp->cl_flags)) args.flags |= RPC_CLNT_CREATE_REUSEPORT; if (!IS_ERR(clp->cl_rpcclient)) return 0; clnt = rpc_create(&args); if (IS_ERR(clnt)) { dprintk("%s: cannot create RPC client. Error = %ld\n", __func__, PTR_ERR(clnt)); return PTR_ERR(clnt); } clnt->cl_principal = clp->cl_principal; clp->cl_rpcclient = clnt; clnt->cl_max_connect = clp->cl_max_connect; return 0; } EXPORT_SYMBOL_GPL(nfs_create_rpc_client); /* * Version 2 or 3 client destruction */ static void nfs_destroy_server(struct nfs_server *server) { if (server->nlm_host) nlmclnt_done(server->nlm_host); } /* * Version 2 or 3 lockd setup */ static int nfs_start_lockd(struct nfs_server *server) { struct nlm_host *host; struct nfs_client *clp = server->nfs_client; struct nlmclnt_initdata nlm_init = { .hostname = clp->cl_hostname, .address = (struct sockaddr *)&clp->cl_addr, .addrlen = clp->cl_addrlen, .nfs_version = clp->rpc_ops->version, .noresvport = server->flags & NFS_MOUNT_NORESVPORT ? 1 : 0, .net = clp->cl_net, .nlmclnt_ops = clp->cl_nfs_mod->rpc_ops->nlmclnt_ops, .cred = server->cred, }; if (nlm_init.nfs_version > 3) return 0; if ((server->flags & NFS_MOUNT_LOCAL_FLOCK) && (server->flags & NFS_MOUNT_LOCAL_FCNTL)) return 0; switch (clp->cl_proto) { default: nlm_init.protocol = IPPROTO_TCP; break; #ifndef CONFIG_NFS_DISABLE_UDP_SUPPORT case XPRT_TRANSPORT_UDP: nlm_init.protocol = IPPROTO_UDP; #endif } host = nlmclnt_init(&nlm_init); if (IS_ERR(host)) return PTR_ERR(host); server->nlm_host = host; server->destroy = nfs_destroy_server; nfs_sysfs_link_rpc_client(server, nlmclnt_rpc_clnt(host), NULL); return 0; } /* * Create a general RPC client */ int nfs_init_server_rpcclient(struct nfs_server *server, const struct rpc_timeout *timeo, rpc_authflavor_t pseudoflavour) { struct nfs_client *clp = server->nfs_client; server->client = rpc_clone_client_set_auth(clp->cl_rpcclient, pseudoflavour); if (IS_ERR(server->client)) { dprintk("%s: couldn't create rpc_client!\n", __func__); return PTR_ERR(server->client); } memcpy(&server->client->cl_timeout_default, timeo, sizeof(server->client->cl_timeout_default)); server->client->cl_timeout = &server->client->cl_timeout_default; server->client->cl_softrtry = 0; if (server->flags & NFS_MOUNT_SOFTERR) server->client->cl_softerr = 1; if (server->flags & NFS_MOUNT_SOFT) server->client->cl_softrtry = 1; nfs_sysfs_link_rpc_client(server, server->client, NULL); return 0; } EXPORT_SYMBOL_GPL(nfs_init_server_rpcclient); /** * nfs_init_client - Initialise an NFS2 or NFS3 client * * @clp: nfs_client to initialise * @cl_init: Initialisation parameters * * Returns pointer to an NFS client, or an ERR_PTR value. 
*/ struct nfs_client *nfs_init_client(struct nfs_client *clp, const struct nfs_client_initdata *cl_init) { int error; /* the client is already initialised */ if (clp->cl_cons_state == NFS_CS_READY) return clp; /* * Create a client RPC handle for doing FSSTAT with UNIX auth only * - RFC 2623, sec 2.3.2 */ error = nfs_create_rpc_client(clp, cl_init, RPC_AUTH_UNIX); nfs_mark_client_ready(clp, error == 0 ? NFS_CS_READY : error); if (error < 0) { nfs_put_client(clp); clp = ERR_PTR(error); } return clp; } EXPORT_SYMBOL_GPL(nfs_init_client); /* * Create a version 2 or 3 client */ static int nfs_init_server(struct nfs_server *server, const struct fs_context *fc) { const struct nfs_fs_context *ctx = nfs_fc2context(fc); struct rpc_timeout timeparms; struct nfs_client_initdata cl_init = { .hostname = ctx->nfs_server.hostname, .addr = &ctx->nfs_server._address, .addrlen = ctx->nfs_server.addrlen, .nfs_mod = ctx->nfs_mod, .proto = ctx->nfs_server.protocol, .net = fc->net_ns, .timeparms = &timeparms, .cred = server->cred, .nconnect = ctx->nfs_server.nconnect, .init_flags = (1UL << NFS_CS_REUSEPORT), .xprtsec = ctx->xprtsec, }; struct nfs_client *clp; int error; nfs_init_timeout_values(&timeparms, ctx->nfs_server.protocol, ctx->timeo, ctx->retrans); if (ctx->flags & NFS_MOUNT_NORESVPORT) set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); /* Allocate or find a client reference we can use */ clp = nfs_get_client(&cl_init); if (IS_ERR(clp)) return PTR_ERR(clp); server->nfs_client = clp; nfs_sysfs_add_server(server); nfs_sysfs_link_rpc_client(server, clp->cl_rpcclient, "_state"); /* Initialise the client representation from the mount data */ server->flags = ctx->flags; server->options = ctx->options; server->caps |= NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS; switch (clp->rpc_ops->version) { case 2: server->fattr_valid = NFS_ATTR_FATTR_V2; break; case 3: server->fattr_valid = NFS_ATTR_FATTR_V3; break; default: server->fattr_valid = NFS_ATTR_FATTR_V4; } if (ctx->rsize) server->rsize = nfs_io_size(ctx->rsize, clp->cl_proto); if (ctx->wsize) server->wsize = nfs_io_size(ctx->wsize, clp->cl_proto); server->acregmin = ctx->acregmin * HZ; server->acregmax = ctx->acregmax * HZ; server->acdirmin = ctx->acdirmin * HZ; server->acdirmax = ctx->acdirmax * HZ; /* Start lockd here, before we might error out */ error = nfs_start_lockd(server); if (error < 0) goto error; server->port = ctx->nfs_server.port; server->auth_info = ctx->auth_info; error = nfs_init_server_rpcclient(server, &timeparms, ctx->selected_flavor); if (error < 0) goto error; /* Preserve the values of mount_server-related mount options */ if (ctx->mount_server.addrlen) { memcpy(&server->mountd_address, &ctx->mount_server.address, ctx->mount_server.addrlen); server->mountd_addrlen = ctx->mount_server.addrlen; } server->mountd_version = ctx->mount_server.version; server->mountd_port = ctx->mount_server.port; server->mountd_protocol = ctx->mount_server.protocol; server->namelen = ctx->namlen; return 0; error: server->nfs_client = NULL; nfs_put_client(clp); return error; } /* * Load up the server record from information gained in an fsinfo record */ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *fsinfo) { struct nfs_client *clp = server->nfs_client; unsigned long max_rpc_payload, raw_max_rpc_payload; /* Work out a lot of parameters */ if (server->rsize == 0) server->rsize = nfs_io_size(fsinfo->rtpref, clp->cl_proto); if (server->wsize == 0) server->wsize = nfs_io_size(fsinfo->wtpref, clp->cl_proto); if (fsinfo->rtmax >= 512 && 
server->rsize > fsinfo->rtmax) server->rsize = nfs_io_size(fsinfo->rtmax, clp->cl_proto); if (fsinfo->wtmax >= 512 && server->wsize > fsinfo->wtmax) server->wsize = nfs_io_size(fsinfo->wtmax, clp->cl_proto); raw_max_rpc_payload = rpc_max_payload(server->client); max_rpc_payload = nfs_block_size(raw_max_rpc_payload, NULL); if (server->rsize > max_rpc_payload) server->rsize = max_rpc_payload; if (server->rsize > NFS_MAX_FILE_IO_SIZE) server->rsize = NFS_MAX_FILE_IO_SIZE; server->rpages = (server->rsize + PAGE_SIZE - 1) >> PAGE_SHIFT; if (server->wsize > max_rpc_payload) server->wsize = max_rpc_payload; if (server->wsize > NFS_MAX_FILE_IO_SIZE) server->wsize = NFS_MAX_FILE_IO_SIZE; server->wpages = (server->wsize + PAGE_SIZE - 1) >> PAGE_SHIFT; server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); server->dtsize = nfs_block_size(fsinfo->dtpref, NULL); if (server->dtsize > NFS_MAX_FILE_IO_SIZE) server->dtsize = NFS_MAX_FILE_IO_SIZE; if (server->dtsize > server->rsize) server->dtsize = server->rsize; if (server->flags & NFS_MOUNT_NOAC) { server->acregmin = server->acregmax = 0; server->acdirmin = server->acdirmax = 0; } server->maxfilesize = fsinfo->maxfilesize; server->time_delta = fsinfo->time_delta; server->change_attr_type = fsinfo->change_attr_type; server->clone_blksize = fsinfo->clone_blksize; /* We're airborne Set socket buffersize */ rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); #ifdef CONFIG_NFS_V4_2 /* * Defaults until limited by the session parameters. */ server->gxasize = min_t(unsigned int, raw_max_rpc_payload, XATTR_SIZE_MAX); server->sxasize = min_t(unsigned int, raw_max_rpc_payload, XATTR_SIZE_MAX); server->lxasize = min_t(unsigned int, raw_max_rpc_payload, nfs42_listxattr_xdrsize(XATTR_LIST_MAX)); if (fsinfo->xattr_support) server->caps |= NFS_CAP_XATTR; #endif } /* * Probe filesystem information, including the FSID on v2/v3 */ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fattr *fattr) { struct nfs_fsinfo fsinfo; struct nfs_client *clp = server->nfs_client; int error; if (clp->rpc_ops->set_capabilities != NULL) { error = clp->rpc_ops->set_capabilities(server, mntfh); if (error < 0) return error; } fsinfo.fattr = fattr; fsinfo.nlayouttypes = 0; memset(fsinfo.layouttype, 0, sizeof(fsinfo.layouttype)); error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo); if (error < 0) return error; nfs_server_set_fsinfo(server, &fsinfo); /* Get some general file system info */ if (server->namelen == 0) { struct nfs_pathconf pathinfo; pathinfo.fattr = fattr; nfs_fattr_init(fattr); if (clp->rpc_ops->pathconf(server, mntfh, &pathinfo) >= 0) server->namelen = pathinfo.max_namelen; } if (clp->rpc_ops->discover_trunking != NULL && (server->caps & NFS_CAP_FS_LOCATIONS && (server->flags & NFS_MOUNT_TRUNK_DISCOVERY))) { error = clp->rpc_ops->discover_trunking(server, mntfh); if (error < 0) return error; } return 0; } /* * Grab the destination's particulars, including lease expiry time. * * Returns zero if probe succeeded and retrieved FSID matches the FSID * we have cached. */ int nfs_probe_server(struct nfs_server *server, struct nfs_fh *mntfh) { struct nfs_fattr *fattr; int error; fattr = nfs_alloc_fattr(); if (fattr == NULL) return -ENOMEM; /* Sanity: the probe won't work if the destination server * does not recognize the migrated FH. 
*/ error = nfs_probe_fsinfo(server, mntfh, fattr); nfs_free_fattr(fattr); return error; } EXPORT_SYMBOL_GPL(nfs_probe_server); /* * Copy useful information when duplicating a server record */ void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source) { target->flags = source->flags; target->rsize = source->rsize; target->wsize = source->wsize; target->acregmin = source->acregmin; target->acregmax = source->acregmax; target->acdirmin = source->acdirmin; target->acdirmax = source->acdirmax; target->caps = source->caps; target->options = source->options; target->auth_info = source->auth_info; target->port = source->port; } EXPORT_SYMBOL_GPL(nfs_server_copy_userdata); void nfs_server_insert_lists(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); spin_lock(&nn->nfs_client_lock); list_add_tail_rcu(&server->client_link, &clp->cl_superblocks); list_add_tail(&server->master_link, &nn->nfs_volume_list); clear_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state); spin_unlock(&nn->nfs_client_lock); } EXPORT_SYMBOL_GPL(nfs_server_insert_lists); void nfs_server_remove_lists(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; struct nfs_net *nn; if (clp == NULL) return; nn = net_generic(clp->cl_net, nfs_net_id); spin_lock(&nn->nfs_client_lock); list_del_rcu(&server->client_link); if (list_empty(&clp->cl_superblocks)) set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state); list_del(&server->master_link); spin_unlock(&nn->nfs_client_lock); synchronize_rcu(); } EXPORT_SYMBOL_GPL(nfs_server_remove_lists); static DEFINE_IDA(s_sysfs_ids); /* * Allocate and initialise a server record */ struct nfs_server *nfs_alloc_server(void) { struct nfs_server *server; server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); if (!server) return NULL; server->s_sysfs_id = ida_alloc(&s_sysfs_ids, GFP_KERNEL); if (server->s_sysfs_id < 0) { kfree(server); return NULL; } server->client = server->client_acl = ERR_PTR(-EINVAL); /* Zero out the NFS state stuff */ INIT_LIST_HEAD(&server->client_link); INIT_LIST_HEAD(&server->master_link); INIT_LIST_HEAD(&server->delegations); INIT_LIST_HEAD(&server->layouts); INIT_LIST_HEAD(&server->state_owners_lru); INIT_LIST_HEAD(&server->ss_copies); INIT_LIST_HEAD(&server->ss_src_copies); atomic_set(&server->active, 0); server->io_stats = nfs_alloc_iostats(); if (!server->io_stats) { kfree(server); return NULL; } server->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED; init_waitqueue_head(&server->write_congestion_wait); atomic_long_set(&server->writeback, 0); atomic64_set(&server->owner_ctr, 0); pnfs_init_server(server); rpc_init_wait_queue(&server->uoc_rpcwaitq, "NFS UOC"); return server; } EXPORT_SYMBOL_GPL(nfs_alloc_server); static void delayed_free(struct rcu_head *p) { struct nfs_server *server = container_of(p, struct nfs_server, rcu); nfs_free_iostats(server->io_stats); kfree(server); } /* * Free up a server record */ void nfs_free_server(struct nfs_server *server) { nfs_server_remove_lists(server); if (server->destroy != NULL) server->destroy(server); if (!IS_ERR(server->client_acl)) rpc_shutdown_client(server->client_acl); if (!IS_ERR(server->client)) rpc_shutdown_client(server->client); nfs_put_client(server->nfs_client); if (server->kobj.state_initialized) { nfs_sysfs_remove_server(server); kobject_put(&server->kobj); } ida_free(&s_sysfs_ids, server->s_sysfs_id); put_cred(server->cred); nfs_release_automount_timer(); call_rcu(&server->rcu, delayed_free); } 
EXPORT_SYMBOL_GPL(nfs_free_server); /* * Create a version 2 or 3 volume record * - keyed on server and FSID */ struct nfs_server *nfs_create_server(struct fs_context *fc) { struct nfs_fs_context *ctx = nfs_fc2context(fc); struct nfs_server *server; struct nfs_fattr *fattr; int error; server = nfs_alloc_server(); if (!server) return ERR_PTR(-ENOMEM); server->cred = get_cred(fc->cred); error = -ENOMEM; fattr = nfs_alloc_fattr(); if (fattr == NULL) goto error; /* Get a client representation */ error = nfs_init_server(server, fc); if (error < 0) goto error; /* Probe the root fh to retrieve its FSID */ error = nfs_probe_fsinfo(server, ctx->mntfh, fattr); if (error < 0) goto error; if (server->nfs_client->rpc_ops->version == 3) { if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) server->namelen = NFS3_MAXNAMLEN; if (!(ctx->flags & NFS_MOUNT_NORDIRPLUS)) server->caps |= NFS_CAP_READDIRPLUS; } else { if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) server->namelen = NFS2_MAXNAMLEN; } if (!(fattr->valid & NFS_ATTR_FATTR)) { error = ctx->nfs_mod->rpc_ops->getattr(server, ctx->mntfh, fattr, NULL); if (error < 0) { dprintk("nfs_create_server: getattr error = %d\n", -error); goto error; } } memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid)); dprintk("Server FSID: %llx:%llx\n", (unsigned long long) server->fsid.major, (unsigned long long) server->fsid.minor); nfs_server_insert_lists(server); server->mount_time = jiffies; nfs_free_fattr(fattr); return server; error: nfs_free_fattr(fattr); nfs_free_server(server); return ERR_PTR(error); } EXPORT_SYMBOL_GPL(nfs_create_server); /* * Clone an NFS2, NFS3 or NFS4 server record */ struct nfs_server *nfs_clone_server(struct nfs_server *source, struct nfs_fh *fh, struct nfs_fattr *fattr, rpc_authflavor_t flavor) { struct nfs_server *server; int error; server = nfs_alloc_server(); if (!server) return ERR_PTR(-ENOMEM); server->cred = get_cred(source->cred); /* Copy data from the source */ server->nfs_client = source->nfs_client; server->destroy = source->destroy; refcount_inc(&server->nfs_client->cl_count); nfs_server_copy_userdata(server, source); server->fsid = fattr->fsid; nfs_sysfs_add_server(server); nfs_sysfs_link_rpc_client(server, server->nfs_client->cl_rpcclient, "_state"); error = nfs_init_server_rpcclient(server, source->client->cl_timeout, flavor); if (error < 0) goto out_free_server; /* probe the filesystem info for this server filesystem */ error = nfs_probe_server(server, fh); if (error < 0) goto out_free_server; if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) server->namelen = NFS4_MAXNAMLEN; error = nfs_start_lockd(server); if (error < 0) goto out_free_server; nfs_server_insert_lists(server); server->mount_time = jiffies; return server; out_free_server: nfs_free_server(server); return ERR_PTR(error); } EXPORT_SYMBOL_GPL(nfs_clone_server); void nfs_clients_init(struct net *net) { struct nfs_net *nn = net_generic(net, nfs_net_id); INIT_LIST_HEAD(&nn->nfs_client_list); INIT_LIST_HEAD(&nn->nfs_volume_list); #if IS_ENABLED(CONFIG_NFS_V4) idr_init(&nn->cb_ident_idr); #endif spin_lock_init(&nn->nfs_client_lock); nn->boot_time = ktime_get_real(); memset(&nn->rpcstats, 0, sizeof(nn->rpcstats)); nn->rpcstats.program = &nfs_program; nfs_netns_sysfs_setup(nn, net); } void nfs_clients_exit(struct net *net) { struct nfs_net *nn = net_generic(net, nfs_net_id); nfs_netns_sysfs_destroy(nn); nfs_cleanup_cb_ident_idr(net); WARN_ON_ONCE(!list_empty(&nn->nfs_client_list)); WARN_ON_ONCE(!list_empty(&nn->nfs_volume_list)); } 
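/*
 * Illustrative sketch, not part of client.c: the call_rcu() deferred-free
 * pattern used by nfs_free_server()/delayed_free() above. Because readers walk
 * the server lists under RCU (see nfs_server_insert_lists()), the final
 * kfree() must wait for a grace period after the entry is unlinked. The
 * example_* names are hypothetical; assumes <linux/rcupdate.h>,
 * <linux/list.h>, <linux/spinlock.h> and <linux/slab.h>.
 */
struct example_node {
	struct list_head link;
	struct rcu_head rcu;
};

static void example_delayed_free(struct rcu_head *head)
{
	kfree(container_of(head, struct example_node, rcu));
}

static void example_remove(struct example_node *node, spinlock_t *lock)
{
	spin_lock(lock);
	list_del_rcu(&node->link);	/* unlink; RCU readers may still see it */
	spin_unlock(lock);
	call_rcu(&node->rcu, example_delayed_free);	/* free after grace period */
}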
#ifdef CONFIG_PROC_FS static void *nfs_server_list_start(struct seq_file *p, loff_t *pos); static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos); static void nfs_server_list_stop(struct seq_file *p, void *v); static int nfs_server_list_show(struct seq_file *m, void *v); static const struct seq_operations nfs_server_list_ops = { .start = nfs_server_list_start, .next = nfs_server_list_next, .stop = nfs_server_list_stop, .show = nfs_server_list_show, }; static void *nfs_volume_list_start(struct seq_file *p, loff_t *pos); static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos); static void nfs_volume_list_stop(struct seq_file *p, void *v); static int nfs_volume_list_show(struct seq_file *m, void *v); static const struct seq_operations nfs_volume_list_ops = { .start = nfs_volume_list_start, .next = nfs_volume_list_next, .stop = nfs_volume_list_stop, .show = nfs_volume_list_show, }; /* * set up the iterator to start reading from the server list and return the first item */ static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) __acquires(&nn->nfs_client_lock) { struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id); /* lock the list against modification */ spin_lock(&nn->nfs_client_lock); return seq_list_start_head(&nn->nfs_client_list, *_pos); } /* * move to next server */ static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) { struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id); return seq_list_next(v, &nn->nfs_client_list, pos); } /* * clean up after reading from the transports list */ static void nfs_server_list_stop(struct seq_file *p, void *v) __releases(&nn->nfs_client_lock) { struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id); spin_unlock(&nn->nfs_client_lock); } /* * display a header line followed by a load of call lines */ static int nfs_server_list_show(struct seq_file *m, void *v) { struct nfs_client *clp; struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id); /* display header on line 1 */ if (v == &nn->nfs_client_list) { seq_puts(m, "NV SERVER PORT USE HOSTNAME\n"); return 0; } /* display one transport per line on subsequent lines */ clp = list_entry(v, struct nfs_client, cl_share_link); /* Check if the client is initialized */ if (clp->cl_cons_state != NFS_CS_READY) return 0; rcu_read_lock(); seq_printf(m, "v%u %s %s %3d %s\n", clp->rpc_ops->version, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT), refcount_read(&clp->cl_count), clp->cl_hostname); rcu_read_unlock(); return 0; } /* * set up the iterator to start reading from the volume list and return the first item */ static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) __acquires(&nn->nfs_client_lock) { struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id); /* lock the list against modification */ spin_lock(&nn->nfs_client_lock); return seq_list_start_head(&nn->nfs_volume_list, *_pos); } /* * move to next volume */ static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) { struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id); return seq_list_next(v, &nn->nfs_volume_list, pos); } /* * clean up after reading from the transports list */ static void nfs_volume_list_stop(struct seq_file *p, void *v) __releases(&nn->nfs_client_lock) { struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id); spin_unlock(&nn->nfs_client_lock); } /* * display a header line followed by a load of call lines */ static int 
nfs_volume_list_show(struct seq_file *m, void *v) { struct nfs_server *server; struct nfs_client *clp; char dev[13]; // 8 for 2^24, 1 for ':', 3 for 2^8, 1 for '\0' char fsid[34]; // 2 * 16 for %llx, 1 for ':', 1 for '\0' struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id); /* display header on line 1 */ if (v == &nn->nfs_volume_list) { seq_puts(m, "NV SERVER PORT DEV FSID" " FSC\n"); return 0; } /* display one transport per line on subsequent lines */ server = list_entry(v, struct nfs_server, master_link); clp = server->nfs_client; snprintf(dev, sizeof(dev), "%u:%u", MAJOR(server->s_dev), MINOR(server->s_dev)); snprintf(fsid, sizeof(fsid), "%llx:%llx", (unsigned long long) server->fsid.major, (unsigned long long) server->fsid.minor); rcu_read_lock(); seq_printf(m, "v%u %s %s %-12s %-33s %s\n", clp->rpc_ops->version, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT), dev, fsid, nfs_server_fscache_state(server)); rcu_read_unlock(); return 0; } int nfs_fs_proc_net_init(struct net *net) { struct nfs_net *nn = net_generic(net, nfs_net_id); struct proc_dir_entry *p; nn->proc_nfsfs = proc_net_mkdir(net, "nfsfs", net->proc_net); if (!nn->proc_nfsfs) goto error_0; /* a file of servers with which we're dealing */ p = proc_create_net("servers", S_IFREG|S_IRUGO, nn->proc_nfsfs, &nfs_server_list_ops, sizeof(struct seq_net_private)); if (!p) goto error_1; /* a file of volumes that we have mounted */ p = proc_create_net("volumes", S_IFREG|S_IRUGO, nn->proc_nfsfs, &nfs_volume_list_ops, sizeof(struct seq_net_private)); if (!p) goto error_1; return 0; error_1: remove_proc_subtree("nfsfs", net->proc_net); error_0: return -ENOMEM; } void nfs_fs_proc_net_exit(struct net *net) { remove_proc_subtree("nfsfs", net->proc_net); } /* * initialise the /proc/fs/nfsfs/ directory */ int __init nfs_fs_proc_init(void) { if (!proc_mkdir("fs/nfsfs", NULL)) goto error_0; /* a file of servers with which we're dealing */ if (!proc_symlink("fs/nfsfs/servers", NULL, "../../net/nfsfs/servers")) goto error_1; /* a file of volumes that we have mounted */ if (!proc_symlink("fs/nfsfs/volumes", NULL, "../../net/nfsfs/volumes")) goto error_1; return 0; error_1: remove_proc_subtree("fs/nfsfs", NULL); error_0: return -ENOMEM; } /* * clean up the /proc/fs/nfsfs/ directory */ void nfs_fs_proc_exit(void) { remove_proc_subtree("fs/nfsfs", NULL); ida_destroy(&s_sysfs_ids); } #endif /* CONFIG_PROC_FS */
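The two proc files created above are plain text: each line of /proc/fs/nfsfs/servers and /proc/fs/nfsfs/volumes (symlinks to the per-netns files under /proc/net/nfsfs/) is produced by the seq_printf() formats in nfs_server_list_show() and nfs_volume_list_show(). A minimal user-space sketch of reading them, assuming at least one NFS mount exists; the program and the sample output values are hypothetical, not taken from the kernel sources:

#include <stdio.h>

int main(void)
{
	/* Path comes from the proc_symlink() calls in nfs_fs_proc_init(). */
	FILE *f = fopen("/proc/fs/nfsfs/volumes", "r");
	char line[256];

	if (!f) {
		perror("fopen /proc/fs/nfsfs/volumes");
		return 1;
	}
	/*
	 * Line 1 is the header emitted by nfs_volume_list_show():
	 *   NV SERVER   PORT DEV          FSID                          FSC
	 * followed by one line per mounted volume, e.g. (hypothetical values):
	 *   v4 c0a80101  0801 0:52         12ab34cd:0                    no
	 */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);

	fclose(f);
	return 0;
}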
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_FS_NOTIFY_H #define _LINUX_FS_NOTIFY_H /* * include/linux/fsnotify.h - generic hooks for filesystem notification, to * reduce in-source duplication from both dnotify and inotify. * * We don't compile any of this away in some complicated menagerie of ifdefs. * Instead, we rely on the code inside to optimize away as needed. * * (C) Copyright 2005 Robert Love */ #include <linux/fsnotify_backend.h> #include <linux/audit.h> #include <linux/slab.h> #include <linux/bug.h> /* Are there any inode/mount/sb objects watched with priority prio or above? */ static inline bool fsnotify_sb_has_priority_watchers(struct super_block *sb, int prio) { struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); /* Were any marks ever added to any object on this sb? */ if (!sbinfo) return false; return atomic_long_read(&sbinfo->watched_objects[prio]); } /* Are there any inode/mount/sb objects that are being watched at all? */ static inline bool fsnotify_sb_has_watchers(struct super_block *sb) { return fsnotify_sb_has_priority_watchers(sb, 0); } /* * Notify this @dir inode about a change in a child directory entry. * The directory entry may have turned positive or negative or its inode may * have changed (i.e. renamed over).
* * Unlike fsnotify_parent(), the event will be reported regardless of the * FS_EVENT_ON_CHILD mask on the parent inode and will not be reported if only * the child is interested and not the parent. */ static inline int fsnotify_name(__u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *name, u32 cookie) { if (!fsnotify_sb_has_watchers(dir->i_sb)) return 0; return fsnotify(mask, data, data_type, dir, name, NULL, cookie); } static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry, __u32 mask) { fsnotify_name(mask, dentry, FSNOTIFY_EVENT_DENTRY, dir, &dentry->d_name, 0); } static inline void fsnotify_inode(struct inode *inode, __u32 mask) { if (!fsnotify_sb_has_watchers(inode->i_sb)) return; if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; fsnotify(mask, inode, FSNOTIFY_EVENT_INODE, NULL, NULL, inode, 0); } /* Notify this dentry's parent about a child's events. */ static inline int fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, int data_type) { struct inode *inode = d_inode(dentry); if (!fsnotify_sb_has_watchers(inode->i_sb)) return 0; if (S_ISDIR(inode->i_mode)) { mask |= FS_ISDIR; /* sb/mount marks are not interested in name of directory */ if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED)) goto notify_child; } /* disconnected dentry cannot notify parent */ if (IS_ROOT(dentry)) goto notify_child; return __fsnotify_parent(dentry, mask, data, data_type); notify_child: return fsnotify(mask, data, data_type, NULL, NULL, inode, 0); } /* * Simple wrappers to consolidate calls to fsnotify_parent() when an event * is on a file/dentry. */ static inline void fsnotify_dentry(struct dentry *dentry, __u32 mask) { fsnotify_parent(dentry, mask, dentry, FSNOTIFY_EVENT_DENTRY); } static inline int fsnotify_path(const struct path *path, __u32 mask) { return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH); } static inline int fsnotify_file(struct file *file, __u32 mask) { /* * FMODE_NONOTIFY are fds generated by fanotify itself which should not * generate new events. We also don't want to generate events for * FMODE_PATH fds (involves open & close events) as they are just * handle creation / destruction events and not "real" file events. */ if (FMODE_FSNOTIFY_NONE(file->f_mode)) return 0; return fsnotify_path(&file->f_path, mask); } #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS void file_set_fsnotify_mode_from_watchers(struct file *file); /* * fsnotify_file_area_perm - permission hook before access to file range */ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, const loff_t *ppos, size_t count) { /* * filesystem may be modified in the context of permission events * (e.g. by HSM filling a file on access), so sb freeze protection * must not be held. */ lockdep_assert_once(file_write_not_started(file)); if (!(perm_mask & (MAY_READ | MAY_WRITE | MAY_ACCESS))) return 0; if (likely(!FMODE_FSNOTIFY_PERM(file->f_mode))) return 0; /* * read()/write() and other types of access generate pre-content events. */ if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) { int ret = fsnotify_pre_content(&file->f_path, ppos, count); if (ret) return ret; } if (!(perm_mask & MAY_READ)) return 0; /* * read() also generates the legacy FS_ACCESS_PERM event, so content * scanners can inspect the content filled by pre-content event. 
*/ return fsnotify_path(&file->f_path, FS_ACCESS_PERM); } /* * fsnotify_truncate_perm - permission hook before file truncate */ static inline int fsnotify_truncate_perm(const struct path *path, loff_t length) { struct inode *inode = d_inode(path->dentry); if (!(inode->i_sb->s_iflags & SB_I_ALLOW_HSM) || !fsnotify_sb_has_priority_watchers(inode->i_sb, FSNOTIFY_PRIO_PRE_CONTENT)) return 0; return fsnotify_pre_content(path, &length, 0); } /* * fsnotify_file_perm - permission hook before file access (unknown range) */ static inline int fsnotify_file_perm(struct file *file, int perm_mask) { return fsnotify_file_area_perm(file, perm_mask, NULL, 0); } /* * fsnotify_open_perm - permission hook before file open */ static inline int fsnotify_open_perm(struct file *file) { int ret; if (likely(!FMODE_FSNOTIFY_PERM(file->f_mode))) return 0; if (file->f_flags & __FMODE_EXEC) { ret = fsnotify_path(&file->f_path, FS_OPEN_EXEC_PERM); if (ret) return ret; } return fsnotify_path(&file->f_path, FS_OPEN_PERM); } #else static inline void file_set_fsnotify_mode_from_watchers(struct file *file) { } static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, const loff_t *ppos, size_t count) { return 0; } static inline int fsnotify_truncate_perm(const struct path *path, loff_t length) { return 0; } static inline int fsnotify_file_perm(struct file *file, int perm_mask) { return 0; } static inline int fsnotify_open_perm(struct file *file) { return 0; } #endif /* * fsnotify_link_count - inode's link count changed */ static inline void fsnotify_link_count(struct inode *inode) { fsnotify_inode(inode, FS_ATTRIB); } /* * fsnotify_move - file old_name at old_dir was moved to new_name at new_dir */ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, const struct qstr *old_name, int isdir, struct inode *target, struct dentry *moved) { struct inode *source = moved->d_inode; u32 fs_cookie = fsnotify_get_cookie(); __u32 old_dir_mask = FS_MOVED_FROM; __u32 new_dir_mask = FS_MOVED_TO; __u32 rename_mask = FS_RENAME; const struct qstr *new_name = &moved->d_name; if (isdir) { old_dir_mask |= FS_ISDIR; new_dir_mask |= FS_ISDIR; rename_mask |= FS_ISDIR; } /* Event with information about both old and new parent+name */ fsnotify_name(rename_mask, moved, FSNOTIFY_EVENT_DENTRY, old_dir, old_name, 0); fsnotify_name(old_dir_mask, source, FSNOTIFY_EVENT_INODE, old_dir, old_name, fs_cookie); fsnotify_name(new_dir_mask, source, FSNOTIFY_EVENT_INODE, new_dir, new_name, fs_cookie); if (target) fsnotify_link_count(target); fsnotify_inode(source, FS_MOVE_SELF); audit_inode_child(new_dir, moved, AUDIT_TYPE_CHILD_CREATE); } /* * fsnotify_inode_delete - and inode is being evicted from cache, clean up is needed */ static inline void fsnotify_inode_delete(struct inode *inode) { __fsnotify_inode_delete(inode); } /* * fsnotify_vfsmount_delete - a vfsmount is being destroyed, clean up is needed */ static inline void fsnotify_vfsmount_delete(struct vfsmount *mnt) { __fsnotify_vfsmount_delete(mnt); } /* * fsnotify_inoderemove - an inode is going away */ static inline void fsnotify_inoderemove(struct inode *inode) { fsnotify_inode(inode, FS_DELETE_SELF); __fsnotify_inode_delete(inode); } /* * fsnotify_create - 'name' was linked in * * Caller must make sure that dentry->d_name is stable. * Note: some filesystems (e.g. 
kernfs) leave @dentry negative and instantiate * ->d_inode later */ static inline void fsnotify_create(struct inode *dir, struct dentry *dentry) { audit_inode_child(dir, dentry, AUDIT_TYPE_CHILD_CREATE); fsnotify_dirent(dir, dentry, FS_CREATE); } /* * fsnotify_link - new hardlink in 'inode' directory * * Caller must make sure that new_dentry->d_name is stable. * Note: We have to pass also the linked inode ptr as some filesystems leave * new_dentry->d_inode NULL and instantiate inode pointer later */ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct dentry *new_dentry) { fsnotify_link_count(inode); audit_inode_child(dir, new_dentry, AUDIT_TYPE_CHILD_CREATE); fsnotify_name(FS_CREATE, inode, FSNOTIFY_EVENT_INODE, dir, &new_dentry->d_name, 0); } /* * fsnotify_delete - @dentry was unlinked and unhashed * * Caller must make sure that dentry->d_name is stable. * * Note: unlike fsnotify_unlink(), we have to pass also the unlinked inode * as this may be called after d_delete() and old_dentry may be negative. */ static inline void fsnotify_delete(struct inode *dir, struct inode *inode, struct dentry *dentry) { __u32 mask = FS_DELETE; if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; fsnotify_name(mask, inode, FSNOTIFY_EVENT_INODE, dir, &dentry->d_name, 0); } /** * d_delete_notify - delete a dentry and call fsnotify_delete() * @dir: The parent directory of @dentry * @dentry: The dentry to delete * * This helper is used to guarantee that the unlinked inode cannot be found * by lookup of this name after fsnotify_delete() event has been delivered. */ static inline void d_delete_notify(struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(dentry); ihold(inode); d_delete(dentry); fsnotify_delete(dir, inode, dentry); iput(inode); } /* * fsnotify_unlink - 'name' was unlinked * * Caller must make sure that dentry->d_name is stable. */ static inline void fsnotify_unlink(struct inode *dir, struct dentry *dentry) { if (WARN_ON_ONCE(d_is_negative(dentry))) return; fsnotify_delete(dir, d_inode(dentry), dentry); } /* * fsnotify_mkdir - directory 'name' was created * * Caller must make sure that dentry->d_name is stable. * Note: some filesystems (e.g. kernfs) leave @dentry negative and instantiate * ->d_inode later */ static inline void fsnotify_mkdir(struct inode *dir, struct dentry *dentry) { audit_inode_child(dir, dentry, AUDIT_TYPE_CHILD_CREATE); fsnotify_dirent(dir, dentry, FS_CREATE | FS_ISDIR); } /* * fsnotify_rmdir - directory 'name' was removed * * Caller must make sure that dentry->d_name is stable. */ static inline void fsnotify_rmdir(struct inode *dir, struct dentry *dentry) { if (WARN_ON_ONCE(d_is_negative(dentry))) return; fsnotify_delete(dir, d_inode(dentry), dentry); } /* * fsnotify_access - file was read */ static inline void fsnotify_access(struct file *file) { fsnotify_file(file, FS_ACCESS); } /* * fsnotify_modify - file was modified */ static inline void fsnotify_modify(struct file *file) { fsnotify_file(file, FS_MODIFY); } /* * fsnotify_open - file was opened */ static inline void fsnotify_open(struct file *file) { __u32 mask = FS_OPEN; if (file->f_flags & __FMODE_EXEC) mask |= FS_OPEN_EXEC; fsnotify_file(file, mask); } /* * fsnotify_close - file was closed */ static inline void fsnotify_close(struct file *file) { __u32 mask = (file->f_mode & FMODE_WRITE) ?
FS_CLOSE_WRITE : FS_CLOSE_NOWRITE; fsnotify_file(file, mask); } /* * fsnotify_xattr - extended attributes were changed */ static inline void fsnotify_xattr(struct dentry *dentry) { fsnotify_dentry(dentry, FS_ATTRIB); } /* * fsnotify_change - notify_change event. file was modified and/or metadata * was changed. */ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) { __u32 mask = 0; if (ia_valid & ATTR_UID) mask |= FS_ATTRIB; if (ia_valid & ATTR_GID) mask |= FS_ATTRIB; if (ia_valid & ATTR_SIZE) mask |= FS_MODIFY; /* both times implies a utime(s) call */ if ((ia_valid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME)) mask |= FS_ATTRIB; else if (ia_valid & ATTR_ATIME) mask |= FS_ACCESS; else if (ia_valid & ATTR_MTIME) mask |= FS_MODIFY; if (ia_valid & ATTR_MODE) mask |= FS_ATTRIB; if (mask) fsnotify_dentry(dentry, mask); } static inline int fsnotify_sb_error(struct super_block *sb, struct inode *inode, int error) { struct fs_error_report report = { .error = error, .inode = inode, .sb = sb, }; return fsnotify(FS_ERROR, &report, FSNOTIFY_EVENT_ERROR, NULL, NULL, NULL, 0); } #endif /* _LINUX_FS_NOTIFY_H */
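The wrappers above are meant to be called from generic VFS code once an operation has succeeded; filesystems normally never call fsnotify() directly. A minimal sketch of how they compose, assuming a hypothetical helper that creates a file, writes to it and closes it; only the fsnotify_* calls come from this header, the surrounding function and its arguments are illustrative:

/* Illustrative sketch only, not part of fsnotify.h. */
static inline void my_vfs_create_and_touch(struct inode *dir,
					   struct dentry *dentry,
					   struct file *file)
{
	/* FS_CREATE against the parent directory, with the child's name. */
	fsnotify_create(dir, dentry);

	/* FS_OPEN (plus FS_OPEN_EXEC when __FMODE_EXEC is set). */
	fsnotify_open(file);

	/* FS_MODIFY, reported to the parent via fsnotify_parent(). */
	fsnotify_modify(file);

	/* FS_CLOSE_WRITE or FS_CLOSE_NOWRITE depending on FMODE_WRITE. */
	fsnotify_close(file);
}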
/* SPDX-License-Identifier: GPL-2.0 */ /* * connection tracking event cache. */ #ifndef _NF_CONNTRACK_ECACHE_H #define _NF_CONNTRACK_ECACHE_H #include <net/netfilter/nf_conntrack.h> #include <net/net_namespace.h> #include <net/netfilter/nf_conntrack_expect.h> #include <linux/netfilter/nf_conntrack_common.h> #include <linux/netfilter/nf_conntrack_tuple_common.h> #include <net/netfilter/nf_conntrack_extend.h> #include <asm/local64.h> enum nf_ct_ecache_state { NFCT_ECACHE_DESTROY_FAIL, /* tried but failed to send destroy event */ NFCT_ECACHE_DESTROY_SENT, /* sent destroy event after failure */ }; struct nf_conntrack_ecache { unsigned long cache; /* bitops want long */ #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP local64_t timestamp; /* event timestamp, in nanoseconds */ #endif u16 ctmask; /* bitmask of ct events to be delivered */ u16 expmask; /* bitmask of expect events to be delivered */ u32 missed; /* missed events */ u32 portid; /* netlink portid of destroyer */ }; static inline struct nf_conntrack_ecache * nf_ct_ecache_find(const struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_EVENTS return nf_ct_ext_find(ct, NF_CT_EXT_ECACHE); #else return NULL; #endif } static inline bool nf_ct_ecache_exist(const struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_EVENTS return nf_ct_ext_exist(ct, NF_CT_EXT_ECACHE); #else return false; #endif } #ifdef CONFIG_NF_CONNTRACK_EVENTS /* This structure is passed to event handler */ struct nf_ct_event { struct nf_conn *ct; u32 portid; int report; }; struct nf_exp_event { struct nf_conntrack_expect *exp; u32 portid; int report; }; struct nf_ct_event_notifier { int (*ct_event)(unsigned int events, const struct nf_ct_event *item); int (*exp_event)(unsigned int events, const struct nf_exp_event *item); }; void nf_conntrack_register_notifier(struct net *net, const struct nf_ct_event_notifier *nb); void nf_conntrack_unregister_notifier(struct net *net); void nf_ct_deliver_cached_events(struct nf_conn *ct); int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, u32 portid, int report); bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp); #else static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) { } static inline int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, u32 portid, int report) { return 0; } static inline bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp) { return false; } #endif static inline void nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_EVENTS struct net *net = nf_ct_net(ct); struct nf_conntrack_ecache *e; if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb)) return; e = nf_ct_ecache_find(ct); if (e == NULL) return; #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP /* renew only if this is the first cached event, so that the * timestamp
reflects the first, not the last, generated event. */ if (local64_read(&e->timestamp) && READ_ONCE(e->cache) == 0) local64_set(&e->timestamp, ktime_get_real_ns()); #endif set_bit(event, &e->cache); #endif } static inline int nf_conntrack_event_report(enum ip_conntrack_events event, struct nf_conn *ct, u32 portid, int report) { #ifdef CONFIG_NF_CONNTRACK_EVENTS if (nf_ct_ecache_exist(ct)) return nf_conntrack_eventmask_report(1 << event, ct, portid, report); #endif return 0; } static inline int nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_EVENTS if (nf_ct_ecache_exist(ct)) return nf_conntrack_eventmask_report(1 << event, ct, 0, 0); #endif return 0; } #ifdef CONFIG_NF_CONNTRACK_EVENTS void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, struct nf_conntrack_expect *exp, u32 portid, int report); void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state); void nf_conntrack_ecache_pernet_init(struct net *net); void nf_conntrack_ecache_pernet_fini(struct net *net); struct nf_conntrack_net_ecache *nf_conn_pernet_ecache(const struct net *net); static inline bool nf_conntrack_ecache_dwork_pending(const struct net *net) { return net->ct.ecache_dwork_pending; } #else /* CONFIG_NF_CONNTRACK_EVENTS */ static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e, struct nf_conntrack_expect *exp, u32 portid, int report) { } static inline void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state s) { } static inline void nf_conntrack_ecache_pernet_init(struct net *net) { } static inline void nf_conntrack_ecache_pernet_fini(struct net *net) { } static inline bool nf_conntrack_ecache_dwork_pending(const struct net *net) { return false; } #endif /* CONFIG_NF_CONNTRACK_EVENTS */ #endif /*_NF_CONNTRACK_ECACHE_H*/
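Events cached above are delivered to whoever registered the per-netns notifier (in practice ctnetlink). A minimal registration sketch, assuming CONFIG_NF_CONNTRACK_EVENTS and hypothetical handler names; only struct nf_ct_event_notifier, nf_conntrack_register_notifier() and nf_conntrack_unregister_notifier() come from this header:

/* Illustrative sketch only: registering a conntrack event listener. */
#ifdef CONFIG_NF_CONNTRACK_EVENTS
static int my_ct_event(unsigned int events, const struct nf_ct_event *item)
{
	/*
	 * events is a bitmask of conntrack event bits (IPCT_*); item->ct is
	 * the conntrack entry, item->portid/report describe the requester.
	 */
	return 0;
}

static const struct nf_ct_event_notifier my_notifier = {
	.ct_event = my_ct_event,
};

static void my_register_listener(struct net *net)
{
	nf_conntrack_register_notifier(net, &my_notifier);
}

static void my_unregister_listener(struct net *net)
{
	nf_conntrack_unregister_notifier(net);
}
#endif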
/* * Utility functions for x86 operand and address decoding * * Copyright (C) Intel Corporation 2017 */ #include <linux/kernel.h> #include <linux/string.h> #include <linux/ratelimit.h> #include <linux/mmu_context.h> #include <asm/desc_defs.h> #include <asm/desc.h> #include <asm/inat.h> #include <asm/insn.h> #include <asm/insn-eval.h> #include <asm/ldt.h> #include <asm/vm86.h> #undef pr_fmt #define pr_fmt(fmt) "insn: " fmt enum reg_type { REG_TYPE_RM = 0, REG_TYPE_REG, REG_TYPE_INDEX, REG_TYPE_BASE, }; /** * is_string_insn() - Determine if instruction is a string instruction * @insn: Instruction containing the opcode to inspect * * Returns: * * true if the instruction, determined by the opcode, is any of the * string instructions as defined in the Intel Software Development manual. * False otherwise. */ static bool is_string_insn(struct insn *insn) { /* All string instructions have a 1-byte opcode. */ if (insn->opcode.nbytes != 1) return false; switch (insn->opcode.bytes[0]) { case 0x6c ... 0x6f: /* INS, OUTS */ case 0xa4 ... 0xa7: /* MOVS, CMPS */ case 0xaa ... 0xaf: /* STOS, LODS, SCAS */ return true; default: return false; } } /** * insn_has_rep_prefix() - Determine if instruction has a REP prefix * @insn: Instruction containing the prefix to inspect * * Returns: * * true if the instruction has a REP prefix, false if not. */ bool insn_has_rep_prefix(struct insn *insn) { insn_byte_t p; int i; insn_get_prefixes(insn); for_each_insn_prefix(insn, i, p) { if (p == 0xf2 || p == 0xf3) return true; } return false; } /** * get_seg_reg_override_idx() - obtain segment register override index * @insn: Valid instruction with segment override prefixes * * Inspect the instruction prefixes in @insn and find segment overrides, if any. * * Returns: * * A constant identifying the segment register to use, among CS, SS, DS, * ES, FS, or GS. INAT_SEG_REG_DEFAULT is returned if no segment override * prefixes were found. * * -EINVAL in case of error. */ static int get_seg_reg_override_idx(struct insn *insn) { int idx = INAT_SEG_REG_DEFAULT; int num_overrides = 0, i; insn_byte_t p; insn_get_prefixes(insn); /* Look for any segment override prefixes. */ for_each_insn_prefix(insn, i, p) { insn_attr_t attr; attr = inat_get_opcode_attribute(p); switch (attr) { case INAT_MAKE_PREFIX(INAT_PFX_CS): idx = INAT_SEG_REG_CS; num_overrides++; break; case INAT_MAKE_PREFIX(INAT_PFX_SS): idx = INAT_SEG_REG_SS; num_overrides++; break; case INAT_MAKE_PREFIX(INAT_PFX_DS): idx = INAT_SEG_REG_DS; num_overrides++; break; case INAT_MAKE_PREFIX(INAT_PFX_ES): idx = INAT_SEG_REG_ES; num_overrides++; break; case INAT_MAKE_PREFIX(INAT_PFX_FS): idx = INAT_SEG_REG_FS; num_overrides++; break; case INAT_MAKE_PREFIX(INAT_PFX_GS): idx = INAT_SEG_REG_GS; num_overrides++; break; /* No default action needed. */ } } /* More than one segment override prefix leads to undefined behavior. */ if (num_overrides > 1) return -EINVAL; return idx; } /** * check_seg_overrides() - check if segment override prefixes are allowed * @insn: Valid instruction with segment override prefixes * @regoff: Operand offset, in pt_regs, for which the check is performed * * For a particular register used in register-indirect addressing, determine if * segment override prefixes can be used. Specifically, no overrides are allowed * for rDI if used with a string instruction.
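 * (For example, MOVS and STOS always write through ES:rDI; the architecture
 * does not honor a segment override for that operand, which is why overrides
 * are refused for rDI in string instructions.)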
* * Returns: * * True if segment override prefixes can be used with the register indicated * in @regoff. False if otherwise. */ static bool check_seg_overrides(struct insn *insn, int regoff) { if (regoff == offsetof(struct pt_regs, di) && is_string_insn(insn)) return false; return true; } /** * resolve_default_seg() - resolve default segment register index for an operand * @insn: Instruction with opcode and address size. Must be valid. * @regs: Register values as seen when entering kernel mode * @off: Operand offset, in pt_regs, for which resolution is needed * * Resolve the default segment register index associated with the instruction * operand register indicated by @off. Such index is resolved based on defaults * described in the Intel Software Development Manual. * * Returns: * * If in protected mode, a constant identifying the segment register to use, * among CS, SS, ES or DS. If in long mode, INAT_SEG_REG_IGNORE. * * -EINVAL in case of error. */ static int resolve_default_seg(struct insn *insn, struct pt_regs *regs, int off) { if (any_64bit_mode(regs)) return INAT_SEG_REG_IGNORE; /* * Resolve the default segment register as described in Section 3.7.4 * of the Intel Software Development Manual Vol. 1: * * + DS for all references involving r[ABCD]X, and rSI. * + If used in a string instruction, ES for rDI. Otherwise, DS. * + AX, CX and DX are not valid register operands in 16-bit address * encodings but are valid for 32-bit and 64-bit encodings. * + -EDOM is reserved to identify for cases in which no register * is used (i.e., displacement-only addressing). Use DS. * + SS for rSP or rBP. * + CS for rIP. */ switch (off) { case offsetof(struct pt_regs, ax): case offsetof(struct pt_regs, cx): case offsetof(struct pt_regs, dx): /* Need insn to verify address size. */ if (insn->addr_bytes == 2) return -EINVAL; fallthrough; case -EDOM: case offsetof(struct pt_regs, bx): case offsetof(struct pt_regs, si): return INAT_SEG_REG_DS; case offsetof(struct pt_regs, di): if (is_string_insn(insn)) return INAT_SEG_REG_ES; return INAT_SEG_REG_DS; case offsetof(struct pt_regs, bp): case offsetof(struct pt_regs, sp): return INAT_SEG_REG_SS; case offsetof(struct pt_regs, ip): return INAT_SEG_REG_CS; default: return -EINVAL; } } /** * resolve_seg_reg() - obtain segment register index * @insn: Instruction with operands * @regs: Register values as seen when entering kernel mode * @regoff: Operand offset, in pt_regs, used to determine segment register * * Determine the segment register associated with the operands and, if * applicable, prefixes and the instruction pointed by @insn. * * The segment register associated to an operand used in register-indirect * addressing depends on: * * a) Whether running in long mode (in such a case segments are ignored, except * if FS or GS are used). * * b) Whether segment override prefixes can be used. Certain instructions and * registers do not allow override prefixes. * * c) Whether segment overrides prefixes are found in the instruction prefixes. * * d) If there are not segment override prefixes or they cannot be used, the * default segment register associated with the operand register is used. * * The function checks first if segment override prefixes can be used with the * operand indicated by @regoff. If allowed, obtain such overridden segment * register index. Lastly, if not prefixes were found or cannot be used, resolve * the segment register index to use based on the defaults described in the * Intel documentation. 
In long mode, all segment register indexes will be * ignored, except if overrides were found for FS or GS. All these operations * are done using helper functions. * * The operand register, @regoff, is represented as the offset from the base of * pt_regs. * * As stated, the main use of this function is to determine the segment register * index based on the instruction, its operands and prefixes. Hence, @insn * must be valid. However, if @regoff indicates rIP, we don't need to inspect * @insn at all as in this case CS is used in all cases. This case is checked * before proceeding further. * * Please note that this function does not return the value in the segment * register (i.e., the segment selector) but our defined index. The segment * selector needs to be obtained using get_segment_selector() and passing the * segment register index resolved by this function. * * Returns: * * An index identifying the segment register to use, among CS, SS, DS, * ES, FS, or GS. INAT_SEG_REG_IGNORE is returned if running in long mode. * * -EINVAL in case of error. */ static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff) { int idx; /* * In the unlikely event of having to resolve the segment register * index for rIP, do it first. Segment override prefixes should not * be used. Hence, it is not necessary to inspect the instruction, * which may be invalid at this point. */ if (regoff == offsetof(struct pt_regs, ip)) { if (any_64bit_mode(regs)) return INAT_SEG_REG_IGNORE; else return INAT_SEG_REG_CS; } if (!insn) return -EINVAL; if (!check_seg_overrides(insn, regoff)) return resolve_default_seg(insn, regs, regoff); idx = get_seg_reg_override_idx(insn); if (idx < 0) return idx; if (idx == INAT_SEG_REG_DEFAULT) return resolve_default_seg(insn, regs, regoff); /* * In long mode, segment override prefixes are ignored, except for * overrides for FS and GS. */ if (any_64bit_mode(regs)) { if (idx != INAT_SEG_REG_FS && idx != INAT_SEG_REG_GS) idx = INAT_SEG_REG_IGNORE; } return idx; } /** * get_segment_selector() - obtain segment selector * @regs: Register values as seen when entering kernel mode * @seg_reg_idx: Segment register index to use * * Obtain the segment selector from any of the CS, SS, DS, ES, FS, GS segment * registers. In CONFIG_X86_32, the segment is obtained from either pt_regs or * kernel_vm86_regs as applicable. In CONFIG_X86_64, CS and SS are obtained * from pt_regs. DS, ES, FS and GS are obtained by reading the actual CPU * registers. This done for only for completeness as in CONFIG_X86_64 segment * registers are ignored. * * Returns: * * Value of the segment selector, including null when running in * long mode. * * -EINVAL on error. 
*/ static short get_segment_selector(struct pt_regs *regs, int seg_reg_idx) { unsigned short sel; #ifdef CONFIG_X86_64 switch (seg_reg_idx) { case INAT_SEG_REG_IGNORE: return 0; case INAT_SEG_REG_CS: return (unsigned short)(regs->cs & 0xffff); case INAT_SEG_REG_SS: return (unsigned short)(regs->ss & 0xffff); case INAT_SEG_REG_DS: savesegment(ds, sel); return sel; case INAT_SEG_REG_ES: savesegment(es, sel); return sel; case INAT_SEG_REG_FS: savesegment(fs, sel); return sel; case INAT_SEG_REG_GS: savesegment(gs, sel); return sel; default: return -EINVAL; } #else /* CONFIG_X86_32 */ struct kernel_vm86_regs *vm86regs = (struct kernel_vm86_regs *)regs; if (v8086_mode(regs)) { switch (seg_reg_idx) { case INAT_SEG_REG_CS: return (unsigned short)(regs->cs & 0xffff); case INAT_SEG_REG_SS: return (unsigned short)(regs->ss & 0xffff); case INAT_SEG_REG_DS: return vm86regs->ds; case INAT_SEG_REG_ES: return vm86regs->es; case INAT_SEG_REG_FS: return vm86regs->fs; case INAT_SEG_REG_GS: return vm86regs->gs; case INAT_SEG_REG_IGNORE: default: return -EINVAL; } } switch (seg_reg_idx) { case INAT_SEG_REG_CS: return (unsigned short)(regs->cs & 0xffff); case INAT_SEG_REG_SS: return (unsigned short)(regs->ss & 0xffff); case INAT_SEG_REG_DS: return (unsigned short)(regs->ds & 0xffff); case INAT_SEG_REG_ES: return (unsigned short)(regs->es & 0xffff); case INAT_SEG_REG_FS: return (unsigned short)(regs->fs & 0xffff); case INAT_SEG_REG_GS: savesegment(gs, sel); return sel; case INAT_SEG_REG_IGNORE: default: return -EINVAL; } #endif /* CONFIG_X86_64 */ } static const int pt_regoff[] = { offsetof(struct pt_regs, ax), offsetof(struct pt_regs, cx), offsetof(struct pt_regs, dx), offsetof(struct pt_regs, bx), offsetof(struct pt_regs, sp), offsetof(struct pt_regs, bp), offsetof(struct pt_regs, si), offsetof(struct pt_regs, di), #ifdef CONFIG_X86_64 offsetof(struct pt_regs, r8), offsetof(struct pt_regs, r9), offsetof(struct pt_regs, r10), offsetof(struct pt_regs, r11), offsetof(struct pt_regs, r12), offsetof(struct pt_regs, r13), offsetof(struct pt_regs, r14), offsetof(struct pt_regs, r15), #else offsetof(struct pt_regs, ds), offsetof(struct pt_regs, es), offsetof(struct pt_regs, fs), offsetof(struct pt_regs, gs), #endif }; int pt_regs_offset(struct pt_regs *regs, int regno) { if ((unsigned)regno < ARRAY_SIZE(pt_regoff)) return pt_regoff[regno]; return -EDOM; } static int get_regno(struct insn *insn, enum reg_type type) { int nr_registers = ARRAY_SIZE(pt_regoff); int regno = 0; /* * Don't possibly decode a 32-bit instructions as * reading a 64-bit-only register. */ if (IS_ENABLED(CONFIG_X86_64) && !insn->x86_64) nr_registers -= 8; switch (type) { case REG_TYPE_RM: regno = X86_MODRM_RM(insn->modrm.value); /* * ModRM.mod == 0 and ModRM.rm == 5 means a 32-bit displacement * follows the ModRM byte. */ if (!X86_MODRM_MOD(insn->modrm.value) && regno == 5) return -EDOM; if (X86_REX_B(insn->rex_prefix.value)) regno += 8; break; case REG_TYPE_REG: regno = X86_MODRM_REG(insn->modrm.value); if (X86_REX_R(insn->rex_prefix.value)) regno += 8; break; case REG_TYPE_INDEX: regno = X86_SIB_INDEX(insn->sib.value); if (X86_REX_X(insn->rex_prefix.value)) regno += 8; /* * If ModRM.mod != 3 and SIB.index = 4 the scale*index * portion of the address computation is null. This is * true only if REX.X is 0. In such a case, the SIB index * is used in the address computation. 
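 * (When REX.X is set, encoding 4 selects R12 rather than "no index", so the
 * regno == 4 check below only fires when REX.X did not bump regno by 8.)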
*/ if (X86_MODRM_MOD(insn->modrm.value) != 3 && regno == 4) return -EDOM; break; case REG_TYPE_BASE: regno = X86_SIB_BASE(insn->sib.value); /* * If ModRM.mod is 0 and SIB.base == 5, the base of the * register-indirect addressing is 0. In this case, a * 32-bit displacement follows the SIB byte. */ if (!X86_MODRM_MOD(insn->modrm.value) && regno == 5) return -EDOM; if (X86_REX_B(insn->rex_prefix.value)) regno += 8; break; default: pr_err_ratelimited("invalid register type: %d\n", type); return -EINVAL; } if (regno >= nr_registers) { WARN_ONCE(1, "decoded an instruction with an invalid register"); return -EINVAL; } return regno; } static int get_reg_offset(struct insn *insn, struct pt_regs *regs, enum reg_type type) { int regno = get_regno(insn, type); if (regno < 0) return regno; return pt_regs_offset(regs, regno); } /** * get_reg_offset_16() - Obtain offset of register indicated by instruction * @insn: Instruction containing ModRM byte * @regs: Register values as seen when entering kernel mode * @offs1: Offset of the first operand register * @offs2: Offset of the second operand register, if applicable * * Obtain the offset, in pt_regs, of the registers indicated by the ModRM byte * in @insn. This function is to be used with 16-bit address encodings. The * @offs1 and @offs2 will be written with the offset of the two registers * indicated by the instruction. In cases where any of the registers is not * referenced by the instruction, the value will be set to -EDOM. * * Returns: * * 0 on success, -EINVAL on error. */ static int get_reg_offset_16(struct insn *insn, struct pt_regs *regs, int *offs1, int *offs2) { /* * 16-bit addressing can use one or two registers. Specifics of * encodings are given in Table 2-1. "16-Bit Addressing Forms with the * ModR/M Byte" of the Intel Software Development Manual. */ static const int regoff1[] = { offsetof(struct pt_regs, bx), offsetof(struct pt_regs, bx), offsetof(struct pt_regs, bp), offsetof(struct pt_regs, bp), offsetof(struct pt_regs, si), offsetof(struct pt_regs, di), offsetof(struct pt_regs, bp), offsetof(struct pt_regs, bx), }; static const int regoff2[] = { offsetof(struct pt_regs, si), offsetof(struct pt_regs, di), offsetof(struct pt_regs, si), offsetof(struct pt_regs, di), -EDOM, -EDOM, -EDOM, -EDOM, }; if (!offs1 || !offs2) return -EINVAL; /* Operand is a register, use the generic function. */ if (X86_MODRM_MOD(insn->modrm.value) == 3) { *offs1 = insn_get_modrm_rm_off(insn, regs); *offs2 = -EDOM; return 0; } *offs1 = regoff1[X86_MODRM_RM(insn->modrm.value)]; *offs2 = regoff2[X86_MODRM_RM(insn->modrm.value)]; /* * If ModRM.mod is 0 and ModRM.rm is 110b, then we use displacement- * only addressing. This means that no registers are involved in * computing the effective address. Thus, ensure that the first * register offset is invalid. The second register offset is already * invalid under the aforementioned conditions. */ if ((X86_MODRM_MOD(insn->modrm.value) == 0) && (X86_MODRM_RM(insn->modrm.value) == 6)) *offs1 = -EDOM; return 0; } /** * get_desc() - Obtain contents of a segment descriptor * @out: Segment descriptor contents on success * @sel: Segment selector * * Given a segment selector, obtain a pointer to the segment descriptor. * Both global and local descriptor tables are supported. * * Returns: * * True on success, false on failure. * * NULL on error. 
*/ static bool get_desc(struct desc_struct *out, unsigned short sel) { struct desc_ptr gdt_desc = {0, 0}; unsigned long desc_base; #ifdef CONFIG_MODIFY_LDT_SYSCALL if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT) { bool success = false; struct ldt_struct *ldt; /* Bits [15:3] contain the index of the desired entry. */ sel >>= 3; mutex_lock(&current->active_mm->context.lock); ldt = current->active_mm->context.ldt; if (ldt && sel < ldt->nr_entries) { *out = ldt->entries[sel]; success = true; } mutex_unlock(&current->active_mm->context.lock); return success; } #endif native_store_gdt(&gdt_desc); /* * Segment descriptors have a size of 8 bytes. Thus, the index is * multiplied by 8 to obtain the memory offset of the desired descriptor * from the base of the GDT. As bits [15:3] of the segment selector * contain the index, it can be regarded as multiplied by 8 already. * All that remains is to clear bits [2:0]. */ desc_base = sel & ~(SEGMENT_RPL_MASK | SEGMENT_TI_MASK); if (desc_base > gdt_desc.size) return false; *out = *(struct desc_struct *)(gdt_desc.address + desc_base); return true; } /** * insn_get_seg_base() - Obtain base address of segment descriptor. * @regs: Register values as seen when entering kernel mode * @seg_reg_idx: Index of the segment register pointing to seg descriptor * * Obtain the base address of the segment as indicated by the segment descriptor * pointed by the segment selector. The segment selector is obtained from the * input segment register index @seg_reg_idx. * * Returns: * * In protected mode, base address of the segment. Zero in long mode, * except when FS or GS are used. In virtual-8086 mode, the segment * selector shifted 4 bits to the right. * * -1L in case of error. */ unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx) { struct desc_struct desc; short sel; sel = get_segment_selector(regs, seg_reg_idx); if (sel < 0) return -1L; if (v8086_mode(regs)) /* * Base is simply the segment selector shifted 4 * bits to the right. */ return (unsigned long)(sel << 4); if (any_64bit_mode(regs)) { /* * Only FS or GS will have a base address, the rest of * the segments' bases are forced to 0. */ unsigned long base; if (seg_reg_idx == INAT_SEG_REG_FS) { rdmsrl(MSR_FS_BASE, base); } else if (seg_reg_idx == INAT_SEG_REG_GS) { /* * swapgs was called at the kernel entry point. Thus, * MSR_KERNEL_GS_BASE will have the user-space GS base. */ if (user_mode(regs)) rdmsrl(MSR_KERNEL_GS_BASE, base); else rdmsrl(MSR_GS_BASE, base); } else { base = 0; } return base; } /* In protected mode the segment selector cannot be null. */ if (!sel) return -1L; if (!get_desc(&desc, sel)) return -1L; return get_desc_base(&desc); } /** * get_seg_limit() - Obtain the limit of a segment descriptor * @regs: Register values as seen when entering kernel mode * @seg_reg_idx: Index of the segment register pointing to seg descriptor * * Obtain the limit of the segment as indicated by the segment descriptor * pointed by the segment selector. The segment selector is obtained from the * input segment register index @seg_reg_idx. * * Returns: * * In protected mode, the limit of the segment descriptor in bytes. * In long mode and virtual-8086 mode, segment limits are not enforced. Thus, * limit is returned as -1L to imply a limit-less segment. * * Zero is returned on error. 
*/ static unsigned long get_seg_limit(struct pt_regs *regs, int seg_reg_idx) { struct desc_struct desc; unsigned long limit; short sel; sel = get_segment_selector(regs, seg_reg_idx); if (sel < 0) return 0; if (any_64bit_mode(regs) || v8086_mode(regs)) return -1L; if (!sel) return 0; if (!get_desc(&desc, sel)) return 0; /* * If the granularity bit is set, the limit is given in multiples * of 4096. This also means that the 12 least significant bits are * not tested when checking the segment limits. In practice, * this means that the segment ends in (limit << 12) + 0xfff. */ limit = get_desc_limit(&desc); if (desc.g) limit = (limit << 12) + 0xfff; return limit; } /** * insn_get_code_seg_params() - Obtain code segment parameters * @regs: Structure with register values as seen when entering kernel mode * * Obtain address and operand sizes of the code segment. It is obtained from the * selector contained in the CS register in regs. In protected mode, the default * address is determined by inspecting the L and D bits of the segment * descriptor. In virtual-8086 mode, the default is always two bytes for both * address and operand sizes. * * Returns: * * An int containing ORed-in default parameters on success. * * -EINVAL on error. */ int insn_get_code_seg_params(struct pt_regs *regs) { struct desc_struct desc; short sel; if (v8086_mode(regs)) /* Address and operand size are both 16-bit. */ return INSN_CODE_SEG_PARAMS(2, 2); sel = get_segment_selector(regs, INAT_SEG_REG_CS); if (sel < 0) return sel; if (!get_desc(&desc, sel)) return -EINVAL; /* * The most significant byte of the Type field of the segment descriptor * determines whether a segment contains data or code. If this is a data * segment, return error. */ if (!(desc.type & BIT(3))) return -EINVAL; switch ((desc.l << 1) | desc.d) { case 0: /* * Legacy mode. CS.L=0, CS.D=0. Address and operand size are * both 16-bit. */ return INSN_CODE_SEG_PARAMS(2, 2); case 1: /* * Legacy mode. CS.L=0, CS.D=1. Address and operand size are * both 32-bit. */ return INSN_CODE_SEG_PARAMS(4, 4); case 2: /* * IA-32e 64-bit mode. CS.L=1, CS.D=0. Address size is 64-bit; * operand size is 32-bit. */ return INSN_CODE_SEG_PARAMS(4, 8); case 3: /* Invalid setting. CS.L=1, CS.D=1 */ fallthrough; default: return -EINVAL; } } /** * insn_get_modrm_rm_off() - Obtain register in r/m part of the ModRM byte * @insn: Instruction containing the ModRM byte * @regs: Register values as seen when entering kernel mode * * Returns: * * The register indicated by the r/m part of the ModRM byte. The * register is obtained as an offset from the base of pt_regs. In specific * cases, the returned value can be -EDOM to indicate that the particular value * of ModRM does not refer to a register and shall be ignored. */ int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs) { return get_reg_offset(insn, regs, REG_TYPE_RM); } /** * insn_get_modrm_reg_off() - Obtain register in reg part of the ModRM byte * @insn: Instruction containing the ModRM byte * @regs: Register values as seen when entering kernel mode * * Returns: * * The register indicated by the reg part of the ModRM byte. The * register is obtained as an offset from the base of pt_regs. 
*/ int insn_get_modrm_reg_off(struct insn *insn, struct pt_regs *regs) { return get_reg_offset(insn, regs, REG_TYPE_REG); } /** * insn_get_modrm_reg_ptr() - Obtain register pointer based on ModRM byte * @insn: Instruction containing the ModRM byte * @regs: Register values as seen when entering kernel mode * * Returns: * * The register indicated by the reg part of the ModRM byte. * The register is obtained as a pointer within pt_regs. */ unsigned long *insn_get_modrm_reg_ptr(struct insn *insn, struct pt_regs *regs) { int offset; offset = insn_get_modrm_reg_off(insn, regs); if (offset < 0) return NULL; return (void *)regs + offset; } /** * get_seg_base_limit() - obtain base address and limit of a segment * @insn: Instruction. Must be valid. * @regs: Register values as seen when entering kernel mode * @regoff: Operand offset, in pt_regs, used to resolve segment descriptor * @base: Obtained segment base * @limit: Obtained segment limit * * Obtain the base address and limit of the segment associated with the operand * @regoff and, if any or allowed, override prefixes in @insn. This function is * different from insn_get_seg_base() as the latter does not resolve the segment * associated with the instruction operand. If a limit is not needed (e.g., * when running in long mode), @limit can be NULL. * * Returns: * * 0 on success. @base and @limit will contain the base address and of the * resolved segment, respectively. * * -EINVAL on error. */ static int get_seg_base_limit(struct insn *insn, struct pt_regs *regs, int regoff, unsigned long *base, unsigned long *limit) { int seg_reg_idx; if (!base) return -EINVAL; seg_reg_idx = resolve_seg_reg(insn, regs, regoff); if (seg_reg_idx < 0) return seg_reg_idx; *base = insn_get_seg_base(regs, seg_reg_idx); if (*base == -1L) return -EINVAL; if (!limit) return 0; *limit = get_seg_limit(regs, seg_reg_idx); if (!(*limit)) return -EINVAL; return 0; } /** * get_eff_addr_reg() - Obtain effective address from register operand * @insn: Instruction. Must be valid. * @regs: Register values as seen when entering kernel mode * @regoff: Obtained operand offset, in pt_regs, with the effective address * @eff_addr: Obtained effective address * * Obtain the effective address stored in the register operand as indicated by * the ModRM byte. This function is to be used only with register addressing * (i.e., ModRM.mod is 3). The effective address is saved in @eff_addr. The * register operand, as an offset from the base of pt_regs, is saved in @regoff; * such offset can then be used to resolve the segment associated with the * operand. This function can be used with any of the supported address sizes * in x86. * * Returns: * * 0 on success. @eff_addr will have the effective address stored in the * operand indicated by ModRM. @regoff will have such operand as an offset from * the base of pt_regs. * * -EINVAL on error. */ static int get_eff_addr_reg(struct insn *insn, struct pt_regs *regs, int *regoff, long *eff_addr) { int ret; ret = insn_get_modrm(insn); if (ret) return ret; if (X86_MODRM_MOD(insn->modrm.value) != 3) return -EINVAL; *regoff = get_reg_offset(insn, regs, REG_TYPE_RM); if (*regoff < 0) return -EINVAL; /* Ignore bytes that are outside the address size. 
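 * For example, a 0x67 address-size override in 64-bit mode gives
 * addr_bytes == 4, so only the low 32 bits of the selected register
 * contribute to the effective address.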
*/ if (insn->addr_bytes == 2) *eff_addr = regs_get_register(regs, *regoff) & 0xffff; else if (insn->addr_bytes == 4) *eff_addr = regs_get_register(regs, *regoff) & 0xffffffff; else /* 64-bit address */ *eff_addr = regs_get_register(regs, *regoff); return 0; } /** * get_eff_addr_modrm() - Obtain referenced effective address via ModRM * @insn: Instruction. Must be valid. * @regs: Register values as seen when entering kernel mode * @regoff: Obtained operand offset, in pt_regs, associated with segment * @eff_addr: Obtained effective address * * Obtain the effective address referenced by the ModRM byte of @insn. After * identifying the registers involved in the register-indirect memory reference, * its value is obtained from the operands in @regs. The computed address is * stored @eff_addr. Also, the register operand that indicates the associated * segment is stored in @regoff, this parameter can later be used to determine * such segment. * * Returns: * * 0 on success. @eff_addr will have the referenced effective address. @regoff * will have a register, as an offset from the base of pt_regs, that can be used * to resolve the associated segment. * * -EINVAL on error. */ static int get_eff_addr_modrm(struct insn *insn, struct pt_regs *regs, int *regoff, long *eff_addr) { long tmp; int ret; if (insn->addr_bytes != 8 && insn->addr_bytes != 4) return -EINVAL; ret = insn_get_modrm(insn); if (ret) return ret; if (X86_MODRM_MOD(insn->modrm.value) > 2) return -EINVAL; *regoff = get_reg_offset(insn, regs, REG_TYPE_RM); /* * -EDOM means that we must ignore the address_offset. In such a case, * in 64-bit mode the effective address relative to the rIP of the * following instruction. */ if (*regoff == -EDOM) { if (any_64bit_mode(regs)) tmp = regs->ip + insn->length; else tmp = 0; } else if (*regoff < 0) { return -EINVAL; } else { tmp = regs_get_register(regs, *regoff); } if (insn->addr_bytes == 4) { int addr32 = (int)(tmp & 0xffffffff) + insn->displacement.value; *eff_addr = addr32 & 0xffffffff; } else { *eff_addr = tmp + insn->displacement.value; } return 0; } /** * get_eff_addr_modrm_16() - Obtain referenced effective address via ModRM * @insn: Instruction. Must be valid. * @regs: Register values as seen when entering kernel mode * @regoff: Obtained operand offset, in pt_regs, associated with segment * @eff_addr: Obtained effective address * * Obtain the 16-bit effective address referenced by the ModRM byte of @insn. * After identifying the registers involved in the register-indirect memory * reference, its value is obtained from the operands in @regs. The computed * address is stored @eff_addr. Also, the register operand that indicates * the associated segment is stored in @regoff, this parameter can later be used * to determine such segment. * * Returns: * * 0 on success. @eff_addr will have the referenced effective address. @regoff * will have a register, as an offset from the base of pt_regs, that can be used * to resolve the associated segment. * * -EINVAL on error. */ static int get_eff_addr_modrm_16(struct insn *insn, struct pt_regs *regs, int *regoff, short *eff_addr) { int addr_offset1, addr_offset2, ret; short addr1 = 0, addr2 = 0, displacement; if (insn->addr_bytes != 2) return -EINVAL; insn_get_modrm(insn); if (!insn->modrm.nbytes) return -EINVAL; if (X86_MODRM_MOD(insn->modrm.value) > 2) return -EINVAL; ret = get_reg_offset_16(insn, regs, &addr_offset1, &addr_offset2); if (ret < 0) return -EINVAL; /* * Don't fail on invalid offset values. 
They might be invalid because * they cannot be used for this particular value of ModRM. Instead, use * them in the computation only if they contain a valid value. */ if (addr_offset1 != -EDOM) addr1 = regs_get_register(regs, addr_offset1) & 0xffff; if (addr_offset2 != -EDOM) addr2 = regs_get_register(regs, addr_offset2) & 0xffff; displacement = insn->displacement.value & 0xffff; *eff_addr = addr1 + addr2 + displacement; /* * The first operand register could indicate to use of either SS or DS * registers to obtain the segment selector. The second operand * register can only indicate the use of DS. Thus, the first operand * will be used to obtain the segment selector. */ *regoff = addr_offset1; return 0; } /** * get_eff_addr_sib() - Obtain referenced effective address via SIB * @insn: Instruction. Must be valid. * @regs: Register values as seen when entering kernel mode * @base_offset: Obtained operand offset, in pt_regs, associated with segment * @eff_addr: Obtained effective address * * Obtain the effective address referenced by the SIB byte of @insn. After * identifying the registers involved in the indexed, register-indirect memory * reference, its value is obtained from the operands in @regs. The computed * address is stored @eff_addr. Also, the register operand that indicates the * associated segment is stored in @base_offset; this parameter can later be * used to determine such segment. * * Returns: * * 0 on success. @eff_addr will have the referenced effective address. * @base_offset will have a register, as an offset from the base of pt_regs, * that can be used to resolve the associated segment. * * Negative value on error. */ static int get_eff_addr_sib(struct insn *insn, struct pt_regs *regs, int *base_offset, long *eff_addr) { long base, indx; int indx_offset; int ret; if (insn->addr_bytes != 8 && insn->addr_bytes != 4) return -EINVAL; ret = insn_get_modrm(insn); if (ret) return ret; if (!insn->modrm.nbytes) return -EINVAL; if (X86_MODRM_MOD(insn->modrm.value) > 2) return -EINVAL; ret = insn_get_sib(insn); if (ret) return ret; if (!insn->sib.nbytes) return -EINVAL; *base_offset = get_reg_offset(insn, regs, REG_TYPE_BASE); indx_offset = get_reg_offset(insn, regs, REG_TYPE_INDEX); /* * Negative values in the base and index offset means an error when * decoding the SIB byte. Except -EDOM, which means that the registers * should not be used in the address computation. */ if (*base_offset == -EDOM) base = 0; else if (*base_offset < 0) return -EINVAL; else base = regs_get_register(regs, *base_offset); if (indx_offset == -EDOM) indx = 0; else if (indx_offset < 0) return -EINVAL; else indx = regs_get_register(regs, indx_offset); if (insn->addr_bytes == 4) { int addr32, base32, idx32; base32 = base & 0xffffffff; idx32 = indx & 0xffffffff; addr32 = base32 + idx32 * (1 << X86_SIB_SCALE(insn->sib.value)); addr32 += insn->displacement.value; *eff_addr = addr32 & 0xffffffff; } else { *eff_addr = base + indx * (1 << X86_SIB_SCALE(insn->sib.value)); *eff_addr += insn->displacement.value; } return 0; } /** * get_addr_ref_16() - Obtain the 16-bit address referred by instruction * @insn: Instruction containing ModRM byte and displacement * @regs: Register values as seen when entering kernel mode * * This function is to be used with 16-bit address encodings. Obtain the memory * address referred by the instruction's ModRM and displacement bytes. 
Also, the * segment used as base is determined by either any segment override prefixes in * @insn or the default segment of the registers involved in the address * computation. In protected mode, segment limits are enforced. * * Returns: * * Linear address referenced by the instruction operands on success. * * -1L on error. */ static void __user *get_addr_ref_16(struct insn *insn, struct pt_regs *regs) { unsigned long linear_addr = -1L, seg_base, seg_limit; int ret, regoff; short eff_addr; long tmp; if (insn_get_displacement(insn)) goto out; if (insn->addr_bytes != 2) goto out; if (X86_MODRM_MOD(insn->modrm.value) == 3) { ret = get_eff_addr_reg(insn, regs, &regoff, &tmp); if (ret) goto out; eff_addr = tmp; } else { ret = get_eff_addr_modrm_16(insn, regs, &regoff, &eff_addr); if (ret) goto out; } ret = get_seg_base_limit(insn, regs, regoff, &seg_base, &seg_limit); if (ret) goto out; /* * Before computing the linear address, make sure the effective address * is within the limits of the segment. In virtual-8086 mode, segment * limits are not enforced. In such a case, the segment limit is -1L to * reflect this fact. */ if ((unsigned long)(eff_addr & 0xffff) > seg_limit) goto out; linear_addr = (unsigned long)(eff_addr & 0xffff) + seg_base; /* Limit linear address to 20 bits */ if (v8086_mode(regs)) linear_addr &= 0xfffff; out: return (void __user *)linear_addr; } /** * get_addr_ref_32() - Obtain a 32-bit linear address * @insn: Instruction with ModRM, SIB bytes and displacement * @regs: Register values as seen when entering kernel mode * * This function is to be used with 32-bit address encodings to obtain the * linear memory address referred by the instruction's ModRM, SIB, * displacement bytes and segment base address, as applicable. If in protected * mode, segment limits are enforced. * * Returns: * * Linear address referenced by instruction and registers on success. * * -1L on error. */ static void __user *get_addr_ref_32(struct insn *insn, struct pt_regs *regs) { unsigned long linear_addr = -1L, seg_base, seg_limit; int eff_addr, regoff; long tmp; int ret; if (insn->addr_bytes != 4) goto out; if (X86_MODRM_MOD(insn->modrm.value) == 3) { ret = get_eff_addr_reg(insn, regs, &regoff, &tmp); if (ret) goto out; eff_addr = tmp; } else { if (insn->sib.nbytes) { ret = get_eff_addr_sib(insn, regs, &regoff, &tmp); if (ret) goto out; eff_addr = tmp; } else { ret = get_eff_addr_modrm(insn, regs, &regoff, &tmp); if (ret) goto out; eff_addr = tmp; } } ret = get_seg_base_limit(insn, regs, regoff, &seg_base, &seg_limit); if (ret) goto out; /* * In protected mode, before computing the linear address, make sure * the effective address is within the limits of the segment. * 32-bit addresses can be used in long and virtual-8086 modes if an * address override prefix is used. In such cases, segment limits are * not enforced. When in virtual-8086 mode, the segment limit is -1L * to reflect this situation. * * After computed, the effective address is treated as an unsigned * quantity. */ if (!any_64bit_mode(regs) && ((unsigned int)eff_addr > seg_limit)) goto out; /* * Even though 32-bit address encodings are allowed in virtual-8086 * mode, the address range is still limited to [0x-0xffff]. */ if (v8086_mode(regs) && (eff_addr & ~0xffff)) goto out; /* * Data type long could be 64 bits in size. Ensure that our 32-bit * effective address is not sign-extended when computing the linear * address. 
*/ linear_addr = (unsigned long)(eff_addr & 0xffffffff) + seg_base; /* Limit linear address to 20 bits */ if (v8086_mode(regs)) linear_addr &= 0xfffff; out: return (void __user *)linear_addr; } /** * get_addr_ref_64() - Obtain a 64-bit linear address * @insn: Instruction struct with ModRM and SIB bytes and displacement * @regs: Structure with register values as seen when entering kernel mode * * This function is to be used with 64-bit address encodings to obtain the * linear memory address referred by the instruction's ModRM, SIB, * displacement bytes and segment base address, as applicable. * * Returns: * * Linear address referenced by instruction and registers on success. * * -1L on error. */ #ifndef CONFIG_X86_64 static void __user *get_addr_ref_64(struct insn *insn, struct pt_regs *regs) { return (void __user *)-1L; } #else static void __user *get_addr_ref_64(struct insn *insn, struct pt_regs *regs) { unsigned long linear_addr = -1L, seg_base; int regoff, ret; long eff_addr; if (insn->addr_bytes != 8) goto out; if (X86_MODRM_MOD(insn->modrm.value) == 3) { ret = get_eff_addr_reg(insn, regs, &regoff, &eff_addr); if (ret) goto out; } else { if (insn->sib.nbytes) { ret = get_eff_addr_sib(insn, regs, &regoff, &eff_addr); if (ret) goto out; } else { ret = get_eff_addr_modrm(insn, regs, &regoff, &eff_addr); if (ret) goto out; } } ret = get_seg_base_limit(insn, regs, regoff, &seg_base, NULL); if (ret) goto out; linear_addr = (unsigned long)eff_addr + seg_base; out: return (void __user *)linear_addr; } #endif /* CONFIG_X86_64 */ /** * insn_get_addr_ref() - Obtain the linear address referred by instruction * @insn: Instruction structure containing ModRM byte and displacement * @regs: Structure with register values as seen when entering kernel mode * * Obtain the linear address referred by the instruction's ModRM, SIB and * displacement bytes, and segment base, as applicable. In protected mode, * segment limits are enforced. * * Returns: * * Linear address referenced by instruction and registers on success. * * -1L on error. */ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs) { if (!insn || !regs) return (void __user *)-1L; if (insn_get_opcode(insn)) return (void __user *)-1L; switch (insn->addr_bytes) { case 2: return get_addr_ref_16(insn, regs); case 4: return get_addr_ref_32(insn, regs); case 8: return get_addr_ref_64(insn, regs); default: return (void __user *)-1L; } } int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip) { unsigned long seg_base = 0; /* * If not in user-space long mode, a custom code segment could be in * use. This is true in protected mode (if the process defined a local * descriptor table), or virtual-8086 mode. In most of the cases * seg_base will be zero as in USER_CS. */ if (!user_64bit_mode(regs)) { seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS); if (seg_base == -1L) return -EINVAL; } *ip = seg_base + regs->ip; return 0; } /** * insn_fetch_from_user() - Copy instruction bytes from user-space memory * @regs: Structure with register values as seen when entering kernel mode * @buf: Array to store the fetched instruction * * Gets the linear address of the instruction and copies the instruction bytes * to the buf. * * Returns: * * - number of instruction bytes copied. * - 0 if nothing was copied. 
* - -EINVAL if the linear address of the instruction could not be calculated */ int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE]) { unsigned long ip; int not_copied; if (insn_get_effective_ip(regs, &ip)) return -EINVAL; not_copied = copy_from_user(buf, (void __user *)ip, MAX_INSN_SIZE); return MAX_INSN_SIZE - not_copied; } /** * insn_fetch_from_user_inatomic() - Copy instruction bytes from user-space memory * while in atomic code * @regs: Structure with register values as seen when entering kernel mode * @buf: Array to store the fetched instruction * * Gets the linear address of the instruction and copies the instruction bytes * to the buf. This function must be used in atomic context. * * Returns: * * - number of instruction bytes copied. * - 0 if nothing was copied. * - -EINVAL if the linear address of the instruction could not be calculated. */ int insn_fetch_from_user_inatomic(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE]) { unsigned long ip; int not_copied; if (insn_get_effective_ip(regs, &ip)) return -EINVAL; not_copied = __copy_from_user_inatomic(buf, (void __user *)ip, MAX_INSN_SIZE); return MAX_INSN_SIZE - not_copied; } /** * insn_decode_from_regs() - Decode an instruction * @insn: Structure to store decoded instruction * @regs: Structure with register values as seen when entering kernel mode * @buf: Buffer containing the instruction bytes * @buf_size: Number of instruction bytes available in buf * * Decodes the instruction provided in buf and stores the decoding results in * insn. Also determines the correct address and operand sizes. * * Returns: * * True if instruction was decoded, False otherwise. */ bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE], int buf_size) { int seg_defs; insn_init(insn, buf, buf_size, user_64bit_mode(regs)); /* * Override the default operand and address sizes with what is specified * in the code segment descriptor. The instruction decoder only sets * the address size it to either 4 or 8 address bytes and does nothing * for the operand bytes. This OK for most of the cases, but we could * have special cases where, for instance, a 16-bit code segment * descriptor is used. * If there is an address override prefix, the instruction decoder * correctly updates these values, even for 16-bit defaults. */ seg_defs = insn_get_code_seg_params(regs); if (seg_defs == -EINVAL) return false; insn->addr_bytes = INSN_CODE_SEG_ADDR_SZ(seg_defs); insn->opnd_bytes = INSN_CODE_SEG_OPND_SZ(seg_defs); if (insn_get_length(insn)) return false; if (buf_size < insn->length) return false; return true; } /** * insn_decode_mmio() - Decode a MMIO instruction * @insn: Structure to store decoded instruction * @bytes: Returns size of memory operand * * Decodes instruction that used for Memory-mapped I/O. * * Returns: * * Type of the instruction. Size of the memory operand is stored in * @bytes. If decode failed, INSN_MMIO_DECODE_FAILED returned. 
*/ enum insn_mmio_type insn_decode_mmio(struct insn *insn, int *bytes) { enum insn_mmio_type type = INSN_MMIO_DECODE_FAILED; *bytes = 0; if (insn_get_opcode(insn)) return INSN_MMIO_DECODE_FAILED; switch (insn->opcode.bytes[0]) { case 0x88: /* MOV m8,r8 */ *bytes = 1; fallthrough; case 0x89: /* MOV m16/m32/m64, r16/m32/m64 */ if (!*bytes) *bytes = insn->opnd_bytes; type = INSN_MMIO_WRITE; break; case 0xc6: /* MOV m8, imm8 */ *bytes = 1; fallthrough; case 0xc7: /* MOV m16/m32/m64, imm16/imm32/imm64 */ if (!*bytes) *bytes = insn->opnd_bytes; type = INSN_MMIO_WRITE_IMM; break; case 0x8a: /* MOV r8, m8 */ *bytes = 1; fallthrough; case 0x8b: /* MOV r16/r32/r64, m16/m32/m64 */ if (!*bytes) *bytes = insn->opnd_bytes; type = INSN_MMIO_READ; break; case 0xa4: /* MOVS m8, m8 */ *bytes = 1; fallthrough; case 0xa5: /* MOVS m16/m32/m64, m16/m32/m64 */ if (!*bytes) *bytes = insn->opnd_bytes; type = INSN_MMIO_MOVS; break; case 0x0f: /* Two-byte instruction */ switch (insn->opcode.bytes[1]) { case 0xb6: /* MOVZX r16/r32/r64, m8 */ *bytes = 1; fallthrough; case 0xb7: /* MOVZX r32/r64, m16 */ if (!*bytes) *bytes = 2; type = INSN_MMIO_READ_ZERO_EXTEND; break; case 0xbe: /* MOVSX r16/r32/r64, m8 */ *bytes = 1; fallthrough; case 0xbf: /* MOVSX r32/r64, m16 */ if (!*bytes) *bytes = 2; type = INSN_MMIO_READ_SIGN_EXTEND; break; } break; } return type; }
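/*
 * Illustrative sketch only (not part of the file above): how the helpers
 * insn_fetch_from_user(), insn_decode_from_regs(), insn_decode_mmio() and
 * insn_get_addr_ref() are typically chained by a fault handler that needs to
 * emulate a user-space MMIO access. The function name and error handling are
 * hypothetical; a real handler would go on to emulate the access itself.
 */
#include <asm/insn.h>
#include <asm/insn-eval.h>
#include <asm/ptrace.h>

static int example_emulate_user_mmio(struct pt_regs *regs)
{
	unsigned char buf[MAX_INSN_SIZE];
	enum insn_mmio_type mmio;
	void __user *addr;
	struct insn insn;
	int nbytes, size;

	/* Copy the faulting instruction bytes from user space. */
	nbytes = insn_fetch_from_user(regs, buf);
	if (nbytes <= 0)
		return -EFAULT;

	/* Decode them, honouring the code segment's address/operand sizes. */
	if (!insn_decode_from_regs(&insn, regs, buf, nbytes))
		return -EINVAL;

	/* Classify the MMIO access and obtain the size of its memory operand. */
	mmio = insn_decode_mmio(&insn, &size);
	if (mmio == INSN_MMIO_DECODE_FAILED)
		return -EINVAL;

	/* Resolve the linear address referenced by the instruction (-1L on error). */
	addr = insn_get_addr_ref(&insn, regs);
	if (addr == (void __user *)-1L)
		return -EINVAL;

	/* Emulation of the @size-byte access at @addr would happen here. */
	return 0;
}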
126 126 169 51 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_CPUSET_H #define _LINUX_CPUSET_H /* * cpuset interface * * Copyright (C) 2003 BULL SA * Copyright (C) 2004-2006 Silicon Graphics, Inc. * */ #include <linux/sched.h> #include <linux/sched/topology.h> #include <linux/sched/task.h> #include <linux/cpumask.h> #include <linux/nodemask.h> #include <linux/mm.h> #include <linux/mmu_context.h> #include <linux/jump_label.h> #ifdef CONFIG_CPUSETS /* * Static branch rewrites can happen in an arbitrary order for a given * key. In code paths where we need to loop with read_mems_allowed_begin() and * read_mems_allowed_retry() to get a consistent view of mems_allowed, we need * to ensure that begin() always gets rewritten before retry() in the * disabled -> enabled transition. If not, then if local irqs are disabled * around the loop, we can deadlock since retry() would always be * comparing the latest value of the mems_allowed seqcount against 0 as * begin() still would see cpusets_enabled() as false. The enabled -> disabled * transition should happen in reverse order for the same reasons (want to stop * looking at real value of mems_allowed.sequence in retry() first). */ extern struct static_key_false cpusets_pre_enable_key; extern struct static_key_false cpusets_enabled_key; extern struct static_key_false cpusets_insane_config_key; static inline bool cpusets_enabled(void) { return static_branch_unlikely(&cpusets_enabled_key); } static inline void cpuset_inc(void) { static_branch_inc_cpuslocked(&cpusets_pre_enable_key); static_branch_inc_cpuslocked(&cpusets_enabled_key); } static inline void cpuset_dec(void) { static_branch_dec_cpuslocked(&cpusets_enabled_key); static_branch_dec_cpuslocked(&cpusets_pre_enable_key); } /* * This will get enabled whenever a cpuset configuration is considered * unsupportable in general. E.g. movable only node which cannot satisfy * any non movable allocations (see update_nodemask). Page allocator * needs to make additional checks for those configurations and this * check is meant to guard those checks without any overhead for sane * configurations. 
*/ static inline bool cpusets_insane_config(void) { return static_branch_unlikely(&cpusets_insane_config_key); } extern int cpuset_init(void); extern void cpuset_init_smp(void); extern void cpuset_force_rebuild(void); extern void cpuset_update_active_cpus(void); extern void inc_dl_tasks_cs(struct task_struct *task); extern void dec_dl_tasks_cs(struct task_struct *task); extern void cpuset_lock(void); extern void cpuset_unlock(void); extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); extern bool cpuset_cpus_allowed_fallback(struct task_struct *p); extern bool cpuset_cpu_is_isolated(int cpu); extern nodemask_t cpuset_mems_allowed(struct task_struct *p); #define cpuset_current_mems_allowed (current->mems_allowed) void cpuset_init_current_mems_allowed(void); int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask); extern bool cpuset_node_allowed(int node, gfp_t gfp_mask); static inline bool __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) { return cpuset_node_allowed(zone_to_nid(z), gfp_mask); } static inline bool cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) { if (cpusets_enabled()) return __cpuset_zone_allowed(z, gfp_mask); return true; } extern int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, const struct task_struct *tsk2); #ifdef CONFIG_CPUSETS_V1 #define cpuset_memory_pressure_bump() \ do { \ if (cpuset_memory_pressure_enabled) \ __cpuset_memory_pressure_bump(); \ } while (0) extern int cpuset_memory_pressure_enabled; extern void __cpuset_memory_pressure_bump(void); #else static inline void cpuset_memory_pressure_bump(void) { } #endif extern void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task); extern int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *tsk); extern int cpuset_mem_spread_node(void); static inline int cpuset_do_page_mem_spread(void) { return task_spread_page(current); } extern bool current_cpuset_is_being_rebound(void); extern void rebuild_sched_domains(void); extern void cpuset_print_current_mems_allowed(void); /* * read_mems_allowed_begin is required when making decisions involving * mems_allowed such as during page allocation. mems_allowed can be updated in * parallel and depending on the new value an operation can fail potentially * causing process failure. A retry loop with read_mems_allowed_begin and * read_mems_allowed_retry prevents these artificial failures. */ static inline unsigned int read_mems_allowed_begin(void) { if (!static_branch_unlikely(&cpusets_pre_enable_key)) return 0; return read_seqcount_begin(&current->mems_allowed_seq); } /* * If this returns true, the operation that took place after * read_mems_allowed_begin may have failed artificially due to a concurrent * update of mems_allowed. It is up to the caller to retry the operation if * appropriate. 
*/ static inline bool read_mems_allowed_retry(unsigned int seq) { if (!static_branch_unlikely(&cpusets_enabled_key)) return false; return read_seqcount_retry(&current->mems_allowed_seq, seq); } static inline void set_mems_allowed(nodemask_t nodemask) { unsigned long flags; task_lock(current); local_irq_save(flags); write_seqcount_begin(&current->mems_allowed_seq); current->mems_allowed = nodemask; write_seqcount_end(&current->mems_allowed_seq); local_irq_restore(flags); task_unlock(current); } #else /* !CONFIG_CPUSETS */ static inline bool cpusets_enabled(void) { return false; } static inline bool cpusets_insane_config(void) { return false; } static inline int cpuset_init(void) { return 0; } static inline void cpuset_init_smp(void) {} static inline void cpuset_force_rebuild(void) { } static inline void cpuset_update_active_cpus(void) { partition_sched_domains(1, NULL, NULL); } static inline void inc_dl_tasks_cs(struct task_struct *task) { } static inline void dec_dl_tasks_cs(struct task_struct *task) { } static inline void cpuset_lock(void) { } static inline void cpuset_unlock(void) { } static inline void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask) { cpumask_copy(mask, task_cpu_possible_mask(p)); } static inline bool cpuset_cpus_allowed_fallback(struct task_struct *p) { return false; } static inline bool cpuset_cpu_is_isolated(int cpu) { return false; } static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) { return node_possible_map; } #define cpuset_current_mems_allowed (node_states[N_MEMORY]) static inline void cpuset_init_current_mems_allowed(void) {} static inline int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask) { return 1; } static inline bool __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) { return true; } static inline bool cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) { return true; } static inline int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, const struct task_struct *tsk2) { return 1; } static inline void cpuset_memory_pressure_bump(void) {} static inline void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task) { } static inline int cpuset_mem_spread_node(void) { return 0; } static inline int cpuset_do_page_mem_spread(void) { return 0; } static inline bool current_cpuset_is_being_rebound(void) { return false; } static inline void rebuild_sched_domains(void) { partition_sched_domains(1, NULL, NULL); } static inline void cpuset_print_current_mems_allowed(void) { } static inline void set_mems_allowed(nodemask_t nodemask) { } static inline unsigned int read_mems_allowed_begin(void) { return 0; } static inline bool read_mems_allowed_retry(unsigned int seq) { return false; } #endif /* !CONFIG_CPUSETS */ #endif /* _LINUX_CPUSET_H */
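/*
 * Illustrative sketch only (not part of the header above): the retry pattern
 * that read_mems_allowed_begin()/read_mems_allowed_retry() are designed for.
 * do_alloc_from_mems() is a hypothetical stand-in for work that must see a
 * consistent snapshot of current->mems_allowed.
 */
#include <linux/cpuset.h>
#include <linux/gfp.h>

/* Hypothetical helper: allocate using the given nodemask, returns NULL on failure. */
struct page *do_alloc_from_mems(gfp_t gfp, unsigned int order, nodemask_t *nodes);

static struct page *example_alloc_with_mems_cookie(gfp_t gfp, unsigned int order)
{
	unsigned int cookie;
	struct page *page;

	do {
		cookie = read_mems_allowed_begin();
		page = do_alloc_from_mems(gfp, order, &cpuset_current_mems_allowed);
		/*
		 * A failure may be artificial if mems_allowed changed while we
		 * were allocating; in that case read_mems_allowed_retry()
		 * returns true and we try again with the updated nodemask.
		 */
	} while (!page && read_mems_allowed_retry(cookie));

	return page;
}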
105 59 80 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM page_pool #if !defined(_TRACE_PAGE_POOL_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_PAGE_POOL_H #include <linux/types.h> #include <linux/tracepoint.h> #include <trace/events/mmflags.h> #include <net/page_pool/types.h> TRACE_EVENT(page_pool_release, TP_PROTO(const struct page_pool *pool, s32 inflight, u32 hold, u32 release), TP_ARGS(pool, inflight, hold, release), TP_STRUCT__entry( __field(const struct page_pool *, pool) __field(s32, inflight) __field(u32, hold) __field(u32, release) __field(u64, cnt) ), TP_fast_assign( __entry->pool = pool; __entry->inflight = inflight; __entry->hold = hold; __entry->release = release; __entry->cnt = pool->destroy_cnt; ), TP_printk("page_pool=%p inflight=%d hold=%u release=%u cnt=%llu", __entry->pool, __entry->inflight, __entry->hold, __entry->release, __entry->cnt) ); TRACE_EVENT(page_pool_state_release, TP_PROTO(const struct page_pool *pool, netmem_ref netmem, u32 release), TP_ARGS(pool, netmem, release), TP_STRUCT__entry( __field(const struct page_pool *, pool) __field(unsigned long, netmem) __field(u32, release) __field(unsigned long, pfn) ), TP_fast_assign( __entry->pool = pool; __entry->netmem = (__force unsigned long)netmem; __entry->release = release; __entry->pfn = netmem_pfn_trace(netmem); ), TP_printk("page_pool=%p netmem=%p is_net_iov=%lu pfn=0x%lx release=%u", __entry->pool, (void *)__entry->netmem, __entry->netmem & NET_IOV, __entry->pfn, __entry->release) ); TRACE_EVENT(page_pool_state_hold, TP_PROTO(const struct page_pool *pool, netmem_ref netmem, u32 hold), TP_ARGS(pool, netmem, hold), TP_STRUCT__entry( __field(const struct page_pool *, pool) __field(unsigned long, netmem) __field(u32, hold) __field(unsigned long, pfn) ), TP_fast_assign( __entry->pool = pool; __entry->netmem = (__force unsigned long)netmem; __entry->hold = hold; __entry->pfn = netmem_pfn_trace(netmem); ), TP_printk("page_pool=%p netmem=%p is_net_iov=%lu, pfn=0x%lx hold=%u", __entry->pool, (void *)__entry->netmem, __entry->netmem & NET_IOV, __entry->pfn, __entry->hold) ); TRACE_EVENT(page_pool_update_nid, TP_PROTO(const struct page_pool *pool, int new_nid), TP_ARGS(pool, new_nid), TP_STRUCT__entry( __field(const struct page_pool *, pool) __field(int, pool_nid) __field(int, new_nid) ), TP_fast_assign( __entry->pool = pool; __entry->pool_nid = pool->p.nid; __entry->new_nid = new_nid; ), TP_printk("page_pool=%p pool_nid=%d new_nid=%d", __entry->pool, __entry->pool_nid, __entry->new_nid) ); #endif /* _TRACE_PAGE_POOL_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
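/*
 * Illustrative sketch only (not part of the header above): each TRACE_EVENT()
 * definition generates a trace_<event>() helper whose signature matches its
 * TP_PROTO(). Exactly one translation unit in the networking core defines
 * CREATE_TRACE_POINTS before including this header to instantiate the
 * tracepoints; callers just include the header. A hypothetical emission site
 * for page_pool_state_release is shown below.
 */
#include <trace/events/page_pool.h>

static void example_emit_state_release(const struct page_pool *pool,
				       netmem_ref netmem, u32 release_cnt)
{
	/* Compiles down to a static-branch no-op unless the tracepoint is enabled. */
	trace_page_pool_state_release(pool, netmem, release_cnt);
}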
7 1 1 4 7 4 3 7 7 5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 // SPDX-License-Identifier: GPL-2.0-only /* * symlink.c * * PURPOSE * Symlink handling routines for the OSTA-UDF(tm) filesystem. * * COPYRIGHT * (C) 1998-2001 Ben Fennema * (C) 1999 Stelias Computing Inc * * HISTORY * * 04/16/99 blf Created. * */ #include "udfdecl.h" #include <linux/uaccess.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/time.h> #include <linux/mm.h> #include <linux/stat.h> #include <linux/pagemap.h> #include "udf_i.h" static int udf_pc_to_char(struct super_block *sb, unsigned char *from, int fromlen, unsigned char *to, int tolen) { struct pathComponent *pc; int elen = 0; int comp_len; unsigned char *p = to; /* Reserve one byte for terminating \0 */ tolen--; while (elen < fromlen) { pc = (struct pathComponent *)(from + elen); elen += sizeof(struct pathComponent); switch (pc->componentType) { case 1: /* * Symlink points to some place which should be agreed * upon between originator and receiver of the media. Ignore. */ if (pc->lengthComponentIdent > 0) { elen += pc->lengthComponentIdent; break; } fallthrough; case 2: if (tolen == 0) return -ENAMETOOLONG; p = to; *p++ = '/'; tolen--; break; case 3: if (tolen < 3) return -ENAMETOOLONG; memcpy(p, "../", 3); p += 3; tolen -= 3; break; case 4: if (tolen < 2) return -ENAMETOOLONG; memcpy(p, "./", 2); p += 2; tolen -= 2; /* that would be . 
- just ignore */ break; case 5: elen += pc->lengthComponentIdent; if (elen > fromlen) return -EIO; comp_len = udf_get_filename(sb, pc->componentIdent, pc->lengthComponentIdent, p, tolen); if (comp_len < 0) return comp_len; p += comp_len; tolen -= comp_len; if (tolen == 0) return -ENAMETOOLONG; *p++ = '/'; tolen--; break; } } if (p > to + 1) p[-1] = '\0'; else p[0] = '\0'; return 0; } static int udf_symlink_filler(struct file *file, struct folio *folio) { struct inode *inode = folio->mapping->host; struct buffer_head *bh = NULL; unsigned char *symlink; int err = 0; unsigned char *p = folio_address(folio); struct udf_inode_info *iinfo = UDF_I(inode); /* We don't support symlinks longer than one block */ if (inode->i_size > inode->i_sb->s_blocksize) { err = -ENAMETOOLONG; goto out; } if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { symlink = iinfo->i_data + iinfo->i_lenEAttr; } else { bh = udf_bread(inode, 0, 0, &err); if (!bh) { if (!err) err = -EFSCORRUPTED; goto out; } symlink = bh->b_data; } err = udf_pc_to_char(inode->i_sb, symlink, inode->i_size, p, PAGE_SIZE); brelse(bh); out: folio_end_read(folio, err == 0); return err; } static int udf_symlink_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { struct dentry *dentry = path->dentry; struct inode *inode = d_backing_inode(dentry); struct folio *folio; generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); folio = read_mapping_folio(inode->i_mapping, 0, NULL); if (IS_ERR(folio)) return PTR_ERR(folio); /* * UDF uses non-trivial encoding of symlinks so i_size does not match * number of characters reported by readlink(2) which apparently some * applications expect. Also POSIX says that "The value returned in the * st_size field shall be the length of the contents of the symbolic * link, and shall not count a trailing null if one is present." So * let's report the length of string returned by readlink(2) for * st_size. */ stat->size = strlen(folio_address(folio)); folio_put(folio); return 0; } /* * symlinks can't do much... */ const struct address_space_operations udf_symlink_aops = { .read_folio = udf_symlink_filler, }; const struct inode_operations udf_symlink_inode_operations = { .get_link = page_get_link, .getattr = udf_symlink_getattr, };
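/*
 * Illustrative worked example (not part of the file above): how udf_pc_to_char()
 * maps a UDF path component sequence to the string reported by readlink(2).
 * Assuming the stored components are
 *   type 2            -> restart at the root, emit "/"
 *   type 5 "usr"      -> emit "usr/"
 *   type 5 "share"    -> emit "share/"
 *   type 3            -> emit "../"
 *   type 5 "doc"      -> emit "doc/"
 * the trailing '/' is then replaced by '\0', yielding "/usr/share/../doc".
 * udf_symlink_getattr() reports strlen() of that decoded string as st_size.
 */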
114 63 32 32 29 221 222 12 217 1 1 1008 1008 598 467 63 24 106 592 593 63 548 108 549 106 139 136 3 138 462 138 598 594 80 595 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 // SPDX-License-Identifier: GPL-2.0-or-later /* * Linux network device link state notification * * Author: * Stefan Rompf <sux@loplof.de> */ #include <linux/module.h> #include <linux/netdevice.h> #include <linux/if.h> #include <net/sock.h> #include <net/pkt_sched.h> #include <linux/rtnetlink.h> #include <linux/jiffies.h> #include <linux/spinlock.h> #include <linux/workqueue.h> #include <linux/bitops.h> #include <linux/types.h> #include "dev.h" enum lw_bits { LW_URGENT = 0, }; static unsigned long linkwatch_flags; static unsigned long linkwatch_nextevent; static void linkwatch_event(struct work_struct *dummy); static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event); static LIST_HEAD(lweventlist); static DEFINE_SPINLOCK(lweventlist_lock); static unsigned int default_operstate(const struct net_device *dev) { if (netif_testing(dev)) return IF_OPER_TESTING; /* Some uppers (DSA) have additional sources for being down, so * first check whether lower is indeed the source of its down state. */ if (!netif_carrier_ok(dev)) { struct net_device *peer; int iflink; /* If called from netdev_run_todo()/linkwatch_sync_dev(), * dev_net(dev) can be already freed, and RTNL is not held. */ if (dev->reg_state <= NETREG_REGISTERED) iflink = dev_get_iflink(dev); else iflink = dev->ifindex; if (iflink == dev->ifindex) return IF_OPER_DOWN; ASSERT_RTNL(); peer = __dev_get_by_index(dev_net(dev), iflink); if (!peer) return IF_OPER_DOWN; return netif_carrier_ok(peer) ? 
IF_OPER_DOWN : IF_OPER_LOWERLAYERDOWN; } if (netif_dormant(dev)) return IF_OPER_DORMANT; return IF_OPER_UP; } static void rfc2863_policy(struct net_device *dev) { unsigned int operstate = default_operstate(dev); if (operstate == READ_ONCE(dev->operstate)) return; switch(dev->link_mode) { case IF_LINK_MODE_TESTING: if (operstate == IF_OPER_UP) operstate = IF_OPER_TESTING; break; case IF_LINK_MODE_DORMANT: if (operstate == IF_OPER_UP) operstate = IF_OPER_DORMANT; break; case IF_LINK_MODE_DEFAULT: default: break; } WRITE_ONCE(dev->operstate, operstate); } void linkwatch_init_dev(struct net_device *dev) { /* Handle pre-registration link state changes */ if (!netif_carrier_ok(dev) || netif_dormant(dev) || netif_testing(dev)) rfc2863_policy(dev); } static bool linkwatch_urgent_event(struct net_device *dev) { if (!netif_running(dev)) return false; if (dev->ifindex != dev_get_iflink(dev)) return true; if (netif_is_lag_port(dev) || netif_is_lag_master(dev)) return true; return netif_carrier_ok(dev) && qdisc_tx_changing(dev); } static void linkwatch_add_event(struct net_device *dev) { unsigned long flags; spin_lock_irqsave(&lweventlist_lock, flags); if (list_empty(&dev->link_watch_list)) { list_add_tail(&dev->link_watch_list, &lweventlist); netdev_hold(dev, &dev->linkwatch_dev_tracker, GFP_ATOMIC); } spin_unlock_irqrestore(&lweventlist_lock, flags); } static void linkwatch_schedule_work(int urgent) { unsigned long delay = linkwatch_nextevent - jiffies; if (test_bit(LW_URGENT, &linkwatch_flags)) return; /* Minimise down-time: drop delay for up event. */ if (urgent) { if (test_and_set_bit(LW_URGENT, &linkwatch_flags)) return; delay = 0; } /* If we wrap around we'll delay it by at most HZ. */ if (delay > HZ) delay = 0; /* * If urgent, schedule immediate execution; otherwise, don't * override the existing timer. */ if (test_bit(LW_URGENT, &linkwatch_flags)) mod_delayed_work(system_unbound_wq, &linkwatch_work, 0); else queue_delayed_work(system_unbound_wq, &linkwatch_work, delay); } static void linkwatch_do_dev(struct net_device *dev) { /* * Make sure the above read is complete since it can be * rewritten as soon as we clear the bit below. */ smp_mb__before_atomic(); /* We are about to handle this device, * so new events can be accepted */ clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state); rfc2863_policy(dev); if (dev->flags & IFF_UP) { if (netif_carrier_ok(dev)) dev_activate(dev); else dev_deactivate(dev); netdev_state_change(dev); } /* Note: our callers are responsible for calling netdev_tracker_free(). * This is the reason we use __dev_put() instead of dev_put(). */ __dev_put(dev); } static void __linkwatch_run_queue(int urgent_only) { #define MAX_DO_DEV_PER_LOOP 100 int do_dev = MAX_DO_DEV_PER_LOOP; /* Use a local list here since we add non-urgent * events back to the global one when called with * urgent_only=1. */ LIST_HEAD(wrk); /* Give urgent case more budget */ if (urgent_only) do_dev += MAX_DO_DEV_PER_LOOP; /* * Limit the number of linkwatch events to one * per second so that a runaway driver does not * cause a storm of messages on the netlink * socket. This limit does not apply to up events * while the device qdisc is down. */ if (!urgent_only) linkwatch_nextevent = jiffies + HZ; /* Limit wrap-around effect on delay. 
*/ else if (time_after(linkwatch_nextevent, jiffies + HZ)) linkwatch_nextevent = jiffies; clear_bit(LW_URGENT, &linkwatch_flags); spin_lock_irq(&lweventlist_lock); list_splice_init(&lweventlist, &wrk); while (!list_empty(&wrk) && do_dev > 0) { struct net_device *dev; dev = list_first_entry(&wrk, struct net_device, link_watch_list); list_del_init(&dev->link_watch_list); if (!netif_device_present(dev) || (urgent_only && !linkwatch_urgent_event(dev))) { list_add_tail(&dev->link_watch_list, &lweventlist); continue; } /* We must free netdev tracker under * the spinlock protection. */ netdev_tracker_free(dev, &dev->linkwatch_dev_tracker); spin_unlock_irq(&lweventlist_lock); linkwatch_do_dev(dev); do_dev--; spin_lock_irq(&lweventlist_lock); } /* Add the remaining work back to lweventlist */ list_splice_init(&wrk, &lweventlist); if (!list_empty(&lweventlist)) linkwatch_schedule_work(0); spin_unlock_irq(&lweventlist_lock); } void linkwatch_sync_dev(struct net_device *dev) { unsigned long flags; int clean = 0; spin_lock_irqsave(&lweventlist_lock, flags); if (!list_empty(&dev->link_watch_list)) { list_del_init(&dev->link_watch_list); clean = 1; /* We must release netdev tracker under * the spinlock protection. */ netdev_tracker_free(dev, &dev->linkwatch_dev_tracker); } spin_unlock_irqrestore(&lweventlist_lock, flags); if (clean) linkwatch_do_dev(dev); } /* Must be called with the rtnl semaphore held */ void linkwatch_run_queue(void) { __linkwatch_run_queue(0); } static void linkwatch_event(struct work_struct *dummy) { rtnl_lock(); __linkwatch_run_queue(time_after(linkwatch_nextevent, jiffies)); rtnl_unlock(); } void linkwatch_fire_event(struct net_device *dev) { bool urgent = linkwatch_urgent_event(dev); if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { linkwatch_add_event(dev); } else if (!urgent) return; linkwatch_schedule_work(urgent); } EXPORT_SYMBOL(linkwatch_fire_event);
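/*
 * Illustrative sketch only (not part of the file above): drivers normally do
 * not call linkwatch_fire_event() directly. They toggle the carrier state, and
 * netif_carrier_on()/netif_carrier_off() queue the linkwatch event that ends
 * up running rfc2863_policy() above to update dev->operstate. Hypothetical
 * driver helper shown for context.
 */
#include <linux/netdevice.h>

static void example_phy_link_changed(struct net_device *dev, bool link_up)
{
	if (link_up)
		netif_carrier_on(dev);	/* queues a linkwatch event; operstate moves towards UP */
	else
		netif_carrier_off(dev);	/* queues a linkwatch event; operstate becomes DOWN */
}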
840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 // SPDX-License-Identifier: GPL-2.0-only #include <linux/mm.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/compiler.h> #include <linux/export.h> #include <linux/err.h> #include <linux/sched.h> #include <linux/sched/mm.h> #include <linux/sched/signal.h> #include <linux/sched/task_stack.h> #include <linux/security.h> #include <linux/swap.h> #include <linux/swapops.h> #include <linux/mman.h> #include <linux/hugetlb.h> #include <linux/vmalloc.h> #include <linux/userfaultfd_k.h> #include <linux/elf.h> #include <linux/elf-randomize.h> #include <linux/personality.h> #include <linux/random.h> #include <linux/processor.h> #include <linux/sizes.h> #include <linux/compat.h> #include <linux/uaccess.h> #include <kunit/visibility.h> #include "internal.h" #include "swap.h" /** * kfree_const - conditionally free memory * @x: pointer to the memory * * Function calls kfree only if @x is not in .rodata section. */ void kfree_const(const void *x) { if (!is_kernel_rodata((unsigned long)x)) kfree(x); } EXPORT_SYMBOL(kfree_const); /** * __kmemdup_nul - Create a NUL-terminated string from @s, which might be unterminated. * @s: The data to copy * @len: The size of the data, not including the NUL terminator * @gfp: the GFP mask used in the kmalloc() call when allocating memory * * Return: newly allocated copy of @s with NUL-termination or %NULL in * case of error */ static __always_inline char *__kmemdup_nul(const char *s, size_t len, gfp_t gfp) { char *buf; /* '+1' for the NUL terminator */ buf = kmalloc_track_caller(len + 1, gfp); if (!buf) return NULL; memcpy(buf, s, len); /* Ensure the buf is always NUL-terminated, regardless of @s. 
*/ buf[len] = '\0'; return buf; } /** * kstrdup - allocate space for and copy an existing string * @s: the string to duplicate * @gfp: the GFP mask used in the kmalloc() call when allocating memory * * Return: newly allocated copy of @s or %NULL in case of error */ noinline char *kstrdup(const char *s, gfp_t gfp) { return s ? __kmemdup_nul(s, strlen(s), gfp) : NULL; } EXPORT_SYMBOL(kstrdup); /** * kstrdup_const - conditionally duplicate an existing const string * @s: the string to duplicate * @gfp: the GFP mask used in the kmalloc() call when allocating memory * * Note: Strings allocated by kstrdup_const should be freed by kfree_const and * must not be passed to krealloc(). * * Return: source string if it is in .rodata section otherwise * fallback to kstrdup. */ const char *kstrdup_const(const char *s, gfp_t gfp) { if (is_kernel_rodata((unsigned long)s)) return s; return kstrdup(s, gfp); } EXPORT_SYMBOL(kstrdup_const); /** * kstrndup - allocate space for and copy an existing string * @s: the string to duplicate * @max: read at most @max chars from @s * @gfp: the GFP mask used in the kmalloc() call when allocating memory * * Note: Use kmemdup_nul() instead if the size is known exactly. * * Return: newly allocated copy of @s or %NULL in case of error */ char *kstrndup(const char *s, size_t max, gfp_t gfp) { return s ? __kmemdup_nul(s, strnlen(s, max), gfp) : NULL; } EXPORT_SYMBOL(kstrndup); /** * kmemdup - duplicate region of memory * * @src: memory region to duplicate * @len: memory region length * @gfp: GFP mask to use * * Return: newly allocated copy of @src or %NULL in case of error, * result is physically contiguous. Use kfree() to free. */ void *kmemdup_noprof(const void *src, size_t len, gfp_t gfp) { void *p; p = kmalloc_node_track_caller_noprof(len, gfp, NUMA_NO_NODE, _RET_IP_); if (p) memcpy(p, src, len); return p; } EXPORT_SYMBOL(kmemdup_noprof); /** * kmemdup_array - duplicate a given array. * * @src: array to duplicate. * @count: number of elements to duplicate from array. * @element_size: size of each element of array. * @gfp: GFP mask to use. * * Return: duplicated array of @src or %NULL in case of error, * result is physically contiguous. Use kfree() to free. */ void *kmemdup_array(const void *src, size_t count, size_t element_size, gfp_t gfp) { return kmemdup(src, size_mul(element_size, count), gfp); } EXPORT_SYMBOL(kmemdup_array); /** * kvmemdup - duplicate region of memory * * @src: memory region to duplicate * @len: memory region length * @gfp: GFP mask to use * * Return: newly allocated copy of @src or %NULL in case of error, * result may be not physically contiguous. Use kvfree() to free. */ void *kvmemdup(const void *src, size_t len, gfp_t gfp) { void *p; p = kvmalloc(len, gfp); if (p) memcpy(p, src, len); return p; } EXPORT_SYMBOL(kvmemdup); /** * kmemdup_nul - Create a NUL-terminated string from unterminated data * @s: The data to stringify * @len: The size of the data * @gfp: the GFP mask used in the kmalloc() call when allocating memory * * Return: newly allocated copy of @s with NUL-termination or %NULL in * case of error */ char *kmemdup_nul(const char *s, size_t len, gfp_t gfp) { return s ? 
__kmemdup_nul(s, len, gfp) : NULL; } EXPORT_SYMBOL(kmemdup_nul); static kmem_buckets *user_buckets __ro_after_init; static int __init init_user_buckets(void) { user_buckets = kmem_buckets_create("memdup_user", 0, 0, INT_MAX, NULL); return 0; } subsys_initcall(init_user_buckets); /** * memdup_user - duplicate memory region from user space * * @src: source address in user space * @len: number of bytes to copy * * Return: an ERR_PTR() on failure. Result is physically * contiguous, to be freed by kfree(). */ void *memdup_user(const void __user *src, size_t len) { void *p; p = kmem_buckets_alloc_track_caller(user_buckets, len, GFP_USER | __GFP_NOWARN); if (!p) return ERR_PTR(-ENOMEM); if (copy_from_user(p, src, len)) { kfree(p); return ERR_PTR(-EFAULT); } return p; } EXPORT_SYMBOL(memdup_user); /** * vmemdup_user - duplicate memory region from user space * * @src: source address in user space * @len: number of bytes to copy * * Return: an ERR_PTR() on failure. Result may be not * physically contiguous. Use kvfree() to free. */ void *vmemdup_user(const void __user *src, size_t len) { void *p; p = kmem_buckets_valloc(user_buckets, len, GFP_USER); if (!p) return ERR_PTR(-ENOMEM); if (copy_from_user(p, src, len)) { kvfree(p); return ERR_PTR(-EFAULT); } return p; } EXPORT_SYMBOL(vmemdup_user); /** * strndup_user - duplicate an existing string from user space * @s: The string to duplicate * @n: Maximum number of bytes to copy, including the trailing NUL. * * Return: newly allocated copy of @s or an ERR_PTR() in case of error */ char *strndup_user(const char __user *s, long n) { char *p; long length; length = strnlen_user(s, n); if (!length) return ERR_PTR(-EFAULT); if (length > n) return ERR_PTR(-EINVAL); p = memdup_user(s, length); if (IS_ERR(p)) return p; p[length - 1] = '\0'; return p; } EXPORT_SYMBOL(strndup_user); /** * memdup_user_nul - duplicate memory region from user space and NUL-terminate * * @src: source address in user space * @len: number of bytes to copy * * Return: an ERR_PTR() on failure. */ void *memdup_user_nul(const void __user *src, size_t len) { char *p; p = kmem_buckets_alloc_track_caller(user_buckets, len + 1, GFP_USER | __GFP_NOWARN); if (!p) return ERR_PTR(-ENOMEM); if (copy_from_user(p, src, len)) { kfree(p); return ERR_PTR(-EFAULT); } p[len] = '\0'; return p; } EXPORT_SYMBOL(memdup_user_nul); /* Check if the vma is being used as a stack by this task */ int vma_is_stack_for_current(struct vm_area_struct *vma) { struct task_struct * __maybe_unused t = current; return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t)); } /* * Change backing file, only valid to use during initial VMA setup. */ void vma_set_file(struct vm_area_struct *vma, struct file *file) { /* Changing an anonymous vma with this is illegal */ get_file(file); swap(vma->vm_file, file); fput(file); } EXPORT_SYMBOL(vma_set_file); #ifndef STACK_RND_MASK #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */ #endif unsigned long randomize_stack_top(unsigned long stack_top) { unsigned long random_variable = 0; if (current->flags & PF_RANDOMIZE) { random_variable = get_random_long(); random_variable &= STACK_RND_MASK; random_variable <<= PAGE_SHIFT; } #ifdef CONFIG_STACK_GROWSUP return PAGE_ALIGN(stack_top) + random_variable; #else return PAGE_ALIGN(stack_top) - random_variable; #endif } /** * randomize_page - Generate a random, page aligned address * @start: The smallest acceptable address the caller will take. 
* @range: The size of the area, starting at @start, within which the * random address must fall. * * If @start + @range would overflow, @range is capped. * * NOTE: Historical use of randomize_range, which this replaces, presumed that * @start was already page aligned. We now align it regardless. * * Return: A page aligned address within [start, start + range). On error, * @start is returned. */ unsigned long randomize_page(unsigned long start, unsigned long range) { if (!PAGE_ALIGNED(start)) { range -= PAGE_ALIGN(start) - start; start = PAGE_ALIGN(start); } if (start > ULONG_MAX - range) range = ULONG_MAX - start; range >>= PAGE_SHIFT; if (range == 0) return start; return start + (get_random_long() % range << PAGE_SHIFT); } #ifdef CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT unsigned long __weak arch_randomize_brk(struct mm_struct *mm) { /* Is the current task 32bit ? */ if (!IS_ENABLED(CONFIG_64BIT) || is_compat_task()) return randomize_page(mm->brk, SZ_32M); return randomize_page(mm->brk, SZ_1G); } unsigned long arch_mmap_rnd(void) { unsigned long rnd; #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS if (is_compat_task()) rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1); else #endif /* CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS */ rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1); return rnd << PAGE_SHIFT; } static int mmap_is_legacy(struct rlimit *rlim_stack) { if (current->personality & ADDR_COMPAT_LAYOUT) return 1; /* On parisc the stack always grows up - so a unlimited stack should * not be an indicator to use the legacy memory layout. */ if (rlim_stack->rlim_cur == RLIM_INFINITY && !IS_ENABLED(CONFIG_STACK_GROWSUP)) return 1; return sysctl_legacy_va_layout; } /* * Leave enough space between the mmap area and the stack to honour ulimit in * the face of randomisation. */ #define MIN_GAP (SZ_128M) #define MAX_GAP (STACK_TOP / 6 * 5) static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack) { #ifdef CONFIG_STACK_GROWSUP /* * For an upwards growing stack the calculation is much simpler. * Memory for the maximum stack size is reserved at the top of the * task. mmap_base starts directly below the stack and grows * downwards. */ return PAGE_ALIGN_DOWN(mmap_upper_limit(rlim_stack) - rnd); #else unsigned long gap = rlim_stack->rlim_cur; unsigned long pad = stack_guard_gap; /* Account for stack randomization if necessary */ if (current->flags & PF_RANDOMIZE) pad += (STACK_RND_MASK << PAGE_SHIFT); /* Values close to RLIM_INFINITY can overflow. 
*/ if (gap + pad > gap) gap += pad; if (gap < MIN_GAP && MIN_GAP < MAX_GAP) gap = MIN_GAP; else if (gap > MAX_GAP) gap = MAX_GAP; return PAGE_ALIGN(STACK_TOP - gap - rnd); #endif } void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) { unsigned long random_factor = 0UL; if (current->flags & PF_RANDOMIZE) random_factor = arch_mmap_rnd(); if (mmap_is_legacy(rlim_stack)) { mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; clear_bit(MMF_TOPDOWN, &mm->flags); } else { mm->mmap_base = mmap_base(random_factor, rlim_stack); set_bit(MMF_TOPDOWN, &mm->flags); } } #elif defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT) void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) { mm->mmap_base = TASK_UNMAPPED_BASE; clear_bit(MMF_TOPDOWN, &mm->flags); } #endif #ifdef CONFIG_MMU EXPORT_SYMBOL_IF_KUNIT(arch_pick_mmap_layout); #endif /** * __account_locked_vm - account locked pages to an mm's locked_vm * @mm: mm to account against * @pages: number of pages to account * @inc: %true if @pages should be considered positive, %false if not * @task: task used to check RLIMIT_MEMLOCK * @bypass_rlim: %true if checking RLIMIT_MEMLOCK should be skipped * * Assumes @task and @mm are valid (i.e. at least one reference on each), and * that mmap_lock is held as writer. * * Return: * * 0 on success * * -ENOMEM if RLIMIT_MEMLOCK would be exceeded. */ int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc, struct task_struct *task, bool bypass_rlim) { unsigned long locked_vm, limit; int ret = 0; mmap_assert_write_locked(mm); locked_vm = mm->locked_vm; if (inc) { if (!bypass_rlim) { limit = task_rlimit(task, RLIMIT_MEMLOCK) >> PAGE_SHIFT; if (locked_vm + pages > limit) ret = -ENOMEM; } if (!ret) mm->locked_vm = locked_vm + pages; } else { WARN_ON_ONCE(pages > locked_vm); mm->locked_vm = locked_vm - pages; } pr_debug("%s: [%d] caller %ps %c%lu %lu/%lu%s\n", __func__, task->pid, (void *)_RET_IP_, (inc) ? '+' : '-', pages << PAGE_SHIFT, locked_vm << PAGE_SHIFT, task_rlimit(task, RLIMIT_MEMLOCK), ret ? " - exceeded" : ""); return ret; } EXPORT_SYMBOL_GPL(__account_locked_vm); /** * account_locked_vm - account locked pages to an mm's locked_vm * @mm: mm to account against, may be NULL * @pages: number of pages to account * @inc: %true if @pages should be considered positive, %false if not * * Assumes a non-NULL @mm is valid (i.e. at least one reference on it). * * Return: * * 0 on success, or if mm is NULL * * -ENOMEM if RLIMIT_MEMLOCK would be exceeded. */ int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc) { int ret; if (pages == 0 || !mm) return 0; mmap_write_lock(mm); ret = __account_locked_vm(mm, pages, inc, current, capable(CAP_IPC_LOCK)); mmap_write_unlock(mm); return ret; } EXPORT_SYMBOL_GPL(account_locked_vm); unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long pgoff) { unsigned long ret; struct mm_struct *mm = current->mm; unsigned long populate; LIST_HEAD(uf); ret = security_mmap_file(file, prot, flag); if (!ret) { if (mmap_write_lock_killable(mm)) return -EINTR; ret = do_mmap(file, addr, len, prot, flag, 0, pgoff, &populate, &uf); mmap_write_unlock(mm); userfaultfd_unmap_complete(mm, &uf); if (populate) mm_populate(ret, populate); } return ret; } /* * Perform a userland memory mapping into the current process address space. See * the comment for do_mmap() for more details on this operation in general. 

unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
	unsigned long len, unsigned long prot,
	unsigned long flag, unsigned long pgoff)
{
	unsigned long ret;
	struct mm_struct *mm = current->mm;
	unsigned long populate;
	LIST_HEAD(uf);

	ret = security_mmap_file(file, prot, flag);
	if (!ret) {
		if (mmap_write_lock_killable(mm))
			return -EINTR;
		ret = do_mmap(file, addr, len, prot, flag, 0, pgoff, &populate,
			      &uf);
		mmap_write_unlock(mm);
		userfaultfd_unmap_complete(mm, &uf);
		if (populate)
			mm_populate(ret, populate);
	}
	return ret;
}

/*
 * Perform a userland memory mapping into the current process address space.
 * See the comment for do_mmap() for more details on this operation in general.
 *
 * This differs from do_mmap() in that:
 *
 * a. An offset parameter is provided rather than pgoff, which is both checked
 *    for overflow and page alignment.
 * b. mmap locking is performed on the caller's behalf.
 * c. Userfaultfd unmap events and memory population are handled.
 *
 * This means that this function performs essentially the same work as if
 * userland were invoking mmap(2).
 *
 * Returns either an error, or the address at which the requested mapping has
 * been performed.
 */
unsigned long vm_mmap(struct file *file, unsigned long addr,
	unsigned long len, unsigned long prot,
	unsigned long flag, unsigned long offset)
{
	if (unlikely(offset + PAGE_ALIGN(len) < offset))
		return -EINVAL;
	if (unlikely(offset_in_page(offset)))
		return -EINVAL;

	return vm_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
}
EXPORT_SYMBOL(vm_mmap);

static gfp_t kmalloc_gfp_adjust(gfp_t flags, size_t size)
{
	/*
	 * We want to attempt a large physically contiguous block first because
	 * it is less likely to fragment multiple larger blocks and therefore
	 * contribute to a long term fragmentation less than vmalloc fallback.
	 * However make sure that larger requests are not too disruptive - no
	 * OOM killer and no allocation failure warnings as we have a fallback.
	 */
	if (size > PAGE_SIZE) {
		flags |= __GFP_NOWARN;

		if (!(flags & __GFP_RETRY_MAYFAIL))
			flags |= __GFP_NORETRY;

		/* nofail semantic is implemented by the vmalloc fallback */
		flags &= ~__GFP_NOFAIL;
	}

	return flags;
}

/**
 * __kvmalloc_node - attempt to allocate physically contiguous memory, but upon
 * failure, fall back to non-contiguous (vmalloc) allocation.
 * @size: size of the request.
 * @b: which set of kmalloc buckets to allocate from.
 * @flags: gfp mask for the allocation - must be compatible (superset) with GFP_KERNEL.
 * @node: numa node to allocate from
 *
 * Uses kmalloc to get the memory but if the allocation fails then falls back
 * to the vmalloc allocator. Use kvfree for freeing the memory.
 *
 * GFP_NOWAIT and GFP_ATOMIC are not supported, neither is the __GFP_NORETRY modifier.
 * __GFP_RETRY_MAYFAIL is supported, and it should be used only if kmalloc is
 * preferable to the vmalloc fallback, due to visible performance drawbacks.
 *
 * Return: pointer to the allocated memory or %NULL in case of failure
 */
void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
{
	void *ret;

	/*
	 * It doesn't really make sense to fallback to vmalloc for sub page
	 * requests
	 */
	ret = __kmalloc_node_noprof(PASS_BUCKET_PARAMS(size, b),
				    kmalloc_gfp_adjust(flags, size),
				    node);
	if (ret || size <= PAGE_SIZE)
		return ret;

	/* non-sleeping allocations are not supported by vmalloc */
	if (!gfpflags_allow_blocking(flags))
		return NULL;

	/* Don't even allow crazy sizes */
	if (unlikely(size > INT_MAX)) {
		WARN_ON_ONCE(!(flags & __GFP_NOWARN));
		return NULL;
	}

	/*
	 * kvmalloc() can always use VM_ALLOW_HUGE_VMAP,
	 * since the callers already cannot assume anything
	 * about the resulting pointer, and cannot play
	 * protection games.
	 */
	return __vmalloc_node_range_noprof(size, 1, VMALLOC_START, VMALLOC_END,
			flags, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
			node, __builtin_return_address(0));
}
EXPORT_SYMBOL(__kvmalloc_node_noprof);
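
/*
 * Illustrative sketch, not part of the original file: typical use of the
 * kvmalloc() family built on top of the helper above.  Small tables come
 * straight from kmalloc; large ones may transparently fall back to vmalloc,
 * and either kind is released with kvfree() (defined below).
 * "example_alloc_table" and "nr_entries" are made up for this sketch.
 */
static u64 *__maybe_unused example_alloc_table(size_t nr_entries)
{
	/* Overflow-checked element-count allocation; GFP_KERNEL context only. */
	return kvmalloc_array(nr_entries, sizeof(u64), GFP_KERNEL);
}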

/**
 * kvfree() - Free memory.
 * @addr: Pointer to allocated memory.
 *
 * kvfree frees memory allocated by any of vmalloc(), kmalloc() or kvmalloc().
 * It is slightly more efficient to use kfree() or vfree() if you are certain
 * that you know which one to use.
 *
 * Context: Either preemptible task context or not-NMI interrupt.
 */
void kvfree(const void *addr)
{
	if (is_vmalloc_addr(addr))
		vfree(addr);
	else
		kfree(addr);
}
EXPORT_SYMBOL(kvfree);

/**
 * kvfree_sensitive - Free a data object containing sensitive information.
 * @addr: address of the data object to be freed.
 * @len: length of the data object.
 *
 * Use the special memzero_explicit() function to clear the content of a
 * kvmalloc'ed object containing sensitive data to make sure that the
 * compiler won't optimize out the data clearing.
 */
void kvfree_sensitive(const void *addr, size_t len)
{
	if (likely(!ZERO_OR_NULL_PTR(addr))) {
		memzero_explicit((void *)addr, len);
		kvfree(addr);
	}
}
EXPORT_SYMBOL(kvfree_sensitive);

/**
 * kvrealloc - reallocate memory; contents remain unchanged
 * @p: object to reallocate memory for
 * @size: the size to reallocate
 * @flags: the flags for the page level allocator
 *
 * If @p is %NULL, kvrealloc() behaves exactly like kvmalloc(). If @size is 0
 * and @p is not a %NULL pointer, the object pointed to is freed.
 *
 * If __GFP_ZERO logic is requested, callers must ensure that, starting with the
 * initial memory allocation, every subsequent call to this API for the same
 * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that
 * __GFP_ZERO is not fully honored by this API.
 *
 * In any case, the contents of the object pointed to are preserved up to the
 * lesser of the new and old sizes.
 *
 * This function must not be called concurrently with itself or kvfree() for the
 * same memory allocation.
 *
 * Return: pointer to the allocated memory or %NULL in case of error
 */
void *kvrealloc_noprof(const void *p, size_t size, gfp_t flags)
{
	void *n;

	if (is_vmalloc_addr(p))
		return vrealloc_noprof(p, size, flags);

	n = krealloc_noprof(p, size, kmalloc_gfp_adjust(flags, size));
	if (!n) {
		/* We failed to krealloc(), fall back to kvmalloc(). */
		n = kvmalloc_noprof(size, flags);
		if (!n)
			return NULL;

		if (p) {
			/* We already know that `p` is not a vmalloc address. */
			kasan_disable_current();
			memcpy(n, kasan_reset_tag(p), ksize(p));
			kasan_enable_current();

			kfree(p);
		}
	}

	return n;
}
EXPORT_SYMBOL(kvrealloc_noprof);

/**
 * __vmalloc_array - allocate memory for a virtually contiguous array.
 * @n: number of elements.
 * @size: element size.
 * @flags: the type of memory to allocate (see kmalloc).
 */
void *__vmalloc_array_noprof(size_t n, size_t size, gfp_t flags)
{
	size_t bytes;

	if (unlikely(check_mul_overflow(n, size, &bytes)))
		return NULL;
	return __vmalloc_noprof(bytes, flags);
}
EXPORT_SYMBOL(__vmalloc_array_noprof);

/**
 * vmalloc_array - allocate memory for a virtually contiguous array.
 * @n: number of elements.
 * @size: element size.
 */
void *vmalloc_array_noprof(size_t n, size_t size)
{
	return __vmalloc_array_noprof(n, size, GFP_KERNEL);
}
EXPORT_SYMBOL(vmalloc_array_noprof);

/**
 * __vcalloc - allocate and zero memory for a virtually contiguous array.
 * @n: number of elements.
 * @size: element size.
 * @flags: the type of memory to allocate (see kmalloc).
 */
void *__vcalloc_noprof(size_t n, size_t size, gfp_t flags)
{
	return __vmalloc_array_noprof(n, size, flags | __GFP_ZERO);
}
EXPORT_SYMBOL(__vcalloc_noprof);
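
/*
 * Illustrative sketch, not part of the original file: handling a temporary
 * copy of key material with kvfree_sensitive() above so the buffer is zeroed
 * before it goes back to the allocator.  "example_use_key" is a made-up name.
 */
static int __maybe_unused example_use_key(const void *key, size_t keylen)
{
	void *tmp;

	tmp = kvmalloc(keylen, GFP_KERNEL);
	if (!tmp)
		return -ENOMEM;
	memcpy(tmp, key, keylen);

	/* ... derive whatever is needed from the key copy ... */

	kvfree_sensitive(tmp, keylen);	/* memzero_explicit() + kvfree() */
	return 0;
}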

/**
 * vcalloc - allocate and zero memory for a virtually contiguous array.
 * @n: number of elements.
 * @size: element size.
 */
void *vcalloc_noprof(size_t n, size_t size)
{
	return __vmalloc_array_noprof(n, size, GFP_KERNEL | __GFP_ZERO);
}
EXPORT_SYMBOL(vcalloc_noprof);

struct anon_vma *folio_anon_vma(const struct folio *folio)
{
	unsigned long mapping = (unsigned long)folio->mapping;

	if ((mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
		return NULL;
	return (void *)(mapping - PAGE_MAPPING_ANON);
}

/**
 * folio_mapping - Find the mapping where this folio is stored.
 * @folio: The folio.
 *
 * For folios which are in the page cache, return the mapping that this
 * page belongs to.  Folios in the swap cache return the swap mapping
 * this page is stored in (which is different from the mapping for the
 * swap file or swap device where the data is stored).
 *
 * You can call this for folios which aren't in the swap cache or page
 * cache and it will return NULL.
 */
struct address_space *folio_mapping(struct folio *folio)
{
	struct address_space *mapping;

	/* This happens if someone calls flush_dcache_page on slab page */
	if (unlikely(folio_test_slab(folio)))
		return NULL;

	if (unlikely(folio_test_swapcache(folio)))
		return swap_address_space(folio->swap);

	mapping = folio->mapping;
	if ((unsigned long)mapping & PAGE_MAPPING_FLAGS)
		return NULL;

	return mapping;
}
EXPORT_SYMBOL(folio_mapping);

/**
 * folio_copy - Copy the contents of one folio to another.
 * @dst: Folio to copy to.
 * @src: Folio to copy from.
 *
 * The bytes in the folio represented by @src are copied to @dst.
 * Assumes the caller has validated that @dst is at least as large as @src.
 * Can be called in atomic context for order-0 folios, but if the folio is
 * larger, it may sleep.
 */
void folio_copy(struct folio *dst, struct folio *src)
{
	long i = 0;
	long nr = folio_nr_pages(src);

	for (;;) {
		copy_highpage(folio_page(dst, i), folio_page(src, i));
		if (++i == nr)
			break;
		cond_resched();
	}
}
EXPORT_SYMBOL(folio_copy);

int folio_mc_copy(struct folio *dst, struct folio *src)
{
	long nr = folio_nr_pages(src);
	long i = 0;

	for (;;) {
		if (copy_mc_highpage(folio_page(dst, i), folio_page(src, i)))
			return -EHWPOISON;
		if (++i == nr)
			break;
		cond_resched();
	}

	return 0;
}
EXPORT_SYMBOL(folio_mc_copy);
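
/*
 * Illustrative sketch, not part of the original file: a migration-style
 * caller using folio_mc_copy() above so that an uncorrected memory error in
 * the source folio is reported rather than consumed.  "example_copy_folio"
 * is a made-up name.
 */
static int __maybe_unused example_copy_folio(struct folio *dst,
					     struct folio *src)
{
	int err;

	/* May sleep for large folios; see the comment on folio_copy(). */
	err = folio_mc_copy(dst, src);
	if (err == -EHWPOISON) {
		/* Source data is lost; the caller must abort the copy. */
		return err;
	}

	return 0;
}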

int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;
int sysctl_overcommit_ratio __read_mostly = 50;
unsigned long sysctl_overcommit_kbytes __read_mostly;
int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */
unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */

int overcommit_ratio_handler(const struct ctl_table *table, int write, void *buffer,
		size_t *lenp, loff_t *ppos)
{
	int ret;

	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	if (ret == 0 && write)
		sysctl_overcommit_kbytes = 0;
	return ret;
}

static void sync_overcommit_as(struct work_struct *dummy)
{
	percpu_counter_sync(&vm_committed_as);
}

int overcommit_policy_handler(const struct ctl_table *table, int write, void *buffer,
		size_t *lenp, loff_t *ppos)
{
	struct ctl_table t;
	int new_policy = -1;
	int ret;

	/*
	 * The deviation of sync_overcommit_as could be big with loose policy
	 * like OVERCOMMIT_ALWAYS/OVERCOMMIT_GUESS. When changing policy to
	 * strict OVERCOMMIT_NEVER, we need to reduce the deviation to comply
	 * with the strict "NEVER", and to avoid a possible race condition
	 * (even though users usually won't switch to OVERCOMMIT_NEVER
	 * frequently), the switch is done in the following order:
	 *	1. changing the batch
	 *	2. sync percpu count on each CPU
	 *	3. switch the policy
	 */
	if (write) {
		t = *table;
		t.data = &new_policy;
		ret = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
		if (ret || new_policy == -1)
			return ret;

		mm_compute_batch(new_policy);
		if (new_policy == OVERCOMMIT_NEVER)
			schedule_on_each_cpu(sync_overcommit_as);
		sysctl_overcommit_memory = new_policy;
	} else {
		ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	}

	return ret;
}

int overcommit_kbytes_handler(const struct ctl_table *table, int write, void *buffer,
		size_t *lenp, loff_t *ppos)
{
	int ret;

	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
	if (ret == 0 && write)
		sysctl_overcommit_ratio = 0;
	return ret;
}

/*
 * Committed memory limit enforced when OVERCOMMIT_NEVER policy is used
 */
unsigned long vm_commit_limit(void)
{
	unsigned long allowed;

	if (sysctl_overcommit_kbytes)
		allowed = sysctl_overcommit_kbytes >> (PAGE_SHIFT - 10);
	else
		allowed = ((totalram_pages() - hugetlb_total_pages())
			   * sysctl_overcommit_ratio / 100);
	allowed += total_swap_pages;

	return allowed;
}

/*
 * Make sure vm_committed_as is in one cacheline and not cacheline shared with
 * other variables. It can be updated by several CPUs frequently.
 */
struct percpu_counter vm_committed_as ____cacheline_aligned_in_smp;

/*
 * The global memory commitment made in the system can be a metric
 * that can be used to drive ballooning decisions when Linux is hosted
 * as a guest. On Hyper-V, the host implements a policy engine for dynamically
 * balancing memory across competing virtual machines that are hosted.
 * Several metrics drive this policy engine including the guest reported
 * memory commitment.
 *
 * The time cost of this is very low for small platforms, and for a big
 * platform like a 2S/36C/72T Skylake server, in the worst case where
 * vm_committed_as's spinlock is under severe contention, the time cost
 * could be about 30~40 microseconds.
 */
unsigned long vm_memory_committed(void)
{
	return percpu_counter_sum_positive(&vm_committed_as);
}
EXPORT_SYMBOL_GPL(vm_memory_committed);
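
/*
 * Worked example (illustrative, with made-up numbers): under OVERCOMMIT_NEVER
 * with 4 KiB pages, 16 GiB of RAM (4194304 pages), no hugetlb pages, the
 * default overcommit_ratio of 50 and 4 GiB of swap (1048576 pages),
 * vm_commit_limit() yields
 *
 *	4194304 * 50 / 100 + 1048576 = 3145728 pages = 12 GiB.
 *
 * If sysctl_overcommit_kbytes is non-zero it takes precedence instead;
 * "kbytes >> (PAGE_SHIFT - 10)" simply converts KiB to pages (a divide by
 * four with 4 KiB pages).
 */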

/*
 * Check that a process has enough memory to allocate a new virtual
 * mapping. 0 means there is enough memory for the allocation to
 * succeed and -ENOMEM implies there is not.
 *
 * We currently support three overcommit policies, which are set via the
 * vm.overcommit_memory sysctl.  See Documentation/mm/overcommit-accounting.rst
 *
 * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
 * Additional code 2002 Jul 20 by Robert Love.
 *
 * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise.
 *
 * Note this is a helper function intended to be used by LSMs which
 * wish to use this logic.
 */
int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
{
	long allowed;
	unsigned long bytes_failed;

	vm_acct_memory(pages);

	/*
	 * Sometimes we want to use more memory than we have
	 */
	if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
		return 0;

	if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
		if (pages > totalram_pages() + total_swap_pages)
			goto error;
		return 0;
	}

	allowed = vm_commit_limit();
	/*
	 * Reserve some for root
	 */
	if (!cap_sys_admin)
		allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);

	/*
	 * Don't let a single process grow so big a user can't recover
	 */
	if (mm) {
		long reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10);

		allowed -= min_t(long, mm->total_vm / 32, reserve);
	}

	if (percpu_counter_read_positive(&vm_committed_as) < allowed)
		return 0;
error:
	bytes_failed = pages << PAGE_SHIFT;
	pr_warn_ratelimited("%s: pid: %d, comm: %s, bytes: %lu not enough memory for the allocation\n",
			    __func__, current->pid, current->comm, bytes_failed);
	vm_unacct_memory(pages);

	return -ENOMEM;
}

/**
 * get_cmdline() - copy the cmdline value to a buffer.
 * @task: the task whose cmdline value to copy.
 * @buffer: the buffer to copy to.
 * @buflen: the length of the buffer. Larger cmdline values are truncated
 *          to this length.
 *
 * Return: the size of the cmdline field copied. Note that the copy does
 * not guarantee an ending NULL byte.
 */
int get_cmdline(struct task_struct *task, char *buffer, int buflen)
{
	int res = 0;
	unsigned int len;
	struct mm_struct *mm = get_task_mm(task);
	unsigned long arg_start, arg_end, env_start, env_end;

	if (!mm)
		goto out;
	if (!mm->arg_end)
		goto out_mm;	/* Shh! No looking before we're done */

	spin_lock(&mm->arg_lock);
	arg_start = mm->arg_start;
	arg_end = mm->arg_end;
	env_start = mm->env_start;
	env_end = mm->env_end;
	spin_unlock(&mm->arg_lock);

	len = arg_end - arg_start;

	if (len > buflen)
		len = buflen;

	res = access_process_vm(task, arg_start, buffer, len, FOLL_FORCE);

	/*
	 * If the nul at the end of args has been overwritten, then
	 * assume application is using setproctitle(3).
	 */
	if (res > 0 && buffer[res-1] != '\0' && len < buflen) {
		len = strnlen(buffer, res);
		if (len < res) {
			res = len;
		} else {
			len = env_end - env_start;
			if (len > buflen - res)
				len = buflen - res;
			res += access_process_vm(task, env_start,
						 buffer+res, len,
						 FOLL_FORCE);
			res = strnlen(buffer, res);
		}
	}
out_mm:
	mmput(mm);
out:
	return res;
}
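
/*
 * Illustrative sketch, not part of the original file: a caller of
 * get_cmdline() above must terminate the buffer itself, since the copy is
 * not guaranteed to end in a NUL.  The copied area holds the NUL-separated
 * argument strings, so a plain %s print only shows up to the first NUL
 * (typically argv[0]).  "example_print_cmdline" is a made-up name.
 */
static void __maybe_unused example_print_cmdline(struct task_struct *task)
{
	char buf[256];
	int len;

	len = get_cmdline(task, buf, sizeof(buf) - 1);
	buf[len] = '\0';	/* len is 0 when the task has no mm */
	pr_info("pid %d: %s\n", task->pid, buf);
}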

int __weak memcmp_pages(struct page *page1, struct page *page2)
{
	char *addr1, *addr2;
	int ret;

	addr1 = kmap_local_page(page1);
	addr2 = kmap_local_page(page2);
	ret = memcmp(addr1, addr2, PAGE_SIZE);
	kunmap_local(addr2);
	kunmap_local(addr1);
	return ret;
}

#ifdef CONFIG_PRINTK
/**
 * mem_dump_obj - Print available provenance information
 * @object: object for which to find provenance information.
 *
 * This function uses pr_cont(), so that the caller is expected to have
 * printed out whatever preamble is appropriate.  The provenance information
 * depends on the type of object and on how much debugging is enabled.
 * For example, for a slab-cache object, the slab name is printed, and,
 * if available, the return address and stack trace from the allocation
 * and last free path of that object.
 */
void mem_dump_obj(void *object)
{
	const char *type;

	if (kmem_dump_obj(object))
		return;

	if (vmalloc_dump_obj(object))
		return;

	if (is_vmalloc_addr(object))
		type = "vmalloc memory";
	else if (virt_addr_valid(object))
		type = "non-slab/vmalloc memory";
	else if (object == NULL)
		type = "NULL pointer";
	else if (object == ZERO_SIZE_PTR)
		type = "zero-size pointer";
	else
		type = "non-paged memory";

	pr_cont(" %s\n", type);
}
EXPORT_SYMBOL_GPL(mem_dump_obj);
#endif

/*
 * A driver might set a page logically offline -- PageOffline() -- and
 * turn the page inaccessible in the hypervisor; after that, access to page
 * content can be fatal.
 *
 * Some special PFN walkers -- i.e., /proc/kcore -- read content of random
 * pages after checking PageOffline(); however, these PFN walkers can race
 * with drivers that set PageOffline().
 *
 * page_offline_freeze()/page_offline_thaw() allow a subsystem to
 * synchronize with such drivers, ensuring that a page cannot be set
 * PageOffline() while frozen.
 *
 * page_offline_begin()/page_offline_end() is used by drivers that care about
 * such races when setting a page PageOffline().
 */
static DECLARE_RWSEM(page_offline_rwsem);

void page_offline_freeze(void)
{
	down_read(&page_offline_rwsem);
}

void page_offline_thaw(void)
{
	up_read(&page_offline_rwsem);
}

void page_offline_begin(void)
{
	down_write(&page_offline_rwsem);
}
EXPORT_SYMBOL(page_offline_begin);

void page_offline_end(void)
{
	up_write(&page_offline_rwsem);
}
EXPORT_SYMBOL(page_offline_end);

#ifndef flush_dcache_folio
void flush_dcache_folio(struct folio *folio)
{
	long i, nr = folio_nr_pages(folio);

	for (i = 0; i < nr; i++)
		flush_dcache_page(folio_page(folio, i));
}
EXPORT_SYMBOL(flush_dcache_folio);
#endif
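
/*
 * Illustrative sketch, not part of the original file: the locking pattern
 * expected from a PFN walker that wants to read arbitrary page content
 * without racing against drivers marking pages PageOffline().
 * "example_walk_pfns" is a made-up name; pfn_to_online_page() and
 * PageOffline() are assumed to be available via memory_hotplug/page-flags
 * headers.
 */
static void __maybe_unused example_walk_pfns(unsigned long start_pfn,
					     unsigned long nr_pages)
{
	unsigned long pfn;

	page_offline_freeze();	/* no page can become PageOffline() now */
	for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
		struct page *page = pfn_to_online_page(pfn);

		if (!page || PageOffline(page))
			continue;	/* never touch offline page content */

		/* ... safely read the page content here ... */
	}
	page_offline_thaw();
}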