Total coverage: 438219 (25%)of 1785546
9 9 9 9 9 9 9 15 15 15 15 15 15 15 9 9 9 9 9 9 9 9 9 9 9 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2016 Mellanox Technologies. All rights reserved. * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> */ #include "devl_internal.h" /** * struct devlink_resource - devlink resource * @name: name of the resource * @id: id, per devlink instance * @size: size of the resource * @size_new: updated size of the resource, reload is needed * @size_valid: valid in case the total size of the resource is valid * including its children * @parent: parent resource * @size_params: size parameters * @list: parent list * @resource_list: list of child resources * @occ_get: occupancy getter callback * @occ_get_priv: occupancy getter callback priv */ struct devlink_resource { const char *name; u64 id; u64 size; u64 size_new; bool size_valid; struct devlink_resource *parent; struct devlink_resource_size_params size_params; struct list_head list; struct list_head resource_list; devlink_resource_occ_get_t *occ_get; void *occ_get_priv; }; static struct devlink_resource * devlink_resource_find(struct devlink *devlink, struct devlink_resource *resource, u64 resource_id) { struct list_head *resource_list; if (resource) resource_list = &resource->resource_list; else resource_list = &devlink->resource_list; list_for_each_entry(resource, resource_list, list) { struct devlink_resource *child_resource; if (resource->id == resource_id) return resource; child_resource = devlink_resource_find(devlink, resource, resource_id); if (child_resource) return child_resource; } return NULL; } static void devlink_resource_validate_children(struct devlink_resource *resource) { struct devlink_resource *child_resource; bool size_valid = true; u64 parts_size = 0; if (list_empty(&resource->resource_list)) goto out; list_for_each_entry(child_resource, &resource->resource_list, list) parts_size += child_resource->size_new; if (parts_size > resource->size_new) size_valid = false; out: resource->size_valid = size_valid; } static int devlink_resource_validate_size(struct devlink_resource *resource, u64 size, struct netlink_ext_ack *extack) { u64 reminder; int err = 0; if (size > resource->size_params.size_max) { NL_SET_ERR_MSG(extack, "Size larger than maximum"); err = -EINVAL; } if (size < resource->size_params.size_min) { NL_SET_ERR_MSG(extack, "Size smaller than minimum"); err = -EINVAL; } div64_u64_rem(size, resource->size_params.size_granularity, &reminder); if (reminder) { NL_SET_ERR_MSG(extack, "Wrong granularity"); err = -EINVAL; } return err; } int devlink_nl_resource_set_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_resource *resource; u64 resource_id; u64 size; int err; if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_RESOURCE_ID) || GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_RESOURCE_SIZE)) return -EINVAL; resource_id = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_ID]); resource = devlink_resource_find(devlink, NULL, resource_id); if (!resource) return -EINVAL; size = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_SIZE]); err = devlink_resource_validate_size(resource, size, info->extack); if (err) return err; resource->size_new = size; devlink_resource_validate_children(resource); if (resource->parent) devlink_resource_validate_children(resource->parent); return 0; } static int devlink_resource_size_params_put(struct devlink_resource *resource, struct sk_buff *skb) { struct devlink_resource_size_params *size_params; size_params = &resource->size_params; if (devlink_nl_put_u64(skb, DEVLINK_ATTR_RESOURCE_SIZE_GRAN, size_params->size_granularity) || devlink_nl_put_u64(skb, DEVLINK_ATTR_RESOURCE_SIZE_MAX, size_params->size_max) || devlink_nl_put_u64(skb, DEVLINK_ATTR_RESOURCE_SIZE_MIN, size_params->size_min) || nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_UNIT, size_params->unit)) return -EMSGSIZE; return 0; } static int devlink_resource_occ_put(struct devlink_resource *resource, struct sk_buff *skb) { if (!resource->occ_get) return 0; return devlink_nl_put_u64(skb, DEVLINK_ATTR_RESOURCE_OCC, resource->occ_get(resource->occ_get_priv)); } static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb, struct devlink_resource *resource) { struct devlink_resource *child_resource; struct nlattr *child_resource_attr; struct nlattr *resource_attr; resource_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_RESOURCE); if (!resource_attr) return -EMSGSIZE; if (nla_put_string(skb, DEVLINK_ATTR_RESOURCE_NAME, resource->name) || devlink_nl_put_u64(skb, DEVLINK_ATTR_RESOURCE_SIZE, resource->size) || devlink_nl_put_u64(skb, DEVLINK_ATTR_RESOURCE_ID, resource->id)) goto nla_put_failure; if (resource->size != resource->size_new && devlink_nl_put_u64(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW, resource->size_new)) goto nla_put_failure; if (devlink_resource_occ_put(resource, skb)) goto nla_put_failure; if (devlink_resource_size_params_put(resource, skb)) goto nla_put_failure; if (list_empty(&resource->resource_list)) goto out; if (nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_SIZE_VALID, resource->size_valid)) goto nla_put_failure; child_resource_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_RESOURCE_LIST); if (!child_resource_attr) goto nla_put_failure; list_for_each_entry(child_resource, &resource->resource_list, list) { if (devlink_resource_put(devlink, skb, child_resource)) goto resource_put_failure; } nla_nest_end(skb, child_resource_attr); out: nla_nest_end(skb, resource_attr); return 0; resource_put_failure: nla_nest_cancel(skb, child_resource_attr); nla_put_failure: nla_nest_cancel(skb, resource_attr); return -EMSGSIZE; } static int devlink_resource_fill(struct genl_info *info, enum devlink_command cmd, int flags) { struct devlink *devlink = info->user_ptr[0]; struct devlink_resource *resource; struct nlattr *resources_attr; struct sk_buff *skb = NULL; struct nlmsghdr *nlh; bool incomplete; void *hdr; int i; int err; resource = list_first_entry(&devlink->resource_list, struct devlink_resource, list); start_again: err = devlink_nl_msg_reply_and_new(&skb, info); if (err) return err; hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq, &devlink_nl_family, NLM_F_MULTI, cmd); if (!hdr) { nlmsg_free(skb); return -EMSGSIZE; } if (devlink_nl_put_handle(skb, devlink)) goto nla_put_failure; resources_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_RESOURCE_LIST); if (!resources_attr) goto nla_put_failure; incomplete = false; i = 0; list_for_each_entry_from(resource, &devlink->resource_list, list) { err = devlink_resource_put(devlink, skb, resource); if (err) { if (!i) goto err_resource_put; incomplete = true; break; } i++; } nla_nest_end(skb, resources_attr); genlmsg_end(skb, hdr); if (incomplete) goto start_again; send_done: nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq, NLMSG_DONE, 0, flags | NLM_F_MULTI); if (!nlh) { err = devlink_nl_msg_reply_and_new(&skb, info); if (err) return err; goto send_done; } return genlmsg_reply(skb, info); nla_put_failure: err = -EMSGSIZE; err_resource_put: nlmsg_free(skb); return err; } int devlink_nl_resource_dump_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; if (list_empty(&devlink->resource_list)) return -EOPNOTSUPP; return devlink_resource_fill(info, DEVLINK_CMD_RESOURCE_DUMP, 0); } int devlink_resources_validate(struct devlink *devlink, struct devlink_resource *resource, struct genl_info *info) { struct list_head *resource_list; int err = 0; if (resource) resource_list = &resource->resource_list; else resource_list = &devlink->resource_list; list_for_each_entry(resource, resource_list, list) { if (!resource->size_valid) return -EINVAL; err = devlink_resources_validate(devlink, resource, info); if (err) return err; } return err; } /** * devl_resource_register - devlink resource register * * @devlink: devlink * @resource_name: resource's name * @resource_size: resource's size * @resource_id: resource's id * @parent_resource_id: resource's parent id * @size_params: size parameters * * Generic resources should reuse the same names across drivers. * Please see the generic resources list at: * Documentation/networking/devlink/devlink-resource.rst */ int devl_resource_register(struct devlink *devlink, const char *resource_name, u64 resource_size, u64 resource_id, u64 parent_resource_id, const struct devlink_resource_size_params *size_params) { struct devlink_resource *resource; struct list_head *resource_list; bool top_hierarchy; lockdep_assert_held(&devlink->lock); top_hierarchy = parent_resource_id == DEVLINK_RESOURCE_ID_PARENT_TOP; resource = devlink_resource_find(devlink, NULL, resource_id); if (resource) return -EEXIST; resource = kzalloc(sizeof(*resource), GFP_KERNEL); if (!resource) return -ENOMEM; if (top_hierarchy) { resource_list = &devlink->resource_list; } else { struct devlink_resource *parent_resource; parent_resource = devlink_resource_find(devlink, NULL, parent_resource_id); if (parent_resource) { resource_list = &parent_resource->resource_list; resource->parent = parent_resource; } else { kfree(resource); return -EINVAL; } } resource->name = resource_name; resource->size = resource_size; resource->size_new = resource_size; resource->id = resource_id; resource->size_valid = true; memcpy(&resource->size_params, size_params, sizeof(resource->size_params)); INIT_LIST_HEAD(&resource->resource_list); list_add_tail(&resource->list, resource_list); return 0; } EXPORT_SYMBOL_GPL(devl_resource_register); static void devlink_resource_unregister(struct devlink *devlink, struct devlink_resource *resource) { struct devlink_resource *tmp, *child_resource; list_for_each_entry_safe(child_resource, tmp, &resource->resource_list, list) { devlink_resource_unregister(devlink, child_resource); list_del(&child_resource->list); kfree(child_resource); } } /** * devl_resources_unregister - free all resources * * @devlink: devlink */ void devl_resources_unregister(struct devlink *devlink) { struct devlink_resource *tmp, *child_resource; lockdep_assert_held(&devlink->lock); list_for_each_entry_safe(child_resource, tmp, &devlink->resource_list, list) { devlink_resource_unregister(devlink, child_resource); list_del(&child_resource->list); kfree(child_resource); } } EXPORT_SYMBOL_GPL(devl_resources_unregister); /** * devlink_resources_unregister - free all resources * * @devlink: devlink * * Context: Takes and release devlink->lock <mutex>. */ void devlink_resources_unregister(struct devlink *devlink) { devl_lock(devlink); devl_resources_unregister(devlink); devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_resources_unregister); /** * devl_resource_size_get - get and update size * * @devlink: devlink * @resource_id: the requested resource id * @p_resource_size: ptr to update */ int devl_resource_size_get(struct devlink *devlink, u64 resource_id, u64 *p_resource_size) { struct devlink_resource *resource; lockdep_assert_held(&devlink->lock); resource = devlink_resource_find(devlink, NULL, resource_id); if (!resource) return -EINVAL; *p_resource_size = resource->size_new; resource->size = resource->size_new; return 0; } EXPORT_SYMBOL_GPL(devl_resource_size_get); /** * devl_resource_occ_get_register - register occupancy getter * * @devlink: devlink * @resource_id: resource id * @occ_get: occupancy getter callback * @occ_get_priv: occupancy getter callback priv */ void devl_resource_occ_get_register(struct devlink *devlink, u64 resource_id, devlink_resource_occ_get_t *occ_get, void *occ_get_priv) { struct devlink_resource *resource; lockdep_assert_held(&devlink->lock); resource = devlink_resource_find(devlink, NULL, resource_id); if (WARN_ON(!resource)) return; WARN_ON(resource->occ_get); resource->occ_get = occ_get; resource->occ_get_priv = occ_get_priv; } EXPORT_SYMBOL_GPL(devl_resource_occ_get_register); /** * devl_resource_occ_get_unregister - unregister occupancy getter * * @devlink: devlink * @resource_id: resource id */ void devl_resource_occ_get_unregister(struct devlink *devlink, u64 resource_id) { struct devlink_resource *resource; lockdep_assert_held(&devlink->lock); resource = devlink_resource_find(devlink, NULL, resource_id); if (WARN_ON(!resource)) return; WARN_ON(!resource->occ_get); resource->occ_get = NULL; resource->occ_get_priv = NULL; } EXPORT_SYMBOL_GPL(devl_resource_occ_get_unregister);
34 35 1 31 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 /* SPDX-License-Identifier: GPL-2.0-only */ /* * V9FS definitions. * * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> */ #ifndef FS_9P_V9FS_H #define FS_9P_V9FS_H #include <linux/backing-dev.h> #include <linux/netfs.h> /** * enum p9_session_flags - option flags for each 9P session * @V9FS_PROTO_2000U: whether or not to use 9P2000.u extensions * @V9FS_PROTO_2000L: whether or not to use 9P2000.l extensions * @V9FS_ACCESS_SINGLE: only the mounting user can access the hierarchy * @V9FS_ACCESS_USER: a new attach will be issued for every user (default) * @V9FS_ACCESS_CLIENT: Just like user, but access check is performed on client. * @V9FS_ACCESS_ANY: use a single attach for all users * @V9FS_ACCESS_MASK: bit mask of different ACCESS options * @V9FS_POSIX_ACL: POSIX ACLs are enforced * * Session flags reflect options selected by users at mount time */ #define V9FS_ACCESS_ANY (V9FS_ACCESS_SINGLE | \ V9FS_ACCESS_USER | \ V9FS_ACCESS_CLIENT) #define V9FS_ACCESS_MASK V9FS_ACCESS_ANY #define V9FS_ACL_MASK V9FS_POSIX_ACL enum p9_session_flags { V9FS_PROTO_2000U = 0x01, V9FS_PROTO_2000L = 0x02, V9FS_ACCESS_SINGLE = 0x04, V9FS_ACCESS_USER = 0x08, V9FS_ACCESS_CLIENT = 0x10, V9FS_POSIX_ACL = 0x20, V9FS_NO_XATTR = 0x40, V9FS_IGNORE_QV = 0x80, /* ignore qid.version for cache hints */ V9FS_DIRECT_IO = 0x100, V9FS_SYNC = 0x200 }; /** * enum p9_cache_shortcuts - human readable cache preferences * @CACHE_SC_NONE: disable all caches * @CACHE_SC_READAHEAD: only provide caching for readahead * @CACHE_SC_MMAP: provide caching to enable mmap * @CACHE_SC_LOOSE: non-coherent caching for files and meta data * @CACHE_SC_FSCACHE: persistent non-coherent caching for files and meta-data * */ enum p9_cache_shortcuts { CACHE_SC_NONE = 0b00000000, CACHE_SC_READAHEAD = 0b00000001, CACHE_SC_MMAP = 0b00000101, CACHE_SC_LOOSE = 0b00001111, CACHE_SC_FSCACHE = 0b10001111, }; /** * enum p9_cache_bits - possible values of ->cache * @CACHE_NONE: caches disabled * @CACHE_FILE: file caching (open to close) * @CACHE_META: meta-data and directory caching * @CACHE_WRITEBACK: write-back caching for files * @CACHE_LOOSE: don't check cache consistency * @CACHE_FSCACHE: local persistent caches * */ enum p9_cache_bits { CACHE_NONE = 0b00000000, CACHE_FILE = 0b00000001, CACHE_META = 0b00000010, CACHE_WRITEBACK = 0b00000100, CACHE_LOOSE = 0b00001000, CACHE_FSCACHE = 0b10000000, }; /** * struct v9fs_session_info - per-instance session information * @flags: session options of type &p9_session_flags * @nodev: set to 1 to disable device mapping * @debug: debug level * @afid: authentication handle * @cache: cache mode of type &p9_cache_bits * @cachetag: the tag of the cache associated with this session * @fscache: session cookie associated with FS-Cache * @uname: string user name to mount hierarchy as * @aname: mount specifier for remote hierarchy * @maxdata: maximum data to be sent/recvd per protocol message * @dfltuid: default numeric userid to mount hierarchy as * @dfltgid: default numeric groupid to mount hierarchy as * @uid: if %V9FS_ACCESS_SINGLE, the numeric uid which mounted the hierarchy * @clnt: reference to 9P network client instantiated for this session * @slist: reference to list of registered 9p sessions * * This structure holds state for each session instance established during * a sys_mount() . * * Bugs: there seems to be a lot of state which could be condensed and/or * removed. */ struct v9fs_session_info { /* options */ unsigned int flags; unsigned char nodev; unsigned short debug; unsigned int afid; unsigned int cache; #ifdef CONFIG_9P_FSCACHE char *cachetag; struct fscache_volume *fscache; #endif char *uname; /* user name to mount as */ char *aname; /* name of remote hierarchy being mounted */ unsigned int maxdata; /* max data for client interface */ kuid_t dfltuid; /* default uid/muid for legacy support */ kgid_t dfltgid; /* default gid for legacy support */ kuid_t uid; /* if ACCESS_SINGLE, the uid that has access */ struct p9_client *clnt; /* 9p client */ struct list_head slist; /* list of sessions registered with v9fs */ struct rw_semaphore rename_sem; long session_lock_timeout; /* retry interval for blocking locks */ }; /* cache_validity flags */ #define V9FS_INO_INVALID_ATTR 0x01 struct v9fs_inode { struct netfs_inode netfs; /* Netfslib context and vfs inode */ struct p9_qid qid; unsigned int cache_validity; struct mutex v_mutex; }; static inline struct v9fs_inode *V9FS_I(const struct inode *inode) { return container_of(inode, struct v9fs_inode, netfs.inode); } static inline struct fscache_cookie *v9fs_inode_cookie(struct v9fs_inode *v9inode) { #ifdef CONFIG_9P_FSCACHE return netfs_i_cookie(&v9inode->netfs); #else return NULL; #endif } static inline struct fscache_volume *v9fs_session_cache(struct v9fs_session_info *v9ses) { #ifdef CONFIG_9P_FSCACHE return v9ses->fscache; #else return NULL; #endif } extern int v9fs_show_options(struct seq_file *m, struct dentry *root); struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, const char *dev_name, char *data); extern void v9fs_session_close(struct v9fs_session_info *v9ses); extern void v9fs_session_cancel(struct v9fs_session_info *v9ses); extern void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses); extern struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); extern int v9fs_vfs_unlink(struct inode *i, struct dentry *d); extern int v9fs_vfs_rmdir(struct inode *i, struct dentry *d); extern int v9fs_vfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags); extern struct inode *v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, struct super_block *sb, int new); extern const struct inode_operations v9fs_dir_inode_operations_dotl; extern const struct inode_operations v9fs_file_inode_operations_dotl; extern const struct inode_operations v9fs_symlink_inode_operations_dotl; extern const struct netfs_request_ops v9fs_req_ops; extern struct inode *v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid, struct super_block *sb, int new); /* other default globals */ #define V9FS_PORT 564 #define V9FS_DEFUSER "nobody" #define V9FS_DEFANAME "" #define V9FS_DEFUID KUIDT_INIT(-2) #define V9FS_DEFGID KGIDT_INIT(-2) static inline struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode) { return inode->i_sb->s_fs_info; } static inline struct v9fs_session_info *v9fs_dentry2v9ses(const struct dentry *dentry) { return dentry->d_sb->s_fs_info; } static inline int v9fs_proto_dotu(struct v9fs_session_info *v9ses) { return v9ses->flags & V9FS_PROTO_2000U; } static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses) { return v9ses->flags & V9FS_PROTO_2000L; } /** * v9fs_get_inode_from_fid - Helper routine to populate an inode by * issuing a attribute request * @v9ses: session information * @fid: fid to issue attribute request for * @sb: superblock on which to create inode * */ static inline struct inode * v9fs_get_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, struct super_block *sb) { if (v9fs_proto_dotl(v9ses)) return v9fs_inode_from_fid_dotl(v9ses, fid, sb, 0); else return v9fs_inode_from_fid(v9ses, fid, sb, 0); } /** * v9fs_get_new_inode_from_fid - Helper routine to populate an inode by * issuing a attribute request * @v9ses: session information * @fid: fid to issue attribute request for * @sb: superblock on which to create inode * */ static inline struct inode * v9fs_get_new_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, struct super_block *sb) { if (v9fs_proto_dotl(v9ses)) return v9fs_inode_from_fid_dotl(v9ses, fid, sb, 1); else return v9fs_inode_from_fid(v9ses, fid, sb, 1); } #endif
115 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * x86-optimized SHA-512 block function * * Copyright 2025 Google LLC */ #include <asm/fpu/api.h> #include <linux/static_call.h> DEFINE_STATIC_CALL(sha512_blocks_x86, sha512_blocks_generic); #define DEFINE_X86_SHA512_FN(c_fn, asm_fn) \ asmlinkage void asm_fn(struct sha512_block_state *state, \ const u8 *data, size_t nblocks); \ static void c_fn(struct sha512_block_state *state, const u8 *data, \ size_t nblocks) \ { \ if (likely(irq_fpu_usable())) { \ kernel_fpu_begin(); \ asm_fn(state, data, nblocks); \ kernel_fpu_end(); \ } else { \ sha512_blocks_generic(state, data, nblocks); \ } \ } DEFINE_X86_SHA512_FN(sha512_blocks_ssse3, sha512_transform_ssse3); DEFINE_X86_SHA512_FN(sha512_blocks_avx, sha512_transform_avx); DEFINE_X86_SHA512_FN(sha512_blocks_avx2, sha512_transform_rorx); static void sha512_blocks(struct sha512_block_state *state, const u8 *data, size_t nblocks) { static_call(sha512_blocks_x86)(state, data, nblocks); } #define sha512_mod_init_arch sha512_mod_init_arch static void sha512_mod_init_arch(void) { if (cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL) && boot_cpu_has(X86_FEATURE_AVX)) { if (boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_BMI2)) static_call_update(sha512_blocks_x86, sha512_blocks_avx2); else static_call_update(sha512_blocks_x86, sha512_blocks_avx); } else if (boot_cpu_has(X86_FEATURE_SSSE3)) { static_call_update(sha512_blocks_x86, sha512_blocks_ssse3); } }
7 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 /* * Copyright (c) 2006-2008 Intel Corporation * Copyright (c) 2007 Dave Airlie <airlied@linux.ie> * * DRM core CRTC related functions * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that copyright * notice and this permission notice appear in supporting documentation, and * that the name of the copyright holders not be used in advertising or * publicity pertaining to distribution of the software without specific, * written prior permission. The copyright holders make no representations * about the suitability of this software for any purpose. It is provided "as * is" without express or implied warranty. * * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. * * Authors: * Keith Packard * Eric Anholt <eric@anholt.net> * Dave Airlie <airlied@linux.ie> * Jesse Barnes <jesse.barnes@intel.com> */ #include <linux/export.h> #include <linux/kernel.h> #include <linux/moduleparam.h> #include <linux/dynamic_debug.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_atomic_uapi.h> #include <drm/drm_bridge.h> #include <drm/drm_crtc.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_drv.h> #include <drm/drm_edid.h> #include <drm/drm_encoder.h> #include <drm/drm_fourcc.h> #include <drm/drm_framebuffer.h> #include <drm/drm_print.h> #include <drm/drm_vblank.h> #include "drm_crtc_helper_internal.h" DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0, "DRM_UT_CORE", "DRM_UT_DRIVER", "DRM_UT_KMS", "DRM_UT_PRIME", "DRM_UT_ATOMIC", "DRM_UT_VBL", "DRM_UT_STATE", "DRM_UT_LEASE", "DRM_UT_DP", "DRM_UT_DRMRES"); /** * DOC: overview * * The CRTC modeset helper library provides a default set_config implementation * in drm_crtc_helper_set_config(). Plus a few other convenience functions using * the same callbacks which drivers can use to e.g. restore the modeset * configuration on resume with drm_helper_resume_force_mode(). * * Note that this helper library doesn't track the current power state of CRTCs * and encoders. It can call callbacks like &drm_encoder_helper_funcs.dpms even * though the hardware is already in the desired state. This deficiency has been * fixed in the atomic helpers. * * The driver callbacks are mostly compatible with the atomic modeset helpers, * except for the handling of the primary plane: Atomic helpers require that the * primary plane is implemented as a real standalone plane and not directly tied * to the CRTC state. For easier transition this library provides functions to * implement the old semantics required by the CRTC helpers using the new plane * and atomic helper callbacks. * * Drivers are strongly urged to convert to the atomic helpers (by way of first * converting to the plane helpers). New drivers must not use these functions * but need to implement the atomic interface instead, potentially using the * atomic helpers for that. * * These legacy modeset helpers use the same function table structures as * all other modesetting helpers. See the documentation for struct * &drm_crtc_helper_funcs, &struct drm_encoder_helper_funcs and struct * &drm_connector_helper_funcs. */ /** * drm_helper_encoder_in_use - check if a given encoder is in use * @encoder: encoder to check * * Checks whether @encoder is with the current mode setting output configuration * in use by any connector. This doesn't mean that it is actually enabled since * the DPMS state is tracked separately. * * Returns: * True if @encoder is used, false otherwise. */ bool drm_helper_encoder_in_use(struct drm_encoder *encoder) { struct drm_connector *connector; struct drm_connector_list_iter conn_iter; struct drm_device *dev = encoder->dev; drm_WARN_ON(dev, drm_drv_uses_atomic_modeset(dev)); /* * We can expect this mutex to be locked if we are not panicking. * Locking is currently fubar in the panic handler. */ if (!oops_in_progress) { drm_WARN_ON(dev, !mutex_is_locked(&dev->mode_config.mutex)); drm_WARN_ON(dev, !drm_modeset_is_locked(&dev->mode_config.connection_mutex)); } drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { if (connector->encoder == encoder) { drm_connector_list_iter_end(&conn_iter); return true; } } drm_connector_list_iter_end(&conn_iter); return false; } EXPORT_SYMBOL(drm_helper_encoder_in_use); /** * drm_helper_crtc_in_use - check if a given CRTC is in a mode_config * @crtc: CRTC to check * * Checks whether @crtc is with the current mode setting output configuration * in use by any connector. This doesn't mean that it is actually enabled since * the DPMS state is tracked separately. * * Returns: * True if @crtc is used, false otherwise. */ bool drm_helper_crtc_in_use(struct drm_crtc *crtc) { struct drm_encoder *encoder; struct drm_device *dev = crtc->dev; drm_WARN_ON(dev, drm_drv_uses_atomic_modeset(dev)); /* * We can expect this mutex to be locked if we are not panicking. * Locking is currently fubar in the panic handler. */ if (!oops_in_progress) drm_WARN_ON(dev, !mutex_is_locked(&dev->mode_config.mutex)); drm_for_each_encoder(encoder, dev) if (encoder->crtc == crtc && drm_helper_encoder_in_use(encoder)) return true; return false; } EXPORT_SYMBOL(drm_helper_crtc_in_use); static void drm_encoder_disable(struct drm_encoder *encoder) { const struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private; if (!encoder_funcs) return; if (encoder_funcs->disable) (*encoder_funcs->disable)(encoder); else if (encoder_funcs->dpms) (*encoder_funcs->dpms)(encoder, DRM_MODE_DPMS_OFF); } static void __drm_helper_disable_unused_functions(struct drm_device *dev) { struct drm_encoder *encoder; struct drm_crtc *crtc; drm_warn_on_modeset_not_all_locked(dev); drm_for_each_encoder(encoder, dev) { if (!drm_helper_encoder_in_use(encoder)) { drm_encoder_disable(encoder); /* disconnect encoder from any connector */ encoder->crtc = NULL; } } drm_for_each_crtc(crtc, dev) { const struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; crtc->enabled = drm_helper_crtc_in_use(crtc); if (!crtc->enabled) { if (crtc_funcs->disable) (*crtc_funcs->disable)(crtc); else (*crtc_funcs->dpms)(crtc, DRM_MODE_DPMS_OFF); crtc->primary->fb = NULL; } } } /** * drm_helper_disable_unused_functions - disable unused objects * @dev: DRM device * * This function walks through the entire mode setting configuration of @dev. It * will remove any CRTC links of unused encoders and encoder links of * disconnected connectors. Then it will disable all unused encoders and CRTCs * either by calling their disable callback if available or by calling their * dpms callback with DRM_MODE_DPMS_OFF. * * NOTE: * * This function is part of the legacy modeset helper library and will cause * major confusion with atomic drivers. This is because atomic helpers guarantee * to never call ->disable() hooks on a disabled function, or ->enable() hooks * on an enabled functions. drm_helper_disable_unused_functions() on the other * hand throws such guarantees into the wind and calls disable hooks * unconditionally on unused functions. */ void drm_helper_disable_unused_functions(struct drm_device *dev) { drm_WARN_ON(dev, drm_drv_uses_atomic_modeset(dev)); drm_modeset_lock_all(dev); __drm_helper_disable_unused_functions(dev); drm_modeset_unlock_all(dev); } EXPORT_SYMBOL(drm_helper_disable_unused_functions); /* * Check the CRTC we're going to map each output to vs. its current * CRTC. If they don't match, we have to disable the output and the CRTC * since the driver will have to re-route things. */ static void drm_crtc_prepare_encoders(struct drm_device *dev) { const struct drm_encoder_helper_funcs *encoder_funcs; struct drm_encoder *encoder; drm_for_each_encoder(encoder, dev) { encoder_funcs = encoder->helper_private; if (!encoder_funcs) continue; /* Disable unused encoders */ if (encoder->crtc == NULL) drm_encoder_disable(encoder); } } /** * drm_crtc_helper_set_mode - internal helper to set a mode * @crtc: CRTC to program * @mode: mode to use * @x: horizontal offset into the surface * @y: vertical offset into the surface * @old_fb: old framebuffer, for cleanup * * Try to set @mode on @crtc. Give @crtc and its associated connectors a chance * to fixup or reject the mode prior to trying to set it. This is an internal * helper that drivers could e.g. use to update properties that require the * entire output pipe to be disabled and re-enabled in a new configuration. For * example for changing whether audio is enabled on a hdmi link or for changing * panel fitter or dither attributes. It is also called by the * drm_crtc_helper_set_config() helper function to drive the mode setting * sequence. * * Returns: * True if the mode was set successfully, false otherwise. */ bool drm_crtc_helper_set_mode(struct drm_crtc *crtc, struct drm_display_mode *mode, int x, int y, struct drm_framebuffer *old_fb) { struct drm_device *dev = crtc->dev; struct drm_display_mode *adjusted_mode, saved_mode, saved_hwmode; const struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; const struct drm_encoder_helper_funcs *encoder_funcs; int saved_x, saved_y; bool saved_enabled; struct drm_encoder *encoder; bool ret = true; drm_WARN_ON(dev, drm_drv_uses_atomic_modeset(dev)); drm_warn_on_modeset_not_all_locked(dev); saved_enabled = crtc->enabled; crtc->enabled = drm_helper_crtc_in_use(crtc); if (!crtc->enabled) return true; adjusted_mode = drm_mode_duplicate(dev, mode); if (!adjusted_mode) { crtc->enabled = saved_enabled; return false; } drm_mode_init(&saved_mode, &crtc->mode); drm_mode_init(&saved_hwmode, &crtc->hwmode); saved_x = crtc->x; saved_y = crtc->y; /* Update crtc values up front so the driver can rely on them for mode * setting. */ drm_mode_copy(&crtc->mode, mode); crtc->x = x; crtc->y = y; /* Pass our mode to the connectors and the CRTC to give them a chance to * adjust it according to limitations or connector properties, and also * a chance to reject the mode entirely. */ drm_for_each_encoder(encoder, dev) { if (encoder->crtc != crtc) continue; encoder_funcs = encoder->helper_private; if (!encoder_funcs) continue; if (encoder_funcs->mode_fixup) { if (!(ret = encoder_funcs->mode_fixup(encoder, mode, adjusted_mode))) { drm_dbg_kms(dev, "[ENCODER:%d:%s] mode fixup failed\n", encoder->base.id, encoder->name); goto done; } } } if (crtc_funcs->mode_fixup) { if (!(ret = crtc_funcs->mode_fixup(crtc, mode, adjusted_mode))) { drm_dbg_kms(dev, "[CRTC:%d:%s] mode fixup failed\n", crtc->base.id, crtc->name); goto done; } } drm_dbg_kms(dev, "[CRTC:%d:%s]\n", crtc->base.id, crtc->name); drm_mode_copy(&crtc->hwmode, adjusted_mode); /* Prepare the encoders and CRTCs before setting the mode. */ drm_for_each_encoder(encoder, dev) { if (encoder->crtc != crtc) continue; encoder_funcs = encoder->helper_private; if (!encoder_funcs) continue; /* Disable the encoders as the first thing we do. */ if (encoder_funcs->prepare) encoder_funcs->prepare(encoder); } drm_crtc_prepare_encoders(dev); crtc_funcs->prepare(crtc); /* Set up the DPLL and any encoders state that needs to adjust or depend * on the DPLL. */ ret = !crtc_funcs->mode_set(crtc, mode, adjusted_mode, x, y, old_fb); if (!ret) goto done; drm_for_each_encoder(encoder, dev) { if (encoder->crtc != crtc) continue; encoder_funcs = encoder->helper_private; if (!encoder_funcs) continue; drm_dbg_kms(dev, "[ENCODER:%d:%s] set [MODE:%s]\n", encoder->base.id, encoder->name, mode->name); if (encoder_funcs->mode_set) encoder_funcs->mode_set(encoder, mode, adjusted_mode); } /* Now enable the clocks, plane, pipe, and connectors that we set up. */ crtc_funcs->commit(crtc); drm_for_each_encoder(encoder, dev) { if (encoder->crtc != crtc) continue; encoder_funcs = encoder->helper_private; if (!encoder_funcs) continue; if (encoder_funcs->commit) encoder_funcs->commit(encoder); } /* Calculate and store various constants which * are later needed by vblank and swap-completion * timestamping. They are derived from true hwmode. */ drm_calc_timestamping_constants(crtc, &crtc->hwmode); /* FIXME: add subpixel order */ done: drm_mode_destroy(dev, adjusted_mode); if (!ret) { crtc->enabled = saved_enabled; drm_mode_copy(&crtc->mode, &saved_mode); drm_mode_copy(&crtc->hwmode, &saved_hwmode); crtc->x = saved_x; crtc->y = saved_y; } return ret; } EXPORT_SYMBOL(drm_crtc_helper_set_mode); /** * drm_crtc_helper_atomic_check() - Helper to check CRTC atomic-state * @crtc: CRTC to check * @state: atomic state object * * Provides a default CRTC-state check handler for CRTCs that only have * one primary plane attached to it. This is often the case for the CRTC * of simple framebuffers. * * RETURNS: * Zero on success, or an errno code otherwise. */ int drm_crtc_helper_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state) { struct drm_crtc_state *new_crtc_state = drm_atomic_get_new_crtc_state(state, crtc); if (!new_crtc_state->enable) return 0; return drm_atomic_helper_check_crtc_primary_plane(new_crtc_state); } EXPORT_SYMBOL(drm_crtc_helper_atomic_check); static void drm_crtc_helper_disable(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct drm_connector *connector; struct drm_encoder *encoder; /* Decouple all encoders and their attached connectors from this crtc */ drm_for_each_encoder(encoder, dev) { struct drm_connector_list_iter conn_iter; if (encoder->crtc != crtc) continue; drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { if (connector->encoder != encoder) continue; connector->encoder = NULL; /* * drm_helper_disable_unused_functions() ought to be * doing this, but since we've decoupled the encoder * from the connector above, the required connection * between them is henceforth no longer available. */ connector->dpms = DRM_MODE_DPMS_OFF; /* we keep a reference while the encoder is bound */ drm_connector_put(connector); } drm_connector_list_iter_end(&conn_iter); } __drm_helper_disable_unused_functions(dev); } /* * For connectors that support multiple encoders, either the * .atomic_best_encoder() or .best_encoder() operation must be implemented. */ struct drm_encoder * drm_connector_get_single_encoder(struct drm_connector *connector) { struct drm_encoder *encoder; drm_WARN_ON(connector->dev, hweight32(connector->possible_encoders) > 1); drm_connector_for_each_possible_encoder(connector, encoder) return encoder; return NULL; } /** * drm_crtc_helper_set_config - set a new config from userspace * @set: mode set configuration * @ctx: lock acquire context, not used here * * The drm_crtc_helper_set_config() helper function implements the of * &drm_crtc_funcs.set_config callback for drivers using the legacy CRTC * helpers. * * It first tries to locate the best encoder for each connector by calling the * connector @drm_connector_helper_funcs.best_encoder helper operation. * * After locating the appropriate encoders, the helper function will call the * mode_fixup encoder and CRTC helper operations to adjust the requested mode, * or reject it completely in which case an error will be returned to the * application. If the new configuration after mode adjustment is identical to * the current configuration the helper function will return without performing * any other operation. * * If the adjusted mode is identical to the current mode but changes to the * frame buffer need to be applied, the drm_crtc_helper_set_config() function * will call the CRTC &drm_crtc_helper_funcs.mode_set_base helper operation. * * If the adjusted mode differs from the current mode, or if the * ->mode_set_base() helper operation is not provided, the helper function * performs a full mode set sequence by calling the ->prepare(), ->mode_set() * and ->commit() CRTC and encoder helper operations, in that order. * Alternatively it can also use the dpms and disable helper operations. For * details see &struct drm_crtc_helper_funcs and struct * &drm_encoder_helper_funcs. * * This function is deprecated. New drivers must implement atomic modeset * support, for which this function is unsuitable. Instead drivers should use * drm_atomic_helper_set_config(). * * Returns: * Returns 0 on success, negative errno numbers on failure. */ int drm_crtc_helper_set_config(struct drm_mode_set *set, struct drm_modeset_acquire_ctx *ctx) { struct drm_device *dev; struct drm_crtc **save_encoder_crtcs, *new_crtc; struct drm_encoder **save_connector_encoders, *new_encoder, *encoder; bool mode_changed = false; /* if true do a full mode set */ bool fb_changed = false; /* if true and !mode_changed just do a flip */ struct drm_connector *connector; struct drm_connector_list_iter conn_iter; int count = 0, ro, fail = 0; const struct drm_crtc_helper_funcs *crtc_funcs; struct drm_mode_set save_set; int ret; int i; BUG_ON(!set); BUG_ON(!set->crtc); BUG_ON(!set->crtc->helper_private); /* Enforce sane interface api - has been abused by the fb helper. */ BUG_ON(!set->mode && set->fb); BUG_ON(set->fb && set->num_connectors == 0); crtc_funcs = set->crtc->helper_private; dev = set->crtc->dev; drm_dbg_kms(dev, "\n"); drm_WARN_ON(dev, drm_drv_uses_atomic_modeset(dev)); if (!set->mode) set->fb = NULL; if (set->fb) { drm_dbg_kms(dev, "[CRTC:%d:%s] [FB:%d] #connectors=%d (x y) (%i %i)\n", set->crtc->base.id, set->crtc->name, set->fb->base.id, (int)set->num_connectors, set->x, set->y); } else { drm_dbg_kms(dev, "[CRTC:%d:%s] [NOFB]\n", set->crtc->base.id, set->crtc->name); drm_crtc_helper_disable(set->crtc); return 0; } drm_warn_on_modeset_not_all_locked(dev); /* * Allocate space for the backup of all (non-pointer) encoder and * connector data. */ save_encoder_crtcs = kcalloc(dev->mode_config.num_encoder, sizeof(struct drm_crtc *), GFP_KERNEL); if (!save_encoder_crtcs) return -ENOMEM; save_connector_encoders = kcalloc(dev->mode_config.num_connector, sizeof(struct drm_encoder *), GFP_KERNEL); if (!save_connector_encoders) { kfree(save_encoder_crtcs); return -ENOMEM; } /* * Copy data. Note that driver private data is not affected. * Should anything bad happen only the expected state is * restored, not the drivers personal bookkeeping. */ count = 0; drm_for_each_encoder(encoder, dev) { save_encoder_crtcs[count++] = encoder->crtc; } count = 0; drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) save_connector_encoders[count++] = connector->encoder; drm_connector_list_iter_end(&conn_iter); save_set.crtc = set->crtc; save_set.mode = &set->crtc->mode; save_set.x = set->crtc->x; save_set.y = set->crtc->y; save_set.fb = set->crtc->primary->fb; /* We should be able to check here if the fb has the same properties * and then just flip_or_move it */ if (set->crtc->primary->fb != set->fb) { /* If we have no fb then treat it as a full mode set */ if (set->crtc->primary->fb == NULL) { drm_dbg_kms(dev, "[CRTC:%d:%s] no fb, full mode set\n", set->crtc->base.id, set->crtc->name); mode_changed = true; } else if (set->fb->format != set->crtc->primary->fb->format) { mode_changed = true; } else fb_changed = true; } if (set->x != set->crtc->x || set->y != set->crtc->y) fb_changed = true; if (!drm_mode_equal(set->mode, &set->crtc->mode)) { drm_dbg_kms(dev, "[CRTC:%d:%s] modes are different, full mode set:\n", set->crtc->base.id, set->crtc->name); drm_dbg_kms(dev, DRM_MODE_FMT "\n", DRM_MODE_ARG(&set->crtc->mode)); drm_dbg_kms(dev, DRM_MODE_FMT "\n", DRM_MODE_ARG(set->mode)); mode_changed = true; } /* take a reference on all unbound connectors in set, reuse the * already taken reference for bound connectors */ for (ro = 0; ro < set->num_connectors; ro++) { if (set->connectors[ro]->encoder) continue; drm_connector_get(set->connectors[ro]); } /* a) traverse passed in connector list and get encoders for them */ count = 0; drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private; new_encoder = connector->encoder; for (ro = 0; ro < set->num_connectors; ro++) { if (set->connectors[ro] == connector) { if (connector_funcs->best_encoder) new_encoder = connector_funcs->best_encoder(connector); else new_encoder = drm_connector_get_single_encoder(connector); /* if we can't get an encoder for a connector we are setting now - then fail */ if (new_encoder == NULL) /* don't break so fail path works correct */ fail = 1; if (connector->dpms != DRM_MODE_DPMS_ON) { drm_dbg_kms(dev, "[CONNECTOR:%d:%s] DPMS not on, full mode switch\n", connector->base.id, connector->name); mode_changed = true; } break; } } if (new_encoder != connector->encoder) { drm_dbg_kms(dev, "[CONNECTOR:%d:%s] encoder changed, full mode switch\n", connector->base.id, connector->name); mode_changed = true; /* If the encoder is reused for another connector, then * the appropriate crtc will be set later. */ if (connector->encoder) connector->encoder->crtc = NULL; connector->encoder = new_encoder; } } drm_connector_list_iter_end(&conn_iter); if (fail) { ret = -EINVAL; goto fail; } count = 0; drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { if (!connector->encoder) continue; if (connector->encoder->crtc == set->crtc) new_crtc = NULL; else new_crtc = connector->encoder->crtc; for (ro = 0; ro < set->num_connectors; ro++) { if (set->connectors[ro] == connector) new_crtc = set->crtc; } /* Make sure the new CRTC will work with the encoder */ if (new_crtc && !drm_encoder_crtc_ok(connector->encoder, new_crtc)) { ret = -EINVAL; drm_connector_list_iter_end(&conn_iter); goto fail; } if (new_crtc != connector->encoder->crtc) { drm_dbg_kms(dev, "[CONNECTOR:%d:%s] CRTC changed, full mode switch\n", connector->base.id, connector->name); mode_changed = true; connector->encoder->crtc = new_crtc; } if (new_crtc) { drm_dbg_kms(dev, "[CONNECTOR:%d:%s] to [CRTC:%d:%s]\n", connector->base.id, connector->name, new_crtc->base.id, new_crtc->name); } else { drm_dbg_kms(dev, "[CONNECTOR:%d:%s] to [NOCRTC]\n", connector->base.id, connector->name); } } drm_connector_list_iter_end(&conn_iter); /* mode_set_base is not a required function */ if (fb_changed && !crtc_funcs->mode_set_base) mode_changed = true; if (mode_changed) { if (drm_helper_crtc_in_use(set->crtc)) { drm_dbg_kms(dev, "[CRTC:%d:%s] attempting to set mode from userspace: " DRM_MODE_FMT "\n", set->crtc->base.id, set->crtc->name, DRM_MODE_ARG(set->mode)); set->crtc->primary->fb = set->fb; if (!drm_crtc_helper_set_mode(set->crtc, set->mode, set->x, set->y, save_set.fb)) { drm_err(dev, "[CRTC:%d:%s] failed to set mode\n", set->crtc->base.id, set->crtc->name); set->crtc->primary->fb = save_set.fb; ret = -EINVAL; goto fail; } drm_dbg_kms(dev, "[CRTC:%d:%s] Setting connector DPMS state to on\n", set->crtc->base.id, set->crtc->name); for (i = 0; i < set->num_connectors; i++) { drm_dbg_kms(dev, "\t[CONNECTOR:%d:%s] set DPMS on\n", set->connectors[i]->base.id, set->connectors[i]->name); set->connectors[i]->funcs->dpms(set->connectors[i], DRM_MODE_DPMS_ON); } } __drm_helper_disable_unused_functions(dev); } else if (fb_changed) { set->crtc->x = set->x; set->crtc->y = set->y; set->crtc->primary->fb = set->fb; ret = crtc_funcs->mode_set_base(set->crtc, set->x, set->y, save_set.fb); if (ret != 0) { set->crtc->x = save_set.x; set->crtc->y = save_set.y; set->crtc->primary->fb = save_set.fb; goto fail; } } kfree(save_connector_encoders); kfree(save_encoder_crtcs); return 0; fail: /* Restore all previous data. */ count = 0; drm_for_each_encoder(encoder, dev) { encoder->crtc = save_encoder_crtcs[count++]; } count = 0; drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) connector->encoder = save_connector_encoders[count++]; drm_connector_list_iter_end(&conn_iter); /* after fail drop reference on all unbound connectors in set, let * bound connectors keep their reference */ for (ro = 0; ro < set->num_connectors; ro++) { if (set->connectors[ro]->encoder) continue; drm_connector_put(set->connectors[ro]); } /* Try to restore the config */ if (mode_changed && !drm_crtc_helper_set_mode(save_set.crtc, save_set.mode, save_set.x, save_set.y, save_set.fb)) drm_err(dev, "failed to restore config after modeset failure\n"); kfree(save_connector_encoders); kfree(save_encoder_crtcs); return ret; } EXPORT_SYMBOL(drm_crtc_helper_set_config); static int drm_helper_choose_encoder_dpms(struct drm_encoder *encoder) { int dpms = DRM_MODE_DPMS_OFF; struct drm_connector *connector; struct drm_connector_list_iter conn_iter; struct drm_device *dev = encoder->dev; drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) if (connector->encoder == encoder) if (connector->dpms < dpms) dpms = connector->dpms; drm_connector_list_iter_end(&conn_iter); return dpms; } /* Helper which handles bridge ordering around encoder dpms */ static void drm_helper_encoder_dpms(struct drm_encoder *encoder, int mode) { const struct drm_encoder_helper_funcs *encoder_funcs; encoder_funcs = encoder->helper_private; if (!encoder_funcs) return; if (encoder_funcs->dpms) encoder_funcs->dpms(encoder, mode); } static int drm_helper_choose_crtc_dpms(struct drm_crtc *crtc) { int dpms = DRM_MODE_DPMS_OFF; struct drm_connector *connector; struct drm_connector_list_iter conn_iter; struct drm_device *dev = crtc->dev; drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) if (connector->encoder && connector->encoder->crtc == crtc) if (connector->dpms < dpms) dpms = connector->dpms; drm_connector_list_iter_end(&conn_iter); return dpms; } /** * drm_helper_connector_dpms() - connector dpms helper implementation * @connector: affected connector * @mode: DPMS mode * * The drm_helper_connector_dpms() helper function implements the * &drm_connector_funcs.dpms callback for drivers using the legacy CRTC * helpers. * * This is the main helper function provided by the CRTC helper framework for * implementing the DPMS connector attribute. It computes the new desired DPMS * state for all encoders and CRTCs in the output mesh and calls the * &drm_crtc_helper_funcs.dpms and &drm_encoder_helper_funcs.dpms callbacks * provided by the driver. * * This function is deprecated. New drivers must implement atomic modeset * support, where DPMS is handled in the DRM core. * * Returns: * Always returns 0. */ int drm_helper_connector_dpms(struct drm_connector *connector, int mode) { struct drm_encoder *encoder = connector->encoder; struct drm_crtc *crtc = encoder ? encoder->crtc : NULL; int old_dpms, encoder_dpms = DRM_MODE_DPMS_OFF; drm_WARN_ON(connector->dev, drm_drv_uses_atomic_modeset(connector->dev)); if (mode == connector->dpms) return 0; old_dpms = connector->dpms; connector->dpms = mode; if (encoder) encoder_dpms = drm_helper_choose_encoder_dpms(encoder); /* from off to on, do crtc then encoder */ if (mode < old_dpms) { if (crtc) { const struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; if (crtc_funcs->dpms) (*crtc_funcs->dpms) (crtc, drm_helper_choose_crtc_dpms(crtc)); } if (encoder) drm_helper_encoder_dpms(encoder, encoder_dpms); } /* from on to off, do encoder then crtc */ if (mode > old_dpms) { if (encoder) drm_helper_encoder_dpms(encoder, encoder_dpms); if (crtc) { const struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; if (crtc_funcs->dpms) (*crtc_funcs->dpms) (crtc, drm_helper_choose_crtc_dpms(crtc)); } } return 0; } EXPORT_SYMBOL(drm_helper_connector_dpms); /** * drm_helper_resume_force_mode - force-restore mode setting configuration * @dev: drm_device which should be restored * * Drivers which use the mode setting helpers can use this function to * force-restore the mode setting configuration e.g. on resume or when something * else might have trampled over the hw state (like some overzealous old BIOSen * tended to do). * * This helper doesn't provide a error return value since restoring the old * config should never fail due to resource allocation issues since the driver * has successfully set the restored configuration already. Hence this should * boil down to the equivalent of a few dpms on calls, which also don't provide * an error code. * * Drivers where simply restoring an old configuration again might fail (e.g. * due to slight differences in allocating shared resources when the * configuration is restored in a different order than when userspace set it up) * need to use their own restore logic. * * This function is deprecated. New drivers should implement atomic mode- * setting and use the atomic suspend/resume helpers. * * See also: * drm_atomic_helper_suspend(), drm_atomic_helper_resume() */ void drm_helper_resume_force_mode(struct drm_device *dev) { struct drm_crtc *crtc; struct drm_encoder *encoder; const struct drm_crtc_helper_funcs *crtc_funcs; int encoder_dpms; bool ret; drm_WARN_ON(dev, drm_drv_uses_atomic_modeset(dev)); drm_modeset_lock_all(dev); drm_for_each_crtc(crtc, dev) { if (!crtc->enabled) continue; ret = drm_crtc_helper_set_mode(crtc, &crtc->mode, crtc->x, crtc->y, crtc->primary->fb); /* Restoring the old config should never fail! */ if (ret == false) drm_err(dev, "failed to set mode on crtc %p\n", crtc); /* Turn off outputs that were already powered off */ if (drm_helper_choose_crtc_dpms(crtc)) { drm_for_each_encoder(encoder, dev) { if(encoder->crtc != crtc) continue; encoder_dpms = drm_helper_choose_encoder_dpms( encoder); drm_helper_encoder_dpms(encoder, encoder_dpms); } crtc_funcs = crtc->helper_private; if (crtc_funcs->dpms) (*crtc_funcs->dpms) (crtc, drm_helper_choose_crtc_dpms(crtc)); } } /* disable the unused connectors while restoring the modesetting */ __drm_helper_disable_unused_functions(dev); drm_modeset_unlock_all(dev); } EXPORT_SYMBOL(drm_helper_resume_force_mode); /** * drm_helper_force_disable_all - Forcibly turn off all enabled CRTCs * @dev: DRM device whose CRTCs to turn off * * Drivers may want to call this on unload to ensure that all displays are * unlit and the GPU is in a consistent, low power state. Takes modeset locks. * * Note: This should only be used by non-atomic legacy drivers. For an atomic * version look at drm_atomic_helper_shutdown(). * * Returns: * Zero on success, error code on failure. */ int drm_helper_force_disable_all(struct drm_device *dev) { struct drm_crtc *crtc; int ret = 0; drm_modeset_lock_all(dev); drm_for_each_crtc(crtc, dev) if (crtc->enabled) { struct drm_mode_set set = { .crtc = crtc, }; ret = drm_mode_set_config_internal(&set); if (ret) goto out; } out: drm_modeset_unlock_all(dev); return ret; } EXPORT_SYMBOL(drm_helper_force_disable_all);
248 248 249 1434 1436 1437 805 1432 438 394 169 119 120 120 120 101 101 41 546 545 439 437 121 1 120 828 832 547 547 635 635 636 166 513 207 29 29 29 6 23 638 638 637 638 191 23 1 635 37 1 35 8 8 10 10 7 10 9 9 11 11 8 11 8 11 14 14 14 14 14 14 3 28 27 27 27 28 27 33 31 31 26 33 30 33 5 28 268 9 9 9 8 7 18 223 18 18 13 12 13 13 13 12 12 12 11 10 12 1 12 8 6 11 11 12 1 1 1 16 17 17 5 17 17 17 10 7 17 17 17 17 15 17 17 17 835 835 542 542 541 20 540 542 542 540 249 5 248 249 1443 11 1446 1443 1431 1138 835 993 265 224 249 249 107 3 113 2 1 2 256 16 998 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich */ #include "hard-interface.h" #include "main.h" #include <linux/atomic.h> #include <linux/byteorder/generic.h> #include <linux/compiler.h> #include <linux/container_of.h> #include <linux/errno.h> #include <linux/gfp.h> #include <linux/if.h> #include <linux/if_arp.h> #include <linux/if_ether.h> #include <linux/kref.h> #include <linux/limits.h> #include <linux/list.h> #include <linux/minmax.h> #include <linux/mutex.h> #include <linux/netdevice.h> #include <linux/notifier.h> #include <linux/printk.h> #include <linux/rculist.h> #include <linux/rtnetlink.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <net/net_namespace.h> #include <net/rtnetlink.h> #include <uapi/linux/batadv_packet.h> #include "bat_v.h" #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" #include "gateway_client.h" #include "log.h" #include "mesh-interface.h" #include "originator.h" #include "send.h" #include "translation-table.h" /** * batadv_hardif_release() - release hard interface from lists and queue for * free after rcu grace period * @ref: kref pointer of the hard interface */ void batadv_hardif_release(struct kref *ref) { struct batadv_hard_iface *hard_iface; hard_iface = container_of(ref, struct batadv_hard_iface, refcount); netdev_put(hard_iface->net_dev, &hard_iface->dev_tracker); kfree_rcu(hard_iface, rcu); } /** * batadv_hardif_get_by_netdev() - Get hard interface object of a net_device * @net_dev: net_device to search for * * Return: batadv_hard_iface of net_dev (with increased refcnt), NULL on errors */ struct batadv_hard_iface * batadv_hardif_get_by_netdev(const struct net_device *net_dev) { struct batadv_hard_iface *hard_iface; rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { if (hard_iface->net_dev == net_dev && kref_get_unless_zero(&hard_iface->refcount)) goto out; } hard_iface = NULL; out: rcu_read_unlock(); return hard_iface; } /** * batadv_getlink_net() - return link net namespace (of use fallback) * @netdev: net_device to check * @fallback_net: return in case get_link_net is not available for @netdev * * Return: result of rtnl_link_ops->get_link_net or @fallback_net */ static struct net *batadv_getlink_net(const struct net_device *netdev, struct net *fallback_net) { if (!netdev->rtnl_link_ops) return fallback_net; if (!netdev->rtnl_link_ops->get_link_net) return fallback_net; return netdev->rtnl_link_ops->get_link_net(netdev); } /** * batadv_mutual_parents() - check if two devices are each others parent * @dev1: 1st net dev * @net1: 1st devices netns * @dev2: 2nd net dev * @net2: 2nd devices netns * * veth devices come in pairs and each is the parent of the other! * * Return: true if the devices are each others parent, otherwise false */ static bool batadv_mutual_parents(const struct net_device *dev1, struct net *net1, const struct net_device *dev2, struct net *net2) { int dev1_parent_iflink = dev_get_iflink(dev1); int dev2_parent_iflink = dev_get_iflink(dev2); const struct net *dev1_parent_net; const struct net *dev2_parent_net; dev1_parent_net = batadv_getlink_net(dev1, net1); dev2_parent_net = batadv_getlink_net(dev2, net2); if (!dev1_parent_iflink || !dev2_parent_iflink) return false; return (dev1_parent_iflink == dev2->ifindex) && (dev2_parent_iflink == dev1->ifindex) && net_eq(dev1_parent_net, net2) && net_eq(dev2_parent_net, net1); } /** * batadv_is_on_batman_iface() - check if a device is a batman iface descendant * @net_dev: the device to check * * If the user creates any virtual device on top of a batman-adv interface, it * is important to prevent this new interface from being used to create a new * mesh network (this behaviour would lead to a batman-over-batman * configuration). This function recursively checks all the fathers of the * device passed as argument looking for a batman-adv mesh interface. * * Return: true if the device is descendant of a batman-adv mesh interface (or * if it is a batman-adv interface itself), false otherwise */ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) { struct net *net = dev_net(net_dev); struct net_device *parent_dev; struct net *parent_net; int iflink; bool ret; /* check if this is a batman-adv mesh interface */ if (batadv_meshif_is_valid(net_dev)) return true; iflink = dev_get_iflink(net_dev); if (iflink == 0) return false; parent_net = batadv_getlink_net(net_dev, net); /* iflink to itself, most likely physical device */ if (net == parent_net && iflink == net_dev->ifindex) return false; /* recurse over the parent device */ parent_dev = __dev_get_by_index((struct net *)parent_net, iflink); if (!parent_dev) { pr_warn("Cannot find parent device. Skipping batadv-on-batadv check for %s\n", net_dev->name); return false; } if (batadv_mutual_parents(net_dev, net, parent_dev, parent_net)) return false; ret = batadv_is_on_batman_iface(parent_dev); return ret; } static bool batadv_is_valid_iface(const struct net_device *net_dev) { if (net_dev->flags & IFF_LOOPBACK) return false; if (net_dev->type != ARPHRD_ETHER) return false; if (net_dev->addr_len != ETH_ALEN) return false; /* no batman over batman */ if (batadv_is_on_batman_iface(net_dev)) return false; return true; } /** * batadv_get_real_netdevice() - check if the given netdev struct is a virtual * interface on top of another 'real' interface * @netdev: the device to check * * Callers must hold the rtnl semaphore. You may want batadv_get_real_netdev() * instead of this. * * Return: the 'real' net device or the original net device and NULL in case * of an error. */ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev) { struct batadv_hard_iface *hard_iface = NULL; struct net_device *real_netdev = NULL; struct net *real_net; struct net *net; int iflink; ASSERT_RTNL(); if (!netdev) return NULL; iflink = dev_get_iflink(netdev); if (iflink == 0) { dev_hold(netdev); return netdev; } hard_iface = batadv_hardif_get_by_netdev(netdev); if (!hard_iface || !hard_iface->mesh_iface) goto out; net = dev_net(hard_iface->mesh_iface); real_net = batadv_getlink_net(netdev, net); /* iflink to itself, most likely physical device */ if (net == real_net && netdev->ifindex == iflink) { real_netdev = netdev; dev_hold(real_netdev); goto out; } real_netdev = dev_get_by_index(real_net, iflink); out: batadv_hardif_put(hard_iface); return real_netdev; } /** * batadv_get_real_netdev() - check if the given net_device struct is a virtual * interface on top of another 'real' interface * @net_device: the device to check * * Return: the 'real' net device or the original net device and NULL in case * of an error. */ struct net_device *batadv_get_real_netdev(struct net_device *net_device) { struct net_device *real_netdev; rtnl_lock(); real_netdev = batadv_get_real_netdevice(net_device); rtnl_unlock(); return real_netdev; } /** * batadv_is_wext_netdev() - check if the given net_device struct is a * wext wifi interface * @net_device: the device to check * * Return: true if the net device is a wext wireless device, false * otherwise. */ static bool batadv_is_wext_netdev(struct net_device *net_device) { if (!net_device) return false; #ifdef CONFIG_WIRELESS_EXT /* pre-cfg80211 drivers have to implement WEXT, so it is possible to * check for wireless_handlers != NULL */ if (net_device->wireless_handlers) return true; #endif return false; } /** * batadv_is_cfg80211_netdev() - check if the given net_device struct is a * cfg80211 wifi interface * @net_device: the device to check * * Return: true if the net device is a cfg80211 wireless device, false * otherwise. */ static bool batadv_is_cfg80211_netdev(struct net_device *net_device) { if (!net_device) return false; #if IS_ENABLED(CONFIG_CFG80211) /* cfg80211 drivers have to set ieee80211_ptr */ if (net_device->ieee80211_ptr) return true; #endif return false; } /** * batadv_wifi_flags_evaluate() - calculate wifi flags for net_device * @net_device: the device to check * * Return: batadv_hard_iface_wifi_flags flags of the device */ static u32 batadv_wifi_flags_evaluate(struct net_device *net_device) { u32 wifi_flags = 0; struct net_device *real_netdev; if (batadv_is_wext_netdev(net_device)) wifi_flags |= BATADV_HARDIF_WIFI_WEXT_DIRECT; if (batadv_is_cfg80211_netdev(net_device)) wifi_flags |= BATADV_HARDIF_WIFI_CFG80211_DIRECT; real_netdev = batadv_get_real_netdevice(net_device); if (!real_netdev) return wifi_flags; if (real_netdev == net_device) goto out; if (batadv_is_wext_netdev(real_netdev)) wifi_flags |= BATADV_HARDIF_WIFI_WEXT_INDIRECT; if (batadv_is_cfg80211_netdev(real_netdev)) wifi_flags |= BATADV_HARDIF_WIFI_CFG80211_INDIRECT; out: dev_put(real_netdev); return wifi_flags; } /** * batadv_is_cfg80211_hardif() - check if the given hardif is a cfg80211 wifi * interface * @hard_iface: the device to check * * Return: true if the net device is a cfg80211 wireless device, false * otherwise. */ bool batadv_is_cfg80211_hardif(struct batadv_hard_iface *hard_iface) { u32 allowed_flags = 0; allowed_flags |= BATADV_HARDIF_WIFI_CFG80211_DIRECT; allowed_flags |= BATADV_HARDIF_WIFI_CFG80211_INDIRECT; return !!(hard_iface->wifi_flags & allowed_flags); } /** * batadv_is_wifi_hardif() - check if the given hardif is a wifi interface * @hard_iface: the device to check * * Return: true if the net device is a 802.11 wireless device, false otherwise. */ bool batadv_is_wifi_hardif(struct batadv_hard_iface *hard_iface) { if (!hard_iface) return false; return hard_iface->wifi_flags != 0; } /** * batadv_hardif_no_broadcast() - check whether (re)broadcast is necessary * @if_outgoing: the outgoing interface checked and considered for (re)broadcast * @orig_addr: the originator of this packet * @orig_neigh: originator address of the forwarder we just got the packet from * (NULL if we originated) * * Checks whether a packet needs to be (re)broadcasted on the given interface. * * Return: * BATADV_HARDIF_BCAST_NORECIPIENT: No neighbor on interface * BATADV_HARDIF_BCAST_DUPFWD: Just one neighbor, but it is the forwarder * BATADV_HARDIF_BCAST_DUPORIG: Just one neighbor, but it is the originator * BATADV_HARDIF_BCAST_OK: Several neighbors, must broadcast */ int batadv_hardif_no_broadcast(struct batadv_hard_iface *if_outgoing, u8 *orig_addr, u8 *orig_neigh) { struct batadv_hardif_neigh_node *hardif_neigh; struct hlist_node *first; int ret = BATADV_HARDIF_BCAST_OK; rcu_read_lock(); /* 0 neighbors -> no (re)broadcast */ first = rcu_dereference(hlist_first_rcu(&if_outgoing->neigh_list)); if (!first) { ret = BATADV_HARDIF_BCAST_NORECIPIENT; goto out; } /* >1 neighbors -> (re)broadcast */ if (rcu_dereference(hlist_next_rcu(first))) goto out; hardif_neigh = hlist_entry(first, struct batadv_hardif_neigh_node, list); /* 1 neighbor, is the originator -> no rebroadcast */ if (orig_addr && batadv_compare_eth(hardif_neigh->orig, orig_addr)) { ret = BATADV_HARDIF_BCAST_DUPORIG; /* 1 neighbor, is the one we received from -> no rebroadcast */ } else if (orig_neigh && batadv_compare_eth(hardif_neigh->orig, orig_neigh)) { ret = BATADV_HARDIF_BCAST_DUPFWD; } out: rcu_read_unlock(); return ret; } static struct batadv_hard_iface * batadv_hardif_get_active(struct net_device *mesh_iface) { struct batadv_hard_iface *hard_iface; struct list_head *iter; rcu_read_lock(); netdev_for_each_lower_private_rcu(mesh_iface, hard_iface, iter) { if (hard_iface->if_status == BATADV_IF_ACTIVE && kref_get_unless_zero(&hard_iface->refcount)) goto out; } hard_iface = NULL; out: rcu_read_unlock(); return hard_iface; } static void batadv_primary_if_update_addr(struct batadv_priv *bat_priv, struct batadv_hard_iface *oldif) { struct batadv_hard_iface *primary_if; primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) goto out; batadv_dat_init_own_addr(bat_priv, primary_if); batadv_bla_update_orig_address(bat_priv, primary_if, oldif); out: batadv_hardif_put(primary_if); } static void batadv_primary_if_select(struct batadv_priv *bat_priv, struct batadv_hard_iface *new_hard_iface) { struct batadv_hard_iface *curr_hard_iface; ASSERT_RTNL(); if (new_hard_iface) kref_get(&new_hard_iface->refcount); curr_hard_iface = rcu_replace_pointer(bat_priv->primary_if, new_hard_iface, 1); if (!new_hard_iface) goto out; bat_priv->algo_ops->iface.primary_set(new_hard_iface); batadv_primary_if_update_addr(bat_priv, curr_hard_iface); out: batadv_hardif_put(curr_hard_iface); } static bool batadv_hardif_is_iface_up(const struct batadv_hard_iface *hard_iface) { if (hard_iface->net_dev->flags & IFF_UP) return true; return false; } static void batadv_check_known_mac_addr(const struct batadv_hard_iface *hard_iface) { struct net_device *mesh_iface = hard_iface->mesh_iface; const struct batadv_hard_iface *tmp_hard_iface; struct list_head *iter; if (!mesh_iface) return; netdev_for_each_lower_private(mesh_iface, tmp_hard_iface, iter) { if (tmp_hard_iface == hard_iface) continue; if (tmp_hard_iface->if_status == BATADV_IF_NOT_IN_USE) continue; if (!batadv_compare_eth(tmp_hard_iface->net_dev->dev_addr, hard_iface->net_dev->dev_addr)) continue; pr_warn("The newly added mac address (%pM) already exists on: %s\n", hard_iface->net_dev->dev_addr, tmp_hard_iface->net_dev->name); pr_warn("It is strongly recommended to keep mac addresses unique to avoid problems!\n"); } } /** * batadv_hardif_recalc_extra_skbroom() - Recalculate skbuff extra head/tailroom * @mesh_iface: netdev struct of the mesh interface */ static void batadv_hardif_recalc_extra_skbroom(struct net_device *mesh_iface) { const struct batadv_hard_iface *hard_iface; unsigned short lower_header_len = ETH_HLEN; unsigned short lower_headroom = 0; unsigned short lower_tailroom = 0; unsigned short needed_headroom; struct list_head *iter; rcu_read_lock(); netdev_for_each_lower_private_rcu(mesh_iface, hard_iface, iter) { if (hard_iface->if_status == BATADV_IF_NOT_IN_USE) continue; lower_header_len = max_t(unsigned short, lower_header_len, hard_iface->net_dev->hard_header_len); lower_headroom = max_t(unsigned short, lower_headroom, hard_iface->net_dev->needed_headroom); lower_tailroom = max_t(unsigned short, lower_tailroom, hard_iface->net_dev->needed_tailroom); } rcu_read_unlock(); needed_headroom = lower_headroom + (lower_header_len - ETH_HLEN); needed_headroom += batadv_max_header_len(); /* fragmentation headers don't strip the unicast/... header */ needed_headroom += sizeof(struct batadv_frag_packet); mesh_iface->needed_headroom = needed_headroom; mesh_iface->needed_tailroom = lower_tailroom; } /** * batadv_hardif_min_mtu() - Calculate maximum MTU for mesh interface * @mesh_iface: netdev struct of the mesh interface * * Return: MTU for the mesh-interface (limited by the minimal MTU of all active * slave interfaces) */ int batadv_hardif_min_mtu(struct net_device *mesh_iface) { struct batadv_priv *bat_priv = netdev_priv(mesh_iface); const struct batadv_hard_iface *hard_iface; struct list_head *iter; int min_mtu = INT_MAX; rcu_read_lock(); netdev_for_each_lower_private_rcu(mesh_iface, hard_iface, iter) { if (hard_iface->if_status != BATADV_IF_ACTIVE && hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED) continue; min_mtu = min_t(int, hard_iface->net_dev->mtu, min_mtu); } rcu_read_unlock(); if (atomic_read(&bat_priv->fragmentation) == 0) goto out; /* with fragmentation enabled the maximum size of internally generated * packets such as translation table exchanges or tvlv containers, etc * has to be calculated */ min_mtu = min_t(int, min_mtu, BATADV_FRAG_MAX_FRAG_SIZE); min_mtu -= sizeof(struct batadv_frag_packet); min_mtu *= BATADV_FRAG_MAX_FRAGMENTS; out: /* report to the other components the maximum amount of bytes that * batman-adv can send over the wire (without considering the payload * overhead). For example, this value is used by TT to compute the * maximum local table size */ atomic_set(&bat_priv->packet_size_max, min_mtu); /* the real mesh-interface MTU is computed by removing the payload * overhead from the maximum amount of bytes that was just computed. */ return min_t(int, min_mtu - batadv_max_header_len(), BATADV_MAX_MTU); } /** * batadv_update_min_mtu() - Adjusts the MTU if a new interface with a smaller * MTU appeared * @mesh_iface: netdev struct of the mesh interface */ void batadv_update_min_mtu(struct net_device *mesh_iface) { struct batadv_priv *bat_priv = netdev_priv(mesh_iface); int limit_mtu; int mtu; mtu = batadv_hardif_min_mtu(mesh_iface); if (bat_priv->mtu_set_by_user) limit_mtu = bat_priv->mtu_set_by_user; else limit_mtu = ETH_DATA_LEN; mtu = min(mtu, limit_mtu); dev_set_mtu(mesh_iface, mtu); /* Check if the local translate table should be cleaned up to match a * new (and smaller) MTU. */ batadv_tt_local_resize_to_mtu(mesh_iface); } static void batadv_hardif_activate_interface(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv; struct batadv_hard_iface *primary_if = NULL; if (hard_iface->if_status != BATADV_IF_INACTIVE) goto out; bat_priv = netdev_priv(hard_iface->mesh_iface); bat_priv->algo_ops->iface.update_mac(hard_iface); hard_iface->if_status = BATADV_IF_TO_BE_ACTIVATED; /* the first active interface becomes our primary interface or * the next active interface after the old primary interface was removed */ primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) batadv_primary_if_select(bat_priv, hard_iface); batadv_info(hard_iface->mesh_iface, "Interface activated: %s\n", hard_iface->net_dev->name); batadv_update_min_mtu(hard_iface->mesh_iface); if (bat_priv->algo_ops->iface.activate) bat_priv->algo_ops->iface.activate(hard_iface); out: batadv_hardif_put(primary_if); } static void batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface) { if (hard_iface->if_status != BATADV_IF_ACTIVE && hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED) return; hard_iface->if_status = BATADV_IF_INACTIVE; batadv_info(hard_iface->mesh_iface, "Interface deactivated: %s\n", hard_iface->net_dev->name); batadv_update_min_mtu(hard_iface->mesh_iface); } /** * batadv_hardif_enable_interface() - Enslave hard interface to mesh interface * @hard_iface: hard interface to add to mesh interface * @mesh_iface: netdev struct of the mesh interface * * Return: 0 on success or negative error number in case of failure */ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, struct net_device *mesh_iface) { struct batadv_priv *bat_priv; __be16 ethertype = htons(ETH_P_BATMAN); int max_header_len = batadv_max_header_len(); unsigned int required_mtu; unsigned int hardif_mtu; int ret; hardif_mtu = READ_ONCE(hard_iface->net_dev->mtu); required_mtu = READ_ONCE(mesh_iface->mtu) + max_header_len; if (hardif_mtu < ETH_MIN_MTU + max_header_len) return -EINVAL; if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) goto out; kref_get(&hard_iface->refcount); netdev_hold(mesh_iface, &hard_iface->meshif_dev_tracker, GFP_ATOMIC); hard_iface->mesh_iface = mesh_iface; bat_priv = netdev_priv(hard_iface->mesh_iface); ret = netdev_master_upper_dev_link(hard_iface->net_dev, mesh_iface, hard_iface, NULL, NULL); if (ret) goto err_dev; ret = bat_priv->algo_ops->iface.enable(hard_iface); if (ret < 0) goto err_upper; hard_iface->if_status = BATADV_IF_INACTIVE; kref_get(&hard_iface->refcount); hard_iface->batman_adv_ptype.type = ethertype; hard_iface->batman_adv_ptype.func = batadv_batman_skb_recv; hard_iface->batman_adv_ptype.dev = hard_iface->net_dev; dev_add_pack(&hard_iface->batman_adv_ptype); batadv_info(hard_iface->mesh_iface, "Adding interface: %s\n", hard_iface->net_dev->name); if (atomic_read(&bat_priv->fragmentation) && hardif_mtu < required_mtu) batadv_info(hard_iface->mesh_iface, "The MTU of interface %s is too small (%i) to handle the transport of batman-adv packets. Packets going over this interface will be fragmented on layer2 which could impact the performance. Setting the MTU to %i would solve the problem.\n", hard_iface->net_dev->name, hardif_mtu, required_mtu); if (!atomic_read(&bat_priv->fragmentation) && hardif_mtu < required_mtu) batadv_info(hard_iface->mesh_iface, "The MTU of interface %s is too small (%i) to handle the transport of batman-adv packets. If you experience problems getting traffic through try increasing the MTU to %i.\n", hard_iface->net_dev->name, hardif_mtu, required_mtu); batadv_check_known_mac_addr(hard_iface); if (batadv_hardif_is_iface_up(hard_iface)) batadv_hardif_activate_interface(hard_iface); else batadv_err(hard_iface->mesh_iface, "Not using interface %s (retrying later): interface not active\n", hard_iface->net_dev->name); batadv_hardif_recalc_extra_skbroom(mesh_iface); if (bat_priv->algo_ops->iface.enabled) bat_priv->algo_ops->iface.enabled(hard_iface); out: return 0; err_upper: netdev_upper_dev_unlink(hard_iface->net_dev, mesh_iface); err_dev: hard_iface->mesh_iface = NULL; netdev_put(mesh_iface, &hard_iface->meshif_dev_tracker); batadv_hardif_put(hard_iface); return ret; } /** * batadv_hardif_cnt() - get number of interfaces enslaved to mesh interface * @mesh_iface: mesh interface to check * * This function is only using RCU for locking - the result can therefore be * off when another function is modifying the list at the same time. The * caller can use the rtnl_lock to make sure that the count is accurate. * * Return: number of connected/enslaved hard interfaces */ static size_t batadv_hardif_cnt(struct net_device *mesh_iface) { struct batadv_hard_iface *hard_iface; struct list_head *iter; size_t count = 0; rcu_read_lock(); netdev_for_each_lower_private_rcu(mesh_iface, hard_iface, iter) count++; rcu_read_unlock(); return count; } /** * batadv_hardif_disable_interface() - Remove hard interface from mesh interface * @hard_iface: hard interface to be removed */ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); struct batadv_hard_iface *primary_if = NULL; batadv_hardif_deactivate_interface(hard_iface); if (hard_iface->if_status != BATADV_IF_INACTIVE) goto out; batadv_info(hard_iface->mesh_iface, "Removing interface: %s\n", hard_iface->net_dev->name); dev_remove_pack(&hard_iface->batman_adv_ptype); batadv_hardif_put(hard_iface); primary_if = batadv_primary_if_get_selected(bat_priv); if (hard_iface == primary_if) { struct batadv_hard_iface *new_if; new_if = batadv_hardif_get_active(hard_iface->mesh_iface); batadv_primary_if_select(bat_priv, new_if); batadv_hardif_put(new_if); } bat_priv->algo_ops->iface.disable(hard_iface); hard_iface->if_status = BATADV_IF_NOT_IN_USE; /* delete all references to this hard_iface */ batadv_purge_orig_ref(bat_priv); batadv_purge_outstanding_packets(bat_priv, hard_iface); netdev_put(hard_iface->mesh_iface, &hard_iface->meshif_dev_tracker); netdev_upper_dev_unlink(hard_iface->net_dev, hard_iface->mesh_iface); batadv_hardif_recalc_extra_skbroom(hard_iface->mesh_iface); /* nobody uses this interface anymore */ if (batadv_hardif_cnt(hard_iface->mesh_iface) <= 1) batadv_gw_check_client_stop(bat_priv); hard_iface->mesh_iface = NULL; batadv_hardif_put(hard_iface); out: batadv_hardif_put(primary_if); } static struct batadv_hard_iface * batadv_hardif_add_interface(struct net_device *net_dev) { struct batadv_hard_iface *hard_iface; ASSERT_RTNL(); if (!batadv_is_valid_iface(net_dev)) return NULL; hard_iface = kzalloc(sizeof(*hard_iface), GFP_ATOMIC); if (!hard_iface) return NULL; netdev_hold(net_dev, &hard_iface->dev_tracker, GFP_ATOMIC); hard_iface->net_dev = net_dev; hard_iface->mesh_iface = NULL; hard_iface->if_status = BATADV_IF_NOT_IN_USE; INIT_LIST_HEAD(&hard_iface->list); INIT_HLIST_HEAD(&hard_iface->neigh_list); mutex_init(&hard_iface->bat_iv.ogm_buff_mutex); spin_lock_init(&hard_iface->neigh_list_lock); kref_init(&hard_iface->refcount); hard_iface->num_bcasts = BATADV_NUM_BCASTS_DEFAULT; hard_iface->wifi_flags = batadv_wifi_flags_evaluate(net_dev); if (batadv_is_wifi_hardif(hard_iface)) hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS; atomic_set(&hard_iface->hop_penalty, 0); batadv_v_hardif_init(hard_iface); kref_get(&hard_iface->refcount); list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list); batadv_hardif_generation++; return hard_iface; } static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface) { ASSERT_RTNL(); /* first deactivate interface */ if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) batadv_hardif_disable_interface(hard_iface); if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) return; hard_iface->if_status = BATADV_IF_TO_BE_REMOVED; batadv_hardif_put(hard_iface); } /** * batadv_hard_if_event_meshif() - Handle events for mesh interfaces * @event: NETDEV_* event to handle * @net_dev: net_device which generated an event * * Return: NOTIFY_* result */ static int batadv_hard_if_event_meshif(unsigned long event, struct net_device *net_dev) { struct batadv_priv *bat_priv; switch (event) { case NETDEV_REGISTER: bat_priv = netdev_priv(net_dev); batadv_meshif_create_vlan(bat_priv, BATADV_NO_FLAGS); break; } return NOTIFY_DONE; } static int batadv_hard_if_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *net_dev = netdev_notifier_info_to_dev(ptr); struct batadv_hard_iface *hard_iface; struct batadv_hard_iface *primary_if = NULL; struct batadv_priv *bat_priv; if (batadv_meshif_is_valid(net_dev)) return batadv_hard_if_event_meshif(event, net_dev); hard_iface = batadv_hardif_get_by_netdev(net_dev); if (!hard_iface && (event == NETDEV_REGISTER || event == NETDEV_POST_TYPE_CHANGE)) hard_iface = batadv_hardif_add_interface(net_dev); if (!hard_iface) goto out; switch (event) { case NETDEV_UP: batadv_hardif_activate_interface(hard_iface); break; case NETDEV_GOING_DOWN: case NETDEV_DOWN: batadv_hardif_deactivate_interface(hard_iface); break; case NETDEV_UNREGISTER: case NETDEV_PRE_TYPE_CHANGE: list_del_rcu(&hard_iface->list); batadv_hardif_generation++; batadv_hardif_remove_interface(hard_iface); break; case NETDEV_CHANGEMTU: if (hard_iface->mesh_iface) batadv_update_min_mtu(hard_iface->mesh_iface); break; case NETDEV_CHANGEADDR: if (hard_iface->if_status == BATADV_IF_NOT_IN_USE) goto hardif_put; batadv_check_known_mac_addr(hard_iface); bat_priv = netdev_priv(hard_iface->mesh_iface); bat_priv->algo_ops->iface.update_mac(hard_iface); primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) goto hardif_put; if (hard_iface == primary_if) batadv_primary_if_update_addr(bat_priv, NULL); break; case NETDEV_CHANGEUPPER: hard_iface->wifi_flags = batadv_wifi_flags_evaluate(net_dev); if (batadv_is_wifi_hardif(hard_iface)) hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS; break; default: break; } hardif_put: batadv_hardif_put(hard_iface); out: batadv_hardif_put(primary_if); return NOTIFY_DONE; } struct notifier_block batadv_hard_if_notifier = { .notifier_call = batadv_hard_if_event, };
12861 12778 12717 3232 12734 12719 12706 12635 105 12784 554 424 179 178 23 19 23 156 556 370 362 333 333 370 38 22 22 22 38 389 173 173 172 217 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 // SPDX-License-Identifier: GPL-2.0-only /* * Access kernel or user memory without faulting. */ #include <linux/export.h> #include <linux/mm.h> #include <linux/uaccess.h> #include <asm/tlb.h> bool __weak copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { return true; } /* * The below only uses kmsan_check_memory() to ensure uninitialized kernel * memory isn't leaked. */ #define copy_from_kernel_nofault_loop(dst, src, len, type, err_label) \ while (len >= sizeof(type)) { \ __get_kernel_nofault(dst, src, type, err_label); \ kmsan_check_memory(src, sizeof(type)); \ dst += sizeof(type); \ src += sizeof(type); \ len -= sizeof(type); \ } long copy_from_kernel_nofault(void *dst, const void *src, size_t size) { unsigned long align = 0; if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) align = (unsigned long)dst | (unsigned long)src; if (!copy_from_kernel_nofault_allowed(src, size)) return -ERANGE; pagefault_disable(); if (!(align & 7)) copy_from_kernel_nofault_loop(dst, src, size, u64, Efault); if (!(align & 3)) copy_from_kernel_nofault_loop(dst, src, size, u32, Efault); if (!(align & 1)) copy_from_kernel_nofault_loop(dst, src, size, u16, Efault); copy_from_kernel_nofault_loop(dst, src, size, u8, Efault); pagefault_enable(); return 0; Efault: pagefault_enable(); return -EFAULT; } EXPORT_SYMBOL_GPL(copy_from_kernel_nofault); #define copy_to_kernel_nofault_loop(dst, src, len, type, err_label) \ while (len >= sizeof(type)) { \ __put_kernel_nofault(dst, src, type, err_label); \ instrument_write(dst, sizeof(type)); \ dst += sizeof(type); \ src += sizeof(type); \ len -= sizeof(type); \ } long copy_to_kernel_nofault(void *dst, const void *src, size_t size) { unsigned long align = 0; if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) align = (unsigned long)dst | (unsigned long)src; pagefault_disable(); if (!(align & 7)) copy_to_kernel_nofault_loop(dst, src, size, u64, Efault); if (!(align & 3)) copy_to_kernel_nofault_loop(dst, src, size, u32, Efault); if (!(align & 1)) copy_to_kernel_nofault_loop(dst, src, size, u16, Efault); copy_to_kernel_nofault_loop(dst, src, size, u8, Efault); pagefault_enable(); return 0; Efault: pagefault_enable(); return -EFAULT; } long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count) { const void *src = unsafe_addr; if (unlikely(count <= 0)) return 0; if (!copy_from_kernel_nofault_allowed(unsafe_addr, count)) return -ERANGE; pagefault_disable(); do { __get_kernel_nofault(dst, src, u8, Efault); dst++; src++; } while (dst[-1] && src - unsafe_addr < count); pagefault_enable(); dst[-1] = '\0'; return src - unsafe_addr; Efault: pagefault_enable(); dst[0] = '\0'; return -EFAULT; } /** * copy_from_user_nofault(): safely attempt to read from a user-space location * @dst: pointer to the buffer that shall take the data * @src: address to read from. This must be a user address. * @size: size of the data chunk * * Safely read from user address @src to the buffer at @dst. If a kernel fault * happens, handle that and return -EFAULT. */ long copy_from_user_nofault(void *dst, const void __user *src, size_t size) { long ret = -EFAULT; if (!__access_ok(src, size)) return ret; if (!nmi_uaccess_okay()) return ret; pagefault_disable(); ret = __copy_from_user_inatomic(dst, src, size); pagefault_enable(); if (ret) return -EFAULT; return 0; } EXPORT_SYMBOL_GPL(copy_from_user_nofault); /** * copy_to_user_nofault(): safely attempt to write to a user-space location * @dst: address to write to * @src: pointer to the data that shall be written * @size: size of the data chunk * * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. */ long copy_to_user_nofault(void __user *dst, const void *src, size_t size) { long ret = -EFAULT; if (access_ok(dst, size)) { pagefault_disable(); ret = __copy_to_user_inatomic(dst, src, size); pagefault_enable(); } if (ret) return -EFAULT; return 0; } EXPORT_SYMBOL_GPL(copy_to_user_nofault); /** * strncpy_from_user_nofault: - Copy a NUL terminated string from unsafe user * address. * @dst: Destination address, in kernel space. This buffer must be at * least @count bytes long. * @unsafe_addr: Unsafe user address. * @count: Maximum number of bytes to copy, including the trailing NUL. * * Copies a NUL-terminated string from unsafe user address to kernel buffer. * * On success, returns the length of the string INCLUDING the trailing NUL. * * If access fails, returns -EFAULT (some data may have been copied * and the trailing NUL added). * * If @count is smaller than the length of the string, copies @count-1 bytes, * sets the last byte of @dst buffer to NUL and returns @count. */ long strncpy_from_user_nofault(char *dst, const void __user *unsafe_addr, long count) { long ret; if (unlikely(count <= 0)) return 0; pagefault_disable(); ret = strncpy_from_user(dst, unsafe_addr, count); pagefault_enable(); if (ret >= count) { ret = count; dst[ret - 1] = '\0'; } else if (ret >= 0) { ret++; } return ret; } /** * strnlen_user_nofault: - Get the size of a user string INCLUDING final NUL. * @unsafe_addr: The string to measure. * @count: Maximum count (including NUL) * * Get the size of a NUL-terminated string in user space without pagefault. * * Returns the size of the string INCLUDING the terminating NUL. * * If the string is too long, returns a number larger than @count. User * has to check the return value against "> count". * On exception (or invalid count), returns 0. * * Unlike strnlen_user, this can be used from IRQ handler etc. because * it disables pagefaults. */ long strnlen_user_nofault(const void __user *unsafe_addr, long count) { int ret; pagefault_disable(); ret = strnlen_user(unsafe_addr, count); pagefault_enable(); return ret; } void __copy_overflow(int size, unsigned long count) { WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count); } EXPORT_SYMBOL(__copy_overflow);
7 7 7 7 7 14 8 10 14 7 7 7 7 7 7 7 7 8 7 7 7 7 7 7 7 7 5 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 18 18 18 2 2 2 2 2 1 1 1 2 1 1 2 2 1 1 1 1 1 1 18 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 191 191 18 17 17 18 17 18 1 18 17 17 9 8 17 17 17 17 8 8 8 17 14 12 2 2 2 2 2 2 15 11 11 11 11 18 16 18 18 18 18 2 17 17 17 17 17 3 17 1 17 15 15 14 17 17 9 13 17 15 17 17 17 17 3 2 16 18 18 18 18 18 18 14 14 16 2 16 18 8 8 21 21 21 20 21 21 18 18 18 18 8 8 10 18 18 8 3 21 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 // SPDX-License-Identifier: GPL-2.0+ /* * Copyright (C) 2016 Oracle. All Rights Reserved. * Author: Darrick J. Wong <darrick.wong@oracle.com> */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_btree.h" #include "xfs_refcount_btree.h" #include "xfs_refcount.h" #include "xfs_bmap_btree.h" #include "xfs_trans_space.h" #include "xfs_bit.h" #include "xfs_alloc.h" #include "xfs_quota.h" #include "xfs_reflink.h" #include "xfs_iomap.h" #include "xfs_ag.h" #include "xfs_ag_resv.h" #include "xfs_health.h" #include "xfs_rtrefcount_btree.h" #include "xfs_rtalloc.h" #include "xfs_rtgroup.h" #include "xfs_metafile.h" /* * Copy on Write of Shared Blocks * * XFS must preserve "the usual" file semantics even when two files share * the same physical blocks. This means that a write to one file must not * alter the blocks in a different file; the way that we'll do that is * through the use of a copy-on-write mechanism. At a high level, that * means that when we want to write to a shared block, we allocate a new * block, write the data to the new block, and if that succeeds we map the * new block into the file. * * XFS provides a "delayed allocation" mechanism that defers the allocation * of disk blocks to dirty-but-not-yet-mapped file blocks as long as * possible. This reduces fragmentation by enabling the filesystem to ask * for bigger chunks less often, which is exactly what we want for CoW. * * The delalloc mechanism begins when the kernel wants to make a block * writable (write_begin or page_mkwrite). If the offset is not mapped, we * create a delalloc mapping, which is a regular in-core extent, but without * a real startblock. (For delalloc mappings, the startblock encodes both * a flag that this is a delalloc mapping, and a worst-case estimate of how * many blocks might be required to put the mapping into the BMBT.) delalloc * mappings are a reservation against the free space in the filesystem; * adjacent mappings can also be combined into fewer larger mappings. * * As an optimization, the CoW extent size hint (cowextsz) creates * outsized aligned delalloc reservations in the hope of landing out of * order nearby CoW writes in a single extent on disk, thereby reducing * fragmentation and improving future performance. * * D: --RRRRRRSSSRRRRRRRR--- (data fork) * C: ------DDDDDDD--------- (CoW fork) * * When dirty pages are being written out (typically in writepage), the * delalloc reservations are converted into unwritten mappings by * allocating blocks and replacing the delalloc mapping with real ones. * A delalloc mapping can be replaced by several unwritten ones if the * free space is fragmented. * * D: --RRRRRRSSSRRRRRRRR--- * C: ------UUUUUUU--------- * * We want to adapt the delalloc mechanism for copy-on-write, since the * write paths are similar. The first two steps (creating the reservation * and allocating the blocks) are exactly the same as delalloc except that * the mappings must be stored in a separate CoW fork because we do not want * to disturb the mapping in the data fork until we're sure that the write * succeeded. IO completion in this case is the process of removing the old * mapping from the data fork and moving the new mapping from the CoW fork to * the data fork. This will be discussed shortly. * * For now, unaligned directio writes will be bounced back to the page cache. * Block-aligned directio writes will use the same mechanism as buffered * writes. * * Just prior to submitting the actual disk write requests, we convert * the extents representing the range of the file actually being written * (as opposed to extra pieces created for the cowextsize hint) to real * extents. This will become important in the next step: * * D: --RRRRRRSSSRRRRRRRR--- * C: ------UUrrUUU--------- * * CoW remapping must be done after the data block write completes, * because we don't want to destroy the old data fork map until we're sure * the new block has been written. Since the new mappings are kept in a * separate fork, we can simply iterate these mappings to find the ones * that cover the file blocks that we just CoW'd. For each extent, simply * unmap the corresponding range in the data fork, map the new range into * the data fork, and remove the extent from the CoW fork. Because of * the presence of the cowextsize hint, however, we must be careful * only to remap the blocks that we've actually written out -- we must * never remap delalloc reservations nor CoW staging blocks that have * yet to be written. This corresponds exactly to the real extents in * the CoW fork: * * D: --RRRRRRrrSRRRRRRRR--- * C: ------UU--UUU--------- * * Since the remapping operation can be applied to an arbitrary file * range, we record the need for the remap step as a flag in the ioend * instead of declaring a new IO type. This is required for direct io * because we only have ioend for the whole dio, and we have to be able to * remember the presence of unwritten blocks and CoW blocks with a single * ioend structure. Better yet, the more ground we can cover with one * ioend, the better. */ /* * Given a file mapping for the data device, find the lowest-numbered run of * shared blocks within that mapping and return it in shared_offset/shared_len. * The offset is relative to the start of irec. * * If find_end_of_shared is true, return the longest contiguous extent of shared * blocks. If there are no shared extents, shared_offset and shared_len will be * set to 0; */ static int xfs_reflink_find_shared( struct xfs_mount *mp, struct xfs_trans *tp, const struct xfs_bmbt_irec *irec, xfs_extlen_t *shared_offset, xfs_extlen_t *shared_len, bool find_end_of_shared) { struct xfs_buf *agbp; struct xfs_perag *pag; struct xfs_btree_cur *cur; int error; xfs_agblock_t orig_bno, found_bno; pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, irec->br_startblock)); orig_bno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock); error = xfs_alloc_read_agf(pag, tp, 0, &agbp); if (error) goto out; cur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag); error = xfs_refcount_find_shared(cur, orig_bno, irec->br_blockcount, &found_bno, shared_len, find_end_of_shared); xfs_btree_del_cursor(cur, error); xfs_trans_brelse(tp, agbp); if (!error && *shared_len) *shared_offset = found_bno - orig_bno; out: xfs_perag_put(pag); return error; } /* * Given a file mapping for the rt device, find the lowest-numbered run of * shared blocks within that mapping and return it in shared_offset/shared_len. * The offset is relative to the start of irec. * * If find_end_of_shared is true, return the longest contiguous extent of shared * blocks. If there are no shared extents, shared_offset and shared_len will be * set to 0; */ static int xfs_reflink_find_rtshared( struct xfs_mount *mp, struct xfs_trans *tp, const struct xfs_bmbt_irec *irec, xfs_extlen_t *shared_offset, xfs_extlen_t *shared_len, bool find_end_of_shared) { struct xfs_rtgroup *rtg; struct xfs_btree_cur *cur; xfs_rgblock_t orig_bno; xfs_agblock_t found_bno; int error; BUILD_BUG_ON(NULLRGBLOCK != NULLAGBLOCK); /* * Note: this uses the not quite correct xfs_agblock_t type because * xfs_refcount_find_shared is shared between the RT and data device * refcount code. */ orig_bno = xfs_rtb_to_rgbno(mp, irec->br_startblock); rtg = xfs_rtgroup_get(mp, xfs_rtb_to_rgno(mp, irec->br_startblock)); xfs_rtgroup_lock(rtg, XFS_RTGLOCK_REFCOUNT); cur = xfs_rtrefcountbt_init_cursor(tp, rtg); error = xfs_refcount_find_shared(cur, orig_bno, irec->br_blockcount, &found_bno, shared_len, find_end_of_shared); xfs_btree_del_cursor(cur, error); xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_REFCOUNT); xfs_rtgroup_put(rtg); if (!error && *shared_len) *shared_offset = found_bno - orig_bno; return error; } /* * Trim the mapping to the next block where there's a change in the * shared/unshared status. More specifically, this means that we * find the lowest-numbered extent of shared blocks that coincides with * the given block mapping. If the shared extent overlaps the start of * the mapping, trim the mapping to the end of the shared extent. If * the shared region intersects the mapping, trim the mapping to the * start of the shared extent. If there are no shared regions that * overlap, just return the original extent. */ int xfs_reflink_trim_around_shared( struct xfs_inode *ip, struct xfs_bmbt_irec *irec, bool *shared) { struct xfs_mount *mp = ip->i_mount; xfs_extlen_t shared_offset, shared_len; int error = 0; /* Holes, unwritten, and delalloc extents cannot be shared */ if (!xfs_is_reflink_inode(ip) || !xfs_bmap_is_written_extent(irec)) { *shared = false; return 0; } trace_xfs_reflink_trim_around_shared(ip, irec); if (XFS_IS_REALTIME_INODE(ip)) error = xfs_reflink_find_rtshared(mp, NULL, irec, &shared_offset, &shared_len, true); else error = xfs_reflink_find_shared(mp, NULL, irec, &shared_offset, &shared_len, true); if (error) return error; if (!shared_len) { /* No shared blocks at all. */ *shared = false; } else if (!shared_offset) { /* * The start of this mapping points to shared space. Truncate * the mapping at the end of the shared region so that a * subsequent iteration starts at the start of the unshared * region. */ irec->br_blockcount = shared_len; *shared = true; } else { /* * There's a shared region that doesn't start at the beginning * of the mapping. Truncate the mapping at the start of the * shared extent so that a subsequent iteration starts at the * start of the shared region. */ irec->br_blockcount = shared_offset; *shared = false; } return 0; } int xfs_bmap_trim_cow( struct xfs_inode *ip, struct xfs_bmbt_irec *imap, bool *shared) { /* We can't update any real extents in always COW mode. */ if (xfs_is_always_cow_inode(ip) && !isnullstartblock(imap->br_startblock)) { *shared = true; return 0; } /* Trim the mapping to the nearest shared extent boundary. */ return xfs_reflink_trim_around_shared(ip, imap, shared); } int xfs_reflink_convert_cow_locked( struct xfs_inode *ip, xfs_fileoff_t offset_fsb, xfs_filblks_t count_fsb) { struct xfs_iext_cursor icur; struct xfs_bmbt_irec got; struct xfs_btree_cur *dummy_cur = NULL; int dummy_logflags; int error = 0; if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got)) return 0; do { if (got.br_startoff >= offset_fsb + count_fsb) break; if (got.br_state == XFS_EXT_NORM) continue; if (WARN_ON_ONCE(isnullstartblock(got.br_startblock))) return -EIO; xfs_trim_extent(&got, offset_fsb, count_fsb); if (!got.br_blockcount) continue; got.br_state = XFS_EXT_NORM; error = xfs_bmap_add_extent_unwritten_real(NULL, ip, XFS_COW_FORK, &icur, &dummy_cur, &got, &dummy_logflags); if (error) return error; } while (xfs_iext_next_extent(ip->i_cowfp, &icur, &got)); return error; } /* Convert all of the unwritten CoW extents in a file's range to real ones. */ int xfs_reflink_convert_cow( struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count) { struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count); xfs_filblks_t count_fsb = end_fsb - offset_fsb; int error; ASSERT(count != 0); xfs_ilock(ip, XFS_ILOCK_EXCL); error = xfs_reflink_convert_cow_locked(ip, offset_fsb, count_fsb); xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; } /* * Find the extent that maps the given range in the COW fork. Even if the extent * is not shared we might have a preallocation for it in the COW fork. If so we * use it that rather than trigger a new allocation. */ static int xfs_find_trim_cow_extent( struct xfs_inode *ip, struct xfs_bmbt_irec *imap, struct xfs_bmbt_irec *cmap, bool *shared, bool *found) { xfs_fileoff_t offset_fsb = imap->br_startoff; xfs_filblks_t count_fsb = imap->br_blockcount; struct xfs_iext_cursor icur; *found = false; /* * If we don't find an overlapping extent, trim the range we need to * allocate to fit the hole we found. */ if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, cmap)) cmap->br_startoff = offset_fsb + count_fsb; if (cmap->br_startoff > offset_fsb) { xfs_trim_extent(imap, imap->br_startoff, cmap->br_startoff - imap->br_startoff); return xfs_bmap_trim_cow(ip, imap, shared); } *shared = true; if (isnullstartblock(cmap->br_startblock)) { xfs_trim_extent(imap, cmap->br_startoff, cmap->br_blockcount); return 0; } /* real extent found - no need to allocate */ xfs_trim_extent(cmap, offset_fsb, count_fsb); *found = true; return 0; } static int xfs_reflink_convert_unwritten( struct xfs_inode *ip, struct xfs_bmbt_irec *imap, struct xfs_bmbt_irec *cmap, bool convert_now) { xfs_fileoff_t offset_fsb = imap->br_startoff; xfs_filblks_t count_fsb = imap->br_blockcount; int error; /* * cmap might larger than imap due to cowextsize hint. */ xfs_trim_extent(cmap, offset_fsb, count_fsb); /* * COW fork extents are supposed to remain unwritten until we're ready * to initiate a disk write. For direct I/O we are going to write the * data and need the conversion, but for buffered writes we're done. */ if (!convert_now || cmap->br_state == XFS_EXT_NORM) return 0; trace_xfs_reflink_convert_cow(ip, cmap); error = xfs_reflink_convert_cow_locked(ip, offset_fsb, count_fsb); if (!error) cmap->br_state = XFS_EXT_NORM; return error; } static int xfs_reflink_fill_cow_hole( struct xfs_inode *ip, struct xfs_bmbt_irec *imap, struct xfs_bmbt_irec *cmap, bool *shared, uint *lockmode, bool convert_now) { struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; xfs_filblks_t resaligned; unsigned int dblocks = 0, rblocks = 0; int nimaps; int error; bool found; resaligned = xfs_aligned_fsb_count(imap->br_startoff, imap->br_blockcount, xfs_get_cowextsz_hint(ip)); if (XFS_IS_REALTIME_INODE(ip)) { dblocks = XFS_DIOSTRAT_SPACE_RES(mp, 0); rblocks = resaligned; } else { dblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned); rblocks = 0; } xfs_iunlock(ip, *lockmode); *lockmode = 0; error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, dblocks, rblocks, false, &tp); if (error) return error; *lockmode = XFS_ILOCK_EXCL; error = xfs_find_trim_cow_extent(ip, imap, cmap, shared, &found); if (error || !*shared) goto out_trans_cancel; if (found) { xfs_trans_cancel(tp); goto convert; } /* Allocate the entire reservation as unwritten blocks. */ nimaps = 1; error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount, XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, 0, cmap, &nimaps); if (error) goto out_trans_cancel; xfs_inode_set_cowblocks_tag(ip); error = xfs_trans_commit(tp); if (error) return error; convert: return xfs_reflink_convert_unwritten(ip, imap, cmap, convert_now); out_trans_cancel: xfs_trans_cancel(tp); return error; } static int xfs_reflink_fill_delalloc( struct xfs_inode *ip, struct xfs_bmbt_irec *imap, struct xfs_bmbt_irec *cmap, bool *shared, uint *lockmode, bool convert_now) { struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; int nimaps; int error; bool found; do { xfs_iunlock(ip, *lockmode); *lockmode = 0; error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, 0, 0, false, &tp); if (error) return error; *lockmode = XFS_ILOCK_EXCL; error = xfs_find_trim_cow_extent(ip, imap, cmap, shared, &found); if (error || !*shared) goto out_trans_cancel; if (found) { xfs_trans_cancel(tp); break; } ASSERT(isnullstartblock(cmap->br_startblock) || cmap->br_startblock == DELAYSTARTBLOCK); /* * Replace delalloc reservation with an unwritten extent. */ nimaps = 1; error = xfs_bmapi_write(tp, ip, cmap->br_startoff, cmap->br_blockcount, XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, 0, cmap, &nimaps); if (error) goto out_trans_cancel; xfs_inode_set_cowblocks_tag(ip); error = xfs_trans_commit(tp); if (error) return error; } while (cmap->br_startoff + cmap->br_blockcount <= imap->br_startoff); return xfs_reflink_convert_unwritten(ip, imap, cmap, convert_now); out_trans_cancel: xfs_trans_cancel(tp); return error; } /* Allocate all CoW reservations covering a range of blocks in a file. */ int xfs_reflink_allocate_cow( struct xfs_inode *ip, struct xfs_bmbt_irec *imap, struct xfs_bmbt_irec *cmap, bool *shared, uint *lockmode, bool convert_now) { int error; bool found; xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); if (!ip->i_cowfp) { ASSERT(!xfs_is_reflink_inode(ip)); xfs_ifork_init_cow(ip); } error = xfs_find_trim_cow_extent(ip, imap, cmap, shared, &found); if (error || !*shared) return error; /* CoW fork has a real extent */ if (found) return xfs_reflink_convert_unwritten(ip, imap, cmap, convert_now); /* * CoW fork does not have an extent and data extent is shared. * Allocate a real extent in the CoW fork. */ if (cmap->br_startoff > imap->br_startoff) return xfs_reflink_fill_cow_hole(ip, imap, cmap, shared, lockmode, convert_now); /* * CoW fork has a delalloc reservation. Replace it with a real extent. * There may or may not be a data fork mapping. */ if (isnullstartblock(cmap->br_startblock) || cmap->br_startblock == DELAYSTARTBLOCK) return xfs_reflink_fill_delalloc(ip, imap, cmap, shared, lockmode, convert_now); /* Shouldn't get here. */ ASSERT(0); return -EFSCORRUPTED; } /* * Cancel CoW reservations for some block range of an inode. * * If cancel_real is true this function cancels all COW fork extents for the * inode; if cancel_real is false, real extents are not cleared. * * Caller must have already joined the inode to the current transaction. The * inode will be joined to the transaction returned to the caller. */ int xfs_reflink_cancel_cow_blocks( struct xfs_inode *ip, struct xfs_trans **tpp, xfs_fileoff_t offset_fsb, xfs_fileoff_t end_fsb, bool cancel_real) { struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_COW_FORK); struct xfs_bmbt_irec got, del; struct xfs_iext_cursor icur; bool isrt = XFS_IS_REALTIME_INODE(ip); int error = 0; if (!xfs_inode_has_cow_data(ip)) return 0; if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got)) return 0; /* Walk backwards until we're out of the I/O range... */ while (got.br_startoff + got.br_blockcount > offset_fsb) { del = got; xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb); /* Extent delete may have bumped ext forward */ if (!del.br_blockcount) { xfs_iext_prev(ifp, &icur); goto next_extent; } trace_xfs_reflink_cancel_cow(ip, &del); if (isnullstartblock(del.br_startblock)) { xfs_bmap_del_extent_delay(ip, XFS_COW_FORK, &icur, &got, &del, 0); } else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) { ASSERT((*tpp)->t_highest_agno == NULLAGNUMBER); /* Free the CoW orphan record. */ xfs_refcount_free_cow_extent(*tpp, isrt, del.br_startblock, del.br_blockcount); error = xfs_free_extent_later(*tpp, del.br_startblock, del.br_blockcount, NULL, XFS_AG_RESV_NONE, isrt ? XFS_FREE_EXTENT_REALTIME : 0); if (error) break; /* Roll the transaction */ error = xfs_defer_finish(tpp); if (error) break; /* Remove the mapping from the CoW fork. */ xfs_bmap_del_extent_cow(ip, &icur, &got, &del); /* Remove the quota reservation */ xfs_quota_unreserve_blkres(ip, del.br_blockcount); } else { /* Didn't do anything, push cursor back. */ xfs_iext_prev(ifp, &icur); } next_extent: if (!xfs_iext_get_extent(ifp, &icur, &got)) break; } /* clear tag if cow fork is emptied */ if (!ifp->if_bytes) xfs_inode_clear_cowblocks_tag(ip); return error; } /* * Cancel CoW reservations for some byte range of an inode. * * If cancel_real is true this function cancels all COW fork extents for the * inode; if cancel_real is false, real extents are not cleared. */ int xfs_reflink_cancel_cow_range( struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count, bool cancel_real) { struct xfs_trans *tp; xfs_fileoff_t offset_fsb; xfs_fileoff_t end_fsb; int error; trace_xfs_reflink_cancel_cow_range(ip, offset, count); ASSERT(ip->i_cowfp); offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); if (count == NULLFILEOFF) end_fsb = NULLFILEOFF; else end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count); /* Start a rolling transaction to remove the mappings */ error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write, 0, 0, 0, &tp); if (error) goto out; xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); /* Scrape out the old CoW reservations */ error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb, cancel_real); if (error) goto out_cancel; error = xfs_trans_commit(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; out_cancel: xfs_trans_cancel(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); out: trace_xfs_reflink_cancel_cow_range_error(ip, error, _RET_IP_); return error; } #ifdef CONFIG_XFS_QUOTA /* * Update quota accounting for a remapping operation. When we're remapping * something from the CoW fork to the data fork, we must update the quota * accounting for delayed allocations. For remapping from the data fork to the * data fork, use regular block accounting. */ static inline void xfs_reflink_update_quota( struct xfs_trans *tp, struct xfs_inode *ip, bool is_cow, int64_t blocks) { unsigned int qflag; if (XFS_IS_REALTIME_INODE(ip)) { qflag = is_cow ? XFS_TRANS_DQ_DELRTBCOUNT : XFS_TRANS_DQ_RTBCOUNT; } else { qflag = is_cow ? XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT; } xfs_trans_mod_dquot_byino(tp, ip, qflag, blocks); } #else # define xfs_reflink_update_quota(tp, ip, is_cow, blocks) ((void)0) #endif /* * Remap part of the CoW fork into the data fork. * * We aim to remap the range starting at @offset_fsb and ending at @end_fsb * into the data fork; this function will remap what it can (at the end of the * range) and update @end_fsb appropriately. Each remap gets its own * transaction because we can end up merging and splitting bmbt blocks for * every remap operation and we'd like to keep the block reservation * requirements as low as possible. */ STATIC int xfs_reflink_end_cow_extent_locked( struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t *offset_fsb, xfs_fileoff_t end_fsb) { struct xfs_iext_cursor icur; struct xfs_bmbt_irec got, del, data; struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_COW_FORK); int nmaps; bool isrt = XFS_IS_REALTIME_INODE(ip); int error; /* * In case of racing, overlapping AIO writes no COW extents might be * left by the time I/O completes for the loser of the race. In that * case we are done. */ if (!xfs_iext_lookup_extent(ip, ifp, *offset_fsb, &icur, &got) || got.br_startoff >= end_fsb) { *offset_fsb = end_fsb; return 0; } /* * Only remap real extents that contain data. With AIO, speculative * preallocations can leak into the range we are called upon, and we * need to skip them. Preserve @got for the eventual CoW fork * deletion; from now on @del represents the mapping that we're * actually remapping. */ while (!xfs_bmap_is_written_extent(&got)) { if (!xfs_iext_next_extent(ifp, &icur, &got) || got.br_startoff >= end_fsb) { *offset_fsb = end_fsb; return 0; } } del = got; xfs_trim_extent(&del, *offset_fsb, end_fsb - *offset_fsb); error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK, XFS_IEXT_REFLINK_END_COW_CNT); if (error) return error; /* Grab the corresponding mapping in the data fork. */ nmaps = 1; error = xfs_bmapi_read(ip, del.br_startoff, del.br_blockcount, &data, &nmaps, 0); if (error) return error; /* We can only remap the smaller of the two extent sizes. */ data.br_blockcount = min(data.br_blockcount, del.br_blockcount); del.br_blockcount = data.br_blockcount; trace_xfs_reflink_cow_remap_from(ip, &del); trace_xfs_reflink_cow_remap_to(ip, &data); if (xfs_bmap_is_real_extent(&data)) { /* * If the extent we're remapping is backed by storage (written * or not), unmap the extent and drop its refcount. */ xfs_bmap_unmap_extent(tp, ip, XFS_DATA_FORK, &data); xfs_refcount_decrease_extent(tp, isrt, &data); xfs_reflink_update_quota(tp, ip, false, -data.br_blockcount); } else if (data.br_startblock == DELAYSTARTBLOCK) { int done; /* * If the extent we're remapping is a delalloc reservation, * we can use the regular bunmapi function to release the * incore state. Dropping the delalloc reservation takes care * of the quota reservation for us. */ error = xfs_bunmapi(NULL, ip, data.br_startoff, data.br_blockcount, 0, 1, &done); if (error) return error; ASSERT(done); } /* Free the CoW orphan record. */ xfs_refcount_free_cow_extent(tp, isrt, del.br_startblock, del.br_blockcount); /* Map the new blocks into the data fork. */ xfs_bmap_map_extent(tp, ip, XFS_DATA_FORK, &del); /* Charge this new data fork mapping to the on-disk quota. */ xfs_reflink_update_quota(tp, ip, true, del.br_blockcount); /* Remove the mapping from the CoW fork. */ xfs_bmap_del_extent_cow(ip, &icur, &got, &del); /* Update the caller about how much progress we made. */ *offset_fsb = del.br_startoff + del.br_blockcount; return 0; } /* * Remap part of the CoW fork into the data fork. * * We aim to remap the range starting at @offset_fsb and ending at @end_fsb * into the data fork; this function will remap what it can (at the end of the * range) and update @end_fsb appropriately. Each remap gets its own * transaction because we can end up merging and splitting bmbt blocks for * every remap operation and we'd like to keep the block reservation * requirements as low as possible. */ STATIC int xfs_reflink_end_cow_extent( struct xfs_inode *ip, xfs_fileoff_t *offset_fsb, xfs_fileoff_t end_fsb) { struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; unsigned int resblks; int error; resblks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, XFS_TRANS_RESERVE, &tp); if (error) return error; xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); error = xfs_reflink_end_cow_extent_locked(tp, ip, offset_fsb, end_fsb); if (error) xfs_trans_cancel(tp); else error = xfs_trans_commit(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; } /* * Remap parts of a file's data fork after a successful CoW. */ int xfs_reflink_end_cow( struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count) { xfs_fileoff_t offset_fsb; xfs_fileoff_t end_fsb; int error = 0; trace_xfs_reflink_end_cow(ip, offset, count); offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count); /* * Walk forwards until we've remapped the I/O range. The loop function * repeatedly cycles the ILOCK to allocate one transaction per remapped * extent. * * If we're being called by writeback then the pages will still * have PageWriteback set, which prevents races with reflink remapping * and truncate. Reflink remapping prevents races with writeback by * taking the iolock and mmaplock before flushing the pages and * remapping, which means there won't be any further writeback or page * cache dirtying until the reflink completes. * * We should never have two threads issuing writeback for the same file * region. There are also have post-eof checks in the writeback * preparation code so that we don't bother writing out pages that are * about to be truncated. * * If we're being called as part of directio write completion, the dio * count is still elevated, which reflink and truncate will wait for. * Reflink remapping takes the iolock and mmaplock and waits for * pending dio to finish, which should prevent any directio until the * remap completes. Multiple concurrent directio writes to the same * region are handled by end_cow processing only occurring for the * threads which succeed; the outcome of multiple overlapping direct * writes is not well defined anyway. * * It's possible that a buffered write and a direct write could collide * here (the buffered write stumbles in after the dio flushes and * invalidates the page cache and immediately queues writeback), but we * have never supported this 100%. If either disk write succeeds the * blocks will be remapped. */ while (end_fsb > offset_fsb && !error) error = xfs_reflink_end_cow_extent(ip, &offset_fsb, end_fsb); if (error) trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_); return error; } /* * Fully remap all of the file's data fork at once, which is the critical part * in achieving atomic behaviour. * The regular CoW end path does not use function as to keep the block * reservation per transaction as low as possible. */ int xfs_reflink_end_atomic_cow( struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count) { xfs_fileoff_t offset_fsb; xfs_fileoff_t end_fsb; int error = 0; struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; unsigned int resblks; trace_xfs_reflink_end_cow(ip, offset, count); offset_fsb = XFS_B_TO_FSBT(mp, offset); end_fsb = XFS_B_TO_FSB(mp, offset + count); /* * Each remapping operation could cause a btree split, so in the worst * case that's one for each block. */ resblks = (end_fsb - offset_fsb) * XFS_NEXTENTADD_SPACE_RES(mp, 1, XFS_DATA_FORK); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_atomic_ioend, resblks, 0, XFS_TRANS_RESERVE, &tp); if (error) return error; xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); while (end_fsb > offset_fsb && !error) { error = xfs_reflink_end_cow_extent_locked(tp, ip, &offset_fsb, end_fsb); } if (error) { trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_); goto out_cancel; } error = xfs_trans_commit(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; out_cancel: xfs_trans_cancel(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; } /* Compute the largest atomic write that we can complete through software. */ xfs_extlen_t xfs_reflink_max_atomic_cow( struct xfs_mount *mp) { /* We cannot do any atomic writes without out of place writes. */ if (!xfs_can_sw_atomic_write(mp)) return 0; /* * Atomic write limits must always be a power-of-2, according to * generic_atomic_write_valid. */ return rounddown_pow_of_two(xfs_calc_max_atomic_write_fsblocks(mp)); } /* * Free all CoW staging blocks that are still referenced by the ondisk refcount * metadata. The ondisk metadata does not track which inode created the * staging extent, so callers must ensure that there are no cached inodes with * live CoW staging extents. */ int xfs_reflink_recover_cow( struct xfs_mount *mp) { struct xfs_perag *pag = NULL; struct xfs_rtgroup *rtg = NULL; int error = 0; if (!xfs_has_reflink(mp)) return 0; while ((pag = xfs_perag_next(mp, pag))) { error = xfs_refcount_recover_cow_leftovers(pag_group(pag)); if (error) { xfs_perag_rele(pag); return error; } } while ((rtg = xfs_rtgroup_next(mp, rtg))) { error = xfs_refcount_recover_cow_leftovers(rtg_group(rtg)); if (error) { xfs_rtgroup_rele(rtg); return error; } } return 0; } /* * Reflinking (Block) Ranges of Two Files Together * * First, ensure that the reflink flag is set on both inodes. The flag is an * optimization to avoid unnecessary refcount btree lookups in the write path. * * Now we can iteratively remap the range of extents (and holes) in src to the * corresponding ranges in dest. Let drange and srange denote the ranges of * logical blocks in dest and src touched by the reflink operation. * * While the length of drange is greater than zero, * - Read src's bmbt at the start of srange ("imap") * - If imap doesn't exist, make imap appear to start at the end of srange * with zero length. * - If imap starts before srange, advance imap to start at srange. * - If imap goes beyond srange, truncate imap to end at the end of srange. * - Punch (imap start - srange start + imap len) blocks from dest at * offset (drange start). * - If imap points to a real range of pblks, * > Increase the refcount of the imap's pblks * > Map imap's pblks into dest at the offset * (drange start + imap start - srange start) * - Advance drange and srange by (imap start - srange start + imap len) * * Finally, if the reflink made dest longer, update both the in-core and * on-disk file sizes. * * ASCII Art Demonstration: * * Let's say we want to reflink this source file: * * ----SSSSSSS-SSSSS----SSSSSS (src file) * <--------------------> * * into this destination file: * * --DDDDDDDDDDDDDDDDDDD--DDD (dest file) * <--------------------> * '-' means a hole, and 'S' and 'D' are written blocks in the src and dest. * Observe that the range has different logical offsets in either file. * * Consider that the first extent in the source file doesn't line up with our * reflink range. Unmapping and remapping are separate operations, so we can * unmap more blocks from the destination file than we remap. * * ----SSSSSSS-SSSSS----SSSSSS * <-------> * --DDDDD---------DDDDD--DDD * <-------> * * Now remap the source extent into the destination file: * * ----SSSSSSS-SSSSS----SSSSSS * <-------> * --DDDDD--SSSSSSSDDDDD--DDD * <-------> * * Do likewise with the second hole and extent in our range. Holes in the * unmap range don't affect our operation. * * ----SSSSSSS-SSSSS----SSSSSS * <----> * --DDDDD--SSSSSSS-SSSSS-DDD * <----> * * Finally, unmap and remap part of the third extent. This will increase the * size of the destination file. * * ----SSSSSSS-SSSSS----SSSSSS * <-----> * --DDDDD--SSSSSSS-SSSSS----SSS * <-----> * * Once we update the destination file's i_size, we're done. */ /* * Ensure the reflink bit is set in both inodes. */ STATIC int xfs_reflink_set_inode_flag( struct xfs_inode *src, struct xfs_inode *dest) { struct xfs_mount *mp = src->i_mount; int error; struct xfs_trans *tp; if (xfs_is_reflink_inode(src) && xfs_is_reflink_inode(dest)) return 0; error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); if (error) goto out_error; /* Lock both files against IO */ if (src->i_ino == dest->i_ino) xfs_ilock(src, XFS_ILOCK_EXCL); else xfs_lock_two_inodes(src, XFS_ILOCK_EXCL, dest, XFS_ILOCK_EXCL); if (!xfs_is_reflink_inode(src)) { trace_xfs_reflink_set_inode_flag(src); xfs_trans_ijoin(tp, src, XFS_ILOCK_EXCL); src->i_diflags2 |= XFS_DIFLAG2_REFLINK; xfs_trans_log_inode(tp, src, XFS_ILOG_CORE); xfs_ifork_init_cow(src); } else xfs_iunlock(src, XFS_ILOCK_EXCL); if (src->i_ino == dest->i_ino) goto commit_flags; if (!xfs_is_reflink_inode(dest)) { trace_xfs_reflink_set_inode_flag(dest); xfs_trans_ijoin(tp, dest, XFS_ILOCK_EXCL); dest->i_diflags2 |= XFS_DIFLAG2_REFLINK; xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE); xfs_ifork_init_cow(dest); } else xfs_iunlock(dest, XFS_ILOCK_EXCL); commit_flags: error = xfs_trans_commit(tp); if (error) goto out_error; return error; out_error: trace_xfs_reflink_set_inode_flag_error(dest, error, _RET_IP_); return error; } /* * Update destination inode size & cowextsize hint, if necessary. */ int xfs_reflink_update_dest( struct xfs_inode *dest, xfs_off_t newlen, xfs_extlen_t cowextsize, unsigned int remap_flags) { struct xfs_mount *mp = dest->i_mount; struct xfs_trans *tp; int error; if (newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0) return 0; error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); if (error) goto out_error; xfs_ilock(dest, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, dest, XFS_ILOCK_EXCL); if (newlen > i_size_read(VFS_I(dest))) { trace_xfs_reflink_update_inode_size(dest, newlen); i_size_write(VFS_I(dest), newlen); dest->i_disk_size = newlen; } if (cowextsize) { dest->i_cowextsize = cowextsize; dest->i_diflags2 |= XFS_DIFLAG2_COWEXTSIZE; } xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE); error = xfs_trans_commit(tp); if (error) goto out_error; return error; out_error: trace_xfs_reflink_update_inode_size_error(dest, error, _RET_IP_); return error; } /* * Do we have enough reserve in this AG to handle a reflink? The refcount * btree already reserved all the space it needs, but the rmap btree can grow * infinitely, so we won't allow more reflinks when the AG is down to the * btree reserves. */ static int xfs_reflink_ag_has_free_space( struct xfs_mount *mp, struct xfs_inode *ip, xfs_fsblock_t fsb) { struct xfs_perag *pag; xfs_agnumber_t agno; int error = 0; if (!xfs_has_rmapbt(mp)) return 0; if (XFS_IS_REALTIME_INODE(ip)) { if (xfs_metafile_resv_critical(mp)) return -ENOSPC; return 0; } agno = XFS_FSB_TO_AGNO(mp, fsb); pag = xfs_perag_get(mp, agno); if (xfs_ag_resv_critical(pag, XFS_AG_RESV_RMAPBT) || xfs_ag_resv_critical(pag, XFS_AG_RESV_METADATA)) error = -ENOSPC; xfs_perag_put(pag); return error; } /* * Remap the given extent into the file. The dmap blockcount will be set to * the number of blocks that were actually remapped. */ STATIC int xfs_reflink_remap_extent( struct xfs_inode *ip, struct xfs_bmbt_irec *dmap, xfs_off_t new_isize) { struct xfs_bmbt_irec smap; struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; xfs_off_t newlen; int64_t qdelta = 0; unsigned int dblocks, rblocks, resblks; bool quota_reserved = true; bool smap_real; bool dmap_written = xfs_bmap_is_written_extent(dmap); bool isrt = XFS_IS_REALTIME_INODE(ip); int iext_delta = 0; int nimaps; int error; /* * Start a rolling transaction to switch the mappings. * * Adding a written extent to the extent map can cause a bmbt split, * and removing a mapped extent from the extent can cause a bmbt split. * The two operations cannot both cause a split since they operate on * the same index in the bmap btree, so we only need a reservation for * one bmbt split if either thing is happening. However, we haven't * locked the inode yet, so we reserve assuming this is the case. * * The first allocation call tries to reserve enough space to handle * mapping dmap into a sparse part of the file plus the bmbt split. We * haven't locked the inode or read the existing mapping yet, so we do * not know for sure that we need the space. This should succeed most * of the time. * * If the first attempt fails, try again but reserving only enough * space to handle a bmbt split. This is the hard minimum requirement, * and we revisit quota reservations later when we know more about what * we're remapping. */ resblks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); if (XFS_IS_REALTIME_INODE(ip)) { dblocks = resblks; rblocks = dmap->br_blockcount; } else { dblocks = resblks + dmap->br_blockcount; rblocks = 0; } error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, dblocks, rblocks, false, &tp); if (error == -EDQUOT || error == -ENOSPC) { quota_reserved = false; error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, resblks, 0, false, &tp); } if (error) goto out; /* * Read what's currently mapped in the destination file into smap. * If smap isn't a hole, we will have to remove it before we can add * dmap to the destination file. */ nimaps = 1; error = xfs_bmapi_read(ip, dmap->br_startoff, dmap->br_blockcount, &smap, &nimaps, 0); if (error) goto out_cancel; ASSERT(nimaps == 1 && smap.br_startoff == dmap->br_startoff); smap_real = xfs_bmap_is_real_extent(&smap); /* * We can only remap as many blocks as the smaller of the two extent * maps, because we can only remap one extent at a time. */ dmap->br_blockcount = min(dmap->br_blockcount, smap.br_blockcount); ASSERT(dmap->br_blockcount == smap.br_blockcount); trace_xfs_reflink_remap_extent_dest(ip, &smap); /* * Two extents mapped to the same physical block must not have * different states; that's filesystem corruption. Move on to the next * extent if they're both holes or both the same physical extent. */ if (dmap->br_startblock == smap.br_startblock) { if (dmap->br_state != smap.br_state) { xfs_bmap_mark_sick(ip, XFS_DATA_FORK); error = -EFSCORRUPTED; } goto out_cancel; } /* If both extents are unwritten, leave them alone. */ if (dmap->br_state == XFS_EXT_UNWRITTEN && smap.br_state == XFS_EXT_UNWRITTEN) goto out_cancel; /* No reflinking if the AG of the dest mapping is low on space. */ if (dmap_written) { error = xfs_reflink_ag_has_free_space(mp, ip, dmap->br_startblock); if (error) goto out_cancel; } /* * Increase quota reservation if we think the quota block counter for * this file could increase. * * If we are mapping a written extent into the file, we need to have * enough quota block count reservation to handle the blocks in that * extent. We log only the delta to the quota block counts, so if the * extent we're unmapping also has blocks allocated to it, we don't * need a quota reservation for the extent itself. * * Note that if we're replacing a delalloc reservation with a written * extent, we have to take the full quota reservation because removing * the delalloc reservation gives the block count back to the quota * count. This is suboptimal, but the VFS flushed the dest range * before we started. That should have removed all the delalloc * reservations, but we code defensively. * * xfs_trans_alloc_inode above already tried to grab an even larger * quota reservation, and kicked off a blockgc scan if it couldn't. * If we can't get a potentially smaller quota reservation now, we're * done. */ if (!quota_reserved && !smap_real && dmap_written) { if (XFS_IS_REALTIME_INODE(ip)) { dblocks = 0; rblocks = dmap->br_blockcount; } else { dblocks = dmap->br_blockcount; rblocks = 0; } error = xfs_trans_reserve_quota_nblks(tp, ip, dblocks, rblocks, false); if (error) goto out_cancel; } if (smap_real) ++iext_delta; if (dmap_written) ++iext_delta; error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK, iext_delta); if (error) goto out_cancel; if (smap_real) { /* * If the extent we're unmapping is backed by storage (written * or not), unmap the extent and drop its refcount. */ xfs_bmap_unmap_extent(tp, ip, XFS_DATA_FORK, &smap); xfs_refcount_decrease_extent(tp, isrt, &smap); qdelta -= smap.br_blockcount; } else if (smap.br_startblock == DELAYSTARTBLOCK) { int done; /* * If the extent we're unmapping is a delalloc reservation, * we can use the regular bunmapi function to release the * incore state. Dropping the delalloc reservation takes care * of the quota reservation for us. */ error = xfs_bunmapi(NULL, ip, smap.br_startoff, smap.br_blockcount, 0, 1, &done); if (error) goto out_cancel; ASSERT(done); } /* * If the extent we're sharing is backed by written storage, increase * its refcount and map it into the file. */ if (dmap_written) { xfs_refcount_increase_extent(tp, isrt, dmap); xfs_bmap_map_extent(tp, ip, XFS_DATA_FORK, dmap); qdelta += dmap->br_blockcount; } xfs_reflink_update_quota(tp, ip, false, qdelta); /* Update dest isize if needed. */ newlen = XFS_FSB_TO_B(mp, dmap->br_startoff + dmap->br_blockcount); newlen = min_t(xfs_off_t, newlen, new_isize); if (newlen > i_size_read(VFS_I(ip))) { trace_xfs_reflink_update_inode_size(ip, newlen); i_size_write(VFS_I(ip), newlen); ip->i_disk_size = newlen; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); } /* Commit everything and unlock. */ error = xfs_trans_commit(tp); goto out_unlock; out_cancel: xfs_trans_cancel(tp); out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); out: if (error) trace_xfs_reflink_remap_extent_error(ip, error, _RET_IP_); return error; } /* Remap a range of one file to the other. */ int xfs_reflink_remap_blocks( struct xfs_inode *src, loff_t pos_in, struct xfs_inode *dest, loff_t pos_out, loff_t remap_len, loff_t *remapped) { struct xfs_bmbt_irec imap; struct xfs_mount *mp = src->i_mount; xfs_fileoff_t srcoff = XFS_B_TO_FSBT(mp, pos_in); xfs_fileoff_t destoff = XFS_B_TO_FSBT(mp, pos_out); xfs_filblks_t len; xfs_filblks_t remapped_len = 0; xfs_off_t new_isize = pos_out + remap_len; int nimaps; int error = 0; len = min_t(xfs_filblks_t, XFS_B_TO_FSB(mp, remap_len), XFS_MAX_FILEOFF); trace_xfs_reflink_remap_blocks(src, srcoff, len, dest, destoff); while (len > 0) { unsigned int lock_mode; /* Read extent from the source file */ nimaps = 1; lock_mode = xfs_ilock_data_map_shared(src); error = xfs_bmapi_read(src, srcoff, len, &imap, &nimaps, 0); xfs_iunlock(src, lock_mode); if (error) break; /* * The caller supposedly flushed all dirty pages in the source * file range, which means that writeback should have allocated * or deleted all delalloc reservations in that range. If we * find one, that's a good sign that something is seriously * wrong here. */ ASSERT(nimaps == 1 && imap.br_startoff == srcoff); if (imap.br_startblock == DELAYSTARTBLOCK) { ASSERT(imap.br_startblock != DELAYSTARTBLOCK); xfs_bmap_mark_sick(src, XFS_DATA_FORK); error = -EFSCORRUPTED; break; } trace_xfs_reflink_remap_extent_src(src, &imap); /* Remap into the destination file at the given offset. */ imap.br_startoff = destoff; error = xfs_reflink_remap_extent(dest, &imap, new_isize); if (error) break; if (fatal_signal_pending(current)) { error = -EINTR; break; } /* Advance drange/srange */ srcoff += imap.br_blockcount; destoff += imap.br_blockcount; len -= imap.br_blockcount; remapped_len += imap.br_blockcount; cond_resched(); } if (error) trace_xfs_reflink_remap_blocks_error(dest, error, _RET_IP_); *remapped = min_t(loff_t, remap_len, XFS_FSB_TO_B(src->i_mount, remapped_len)); return error; } /* * If we're reflinking to a point past the destination file's EOF, we must * zero any speculative post-EOF preallocations that sit between the old EOF * and the destination file offset. */ static int xfs_reflink_zero_posteof( struct xfs_inode *ip, loff_t pos) { loff_t isize = i_size_read(VFS_I(ip)); if (pos <= isize) return 0; trace_xfs_zero_eof(ip, isize, pos - isize); return xfs_zero_range(ip, isize, pos - isize, NULL, NULL); } /* * Prepare two files for range cloning. Upon a successful return both inodes * will have the iolock and mmaplock held, the page cache of the out file will * be truncated, and any leases on the out file will have been broken. This * function borrows heavily from xfs_file_aio_write_checks. * * The VFS allows partial EOF blocks to "match" for dedupe even though it hasn't * checked that the bytes beyond EOF physically match. Hence we cannot use the * EOF block in the source dedupe range because it's not a complete block match, * hence can introduce a corruption into the file that has it's block replaced. * * In similar fashion, the VFS file cloning also allows partial EOF blocks to be * "block aligned" for the purposes of cloning entire files. However, if the * source file range includes the EOF block and it lands within the existing EOF * of the destination file, then we can expose stale data from beyond the source * file EOF in the destination file. * * XFS doesn't support partial block sharing, so in both cases we have check * these cases ourselves. For dedupe, we can simply round the length to dedupe * down to the previous whole block and ignore the partial EOF block. While this * means we can't dedupe the last block of a file, this is an acceptible * tradeoff for simplicity on implementation. * * For cloning, we want to share the partial EOF block if it is also the new EOF * block of the destination file. If the partial EOF block lies inside the * existing destination EOF, then we have to abort the clone to avoid exposing * stale data in the destination file. Hence we reject these clone attempts with * -EINVAL in this case. */ int xfs_reflink_remap_prep( struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *len, unsigned int remap_flags) { struct inode *inode_in = file_inode(file_in); struct xfs_inode *src = XFS_I(inode_in); struct inode *inode_out = file_inode(file_out); struct xfs_inode *dest = XFS_I(inode_out); int ret; /* Lock both files against IO */ ret = xfs_ilock2_io_mmap(src, dest); if (ret) return ret; /* Check file eligibility and prepare for block sharing. */ ret = -EINVAL; /* Can't reflink between data and rt volumes */ if (XFS_IS_REALTIME_INODE(src) != XFS_IS_REALTIME_INODE(dest)) goto out_unlock; /* Don't share DAX file data with non-DAX file. */ if (IS_DAX(inode_in) != IS_DAX(inode_out)) goto out_unlock; if (!IS_DAX(inode_in)) ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out, len, remap_flags); else ret = dax_remap_file_range_prep(file_in, pos_in, file_out, pos_out, len, remap_flags, &xfs_read_iomap_ops); if (ret || *len == 0) goto out_unlock; /* Attach dquots to dest inode before changing block map */ ret = xfs_qm_dqattach(dest); if (ret) goto out_unlock; /* * Zero existing post-eof speculative preallocations in the destination * file. */ ret = xfs_reflink_zero_posteof(dest, pos_out); if (ret) goto out_unlock; /* Set flags and remap blocks. */ ret = xfs_reflink_set_inode_flag(src, dest); if (ret) goto out_unlock; /* * If pos_out > EOF, we may have dirtied blocks between EOF and * pos_out. In that case, we need to extend the flush and unmap to cover * from EOF to the end of the copy length. */ if (pos_out > XFS_ISIZE(dest)) { loff_t flen = *len + (pos_out - XFS_ISIZE(dest)); ret = xfs_flush_unmap_range(dest, XFS_ISIZE(dest), flen); } else { ret = xfs_flush_unmap_range(dest, pos_out, *len); } if (ret) goto out_unlock; xfs_iflags_set(src, XFS_IREMAPPING); if (inode_in != inode_out) xfs_ilock_demote(src, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL); return 0; out_unlock: xfs_iunlock2_io_mmap(src, dest); return ret; } /* Does this inode need the reflink flag? */ int xfs_reflink_inode_has_shared_extents( struct xfs_trans *tp, struct xfs_inode *ip, bool *has_shared) { struct xfs_bmbt_irec got; struct xfs_mount *mp = ip->i_mount; struct xfs_ifork *ifp; struct xfs_iext_cursor icur; bool found; int error; ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK); error = xfs_iread_extents(tp, ip, XFS_DATA_FORK); if (error) return error; *has_shared = false; found = xfs_iext_lookup_extent(ip, ifp, 0, &icur, &got); while (found) { xfs_extlen_t shared_offset, shared_len; if (isnullstartblock(got.br_startblock) || got.br_state != XFS_EXT_NORM) goto next; if (XFS_IS_REALTIME_INODE(ip)) error = xfs_reflink_find_rtshared(mp, tp, &got, &shared_offset, &shared_len, false); else error = xfs_reflink_find_shared(mp, tp, &got, &shared_offset, &shared_len, false); if (error) return error; /* Is there still a shared block here? */ if (shared_len) { *has_shared = true; return 0; } next: found = xfs_iext_next_extent(ifp, &icur, &got); } return 0; } /* * Clear the inode reflink flag if there are no shared extents. * * The caller is responsible for joining the inode to the transaction passed in. * The inode will be joined to the transaction that is returned to the caller. */ int xfs_reflink_clear_inode_flag( struct xfs_inode *ip, struct xfs_trans **tpp) { bool needs_flag; int error = 0; ASSERT(xfs_is_reflink_inode(ip)); if (!xfs_can_free_cowblocks(ip)) return 0; error = xfs_reflink_inode_has_shared_extents(*tpp, ip, &needs_flag); if (error || needs_flag) return error; /* * We didn't find any shared blocks so turn off the reflink flag. * First, get rid of any leftover CoW mappings. */ error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, XFS_MAX_FILEOFF, true); if (error) return error; /* Clear the inode flag. */ trace_xfs_reflink_unset_inode_flag(ip); ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK; xfs_inode_clear_cowblocks_tag(ip); xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE); return error; } /* * Clear the inode reflink flag if there are no shared extents and the size * hasn't changed. */ STATIC int xfs_reflink_try_clear_inode_flag( struct xfs_inode *ip) { struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; int error = 0; /* Start a rolling transaction to remove the mappings */ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp); if (error) return error; xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); error = xfs_reflink_clear_inode_flag(ip, &tp); if (error) goto cancel; error = xfs_trans_commit(tp); if (error) goto out; xfs_iunlock(ip, XFS_ILOCK_EXCL); return 0; cancel: xfs_trans_cancel(tp); out: xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; } /* * Pre-COW all shared blocks within a given byte range of a file and turn off * the reflink flag if we unshare all of the file's blocks. */ int xfs_reflink_unshare( struct xfs_inode *ip, xfs_off_t offset, xfs_off_t len) { struct inode *inode = VFS_I(ip); int error; if (!xfs_is_reflink_inode(ip)) return 0; trace_xfs_reflink_unshare(ip, offset, len); inode_dio_wait(inode); if (IS_DAX(inode)) error = dax_file_unshare(inode, offset, len, &xfs_dax_write_iomap_ops); else error = iomap_file_unshare(inode, offset, len, &xfs_buffered_write_iomap_ops, &xfs_iomap_write_ops); if (error) goto out; error = filemap_write_and_wait_range(inode->i_mapping, offset, offset + len - 1); if (error) goto out; /* Turn off the reflink flag if possible. */ error = xfs_reflink_try_clear_inode_flag(ip); if (error) goto out; return 0; out: trace_xfs_reflink_unshare_error(ip, error, _RET_IP_); return error; } /* * Can we use reflink with this realtime extent size? Note that we don't check * for rblocks > 0 here because this can be called as part of attaching a new * rt section. */ bool xfs_reflink_supports_rextsize( struct xfs_mount *mp, unsigned int rextsize) { /* reflink on the realtime device requires rtgroups */ if (!xfs_has_rtgroups(mp)) return false; /* * Reflink doesn't support rt extent size larger than a single fsblock * because we would have to perform CoW-around for unaligned write * requests to guarantee that we always remap entire rt extents. */ if (rextsize != 1) return false; return true; }
1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 // SPDX-License-Identifier: GPL-2.0-or-later /* * HID driver for some petalynx "special" devices * * Copyright (c) 1999 Andreas Gal * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2006-2007 Jiri Kosina * Copyright (c) 2008 Jiri Slaby */ /* */ #include <linux/device.h> #include <linux/hid.h> #include <linux/module.h> #include "hid-ids.h" /* Petalynx Maxter Remote has maximum for consumer page set too low */ static const __u8 *pl_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { if (*rsize >= 62 && rdesc[39] == 0x2a && rdesc[40] == 0xf5 && rdesc[41] == 0x00 && rdesc[59] == 0x26 && rdesc[60] == 0xf9 && rdesc[61] == 0x00) { hid_info(hdev, "fixing up Petalynx Maxter Remote report descriptor\n"); rdesc[60] = 0xfa; rdesc[40] = 0xfa; } return rdesc; } #define pl_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \ EV_KEY, (c)) static int pl_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { if ((usage->hid & HID_USAGE_PAGE) == HID_UP_LOGIVENDOR) { switch (usage->hid & HID_USAGE) { case 0x05a: pl_map_key_clear(KEY_TEXT); break; case 0x05b: pl_map_key_clear(KEY_RED); break; case 0x05c: pl_map_key_clear(KEY_GREEN); break; case 0x05d: pl_map_key_clear(KEY_YELLOW); break; case 0x05e: pl_map_key_clear(KEY_BLUE); break; default: return 0; } return 1; } if ((usage->hid & HID_USAGE_PAGE) == HID_UP_CONSUMER) { switch (usage->hid & HID_USAGE) { case 0x0f6: pl_map_key_clear(KEY_NEXT); break; case 0x0fa: pl_map_key_clear(KEY_BACK); break; default: return 0; } return 1; } return 0; } static int pl_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; hdev->quirks |= HID_QUIRK_NOGET; ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); goto err_free; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (ret) { hid_err(hdev, "hw start failed\n"); goto err_free; } return 0; err_free: return ret; } static const struct hid_device_id pl_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_PETALYNX, USB_DEVICE_ID_PETALYNX_MAXTER_REMOTE) }, { } }; MODULE_DEVICE_TABLE(hid, pl_devices); static struct hid_driver pl_driver = { .name = "petalynx", .id_table = pl_devices, .report_fixup = pl_report_fixup, .input_mapping = pl_input_mapping, .probe = pl_probe, }; module_hid_driver(pl_driver); MODULE_DESCRIPTION("HID driver for some petalynx \"special\" devices"); MODULE_LICENSE("GPL");
8752 242 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 /* * linux/include/linux/console.h * * Copyright (C) 1993 Hamish Macdonald * * This file is subject to the terms and conditions of the GNU General Public * License. See the file COPYING in the main directory of this archive * for more details. * * Changed: * 10-Mar-94: Arno Griffioen: Conversion for vt100 emulator port from PC LINUX */ #ifndef _LINUX_CONSOLE_H_ #define _LINUX_CONSOLE_H_ 1 #include <linux/atomic.h> #include <linux/bits.h> #include <linux/irq_work.h> #include <linux/rculist.h> #include <linux/rcuwait.h> #include <linux/types.h> #include <linux/vesa.h> struct vc_data; struct console_font_op; struct console_font; struct module; struct tty_struct; struct notifier_block; enum con_scroll { SM_UP, SM_DOWN, }; enum vc_intensity; /** * struct consw - callbacks for consoles * * @owner: the module to get references of when this console is used * @con_startup: set up the console and return its name (like VGA, EGA, ...) * @con_init: initialize the console on @vc. @init is true for the very first * call on this @vc. * @con_deinit: deinitialize the console from @vc. * @con_clear: erase @count characters at [@x, @y] on @vc. @count >= 1. * @con_putc: emit one character with attributes @ca to [@x, @y] on @vc. * (optional -- @con_putcs would be called instead) * @con_putcs: emit @count characters with attributes @s to [@x, @y] on @vc. * @con_cursor: enable/disable cursor depending on @enable * @con_scroll: move lines from @top to @bottom in direction @dir by @lines. * Return true if no generic handling should be done. * Invoked by csi_M and printing to the console. * @con_switch: notifier about the console switch; it is supposed to return * true if a redraw is needed. * @con_blank: blank/unblank the console. The target mode is passed in @blank. * @mode_switch is set if changing from/to text/graphics. The hook * is supposed to return true if a redraw is needed. * @con_font_set: set console @vc font to @font with height @vpitch. @flags can * be %KD_FONT_FLAG_DONT_RECALC. (optional) * @con_font_get: fetch the current font on @vc of height @vpitch into @font. * (optional) * @con_font_default: set default font on @vc. @name can be %NULL or font name * to search for. @font can be filled back. (optional) * @con_resize: resize the @vc console to @width x @height. @from_user is true * when this change comes from the user space. * @con_set_palette: sets the palette of the console @vc to @table (optional) * @con_scrolldelta: the contents of the console should be scrolled by @lines. * Invoked by user. (optional) * @con_set_origin: set origin (see &vc_data::vc_origin) of the @vc. If not * provided or returns false, the origin is set to * @vc->vc_screenbuf. (optional) * @con_save_screen: save screen content into @vc->vc_screenbuf. Called e.g. * upon entering graphics. (optional) * @con_build_attr: build attributes based on @color, @intensity and other * parameters. The result is used for both normal and erase * characters. (optional) * @con_invert_region: invert a region of length @count on @vc starting at @p. * (optional) * @con_debug_enter: prepare the console for the debugger. This includes, but * is not limited to, unblanking the console, loading an * appropriate palette, and allowing debugger generated output. * (optional) * @con_debug_leave: restore the console to its pre-debug state as closely as * possible. (optional) */ struct consw { struct module *owner; const char *(*con_startup)(void); void (*con_init)(struct vc_data *vc, bool init); void (*con_deinit)(struct vc_data *vc); void (*con_clear)(struct vc_data *vc, unsigned int y, unsigned int x, unsigned int count); void (*con_putc)(struct vc_data *vc, u16 ca, unsigned int y, unsigned int x); void (*con_putcs)(struct vc_data *vc, const u16 *s, unsigned int count, unsigned int ypos, unsigned int xpos); void (*con_cursor)(struct vc_data *vc, bool enable); bool (*con_scroll)(struct vc_data *vc, unsigned int top, unsigned int bottom, enum con_scroll dir, unsigned int lines); bool (*con_switch)(struct vc_data *vc); bool (*con_blank)(struct vc_data *vc, enum vesa_blank_mode blank, bool mode_switch); int (*con_font_set)(struct vc_data *vc, const struct console_font *font, unsigned int vpitch, unsigned int flags); int (*con_font_get)(struct vc_data *vc, struct console_font *font, unsigned int vpitch); int (*con_font_default)(struct vc_data *vc, struct console_font *font, const char *name); int (*con_resize)(struct vc_data *vc, unsigned int width, unsigned int height, bool from_user); void (*con_set_palette)(struct vc_data *vc, const unsigned char *table); void (*con_scrolldelta)(struct vc_data *vc, int lines); bool (*con_set_origin)(struct vc_data *vc); void (*con_save_screen)(struct vc_data *vc); u8 (*con_build_attr)(struct vc_data *vc, u8 color, enum vc_intensity intensity, bool blink, bool underline, bool reverse, bool italic); void (*con_invert_region)(struct vc_data *vc, u16 *p, int count); void (*con_debug_enter)(struct vc_data *vc); void (*con_debug_leave)(struct vc_data *vc); }; extern const struct consw *conswitchp; extern const struct consw dummy_con; /* dummy console buffer */ extern const struct consw vga_con; /* VGA text console */ extern const struct consw newport_con; /* SGI Newport console */ struct screen_info; #ifdef CONFIG_VGA_CONSOLE void vgacon_register_screen(struct screen_info *si); #else static inline void vgacon_register_screen(struct screen_info *si) { } #endif int con_is_bound(const struct consw *csw); int do_unregister_con_driver(const struct consw *csw); int do_take_over_console(const struct consw *sw, int first, int last, int deflt); void give_up_console(const struct consw *sw); #ifdef CONFIG_VT void con_debug_enter(struct vc_data *vc); void con_debug_leave(void); #else static inline void con_debug_enter(struct vc_data *vc) { } static inline void con_debug_leave(void) { } #endif /* * The interface for a console, or any other device that wants to capture * console messages (printer driver?) */ /** * enum cons_flags - General console flags * @CON_PRINTBUFFER: Used by newly registered consoles to avoid duplicate * output of messages that were already shown by boot * consoles or read by userspace via syslog() syscall. * @CON_CONSDEV: Indicates that the console driver is backing * /dev/console. * @CON_ENABLED: Indicates if a console is allowed to print records. If * false, the console also will not advance to later * records. * @CON_BOOT: Marks the console driver as early console driver which * is used during boot before the real driver becomes * available. It will be automatically unregistered * when the real console driver is registered unless * "keep_bootcon" parameter is used. * @CON_ANYTIME: A misnomed historical flag which tells the core code * that the legacy @console::write callback can be invoked * on a CPU which is marked OFFLINE. That is misleading as * it suggests that there is no contextual limit for * invoking the callback. The original motivation was * readiness of the per-CPU areas. * @CON_BRL: Indicates a braille device which is exempt from * receiving the printk spam for obvious reasons. * @CON_EXTENDED: The console supports the extended output format of * /dev/kmesg which requires a larger output buffer. * @CON_SUSPENDED: Indicates if a console is suspended. If true, the * printing callbacks must not be called. * @CON_NBCON: Console can operate outside of the legacy style console_lock * constraints. */ enum cons_flags { CON_PRINTBUFFER = BIT(0), CON_CONSDEV = BIT(1), CON_ENABLED = BIT(2), CON_BOOT = BIT(3), CON_ANYTIME = BIT(4), CON_BRL = BIT(5), CON_EXTENDED = BIT(6), CON_SUSPENDED = BIT(7), CON_NBCON = BIT(8), }; /** * struct nbcon_state - console state for nbcon consoles * @atom: Compound of the state fields for atomic operations * * @req_prio: The priority of a handover request * @prio: The priority of the current owner * @unsafe: Console is busy in a non takeover region * @unsafe_takeover: A hostile takeover in an unsafe state happened in the * past. The console cannot be safe until re-initialized. * @cpu: The CPU on which the owner runs * * To be used for reading and preparing of the value stored in the nbcon * state variable @console::nbcon_state. * * The @prio and @req_prio fields are particularly important to allow * spin-waiting to timeout and give up without the risk of a waiter being * assigned the lock after giving up. */ struct nbcon_state { union { unsigned int atom; struct { unsigned int prio : 2; unsigned int req_prio : 2; unsigned int unsafe : 1; unsigned int unsafe_takeover : 1; unsigned int cpu : 24; }; }; }; /* * The nbcon_state struct is used to easily create and interpret values that * are stored in the @console::nbcon_state variable. Ensure this struct stays * within the size boundaries of the atomic variable's underlying type in * order to avoid any accidental truncation. */ static_assert(sizeof(struct nbcon_state) <= sizeof(int)); /** * enum nbcon_prio - console owner priority for nbcon consoles * @NBCON_PRIO_NONE: Unused * @NBCON_PRIO_NORMAL: Normal (non-emergency) usage * @NBCON_PRIO_EMERGENCY: Emergency output (WARN/OOPS...) * @NBCON_PRIO_PANIC: Panic output * @NBCON_PRIO_MAX: The number of priority levels * * A higher priority context can takeover the console when it is * in the safe state. The final attempt to flush consoles in panic() * can be allowed to do so even in an unsafe state (Hope and pray). */ enum nbcon_prio { NBCON_PRIO_NONE = 0, NBCON_PRIO_NORMAL, NBCON_PRIO_EMERGENCY, NBCON_PRIO_PANIC, NBCON_PRIO_MAX, }; struct console; struct printk_buffers; /** * struct nbcon_context - Context for console acquire/release * @console: The associated console * @spinwait_max_us: Limit for spin-wait acquire * @prio: Priority of the context * @allow_unsafe_takeover: Allow performing takeover even if unsafe. Can * be used only with NBCON_PRIO_PANIC @prio. It * might cause a system freeze when the console * is used later. * @backlog: Ringbuffer has pending records * @pbufs: Pointer to the text buffer for this context * @seq: The sequence number to print for this context */ struct nbcon_context { /* members set by caller */ struct console *console; unsigned int spinwait_max_us; enum nbcon_prio prio; unsigned int allow_unsafe_takeover : 1; /* members set by emit */ unsigned int backlog : 1; /* members set by acquire */ struct printk_buffers *pbufs; u64 seq; }; /** * struct nbcon_write_context - Context handed to the nbcon write callbacks * @ctxt: The core console context * @outbuf: Pointer to the text buffer for output * @len: Length to write * @unsafe_takeover: If a hostile takeover in an unsafe state has occurred */ struct nbcon_write_context { struct nbcon_context __private ctxt; char *outbuf; unsigned int len; bool unsafe_takeover; }; /** * struct console - The console descriptor structure * @name: The name of the console driver * @write: Legacy write callback to output messages (Optional) * @read: Read callback for console input (Optional) * @device: The underlying TTY device driver (Optional) * @unblank: Callback to unblank the console (Optional) * @setup: Callback for initializing the console (Optional) * @exit: Callback for teardown of the console (Optional) * @match: Callback for matching a console (Optional) * @flags: Console flags. See enum cons_flags * @index: Console index, e.g. port number * @cflag: TTY control mode flags * @ispeed: TTY input speed * @ospeed: TTY output speed * @seq: Sequence number of the next ringbuffer record to print * @dropped: Number of unreported dropped ringbuffer records * @data: Driver private data * @node: hlist node for the console list * * @nbcon_state: State for nbcon consoles * @nbcon_seq: Sequence number of the next record for nbcon to print * @nbcon_device_ctxt: Context available for non-printing operations * @nbcon_prev_seq: Seq num the previous nbcon owner was assigned to print * @pbufs: Pointer to nbcon private buffer * @kthread: Printer kthread for this console * @rcuwait: RCU-safe wait object for @kthread waking * @irq_work: Defer @kthread waking to IRQ work context */ struct console { char name[16]; void (*write)(struct console *co, const char *s, unsigned int count); int (*read)(struct console *co, char *s, unsigned int count); struct tty_driver *(*device)(struct console *co, int *index); void (*unblank)(void); int (*setup)(struct console *co, char *options); int (*exit)(struct console *co); int (*match)(struct console *co, char *name, int idx, char *options); short flags; short index; int cflag; uint ispeed; uint ospeed; u64 seq; unsigned long dropped; void *data; struct hlist_node node; /* nbcon console specific members */ /** * @write_atomic: * * NBCON callback to write out text in any context. (Optional) * * This callback is called with the console already acquired. However, * a higher priority context is allowed to take it over by default. * * The callback must call nbcon_enter_unsafe() and nbcon_exit_unsafe() * around any code where the takeover is not safe, for example, when * manipulating the serial port registers. * * nbcon_enter_unsafe() will fail if the context has lost the console * ownership in the meantime. In this case, the callback is no longer * allowed to go forward. It must back out immediately and carefully. * The buffer content is also no longer trusted since it no longer * belongs to the context. * * The callback should allow the takeover whenever it is safe. It * increases the chance to see messages when the system is in trouble. * If the driver must reacquire ownership in order to finalize or * revert hardware changes, nbcon_reacquire_nobuf() can be used. * However, on reacquire the buffer content is no longer available. A * reacquire cannot be used to resume printing. * * The callback can be called from any context (including NMI). * Therefore it must avoid usage of any locking and instead rely * on the console ownership for synchronization. */ void (*write_atomic)(struct console *con, struct nbcon_write_context *wctxt); /** * @write_thread: * * NBCON callback to write out text in task context. * * This callback must be called only in task context with both * device_lock() and the nbcon console acquired with * NBCON_PRIO_NORMAL. * * The same rules for console ownership verification and unsafe * sections handling applies as with write_atomic(). * * The console ownership handling is necessary for synchronization * against write_atomic() which is synchronized only via the context. * * The device_lock() provides the primary serialization for operations * on the device. It might be as relaxed (mutex)[*] or as tight * (disabled preemption and interrupts) as needed. It allows * the kthread to operate in the least restrictive mode[**]. * * [*] Standalone nbcon_context_try_acquire() is not safe with * the preemption enabled, see nbcon_owner_matches(). But it * can be safe when always called in the preemptive context * under the device_lock(). * * [**] The device_lock() makes sure that nbcon_context_try_acquire() * would never need to spin which is important especially with * PREEMPT_RT. */ void (*write_thread)(struct console *con, struct nbcon_write_context *wctxt); /** * @device_lock: * * NBCON callback to begin synchronization with driver code. * * Console drivers typically must deal with access to the hardware * via user input/output (such as an interactive login shell) and * output of kernel messages via printk() calls. This callback is * called by the printk-subsystem whenever it needs to synchronize * with hardware access by the driver. It should be implemented to * use whatever synchronization mechanism the driver is using for * itself (for example, the port lock for uart serial consoles). * * The callback is always called from task context. It may use any * synchronization method required by the driver. * * IMPORTANT: The callback MUST disable migration. The console driver * may be using a synchronization mechanism that already takes * care of this (such as spinlocks). Otherwise this function must * explicitly call migrate_disable(). * * The flags argument is provided as a convenience to the driver. It * will be passed again to device_unlock(). It can be ignored if the * driver does not need it. */ void (*device_lock)(struct console *con, unsigned long *flags); /** * @device_unlock: * * NBCON callback to finish synchronization with driver code. * * It is the counterpart to device_lock(). * * This callback is always called from task context. It must * appropriately re-enable migration (depending on how device_lock() * disabled migration). * * The flags argument is the value of the same variable that was * passed to device_lock(). */ void (*device_unlock)(struct console *con, unsigned long flags); atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; struct nbcon_context __private nbcon_device_ctxt; atomic_long_t __private nbcon_prev_seq; struct printk_buffers *pbufs; struct task_struct *kthread; struct rcuwait rcuwait; struct irq_work irq_work; }; #ifdef CONFIG_LOCKDEP extern void lockdep_assert_console_list_lock_held(void); #else static inline void lockdep_assert_console_list_lock_held(void) { } #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC extern bool console_srcu_read_lock_is_held(void); #else static inline bool console_srcu_read_lock_is_held(void) { return 1; } #endif extern int console_srcu_read_lock(void); extern void console_srcu_read_unlock(int cookie); extern void console_list_lock(void) __acquires(console_mutex); extern void console_list_unlock(void) __releases(console_mutex); extern struct hlist_head console_list; /** * console_srcu_read_flags - Locklessly read flags of a possibly registered * console * @con: struct console pointer of console to read flags from * * Locklessly reading @con->flags provides a consistent read value because * there is at most one CPU modifying @con->flags and that CPU is using only * read-modify-write operations to do so. * * Requires console_srcu_read_lock to be held, which implies that @con might * be a registered console. The purpose of holding console_srcu_read_lock is * to guarantee that the console state is valid (CON_SUSPENDED/CON_ENABLED) * and that no exit/cleanup routines will run if the console is currently * undergoing unregistration. * * If the caller is holding the console_list_lock or it is _certain_ that * @con is not and will not become registered, the caller may read * @con->flags directly instead. * * Context: Any context. * Return: The current value of the @con->flags field. */ static inline short console_srcu_read_flags(const struct console *con) { WARN_ON_ONCE(!console_srcu_read_lock_is_held()); /* * The READ_ONCE() matches the WRITE_ONCE() when @flags are modified * for registered consoles with console_srcu_write_flags(). */ return data_race(READ_ONCE(con->flags)); } /** * console_srcu_write_flags - Write flags for a registered console * @con: struct console pointer of console to write flags to * @flags: new flags value to write * * Only use this function to write flags for registered consoles. It * requires holding the console_list_lock. * * Context: Any context. */ static inline void console_srcu_write_flags(struct console *con, short flags) { lockdep_assert_console_list_lock_held(); /* This matches the READ_ONCE() in console_srcu_read_flags(). */ WRITE_ONCE(con->flags, flags); } /* Variant of console_is_registered() when the console_list_lock is held. */ static inline bool console_is_registered_locked(const struct console *con) { lockdep_assert_console_list_lock_held(); return !hlist_unhashed(&con->node); } /* * console_is_registered - Check if the console is registered * @con: struct console pointer of console to check * * Context: Process context. May sleep while acquiring console list lock. * Return: true if the console is in the console list, otherwise false. * * If false is returned for a console that was previously registered, it * can be assumed that the console's unregistration is fully completed, * including the exit() callback after console list removal. */ static inline bool console_is_registered(const struct console *con) { bool ret; console_list_lock(); ret = console_is_registered_locked(con); console_list_unlock(); return ret; } /** * for_each_console_srcu() - Iterator over registered consoles * @con: struct console pointer used as loop cursor * * Although SRCU guarantees the console list will be consistent, the * struct console fields may be updated by other CPUs while iterating. * * Requires console_srcu_read_lock to be held. Can be invoked from * any context. */ #define for_each_console_srcu(con) \ hlist_for_each_entry_srcu(con, &console_list, node, \ console_srcu_read_lock_is_held()) /** * for_each_console() - Iterator over registered consoles * @con: struct console pointer used as loop cursor * * The console list and the &console.flags are immutable while iterating. * * Requires console_list_lock to be held. */ #define for_each_console(con) \ lockdep_assert_console_list_lock_held(); \ hlist_for_each_entry(con, &console_list, node) #ifdef CONFIG_PRINTK extern void nbcon_cpu_emergency_enter(void); extern void nbcon_cpu_emergency_exit(void); extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt); extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt); extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt); extern void nbcon_reacquire_nobuf(struct nbcon_write_context *wctxt); #else static inline void nbcon_cpu_emergency_enter(void) { } static inline void nbcon_cpu_emergency_exit(void) { } static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; } static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; } static inline bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; } static inline void nbcon_reacquire_nobuf(struct nbcon_write_context *wctxt) { } #endif extern int console_set_on_cmdline; extern struct console *early_console; enum con_flush_mode { CONSOLE_FLUSH_PENDING, CONSOLE_REPLAY_ALL, }; extern int add_preferred_console(const char *name, const short idx, char *options); extern void console_force_preferred_locked(struct console *con); extern void register_console(struct console *); extern int unregister_console(struct console *); extern void console_lock(void); extern int console_trylock(void); extern void console_unlock(void); extern void console_conditional_schedule(void); extern void console_unblank(void); extern void console_flush_on_panic(enum con_flush_mode mode); extern struct tty_driver *console_device(int *); extern void console_suspend(struct console *); extern void console_resume(struct console *); extern int is_console_locked(void); extern int braille_register_console(struct console *, int index, char *console_options, char *braille_options); extern int braille_unregister_console(struct console *); #ifdef CONFIG_TTY extern void console_sysfs_notify(void); #else static inline void console_sysfs_notify(void) { } #endif extern bool console_suspend_enabled; /* Suspend and resume console messages over PM events */ extern void console_suspend_all(void); extern void console_resume_all(void); int mda_console_init(void); void vcs_make_sysfs(int index); void vcs_remove_sysfs(int index); /* Some debug stub to catch some of the obvious races in the VT code */ #define WARN_CONSOLE_UNLOCKED() \ WARN_ON(!atomic_read(&ignore_console_lock_warning) && \ !is_console_locked() && !oops_in_progress) /* * Increment ignore_console_lock_warning if you need to quiet * WARN_CONSOLE_UNLOCKED() for debugging purposes. */ extern atomic_t ignore_console_lock_warning; DEFINE_LOCK_GUARD_0(console_lock, console_lock(), console_unlock()); extern void console_init(void); /* For deferred console takeover */ void dummycon_register_output_notifier(struct notifier_block *nb); void dummycon_unregister_output_notifier(struct notifier_block *nb); #endif /* _LINUX_CONSOLE_H */
9 8 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 // SPDX-License-Identifier: GPL-2.0-or-later /* * Parallel SCSI (SPI) transport specific attributes exported to sysfs. * * Copyright (c) 2003 Silicon Graphics, Inc. All rights reserved. * Copyright (c) 2004, 2005 James Bottomley <James.Bottomley@SteelEye.com> */ #include <linux/ctype.h> #include <linux/init.h> #include <linux/module.h> #include <linux/workqueue.h> #include <linux/blkdev.h> #include <linux/mutex.h> #include <linux/sysfs.h> #include <linux/slab.h> #include <linux/suspend.h> #include <scsi/scsi.h> #include "scsi_priv.h" #include <scsi/scsi_device.h> #include <scsi/scsi_host.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_eh.h> #include <scsi/scsi_tcq.h> #include <scsi/scsi_transport.h> #include <scsi/scsi_transport_spi.h> #define SPI_NUM_ATTRS 14 /* increase this if you add attributes */ #define SPI_OTHER_ATTRS 1 /* Increase this if you add "always * on" attributes */ #define SPI_HOST_ATTRS 1 #define SPI_MAX_ECHO_BUFFER_SIZE 4096 #define DV_LOOPS 3 #define DV_TIMEOUT (10*HZ) #define DV_RETRIES 3 /* should only need at most * two cc/ua clears */ /* Our blacklist flags */ enum { SPI_BLIST_NOIUS = (__force blist_flags_t)0x1, }; /* blacklist table, modelled on scsi_devinfo.c */ static struct { char *vendor; char *model; blist_flags_t flags; } spi_static_device_list[] __initdata = { {"HP", "Ultrium 3-SCSI", SPI_BLIST_NOIUS }, {"IBM", "ULTRIUM-TD3", SPI_BLIST_NOIUS }, {NULL, NULL, 0} }; /* Private data accessors (keep these out of the header file) */ #define spi_dv_in_progress(x) (((struct spi_transport_attrs *)&(x)->starget_data)->dv_in_progress) #define spi_dv_mutex(x) (((struct spi_transport_attrs *)&(x)->starget_data)->dv_mutex) struct spi_internal { struct scsi_transport_template t; struct spi_function_template *f; }; #define to_spi_internal(tmpl) container_of(tmpl, struct spi_internal, t) static const int ppr_to_ps[] = { /* The PPR values 0-6 are reserved, fill them in when * the committee defines them */ -1, /* 0x00 */ -1, /* 0x01 */ -1, /* 0x02 */ -1, /* 0x03 */ -1, /* 0x04 */ -1, /* 0x05 */ -1, /* 0x06 */ 3125, /* 0x07 */ 6250, /* 0x08 */ 12500, /* 0x09 */ 25000, /* 0x0a */ 30300, /* 0x0b */ 50000, /* 0x0c */ }; /* The PPR values at which you calculate the period in ns by multiplying * by 4 */ #define SPI_STATIC_PPR 0x0c static int sprint_frac(char *dest, int value, int denom) { int frac = value % denom; int result = sprintf(dest, "%d", value / denom); if (frac == 0) return result; dest[result++] = '.'; do { denom /= 10; sprintf(dest + result, "%d", frac / denom); result++; frac %= denom; } while (frac); dest[result++] = '\0'; return result; } static int spi_execute(struct scsi_device *sdev, const void *cmd, enum req_op op, void *buffer, unsigned int bufflen, struct scsi_sense_hdr *sshdr) { blk_opf_t opf = op | REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER; struct scsi_failure failure_defs[] = { { .sense = UNIT_ATTENTION, .asc = SCMD_FAILURE_ASC_ANY, .ascq = SCMD_FAILURE_ASCQ_ANY, .allowed = DV_RETRIES, .result = SAM_STAT_CHECK_CONDITION, }, {} }; struct scsi_failures failures = { .failure_definitions = failure_defs, }; const struct scsi_exec_args exec_args = { /* bypass the SDEV_QUIESCE state with BLK_MQ_REQ_PM */ .req_flags = BLK_MQ_REQ_PM, .sshdr = sshdr, .failures = &failures, }; return scsi_execute_cmd(sdev, cmd, opf, buffer, bufflen, DV_TIMEOUT, 1, &exec_args); } static struct { enum spi_signal_type value; char *name; } signal_types[] = { { SPI_SIGNAL_UNKNOWN, "unknown" }, { SPI_SIGNAL_SE, "SE" }, { SPI_SIGNAL_LVD, "LVD" }, { SPI_SIGNAL_HVD, "HVD" }, }; static inline const char *spi_signal_to_string(enum spi_signal_type type) { int i; for (i = 0; i < ARRAY_SIZE(signal_types); i++) { if (type == signal_types[i].value) return signal_types[i].name; } return NULL; } static inline enum spi_signal_type spi_signal_to_value(const char *name) { int i, len; for (i = 0; i < ARRAY_SIZE(signal_types); i++) { len = strlen(signal_types[i].name); if (strncmp(name, signal_types[i].name, len) == 0 && (name[len] == '\n' || name[len] == '\0')) return signal_types[i].value; } return SPI_SIGNAL_UNKNOWN; } static int spi_host_setup(struct transport_container *tc, struct device *dev, struct device *cdev) { struct Scsi_Host *shost = dev_to_shost(dev); spi_signalling(shost) = SPI_SIGNAL_UNKNOWN; return 0; } static int spi_host_configure(struct transport_container *tc, struct device *dev, struct device *cdev); static DECLARE_TRANSPORT_CLASS(spi_host_class, "spi_host", spi_host_setup, NULL, spi_host_configure); static int spi_host_match(struct attribute_container *cont, struct device *dev) { struct Scsi_Host *shost; if (!scsi_is_host_device(dev)) return 0; shost = dev_to_shost(dev); if (!shost->transportt || shost->transportt->host_attrs.ac.class != &spi_host_class.class) return 0; return &shost->transportt->host_attrs.ac == cont; } static int spi_target_configure(struct transport_container *tc, struct device *dev, struct device *cdev); static int spi_device_configure(struct transport_container *tc, struct device *dev, struct device *cdev) { struct scsi_device *sdev = to_scsi_device(dev); struct scsi_target *starget = sdev->sdev_target; blist_flags_t bflags; bflags = scsi_get_device_flags_keyed(sdev, &sdev->inquiry[8], &sdev->inquiry[16], SCSI_DEVINFO_SPI); /* Populate the target capability fields with the values * gleaned from the device inquiry */ spi_support_sync(starget) = scsi_device_sync(sdev); spi_support_wide(starget) = scsi_device_wide(sdev); spi_support_dt(starget) = scsi_device_dt(sdev); spi_support_dt_only(starget) = scsi_device_dt_only(sdev); spi_support_ius(starget) = scsi_device_ius(sdev); if (bflags & SPI_BLIST_NOIUS) { dev_info(dev, "Information Units disabled by blacklist\n"); spi_support_ius(starget) = 0; } spi_support_qas(starget) = scsi_device_qas(sdev); return 0; } static int spi_setup_transport_attrs(struct transport_container *tc, struct device *dev, struct device *cdev) { struct scsi_target *starget = to_scsi_target(dev); spi_period(starget) = -1; /* illegal value */ spi_min_period(starget) = 0; spi_offset(starget) = 0; /* async */ spi_max_offset(starget) = 255; spi_width(starget) = 0; /* narrow */ spi_max_width(starget) = 1; spi_iu(starget) = 0; /* no IU */ spi_max_iu(starget) = 1; spi_dt(starget) = 0; /* ST */ spi_qas(starget) = 0; spi_max_qas(starget) = 1; spi_wr_flow(starget) = 0; spi_rd_strm(starget) = 0; spi_rti(starget) = 0; spi_pcomp_en(starget) = 0; spi_hold_mcs(starget) = 0; spi_dv_pending(starget) = 0; spi_dv_in_progress(starget) = 0; spi_initial_dv(starget) = 0; mutex_init(&spi_dv_mutex(starget)); return 0; } #define spi_transport_show_simple(field, format_string) \ \ static ssize_t \ show_spi_transport_##field(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct scsi_target *starget = transport_class_to_starget(dev); \ struct spi_transport_attrs *tp; \ \ tp = (struct spi_transport_attrs *)&starget->starget_data; \ return snprintf(buf, 20, format_string, tp->field); \ } #define spi_transport_store_simple(field, format_string) \ \ static ssize_t \ store_spi_transport_##field(struct device *dev, \ struct device_attribute *attr, \ const char *buf, size_t count) \ { \ int val; \ struct scsi_target *starget = transport_class_to_starget(dev); \ struct spi_transport_attrs *tp; \ \ tp = (struct spi_transport_attrs *)&starget->starget_data; \ val = simple_strtoul(buf, NULL, 0); \ tp->field = val; \ return count; \ } #define spi_transport_show_function(field, format_string) \ \ static ssize_t \ show_spi_transport_##field(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct scsi_target *starget = transport_class_to_starget(dev); \ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); \ struct spi_transport_attrs *tp; \ struct spi_internal *i = to_spi_internal(shost->transportt); \ tp = (struct spi_transport_attrs *)&starget->starget_data; \ if (i->f->get_##field) \ i->f->get_##field(starget); \ return snprintf(buf, 20, format_string, tp->field); \ } #define spi_transport_store_function(field, format_string) \ static ssize_t \ store_spi_transport_##field(struct device *dev, \ struct device_attribute *attr, \ const char *buf, size_t count) \ { \ int val; \ struct scsi_target *starget = transport_class_to_starget(dev); \ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); \ struct spi_internal *i = to_spi_internal(shost->transportt); \ \ if (!i->f->set_##field) \ return -EINVAL; \ val = simple_strtoul(buf, NULL, 0); \ i->f->set_##field(starget, val); \ return count; \ } #define spi_transport_store_max(field, format_string) \ static ssize_t \ store_spi_transport_##field(struct device *dev, \ struct device_attribute *attr, \ const char *buf, size_t count) \ { \ int val; \ struct scsi_target *starget = transport_class_to_starget(dev); \ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); \ struct spi_internal *i = to_spi_internal(shost->transportt); \ struct spi_transport_attrs *tp \ = (struct spi_transport_attrs *)&starget->starget_data; \ \ if (!i->f->set_##field) \ return -EINVAL; \ val = simple_strtoul(buf, NULL, 0); \ if (val > tp->max_##field) \ val = tp->max_##field; \ i->f->set_##field(starget, val); \ return count; \ } #define spi_transport_rd_attr(field, format_string) \ spi_transport_show_function(field, format_string) \ spi_transport_store_function(field, format_string) \ static DEVICE_ATTR(field, S_IRUGO, \ show_spi_transport_##field, \ store_spi_transport_##field); #define spi_transport_simple_attr(field, format_string) \ spi_transport_show_simple(field, format_string) \ spi_transport_store_simple(field, format_string) \ static DEVICE_ATTR(field, S_IRUGO, \ show_spi_transport_##field, \ store_spi_transport_##field); #define spi_transport_max_attr(field, format_string) \ spi_transport_show_function(field, format_string) \ spi_transport_store_max(field, format_string) \ spi_transport_simple_attr(max_##field, format_string) \ static DEVICE_ATTR(field, S_IRUGO, \ show_spi_transport_##field, \ store_spi_transport_##field); /* The Parallel SCSI Tranport Attributes: */ spi_transport_max_attr(offset, "%d\n"); spi_transport_max_attr(width, "%d\n"); spi_transport_max_attr(iu, "%d\n"); spi_transport_rd_attr(dt, "%d\n"); spi_transport_max_attr(qas, "%d\n"); spi_transport_rd_attr(wr_flow, "%d\n"); spi_transport_rd_attr(rd_strm, "%d\n"); spi_transport_rd_attr(rti, "%d\n"); spi_transport_rd_attr(pcomp_en, "%d\n"); spi_transport_rd_attr(hold_mcs, "%d\n"); /* we only care about the first child device that's a real SCSI device * so we return 1 to terminate the iteration when we find it */ static int child_iter(struct device *dev, void *data) { if (!scsi_is_sdev_device(dev)) return 0; spi_dv_device(to_scsi_device(dev)); return 1; } static ssize_t store_spi_revalidate(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_target *starget = transport_class_to_starget(dev); device_for_each_child(&starget->dev, NULL, child_iter); return count; } static DEVICE_ATTR(revalidate, S_IWUSR, NULL, store_spi_revalidate); /* Translate the period into ns according to the current spec * for SDTR/PPR messages */ static int period_to_str(char *buf, int period) { int len, picosec; if (period < 0 || period > 0xff) { picosec = -1; } else if (period <= SPI_STATIC_PPR) { picosec = ppr_to_ps[period]; } else { picosec = period * 4000; } if (picosec == -1) { len = sprintf(buf, "reserved"); } else { len = sprint_frac(buf, picosec, 1000); } return len; } static ssize_t show_spi_transport_period_helper(char *buf, int period) { int len = period_to_str(buf, period); buf[len++] = '\n'; buf[len] = '\0'; return len; } static ssize_t store_spi_transport_period_helper(struct device *dev, const char *buf, size_t count, int *periodp) { int j, picosec, period = -1; char *endp; picosec = simple_strtoul(buf, &endp, 10) * 1000; if (*endp == '.') { int mult = 100; do { endp++; if (!isdigit(*endp)) break; picosec += (*endp - '0') * mult; mult /= 10; } while (mult > 0); } for (j = 0; j <= SPI_STATIC_PPR; j++) { if (ppr_to_ps[j] < picosec) continue; period = j; break; } if (period == -1) period = picosec / 4000; if (period > 0xff) period = 0xff; *periodp = period; return count; } static ssize_t show_spi_transport_period(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_target *starget = transport_class_to_starget(dev); struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); struct spi_internal *i = to_spi_internal(shost->transportt); struct spi_transport_attrs *tp = (struct spi_transport_attrs *)&starget->starget_data; if (i->f->get_period) i->f->get_period(starget); return show_spi_transport_period_helper(buf, tp->period); } static ssize_t store_spi_transport_period(struct device *cdev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_target *starget = transport_class_to_starget(cdev); struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); struct spi_internal *i = to_spi_internal(shost->transportt); struct spi_transport_attrs *tp = (struct spi_transport_attrs *)&starget->starget_data; int period, retval; if (!i->f->set_period) return -EINVAL; retval = store_spi_transport_period_helper(cdev, buf, count, &period); if (period < tp->min_period) period = tp->min_period; i->f->set_period(starget, period); return retval; } static DEVICE_ATTR(period, S_IRUGO, show_spi_transport_period, store_spi_transport_period); static ssize_t show_spi_transport_min_period(struct device *cdev, struct device_attribute *attr, char *buf) { struct scsi_target *starget = transport_class_to_starget(cdev); struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); struct spi_internal *i = to_spi_internal(shost->transportt); struct spi_transport_attrs *tp = (struct spi_transport_attrs *)&starget->starget_data; if (!i->f->set_period) return -EINVAL; return show_spi_transport_period_helper(buf, tp->min_period); } static ssize_t store_spi_transport_min_period(struct device *cdev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_target *starget = transport_class_to_starget(cdev); struct spi_transport_attrs *tp = (struct spi_transport_attrs *)&starget->starget_data; return store_spi_transport_period_helper(cdev, buf, count, &tp->min_period); } static DEVICE_ATTR(min_period, S_IRUGO, show_spi_transport_min_period, store_spi_transport_min_period); static ssize_t show_spi_host_signalling(struct device *cdev, struct device_attribute *attr, char *buf) { struct Scsi_Host *shost = transport_class_to_shost(cdev); struct spi_internal *i = to_spi_internal(shost->transportt); if (i->f->get_signalling) i->f->get_signalling(shost); return sprintf(buf, "%s\n", spi_signal_to_string(spi_signalling(shost))); } static ssize_t store_spi_host_signalling(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct Scsi_Host *shost = transport_class_to_shost(dev); struct spi_internal *i = to_spi_internal(shost->transportt); enum spi_signal_type type = spi_signal_to_value(buf); if (!i->f->set_signalling) return -EINVAL; if (type != SPI_SIGNAL_UNKNOWN) i->f->set_signalling(shost, type); return count; } static DEVICE_ATTR(signalling, S_IRUGO, show_spi_host_signalling, store_spi_host_signalling); static ssize_t show_spi_host_width(struct device *cdev, struct device_attribute *attr, char *buf) { struct Scsi_Host *shost = transport_class_to_shost(cdev); return sprintf(buf, "%s\n", shost->max_id == 16 ? "wide" : "narrow"); } static DEVICE_ATTR(host_width, S_IRUGO, show_spi_host_width, NULL); static ssize_t show_spi_host_hba_id(struct device *cdev, struct device_attribute *attr, char *buf) { struct Scsi_Host *shost = transport_class_to_shost(cdev); return sprintf(buf, "%d\n", shost->this_id); } static DEVICE_ATTR(hba_id, S_IRUGO, show_spi_host_hba_id, NULL); #define DV_SET(x, y) \ if(i->f->set_##x) \ i->f->set_##x(sdev->sdev_target, y) enum spi_compare_returns { SPI_COMPARE_SUCCESS, SPI_COMPARE_FAILURE, SPI_COMPARE_SKIP_TEST, }; /* This is for read/write Domain Validation: If the device supports * an echo buffer, we do read/write tests to it */ static enum spi_compare_returns spi_dv_device_echo_buffer(struct scsi_device *sdev, u8 *buffer, u8 *ptr, const int retries) { int len = ptr - buffer; int j, k, r, result; unsigned int pattern = 0x0000ffff; struct scsi_sense_hdr sshdr; const char spi_write_buffer[] = { WRITE_BUFFER, 0x0a, 0, 0, 0, 0, 0, len >> 8, len & 0xff, 0 }; const char spi_read_buffer[] = { READ_BUFFER, 0x0a, 0, 0, 0, 0, 0, len >> 8, len & 0xff, 0 }; /* set up the pattern buffer. Doesn't matter if we spill * slightly beyond since that's where the read buffer is */ for (j = 0; j < len; ) { /* fill the buffer with counting (test a) */ for ( ; j < min(len, 32); j++) buffer[j] = j; k = j; /* fill the buffer with alternating words of 0x0 and * 0xffff (test b) */ for ( ; j < min(len, k + 32); j += 2) { u16 *word = (u16 *)&buffer[j]; *word = (j & 0x02) ? 0x0000 : 0xffff; } k = j; /* fill with crosstalk (alternating 0x5555 0xaaa) * (test c) */ for ( ; j < min(len, k + 32); j += 2) { u16 *word = (u16 *)&buffer[j]; *word = (j & 0x02) ? 0x5555 : 0xaaaa; } k = j; /* fill with shifting bits (test d) */ for ( ; j < min(len, k + 32); j += 4) { u32 *word = (unsigned int *)&buffer[j]; u32 roll = (pattern & 0x80000000) ? 1 : 0; *word = pattern; pattern = (pattern << 1) | roll; } /* don't bother with random data (test e) */ } for (r = 0; r < retries; r++) { result = spi_execute(sdev, spi_write_buffer, REQ_OP_DRV_OUT, buffer, len, &sshdr); if (result || !scsi_device_online(sdev)) { scsi_device_set_state(sdev, SDEV_QUIESCE); if (result > 0 && scsi_sense_valid(&sshdr) && sshdr.sense_key == ILLEGAL_REQUEST /* INVALID FIELD IN CDB */ && sshdr.asc == 0x24 && sshdr.ascq == 0x00) /* This would mean that the drive lied * to us about supporting an echo * buffer (unfortunately some Western * Digital drives do precisely this) */ return SPI_COMPARE_SKIP_TEST; sdev_printk(KERN_ERR, sdev, "Write Buffer failure %x\n", result); return SPI_COMPARE_FAILURE; } memset(ptr, 0, len); spi_execute(sdev, spi_read_buffer, REQ_OP_DRV_IN, ptr, len, NULL); scsi_device_set_state(sdev, SDEV_QUIESCE); if (memcmp(buffer, ptr, len) != 0) return SPI_COMPARE_FAILURE; } return SPI_COMPARE_SUCCESS; } /* This is for the simplest form of Domain Validation: a read test * on the inquiry data from the device */ static enum spi_compare_returns spi_dv_device_compare_inquiry(struct scsi_device *sdev, u8 *buffer, u8 *ptr, const int retries) { int r, result; const int len = sdev->inquiry_len; const char spi_inquiry[] = { INQUIRY, 0, 0, 0, len, 0 }; for (r = 0; r < retries; r++) { memset(ptr, 0, len); result = spi_execute(sdev, spi_inquiry, REQ_OP_DRV_IN, ptr, len, NULL); if(result || !scsi_device_online(sdev)) { scsi_device_set_state(sdev, SDEV_QUIESCE); return SPI_COMPARE_FAILURE; } /* If we don't have the inquiry data already, the * first read gets it */ if (ptr == buffer) { ptr += len; --r; continue; } if (memcmp(buffer, ptr, len) != 0) /* failure */ return SPI_COMPARE_FAILURE; } return SPI_COMPARE_SUCCESS; } static enum spi_compare_returns spi_dv_retrain(struct scsi_device *sdev, u8 *buffer, u8 *ptr, enum spi_compare_returns (*compare_fn)(struct scsi_device *, u8 *, u8 *, int)) { struct spi_internal *i = to_spi_internal(sdev->host->transportt); struct scsi_target *starget = sdev->sdev_target; int period = 0, prevperiod = 0; enum spi_compare_returns retval; for (;;) { int newperiod; retval = compare_fn(sdev, buffer, ptr, DV_LOOPS); if (retval == SPI_COMPARE_SUCCESS || retval == SPI_COMPARE_SKIP_TEST) break; /* OK, retrain, fallback */ if (i->f->get_iu) i->f->get_iu(starget); if (i->f->get_qas) i->f->get_qas(starget); if (i->f->get_period) i->f->get_period(sdev->sdev_target); /* Here's the fallback sequence; first try turning off * IU, then QAS (if we can control them), then finally * fall down the periods */ if (i->f->set_iu && spi_iu(starget)) { starget_printk(KERN_ERR, starget, "Domain Validation Disabling Information Units\n"); DV_SET(iu, 0); } else if (i->f->set_qas && spi_qas(starget)) { starget_printk(KERN_ERR, starget, "Domain Validation Disabling Quick Arbitration and Selection\n"); DV_SET(qas, 0); } else { newperiod = spi_period(starget); period = newperiod > period ? newperiod : period; if (period < 0x0d) period++; else period += period >> 1; if (unlikely(period > 0xff || period == prevperiod)) { /* Total failure; set to async and return */ starget_printk(KERN_ERR, starget, "Domain Validation Failure, dropping back to Asynchronous\n"); DV_SET(offset, 0); return SPI_COMPARE_FAILURE; } starget_printk(KERN_ERR, starget, "Domain Validation detected failure, dropping back\n"); DV_SET(period, period); prevperiod = period; } } return retval; } static int spi_dv_device_get_echo_buffer(struct scsi_device *sdev, u8 *buffer) { int l, result; /* first off do a test unit ready. This can error out * because of reservations or some other reason. If it * fails, the device won't let us write to the echo buffer * so just return failure */ static const char spi_test_unit_ready[] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 }; static const char spi_read_buffer_descriptor[] = { READ_BUFFER, 0x0b, 0, 0, 0, 0, 0, 0, 4, 0 }; /* We send a set of three TURs to clear any outstanding * unit attention conditions if they exist (Otherwise the * buffer tests won't be happy). If the TUR still fails * (reservation conflict, device not ready, etc) just * skip the write tests */ for (l = 0; ; l++) { result = spi_execute(sdev, spi_test_unit_ready, REQ_OP_DRV_IN, NULL, 0, NULL); if(result) { if(l >= 3) return 0; } else { /* TUR succeeded */ break; } } result = spi_execute(sdev, spi_read_buffer_descriptor, REQ_OP_DRV_IN, buffer, 4, NULL); if (result) /* Device has no echo buffer */ return 0; return buffer[3] + ((buffer[2] & 0x1f) << 8); } static void spi_dv_device_internal(struct scsi_device *sdev, u8 *buffer) { struct spi_internal *i = to_spi_internal(sdev->host->transportt); struct scsi_target *starget = sdev->sdev_target; struct Scsi_Host *shost = sdev->host; int len = sdev->inquiry_len; int min_period = spi_min_period(starget); int max_width = spi_max_width(starget); /* first set us up for narrow async */ DV_SET(offset, 0); DV_SET(width, 0); if (spi_dv_device_compare_inquiry(sdev, buffer, buffer, DV_LOOPS) != SPI_COMPARE_SUCCESS) { starget_printk(KERN_ERR, starget, "Domain Validation Initial Inquiry Failed\n"); /* FIXME: should probably offline the device here? */ return; } if (!spi_support_wide(starget)) { spi_max_width(starget) = 0; max_width = 0; } /* test width */ if (i->f->set_width && max_width) { i->f->set_width(starget, 1); if (spi_dv_device_compare_inquiry(sdev, buffer, buffer + len, DV_LOOPS) != SPI_COMPARE_SUCCESS) { starget_printk(KERN_ERR, starget, "Wide Transfers Fail\n"); i->f->set_width(starget, 0); /* Make sure we don't force wide back on by asking * for a transfer period that requires it */ max_width = 0; if (min_period < 10) min_period = 10; } } if (!i->f->set_period) return; /* device can't handle synchronous */ if (!spi_support_sync(starget) && !spi_support_dt(starget)) return; /* len == -1 is the signal that we need to ascertain the * presence of an echo buffer before trying to use it. len == * 0 means we don't have an echo buffer */ len = -1; retry: /* now set up to the maximum */ DV_SET(offset, spi_max_offset(starget)); DV_SET(period, min_period); /* try QAS requests; this should be harmless to set if the * target supports it */ if (spi_support_qas(starget) && spi_max_qas(starget)) { DV_SET(qas, 1); } else { DV_SET(qas, 0); } if (spi_support_ius(starget) && spi_max_iu(starget) && min_period < 9) { /* This u320 (or u640). Set IU transfers */ DV_SET(iu, 1); /* Then set the optional parameters */ DV_SET(rd_strm, 1); DV_SET(wr_flow, 1); DV_SET(rti, 1); if (min_period == 8) DV_SET(pcomp_en, 1); } else { DV_SET(iu, 0); } /* now that we've done all this, actually check the bus * signal type (if known). Some devices are stupid on * a SE bus and still claim they can try LVD only settings */ if (i->f->get_signalling) i->f->get_signalling(shost); if (spi_signalling(shost) == SPI_SIGNAL_SE || spi_signalling(shost) == SPI_SIGNAL_HVD || !spi_support_dt(starget)) { DV_SET(dt, 0); } else { DV_SET(dt, 1); } /* set width last because it will pull all the other * parameters down to required values */ DV_SET(width, max_width); /* Do the read only INQUIRY tests */ spi_dv_retrain(sdev, buffer, buffer + sdev->inquiry_len, spi_dv_device_compare_inquiry); /* See if we actually managed to negotiate and sustain DT */ if (i->f->get_dt) i->f->get_dt(starget); /* see if the device has an echo buffer. If it does we can do * the SPI pattern write tests. Because of some broken * devices, we *only* try this on a device that has actually * negotiated DT */ if (len == -1 && spi_dt(starget)) len = spi_dv_device_get_echo_buffer(sdev, buffer); if (len <= 0) { starget_printk(KERN_INFO, starget, "Domain Validation skipping write tests\n"); return; } if (len > SPI_MAX_ECHO_BUFFER_SIZE) { starget_printk(KERN_WARNING, starget, "Echo buffer size %d is too big, trimming to %d\n", len, SPI_MAX_ECHO_BUFFER_SIZE); len = SPI_MAX_ECHO_BUFFER_SIZE; } if (spi_dv_retrain(sdev, buffer, buffer + len, spi_dv_device_echo_buffer) == SPI_COMPARE_SKIP_TEST) { /* OK, the stupid drive can't do a write echo buffer * test after all, fall back to the read tests */ len = 0; goto retry; } } /** * spi_dv_device - Do Domain Validation on the device * @sdev: scsi device to validate * * Performs the domain validation on the given device in the * current execution thread. Since DV operations may sleep, * the current thread must have user context. Also no SCSI * related locks that would deadlock I/O issued by the DV may * be held. */ void spi_dv_device(struct scsi_device *sdev) { struct scsi_target *starget = sdev->sdev_target; const int len = SPI_MAX_ECHO_BUFFER_SIZE*2; unsigned int sleep_flags; u8 *buffer; /* * Because this function and the power management code both call * scsi_device_quiesce(), it is not safe to perform domain validation * while suspend or resume is in progress. Hence the * lock/unlock_system_sleep() calls. */ sleep_flags = lock_system_sleep(); if (scsi_autopm_get_device(sdev)) goto unlock_system_sleep; if (unlikely(spi_dv_in_progress(starget))) goto put_autopm; if (unlikely(scsi_device_get(sdev))) goto put_autopm; spi_dv_in_progress(starget) = 1; buffer = kzalloc(len, GFP_KERNEL); if (unlikely(!buffer)) goto put_sdev; /* We need to verify that the actual device will quiesce; the * later target quiesce is just a nice to have */ if (unlikely(scsi_device_quiesce(sdev))) goto free_buffer; scsi_target_quiesce(starget); spi_dv_pending(starget) = 1; mutex_lock(&spi_dv_mutex(starget)); starget_printk(KERN_INFO, starget, "Beginning Domain Validation\n"); spi_dv_device_internal(sdev, buffer); starget_printk(KERN_INFO, starget, "Ending Domain Validation\n"); mutex_unlock(&spi_dv_mutex(starget)); spi_dv_pending(starget) = 0; scsi_target_resume(starget); spi_initial_dv(starget) = 1; free_buffer: kfree(buffer); put_sdev: spi_dv_in_progress(starget) = 0; scsi_device_put(sdev); put_autopm: scsi_autopm_put_device(sdev); unlock_system_sleep: unlock_system_sleep(sleep_flags); } EXPORT_SYMBOL(spi_dv_device); struct work_queue_wrapper { struct work_struct work; struct scsi_device *sdev; }; static void spi_dv_device_work_wrapper(struct work_struct *work) { struct work_queue_wrapper *wqw = container_of(work, struct work_queue_wrapper, work); struct scsi_device *sdev = wqw->sdev; kfree(wqw); spi_dv_device(sdev); spi_dv_pending(sdev->sdev_target) = 0; scsi_device_put(sdev); } /** * spi_schedule_dv_device - schedule domain validation to occur on the device * @sdev: The device to validate * * Identical to spi_dv_device() above, except that the DV will be * scheduled to occur in a workqueue later. All memory allocations * are atomic, so may be called from any context including those holding * SCSI locks. */ void spi_schedule_dv_device(struct scsi_device *sdev) { struct work_queue_wrapper *wqw = kmalloc(sizeof(struct work_queue_wrapper), GFP_ATOMIC); if (unlikely(!wqw)) return; if (unlikely(spi_dv_pending(sdev->sdev_target))) { kfree(wqw); return; } /* Set pending early (dv_device doesn't check it, only sets it) */ spi_dv_pending(sdev->sdev_target) = 1; if (unlikely(scsi_device_get(sdev))) { kfree(wqw); spi_dv_pending(sdev->sdev_target) = 0; return; } INIT_WORK(&wqw->work, spi_dv_device_work_wrapper); wqw->sdev = sdev; schedule_work(&wqw->work); } EXPORT_SYMBOL(spi_schedule_dv_device); /** * spi_display_xfer_agreement - Print the current target transfer agreement * @starget: The target for which to display the agreement * * Each SPI port is required to maintain a transfer agreement for each * other port on the bus. This function prints a one-line summary of * the current agreement; more detailed information is available in sysfs. */ void spi_display_xfer_agreement(struct scsi_target *starget) { struct spi_transport_attrs *tp; tp = (struct spi_transport_attrs *)&starget->starget_data; if (tp->offset > 0 && tp->period > 0) { unsigned int picosec, kb100; char *scsi = "FAST-?"; char tmp[8]; if (tp->period <= SPI_STATIC_PPR) { picosec = ppr_to_ps[tp->period]; switch (tp->period) { case 7: scsi = "FAST-320"; break; case 8: scsi = "FAST-160"; break; case 9: scsi = "FAST-80"; break; case 10: case 11: scsi = "FAST-40"; break; case 12: scsi = "FAST-20"; break; } } else { picosec = tp->period * 4000; if (tp->period < 25) scsi = "FAST-20"; else if (tp->period < 50) scsi = "FAST-10"; else scsi = "FAST-5"; } kb100 = (10000000 + picosec / 2) / picosec; if (tp->width) kb100 *= 2; sprint_frac(tmp, picosec, 1000); dev_info(&starget->dev, "%s %sSCSI %d.%d MB/s %s%s%s%s%s%s%s%s (%s ns, offset %d)\n", scsi, tp->width ? "WIDE " : "", kb100/10, kb100 % 10, tp->dt ? "DT" : "ST", tp->iu ? " IU" : "", tp->qas ? " QAS" : "", tp->rd_strm ? " RDSTRM" : "", tp->rti ? " RTI" : "", tp->wr_flow ? " WRFLOW" : "", tp->pcomp_en ? " PCOMP" : "", tp->hold_mcs ? " HMCS" : "", tmp, tp->offset); } else { dev_info(&starget->dev, "%sasynchronous\n", tp->width ? "wide " : ""); } } EXPORT_SYMBOL(spi_display_xfer_agreement); int spi_populate_width_msg(unsigned char *msg, int width) { msg[0] = EXTENDED_MESSAGE; msg[1] = 2; msg[2] = EXTENDED_WDTR; msg[3] = width; return 4; } EXPORT_SYMBOL_GPL(spi_populate_width_msg); int spi_populate_sync_msg(unsigned char *msg, int period, int offset) { msg[0] = EXTENDED_MESSAGE; msg[1] = 3; msg[2] = EXTENDED_SDTR; msg[3] = period; msg[4] = offset; return 5; } EXPORT_SYMBOL_GPL(spi_populate_sync_msg); int spi_populate_ppr_msg(unsigned char *msg, int period, int offset, int width, int options) { msg[0] = EXTENDED_MESSAGE; msg[1] = 6; msg[2] = EXTENDED_PPR; msg[3] = period; msg[4] = 0; msg[5] = offset; msg[6] = width; msg[7] = options; return 8; } EXPORT_SYMBOL_GPL(spi_populate_ppr_msg); /** * spi_populate_tag_msg - place a tag message in a buffer * @msg: pointer to the area to place the tag * @cmd: pointer to the scsi command for the tag * * Notes: * designed to create the correct type of tag message for the * particular request. Returns the size of the tag message. * May return 0 if TCQ is disabled for this device. **/ int spi_populate_tag_msg(unsigned char *msg, struct scsi_cmnd *cmd) { if (cmd->flags & SCMD_TAGGED) { *msg++ = SIMPLE_QUEUE_TAG; *msg++ = scsi_cmd_to_rq(cmd)->tag; return 2; } return 0; } EXPORT_SYMBOL_GPL(spi_populate_tag_msg); #ifdef CONFIG_SCSI_CONSTANTS static const char * const one_byte_msgs[] = { /* 0x00 */ "Task Complete", NULL /* Extended Message */, "Save Pointers", /* 0x03 */ "Restore Pointers", "Disconnect", "Initiator Error", /* 0x06 */ "Abort Task Set", "Message Reject", "Nop", "Message Parity Error", /* 0x0a */ "Linked Command Complete", "Linked Command Complete w/flag", /* 0x0c */ "Target Reset", "Abort Task", "Clear Task Set", /* 0x0f */ "Initiate Recovery", "Release Recovery", /* 0x11 */ "Terminate Process", "Continue Task", "Target Transfer Disable", /* 0x14 */ NULL, NULL, "Clear ACA", "LUN Reset" }; static const char * const two_byte_msgs[] = { /* 0x20 */ "Simple Queue Tag", "Head of Queue Tag", "Ordered Queue Tag", /* 0x23 */ "Ignore Wide Residue", "ACA" }; static const char * const extended_msgs[] = { /* 0x00 */ "Modify Data Pointer", "Synchronous Data Transfer Request", /* 0x02 */ "SCSI-I Extended Identify", "Wide Data Transfer Request", /* 0x04 */ "Parallel Protocol Request", "Modify Bidirectional Data Pointer" }; static void print_nego(const unsigned char *msg, int per, int off, int width) { if (per) { char buf[20]; period_to_str(buf, msg[per]); printk("period = %s ns ", buf); } if (off) printk("offset = %d ", msg[off]); if (width) printk("width = %d ", 8 << msg[width]); } static void print_ptr(const unsigned char *msg, int msb, const char *desc) { int ptr = (msg[msb] << 24) | (msg[msb+1] << 16) | (msg[msb+2] << 8) | msg[msb+3]; printk("%s = %d ", desc, ptr); } int spi_print_msg(const unsigned char *msg) { int len = 1, i; if (msg[0] == EXTENDED_MESSAGE) { len = 2 + msg[1]; if (len == 2) len += 256; if (msg[2] < ARRAY_SIZE(extended_msgs)) printk ("%s ", extended_msgs[msg[2]]); else printk ("Extended Message, reserved code (0x%02x) ", (int) msg[2]); switch (msg[2]) { case EXTENDED_MODIFY_DATA_POINTER: print_ptr(msg, 3, "pointer"); break; case EXTENDED_SDTR: print_nego(msg, 3, 4, 0); break; case EXTENDED_WDTR: print_nego(msg, 0, 0, 3); break; case EXTENDED_PPR: print_nego(msg, 3, 5, 6); break; case EXTENDED_MODIFY_BIDI_DATA_PTR: print_ptr(msg, 3, "out"); print_ptr(msg, 7, "in"); break; default: for (i = 2; i < len; ++i) printk("%02x ", msg[i]); } /* Identify */ } else if (msg[0] & 0x80) { printk("Identify disconnect %sallowed %s %d ", (msg[0] & 0x40) ? "" : "not ", (msg[0] & 0x20) ? "target routine" : "lun", msg[0] & 0x7); /* Normal One byte */ } else if (msg[0] < 0x1f) { if (msg[0] < ARRAY_SIZE(one_byte_msgs) && one_byte_msgs[msg[0]]) printk("%s ", one_byte_msgs[msg[0]]); else printk("reserved (%02x) ", msg[0]); } else if (msg[0] == 0x55) { printk("QAS Request "); /* Two byte */ } else if (msg[0] <= 0x2f) { if ((msg[0] - 0x20) < ARRAY_SIZE(two_byte_msgs)) printk("%s %02x ", two_byte_msgs[msg[0] - 0x20], msg[1]); else printk("reserved two byte (%02x %02x) ", msg[0], msg[1]); len = 2; } else printk("reserved "); return len; } EXPORT_SYMBOL(spi_print_msg); #else /* ifndef CONFIG_SCSI_CONSTANTS */ int spi_print_msg(const unsigned char *msg) { int len = 1, i; if (msg[0] == EXTENDED_MESSAGE) { len = 2 + msg[1]; if (len == 2) len += 256; for (i = 0; i < len; ++i) printk("%02x ", msg[i]); /* Identify */ } else if (msg[0] & 0x80) { printk("%02x ", msg[0]); /* Normal One byte */ } else if ((msg[0] < 0x1f) || (msg[0] == 0x55)) { printk("%02x ", msg[0]); /* Two byte */ } else if (msg[0] <= 0x2f) { printk("%02x %02x", msg[0], msg[1]); len = 2; } else printk("%02x ", msg[0]); return len; } EXPORT_SYMBOL(spi_print_msg); #endif /* ! CONFIG_SCSI_CONSTANTS */ static int spi_device_match(struct attribute_container *cont, struct device *dev) { struct scsi_device *sdev; struct Scsi_Host *shost; struct spi_internal *i; if (!scsi_is_sdev_device(dev)) return 0; sdev = to_scsi_device(dev); shost = sdev->host; if (!shost->transportt || shost->transportt->host_attrs.ac.class != &spi_host_class.class) return 0; /* Note: this class has no device attributes, so it has * no per-HBA allocation and thus we don't need to distinguish * the attribute containers for the device */ i = to_spi_internal(shost->transportt); if (i->f->deny_binding && i->f->deny_binding(sdev->sdev_target)) return 0; return 1; } static int spi_target_match(struct attribute_container *cont, struct device *dev) { struct Scsi_Host *shost; struct scsi_target *starget; struct spi_internal *i; if (!scsi_is_target_device(dev)) return 0; shost = dev_to_shost(dev->parent); if (!shost->transportt || shost->transportt->host_attrs.ac.class != &spi_host_class.class) return 0; i = to_spi_internal(shost->transportt); starget = to_scsi_target(dev); if (i->f->deny_binding && i->f->deny_binding(starget)) return 0; return &i->t.target_attrs.ac == cont; } static DECLARE_TRANSPORT_CLASS(spi_transport_class, "spi_transport", spi_setup_transport_attrs, NULL, spi_target_configure); static DECLARE_ANON_TRANSPORT_CLASS(spi_device_class, spi_device_match, spi_device_configure); static struct attribute *host_attributes[] = { &dev_attr_signalling.attr, &dev_attr_host_width.attr, &dev_attr_hba_id.attr, NULL }; static struct attribute_group host_attribute_group = { .attrs = host_attributes, }; static int spi_host_configure(struct transport_container *tc, struct device *dev, struct device *cdev) { struct kobject *kobj = &cdev->kobj; struct Scsi_Host *shost = transport_class_to_shost(cdev); struct spi_internal *si = to_spi_internal(shost->transportt); struct attribute *attr = &dev_attr_signalling.attr; int rc = 0; if (si->f->set_signalling) rc = sysfs_chmod_file(kobj, attr, attr->mode | S_IWUSR); return rc; } /* returns true if we should be showing the variable. Also * overloads the return by setting 1<<1 if the attribute should * be writeable */ #define TARGET_ATTRIBUTE_HELPER(name) \ (si->f->show_##name ? S_IRUGO : 0) | \ (si->f->set_##name ? S_IWUSR : 0) static umode_t target_attribute_is_visible(struct kobject *kobj, struct attribute *attr, int i) { struct device *cdev = container_of(kobj, struct device, kobj); struct scsi_target *starget = transport_class_to_starget(cdev); struct Scsi_Host *shost = transport_class_to_shost(cdev); struct spi_internal *si = to_spi_internal(shost->transportt); if (attr == &dev_attr_period.attr && spi_support_sync(starget)) return TARGET_ATTRIBUTE_HELPER(period); else if (attr == &dev_attr_min_period.attr && spi_support_sync(starget)) return TARGET_ATTRIBUTE_HELPER(period); else if (attr == &dev_attr_offset.attr && spi_support_sync(starget)) return TARGET_ATTRIBUTE_HELPER(offset); else if (attr == &dev_attr_max_offset.attr && spi_support_sync(starget)) return TARGET_ATTRIBUTE_HELPER(offset); else if (attr == &dev_attr_width.attr && spi_support_wide(starget)) return TARGET_ATTRIBUTE_HELPER(width); else if (attr == &dev_attr_max_width.attr && spi_support_wide(starget)) return TARGET_ATTRIBUTE_HELPER(width); else if (attr == &dev_attr_iu.attr && spi_support_ius(starget)) return TARGET_ATTRIBUTE_HELPER(iu); else if (attr == &dev_attr_max_iu.attr && spi_support_ius(starget)) return TARGET_ATTRIBUTE_HELPER(iu); else if (attr == &dev_attr_dt.attr && spi_support_dt(starget)) return TARGET_ATTRIBUTE_HELPER(dt); else if (attr == &dev_attr_qas.attr && spi_support_qas(starget)) return TARGET_ATTRIBUTE_HELPER(qas); else if (attr == &dev_attr_max_qas.attr && spi_support_qas(starget)) return TARGET_ATTRIBUTE_HELPER(qas); else if (attr == &dev_attr_wr_flow.attr && spi_support_ius(starget)) return TARGET_ATTRIBUTE_HELPER(wr_flow); else if (attr == &dev_attr_rd_strm.attr && spi_support_ius(starget)) return TARGET_ATTRIBUTE_HELPER(rd_strm); else if (attr == &dev_attr_rti.attr && spi_support_ius(starget)) return TARGET_ATTRIBUTE_HELPER(rti); else if (attr == &dev_attr_pcomp_en.attr && spi_support_ius(starget)) return TARGET_ATTRIBUTE_HELPER(pcomp_en); else if (attr == &dev_attr_hold_mcs.attr && spi_support_ius(starget)) return TARGET_ATTRIBUTE_HELPER(hold_mcs); else if (attr == &dev_attr_revalidate.attr) return S_IWUSR; return 0; } static struct attribute *target_attributes[] = { &dev_attr_period.attr, &dev_attr_min_period.attr, &dev_attr_offset.attr, &dev_attr_max_offset.attr, &dev_attr_width.attr, &dev_attr_max_width.attr, &dev_attr_iu.attr, &dev_attr_max_iu.attr, &dev_attr_dt.attr, &dev_attr_qas.attr, &dev_attr_max_qas.attr, &dev_attr_wr_flow.attr, &dev_attr_rd_strm.attr, &dev_attr_rti.attr, &dev_attr_pcomp_en.attr, &dev_attr_hold_mcs.attr, &dev_attr_revalidate.attr, NULL }; static struct attribute_group target_attribute_group = { .attrs = target_attributes, .is_visible = target_attribute_is_visible, }; static int spi_target_configure(struct transport_container *tc, struct device *dev, struct device *cdev) { struct kobject *kobj = &cdev->kobj; /* force an update based on parameters read from the device */ sysfs_update_group(kobj, &target_attribute_group); return 0; } struct scsi_transport_template * spi_attach_transport(struct spi_function_template *ft) { struct spi_internal *i = kzalloc(sizeof(struct spi_internal), GFP_KERNEL); if (unlikely(!i)) return NULL; i->t.target_attrs.ac.class = &spi_transport_class.class; i->t.target_attrs.ac.grp = &target_attribute_group; i->t.target_attrs.ac.match = spi_target_match; transport_container_register(&i->t.target_attrs); i->t.target_size = sizeof(struct spi_transport_attrs); i->t.host_attrs.ac.class = &spi_host_class.class; i->t.host_attrs.ac.grp = &host_attribute_group; i->t.host_attrs.ac.match = spi_host_match; transport_container_register(&i->t.host_attrs); i->t.host_size = sizeof(struct spi_host_attrs); i->f = ft; return &i->t; } EXPORT_SYMBOL(spi_attach_transport); void spi_release_transport(struct scsi_transport_template *t) { struct spi_internal *i = to_spi_internal(t); transport_container_unregister(&i->t.target_attrs); transport_container_unregister(&i->t.host_attrs); kfree(i); } EXPORT_SYMBOL(spi_release_transport); static __init int spi_transport_init(void) { int error = scsi_dev_info_add_list(SCSI_DEVINFO_SPI, "SCSI Parallel Transport Class"); if (!error) { int i; for (i = 0; spi_static_device_list[i].vendor; i++) scsi_dev_info_list_add_keyed(1, /* compatible */ spi_static_device_list[i].vendor, spi_static_device_list[i].model, NULL, spi_static_device_list[i].flags, SCSI_DEVINFO_SPI); } error = transport_class_register(&spi_transport_class); if (error) return error; error = anon_transport_class_register(&spi_device_class); return transport_class_register(&spi_host_class); } static void __exit spi_transport_exit(void) { transport_class_unregister(&spi_transport_class); anon_transport_class_unregister(&spi_device_class); transport_class_unregister(&spi_host_class); scsi_dev_info_remove_list(SCSI_DEVINFO_SPI); } MODULE_AUTHOR("Martin Hicks"); MODULE_DESCRIPTION("SPI Transport Attributes"); MODULE_LICENSE("GPL"); module_init(spi_transport_init); module_exit(spi_transport_exit);
13 1 15 15 2 13 13 9 8 8 6 4 2 2 11 10 10 10 9 9 9 7 2 9 4 9 3 3 3 7 2 1 4 4 5 2 2 2 2 2 2 2 2 2 1 34 34 32 31 31 15 15 32 16 16 1 15 16 15 15 15 15 2 7 9 4 2 15 4 4 73 73 73 73 73 73 73 73 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 // SPDX-License-Identifier: GPL-2.0-or-later /* * IPv6 fragment reassembly * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * * Based on: net/ipv4/ip_fragment.c */ /* * Fixes: * Andi Kleen Make it work with multiple hosts. * More RFC compliance. * * Horst von Brand Add missing #include <linux/string.h> * Alexey Kuznetsov SMP races, threading, cleanup. * Patrick McHardy LRU queue of frag heads for evictor. * Mitsuru KANDA @USAGI Register inet6_protocol{}. * David Stevens and * YOSHIFUJI,H. @USAGI Always remove fragment header to * calculate ICV correctly. */ #define pr_fmt(fmt) "IPv6: " fmt #include <linux/errno.h> #include <linux/types.h> #include <linux/string.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/jiffies.h> #include <linux/net.h> #include <linux/list.h> #include <linux/netdevice.h> #include <linux/in6.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/random.h> #include <linux/jhash.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/export.h> #include <linux/tcp.h> #include <linux/udp.h> #include <net/sock.h> #include <net/snmp.h> #include <net/ipv6.h> #include <net/ip6_route.h> #include <net/protocol.h> #include <net/transp_v6.h> #include <net/rawv6.h> #include <net/ndisc.h> #include <net/addrconf.h> #include <net/ipv6_frag.h> #include <net/inet_ecn.h> static const char ip6_frag_cache_name[] = "ip6-frags"; static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) { return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); } static struct inet_frags ip6_frags; static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, struct sk_buff *prev_tail, struct net_device *dev, int *refs); static void ip6_frag_expire(struct timer_list *t) { struct inet_frag_queue *frag = timer_container_of(frag, t, timer); struct frag_queue *fq; fq = container_of(frag, struct frag_queue, q); ip6frag_expire_frag_queue(fq->q.fqdir->net, fq); } static struct frag_queue * fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif) { struct frag_v6_compare_key key = { .id = id, .saddr = hdr->saddr, .daddr = hdr->daddr, .user = IP6_DEFRAG_LOCAL_DELIVER, .iif = iif, }; struct inet_frag_queue *q; if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL))) key.iif = 0; q = inet_frag_find(net->ipv6.fqdir, &key); if (!q) return NULL; return container_of(q, struct frag_queue, q); } static int ip6_frag_queue(struct net *net, struct frag_queue *fq, struct sk_buff *skb, struct frag_hdr *fhdr, int nhoff, u32 *prob_offset, int *refs) { int offset, end, fragsize; struct sk_buff *prev_tail; struct net_device *dev; int err = -ENOENT; SKB_DR(reason); u8 ecn; /* If reassembly is already done, @skb must be a duplicate frag. */ if (fq->q.flags & INET_FRAG_COMPLETE) { SKB_DR_SET(reason, DUP_FRAG); goto err; } err = -EINVAL; offset = ntohs(fhdr->frag_off) & ~0x7; end = offset + (ntohs(ipv6_hdr(skb)->payload_len) - ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); if ((unsigned int)end > IPV6_MAXPLEN) { *prob_offset = (u8 *)&fhdr->frag_off - skb_network_header(skb); /* note that if prob_offset is set, the skb is freed elsewhere, * we do not free it here. */ return -1; } ecn = ip6_frag_ecn(ipv6_hdr(skb)); if (skb->ip_summed == CHECKSUM_COMPLETE) { const unsigned char *nh = skb_network_header(skb); skb->csum = csum_sub(skb->csum, csum_partial(nh, (u8 *)(fhdr + 1) - nh, 0)); } /* Is this the final fragment? */ if (!(fhdr->frag_off & htons(IP6_MF))) { /* If we already have some bits beyond end * or have different end, the segment is corrupted. */ if (end < fq->q.len || ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len)) goto discard_fq; fq->q.flags |= INET_FRAG_LAST_IN; fq->q.len = end; } else { /* Check if the fragment is rounded to 8 bytes. * Required by the RFC. */ if (end & 0x7) { /* RFC2460 says always send parameter problem in * this case. -DaveM */ *prob_offset = offsetof(struct ipv6hdr, payload_len); return -1; } if (end > fq->q.len) { /* Some bits beyond end -> corruption. */ if (fq->q.flags & INET_FRAG_LAST_IN) goto discard_fq; fq->q.len = end; } } if (end == offset) goto discard_fq; err = -ENOMEM; /* Point into the IP datagram 'data' part. */ if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) goto discard_fq; err = pskb_trim_rcsum(skb, end - offset); if (err) goto discard_fq; /* Note : skb->rbnode and skb->dev share the same location. */ dev = skb->dev; /* Makes sure compiler wont do silly aliasing games */ barrier(); prev_tail = fq->q.fragments_tail; err = inet_frag_queue_insert(&fq->q, skb, offset, end); if (err) goto insert_error; if (dev) fq->iif = dev->ifindex; fq->q.stamp = skb->tstamp; fq->q.tstamp_type = skb->tstamp_type; fq->q.meat += skb->len; fq->ecn |= ecn; add_frag_mem_limit(fq->q.fqdir, skb->truesize); fragsize = -skb_network_offset(skb) + skb->len; if (fragsize > fq->q.max_size) fq->q.max_size = fragsize; /* The first fragment. * nhoffset is obtained from the first fragment, of course. */ if (offset == 0) { fq->nhoffset = nhoff; fq->q.flags |= INET_FRAG_FIRST_IN; } if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && fq->q.meat == fq->q.len) { unsigned long orefdst = skb->_skb_refdst; skb->_skb_refdst = 0UL; err = ip6_frag_reasm(fq, skb, prev_tail, dev, refs); skb->_skb_refdst = orefdst; return err; } skb_dst_drop(skb); return -EINPROGRESS; insert_error: if (err == IPFRAG_DUP) { SKB_DR_SET(reason, DUP_FRAG); err = -EINVAL; goto err; } err = -EINVAL; __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASM_OVERLAPS); discard_fq: inet_frag_kill(&fq->q, refs); __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); err: kfree_skb_reason(skb, reason); return err; } /* * Check if this packet is complete. * * It is called with locked fq, and caller must check that * queue is eligible for reassembly i.e. it is not COMPLETE, * the last and the first frames arrived and all the bits are here. */ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, struct sk_buff *prev_tail, struct net_device *dev, int *refs) { struct net *net = fq->q.fqdir->net; unsigned int nhoff; void *reasm_data; int payload_len; u8 ecn; inet_frag_kill(&fq->q, refs); ecn = ip_frag_ecn_table[fq->ecn]; if (unlikely(ecn == 0xff)) goto out_fail; reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail); if (!reasm_data) goto out_oom; payload_len = -skb_network_offset(skb) - sizeof(struct ipv6hdr) + fq->q.len - sizeof(struct frag_hdr); if (payload_len > IPV6_MAXPLEN) goto out_oversize; /* We have to remove fragment header from datagram and to relocate * header in order to calculate ICV correctly. */ nhoff = fq->nhoffset; skb_network_header(skb)[nhoff] = skb_transport_header(skb)[0]; memmove(skb->head + sizeof(struct frag_hdr), skb->head, (skb->data - skb->head) - sizeof(struct frag_hdr)); if (skb_mac_header_was_set(skb)) skb->mac_header += sizeof(struct frag_hdr); skb->network_header += sizeof(struct frag_hdr); skb_reset_transport_header(skb); inet_frag_reasm_finish(&fq->q, skb, reasm_data, true); skb->dev = dev; ipv6_hdr(skb)->payload_len = htons(payload_len); ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn); IP6CB(skb)->nhoff = nhoff; IP6CB(skb)->flags |= IP6SKB_FRAGMENTED; IP6CB(skb)->frag_max_size = fq->q.max_size; /* Yes, and fold redundant checksum back. 8) */ skb_postpush_rcsum(skb, skb_network_header(skb), skb_network_header_len(skb)); __IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMOKS); fq->q.rb_fragments = RB_ROOT; fq->q.fragments_tail = NULL; fq->q.last_run_head = NULL; return 1; out_oversize: net_dbg_ratelimited("ip6_frag_reasm: payload len = %d\n", payload_len); goto out_fail; out_oom: net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n"); out_fail: __IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMFAILS); inet_frag_kill(&fq->q, refs); return -1; } static int ipv6_frag_rcv(struct sk_buff *skb) { const struct ipv6hdr *hdr = ipv6_hdr(skb); struct net *net = skb_dst_dev_net(skb); struct frag_hdr *fhdr; struct frag_queue *fq; u8 nexthdr; int iif; if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED) goto fail_hdr; __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS); /* Jumbo payload inhibits frag. header */ if (hdr->payload_len == 0) goto fail_hdr; if (!pskb_may_pull(skb, (skb_transport_offset(skb) + sizeof(struct frag_hdr)))) goto fail_hdr; hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); if (!(fhdr->frag_off & htons(IP6_OFFSET | IP6_MF))) { /* It is not a fragmented frame */ skb->transport_header += sizeof(struct frag_hdr); __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS); IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb); IP6CB(skb)->flags |= IP6SKB_FRAGMENTED; IP6CB(skb)->frag_max_size = ntohs(hdr->payload_len) + sizeof(struct ipv6hdr); return 1; } /* RFC 8200, Section 4.5 Fragment Header: * If the first fragment does not include all headers through an * Upper-Layer header, then that fragment should be discarded and * an ICMP Parameter Problem, Code 3, message should be sent to * the source of the fragment, with the Pointer field set to zero. */ nexthdr = hdr->nexthdr; if (ipv6frag_thdr_truncated(skb, skb_network_offset(skb) + sizeof(struct ipv6hdr), &nexthdr)) { __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_INCOMP, 0); return -1; } iif = skb->dev ? skb->dev->ifindex : 0; rcu_read_lock(); fq = fq_find(net, fhdr->identification, hdr, iif); if (fq) { u32 prob_offset = 0; int ret, refs = 0; spin_lock(&fq->q.lock); fq->iif = iif; ret = ip6_frag_queue(net, fq, skb, fhdr, IP6CB(skb)->nhoff, &prob_offset, &refs); spin_unlock(&fq->q.lock); rcu_read_unlock(); inet_frag_putn(&fq->q, refs); if (prob_offset) { __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev), IPSTATS_MIB_INHDRERRORS); /* icmpv6_param_prob() calls kfree_skb(skb) */ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, prob_offset); } return ret; } rcu_read_unlock(); __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); kfree_skb(skb); return -1; fail_hdr: __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb)); return -1; } static const struct inet6_protocol frag_protocol = { .handler = ipv6_frag_rcv, .flags = INET6_PROTO_NOPOLICY, }; #ifdef CONFIG_SYSCTL static struct ctl_table ip6_frags_ns_ctl_table[] = { { .procname = "ip6frag_high_thresh", .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, { .procname = "ip6frag_low_thresh", .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, { .procname = "ip6frag_time", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, }; /* secret interval has been deprecated */ static int ip6_frags_secret_interval_unused; static struct ctl_table ip6_frags_ctl_table[] = { { .procname = "ip6frag_secret_interval", .data = &ip6_frags_secret_interval_unused, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, }; static int __net_init ip6_frags_ns_sysctl_register(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; table = ip6_frags_ns_ctl_table; if (!net_eq(net, &init_net)) { table = kmemdup(table, sizeof(ip6_frags_ns_ctl_table), GFP_KERNEL); if (!table) goto err_alloc; } table[0].data = &net->ipv6.fqdir->high_thresh; table[0].extra1 = &net->ipv6.fqdir->low_thresh; table[1].data = &net->ipv6.fqdir->low_thresh; table[1].extra2 = &net->ipv6.fqdir->high_thresh; table[2].data = &net->ipv6.fqdir->timeout; hdr = register_net_sysctl_sz(net, "net/ipv6", table, ARRAY_SIZE(ip6_frags_ns_ctl_table)); if (!hdr) goto err_reg; net->ipv6.sysctl.frags_hdr = hdr; return 0; err_reg: if (!net_eq(net, &init_net)) kfree(table); err_alloc: return -ENOMEM; } static void __net_exit ip6_frags_ns_sysctl_unregister(struct net *net) { const struct ctl_table *table; table = net->ipv6.sysctl.frags_hdr->ctl_table_arg; unregister_net_sysctl_table(net->ipv6.sysctl.frags_hdr); if (!net_eq(net, &init_net)) kfree(table); } static struct ctl_table_header *ip6_ctl_header; static int ip6_frags_sysctl_register(void) { ip6_ctl_header = register_net_sysctl(&init_net, "net/ipv6", ip6_frags_ctl_table); return ip6_ctl_header == NULL ? -ENOMEM : 0; } static void ip6_frags_sysctl_unregister(void) { unregister_net_sysctl_table(ip6_ctl_header); } #else static int ip6_frags_ns_sysctl_register(struct net *net) { return 0; } static void ip6_frags_ns_sysctl_unregister(struct net *net) { } static int ip6_frags_sysctl_register(void) { return 0; } static void ip6_frags_sysctl_unregister(void) { } #endif static int __net_init ipv6_frags_init_net(struct net *net) { int res; res = fqdir_init(&net->ipv6.fqdir, &ip6_frags, net); if (res < 0) return res; net->ipv6.fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH; net->ipv6.fqdir->low_thresh = IPV6_FRAG_LOW_THRESH; net->ipv6.fqdir->timeout = IPV6_FRAG_TIMEOUT; res = ip6_frags_ns_sysctl_register(net); if (res < 0) fqdir_exit(net->ipv6.fqdir); return res; } static void __net_exit ipv6_frags_pre_exit_net(struct net *net) { fqdir_pre_exit(net->ipv6.fqdir); } static void __net_exit ipv6_frags_exit_net(struct net *net) { ip6_frags_ns_sysctl_unregister(net); fqdir_exit(net->ipv6.fqdir); } static struct pernet_operations ip6_frags_ops = { .init = ipv6_frags_init_net, .pre_exit = ipv6_frags_pre_exit_net, .exit = ipv6_frags_exit_net, }; static const struct rhashtable_params ip6_rhash_params = { .head_offset = offsetof(struct inet_frag_queue, node), .hashfn = ip6frag_key_hashfn, .obj_hashfn = ip6frag_obj_hashfn, .obj_cmpfn = ip6frag_obj_cmpfn, .automatic_shrinking = true, }; int __init ipv6_frag_init(void) { int ret; ip6_frags.constructor = ip6frag_init; ip6_frags.destructor = NULL; ip6_frags.qsize = sizeof(struct frag_queue); ip6_frags.frag_expire = ip6_frag_expire; ip6_frags.frags_cache_name = ip6_frag_cache_name; ip6_frags.rhash_params = ip6_rhash_params; ret = inet_frags_init(&ip6_frags); if (ret) goto out; ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT); if (ret) goto err_protocol; ret = ip6_frags_sysctl_register(); if (ret) goto err_sysctl; ret = register_pernet_subsys(&ip6_frags_ops); if (ret) goto err_pernet; out: return ret; err_pernet: ip6_frags_sysctl_unregister(); err_sysctl: inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT); err_protocol: inet_frags_fini(&ip6_frags); goto out; } void ipv6_frag_exit(void) { ip6_frags_sysctl_unregister(); unregister_pernet_subsys(&ip6_frags_ops); inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT); inet_frags_fini(&ip6_frags); }
129 23 4 23 130 23 10 23 23 23 23 137 23 16 2 129 127 129 137 23 137 79 82 137 137 122 129 129 128 128 129 129 128 129 28 10 102 137 137 3 129 129 130 130 15 130 2 126 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> * * Development of this code funded by Astaro AG (http://www.astaro.com/) */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> #include <linux/list.h> #include <linux/rculist.h> #include <linux/skbuff.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/static_key.h> #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_log.h> #include <net/netfilter/nft_meta.h> #ifdef CONFIG_MITIGATION_RETPOLINE static struct static_key_false nf_tables_skip_direct_calls; static inline bool nf_skip_indirect_calls(void) { return static_branch_likely(&nf_tables_skip_direct_calls); } static inline void __init nf_skip_indirect_calls_enable(void) { if (!cpu_feature_enabled(X86_FEATURE_RETPOLINE)) static_branch_enable(&nf_tables_skip_direct_calls); } #else static inline void nf_skip_indirect_calls_enable(void) { } #endif /* CONFIG_MITIGATION_RETPOLINE */ static noinline void __nft_trace_packet(const struct nft_pktinfo *pkt, const struct nft_verdict *verdict, const struct nft_rule_dp *rule, struct nft_traceinfo *info, enum nft_trace_types type) { if (!info->trace || !info->nf_trace) return; info->type = type; nft_trace_notify(pkt, verdict, rule, info); } static inline void nft_trace_packet(const struct nft_pktinfo *pkt, struct nft_verdict *verdict, struct nft_traceinfo *info, const struct nft_rule_dp *rule, enum nft_trace_types type) { if (static_branch_unlikely(&nft_trace_enabled)) { info->nf_trace = pkt->skb->nf_trace; __nft_trace_packet(pkt, verdict, rule, info, type); } } static inline void nft_trace_copy_nftrace(const struct nft_pktinfo *pkt, struct nft_traceinfo *info) { if (static_branch_unlikely(&nft_trace_enabled)) info->nf_trace = pkt->skb->nf_trace; } static void nft_bitwise_fast_eval(const struct nft_expr *expr, struct nft_regs *regs) { const struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr); u32 *src = &regs->data[priv->sreg]; u32 *dst = &regs->data[priv->dreg]; *dst = (*src & priv->mask) ^ priv->xor; } static void nft_cmp_fast_eval(const struct nft_expr *expr, struct nft_regs *regs) { const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); if (((regs->data[priv->sreg] & priv->mask) == priv->data) ^ priv->inv) return; regs->verdict.code = NFT_BREAK; } static void nft_cmp16_fast_eval(const struct nft_expr *expr, struct nft_regs *regs) { const struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr); const u64 *reg_data = (const u64 *)&regs->data[priv->sreg]; const u64 *mask = (const u64 *)&priv->mask; const u64 *data = (const u64 *)&priv->data; if (((reg_data[0] & mask[0]) == data[0] && ((reg_data[1] & mask[1]) == data[1])) ^ priv->inv) return; regs->verdict.code = NFT_BREAK; } static noinline void __nft_trace_verdict(const struct nft_pktinfo *pkt, struct nft_traceinfo *info, const struct nft_rule_dp *rule, const struct nft_regs *regs) { enum nft_trace_types type; switch (regs->verdict.code & NF_VERDICT_MASK) { case NFT_CONTINUE: case NFT_RETURN: type = NFT_TRACETYPE_RETURN; break; case NF_STOLEN: type = NFT_TRACETYPE_RULE; /* can't access skb->nf_trace; use copy */ break; default: type = NFT_TRACETYPE_RULE; if (info->trace) info->nf_trace = pkt->skb->nf_trace; break; } __nft_trace_packet(pkt, &regs->verdict, rule, info, type); } static inline void nft_trace_verdict(const struct nft_pktinfo *pkt, struct nft_traceinfo *info, const struct nft_rule_dp *rule, const struct nft_regs *regs) { if (static_branch_unlikely(&nft_trace_enabled)) __nft_trace_verdict(pkt, info, rule, regs); } static bool nft_payload_fast_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_payload *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; u32 *dest = &regs->data[priv->dreg]; unsigned char *ptr; if (priv->base == NFT_PAYLOAD_NETWORK_HEADER) ptr = skb_network_header(skb); else { if (!(pkt->flags & NFT_PKTINFO_L4PROTO)) return false; ptr = skb->data + nft_thoff(pkt); } ptr += priv->offset; if (unlikely(ptr + priv->len > skb_tail_pointer(skb))) return false; *dest = 0; if (priv->len == 2) *(u16 *)dest = *(u16 *)ptr; else if (priv->len == 4) *(u32 *)dest = *(u32 *)ptr; else *(u8 *)dest = *(u8 *)ptr; return true; } DEFINE_STATIC_KEY_FALSE(nft_counters_enabled); static noinline void nft_update_chain_stats(const struct nft_chain *chain, const struct nft_pktinfo *pkt) { struct nft_base_chain *base_chain; struct nft_stats __percpu *pstats; struct nft_stats *stats; base_chain = nft_base_chain(chain); pstats = READ_ONCE(base_chain->stats); if (pstats) { local_bh_disable(); stats = this_cpu_ptr(pstats); u64_stats_update_begin(&stats->syncp); stats->pkts++; stats->bytes += pkt->skb->len; u64_stats_update_end(&stats->syncp); local_bh_enable(); } } struct nft_jumpstack { const struct nft_rule_dp *rule; }; static void expr_call_ops_eval(const struct nft_expr *expr, struct nft_regs *regs, struct nft_pktinfo *pkt) { #ifdef CONFIG_MITIGATION_RETPOLINE unsigned long e; if (nf_skip_indirect_calls()) goto indirect_call; e = (unsigned long)expr->ops->eval; #define X(e, fun) \ do { if ((e) == (unsigned long)(fun)) \ return fun(expr, regs, pkt); } while (0) X(e, nft_payload_eval); X(e, nft_cmp_eval); X(e, nft_counter_eval); X(e, nft_meta_get_eval); X(e, nft_lookup_eval); #if IS_ENABLED(CONFIG_NFT_CT) X(e, nft_ct_get_fast_eval); #endif X(e, nft_range_eval); X(e, nft_immediate_eval); X(e, nft_byteorder_eval); X(e, nft_dynset_eval); X(e, nft_rt_get_eval); X(e, nft_bitwise_eval); X(e, nft_objref_eval); X(e, nft_objref_map_eval); #undef X indirect_call: #endif /* CONFIG_MITIGATION_RETPOLINE */ expr->ops->eval(expr, regs, pkt); } #define nft_rule_expr_first(rule) (struct nft_expr *)&rule->data[0] #define nft_rule_expr_next(expr) ((void *)expr) + expr->ops->size #define nft_rule_expr_last(rule) (struct nft_expr *)&rule->data[rule->dlen] #define nft_rule_dp_for_each_expr(expr, last, rule) \ for ((expr) = nft_rule_expr_first(rule), (last) = nft_rule_expr_last(rule); \ (expr) != (last); \ (expr) = nft_rule_expr_next(expr)) unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv) { const struct nft_chain *chain = priv, *basechain = chain; const struct net *net = nft_net(pkt); const struct nft_expr *expr, *last; const struct nft_rule_dp *rule; struct nft_regs regs; unsigned int stackptr = 0; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; bool genbit = READ_ONCE(net->nft.gencursor); struct nft_rule_blob *blob; struct nft_traceinfo info; info.trace = false; if (static_branch_unlikely(&nft_trace_enabled)) nft_trace_init(&info, pkt, basechain); do_chain: if (genbit) blob = rcu_dereference(chain->blob_gen_1); else blob = rcu_dereference(chain->blob_gen_0); rule = (struct nft_rule_dp *)blob->data; next_rule: regs.verdict.code = NFT_CONTINUE; for (; !rule->is_last ; rule = nft_rule_next(rule)) { nft_rule_dp_for_each_expr(expr, last, rule) { if (expr->ops == &nft_cmp_fast_ops) nft_cmp_fast_eval(expr, &regs); else if (expr->ops == &nft_cmp16_fast_ops) nft_cmp16_fast_eval(expr, &regs); else if (expr->ops == &nft_bitwise_fast_ops) nft_bitwise_fast_eval(expr, &regs); else if (expr->ops != &nft_payload_fast_ops || !nft_payload_fast_eval(expr, &regs, pkt)) expr_call_ops_eval(expr, &regs, pkt); if (regs.verdict.code != NFT_CONTINUE) break; } switch (regs.verdict.code) { case NFT_BREAK: regs.verdict.code = NFT_CONTINUE; nft_trace_copy_nftrace(pkt, &info); continue; case NFT_CONTINUE: nft_trace_packet(pkt, &regs.verdict, &info, rule, NFT_TRACETYPE_RULE); continue; } break; } nft_trace_verdict(pkt, &info, rule, &regs); switch (regs.verdict.code & NF_VERDICT_MASK) { case NF_ACCEPT: case NF_QUEUE: case NF_STOLEN: return regs.verdict.code; case NF_DROP: return NF_DROP_REASON(pkt->skb, SKB_DROP_REASON_NETFILTER_DROP, EPERM); } switch (regs.verdict.code) { case NFT_JUMP: if (WARN_ON_ONCE(stackptr >= NFT_JUMP_STACK_SIZE)) return NF_DROP; jumpstack[stackptr].rule = nft_rule_next(rule); stackptr++; fallthrough; case NFT_GOTO: chain = regs.verdict.chain; goto do_chain; case NFT_CONTINUE: case NFT_RETURN: break; default: WARN_ON_ONCE(1); } if (stackptr > 0) { stackptr--; rule = jumpstack[stackptr].rule; goto next_rule; } nft_trace_packet(pkt, &regs.verdict, &info, NULL, NFT_TRACETYPE_POLICY); if (static_branch_unlikely(&nft_counters_enabled)) nft_update_chain_stats(basechain, pkt); if (nft_base_chain(basechain)->policy == NF_DROP) return NF_DROP_REASON(pkt->skb, SKB_DROP_REASON_NETFILTER_DROP, EPERM); return nft_base_chain(basechain)->policy; } EXPORT_SYMBOL_GPL(nft_do_chain); static struct nft_expr_type *nft_basic_types[] = { &nft_imm_type, &nft_cmp_type, &nft_lookup_type, &nft_bitwise_type, &nft_byteorder_type, &nft_payload_type, &nft_dynset_type, &nft_range_type, &nft_meta_type, &nft_rt_type, &nft_exthdr_type, &nft_last_type, &nft_counter_type, &nft_objref_type, &nft_inner_type, }; static struct nft_object_type *nft_basic_objects[] = { #ifdef CONFIG_NETWORK_SECMARK &nft_secmark_obj_type, #endif &nft_counter_obj_type, }; int __init nf_tables_core_module_init(void) { int err, i, j = 0; nft_counter_init_seqcount(); for (i = 0; i < ARRAY_SIZE(nft_basic_objects); i++) { err = nft_register_obj(nft_basic_objects[i]); if (err) goto err; } for (j = 0; j < ARRAY_SIZE(nft_basic_types); j++) { err = nft_register_expr(nft_basic_types[j]); if (err) goto err; } nf_skip_indirect_calls_enable(); return 0; err: while (j-- > 0) nft_unregister_expr(nft_basic_types[j]); while (i-- > 0) nft_unregister_obj(nft_basic_objects[i]); return err; } void nf_tables_core_module_exit(void) { int i; i = ARRAY_SIZE(nft_basic_types); while (i-- > 0) nft_unregister_expr(nft_basic_types[i]); i = ARRAY_SIZE(nft_basic_objects); while (i-- > 0) nft_unregister_obj(nft_basic_objects[i]); }
41 42 9 42 42 42 42 2 2 2 2 2 2 41 41 2 1 4 41 40 41 3 2 2 3 41 2 38 39 39 38 39 44 44 44 44 40 39 1 39 4 3 2 1 1 1 1 1 3 13 18 23 18 16 16 15 6 16 16 16 16 16 16 10 10 10 9 9 9 9 5 5 5 5 7 7 7 7 3 3 3 3 2 2 2 3 2 2 4 4 4 4 3 3 2 3 3 3 3 3 1 2 3 3 1 3 3 1 3 3 3 1 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2017 Red Hat, Inc. */ #include <linux/cred.h> #include <linux/file.h> #include <linux/mount.h> #include <linux/xattr.h> #include <linux/uio.h> #include <linux/uaccess.h> #include <linux/security.h> #include <linux/fs.h> #include <linux/backing-file.h> #include "overlayfs.h" static char ovl_whatisit(struct inode *inode, struct inode *realinode) { if (realinode != ovl_inode_upper(inode)) return 'l'; if (ovl_has_upperdata(inode)) return 'u'; else return 'm'; } static struct file *ovl_open_realfile(const struct file *file, const struct path *realpath) { struct inode *realinode = d_inode(realpath->dentry); struct inode *inode = file_inode(file); struct mnt_idmap *real_idmap; struct file *realfile; const struct cred *old_cred; int flags = file->f_flags | OVL_OPEN_FLAGS; int acc_mode = ACC_MODE(flags); int err; if (flags & O_APPEND) acc_mode |= MAY_APPEND; old_cred = ovl_override_creds(inode->i_sb); real_idmap = mnt_idmap(realpath->mnt); err = inode_permission(real_idmap, realinode, MAY_OPEN | acc_mode); if (err) { realfile = ERR_PTR(err); } else { if (!inode_owner_or_capable(real_idmap, realinode)) flags &= ~O_NOATIME; realfile = backing_file_open(file_user_path(file), flags, realpath, current_cred()); } ovl_revert_creds(old_cred); pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n", file, file, ovl_whatisit(inode, realinode), file->f_flags, realfile, IS_ERR(realfile) ? 0 : realfile->f_flags); return realfile; } #define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT) static int ovl_change_flags(struct file *file, unsigned int flags) { struct inode *inode = file_inode(file); int err; flags &= OVL_SETFL_MASK; if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode)) return -EPERM; if ((flags & O_DIRECT) && !(file->f_mode & FMODE_CAN_ODIRECT)) return -EINVAL; if (file->f_op->check_flags) { err = file->f_op->check_flags(flags); if (err) return err; } spin_lock(&file->f_lock); file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags; file->f_iocb_flags = iocb_flags(file); spin_unlock(&file->f_lock); return 0; } struct ovl_file { struct file *realfile; struct file *upperfile; }; struct ovl_file *ovl_file_alloc(struct file *realfile) { struct ovl_file *of = kzalloc(sizeof(struct ovl_file), GFP_KERNEL); if (unlikely(!of)) return NULL; of->realfile = realfile; return of; } void ovl_file_free(struct ovl_file *of) { fput(of->realfile); if (of->upperfile) fput(of->upperfile); kfree(of); } static bool ovl_is_real_file(const struct file *realfile, const struct path *realpath) { return file_inode(realfile) == d_inode(realpath->dentry); } static struct file *ovl_real_file_path(const struct file *file, const struct path *realpath) { struct ovl_file *of = file->private_data; struct file *realfile = of->realfile; if (WARN_ON_ONCE(!realpath->dentry)) return ERR_PTR(-EIO); /* * If the realfile that we want is not where the data used to be at * open time, either we'd been copied up, or it's an fsync of a * metacopied file. We need the upperfile either way, so see if it * is already opened and if it is not then open and store it. */ if (unlikely(!ovl_is_real_file(realfile, realpath))) { struct file *upperfile = READ_ONCE(of->upperfile); struct file *old; if (!upperfile) { /* Nobody opened upperfile yet */ upperfile = ovl_open_realfile(file, realpath); if (IS_ERR(upperfile)) return upperfile; /* Store the upperfile for later */ old = cmpxchg_release(&of->upperfile, NULL, upperfile); if (old) { /* Someone opened upperfile before us */ fput(upperfile); upperfile = old; } } /* * Stored file must be from the right inode, unless someone's * been corrupting the upper layer. */ if (WARN_ON_ONCE(!ovl_is_real_file(upperfile, realpath))) return ERR_PTR(-EIO); realfile = upperfile; } /* Did the flags change since open? */ if (unlikely((file->f_flags ^ realfile->f_flags) & ~OVL_OPEN_FLAGS)) { int err = ovl_change_flags(realfile, file->f_flags); if (err) return ERR_PTR(err); } return realfile; } static struct file *ovl_real_file(const struct file *file) { struct dentry *dentry = file_dentry(file); struct path realpath; int err; if (d_is_dir(dentry)) { struct file *f = ovl_dir_real_file(file, false); if (WARN_ON_ONCE(!f)) return ERR_PTR(-EIO); return f; } /* lazy lookup and verify of lowerdata */ err = ovl_verify_lowerdata(dentry); if (err) return ERR_PTR(err); ovl_path_realdata(dentry, &realpath); return ovl_real_file_path(file, &realpath); } static int ovl_open(struct inode *inode, struct file *file) { struct dentry *dentry = file_dentry(file); struct file *realfile; struct path realpath; struct ovl_file *of; int err; /* lazy lookup and verify lowerdata */ err = ovl_verify_lowerdata(dentry); if (err) return err; err = ovl_maybe_copy_up(dentry, file->f_flags); if (err) return err; /* No longer need these flags, so don't pass them on to underlying fs */ file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); ovl_path_realdata(dentry, &realpath); if (!realpath.dentry) return -EIO; realfile = ovl_open_realfile(file, &realpath); if (IS_ERR(realfile)) return PTR_ERR(realfile); of = ovl_file_alloc(realfile); if (!of) { fput(realfile); return -ENOMEM; } file->private_data = of; return 0; } static int ovl_release(struct inode *inode, struct file *file) { ovl_file_free(file->private_data); return 0; } static loff_t ovl_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file_inode(file); struct file *realfile; const struct cred *old_cred; loff_t ret; /* * The two special cases below do not need to involve real fs, * so we can optimizing concurrent callers. */ if (offset == 0) { if (whence == SEEK_CUR) return file->f_pos; if (whence == SEEK_SET) return vfs_setpos(file, 0, 0); } realfile = ovl_real_file(file); if (IS_ERR(realfile)) return PTR_ERR(realfile); /* * Overlay file f_pos is the master copy that is preserved * through copy up and modified on read/write, but only real * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose * limitations that are more strict than ->s_maxbytes for specific * files, so we use the real file to perform seeks. */ ovl_inode_lock(inode); realfile->f_pos = file->f_pos; old_cred = ovl_override_creds(inode->i_sb); ret = vfs_llseek(realfile, offset, whence); ovl_revert_creds(old_cred); file->f_pos = realfile->f_pos; ovl_inode_unlock(inode); return ret; } static void ovl_file_modified(struct file *file) { /* Update size/mtime */ ovl_copyattr(file_inode(file)); } static void ovl_file_end_write(struct kiocb *iocb, ssize_t ret) { ovl_file_modified(iocb->ki_filp); } static void ovl_file_accessed(struct file *file) { struct inode *inode, *upperinode; struct timespec64 ctime, uctime; struct timespec64 mtime, umtime; if (file->f_flags & O_NOATIME) return; inode = file_inode(file); upperinode = ovl_inode_upper(inode); if (!upperinode) return; ctime = inode_get_ctime(inode); uctime = inode_get_ctime(upperinode); mtime = inode_get_mtime(inode); umtime = inode_get_mtime(upperinode); if ((!timespec64_equal(&mtime, &umtime)) || !timespec64_equal(&ctime, &uctime)) { inode_set_mtime_to_ts(inode, inode_get_mtime(upperinode)); inode_set_ctime_to_ts(inode, uctime); } touch_atime(&file->f_path); } static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct file *realfile; struct backing_file_ctx ctx = { .cred = ovl_creds(file_inode(file)->i_sb), .accessed = ovl_file_accessed, }; if (!iov_iter_count(iter)) return 0; realfile = ovl_real_file(file); if (IS_ERR(realfile)) return PTR_ERR(realfile); return backing_file_read_iter(realfile, iter, iocb, iocb->ki_flags, &ctx); } static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct file *realfile; ssize_t ret; int ifl = iocb->ki_flags; struct backing_file_ctx ctx = { .cred = ovl_creds(inode->i_sb), .end_write = ovl_file_end_write, }; if (!iov_iter_count(iter)) return 0; inode_lock(inode); /* Update mode */ ovl_copyattr(inode); realfile = ovl_real_file(file); ret = PTR_ERR(realfile); if (IS_ERR(realfile)) goto out_unlock; if (!ovl_should_sync(OVL_FS(inode->i_sb))) ifl &= ~(IOCB_DSYNC | IOCB_SYNC); ret = backing_file_write_iter(realfile, iter, iocb, ifl, &ctx); out_unlock: inode_unlock(inode); return ret; } static ssize_t ovl_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { struct file *realfile; ssize_t ret; struct backing_file_ctx ctx = { .cred = ovl_creds(file_inode(in)->i_sb), .accessed = ovl_file_accessed, }; struct kiocb iocb; realfile = ovl_real_file(in); if (IS_ERR(realfile)) return PTR_ERR(realfile); init_sync_kiocb(&iocb, in); iocb.ki_pos = *ppos; ret = backing_file_splice_read(realfile, &iocb, pipe, len, flags, &ctx); *ppos = iocb.ki_pos; return ret; } /* * Calling iter_file_splice_write() directly from overlay's f_op may deadlock * due to lock order inversion between pipe->mutex in iter_file_splice_write() * and file_start_write(realfile) in ovl_write_iter(). * * So do everything ovl_write_iter() does and call iter_file_splice_write() on * the real file. */ static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { struct file *realfile; struct inode *inode = file_inode(out); ssize_t ret; struct backing_file_ctx ctx = { .cred = ovl_creds(inode->i_sb), .end_write = ovl_file_end_write, }; struct kiocb iocb; inode_lock(inode); /* Update mode */ ovl_copyattr(inode); realfile = ovl_real_file(out); ret = PTR_ERR(realfile); if (IS_ERR(realfile)) goto out_unlock; init_sync_kiocb(&iocb, out); iocb.ki_pos = *ppos; ret = backing_file_splice_write(pipe, realfile, &iocb, len, flags, &ctx); *ppos = iocb.ki_pos; out_unlock: inode_unlock(inode); return ret; } static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct dentry *dentry = file_dentry(file); enum ovl_path_type type; struct path upperpath; struct file *upperfile; const struct cred *old_cred; int ret; ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb)); if (ret <= 0) return ret; /* Don't sync lower file for fear of receiving EROFS error */ type = ovl_path_type(dentry); if (!OVL_TYPE_UPPER(type) || (datasync && OVL_TYPE_MERGE(type))) return 0; ovl_path_upper(dentry, &upperpath); upperfile = ovl_real_file_path(file, &upperpath); if (IS_ERR(upperfile)) return PTR_ERR(upperfile); old_cred = ovl_override_creds(file_inode(file)->i_sb); ret = vfs_fsync_range(upperfile, start, end, datasync); ovl_revert_creds(old_cred); return ret; } static int ovl_mmap(struct file *file, struct vm_area_struct *vma) { struct ovl_file *of = file->private_data; struct backing_file_ctx ctx = { .cred = ovl_creds(file_inode(file)->i_sb), .accessed = ovl_file_accessed, }; return backing_file_mmap(of->realfile, vma, &ctx); } static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); struct file *realfile; const struct cred *old_cred; int ret; inode_lock(inode); /* Update mode */ ovl_copyattr(inode); ret = file_remove_privs(file); if (ret) goto out_unlock; realfile = ovl_real_file(file); ret = PTR_ERR(realfile); if (IS_ERR(realfile)) goto out_unlock; old_cred = ovl_override_creds(file_inode(file)->i_sb); ret = vfs_fallocate(realfile, mode, offset, len); ovl_revert_creds(old_cred); /* Update size */ ovl_file_modified(file); out_unlock: inode_unlock(inode); return ret; } static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice) { struct file *realfile; const struct cred *old_cred; int ret; realfile = ovl_real_file(file); if (IS_ERR(realfile)) return PTR_ERR(realfile); old_cred = ovl_override_creds(file_inode(file)->i_sb); ret = vfs_fadvise(realfile, offset, len, advice); ovl_revert_creds(old_cred); return ret; } enum ovl_copyop { OVL_COPY, OVL_CLONE, OVL_DEDUPE, }; static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int flags, enum ovl_copyop op) { struct inode *inode_out = file_inode(file_out); struct file *realfile_in, *realfile_out; const struct cred *old_cred; loff_t ret; inode_lock(inode_out); if (op != OVL_DEDUPE) { /* Update mode */ ovl_copyattr(inode_out); ret = file_remove_privs(file_out); if (ret) goto out_unlock; } realfile_out = ovl_real_file(file_out); ret = PTR_ERR(realfile_out); if (IS_ERR(realfile_out)) goto out_unlock; realfile_in = ovl_real_file(file_in); ret = PTR_ERR(realfile_in); if (IS_ERR(realfile_in)) goto out_unlock; old_cred = ovl_override_creds(file_inode(file_out)->i_sb); switch (op) { case OVL_COPY: ret = vfs_copy_file_range(realfile_in, pos_in, realfile_out, pos_out, len, flags); break; case OVL_CLONE: ret = vfs_clone_file_range(realfile_in, pos_in, realfile_out, pos_out, len, flags); break; case OVL_DEDUPE: ret = vfs_dedupe_file_range_one(realfile_in, pos_in, realfile_out, pos_out, len, flags); break; } ovl_revert_creds(old_cred); /* Update size */ ovl_file_modified(file_out); out_unlock: inode_unlock(inode_out); return ret; } static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags) { return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags, OVL_COPY); } static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags) { enum ovl_copyop op; if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) return -EINVAL; if (remap_flags & REMAP_FILE_DEDUP) op = OVL_DEDUPE; else op = OVL_CLONE; /* * Don't copy up because of a dedupe request, this wouldn't make sense * most of the time (data would be duplicated instead of deduplicated). */ if (op == OVL_DEDUPE && (!ovl_inode_upper(file_inode(file_in)) || !ovl_inode_upper(file_inode(file_out)))) return -EPERM; return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, remap_flags, op); } static int ovl_flush(struct file *file, fl_owner_t id) { struct file *realfile; const struct cred *old_cred; int err = 0; realfile = ovl_real_file(file); if (IS_ERR(realfile)) return PTR_ERR(realfile); if (realfile->f_op->flush) { old_cred = ovl_override_creds(file_inode(file)->i_sb); err = realfile->f_op->flush(realfile, id); ovl_revert_creds(old_cred); } return err; } const struct file_operations ovl_file_operations = { .open = ovl_open, .release = ovl_release, .llseek = ovl_llseek, .read_iter = ovl_read_iter, .write_iter = ovl_write_iter, .fsync = ovl_fsync, .mmap = ovl_mmap, .fallocate = ovl_fallocate, .fadvise = ovl_fadvise, .flush = ovl_flush, .splice_read = ovl_splice_read, .splice_write = ovl_splice_write, .copy_file_range = ovl_copy_file_range, .remap_file_range = ovl_remap_file_range, };
1 4 3 3 3 1 3 3 1 3 1 1 1 1 17 1 1 16 17 15 17 1 1 1 1 4 1 4 4 4 4 1 1 1 6 3 4 5 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 // SPDX-License-Identifier: GPL-2.0-or-later /* * vimc-capture.c Virtual Media Controller Driver * * Copyright (C) 2015-2017 Helen Koike <helen.fornazier@gmail.com> */ #include <media/v4l2-ioctl.h> #include <media/videobuf2-core.h> #include <media/videobuf2-dma-contig.h> #include <media/videobuf2-vmalloc.h> #include "vimc-common.h" #include "vimc-streamer.h" struct vimc_capture_device { struct vimc_ent_device ved; struct video_device vdev; struct v4l2_pix_format format; struct vb2_queue queue; struct list_head buf_list; /* * NOTE: in a real driver, a spin lock must be used to access the * queue because the frames are generated from a hardware interruption * and the isr is not allowed to sleep. * Even if it is not necessary a spinlock in the vimc driver, we * use it here as a code reference */ spinlock_t qlock; struct mutex lock; u32 sequence; struct vimc_stream stream; struct media_pad pad; }; static const struct v4l2_pix_format fmt_default = { .width = 640, .height = 480, .pixelformat = V4L2_PIX_FMT_RGB24, .field = V4L2_FIELD_NONE, .colorspace = V4L2_COLORSPACE_SRGB, }; struct vimc_capture_buffer { /* * struct vb2_v4l2_buffer must be the first element * the videobuf2 framework will allocate this struct based on * buf_struct_size and use the first sizeof(struct vb2_buffer) bytes of * memory as a vb2_buffer */ struct vb2_v4l2_buffer vb2; struct list_head list; }; static int vimc_capture_querycap(struct file *file, void *priv, struct v4l2_capability *cap) { strscpy(cap->driver, VIMC_PDEV_NAME, sizeof(cap->driver)); strscpy(cap->card, KBUILD_MODNAME, sizeof(cap->card)); return 0; } static void vimc_capture_get_format(struct vimc_ent_device *ved, struct v4l2_pix_format *fmt) { struct vimc_capture_device *vcapture = container_of(ved, struct vimc_capture_device, ved); *fmt = vcapture->format; } static int vimc_capture_g_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct vimc_capture_device *vcapture = video_drvdata(file); f->fmt.pix = vcapture->format; return 0; } static int vimc_capture_try_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct v4l2_pix_format *format = &f->fmt.pix; const struct vimc_pix_map *vpix; format->width = clamp_t(u32, format->width, VIMC_FRAME_MIN_WIDTH, VIMC_FRAME_MAX_WIDTH) & ~1; format->height = clamp_t(u32, format->height, VIMC_FRAME_MIN_HEIGHT, VIMC_FRAME_MAX_HEIGHT) & ~1; /* Don't accept a pixelformat that is not on the table */ vpix = vimc_pix_map_by_pixelformat(format->pixelformat); if (!vpix) { format->pixelformat = fmt_default.pixelformat; vpix = vimc_pix_map_by_pixelformat(format->pixelformat); } /* TODO: Add support for custom bytesperline values */ format->bytesperline = format->width * vpix->bpp; format->sizeimage = format->bytesperline * format->height; if (format->field == V4L2_FIELD_ANY) format->field = fmt_default.field; vimc_colorimetry_clamp(format); if (format->colorspace == V4L2_COLORSPACE_DEFAULT) format->colorspace = fmt_default.colorspace; return 0; } static int vimc_capture_s_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct vimc_capture_device *vcapture = video_drvdata(file); int ret; /* Do not change the format while stream is on */ if (vb2_is_busy(&vcapture->queue)) return -EBUSY; ret = vimc_capture_try_fmt_vid_cap(file, priv, f); if (ret) return ret; dev_dbg(vcapture->ved.dev, "%s: format update: " "old:%dx%d (0x%x, %d, %d, %d, %d) " "new:%dx%d (0x%x, %d, %d, %d, %d)\n", vcapture->vdev.name, /* old */ vcapture->format.width, vcapture->format.height, vcapture->format.pixelformat, vcapture->format.colorspace, vcapture->format.quantization, vcapture->format.xfer_func, vcapture->format.ycbcr_enc, /* new */ f->fmt.pix.width, f->fmt.pix.height, f->fmt.pix.pixelformat, f->fmt.pix.colorspace, f->fmt.pix.quantization, f->fmt.pix.xfer_func, f->fmt.pix.ycbcr_enc); vcapture->format = f->fmt.pix; return 0; } static int vimc_capture_enum_fmt_vid_cap(struct file *file, void *priv, struct v4l2_fmtdesc *f) { const struct vimc_pix_map *vpix; if (f->mbus_code) { if (f->index > 0) return -EINVAL; vpix = vimc_pix_map_by_code(f->mbus_code); } else { vpix = vimc_pix_map_by_index(f->index); } if (!vpix) return -EINVAL; f->pixelformat = vpix->pixelformat; return 0; } static int vimc_capture_enum_framesizes(struct file *file, void *priv, struct v4l2_frmsizeenum *fsize) { const struct vimc_pix_map *vpix; if (fsize->index) return -EINVAL; /* Only accept code in the pix map table */ vpix = vimc_pix_map_by_code(fsize->pixel_format); if (!vpix) return -EINVAL; fsize->type = V4L2_FRMSIZE_TYPE_CONTINUOUS; fsize->stepwise.min_width = VIMC_FRAME_MIN_WIDTH; fsize->stepwise.max_width = VIMC_FRAME_MAX_WIDTH; fsize->stepwise.min_height = VIMC_FRAME_MIN_HEIGHT; fsize->stepwise.max_height = VIMC_FRAME_MAX_HEIGHT; fsize->stepwise.step_width = 1; fsize->stepwise.step_height = 1; return 0; } static const struct v4l2_file_operations vimc_capture_fops = { .owner = THIS_MODULE, .open = v4l2_fh_open, .release = vb2_fop_release, .read = vb2_fop_read, .poll = vb2_fop_poll, .unlocked_ioctl = video_ioctl2, .mmap = vb2_fop_mmap, }; static const struct v4l2_ioctl_ops vimc_capture_ioctl_ops = { .vidioc_querycap = vimc_capture_querycap, .vidioc_g_fmt_vid_cap = vimc_capture_g_fmt_vid_cap, .vidioc_s_fmt_vid_cap = vimc_capture_s_fmt_vid_cap, .vidioc_try_fmt_vid_cap = vimc_capture_try_fmt_vid_cap, .vidioc_enum_fmt_vid_cap = vimc_capture_enum_fmt_vid_cap, .vidioc_enum_framesizes = vimc_capture_enum_framesizes, .vidioc_reqbufs = vb2_ioctl_reqbufs, .vidioc_create_bufs = vb2_ioctl_create_bufs, .vidioc_prepare_buf = vb2_ioctl_prepare_buf, .vidioc_querybuf = vb2_ioctl_querybuf, .vidioc_qbuf = vb2_ioctl_qbuf, .vidioc_dqbuf = vb2_ioctl_dqbuf, .vidioc_expbuf = vb2_ioctl_expbuf, .vidioc_streamon = vb2_ioctl_streamon, .vidioc_streamoff = vb2_ioctl_streamoff, .vidioc_remove_bufs = vb2_ioctl_remove_bufs, }; static void vimc_capture_return_all_buffers(struct vimc_capture_device *vcapture, enum vb2_buffer_state state) { struct vimc_capture_buffer *vbuf, *node; spin_lock(&vcapture->qlock); list_for_each_entry_safe(vbuf, node, &vcapture->buf_list, list) { list_del(&vbuf->list); vb2_buffer_done(&vbuf->vb2.vb2_buf, state); } spin_unlock(&vcapture->qlock); } static int vimc_capture_start_streaming(struct vb2_queue *vq, unsigned int count) { struct vimc_capture_device *vcapture = vb2_get_drv_priv(vq); int ret; vcapture->sequence = 0; /* Start the media pipeline */ ret = video_device_pipeline_start(&vcapture->vdev, &vcapture->stream.pipe); if (ret) { vimc_capture_return_all_buffers(vcapture, VB2_BUF_STATE_QUEUED); return ret; } ret = vimc_streamer_s_stream(&vcapture->stream, &vcapture->ved, 1); if (ret) { video_device_pipeline_stop(&vcapture->vdev); vimc_capture_return_all_buffers(vcapture, VB2_BUF_STATE_QUEUED); return ret; } return 0; } /* * Stop the stream engine. Any remaining buffers in the stream queue are * dequeued and passed on to the vb2 framework marked as STATE_ERROR. */ static void vimc_capture_stop_streaming(struct vb2_queue *vq) { struct vimc_capture_device *vcapture = vb2_get_drv_priv(vq); vimc_streamer_s_stream(&vcapture->stream, &vcapture->ved, 0); /* Stop the media pipeline */ video_device_pipeline_stop(&vcapture->vdev); /* Release all active buffers */ vimc_capture_return_all_buffers(vcapture, VB2_BUF_STATE_ERROR); } static void vimc_capture_buf_queue(struct vb2_buffer *vb2_buf) { struct vimc_capture_device *vcapture = vb2_get_drv_priv(vb2_buf->vb2_queue); struct vimc_capture_buffer *buf = container_of(vb2_buf, struct vimc_capture_buffer, vb2.vb2_buf); spin_lock(&vcapture->qlock); list_add_tail(&buf->list, &vcapture->buf_list); spin_unlock(&vcapture->qlock); } static int vimc_capture_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, unsigned int sizes[], struct device *alloc_devs[]) { struct vimc_capture_device *vcapture = vb2_get_drv_priv(vq); if (*nplanes) return sizes[0] < vcapture->format.sizeimage ? -EINVAL : 0; /* We don't support multiplanes for now */ *nplanes = 1; sizes[0] = vcapture->format.sizeimage; return 0; } static int vimc_capture_buffer_prepare(struct vb2_buffer *vb) { struct vimc_capture_device *vcapture = vb2_get_drv_priv(vb->vb2_queue); unsigned long size = vcapture->format.sizeimage; if (vb2_plane_size(vb, 0) < size) { dev_err(vcapture->ved.dev, "%s: buffer too small (%lu < %lu)\n", vcapture->vdev.name, vb2_plane_size(vb, 0), size); return -EINVAL; } return 0; } static const struct vb2_ops vimc_capture_qops = { .start_streaming = vimc_capture_start_streaming, .stop_streaming = vimc_capture_stop_streaming, .buf_queue = vimc_capture_buf_queue, .queue_setup = vimc_capture_queue_setup, .buf_prepare = vimc_capture_buffer_prepare, }; static const struct media_entity_operations vimc_capture_mops = { .link_validate = vimc_vdev_link_validate, }; static void vimc_capture_release(struct vimc_ent_device *ved) { struct vimc_capture_device *vcapture = container_of(ved, struct vimc_capture_device, ved); media_entity_cleanup(vcapture->ved.ent); kfree(vcapture); } static void vimc_capture_unregister(struct vimc_ent_device *ved) { struct vimc_capture_device *vcapture = container_of(ved, struct vimc_capture_device, ved); vb2_video_unregister_device(&vcapture->vdev); } static void *vimc_capture_process_frame(struct vimc_ent_device *ved, const void *frame) { struct vimc_capture_device *vcapture = container_of(ved, struct vimc_capture_device, ved); struct vimc_capture_buffer *vimc_buf; void *vbuf; spin_lock(&vcapture->qlock); /* Get the first entry of the list */ vimc_buf = list_first_entry_or_null(&vcapture->buf_list, typeof(*vimc_buf), list); if (!vimc_buf) { spin_unlock(&vcapture->qlock); return ERR_PTR(-EAGAIN); } /* Remove this entry from the list */ list_del(&vimc_buf->list); spin_unlock(&vcapture->qlock); /* Fill the buffer */ vimc_buf->vb2.vb2_buf.timestamp = ktime_get_ns(); vimc_buf->vb2.sequence = vcapture->sequence++; vimc_buf->vb2.field = vcapture->format.field; vbuf = vb2_plane_vaddr(&vimc_buf->vb2.vb2_buf, 0); memcpy(vbuf, frame, vcapture->format.sizeimage); /* Set it as ready */ vb2_set_plane_payload(&vimc_buf->vb2.vb2_buf, 0, vcapture->format.sizeimage); vb2_buffer_done(&vimc_buf->vb2.vb2_buf, VB2_BUF_STATE_DONE); return NULL; } static struct vimc_ent_device *vimc_capture_add(struct vimc_device *vimc, const char *vcfg_name) { struct v4l2_device *v4l2_dev = &vimc->v4l2_dev; const struct vimc_pix_map *vpix; struct vimc_capture_device *vcapture; struct video_device *vdev; struct vb2_queue *q; int ret; /* Allocate the vimc_capture_device struct */ vcapture = kzalloc(sizeof(*vcapture), GFP_KERNEL); if (!vcapture) return ERR_PTR(-ENOMEM); /* Initialize the media entity */ vcapture->vdev.entity.name = vcfg_name; vcapture->vdev.entity.function = MEDIA_ENT_F_IO_V4L; vcapture->pad.flags = MEDIA_PAD_FL_SINK; ret = media_entity_pads_init(&vcapture->vdev.entity, 1, &vcapture->pad); if (ret) goto err_free_vcapture; /* Initialize the lock */ mutex_init(&vcapture->lock); /* Initialize the vb2 queue */ q = &vcapture->queue; q->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; q->io_modes = VB2_MMAP | VB2_DMABUF; if (vimc_allocator == VIMC_ALLOCATOR_VMALLOC) q->io_modes |= VB2_USERPTR; q->drv_priv = vcapture; q->buf_struct_size = sizeof(struct vimc_capture_buffer); q->ops = &vimc_capture_qops; q->mem_ops = vimc_allocator == VIMC_ALLOCATOR_DMA_CONTIG ? &vb2_dma_contig_memops : &vb2_vmalloc_memops; q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->min_reqbufs_allocation = 2; q->lock = &vcapture->lock; q->dev = v4l2_dev->dev; ret = vb2_queue_init(q); if (ret) { dev_err(vimc->mdev.dev, "%s: vb2 queue init failed (err=%d)\n", vcfg_name, ret); goto err_clean_m_ent; } /* Initialize buffer list and its lock */ INIT_LIST_HEAD(&vcapture->buf_list); spin_lock_init(&vcapture->qlock); /* Set default frame format */ vcapture->format = fmt_default; vpix = vimc_pix_map_by_pixelformat(vcapture->format.pixelformat); vcapture->format.bytesperline = vcapture->format.width * vpix->bpp; vcapture->format.sizeimage = vcapture->format.bytesperline * vcapture->format.height; /* Fill the vimc_ent_device struct */ vcapture->ved.ent = &vcapture->vdev.entity; vcapture->ved.process_frame = vimc_capture_process_frame; vcapture->ved.vdev_get_format = vimc_capture_get_format; vcapture->ved.dev = vimc->mdev.dev; /* Initialize the video_device struct */ vdev = &vcapture->vdev; vdev->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING | V4L2_CAP_IO_MC; vdev->entity.ops = &vimc_capture_mops; vdev->release = video_device_release_empty; vdev->fops = &vimc_capture_fops; vdev->ioctl_ops = &vimc_capture_ioctl_ops; vdev->lock = &vcapture->lock; vdev->queue = q; vdev->v4l2_dev = v4l2_dev; vdev->vfl_dir = VFL_DIR_RX; strscpy(vdev->name, vcfg_name, sizeof(vdev->name)); video_set_drvdata(vdev, &vcapture->ved); /* Register the video_device with the v4l2 and the media framework */ ret = video_register_device(vdev, VFL_TYPE_VIDEO, -1); if (ret) { dev_err(vimc->mdev.dev, "%s: video register failed (err=%d)\n", vcapture->vdev.name, ret); goto err_clean_m_ent; } return &vcapture->ved; err_clean_m_ent: media_entity_cleanup(&vcapture->vdev.entity); err_free_vcapture: kfree(vcapture); return ERR_PTR(ret); } const struct vimc_ent_type vimc_capture_type = { .add = vimc_capture_add, .unregister = vimc_capture_unregister, .release = vimc_capture_release };
11 11 10 10 11 11 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 /* * linux/fs/nls/nls_cp862.c * * Charset cp862 translation tables. * Generated automatically from the Unicode and charset * tables from the Unicode Organization (www.unicode.org). * The Unicode to charset table has only exact mappings. */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/nls.h> #include <linux/errno.h> static const wchar_t charset2uni[256] = { /* 0x00*/ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, /* 0x10*/ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, /* 0x20*/ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, /* 0x30*/ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, /* 0x40*/ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, /* 0x50*/ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, /* 0x60*/ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, /* 0x70*/ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, /* 0x80*/ 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7, 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df, /* 0x90*/ 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7, 0x05e8, 0x05e9, 0x05ea, 0x00a2, 0x00a3, 0x00a5, 0x20a7, 0x0192, /* 0xa0*/ 0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x2310, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb, /* 0xb0*/ 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510, /* 0xc0*/ 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567, /* 0xd0*/ 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, /* 0xe0*/ 0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3, 0x00b5, 0x03c4, 0x03a6, 0x0398, 0x03a9, 0x03b4, 0x221e, 0x03c6, 0x03b5, 0x2229, /* 0xf0*/ 0x2261, 0x00b1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248, 0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2, 0x25a0, 0x00a0, }; static const unsigned char page00[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xff, 0xad, 0x9b, 0x9c, 0x00, 0x9d, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0xa6, 0xae, 0xaa, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0xf8, 0xf1, 0xfd, 0x00, 0x00, 0xe6, 0x00, 0xfa, /* 0xb0-0xb7 */ 0x00, 0x00, 0xa7, 0xaf, 0xac, 0xab, 0x00, 0xa8, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0xa5, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe1, /* 0xd8-0xdf */ 0x00, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0xa1, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0xa4, 0x00, 0xa2, 0x00, 0x00, 0x00, 0xf6, /* 0xf0-0xf7 */ 0x00, 0x00, 0xa3, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page01[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x9f, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ }; static const unsigned char page03[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0xe2, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0xe9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0xe4, 0x00, 0x00, 0xe8, 0x00, /* 0xa0-0xa7 */ 0x00, 0xea, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0xe0, 0x00, 0x00, 0xeb, 0xee, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0xe3, 0x00, 0x00, 0xe5, 0xe7, 0x00, 0xed, 0x00, /* 0xc0-0xc7 */ }; static const unsigned char page05[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0xd0-0xd7 */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0xd8-0xdf */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0xe0-0xe7 */ 0x98, 0x99, 0x9a, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ }; static const unsigned char page20[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9e, /* 0xa0-0xa7 */ }; static const unsigned char page22[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0xf9, 0xfb, 0x00, 0x00, 0x00, 0xec, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0xef, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0xf7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0xf0, 0x00, 0x00, 0xf3, 0xf2, 0x00, 0x00, /* 0x60-0x67 */ }; static const unsigned char page23[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0xa9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0xf4, 0xf5, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ }; static const unsigned char page25[256] = { 0xc4, 0x00, 0xb3, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0xda, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0xbf, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0xd9, 0x00, 0x00, 0x00, 0xc3, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0xc2, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0xc1, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0xc5, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0xcd, 0xba, 0xd5, 0xd6, 0xc9, 0xb8, 0xb7, 0xbb, /* 0x50-0x57 */ 0xd4, 0xd3, 0xc8, 0xbe, 0xbd, 0xbc, 0xc6, 0xc7, /* 0x58-0x5f */ 0xcc, 0xb5, 0xb6, 0xb9, 0xd1, 0xd2, 0xcb, 0xcf, /* 0x60-0x67 */ 0xd0, 0xca, 0xd8, 0xd7, 0xce, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0xdf, 0x00, 0x00, 0x00, 0xdc, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0xdb, 0x00, 0x00, 0x00, 0xdd, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0xde, 0xb0, 0xb1, 0xb2, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ }; static const unsigned char *const page_uni2charset[256] = { page00, page01, NULL, page03, NULL, page05, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, page20, NULL, page22, page23, NULL, page25, NULL, NULL, }; static const unsigned char charset2lower[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x40-0x47 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x48-0x4f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x50-0x57 */ 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa4, 0xa6, 0xa7, /* 0xa0-0xa7 */ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */ 0xe0, 0xe1, 0x00, 0xe3, 0xe5, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */ 0xed, 0x00, 0x00, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */ }; static const unsigned char charset2upper[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x60-0x67 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x68-0x6f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x70-0x77 */ 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0xa5, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */ 0x00, 0xe1, 0xe2, 0x00, 0xe4, 0xe4, 0x00, 0x00, /* 0xe0-0xe7 */ 0xe8, 0xe9, 0xea, 0x00, 0xec, 0xe8, 0x00, 0xef, /* 0xe8-0xef */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */ }; static int uni2char(wchar_t uni, unsigned char *out, int boundlen) { const unsigned char *uni2charset; unsigned char cl = uni & 0x00ff; unsigned char ch = (uni & 0xff00) >> 8; if (boundlen <= 0) return -ENAMETOOLONG; uni2charset = page_uni2charset[ch]; if (uni2charset && uni2charset[cl]) out[0] = uni2charset[cl]; else return -EINVAL; return 1; } static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) { *uni = charset2uni[*rawstring]; if (*uni == 0x0000) return -EINVAL; return 1; } static struct nls_table table = { .charset = "cp862", .uni2char = uni2char, .char2uni = char2uni, .charset2lower = charset2lower, .charset2upper = charset2upper, }; static int __init init_nls_cp862(void) { return register_nls(&table); } static void __exit exit_nls_cp862(void) { unregister_nls(&table); } module_init(init_nls_cp862) module_exit(exit_nls_cp862) MODULE_DESCRIPTION("NLS Codepage 862 (Hebrew)"); MODULE_LICENSE("Dual BSD/GPL");
48 7 71 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/hfsplus/xattr_trusted.c * * Vyacheslav Dubeyko <slava@dubeyko.com> * * Handler for storing security labels as extended attributes. */ #include <linux/security.h> #include <linux/nls.h> #include "hfsplus_fs.h" #include "xattr.h" static int hfsplus_security_getxattr(const struct xattr_handler *handler, struct dentry *unused, struct inode *inode, const char *name, void *buffer, size_t size) { return hfsplus_getxattr(inode, name, buffer, size, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN); } static int hfsplus_security_setxattr(const struct xattr_handler *handler, struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *buffer, size_t size, int flags) { return hfsplus_setxattr(inode, name, buffer, size, flags, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN); } static int hfsplus_initxattrs(struct inode *inode, const struct xattr *xattr_array, void *fs_info) { const struct xattr *xattr; char *xattr_name; int err = 0; xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1, GFP_KERNEL); if (!xattr_name) return -ENOMEM; for (xattr = xattr_array; xattr->name != NULL; xattr++) { if (!strcmp(xattr->name, "")) continue; strcpy(xattr_name, XATTR_SECURITY_PREFIX); strcpy(xattr_name + XATTR_SECURITY_PREFIX_LEN, xattr->name); memset(xattr_name + XATTR_SECURITY_PREFIX_LEN + strlen(xattr->name), 0, 1); err = __hfsplus_setxattr(inode, xattr_name, xattr->value, xattr->value_len, 0); if (err) break; } kfree(xattr_name); return err; } int hfsplus_init_security(struct inode *inode, struct inode *dir, const struct qstr *qstr) { return security_inode_init_security(inode, dir, qstr, &hfsplus_initxattrs, NULL); } const struct xattr_handler hfsplus_xattr_security_handler = { .prefix = XATTR_SECURITY_PREFIX, .get = hfsplus_security_getxattr, .set = hfsplus_security_setxattr, };
37 36 37 37 37 20 20 27 37 33 32 32 23 31 32 28 4 4 24 23 22 22 22 28 27 27 24 26 27 27 27 27 20 27 26 25 25 20 20 20 20 38 20 20 20 20 20 20 20 20 20 20 24 20 20 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 // SPDX-License-Identifier: GPL-2.0-or-later /* Decoder for ASN.1 BER/DER/CER encoded bytestream * * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/export.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/module.h> #include <linux/asn1_decoder.h> #include <linux/asn1_ber_bytecode.h> static const unsigned char asn1_op_lengths[ASN1_OP__NR] = { /* OPC TAG JMP ACT */ [ASN1_OP_MATCH] = 1 + 1, [ASN1_OP_MATCH_OR_SKIP] = 1 + 1, [ASN1_OP_MATCH_ACT] = 1 + 1 + 1, [ASN1_OP_MATCH_ACT_OR_SKIP] = 1 + 1 + 1, [ASN1_OP_MATCH_JUMP] = 1 + 1 + 1, [ASN1_OP_MATCH_JUMP_OR_SKIP] = 1 + 1 + 1, [ASN1_OP_MATCH_ANY] = 1, [ASN1_OP_MATCH_ANY_OR_SKIP] = 1, [ASN1_OP_MATCH_ANY_ACT] = 1 + 1, [ASN1_OP_MATCH_ANY_ACT_OR_SKIP] = 1 + 1, [ASN1_OP_COND_MATCH_OR_SKIP] = 1 + 1, [ASN1_OP_COND_MATCH_ACT_OR_SKIP] = 1 + 1 + 1, [ASN1_OP_COND_MATCH_JUMP_OR_SKIP] = 1 + 1 + 1, [ASN1_OP_COND_MATCH_ANY] = 1, [ASN1_OP_COND_MATCH_ANY_OR_SKIP] = 1, [ASN1_OP_COND_MATCH_ANY_ACT] = 1 + 1, [ASN1_OP_COND_MATCH_ANY_ACT_OR_SKIP] = 1 + 1, [ASN1_OP_COND_FAIL] = 1, [ASN1_OP_COMPLETE] = 1, [ASN1_OP_ACT] = 1 + 1, [ASN1_OP_MAYBE_ACT] = 1 + 1, [ASN1_OP_RETURN] = 1, [ASN1_OP_END_SEQ] = 1, [ASN1_OP_END_SEQ_OF] = 1 + 1, [ASN1_OP_END_SET] = 1, [ASN1_OP_END_SET_OF] = 1 + 1, [ASN1_OP_END_SEQ_ACT] = 1 + 1, [ASN1_OP_END_SEQ_OF_ACT] = 1 + 1 + 1, [ASN1_OP_END_SET_ACT] = 1 + 1, [ASN1_OP_END_SET_OF_ACT] = 1 + 1 + 1, }; /* * Find the length of an indefinite length object * @data: The data buffer * @datalen: The end of the innermost containing element in the buffer * @_dp: The data parse cursor (updated before returning) * @_len: Where to return the size of the element. * @_errmsg: Where to return a pointer to an error message on error */ static int asn1_find_indefinite_length(const unsigned char *data, size_t datalen, size_t *_dp, size_t *_len, const char **_errmsg) { unsigned char tag, tmp; size_t dp = *_dp, len, n; int indef_level = 1; next_tag: if (unlikely(datalen - dp < 2)) { if (datalen == dp) goto missing_eoc; goto data_overrun_error; } /* Extract a tag from the data */ tag = data[dp++]; if (tag == ASN1_EOC) { /* It appears to be an EOC. */ if (data[dp++] != 0) goto invalid_eoc; if (--indef_level <= 0) { *_len = dp - *_dp; *_dp = dp; return 0; } goto next_tag; } if (unlikely((tag & 0x1f) == ASN1_LONG_TAG)) { do { if (unlikely(datalen - dp < 2)) goto data_overrun_error; tmp = data[dp++]; } while (tmp & 0x80); } /* Extract the length */ len = data[dp++]; if (len <= 0x7f) goto check_length; if (unlikely(len == ASN1_INDEFINITE_LENGTH)) { /* Indefinite length */ if (unlikely((tag & ASN1_CONS_BIT) == ASN1_PRIM << 5)) goto indefinite_len_primitive; indef_level++; goto next_tag; } n = len - 0x80; if (unlikely(n > sizeof(len) - 1)) goto length_too_long; if (unlikely(n > datalen - dp)) goto data_overrun_error; len = 0; for (; n > 0; n--) { len <<= 8; len |= data[dp++]; } check_length: if (len > datalen - dp) goto data_overrun_error; dp += len; goto next_tag; length_too_long: *_errmsg = "Unsupported length"; goto error; indefinite_len_primitive: *_errmsg = "Indefinite len primitive not permitted"; goto error; invalid_eoc: *_errmsg = "Invalid length EOC"; goto error; data_overrun_error: *_errmsg = "Data overrun error"; goto error; missing_eoc: *_errmsg = "Missing EOC in indefinite len cons"; error: *_dp = dp; return -1; } /** * asn1_ber_decoder - Decoder BER/DER/CER ASN.1 according to pattern * @decoder: The decoder definition (produced by asn1_compiler) * @context: The caller's context (to be passed to the action functions) * @data: The encoded data * @datalen: The size of the encoded data * * Decode BER/DER/CER encoded ASN.1 data according to a bytecode pattern * produced by asn1_compiler. Action functions are called on marked tags to * allow the caller to retrieve significant data. * * LIMITATIONS: * * To keep down the amount of stack used by this function, the following limits * have been imposed: * * (1) This won't handle datalen > 65535 without increasing the size of the * cons stack elements and length_too_long checking. * * (2) The stack of constructed types is 10 deep. If the depth of non-leaf * constructed types exceeds this, the decode will fail. * * (3) The SET type (not the SET OF type) isn't really supported as tracking * what members of the set have been seen is a pain. */ int asn1_ber_decoder(const struct asn1_decoder *decoder, void *context, const unsigned char *data, size_t datalen) { const unsigned char *machine = decoder->machine; const asn1_action_t *actions = decoder->actions; size_t machlen = decoder->machlen; enum asn1_opcode op; unsigned char tag = 0, csp = 0, jsp = 0, optag = 0, hdr = 0; const char *errmsg; size_t pc = 0, dp = 0, tdp = 0, len = 0; int ret; unsigned char flags = 0; #define FLAG_INDEFINITE_LENGTH 0x01 #define FLAG_MATCHED 0x02 #define FLAG_LAST_MATCHED 0x04 /* Last tag matched */ #define FLAG_CONS 0x20 /* Corresponds to CONS bit in the opcode tag * - ie. whether or not we are going to parse * a compound type. */ #define NR_CONS_STACK 10 unsigned short cons_dp_stack[NR_CONS_STACK]; unsigned short cons_datalen_stack[NR_CONS_STACK]; unsigned char cons_hdrlen_stack[NR_CONS_STACK]; #define NR_JUMP_STACK 10 unsigned char jump_stack[NR_JUMP_STACK]; if (datalen > 65535) return -EMSGSIZE; next_op: pr_debug("next_op: pc=\e[32m%zu\e[m/%zu dp=\e[33m%zu\e[m/%zu C=%d J=%d\n", pc, machlen, dp, datalen, csp, jsp); if (unlikely(pc >= machlen)) goto machine_overrun_error; op = machine[pc]; if (unlikely(pc + asn1_op_lengths[op] > machlen)) goto machine_overrun_error; /* If this command is meant to match a tag, then do that before * evaluating the command. */ if (op <= ASN1_OP__MATCHES_TAG) { unsigned char tmp; /* Skip conditional matches if possible */ if ((op & ASN1_OP_MATCH__COND && flags & FLAG_MATCHED) || (op & ASN1_OP_MATCH__SKIP && dp == datalen)) { flags &= ~FLAG_LAST_MATCHED; pc += asn1_op_lengths[op]; goto next_op; } flags = 0; hdr = 2; /* Extract a tag from the data */ if (unlikely(datalen - dp < 2)) goto data_overrun_error; tag = data[dp++]; if (unlikely((tag & 0x1f) == ASN1_LONG_TAG)) goto long_tag_not_supported; if (op & ASN1_OP_MATCH__ANY) { pr_debug("- any %02x\n", tag); } else { /* Extract the tag from the machine * - Either CONS or PRIM are permitted in the data if * CONS is not set in the op stream, otherwise CONS * is mandatory. */ optag = machine[pc + 1]; flags |= optag & FLAG_CONS; /* Determine whether the tag matched */ tmp = optag ^ tag; tmp &= ~(optag & ASN1_CONS_BIT); pr_debug("- match? %02x %02x %02x\n", tag, optag, tmp); if (tmp != 0) { /* All odd-numbered tags are MATCH_OR_SKIP. */ if (op & ASN1_OP_MATCH__SKIP) { pc += asn1_op_lengths[op]; dp--; goto next_op; } goto tag_mismatch; } } flags |= FLAG_MATCHED; len = data[dp++]; if (len > 0x7f) { if (unlikely(len == ASN1_INDEFINITE_LENGTH)) { /* Indefinite length */ if (unlikely(!(tag & ASN1_CONS_BIT))) goto indefinite_len_primitive; flags |= FLAG_INDEFINITE_LENGTH; if (unlikely(2 > datalen - dp)) goto data_overrun_error; } else { int n = len - 0x80; if (unlikely(n > 2)) goto length_too_long; if (unlikely(n > datalen - dp)) goto data_overrun_error; hdr += n; for (len = 0; n > 0; n--) { len <<= 8; len |= data[dp++]; } if (unlikely(len > datalen - dp)) goto data_overrun_error; } } else { if (unlikely(len > datalen - dp)) goto data_overrun_error; } if (flags & FLAG_CONS) { /* For expected compound forms, we stack the positions * of the start and end of the data. */ if (unlikely(csp >= NR_CONS_STACK)) goto cons_stack_overflow; cons_dp_stack[csp] = dp; cons_hdrlen_stack[csp] = hdr; if (!(flags & FLAG_INDEFINITE_LENGTH)) { cons_datalen_stack[csp] = datalen; datalen = dp + len; } else { cons_datalen_stack[csp] = 0; } csp++; } pr_debug("- TAG: %02x %zu%s\n", tag, len, flags & FLAG_CONS ? " CONS" : ""); tdp = dp; } /* Decide how to handle the operation */ switch (op) { case ASN1_OP_MATCH: case ASN1_OP_MATCH_OR_SKIP: case ASN1_OP_MATCH_ACT: case ASN1_OP_MATCH_ACT_OR_SKIP: case ASN1_OP_MATCH_ANY: case ASN1_OP_MATCH_ANY_OR_SKIP: case ASN1_OP_MATCH_ANY_ACT: case ASN1_OP_MATCH_ANY_ACT_OR_SKIP: case ASN1_OP_COND_MATCH_OR_SKIP: case ASN1_OP_COND_MATCH_ACT_OR_SKIP: case ASN1_OP_COND_MATCH_ANY: case ASN1_OP_COND_MATCH_ANY_OR_SKIP: case ASN1_OP_COND_MATCH_ANY_ACT: case ASN1_OP_COND_MATCH_ANY_ACT_OR_SKIP: if (!(flags & FLAG_CONS)) { if (flags & FLAG_INDEFINITE_LENGTH) { size_t tmp = dp; ret = asn1_find_indefinite_length( data, datalen, &tmp, &len, &errmsg); if (ret < 0) goto error; } pr_debug("- LEAF: %zu\n", len); } if (op & ASN1_OP_MATCH__ACT) { unsigned char act; if (op & ASN1_OP_MATCH__ANY) act = machine[pc + 1]; else act = machine[pc + 2]; ret = actions[act](context, hdr, tag, data + dp, len); if (ret < 0) return ret; } if (!(flags & FLAG_CONS)) dp += len; pc += asn1_op_lengths[op]; goto next_op; case ASN1_OP_MATCH_JUMP: case ASN1_OP_MATCH_JUMP_OR_SKIP: case ASN1_OP_COND_MATCH_JUMP_OR_SKIP: pr_debug("- MATCH_JUMP\n"); if (unlikely(jsp == NR_JUMP_STACK)) goto jump_stack_overflow; jump_stack[jsp++] = pc + asn1_op_lengths[op]; pc = machine[pc + 2]; goto next_op; case ASN1_OP_COND_FAIL: if (unlikely(!(flags & FLAG_MATCHED))) goto tag_mismatch; pc += asn1_op_lengths[op]; goto next_op; case ASN1_OP_COMPLETE: if (unlikely(jsp != 0 || csp != 0)) { pr_err("ASN.1 decoder error: Stacks not empty at completion (%u, %u)\n", jsp, csp); return -EBADMSG; } return 0; case ASN1_OP_END_SET: case ASN1_OP_END_SET_ACT: if (unlikely(!(flags & FLAG_MATCHED))) goto tag_mismatch; fallthrough; case ASN1_OP_END_SEQ: case ASN1_OP_END_SET_OF: case ASN1_OP_END_SEQ_OF: case ASN1_OP_END_SEQ_ACT: case ASN1_OP_END_SET_OF_ACT: case ASN1_OP_END_SEQ_OF_ACT: if (unlikely(csp <= 0)) goto cons_stack_underflow; csp--; tdp = cons_dp_stack[csp]; hdr = cons_hdrlen_stack[csp]; len = datalen; datalen = cons_datalen_stack[csp]; pr_debug("- end cons t=%zu dp=%zu l=%zu/%zu\n", tdp, dp, len, datalen); if (datalen == 0) { /* Indefinite length - check for the EOC. */ datalen = len; if (unlikely(datalen - dp < 2)) goto data_overrun_error; if (data[dp++] != 0) { if (op & ASN1_OP_END__OF) { dp--; csp++; pc = machine[pc + 1]; pr_debug("- continue\n"); goto next_op; } goto missing_eoc; } if (data[dp++] != 0) goto invalid_eoc; len = dp - tdp - 2; } else { if (dp < len && (op & ASN1_OP_END__OF)) { datalen = len; csp++; pc = machine[pc + 1]; pr_debug("- continue\n"); goto next_op; } if (dp != len) goto cons_length_error; len -= tdp; pr_debug("- cons len l=%zu d=%zu\n", len, dp - tdp); } if (op & ASN1_OP_END__ACT) { unsigned char act; if (op & ASN1_OP_END__OF) act = machine[pc + 2]; else act = machine[pc + 1]; ret = actions[act](context, hdr, 0, data + tdp, len); if (ret < 0) return ret; } pc += asn1_op_lengths[op]; goto next_op; case ASN1_OP_MAYBE_ACT: if (!(flags & FLAG_LAST_MATCHED)) { pc += asn1_op_lengths[op]; goto next_op; } fallthrough; case ASN1_OP_ACT: ret = actions[machine[pc + 1]](context, hdr, tag, data + tdp, len); if (ret < 0) return ret; pc += asn1_op_lengths[op]; goto next_op; case ASN1_OP_RETURN: if (unlikely(jsp <= 0)) goto jump_stack_underflow; pc = jump_stack[--jsp]; flags |= FLAG_MATCHED | FLAG_LAST_MATCHED; goto next_op; default: break; } /* Shouldn't reach here */ pr_err("ASN.1 decoder error: Found reserved opcode (%u) pc=%zu\n", op, pc); return -EBADMSG; data_overrun_error: errmsg = "Data overrun error"; goto error; machine_overrun_error: errmsg = "Machine overrun error"; goto error; jump_stack_underflow: errmsg = "Jump stack underflow"; goto error; jump_stack_overflow: errmsg = "Jump stack overflow"; goto error; cons_stack_underflow: errmsg = "Cons stack underflow"; goto error; cons_stack_overflow: errmsg = "Cons stack overflow"; goto error; cons_length_error: errmsg = "Cons length error"; goto error; missing_eoc: errmsg = "Missing EOC in indefinite len cons"; goto error; invalid_eoc: errmsg = "Invalid length EOC"; goto error; length_too_long: errmsg = "Unsupported length"; goto error; indefinite_len_primitive: errmsg = "Indefinite len primitive not permitted"; goto error; tag_mismatch: errmsg = "Unexpected tag"; goto error; long_tag_not_supported: errmsg = "Long tag not supported"; error: pr_debug("\nASN1: %s [m=%zu d=%zu ot=%02x t=%02x l=%zu]\n", errmsg, pc, dp, optag, tag, len); return -EBADMSG; } EXPORT_SYMBOL_GPL(asn1_ber_decoder); MODULE_DESCRIPTION("Decoder for ASN.1 BER/DER/CER encoded bytestream"); MODULE_LICENSE("GPL");
12 11 9 12 1 1 1 5 4 3 2 2 2 1 2 7 6 6 6 6 6 6 10 10 10 8 7 1 7 6 2 2 2 2 2 2 5 5 2 3 1 3 2 2 5 3 3 1 3 2 2 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 // SPDX-License-Identifier: GPL-2.0-or-later /* * (C) 2012 Pablo Neira Ayuso <pablo@netfilter.org> * * This software has been sponsored by Vyatta Inc. <http://www.vyatta.com> */ #include <linux/init.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/netlink.h> #include <linux/rculist.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/list.h> #include <linux/errno.h> #include <linux/capability.h> #include <net/netlink.h> #include <net/sock.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_ecache.h> #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> #include <linux/netfilter/nfnetlink_cthelper.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_DESCRIPTION("nfnl_cthelper: User-space connection tracking helpers"); struct nfnl_cthelper { struct list_head list; struct nf_conntrack_helper helper; }; static LIST_HEAD(nfnl_cthelper_list); static int nfnl_userspace_cthelper(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { const struct nf_conn_help *help; struct nf_conntrack_helper *helper; help = nfct_help(ct); if (help == NULL) return NF_DROP; /* rcu_read_lock()ed by nf_hook_thresh */ helper = rcu_dereference(help->helper); if (helper == NULL) return NF_DROP; /* This is a user-space helper not yet configured, skip. */ if ((helper->flags & (NF_CT_HELPER_F_USERSPACE | NF_CT_HELPER_F_CONFIGURED)) == NF_CT_HELPER_F_USERSPACE) return NF_ACCEPT; /* If the user-space helper is not available, don't block traffic. */ return NF_QUEUE_NR(helper->queue_num) | NF_VERDICT_FLAG_QUEUE_BYPASS; } static const struct nla_policy nfnl_cthelper_tuple_pol[NFCTH_TUPLE_MAX+1] = { [NFCTH_TUPLE_L3PROTONUM] = { .type = NLA_U16, }, [NFCTH_TUPLE_L4PROTONUM] = { .type = NLA_U8, }, }; static int nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple, const struct nlattr *attr) { int err; struct nlattr *tb[NFCTH_TUPLE_MAX+1]; err = nla_parse_nested_deprecated(tb, NFCTH_TUPLE_MAX, attr, nfnl_cthelper_tuple_pol, NULL); if (err < 0) return err; if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM]) return -EINVAL; /* Not all fields are initialized so first zero the tuple */ memset(tuple, 0, sizeof(struct nf_conntrack_tuple)); tuple->src.l3num = ntohs(nla_get_be16(tb[NFCTH_TUPLE_L3PROTONUM])); tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]); return 0; } static int nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct) { struct nf_conn_help *help = nfct_help(ct); const struct nf_conntrack_helper *helper; if (attr == NULL) return -EINVAL; helper = rcu_dereference(help->helper); if (!helper || helper->data_len == 0) return -EINVAL; nla_memcpy(help->data, attr, sizeof(help->data)); return 0; } static int nfnl_cthelper_to_nlattr(struct sk_buff *skb, const struct nf_conn *ct) { const struct nf_conn_help *help = nfct_help(ct); const struct nf_conntrack_helper *helper; helper = rcu_dereference(help->helper); if (helper && helper->data_len && nla_put(skb, CTA_HELP_INFO, helper->data_len, &help->data)) goto nla_put_failure; return 0; nla_put_failure: return -ENOSPC; } static const struct nla_policy nfnl_cthelper_expect_pol[NFCTH_POLICY_MAX+1] = { [NFCTH_POLICY_NAME] = { .type = NLA_NUL_STRING, .len = NF_CT_HELPER_NAME_LEN-1 }, [NFCTH_POLICY_EXPECT_MAX] = { .type = NLA_U32, }, [NFCTH_POLICY_EXPECT_TIMEOUT] = { .type = NLA_U32, }, }; static int nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy, const struct nlattr *attr) { int err; struct nlattr *tb[NFCTH_POLICY_MAX+1]; err = nla_parse_nested_deprecated(tb, NFCTH_POLICY_MAX, attr, nfnl_cthelper_expect_pol, NULL); if (err < 0) return err; if (!tb[NFCTH_POLICY_NAME] || !tb[NFCTH_POLICY_EXPECT_MAX] || !tb[NFCTH_POLICY_EXPECT_TIMEOUT]) return -EINVAL; nla_strscpy(expect_policy->name, tb[NFCTH_POLICY_NAME], NF_CT_HELPER_NAME_LEN); expect_policy->max_expected = ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX])); if (expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT) return -EINVAL; expect_policy->timeout = ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT])); return 0; } static const struct nla_policy nfnl_cthelper_expect_policy_set[NFCTH_POLICY_SET_MAX+1] = { [NFCTH_POLICY_SET_NUM] = { .type = NLA_U32, }, }; static int nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, const struct nlattr *attr) { int i, ret; struct nf_conntrack_expect_policy *expect_policy; struct nlattr *tb[NFCTH_POLICY_SET_MAX+1]; unsigned int class_max; ret = nla_parse_nested_deprecated(tb, NFCTH_POLICY_SET_MAX, attr, nfnl_cthelper_expect_policy_set, NULL); if (ret < 0) return ret; if (!tb[NFCTH_POLICY_SET_NUM]) return -EINVAL; class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM])); if (class_max == 0) return -EINVAL; if (class_max > NF_CT_MAX_EXPECT_CLASSES) return -EOVERFLOW; expect_policy = kcalloc(class_max, sizeof(struct nf_conntrack_expect_policy), GFP_KERNEL); if (expect_policy == NULL) return -ENOMEM; for (i = 0; i < class_max; i++) { if (!tb[NFCTH_POLICY_SET+i]) goto err; ret = nfnl_cthelper_expect_policy(&expect_policy[i], tb[NFCTH_POLICY_SET+i]); if (ret < 0) goto err; } helper->expect_class_max = class_max - 1; helper->expect_policy = expect_policy; return 0; err: kfree(expect_policy); return -EINVAL; } static int nfnl_cthelper_create(const struct nlattr * const tb[], struct nf_conntrack_tuple *tuple) { struct nf_conntrack_helper *helper; struct nfnl_cthelper *nfcth; unsigned int size; int ret; if (!tb[NFCTH_TUPLE] || !tb[NFCTH_POLICY] || !tb[NFCTH_PRIV_DATA_LEN]) return -EINVAL; nfcth = kzalloc(sizeof(*nfcth), GFP_KERNEL); if (nfcth == NULL) return -ENOMEM; helper = &nfcth->helper; ret = nfnl_cthelper_parse_expect_policy(helper, tb[NFCTH_POLICY]); if (ret < 0) goto err1; nla_strscpy(helper->name, tb[NFCTH_NAME], NF_CT_HELPER_NAME_LEN); size = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN])); if (size > sizeof_field(struct nf_conn_help, data)) { ret = -ENOMEM; goto err2; } helper->data_len = size; helper->flags |= NF_CT_HELPER_F_USERSPACE; memcpy(&helper->tuple, tuple, sizeof(struct nf_conntrack_tuple)); helper->me = THIS_MODULE; helper->help = nfnl_userspace_cthelper; helper->from_nlattr = nfnl_cthelper_from_nlattr; helper->to_nlattr = nfnl_cthelper_to_nlattr; /* Default to queue number zero, this can be updated at any time. */ if (tb[NFCTH_QUEUE_NUM]) helper->queue_num = ntohl(nla_get_be32(tb[NFCTH_QUEUE_NUM])); if (tb[NFCTH_STATUS]) { int status = ntohl(nla_get_be32(tb[NFCTH_STATUS])); switch(status) { case NFCT_HELPER_STATUS_ENABLED: helper->flags |= NF_CT_HELPER_F_CONFIGURED; break; case NFCT_HELPER_STATUS_DISABLED: helper->flags &= ~NF_CT_HELPER_F_CONFIGURED; break; } } ret = nf_conntrack_helper_register(helper); if (ret < 0) goto err2; list_add_tail(&nfcth->list, &nfnl_cthelper_list); return 0; err2: kfree(helper->expect_policy); err1: kfree(nfcth); return ret; } static int nfnl_cthelper_update_policy_one(const struct nf_conntrack_expect_policy *policy, struct nf_conntrack_expect_policy *new_policy, const struct nlattr *attr) { struct nlattr *tb[NFCTH_POLICY_MAX + 1]; int err; err = nla_parse_nested_deprecated(tb, NFCTH_POLICY_MAX, attr, nfnl_cthelper_expect_pol, NULL); if (err < 0) return err; if (!tb[NFCTH_POLICY_NAME] || !tb[NFCTH_POLICY_EXPECT_MAX] || !tb[NFCTH_POLICY_EXPECT_TIMEOUT]) return -EINVAL; if (nla_strcmp(tb[NFCTH_POLICY_NAME], policy->name)) return -EBUSY; new_policy->max_expected = ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX])); if (new_policy->max_expected > NF_CT_EXPECT_MAX_CNT) return -EINVAL; new_policy->timeout = ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT])); return 0; } static int nfnl_cthelper_update_policy_all(struct nlattr *tb[], struct nf_conntrack_helper *helper) { struct nf_conntrack_expect_policy *new_policy; struct nf_conntrack_expect_policy *policy; int i, ret = 0; new_policy = kmalloc_array(helper->expect_class_max + 1, sizeof(*new_policy), GFP_KERNEL); if (!new_policy) return -ENOMEM; /* Check first that all policy attributes are well-formed, so we don't * leave things in inconsistent state on errors. */ for (i = 0; i < helper->expect_class_max + 1; i++) { if (!tb[NFCTH_POLICY_SET + i]) { ret = -EINVAL; goto err; } ret = nfnl_cthelper_update_policy_one(&helper->expect_policy[i], &new_policy[i], tb[NFCTH_POLICY_SET + i]); if (ret < 0) goto err; } /* Now we can safely update them. */ for (i = 0; i < helper->expect_class_max + 1; i++) { policy = (struct nf_conntrack_expect_policy *) &helper->expect_policy[i]; policy->max_expected = new_policy->max_expected; policy->timeout = new_policy->timeout; } err: kfree(new_policy); return ret; } static int nfnl_cthelper_update_policy(struct nf_conntrack_helper *helper, const struct nlattr *attr) { struct nlattr *tb[NFCTH_POLICY_SET_MAX + 1]; unsigned int class_max; int err; err = nla_parse_nested_deprecated(tb, NFCTH_POLICY_SET_MAX, attr, nfnl_cthelper_expect_policy_set, NULL); if (err < 0) return err; if (!tb[NFCTH_POLICY_SET_NUM]) return -EINVAL; class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM])); if (helper->expect_class_max + 1 != class_max) return -EBUSY; return nfnl_cthelper_update_policy_all(tb, helper); } static int nfnl_cthelper_update(const struct nlattr * const tb[], struct nf_conntrack_helper *helper) { u32 size; int ret; if (tb[NFCTH_PRIV_DATA_LEN]) { size = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN])); if (size != helper->data_len) return -EBUSY; } if (tb[NFCTH_POLICY]) { ret = nfnl_cthelper_update_policy(helper, tb[NFCTH_POLICY]); if (ret < 0) return ret; } if (tb[NFCTH_QUEUE_NUM]) helper->queue_num = ntohl(nla_get_be32(tb[NFCTH_QUEUE_NUM])); if (tb[NFCTH_STATUS]) { int status = ntohl(nla_get_be32(tb[NFCTH_STATUS])); switch(status) { case NFCT_HELPER_STATUS_ENABLED: helper->flags |= NF_CT_HELPER_F_CONFIGURED; break; case NFCT_HELPER_STATUS_DISABLED: helper->flags &= ~NF_CT_HELPER_F_CONFIGURED; break; } } return 0; } static int nfnl_cthelper_new(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const tb[]) { const char *helper_name; struct nf_conntrack_helper *cur, *helper = NULL; struct nf_conntrack_tuple tuple; struct nfnl_cthelper *nlcth; int ret = 0; if (!capable(CAP_NET_ADMIN)) return -EPERM; if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE]) return -EINVAL; helper_name = nla_data(tb[NFCTH_NAME]); ret = nfnl_cthelper_parse_tuple(&tuple, tb[NFCTH_TUPLE]); if (ret < 0) return ret; list_for_each_entry(nlcth, &nfnl_cthelper_list, list) { cur = &nlcth->helper; if (strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN)) continue; if ((tuple.src.l3num != cur->tuple.src.l3num || tuple.dst.protonum != cur->tuple.dst.protonum)) continue; if (info->nlh->nlmsg_flags & NLM_F_EXCL) return -EEXIST; helper = cur; break; } if (helper == NULL) ret = nfnl_cthelper_create(tb, &tuple); else ret = nfnl_cthelper_update(tb, helper); return ret; } static int nfnl_cthelper_dump_tuple(struct sk_buff *skb, struct nf_conntrack_helper *helper) { struct nlattr *nest_parms; nest_parms = nla_nest_start(skb, NFCTH_TUPLE); if (nest_parms == NULL) goto nla_put_failure; if (nla_put_be16(skb, NFCTH_TUPLE_L3PROTONUM, htons(helper->tuple.src.l3num))) goto nla_put_failure; if (nla_put_u8(skb, NFCTH_TUPLE_L4PROTONUM, helper->tuple.dst.protonum)) goto nla_put_failure; nla_nest_end(skb, nest_parms); return 0; nla_put_failure: return -1; } static int nfnl_cthelper_dump_policy(struct sk_buff *skb, struct nf_conntrack_helper *helper) { int i; struct nlattr *nest_parms1, *nest_parms2; nest_parms1 = nla_nest_start(skb, NFCTH_POLICY); if (nest_parms1 == NULL) goto nla_put_failure; if (nla_put_be32(skb, NFCTH_POLICY_SET_NUM, htonl(helper->expect_class_max + 1))) goto nla_put_failure; for (i = 0; i < helper->expect_class_max + 1; i++) { nest_parms2 = nla_nest_start(skb, (NFCTH_POLICY_SET + i)); if (nest_parms2 == NULL) goto nla_put_failure; if (nla_put_string(skb, NFCTH_POLICY_NAME, helper->expect_policy[i].name)) goto nla_put_failure; if (nla_put_be32(skb, NFCTH_POLICY_EXPECT_MAX, htonl(helper->expect_policy[i].max_expected))) goto nla_put_failure; if (nla_put_be32(skb, NFCTH_POLICY_EXPECT_TIMEOUT, htonl(helper->expect_policy[i].timeout))) goto nla_put_failure; nla_nest_end(skb, nest_parms2); } nla_nest_end(skb, nest_parms1); return 0; nla_put_failure: return -1; } static int nfnl_cthelper_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, struct nf_conntrack_helper *helper) { struct nlmsghdr *nlh; unsigned int flags = portid ? NLM_F_MULTI : 0; int status; event = nfnl_msg_type(NFNL_SUBSYS_CTHELPER, event); nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, NFNETLINK_V0, 0); if (!nlh) goto nlmsg_failure; if (nla_put_string(skb, NFCTH_NAME, helper->name)) goto nla_put_failure; if (nla_put_be32(skb, NFCTH_QUEUE_NUM, htonl(helper->queue_num))) goto nla_put_failure; if (nfnl_cthelper_dump_tuple(skb, helper) < 0) goto nla_put_failure; if (nfnl_cthelper_dump_policy(skb, helper) < 0) goto nla_put_failure; if (nla_put_be32(skb, NFCTH_PRIV_DATA_LEN, htonl(helper->data_len))) goto nla_put_failure; if (helper->flags & NF_CT_HELPER_F_CONFIGURED) status = NFCT_HELPER_STATUS_ENABLED; else status = NFCT_HELPER_STATUS_DISABLED; if (nla_put_be32(skb, NFCTH_STATUS, htonl(status))) goto nla_put_failure; nlmsg_end(skb, nlh); return skb->len; nlmsg_failure: nla_put_failure: nlmsg_cancel(skb, nlh); return -1; } static int nfnl_cthelper_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { struct nf_conntrack_helper *cur, *last; rcu_read_lock(); last = (struct nf_conntrack_helper *)cb->args[1]; for (; cb->args[0] < nf_ct_helper_hsize; cb->args[0]++) { restart: hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[cb->args[0]], hnode) { /* skip non-userspace conntrack helpers. */ if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) continue; if (cb->args[1]) { if (cur != last) continue; cb->args[1] = 0; } if (nfnl_cthelper_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), NFNL_MSG_CTHELPER_NEW, cur) < 0) { cb->args[1] = (unsigned long)cur; goto out; } } } if (cb->args[1]) { cb->args[1] = 0; goto restart; } out: rcu_read_unlock(); return skb->len; } static int nfnl_cthelper_get(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const tb[]) { int ret = -ENOENT; struct nf_conntrack_helper *cur; struct sk_buff *skb2; char *helper_name = NULL; struct nf_conntrack_tuple tuple; struct nfnl_cthelper *nlcth; bool tuple_set = false; if (!capable(CAP_NET_ADMIN)) return -EPERM; if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = nfnl_cthelper_dump_table, }; return netlink_dump_start(info->sk, skb, info->nlh, &c); } if (tb[NFCTH_NAME]) helper_name = nla_data(tb[NFCTH_NAME]); if (tb[NFCTH_TUPLE]) { ret = nfnl_cthelper_parse_tuple(&tuple, tb[NFCTH_TUPLE]); if (ret < 0) return ret; tuple_set = true; } list_for_each_entry(nlcth, &nfnl_cthelper_list, list) { cur = &nlcth->helper; if (helper_name && strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN)) continue; if (tuple_set && (tuple.src.l3num != cur->tuple.src.l3num || tuple.dst.protonum != cur->tuple.dst.protonum)) continue; skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (skb2 == NULL) { ret = -ENOMEM; break; } ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, NFNL_MSG_TYPE(info->nlh->nlmsg_type), NFNL_MSG_CTHELPER_NEW, cur); if (ret <= 0) { kfree_skb(skb2); break; } ret = nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid); break; } return ret; } static int nfnl_cthelper_del(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const tb[]) { char *helper_name = NULL; struct nf_conntrack_helper *cur; struct nf_conntrack_tuple tuple; bool tuple_set = false, found = false; struct nfnl_cthelper *nlcth, *n; int j = 0, ret; if (!capable(CAP_NET_ADMIN)) return -EPERM; if (tb[NFCTH_NAME]) helper_name = nla_data(tb[NFCTH_NAME]); if (tb[NFCTH_TUPLE]) { ret = nfnl_cthelper_parse_tuple(&tuple, tb[NFCTH_TUPLE]); if (ret < 0) return ret; tuple_set = true; } ret = -ENOENT; list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) { cur = &nlcth->helper; j++; if (helper_name && strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN)) continue; if (tuple_set && (tuple.src.l3num != cur->tuple.src.l3num || tuple.dst.protonum != cur->tuple.dst.protonum)) continue; if (refcount_dec_if_one(&cur->refcnt)) { found = true; nf_conntrack_helper_unregister(cur); kfree(cur->expect_policy); list_del(&nlcth->list); kfree(nlcth); } else { ret = -EBUSY; } } /* Make sure we return success if we flush and there is no helpers */ return (found || j == 0) ? 0 : ret; } static const struct nla_policy nfnl_cthelper_policy[NFCTH_MAX+1] = { [NFCTH_NAME] = { .type = NLA_NUL_STRING, .len = NF_CT_HELPER_NAME_LEN-1 }, [NFCTH_QUEUE_NUM] = { .type = NLA_U32, }, [NFCTH_PRIV_DATA_LEN] = { .type = NLA_U32, }, [NFCTH_STATUS] = { .type = NLA_U32, }, }; static const struct nfnl_callback nfnl_cthelper_cb[NFNL_MSG_CTHELPER_MAX] = { [NFNL_MSG_CTHELPER_NEW] = { .call = nfnl_cthelper_new, .type = NFNL_CB_MUTEX, .attr_count = NFCTH_MAX, .policy = nfnl_cthelper_policy }, [NFNL_MSG_CTHELPER_GET] = { .call = nfnl_cthelper_get, .type = NFNL_CB_MUTEX, .attr_count = NFCTH_MAX, .policy = nfnl_cthelper_policy }, [NFNL_MSG_CTHELPER_DEL] = { .call = nfnl_cthelper_del, .type = NFNL_CB_MUTEX, .attr_count = NFCTH_MAX, .policy = nfnl_cthelper_policy }, }; static const struct nfnetlink_subsystem nfnl_cthelper_subsys = { .name = "cthelper", .subsys_id = NFNL_SUBSYS_CTHELPER, .cb_count = NFNL_MSG_CTHELPER_MAX, .cb = nfnl_cthelper_cb, }; MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTHELPER); static int __init nfnl_cthelper_init(void) { int ret; ret = nfnetlink_subsys_register(&nfnl_cthelper_subsys); if (ret < 0) { pr_err("nfnl_cthelper: cannot register with nfnetlink.\n"); goto err_out; } return 0; err_out: return ret; } static void __exit nfnl_cthelper_exit(void) { struct nf_conntrack_helper *cur; struct nfnl_cthelper *nlcth, *n; nfnetlink_subsys_unregister(&nfnl_cthelper_subsys); list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) { cur = &nlcth->helper; nf_conntrack_helper_unregister(cur); kfree(cur->expect_policy); kfree(nlcth); } } module_init(nfnl_cthelper_init); module_exit(nfnl_cthelper_exit);
1 1 1 1 3 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 /* * mtdram - a test mtd device * Author: Alexander Larsson <alex@cendio.se> * * Copyright (c) 1999 Alexander Larsson <alex@cendio.se> * Copyright (c) 2005 Joern Engel <joern@wh.fh-wedel.de> * * This code is GPL * */ #include <linux/module.h> #include <linux/slab.h> #include <linux/ioport.h> #include <linux/vmalloc.h> #include <linux/mm.h> #include <linux/init.h> #include <linux/mtd/mtd.h> #include <linux/mtd/mtdram.h> static unsigned long total_size = CONFIG_MTDRAM_TOTAL_SIZE; static unsigned long erase_size = CONFIG_MTDRAM_ERASE_SIZE; static unsigned long writebuf_size = 64; #define MTDRAM_TOTAL_SIZE (total_size * 1024) #define MTDRAM_ERASE_SIZE (erase_size * 1024) module_param(total_size, ulong, 0); MODULE_PARM_DESC(total_size, "Total device size in KiB"); module_param(erase_size, ulong, 0); MODULE_PARM_DESC(erase_size, "Device erase block size in KiB"); module_param(writebuf_size, ulong, 0); MODULE_PARM_DESC(writebuf_size, "Device write buf size in Bytes (Default: 64)"); // We could store these in the mtd structure, but we only support 1 device.. static struct mtd_info *mtd_info; static int check_offs_len(struct mtd_info *mtd, loff_t ofs, uint64_t len) { int ret = 0; /* Start address must align on block boundary */ if (mtd_mod_by_eb(ofs, mtd)) { pr_debug("%s: unaligned address\n", __func__); ret = -EINVAL; } /* Length must align on block boundary */ if (mtd_mod_by_eb(len, mtd)) { pr_debug("%s: length not block aligned\n", __func__); ret = -EINVAL; } return ret; } static int ram_erase(struct mtd_info *mtd, struct erase_info *instr) { if (check_offs_len(mtd, instr->addr, instr->len)) return -EINVAL; memset((char *)mtd->priv + instr->addr, 0xff, instr->len); return 0; } static int ram_point(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, void **virt, resource_size_t *phys) { *virt = mtd->priv + from; *retlen = len; if (phys) { /* limit retlen to the number of contiguous physical pages */ unsigned long page_ofs = offset_in_page(*virt); void *addr = *virt - page_ofs; unsigned long pfn1, pfn0 = vmalloc_to_pfn(addr); *phys = __pfn_to_phys(pfn0) + page_ofs; len += page_ofs; while (len > PAGE_SIZE) { len -= PAGE_SIZE; addr += PAGE_SIZE; pfn0++; pfn1 = vmalloc_to_pfn(addr); if (pfn1 != pfn0) { *retlen = addr - *virt; break; } } } return 0; } static int ram_unpoint(struct mtd_info *mtd, loff_t from, size_t len) { return 0; } static int ram_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) { memcpy(buf, mtd->priv + from, len); *retlen = len; return 0; } static int ram_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf) { memcpy((char *)mtd->priv + to, buf, len); *retlen = len; return 0; } static void __exit cleanup_mtdram(void) { if (mtd_info) { mtd_device_unregister(mtd_info); vfree(mtd_info->priv); kfree(mtd_info); } } int mtdram_init_device(struct mtd_info *mtd, void *mapped_address, unsigned long size, const char *name) { memset(mtd, 0, sizeof(*mtd)); /* Setup the MTD structure */ mtd->name = name; mtd->type = MTD_RAM; mtd->flags = MTD_CAP_RAM; mtd->size = size; mtd->writesize = 1; mtd->writebufsize = writebuf_size; mtd->erasesize = MTDRAM_ERASE_SIZE; mtd->priv = mapped_address; mtd->owner = THIS_MODULE; mtd->_erase = ram_erase; mtd->_point = ram_point; mtd->_unpoint = ram_unpoint; mtd->_read = ram_read; mtd->_write = ram_write; if (mtd_device_register(mtd, NULL, 0)) return -EIO; return 0; } static int __init init_mtdram(void) { void *addr; int err; if (!total_size) return -EINVAL; /* Allocate some memory */ mtd_info = kmalloc(sizeof(struct mtd_info), GFP_KERNEL); if (!mtd_info) return -ENOMEM; addr = vmalloc(MTDRAM_TOTAL_SIZE); if (!addr) { kfree(mtd_info); mtd_info = NULL; return -ENOMEM; } err = mtdram_init_device(mtd_info, addr, MTDRAM_TOTAL_SIZE, "mtdram test device"); if (err) { vfree(addr); kfree(mtd_info); mtd_info = NULL; return err; } memset(mtd_info->priv, 0xff, MTDRAM_TOTAL_SIZE); return err; } module_init(init_mtdram); module_exit(cleanup_mtdram); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Alexander Larsson <alexl@redhat.com>"); MODULE_DESCRIPTION("Simulated MTD driver for testing");
13 22 74 2965 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_CTYPE_H #define _LINUX_CTYPE_H #include <linux/compiler.h> /* * NOTE! This ctype does not handle EOF like the standard C * library is required to. */ #define _U 0x01 /* upper */ #define _L 0x02 /* lower */ #define _D 0x04 /* digit */ #define _C 0x08 /* cntrl */ #define _P 0x10 /* punct */ #define _S 0x20 /* white space (space/lf/tab) */ #define _X 0x40 /* hex digit */ #define _SP 0x80 /* hard space (0x20) */ extern const unsigned char _ctype[]; #define __ismask(x) (_ctype[(int)(unsigned char)(x)]) #define isalnum(c) ((__ismask(c)&(_U|_L|_D)) != 0) #define isalpha(c) ((__ismask(c)&(_U|_L)) != 0) #define iscntrl(c) ((__ismask(c)&(_C)) != 0) #define isgraph(c) ((__ismask(c)&(_P|_U|_L|_D)) != 0) #define islower(c) ((__ismask(c)&(_L)) != 0) #define isprint(c) ((__ismask(c)&(_P|_U|_L|_D|_SP)) != 0) #define ispunct(c) ((__ismask(c)&(_P)) != 0) /* Note: isspace() must return false for %NUL-terminator */ #define isspace(c) ((__ismask(c)&(_S)) != 0) #define isupper(c) ((__ismask(c)&(_U)) != 0) #define isxdigit(c) ((__ismask(c)&(_D|_X)) != 0) #define isascii(c) (((unsigned char)(c))<=0x7f) #define toascii(c) (((unsigned char)(c))&0x7f) #if __has_builtin(__builtin_isdigit) #define isdigit(c) __builtin_isdigit(c) #else static inline int isdigit(int c) { return '0' <= c && c <= '9'; } #endif static inline unsigned char __tolower(unsigned char c) { if (isupper(c)) c -= 'A'-'a'; return c; } static inline unsigned char __toupper(unsigned char c) { if (islower(c)) c -= 'a'-'A'; return c; } #define tolower(c) __tolower(c) #define toupper(c) __toupper(c) /* * Fast implementation of tolower() for internal usage. Do not use in your * code. */ static inline char _tolower(const char c) { return c | 0x20; } /* Fast check for octal digit */ static inline int isodigit(const char c) { return c >= '0' && c <= '7'; } #endif
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 // SPDX-License-Identifier: GPL-2.0 /* * Multipath support for RPC * * Copyright (c) 2015, 2016, Primary Data, Inc. All rights reserved. * * Trond Myklebust <trond.myklebust@primarydata.com> * */ #include <linux/atomic.h> #include <linux/types.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/rcupdate.h> #include <linux/rculist.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/sunrpc/xprt.h> #include <linux/sunrpc/addr.h> #include <linux/sunrpc/xprtmultipath.h> #include "sysfs.h" typedef struct rpc_xprt *(*xprt_switch_find_xprt_t)(struct rpc_xprt_switch *xps, const struct rpc_xprt *cur); static const struct rpc_xprt_iter_ops rpc_xprt_iter_singular; static const struct rpc_xprt_iter_ops rpc_xprt_iter_roundrobin; static const struct rpc_xprt_iter_ops rpc_xprt_iter_listall; static const struct rpc_xprt_iter_ops rpc_xprt_iter_listoffline; static void xprt_switch_add_xprt_locked(struct rpc_xprt_switch *xps, struct rpc_xprt *xprt) { if (unlikely(xprt_get(xprt) == NULL)) return; list_add_tail_rcu(&xprt->xprt_switch, &xps->xps_xprt_list); smp_wmb(); if (xps->xps_nxprts == 0) xps->xps_net = xprt->xprt_net; xps->xps_nxprts++; xps->xps_nactive++; } /** * rpc_xprt_switch_add_xprt - Add a new rpc_xprt to an rpc_xprt_switch * @xps: pointer to struct rpc_xprt_switch * @xprt: pointer to struct rpc_xprt * * Adds xprt to the end of the list of struct rpc_xprt in xps. */ void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps, struct rpc_xprt *xprt) { if (xprt == NULL) return; spin_lock(&xps->xps_lock); if (xps->xps_net == xprt->xprt_net || xps->xps_net == NULL) xprt_switch_add_xprt_locked(xps, xprt); spin_unlock(&xps->xps_lock); rpc_sysfs_xprt_setup(xps, xprt, GFP_KERNEL); } static void xprt_switch_remove_xprt_locked(struct rpc_xprt_switch *xps, struct rpc_xprt *xprt, bool offline) { if (unlikely(xprt == NULL)) return; if (!test_bit(XPRT_OFFLINE, &xprt->state) && offline) xps->xps_nactive--; xps->xps_nxprts--; if (xps->xps_nxprts == 0) xps->xps_net = NULL; smp_wmb(); list_del_rcu(&xprt->xprt_switch); } /** * rpc_xprt_switch_remove_xprt - Removes an rpc_xprt from a rpc_xprt_switch * @xps: pointer to struct rpc_xprt_switch * @xprt: pointer to struct rpc_xprt * @offline: indicates if the xprt that's being removed is in an offline state * * Removes xprt from the list of struct rpc_xprt in xps. */ void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps, struct rpc_xprt *xprt, bool offline) { spin_lock(&xps->xps_lock); xprt_switch_remove_xprt_locked(xps, xprt, offline); spin_unlock(&xps->xps_lock); xprt_put(xprt); } /** * rpc_xprt_switch_get_main_xprt - Get the 'main' xprt for an xprt switch. * @xps: pointer to struct rpc_xprt_switch. */ struct rpc_xprt *rpc_xprt_switch_get_main_xprt(struct rpc_xprt_switch *xps) { struct rpc_xprt_iter xpi; struct rpc_xprt *xprt; xprt_iter_init_listall(&xpi, xps); xprt = xprt_iter_get_next(&xpi); while (xprt && !xprt->main) { xprt_put(xprt); xprt = xprt_iter_get_next(&xpi); } xprt_iter_destroy(&xpi); return xprt; } static DEFINE_IDA(rpc_xprtswitch_ids); void xprt_multipath_cleanup_ids(void) { ida_destroy(&rpc_xprtswitch_ids); } static int xprt_switch_alloc_id(struct rpc_xprt_switch *xps, gfp_t gfp_flags) { int id; id = ida_alloc(&rpc_xprtswitch_ids, gfp_flags); if (id < 0) return id; xps->xps_id = id; return 0; } static void xprt_switch_free_id(struct rpc_xprt_switch *xps) { ida_free(&rpc_xprtswitch_ids, xps->xps_id); } /** * xprt_switch_alloc - Allocate a new struct rpc_xprt_switch * @xprt: pointer to struct rpc_xprt * @gfp_flags: allocation flags * * On success, returns an initialised struct rpc_xprt_switch, containing * the entry xprt. Returns NULL on failure. */ struct rpc_xprt_switch *xprt_switch_alloc(struct rpc_xprt *xprt, gfp_t gfp_flags) { struct rpc_xprt_switch *xps; xps = kmalloc(sizeof(*xps), gfp_flags); if (xps != NULL) { spin_lock_init(&xps->xps_lock); kref_init(&xps->xps_kref); xprt_switch_alloc_id(xps, gfp_flags); xps->xps_nxprts = xps->xps_nactive = 0; atomic_long_set(&xps->xps_queuelen, 0); xps->xps_net = NULL; INIT_LIST_HEAD(&xps->xps_xprt_list); xps->xps_iter_ops = &rpc_xprt_iter_singular; rpc_sysfs_xprt_switch_setup(xps, xprt, gfp_flags); xprt_switch_add_xprt_locked(xps, xprt); xps->xps_nunique_destaddr_xprts = 1; rpc_sysfs_xprt_setup(xps, xprt, gfp_flags); } return xps; } static void xprt_switch_free_entries(struct rpc_xprt_switch *xps) { spin_lock(&xps->xps_lock); while (!list_empty(&xps->xps_xprt_list)) { struct rpc_xprt *xprt; xprt = list_first_entry(&xps->xps_xprt_list, struct rpc_xprt, xprt_switch); xprt_switch_remove_xprt_locked(xps, xprt, true); spin_unlock(&xps->xps_lock); xprt_put(xprt); spin_lock(&xps->xps_lock); } spin_unlock(&xps->xps_lock); } static void xprt_switch_free(struct kref *kref) { struct rpc_xprt_switch *xps = container_of(kref, struct rpc_xprt_switch, xps_kref); xprt_switch_free_entries(xps); rpc_sysfs_xprt_switch_destroy(xps); xprt_switch_free_id(xps); kfree_rcu(xps, xps_rcu); } /** * xprt_switch_get - Return a reference to a rpc_xprt_switch * @xps: pointer to struct rpc_xprt_switch * * Returns a reference to xps unless the refcount is already zero. */ struct rpc_xprt_switch *xprt_switch_get(struct rpc_xprt_switch *xps) { if (xps != NULL && kref_get_unless_zero(&xps->xps_kref)) return xps; return NULL; } /** * xprt_switch_put - Release a reference to a rpc_xprt_switch * @xps: pointer to struct rpc_xprt_switch * * Release the reference to xps, and free it once the refcount is zero. */ void xprt_switch_put(struct rpc_xprt_switch *xps) { if (xps != NULL) kref_put(&xps->xps_kref, xprt_switch_free); } /** * rpc_xprt_switch_set_roundrobin - Set a round-robin policy on rpc_xprt_switch * @xps: pointer to struct rpc_xprt_switch * * Sets a round-robin default policy for iterators acting on xps. */ void rpc_xprt_switch_set_roundrobin(struct rpc_xprt_switch *xps) { if (READ_ONCE(xps->xps_iter_ops) != &rpc_xprt_iter_roundrobin) WRITE_ONCE(xps->xps_iter_ops, &rpc_xprt_iter_roundrobin); } static const struct rpc_xprt_iter_ops *xprt_iter_ops(const struct rpc_xprt_iter *xpi) { if (xpi->xpi_ops != NULL) return xpi->xpi_ops; return rcu_dereference(xpi->xpi_xpswitch)->xps_iter_ops; } static void xprt_iter_no_rewind(struct rpc_xprt_iter *xpi) { } static void xprt_iter_default_rewind(struct rpc_xprt_iter *xpi) { WRITE_ONCE(xpi->xpi_cursor, NULL); } static bool xprt_is_active(const struct rpc_xprt *xprt) { return (kref_read(&xprt->kref) != 0 && !test_bit(XPRT_OFFLINE, &xprt->state)); } static struct rpc_xprt *xprt_switch_find_first_entry(struct list_head *head) { struct rpc_xprt *pos; list_for_each_entry_rcu(pos, head, xprt_switch) { if (xprt_is_active(pos)) return pos; } return NULL; } static struct rpc_xprt *xprt_switch_find_first_entry_offline(struct list_head *head) { struct rpc_xprt *pos; list_for_each_entry_rcu(pos, head, xprt_switch) { if (!xprt_is_active(pos)) return pos; } return NULL; } static struct rpc_xprt *xprt_iter_first_entry(struct rpc_xprt_iter *xpi) { struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch); if (xps == NULL) return NULL; return xprt_switch_find_first_entry(&xps->xps_xprt_list); } static struct rpc_xprt *_xprt_switch_find_current_entry(struct list_head *head, const struct rpc_xprt *cur, bool find_active) { struct rpc_xprt *pos; bool found = false; list_for_each_entry_rcu(pos, head, xprt_switch) { if (cur == pos) found = true; if (found && ((find_active && xprt_is_active(pos)) || (!find_active && !xprt_is_active(pos)))) return pos; } return NULL; } static struct rpc_xprt *xprt_switch_find_current_entry(struct list_head *head, const struct rpc_xprt *cur) { return _xprt_switch_find_current_entry(head, cur, true); } static struct rpc_xprt * _xprt_iter_current_entry(struct rpc_xprt_iter *xpi, struct rpc_xprt *first_entry(struct list_head *head), struct rpc_xprt *current_entry(struct list_head *head, const struct rpc_xprt *cur)) { struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch); struct list_head *head; if (xps == NULL) return NULL; head = &xps->xps_xprt_list; if (xpi->xpi_cursor == NULL || xps->xps_nxprts < 2) return first_entry(head); return current_entry(head, xpi->xpi_cursor); } static struct rpc_xprt *xprt_iter_current_entry(struct rpc_xprt_iter *xpi) { return _xprt_iter_current_entry(xpi, xprt_switch_find_first_entry, xprt_switch_find_current_entry); } static struct rpc_xprt *xprt_switch_find_current_entry_offline(struct list_head *head, const struct rpc_xprt *cur) { return _xprt_switch_find_current_entry(head, cur, false); } static struct rpc_xprt *xprt_iter_current_entry_offline(struct rpc_xprt_iter *xpi) { return _xprt_iter_current_entry(xpi, xprt_switch_find_first_entry_offline, xprt_switch_find_current_entry_offline); } static bool __rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, const struct sockaddr *sap) { struct list_head *head; struct rpc_xprt *pos; if (xps == NULL || sap == NULL) return false; head = &xps->xps_xprt_list; list_for_each_entry_rcu(pos, head, xprt_switch) { if (rpc_cmp_addr_port(sap, (struct sockaddr *)&pos->addr)) { pr_info("RPC: addr %s already in xprt switch\n", pos->address_strings[RPC_DISPLAY_ADDR]); return true; } } return false; } bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, const struct sockaddr *sap) { bool res; rcu_read_lock(); res = __rpc_xprt_switch_has_addr(xps, sap); rcu_read_unlock(); return res; } static struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head, const struct rpc_xprt *cur, bool check_active) { struct rpc_xprt *pos, *prev = NULL; bool found = false; list_for_each_entry_rcu(pos, head, xprt_switch) { if (cur == prev) found = true; /* for request to return active transports return only * active, for request to return offline transports * return only offline */ if (found && ((check_active && xprt_is_active(pos)) || (!check_active && !xprt_is_active(pos)))) return pos; prev = pos; } return NULL; } static struct rpc_xprt *xprt_switch_set_next_cursor(struct rpc_xprt_switch *xps, struct rpc_xprt **cursor, xprt_switch_find_xprt_t find_next) { struct rpc_xprt *pos, *old; old = smp_load_acquire(cursor); pos = find_next(xps, old); smp_store_release(cursor, pos); return pos; } static struct rpc_xprt *xprt_iter_next_entry_multiple(struct rpc_xprt_iter *xpi, xprt_switch_find_xprt_t find_next) { struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch); if (xps == NULL) return NULL; return xprt_switch_set_next_cursor(xps, &xpi->xpi_cursor, find_next); } static struct rpc_xprt *__xprt_switch_find_next_entry_roundrobin(struct list_head *head, const struct rpc_xprt *cur) { struct rpc_xprt *ret; ret = xprt_switch_find_next_entry(head, cur, true); if (ret != NULL) return ret; return xprt_switch_find_first_entry(head); } static struct rpc_xprt *xprt_switch_find_next_entry_roundrobin(struct rpc_xprt_switch *xps, const struct rpc_xprt *cur) { struct list_head *head = &xps->xps_xprt_list; struct rpc_xprt *xprt; unsigned int nactive; for (;;) { unsigned long xprt_queuelen, xps_queuelen; xprt = __xprt_switch_find_next_entry_roundrobin(head, cur); if (!xprt) break; xprt_queuelen = atomic_long_read(&xprt->queuelen); xps_queuelen = atomic_long_read(&xps->xps_queuelen); nactive = READ_ONCE(xps->xps_nactive); /* Exit loop if xprt_queuelen <= average queue length */ if (xprt_queuelen * nactive <= xps_queuelen) break; cur = xprt; } return xprt; } static struct rpc_xprt *xprt_iter_next_entry_roundrobin(struct rpc_xprt_iter *xpi) { return xprt_iter_next_entry_multiple(xpi, xprt_switch_find_next_entry_roundrobin); } static struct rpc_xprt *xprt_switch_find_next_entry_all(struct rpc_xprt_switch *xps, const struct rpc_xprt *cur) { return xprt_switch_find_next_entry(&xps->xps_xprt_list, cur, true); } static struct rpc_xprt *xprt_switch_find_next_entry_offline(struct rpc_xprt_switch *xps, const struct rpc_xprt *cur) { return xprt_switch_find_next_entry(&xps->xps_xprt_list, cur, false); } static struct rpc_xprt *xprt_iter_next_entry_all(struct rpc_xprt_iter *xpi) { return xprt_iter_next_entry_multiple(xpi, xprt_switch_find_next_entry_all); } static struct rpc_xprt *xprt_iter_next_entry_offline(struct rpc_xprt_iter *xpi) { return xprt_iter_next_entry_multiple(xpi, xprt_switch_find_next_entry_offline); } /* * xprt_iter_rewind - Resets the xprt iterator * @xpi: pointer to rpc_xprt_iter * * Resets xpi to ensure that it points to the first entry in the list * of transports. */ void xprt_iter_rewind(struct rpc_xprt_iter *xpi) { rcu_read_lock(); xprt_iter_ops(xpi)->xpi_rewind(xpi); rcu_read_unlock(); } static void __xprt_iter_init(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps, const struct rpc_xprt_iter_ops *ops) { rcu_assign_pointer(xpi->xpi_xpswitch, xprt_switch_get(xps)); xpi->xpi_cursor = NULL; xpi->xpi_ops = ops; } /** * xprt_iter_init - Initialise an xprt iterator * @xpi: pointer to rpc_xprt_iter * @xps: pointer to rpc_xprt_switch * * Initialises the iterator to use the default iterator ops * as set in xps. This function is mainly intended for internal * use in the rpc_client. */ void xprt_iter_init(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps) { __xprt_iter_init(xpi, xps, NULL); } /** * xprt_iter_init_listall - Initialise an xprt iterator * @xpi: pointer to rpc_xprt_iter * @xps: pointer to rpc_xprt_switch * * Initialises the iterator to iterate once through the entire list * of entries in xps. */ void xprt_iter_init_listall(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps) { __xprt_iter_init(xpi, xps, &rpc_xprt_iter_listall); } void xprt_iter_init_listoffline(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps) { __xprt_iter_init(xpi, xps, &rpc_xprt_iter_listoffline); } /** * xprt_iter_xchg_switch - Atomically swap out the rpc_xprt_switch * @xpi: pointer to rpc_xprt_iter * @newswitch: pointer to a new rpc_xprt_switch or NULL * * Swaps out the existing xpi->xpi_xpswitch with a new value. */ struct rpc_xprt_switch *xprt_iter_xchg_switch(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *newswitch) { struct rpc_xprt_switch __rcu *oldswitch; /* Atomically swap out the old xpswitch */ oldswitch = xchg(&xpi->xpi_xpswitch, RCU_INITIALIZER(newswitch)); if (newswitch != NULL) xprt_iter_rewind(xpi); return rcu_dereference_protected(oldswitch, true); } /** * xprt_iter_destroy - Destroys the xprt iterator * @xpi: pointer to rpc_xprt_iter */ void xprt_iter_destroy(struct rpc_xprt_iter *xpi) { xprt_switch_put(xprt_iter_xchg_switch(xpi, NULL)); } /** * xprt_iter_xprt - Returns the rpc_xprt pointed to by the cursor * @xpi: pointer to rpc_xprt_iter * * Returns a pointer to the struct rpc_xprt that is currently * pointed to by the cursor. * Caller must be holding rcu_read_lock(). */ struct rpc_xprt *xprt_iter_xprt(struct rpc_xprt_iter *xpi) { WARN_ON_ONCE(!rcu_read_lock_held()); return xprt_iter_ops(xpi)->xpi_xprt(xpi); } static struct rpc_xprt *xprt_iter_get_helper(struct rpc_xprt_iter *xpi, struct rpc_xprt *(*fn)(struct rpc_xprt_iter *)) { struct rpc_xprt *ret; do { ret = fn(xpi); if (ret == NULL) break; ret = xprt_get(ret); } while (ret == NULL); return ret; } /** * xprt_iter_get_next - Returns the next rpc_xprt following the cursor * @xpi: pointer to rpc_xprt_iter * * Returns a reference to the struct rpc_xprt that immediately follows the * entry pointed to by the cursor. */ struct rpc_xprt *xprt_iter_get_next(struct rpc_xprt_iter *xpi) { struct rpc_xprt *xprt; rcu_read_lock(); xprt = xprt_iter_get_helper(xpi, xprt_iter_ops(xpi)->xpi_next); rcu_read_unlock(); return xprt; } /* Policy for always returning the first entry in the rpc_xprt_switch */ static const struct rpc_xprt_iter_ops rpc_xprt_iter_singular = { .xpi_rewind = xprt_iter_no_rewind, .xpi_xprt = xprt_iter_first_entry, .xpi_next = xprt_iter_first_entry, }; /* Policy for round-robin iteration of entries in the rpc_xprt_switch */ static const struct rpc_xprt_iter_ops rpc_xprt_iter_roundrobin = { .xpi_rewind = xprt_iter_default_rewind, .xpi_xprt = xprt_iter_current_entry, .xpi_next = xprt_iter_next_entry_roundrobin, }; /* Policy for once-through iteration of entries in the rpc_xprt_switch */ static const struct rpc_xprt_iter_ops rpc_xprt_iter_listall = { .xpi_rewind = xprt_iter_default_rewind, .xpi_xprt = xprt_iter_current_entry, .xpi_next = xprt_iter_next_entry_all, }; static const struct rpc_xprt_iter_ops rpc_xprt_iter_listoffline = { .xpi_rewind = xprt_iter_default_rewind, .xpi_xprt = xprt_iter_current_entry_offline, .xpi_next = xprt_iter_next_entry_offline, };
13 9 4 3 2 2 13 1 1 1 1 7 7 7 7 17 16 17 16 12 1 13 1 13 9 8 5 3 20 20 19 19 18 5 13 16 15 15 9 7 21 1 1 1 2 1 2 4 4 3 1 3 1 3 3 4 4 3 1 1 1 3 3 7 4 3 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 2 1 1 1 1 1 1 1 18 17 16 16 16 15 14 13 13 1 12 13 1 1 13 1 1 13 2 14 1 14 1 14 1 14 1 14 2 12 12 11 11 16 18 2 1 1 1 1 2 7 5 1 5 2 5 1 5 1 5 7 20 20 20 20 19 20 20 20 7 20 1 20 1 20 20 20 20 2 20 1 20 20 20 20 20 20 19 20 19 20 20 20 20 20 4 1 1 1 4 4 4 4 4 4 3 3 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 // SPDX-License-Identifier: GPL-2.0-only /* L2TP netlink layer, for management * * Copyright (c) 2008,2009,2010 Katalix Systems Ltd * * Partly based on the IrDA nelink implementation * (see net/irda/irnetlink.c) which is: * Copyright (c) 2007 Samuel Ortiz <samuel@sortiz.org> * which is in turn partly based on the wireless netlink code: * Copyright 2006 Johannes Berg <johannes@sipsolutions.net> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <net/sock.h> #include <net/genetlink.h> #include <net/udp.h> #include <linux/in.h> #include <linux/udp.h> #include <linux/socket.h> #include <linux/module.h> #include <linux/list.h> #include <net/net_namespace.h> #include <linux/l2tp.h> #include "l2tp_core.h" static struct genl_family l2tp_nl_family; static const struct genl_multicast_group l2tp_multicast_group[] = { { .name = L2TP_GENL_MCGROUP, }, }; static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int flags, struct l2tp_tunnel *tunnel, u8 cmd); static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int flags, struct l2tp_session *session, u8 cmd); /* Accessed under genl lock */ static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX]; static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info) { u32 tunnel_id; u32 session_id; char *ifname; struct l2tp_tunnel *tunnel; struct l2tp_session *session = NULL; struct net *net = genl_info_net(info); if (info->attrs[L2TP_ATTR_IFNAME]) { ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]); session = l2tp_session_get_by_ifname(net, ifname); } else if ((info->attrs[L2TP_ATTR_SESSION_ID]) && (info->attrs[L2TP_ATTR_CONN_ID])) { tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]); tunnel = l2tp_tunnel_get(net, tunnel_id); if (tunnel) { session = l2tp_session_get(net, tunnel->sock, tunnel->version, tunnel_id, session_id); l2tp_tunnel_put(tunnel); } } return session; } static int l2tp_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *msg; void *hdr; int ret = -ENOBUFS; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; goto out; } hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &l2tp_nl_family, 0, L2TP_CMD_NOOP); if (!hdr) { ret = -EMSGSIZE; goto err_out; } genlmsg_end(msg, hdr); return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid); err_out: nlmsg_free(msg); out: return ret; } static int l2tp_tunnel_notify(struct genl_family *family, struct genl_info *info, struct l2tp_tunnel *tunnel, u8 cmd) { struct sk_buff *msg; int ret; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq, NLM_F_ACK, tunnel, cmd); if (ret >= 0) { ret = genlmsg_multicast_allns(family, msg, 0, 0); /* We don't care if no one is listening */ if (ret == -ESRCH) ret = 0; return ret; } nlmsg_free(msg); return ret; } static int l2tp_session_notify(struct genl_family *family, struct genl_info *info, struct l2tp_session *session, u8 cmd) { struct sk_buff *msg; int ret; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq, NLM_F_ACK, session, cmd); if (ret >= 0) { ret = genlmsg_multicast_allns(family, msg, 0, 0); /* We don't care if no one is listening */ if (ret == -ESRCH) ret = 0; return ret; } nlmsg_free(msg); return ret; } static int l2tp_nl_cmd_tunnel_create_get_addr(struct nlattr **attrs, struct l2tp_tunnel_cfg *cfg) { if (attrs[L2TP_ATTR_UDP_SPORT]) cfg->local_udp_port = nla_get_u16(attrs[L2TP_ATTR_UDP_SPORT]); if (attrs[L2TP_ATTR_UDP_DPORT]) cfg->peer_udp_port = nla_get_u16(attrs[L2TP_ATTR_UDP_DPORT]); cfg->use_udp_checksums = nla_get_flag(attrs[L2TP_ATTR_UDP_CSUM]); /* Must have either AF_INET or AF_INET6 address for source and destination */ #if IS_ENABLED(CONFIG_IPV6) if (attrs[L2TP_ATTR_IP6_SADDR] && attrs[L2TP_ATTR_IP6_DADDR]) { cfg->local_ip6 = nla_data(attrs[L2TP_ATTR_IP6_SADDR]); cfg->peer_ip6 = nla_data(attrs[L2TP_ATTR_IP6_DADDR]); cfg->udp6_zero_tx_checksums = nla_get_flag(attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]); cfg->udp6_zero_rx_checksums = nla_get_flag(attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]); return 0; } #endif if (attrs[L2TP_ATTR_IP_SADDR] && attrs[L2TP_ATTR_IP_DADDR]) { cfg->local_ip.s_addr = nla_get_in_addr(attrs[L2TP_ATTR_IP_SADDR]); cfg->peer_ip.s_addr = nla_get_in_addr(attrs[L2TP_ATTR_IP_DADDR]); return 0; } return -EINVAL; } static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info) { u32 tunnel_id; u32 peer_tunnel_id; int proto_version; int fd = -1; int ret = 0; struct l2tp_tunnel_cfg cfg = { 0, }; struct l2tp_tunnel *tunnel; struct net *net = genl_info_net(info); struct nlattr **attrs = info->attrs; if (!attrs[L2TP_ATTR_CONN_ID]) { ret = -EINVAL; goto out; } tunnel_id = nla_get_u32(attrs[L2TP_ATTR_CONN_ID]); if (!attrs[L2TP_ATTR_PEER_CONN_ID]) { ret = -EINVAL; goto out; } peer_tunnel_id = nla_get_u32(attrs[L2TP_ATTR_PEER_CONN_ID]); if (!attrs[L2TP_ATTR_PROTO_VERSION]) { ret = -EINVAL; goto out; } proto_version = nla_get_u8(attrs[L2TP_ATTR_PROTO_VERSION]); if (!attrs[L2TP_ATTR_ENCAP_TYPE]) { ret = -EINVAL; goto out; } cfg.encap = nla_get_u16(attrs[L2TP_ATTR_ENCAP_TYPE]); /* Managed tunnels take the tunnel socket from userspace. * Unmanaged tunnels must call out the source and destination addresses * for the kernel to create the tunnel socket itself. */ if (attrs[L2TP_ATTR_FD]) { fd = nla_get_u32(attrs[L2TP_ATTR_FD]); } else { ret = l2tp_nl_cmd_tunnel_create_get_addr(attrs, &cfg); if (ret < 0) goto out; } ret = -EINVAL; switch (cfg.encap) { case L2TP_ENCAPTYPE_UDP: case L2TP_ENCAPTYPE_IP: ret = l2tp_tunnel_create(fd, proto_version, tunnel_id, peer_tunnel_id, &cfg, &tunnel); break; } if (ret < 0) goto out; refcount_inc(&tunnel->ref_count); ret = l2tp_tunnel_register(tunnel, net, &cfg); if (ret < 0) { kfree(tunnel); goto out; } ret = l2tp_tunnel_notify(&l2tp_nl_family, info, tunnel, L2TP_CMD_TUNNEL_CREATE); l2tp_tunnel_put(tunnel); out: return ret; } static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info) { struct l2tp_tunnel *tunnel; u32 tunnel_id; int ret = 0; struct net *net = genl_info_net(info); if (!info->attrs[L2TP_ATTR_CONN_ID]) { ret = -EINVAL; goto out; } tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); tunnel = l2tp_tunnel_get(net, tunnel_id); if (!tunnel) { ret = -ENODEV; goto out; } l2tp_tunnel_notify(&l2tp_nl_family, info, tunnel, L2TP_CMD_TUNNEL_DELETE); l2tp_tunnel_delete(tunnel); l2tp_tunnel_put(tunnel); out: return ret; } static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info) { struct l2tp_tunnel *tunnel; u32 tunnel_id; int ret = 0; struct net *net = genl_info_net(info); if (!info->attrs[L2TP_ATTR_CONN_ID]) { ret = -EINVAL; goto out; } tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); tunnel = l2tp_tunnel_get(net, tunnel_id); if (!tunnel) { ret = -ENODEV; goto out; } ret = l2tp_tunnel_notify(&l2tp_nl_family, info, tunnel, L2TP_CMD_TUNNEL_MODIFY); l2tp_tunnel_put(tunnel); out: return ret; } #if IS_ENABLED(CONFIG_IPV6) static int l2tp_nl_tunnel_send_addr6(struct sk_buff *skb, struct sock *sk, enum l2tp_encap_type encap) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); switch (encap) { case L2TP_ENCAPTYPE_UDP: if (udp_get_no_check6_tx(sk) && nla_put_flag(skb, L2TP_ATTR_UDP_ZERO_CSUM6_TX)) return -1; if (udp_get_no_check6_rx(sk) && nla_put_flag(skb, L2TP_ATTR_UDP_ZERO_CSUM6_RX)) return -1; if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) || nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport))) return -1; fallthrough; case L2TP_ENCAPTYPE_IP: if (nla_put_in6_addr(skb, L2TP_ATTR_IP6_SADDR, &np->saddr) || nla_put_in6_addr(skb, L2TP_ATTR_IP6_DADDR, &sk->sk_v6_daddr)) return -1; break; } return 0; } #endif static int l2tp_nl_tunnel_send_addr4(struct sk_buff *skb, struct sock *sk, enum l2tp_encap_type encap) { struct inet_sock *inet = inet_sk(sk); switch (encap) { case L2TP_ENCAPTYPE_UDP: if (nla_put_u8(skb, L2TP_ATTR_UDP_CSUM, !sk->sk_no_check_tx) || nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) || nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport))) return -1; fallthrough; case L2TP_ENCAPTYPE_IP: if (nla_put_in_addr(skb, L2TP_ATTR_IP_SADDR, inet->inet_saddr) || nla_put_in_addr(skb, L2TP_ATTR_IP_DADDR, inet->inet_daddr)) return -1; break; } return 0; } /* Append attributes for the tunnel address, handling the different attribute types * used for different tunnel encapsulation and AF_INET v.s. AF_INET6. */ static int l2tp_nl_tunnel_send_addr(struct sk_buff *skb, struct l2tp_tunnel *tunnel) { struct sock *sk = tunnel->sock; if (!sk) return 0; #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) return l2tp_nl_tunnel_send_addr6(skb, sk, tunnel->encap); #endif return l2tp_nl_tunnel_send_addr4(skb, sk, tunnel->encap); } static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int flags, struct l2tp_tunnel *tunnel, u8 cmd) { void *hdr; struct nlattr *nest; hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; if (nla_put_u8(skb, L2TP_ATTR_PROTO_VERSION, tunnel->version) || nla_put_u32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id) || nla_put_u32(skb, L2TP_ATTR_PEER_CONN_ID, tunnel->peer_tunnel_id) || nla_put_u32(skb, L2TP_ATTR_DEBUG, 0) || nla_put_u16(skb, L2TP_ATTR_ENCAP_TYPE, tunnel->encap)) goto nla_put_failure; nest = nla_nest_start_noflag(skb, L2TP_ATTR_STATS); if (!nest) goto nla_put_failure; if (nla_put_u64_64bit(skb, L2TP_ATTR_TX_PACKETS, atomic_long_read(&tunnel->stats.tx_packets), L2TP_ATTR_STATS_PAD) || nla_put_u64_64bit(skb, L2TP_ATTR_TX_BYTES, atomic_long_read(&tunnel->stats.tx_bytes), L2TP_ATTR_STATS_PAD) || nla_put_u64_64bit(skb, L2TP_ATTR_TX_ERRORS, atomic_long_read(&tunnel->stats.tx_errors), L2TP_ATTR_STATS_PAD) || nla_put_u64_64bit(skb, L2TP_ATTR_RX_PACKETS, atomic_long_read(&tunnel->stats.rx_packets), L2TP_ATTR_STATS_PAD) || nla_put_u64_64bit(skb, L2TP_ATTR_RX_BYTES, atomic_long_read(&tunnel->stats.rx_bytes), L2TP_ATTR_STATS_PAD) || nla_put_u64_64bit(skb, L2TP_ATTR_RX_SEQ_DISCARDS, atomic_long_read(&tunnel->stats.rx_seq_discards), L2TP_ATTR_STATS_PAD) || nla_put_u64_64bit(skb, L2TP_ATTR_RX_COOKIE_DISCARDS, atomic_long_read(&tunnel->stats.rx_cookie_discards), L2TP_ATTR_STATS_PAD) || nla_put_u64_64bit(skb, L2TP_ATTR_RX_OOS_PACKETS, atomic_long_read(&tunnel->stats.rx_oos_packets), L2TP_ATTR_STATS_PAD) || nla_put_u64_64bit(skb, L2TP_ATTR_RX_ERRORS, atomic_long_read(&tunnel->stats.rx_errors), L2TP_ATTR_STATS_PAD) || nla_put_u64_64bit(skb, L2TP_ATTR_RX_INVALID, atomic_long_read(&tunnel->stats.rx_invalid), L2TP_ATTR_STATS_PAD)) goto nla_put_failure; nla_nest_end(skb, nest); if (l2tp_nl_tunnel_send_addr(skb, tunnel)) goto nla_put_failure; genlmsg_end(skb, hdr); return 0; nla_put_failure: genlmsg_cancel(skb, hdr); return -1; } static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info) { struct l2tp_tunnel *tunnel; struct sk_buff *msg; u32 tunnel_id; int ret = -ENOBUFS; struct net *net = genl_info_net(info); if (!info->attrs[L2TP_ATTR_CONN_ID]) { ret = -EINVAL; goto err; } tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; goto err; } tunnel = l2tp_tunnel_get(net, tunnel_id); if (!tunnel) { ret = -ENODEV; goto err_nlmsg; } ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq, NLM_F_ACK, tunnel, L2TP_CMD_TUNNEL_GET); if (ret < 0) goto err_nlmsg_tunnel; l2tp_tunnel_put(tunnel); return genlmsg_unicast(net, msg, info->snd_portid); err_nlmsg_tunnel: l2tp_tunnel_put(tunnel); err_nlmsg: nlmsg_free(msg); err: return ret; } struct l2tp_nl_cb_data { unsigned long tkey; unsigned long skey; }; static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct l2tp_nl_cb_data *cbd = (void *)&cb->ctx[0]; unsigned long key = cbd->tkey; struct l2tp_tunnel *tunnel; struct net *net = sock_net(skb->sk); for (;;) { tunnel = l2tp_tunnel_get_next(net, &key); if (!tunnel) goto out; if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, tunnel, L2TP_CMD_TUNNEL_GET) < 0) { l2tp_tunnel_put(tunnel); goto out; } l2tp_tunnel_put(tunnel); key++; } out: cbd->tkey = key; return skb->len; } static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *info) { u32 tunnel_id = 0; u32 session_id; u32 peer_session_id; int ret = 0; struct l2tp_tunnel *tunnel; struct l2tp_session *session; struct l2tp_session_cfg cfg = { 0, }; struct net *net = genl_info_net(info); if (!info->attrs[L2TP_ATTR_CONN_ID]) { ret = -EINVAL; goto out; } tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); tunnel = l2tp_tunnel_get(net, tunnel_id); if (!tunnel) { ret = -ENODEV; goto out; } if (!info->attrs[L2TP_ATTR_SESSION_ID]) { ret = -EINVAL; goto out_tunnel; } session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]); if (!info->attrs[L2TP_ATTR_PEER_SESSION_ID]) { ret = -EINVAL; goto out_tunnel; } peer_session_id = nla_get_u32(info->attrs[L2TP_ATTR_PEER_SESSION_ID]); if (!info->attrs[L2TP_ATTR_PW_TYPE]) { ret = -EINVAL; goto out_tunnel; } cfg.pw_type = nla_get_u16(info->attrs[L2TP_ATTR_PW_TYPE]); if (cfg.pw_type >= __L2TP_PWTYPE_MAX) { ret = -EINVAL; goto out_tunnel; } /* L2TPv2 only accepts PPP pseudo-wires */ if (tunnel->version == 2 && cfg.pw_type != L2TP_PWTYPE_PPP) { ret = -EPROTONOSUPPORT; goto out_tunnel; } if (tunnel->version > 2) { if (info->attrs[L2TP_ATTR_L2SPEC_TYPE]) { cfg.l2specific_type = nla_get_u8(info->attrs[L2TP_ATTR_L2SPEC_TYPE]); if (cfg.l2specific_type != L2TP_L2SPECTYPE_DEFAULT && cfg.l2specific_type != L2TP_L2SPECTYPE_NONE) { ret = -EINVAL; goto out_tunnel; } } else { cfg.l2specific_type = L2TP_L2SPECTYPE_DEFAULT; } if (info->attrs[L2TP_ATTR_COOKIE]) { u16 len = nla_len(info->attrs[L2TP_ATTR_COOKIE]); if (len > 8) { ret = -EINVAL; goto out_tunnel; } cfg.cookie_len = len; memcpy(&cfg.cookie[0], nla_data(info->attrs[L2TP_ATTR_COOKIE]), len); } if (info->attrs[L2TP_ATTR_PEER_COOKIE]) { u16 len = nla_len(info->attrs[L2TP_ATTR_PEER_COOKIE]); if (len > 8) { ret = -EINVAL; goto out_tunnel; } cfg.peer_cookie_len = len; memcpy(&cfg.peer_cookie[0], nla_data(info->attrs[L2TP_ATTR_PEER_COOKIE]), len); } if (info->attrs[L2TP_ATTR_IFNAME]) cfg.ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]); } if (info->attrs[L2TP_ATTR_RECV_SEQ]) cfg.recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]); if (info->attrs[L2TP_ATTR_SEND_SEQ]) cfg.send_seq = nla_get_u8(info->attrs[L2TP_ATTR_SEND_SEQ]); if (info->attrs[L2TP_ATTR_LNS_MODE]) cfg.lns_mode = nla_get_u8(info->attrs[L2TP_ATTR_LNS_MODE]); if (info->attrs[L2TP_ATTR_RECV_TIMEOUT]) cfg.reorder_timeout = nla_get_msecs(info->attrs[L2TP_ATTR_RECV_TIMEOUT]); #ifdef CONFIG_MODULES if (!l2tp_nl_cmd_ops[cfg.pw_type]) { genl_unlock(); request_module("net-l2tp-type-%u", cfg.pw_type); genl_lock(); } #endif if (!l2tp_nl_cmd_ops[cfg.pw_type] || !l2tp_nl_cmd_ops[cfg.pw_type]->session_create) { ret = -EPROTONOSUPPORT; goto out_tunnel; } ret = l2tp_nl_cmd_ops[cfg.pw_type]->session_create(net, tunnel, session_id, peer_session_id, &cfg); if (ret >= 0) { session = l2tp_session_get(net, tunnel->sock, tunnel->version, tunnel_id, session_id); if (session) { ret = l2tp_session_notify(&l2tp_nl_family, info, session, L2TP_CMD_SESSION_CREATE); l2tp_session_put(session); } } out_tunnel: l2tp_tunnel_put(tunnel); out: return ret; } static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *info) { int ret = 0; struct l2tp_session *session; u16 pw_type; session = l2tp_nl_session_get(info); if (!session) { ret = -ENODEV; goto out; } l2tp_session_notify(&l2tp_nl_family, info, session, L2TP_CMD_SESSION_DELETE); pw_type = session->pwtype; if (pw_type < __L2TP_PWTYPE_MAX) if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete) l2tp_nl_cmd_ops[pw_type]->session_delete(session); l2tp_session_put(session); out: return ret; } static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *info) { int ret = 0; struct l2tp_session *session; session = l2tp_nl_session_get(info); if (!session) { ret = -ENODEV; goto out; } if (info->attrs[L2TP_ATTR_RECV_SEQ]) session->recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]); if (info->attrs[L2TP_ATTR_SEND_SEQ]) { struct l2tp_tunnel *tunnel = session->tunnel; session->send_seq = nla_get_u8(info->attrs[L2TP_ATTR_SEND_SEQ]); l2tp_session_set_header_len(session, tunnel->version, tunnel->encap); } if (info->attrs[L2TP_ATTR_LNS_MODE]) session->lns_mode = nla_get_u8(info->attrs[L2TP_ATTR_LNS_MODE]); if (info->attrs[L2TP_ATTR_RECV_TIMEOUT]) session->reorder_timeout = nla_get_msecs(info->attrs[L2TP_ATTR_RECV_TIMEOUT]); ret = l2tp_session_notify(&l2tp_nl_family, info, session, L2TP_CMD_SESSION_MODIFY); l2tp_session_put(session); out: return ret; } static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int flags, struct l2tp_session *session, u8 cmd) { void *hdr; struct nlattr *nest; struct l2tp_tunnel *tunnel = session->tunnel; hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; if (nla_put_u32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id) || nla_put_u32(skb, L2TP_ATTR_SESSION_ID, session->session_id) || nla_put_u32(skb, L2TP_ATTR_PEER_CONN_ID, tunnel->peer_tunnel_id) || nla_put_u32(skb, L2TP_ATTR_PEER_SESSION_ID, session->peer_session_id) || nla_put_u32(skb, L2TP_ATTR_DEBUG, 0) || nla_put_u16(skb, L2TP_ATTR_PW_TYPE, session->pwtype)) goto nla_put_failure; if ((session->ifname[0] && nla_put_string(skb, L2TP_ATTR_IFNAME, session->ifname)) || (session->cookie_len && nla_put(skb, L2TP_ATTR_COOKIE, session->cookie_len, session->cookie)) || (session->peer_cookie_len && nla_put(skb, L2TP_ATTR_PEER_COOKIE, session->peer_cookie_len, session->peer_cookie)) || nla_put_u8(skb, L2TP_ATTR_RECV_SEQ, session->recv_seq) || nla_put_u8(skb, L2TP_ATTR_SEND_SEQ, session->send_seq) || nla_put_u8(skb, L2TP_ATTR_LNS_MODE, session->lns_mode) || (l2tp_tunnel_uses_xfrm(tunnel) && nla_put_u8(skb, L2TP_ATTR_USING_IPSEC, 1)) || (session->reorder_timeout && nla_put_msecs(skb, L2TP_ATTR_RECV_TIMEOUT, session->reorder_timeout, L2TP_ATTR_PAD))) goto nla_put_failure; nest = nla_nest_start_noflag(skb, L2TP_ATTR_STATS); if (!nest) goto nla_put_failure; if (nla_put_u64_64bit(skb, L2TP_ATTR_TX_PACKETS, atomic_long_read(&session->stats.tx_packets), L2TP_ATTR_STATS_PAD) || nla_put_u64_64bit(skb, L2TP_ATTR_TX_BYTES, atomic_long_read(&session->stats.tx_bytes), L2TP_ATTR_STATS_PAD) || nla_put_u64_64bit(skb, L2TP_ATTR_TX_ERRORS, atomic_long_read(&session->stats.tx_errors), L2TP_ATTR_STATS_PAD) || nla_put_u64_64bit(skb, L2TP_ATTR_RX_PACKETS, atomic_long_read(&session->stats.rx_packets), L2TP_ATTR_STATS_PAD) || nla_put_u64_64bit(skb, L2TP_ATTR_RX_BYTES, atomic_long_read(&session->stats.rx_bytes), L2TP_ATTR_STATS_PAD) || nla_put_u64_64bit(skb, L2TP_ATTR_RX_SEQ_DISCARDS, atomic_long_read(&session->stats.rx_seq_discards), L2TP_ATTR_STATS_PAD) || nla_put_u64_64bit(skb, L2TP_ATTR_RX_COOKIE_DISCARDS, atomic_long_read(&session->stats.rx_cookie_discards), L2TP_ATTR_STATS_PAD) || nla_put_u64_64bit(skb, L2TP_ATTR_RX_OOS_PACKETS, atomic_long_read(&session->stats.rx_oos_packets), L2TP_ATTR_STATS_PAD) || nla_put_u64_64bit(skb, L2TP_ATTR_RX_ERRORS, atomic_long_read(&session->stats.rx_errors), L2TP_ATTR_STATS_PAD) || nla_put_u64_64bit(skb, L2TP_ATTR_RX_INVALID, atomic_long_read(&session->stats.rx_invalid), L2TP_ATTR_STATS_PAD)) goto nla_put_failure; nla_nest_end(skb, nest); genlmsg_end(skb, hdr); return 0; nla_put_failure: genlmsg_cancel(skb, hdr); return -1; } static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info) { struct l2tp_session *session; struct sk_buff *msg; int ret; session = l2tp_nl_session_get(info); if (!session) { ret = -ENODEV; goto err; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; goto err_ref; } ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq, 0, session, L2TP_CMD_SESSION_GET); if (ret < 0) goto err_ref_msg; ret = genlmsg_unicast(genl_info_net(info), msg, info->snd_portid); l2tp_session_put(session); return ret; err_ref_msg: nlmsg_free(msg); err_ref: l2tp_session_put(session); err: return ret; } static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct l2tp_nl_cb_data *cbd = (void *)&cb->ctx[0]; struct net *net = sock_net(skb->sk); struct l2tp_session *session; struct l2tp_tunnel *tunnel = NULL; unsigned long tkey = cbd->tkey; unsigned long skey = cbd->skey; for (;;) { if (!tunnel) { tunnel = l2tp_tunnel_get_next(net, &tkey); if (!tunnel) goto out; } session = l2tp_session_get_next(net, tunnel->sock, tunnel->version, tunnel->tunnel_id, &skey); if (!session) { tkey++; l2tp_tunnel_put(tunnel); tunnel = NULL; skey = 0; continue; } if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, session, L2TP_CMD_SESSION_GET) < 0) { l2tp_session_put(session); l2tp_tunnel_put(tunnel); break; } l2tp_session_put(session); skey++; } out: cbd->tkey = tkey; cbd->skey = skey; return skb->len; } static const struct nla_policy l2tp_nl_policy[L2TP_ATTR_MAX + 1] = { [L2TP_ATTR_NONE] = { .type = NLA_UNSPEC, }, [L2TP_ATTR_PW_TYPE] = { .type = NLA_U16, }, [L2TP_ATTR_ENCAP_TYPE] = { .type = NLA_U16, }, [L2TP_ATTR_OFFSET] = { .type = NLA_U16, }, [L2TP_ATTR_DATA_SEQ] = { .type = NLA_U8, }, [L2TP_ATTR_L2SPEC_TYPE] = { .type = NLA_U8, }, [L2TP_ATTR_L2SPEC_LEN] = { .type = NLA_U8, }, [L2TP_ATTR_PROTO_VERSION] = { .type = NLA_U8, }, [L2TP_ATTR_CONN_ID] = { .type = NLA_U32, }, [L2TP_ATTR_PEER_CONN_ID] = { .type = NLA_U32, }, [L2TP_ATTR_SESSION_ID] = { .type = NLA_U32, }, [L2TP_ATTR_PEER_SESSION_ID] = { .type = NLA_U32, }, [L2TP_ATTR_UDP_CSUM] = { .type = NLA_U8, }, [L2TP_ATTR_VLAN_ID] = { .type = NLA_U16, }, [L2TP_ATTR_DEBUG] = { .type = NLA_U32, }, [L2TP_ATTR_RECV_SEQ] = { .type = NLA_U8, }, [L2TP_ATTR_SEND_SEQ] = { .type = NLA_U8, }, [L2TP_ATTR_LNS_MODE] = { .type = NLA_U8, }, [L2TP_ATTR_USING_IPSEC] = { .type = NLA_U8, }, [L2TP_ATTR_RECV_TIMEOUT] = { .type = NLA_MSECS, }, [L2TP_ATTR_FD] = { .type = NLA_U32, }, [L2TP_ATTR_IP_SADDR] = { .type = NLA_U32, }, [L2TP_ATTR_IP_DADDR] = { .type = NLA_U32, }, [L2TP_ATTR_UDP_SPORT] = { .type = NLA_U16, }, [L2TP_ATTR_UDP_DPORT] = { .type = NLA_U16, }, [L2TP_ATTR_MTU] = { .type = NLA_U16, }, [L2TP_ATTR_MRU] = { .type = NLA_U16, }, [L2TP_ATTR_STATS] = { .type = NLA_NESTED, }, [L2TP_ATTR_IP6_SADDR] = { .type = NLA_BINARY, .len = sizeof(struct in6_addr), }, [L2TP_ATTR_IP6_DADDR] = { .type = NLA_BINARY, .len = sizeof(struct in6_addr), }, [L2TP_ATTR_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1, }, [L2TP_ATTR_COOKIE] = { .type = NLA_BINARY, .len = 8, }, [L2TP_ATTR_PEER_COOKIE] = { .type = NLA_BINARY, .len = 8, }, }; static const struct genl_small_ops l2tp_nl_ops[] = { { .cmd = L2TP_CMD_NOOP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_noop, /* can be retrieved by unprivileged users */ }, { .cmd = L2TP_CMD_TUNNEL_CREATE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_tunnel_create, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = L2TP_CMD_TUNNEL_DELETE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_tunnel_delete, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = L2TP_CMD_TUNNEL_MODIFY, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_tunnel_modify, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = L2TP_CMD_TUNNEL_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_tunnel_get, .dumpit = l2tp_nl_cmd_tunnel_dump, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = L2TP_CMD_SESSION_CREATE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_session_create, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = L2TP_CMD_SESSION_DELETE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_session_delete, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = L2TP_CMD_SESSION_MODIFY, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_session_modify, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = L2TP_CMD_SESSION_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_session_get, .dumpit = l2tp_nl_cmd_session_dump, .flags = GENL_UNS_ADMIN_PERM, }, }; static struct genl_family l2tp_nl_family __ro_after_init = { .name = L2TP_GENL_NAME, .version = L2TP_GENL_VERSION, .hdrsize = 0, .maxattr = L2TP_ATTR_MAX, .policy = l2tp_nl_policy, .netnsok = true, .module = THIS_MODULE, .small_ops = l2tp_nl_ops, .n_small_ops = ARRAY_SIZE(l2tp_nl_ops), .resv_start_op = L2TP_CMD_SESSION_GET + 1, .mcgrps = l2tp_multicast_group, .n_mcgrps = ARRAY_SIZE(l2tp_multicast_group), }; int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops) { int ret; ret = -EINVAL; if (pw_type >= __L2TP_PWTYPE_MAX) goto err; genl_lock(); ret = -EBUSY; if (l2tp_nl_cmd_ops[pw_type]) goto out; l2tp_nl_cmd_ops[pw_type] = ops; ret = 0; out: genl_unlock(); err: return ret; } EXPORT_SYMBOL_GPL(l2tp_nl_register_ops); void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type) { if (pw_type < __L2TP_PWTYPE_MAX) { genl_lock(); l2tp_nl_cmd_ops[pw_type] = NULL; genl_unlock(); } } EXPORT_SYMBOL_GPL(l2tp_nl_unregister_ops); static int __init l2tp_nl_init(void) { pr_info("L2TP netlink interface\n"); return genl_register_family(&l2tp_nl_family); } static void l2tp_nl_cleanup(void) { genl_unregister_family(&l2tp_nl_family); } module_init(l2tp_nl_init); module_exit(l2tp_nl_cleanup); MODULE_AUTHOR("James Chapman <jchapman@katalix.com>"); MODULE_DESCRIPTION("L2TP netlink"); MODULE_LICENSE("GPL"); MODULE_VERSION("1.0"); MODULE_ALIAS_GENL_FAMILY("l2tp");
4 5 6 1 2 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 /* * Cryptographic API. * * AES Cipher Algorithm. * * Based on Brian Gladman's code. * * Linux developers: * Alexander Kjeldaas <astor@fast.no> * Herbert Valerio Riedel <hvr@hvrlab.org> * Kyle McMartin <kyle@debian.org> * Adam J. Richter <adam@yggdrasil.com> (conversion to 2.5 API). * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * --------------------------------------------------------------------------- * Copyright (c) 2002, Dr Brian Gladman <brg@gladman.me.uk>, Worcester, UK. * All rights reserved. * * LICENSE TERMS * * The free distribution and use of this software in both source and binary * form is allowed (with or without changes) provided that: * * 1. distributions of this source code include the above copyright * notice, this list of conditions and the following disclaimer; * * 2. distributions in binary form include the above copyright * notice, this list of conditions and the following disclaimer * in the documentation and/or other associated materials; * * 3. the copyright holder's name is not used to endorse products * built using this software without specific written permission. * * ALTERNATIVELY, provided that this notice is retained in full, this product * may be distributed under the terms of the GNU General Public License (GPL), * in which case the provisions of the GPL apply INSTEAD OF those given above. * * DISCLAIMER * * This software is provided 'as is' with no explicit or implied warranties * in respect of its properties, including, but not limited to, correctness * and/or fitness for purpose. * --------------------------------------------------------------------------- */ #include <crypto/aes.h> #include <crypto/algapi.h> #include <linux/module.h> #include <linux/init.h> #include <linux/types.h> #include <linux/errno.h> #include <asm/byteorder.h> #include <linux/unaligned.h> static inline u8 byte(const u32 x, const unsigned n) { return x >> (n << 3); } /* cacheline-aligned to facilitate prefetching into cache */ __visible const u32 crypto_ft_tab[4][256] ____cacheline_aligned = { { 0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6, 0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591, 0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56, 0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec, 0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa, 0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb, 0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45, 0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b, 0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c, 0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83, 0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9, 0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a, 0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d, 0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f, 0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df, 0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea, 0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34, 0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b, 0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d, 0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413, 0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1, 0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6, 0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972, 0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85, 0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed, 0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511, 0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe, 0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b, 0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05, 0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1, 0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142, 0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf, 0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3, 0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e, 0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a, 0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6, 0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3, 0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b, 0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428, 0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad, 0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14, 0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8, 0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4, 0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2, 0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda, 0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949, 0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf, 0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810, 0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c, 0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697, 0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e, 0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f, 0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc, 0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c, 0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969, 0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27, 0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122, 0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433, 0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9, 0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5, 0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a, 0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0, 0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e, 0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c, }, { 0x6363c6a5, 0x7c7cf884, 0x7777ee99, 0x7b7bf68d, 0xf2f2ff0d, 0x6b6bd6bd, 0x6f6fdeb1, 0xc5c59154, 0x30306050, 0x01010203, 0x6767cea9, 0x2b2b567d, 0xfefee719, 0xd7d7b562, 0xabab4de6, 0x7676ec9a, 0xcaca8f45, 0x82821f9d, 0xc9c98940, 0x7d7dfa87, 0xfafaef15, 0x5959b2eb, 0x47478ec9, 0xf0f0fb0b, 0xadad41ec, 0xd4d4b367, 0xa2a25ffd, 0xafaf45ea, 0x9c9c23bf, 0xa4a453f7, 0x7272e496, 0xc0c09b5b, 0xb7b775c2, 0xfdfde11c, 0x93933dae, 0x26264c6a, 0x36366c5a, 0x3f3f7e41, 0xf7f7f502, 0xcccc834f, 0x3434685c, 0xa5a551f4, 0xe5e5d134, 0xf1f1f908, 0x7171e293, 0xd8d8ab73, 0x31316253, 0x15152a3f, 0x0404080c, 0xc7c79552, 0x23234665, 0xc3c39d5e, 0x18183028, 0x969637a1, 0x05050a0f, 0x9a9a2fb5, 0x07070e09, 0x12122436, 0x80801b9b, 0xe2e2df3d, 0xebebcd26, 0x27274e69, 0xb2b27fcd, 0x7575ea9f, 0x0909121b, 0x83831d9e, 0x2c2c5874, 0x1a1a342e, 0x1b1b362d, 0x6e6edcb2, 0x5a5ab4ee, 0xa0a05bfb, 0x5252a4f6, 0x3b3b764d, 0xd6d6b761, 0xb3b37dce, 0x2929527b, 0xe3e3dd3e, 0x2f2f5e71, 0x84841397, 0x5353a6f5, 0xd1d1b968, 0x00000000, 0xededc12c, 0x20204060, 0xfcfce31f, 0xb1b179c8, 0x5b5bb6ed, 0x6a6ad4be, 0xcbcb8d46, 0xbebe67d9, 0x3939724b, 0x4a4a94de, 0x4c4c98d4, 0x5858b0e8, 0xcfcf854a, 0xd0d0bb6b, 0xefefc52a, 0xaaaa4fe5, 0xfbfbed16, 0x434386c5, 0x4d4d9ad7, 0x33336655, 0x85851194, 0x45458acf, 0xf9f9e910, 0x02020406, 0x7f7ffe81, 0x5050a0f0, 0x3c3c7844, 0x9f9f25ba, 0xa8a84be3, 0x5151a2f3, 0xa3a35dfe, 0x404080c0, 0x8f8f058a, 0x92923fad, 0x9d9d21bc, 0x38387048, 0xf5f5f104, 0xbcbc63df, 0xb6b677c1, 0xdadaaf75, 0x21214263, 0x10102030, 0xffffe51a, 0xf3f3fd0e, 0xd2d2bf6d, 0xcdcd814c, 0x0c0c1814, 0x13132635, 0xececc32f, 0x5f5fbee1, 0x979735a2, 0x444488cc, 0x17172e39, 0xc4c49357, 0xa7a755f2, 0x7e7efc82, 0x3d3d7a47, 0x6464c8ac, 0x5d5dbae7, 0x1919322b, 0x7373e695, 0x6060c0a0, 0x81811998, 0x4f4f9ed1, 0xdcdca37f, 0x22224466, 0x2a2a547e, 0x90903bab, 0x88880b83, 0x46468cca, 0xeeeec729, 0xb8b86bd3, 0x1414283c, 0xdedea779, 0x5e5ebce2, 0x0b0b161d, 0xdbdbad76, 0xe0e0db3b, 0x32326456, 0x3a3a744e, 0x0a0a141e, 0x494992db, 0x06060c0a, 0x2424486c, 0x5c5cb8e4, 0xc2c29f5d, 0xd3d3bd6e, 0xacac43ef, 0x6262c4a6, 0x919139a8, 0x959531a4, 0xe4e4d337, 0x7979f28b, 0xe7e7d532, 0xc8c88b43, 0x37376e59, 0x6d6ddab7, 0x8d8d018c, 0xd5d5b164, 0x4e4e9cd2, 0xa9a949e0, 0x6c6cd8b4, 0x5656acfa, 0xf4f4f307, 0xeaeacf25, 0x6565caaf, 0x7a7af48e, 0xaeae47e9, 0x08081018, 0xbaba6fd5, 0x7878f088, 0x25254a6f, 0x2e2e5c72, 0x1c1c3824, 0xa6a657f1, 0xb4b473c7, 0xc6c69751, 0xe8e8cb23, 0xdddda17c, 0x7474e89c, 0x1f1f3e21, 0x4b4b96dd, 0xbdbd61dc, 0x8b8b0d86, 0x8a8a0f85, 0x7070e090, 0x3e3e7c42, 0xb5b571c4, 0x6666ccaa, 0x484890d8, 0x03030605, 0xf6f6f701, 0x0e0e1c12, 0x6161c2a3, 0x35356a5f, 0x5757aef9, 0xb9b969d0, 0x86861791, 0xc1c19958, 0x1d1d3a27, 0x9e9e27b9, 0xe1e1d938, 0xf8f8eb13, 0x98982bb3, 0x11112233, 0x6969d2bb, 0xd9d9a970, 0x8e8e0789, 0x949433a7, 0x9b9b2db6, 0x1e1e3c22, 0x87871592, 0xe9e9c920, 0xcece8749, 0x5555aaff, 0x28285078, 0xdfdfa57a, 0x8c8c038f, 0xa1a159f8, 0x89890980, 0x0d0d1a17, 0xbfbf65da, 0xe6e6d731, 0x424284c6, 0x6868d0b8, 0x414182c3, 0x999929b0, 0x2d2d5a77, 0x0f0f1e11, 0xb0b07bcb, 0x5454a8fc, 0xbbbb6dd6, 0x16162c3a, }, { 0x63c6a563, 0x7cf8847c, 0x77ee9977, 0x7bf68d7b, 0xf2ff0df2, 0x6bd6bd6b, 0x6fdeb16f, 0xc59154c5, 0x30605030, 0x01020301, 0x67cea967, 0x2b567d2b, 0xfee719fe, 0xd7b562d7, 0xab4de6ab, 0x76ec9a76, 0xca8f45ca, 0x821f9d82, 0xc98940c9, 0x7dfa877d, 0xfaef15fa, 0x59b2eb59, 0x478ec947, 0xf0fb0bf0, 0xad41ecad, 0xd4b367d4, 0xa25ffda2, 0xaf45eaaf, 0x9c23bf9c, 0xa453f7a4, 0x72e49672, 0xc09b5bc0, 0xb775c2b7, 0xfde11cfd, 0x933dae93, 0x264c6a26, 0x366c5a36, 0x3f7e413f, 0xf7f502f7, 0xcc834fcc, 0x34685c34, 0xa551f4a5, 0xe5d134e5, 0xf1f908f1, 0x71e29371, 0xd8ab73d8, 0x31625331, 0x152a3f15, 0x04080c04, 0xc79552c7, 0x23466523, 0xc39d5ec3, 0x18302818, 0x9637a196, 0x050a0f05, 0x9a2fb59a, 0x070e0907, 0x12243612, 0x801b9b80, 0xe2df3de2, 0xebcd26eb, 0x274e6927, 0xb27fcdb2, 0x75ea9f75, 0x09121b09, 0x831d9e83, 0x2c58742c, 0x1a342e1a, 0x1b362d1b, 0x6edcb26e, 0x5ab4ee5a, 0xa05bfba0, 0x52a4f652, 0x3b764d3b, 0xd6b761d6, 0xb37dceb3, 0x29527b29, 0xe3dd3ee3, 0x2f5e712f, 0x84139784, 0x53a6f553, 0xd1b968d1, 0x00000000, 0xedc12ced, 0x20406020, 0xfce31ffc, 0xb179c8b1, 0x5bb6ed5b, 0x6ad4be6a, 0xcb8d46cb, 0xbe67d9be, 0x39724b39, 0x4a94de4a, 0x4c98d44c, 0x58b0e858, 0xcf854acf, 0xd0bb6bd0, 0xefc52aef, 0xaa4fe5aa, 0xfbed16fb, 0x4386c543, 0x4d9ad74d, 0x33665533, 0x85119485, 0x458acf45, 0xf9e910f9, 0x02040602, 0x7ffe817f, 0x50a0f050, 0x3c78443c, 0x9f25ba9f, 0xa84be3a8, 0x51a2f351, 0xa35dfea3, 0x4080c040, 0x8f058a8f, 0x923fad92, 0x9d21bc9d, 0x38704838, 0xf5f104f5, 0xbc63dfbc, 0xb677c1b6, 0xdaaf75da, 0x21426321, 0x10203010, 0xffe51aff, 0xf3fd0ef3, 0xd2bf6dd2, 0xcd814ccd, 0x0c18140c, 0x13263513, 0xecc32fec, 0x5fbee15f, 0x9735a297, 0x4488cc44, 0x172e3917, 0xc49357c4, 0xa755f2a7, 0x7efc827e, 0x3d7a473d, 0x64c8ac64, 0x5dbae75d, 0x19322b19, 0x73e69573, 0x60c0a060, 0x81199881, 0x4f9ed14f, 0xdca37fdc, 0x22446622, 0x2a547e2a, 0x903bab90, 0x880b8388, 0x468cca46, 0xeec729ee, 0xb86bd3b8, 0x14283c14, 0xdea779de, 0x5ebce25e, 0x0b161d0b, 0xdbad76db, 0xe0db3be0, 0x32645632, 0x3a744e3a, 0x0a141e0a, 0x4992db49, 0x060c0a06, 0x24486c24, 0x5cb8e45c, 0xc29f5dc2, 0xd3bd6ed3, 0xac43efac, 0x62c4a662, 0x9139a891, 0x9531a495, 0xe4d337e4, 0x79f28b79, 0xe7d532e7, 0xc88b43c8, 0x376e5937, 0x6ddab76d, 0x8d018c8d, 0xd5b164d5, 0x4e9cd24e, 0xa949e0a9, 0x6cd8b46c, 0x56acfa56, 0xf4f307f4, 0xeacf25ea, 0x65caaf65, 0x7af48e7a, 0xae47e9ae, 0x08101808, 0xba6fd5ba, 0x78f08878, 0x254a6f25, 0x2e5c722e, 0x1c38241c, 0xa657f1a6, 0xb473c7b4, 0xc69751c6, 0xe8cb23e8, 0xdda17cdd, 0x74e89c74, 0x1f3e211f, 0x4b96dd4b, 0xbd61dcbd, 0x8b0d868b, 0x8a0f858a, 0x70e09070, 0x3e7c423e, 0xb571c4b5, 0x66ccaa66, 0x4890d848, 0x03060503, 0xf6f701f6, 0x0e1c120e, 0x61c2a361, 0x356a5f35, 0x57aef957, 0xb969d0b9, 0x86179186, 0xc19958c1, 0x1d3a271d, 0x9e27b99e, 0xe1d938e1, 0xf8eb13f8, 0x982bb398, 0x11223311, 0x69d2bb69, 0xd9a970d9, 0x8e07898e, 0x9433a794, 0x9b2db69b, 0x1e3c221e, 0x87159287, 0xe9c920e9, 0xce8749ce, 0x55aaff55, 0x28507828, 0xdfa57adf, 0x8c038f8c, 0xa159f8a1, 0x89098089, 0x0d1a170d, 0xbf65dabf, 0xe6d731e6, 0x4284c642, 0x68d0b868, 0x4182c341, 0x9929b099, 0x2d5a772d, 0x0f1e110f, 0xb07bcbb0, 0x54a8fc54, 0xbb6dd6bb, 0x162c3a16, }, { 0xc6a56363, 0xf8847c7c, 0xee997777, 0xf68d7b7b, 0xff0df2f2, 0xd6bd6b6b, 0xdeb16f6f, 0x9154c5c5, 0x60503030, 0x02030101, 0xcea96767, 0x567d2b2b, 0xe719fefe, 0xb562d7d7, 0x4de6abab, 0xec9a7676, 0x8f45caca, 0x1f9d8282, 0x8940c9c9, 0xfa877d7d, 0xef15fafa, 0xb2eb5959, 0x8ec94747, 0xfb0bf0f0, 0x41ecadad, 0xb367d4d4, 0x5ffda2a2, 0x45eaafaf, 0x23bf9c9c, 0x53f7a4a4, 0xe4967272, 0x9b5bc0c0, 0x75c2b7b7, 0xe11cfdfd, 0x3dae9393, 0x4c6a2626, 0x6c5a3636, 0x7e413f3f, 0xf502f7f7, 0x834fcccc, 0x685c3434, 0x51f4a5a5, 0xd134e5e5, 0xf908f1f1, 0xe2937171, 0xab73d8d8, 0x62533131, 0x2a3f1515, 0x080c0404, 0x9552c7c7, 0x46652323, 0x9d5ec3c3, 0x30281818, 0x37a19696, 0x0a0f0505, 0x2fb59a9a, 0x0e090707, 0x24361212, 0x1b9b8080, 0xdf3de2e2, 0xcd26ebeb, 0x4e692727, 0x7fcdb2b2, 0xea9f7575, 0x121b0909, 0x1d9e8383, 0x58742c2c, 0x342e1a1a, 0x362d1b1b, 0xdcb26e6e, 0xb4ee5a5a, 0x5bfba0a0, 0xa4f65252, 0x764d3b3b, 0xb761d6d6, 0x7dceb3b3, 0x527b2929, 0xdd3ee3e3, 0x5e712f2f, 0x13978484, 0xa6f55353, 0xb968d1d1, 0x00000000, 0xc12ceded, 0x40602020, 0xe31ffcfc, 0x79c8b1b1, 0xb6ed5b5b, 0xd4be6a6a, 0x8d46cbcb, 0x67d9bebe, 0x724b3939, 0x94de4a4a, 0x98d44c4c, 0xb0e85858, 0x854acfcf, 0xbb6bd0d0, 0xc52aefef, 0x4fe5aaaa, 0xed16fbfb, 0x86c54343, 0x9ad74d4d, 0x66553333, 0x11948585, 0x8acf4545, 0xe910f9f9, 0x04060202, 0xfe817f7f, 0xa0f05050, 0x78443c3c, 0x25ba9f9f, 0x4be3a8a8, 0xa2f35151, 0x5dfea3a3, 0x80c04040, 0x058a8f8f, 0x3fad9292, 0x21bc9d9d, 0x70483838, 0xf104f5f5, 0x63dfbcbc, 0x77c1b6b6, 0xaf75dada, 0x42632121, 0x20301010, 0xe51affff, 0xfd0ef3f3, 0xbf6dd2d2, 0x814ccdcd, 0x18140c0c, 0x26351313, 0xc32fecec, 0xbee15f5f, 0x35a29797, 0x88cc4444, 0x2e391717, 0x9357c4c4, 0x55f2a7a7, 0xfc827e7e, 0x7a473d3d, 0xc8ac6464, 0xbae75d5d, 0x322b1919, 0xe6957373, 0xc0a06060, 0x19988181, 0x9ed14f4f, 0xa37fdcdc, 0x44662222, 0x547e2a2a, 0x3bab9090, 0x0b838888, 0x8cca4646, 0xc729eeee, 0x6bd3b8b8, 0x283c1414, 0xa779dede, 0xbce25e5e, 0x161d0b0b, 0xad76dbdb, 0xdb3be0e0, 0x64563232, 0x744e3a3a, 0x141e0a0a, 0x92db4949, 0x0c0a0606, 0x486c2424, 0xb8e45c5c, 0x9f5dc2c2, 0xbd6ed3d3, 0x43efacac, 0xc4a66262, 0x39a89191, 0x31a49595, 0xd337e4e4, 0xf28b7979, 0xd532e7e7, 0x8b43c8c8, 0x6e593737, 0xdab76d6d, 0x018c8d8d, 0xb164d5d5, 0x9cd24e4e, 0x49e0a9a9, 0xd8b46c6c, 0xacfa5656, 0xf307f4f4, 0xcf25eaea, 0xcaaf6565, 0xf48e7a7a, 0x47e9aeae, 0x10180808, 0x6fd5baba, 0xf0887878, 0x4a6f2525, 0x5c722e2e, 0x38241c1c, 0x57f1a6a6, 0x73c7b4b4, 0x9751c6c6, 0xcb23e8e8, 0xa17cdddd, 0xe89c7474, 0x3e211f1f, 0x96dd4b4b, 0x61dcbdbd, 0x0d868b8b, 0x0f858a8a, 0xe0907070, 0x7c423e3e, 0x71c4b5b5, 0xccaa6666, 0x90d84848, 0x06050303, 0xf701f6f6, 0x1c120e0e, 0xc2a36161, 0x6a5f3535, 0xaef95757, 0x69d0b9b9, 0x17918686, 0x9958c1c1, 0x3a271d1d, 0x27b99e9e, 0xd938e1e1, 0xeb13f8f8, 0x2bb39898, 0x22331111, 0xd2bb6969, 0xa970d9d9, 0x07898e8e, 0x33a79494, 0x2db69b9b, 0x3c221e1e, 0x15928787, 0xc920e9e9, 0x8749cece, 0xaaff5555, 0x50782828, 0xa57adfdf, 0x038f8c8c, 0x59f8a1a1, 0x09808989, 0x1a170d0d, 0x65dabfbf, 0xd731e6e6, 0x84c64242, 0xd0b86868, 0x82c34141, 0x29b09999, 0x5a772d2d, 0x1e110f0f, 0x7bcbb0b0, 0xa8fc5454, 0x6dd6bbbb, 0x2c3a1616, } }; static const u32 crypto_fl_tab[4][256] ____cacheline_aligned = { { 0x00000063, 0x0000007c, 0x00000077, 0x0000007b, 0x000000f2, 0x0000006b, 0x0000006f, 0x000000c5, 0x00000030, 0x00000001, 0x00000067, 0x0000002b, 0x000000fe, 0x000000d7, 0x000000ab, 0x00000076, 0x000000ca, 0x00000082, 0x000000c9, 0x0000007d, 0x000000fa, 0x00000059, 0x00000047, 0x000000f0, 0x000000ad, 0x000000d4, 0x000000a2, 0x000000af, 0x0000009c, 0x000000a4, 0x00000072, 0x000000c0, 0x000000b7, 0x000000fd, 0x00000093, 0x00000026, 0x00000036, 0x0000003f, 0x000000f7, 0x000000cc, 0x00000034, 0x000000a5, 0x000000e5, 0x000000f1, 0x00000071, 0x000000d8, 0x00000031, 0x00000015, 0x00000004, 0x000000c7, 0x00000023, 0x000000c3, 0x00000018, 0x00000096, 0x00000005, 0x0000009a, 0x00000007, 0x00000012, 0x00000080, 0x000000e2, 0x000000eb, 0x00000027, 0x000000b2, 0x00000075, 0x00000009, 0x00000083, 0x0000002c, 0x0000001a, 0x0000001b, 0x0000006e, 0x0000005a, 0x000000a0, 0x00000052, 0x0000003b, 0x000000d6, 0x000000b3, 0x00000029, 0x000000e3, 0x0000002f, 0x00000084, 0x00000053, 0x000000d1, 0x00000000, 0x000000ed, 0x00000020, 0x000000fc, 0x000000b1, 0x0000005b, 0x0000006a, 0x000000cb, 0x000000be, 0x00000039, 0x0000004a, 0x0000004c, 0x00000058, 0x000000cf, 0x000000d0, 0x000000ef, 0x000000aa, 0x000000fb, 0x00000043, 0x0000004d, 0x00000033, 0x00000085, 0x00000045, 0x000000f9, 0x00000002, 0x0000007f, 0x00000050, 0x0000003c, 0x0000009f, 0x000000a8, 0x00000051, 0x000000a3, 0x00000040, 0x0000008f, 0x00000092, 0x0000009d, 0x00000038, 0x000000f5, 0x000000bc, 0x000000b6, 0x000000da, 0x00000021, 0x00000010, 0x000000ff, 0x000000f3, 0x000000d2, 0x000000cd, 0x0000000c, 0x00000013, 0x000000ec, 0x0000005f, 0x00000097, 0x00000044, 0x00000017, 0x000000c4, 0x000000a7, 0x0000007e, 0x0000003d, 0x00000064, 0x0000005d, 0x00000019, 0x00000073, 0x00000060, 0x00000081, 0x0000004f, 0x000000dc, 0x00000022, 0x0000002a, 0x00000090, 0x00000088, 0x00000046, 0x000000ee, 0x000000b8, 0x00000014, 0x000000de, 0x0000005e, 0x0000000b, 0x000000db, 0x000000e0, 0x00000032, 0x0000003a, 0x0000000a, 0x00000049, 0x00000006, 0x00000024, 0x0000005c, 0x000000c2, 0x000000d3, 0x000000ac, 0x00000062, 0x00000091, 0x00000095, 0x000000e4, 0x00000079, 0x000000e7, 0x000000c8, 0x00000037, 0x0000006d, 0x0000008d, 0x000000d5, 0x0000004e, 0x000000a9, 0x0000006c, 0x00000056, 0x000000f4, 0x000000ea, 0x00000065, 0x0000007a, 0x000000ae, 0x00000008, 0x000000ba, 0x00000078, 0x00000025, 0x0000002e, 0x0000001c, 0x000000a6, 0x000000b4, 0x000000c6, 0x000000e8, 0x000000dd, 0x00000074, 0x0000001f, 0x0000004b, 0x000000bd, 0x0000008b, 0x0000008a, 0x00000070, 0x0000003e, 0x000000b5, 0x00000066, 0x00000048, 0x00000003, 0x000000f6, 0x0000000e, 0x00000061, 0x00000035, 0x00000057, 0x000000b9, 0x00000086, 0x000000c1, 0x0000001d, 0x0000009e, 0x000000e1, 0x000000f8, 0x00000098, 0x00000011, 0x00000069, 0x000000d9, 0x0000008e, 0x00000094, 0x0000009b, 0x0000001e, 0x00000087, 0x000000e9, 0x000000ce, 0x00000055, 0x00000028, 0x000000df, 0x0000008c, 0x000000a1, 0x00000089, 0x0000000d, 0x000000bf, 0x000000e6, 0x00000042, 0x00000068, 0x00000041, 0x00000099, 0x0000002d, 0x0000000f, 0x000000b0, 0x00000054, 0x000000bb, 0x00000016, }, { 0x00006300, 0x00007c00, 0x00007700, 0x00007b00, 0x0000f200, 0x00006b00, 0x00006f00, 0x0000c500, 0x00003000, 0x00000100, 0x00006700, 0x00002b00, 0x0000fe00, 0x0000d700, 0x0000ab00, 0x00007600, 0x0000ca00, 0x00008200, 0x0000c900, 0x00007d00, 0x0000fa00, 0x00005900, 0x00004700, 0x0000f000, 0x0000ad00, 0x0000d400, 0x0000a200, 0x0000af00, 0x00009c00, 0x0000a400, 0x00007200, 0x0000c000, 0x0000b700, 0x0000fd00, 0x00009300, 0x00002600, 0x00003600, 0x00003f00, 0x0000f700, 0x0000cc00, 0x00003400, 0x0000a500, 0x0000e500, 0x0000f100, 0x00007100, 0x0000d800, 0x00003100, 0x00001500, 0x00000400, 0x0000c700, 0x00002300, 0x0000c300, 0x00001800, 0x00009600, 0x00000500, 0x00009a00, 0x00000700, 0x00001200, 0x00008000, 0x0000e200, 0x0000eb00, 0x00002700, 0x0000b200, 0x00007500, 0x00000900, 0x00008300, 0x00002c00, 0x00001a00, 0x00001b00, 0x00006e00, 0x00005a00, 0x0000a000, 0x00005200, 0x00003b00, 0x0000d600, 0x0000b300, 0x00002900, 0x0000e300, 0x00002f00, 0x00008400, 0x00005300, 0x0000d100, 0x00000000, 0x0000ed00, 0x00002000, 0x0000fc00, 0x0000b100, 0x00005b00, 0x00006a00, 0x0000cb00, 0x0000be00, 0x00003900, 0x00004a00, 0x00004c00, 0x00005800, 0x0000cf00, 0x0000d000, 0x0000ef00, 0x0000aa00, 0x0000fb00, 0x00004300, 0x00004d00, 0x00003300, 0x00008500, 0x00004500, 0x0000f900, 0x00000200, 0x00007f00, 0x00005000, 0x00003c00, 0x00009f00, 0x0000a800, 0x00005100, 0x0000a300, 0x00004000, 0x00008f00, 0x00009200, 0x00009d00, 0x00003800, 0x0000f500, 0x0000bc00, 0x0000b600, 0x0000da00, 0x00002100, 0x00001000, 0x0000ff00, 0x0000f300, 0x0000d200, 0x0000cd00, 0x00000c00, 0x00001300, 0x0000ec00, 0x00005f00, 0x00009700, 0x00004400, 0x00001700, 0x0000c400, 0x0000a700, 0x00007e00, 0x00003d00, 0x00006400, 0x00005d00, 0x00001900, 0x00007300, 0x00006000, 0x00008100, 0x00004f00, 0x0000dc00, 0x00002200, 0x00002a00, 0x00009000, 0x00008800, 0x00004600, 0x0000ee00, 0x0000b800, 0x00001400, 0x0000de00, 0x00005e00, 0x00000b00, 0x0000db00, 0x0000e000, 0x00003200, 0x00003a00, 0x00000a00, 0x00004900, 0x00000600, 0x00002400, 0x00005c00, 0x0000c200, 0x0000d300, 0x0000ac00, 0x00006200, 0x00009100, 0x00009500, 0x0000e400, 0x00007900, 0x0000e700, 0x0000c800, 0x00003700, 0x00006d00, 0x00008d00, 0x0000d500, 0x00004e00, 0x0000a900, 0x00006c00, 0x00005600, 0x0000f400, 0x0000ea00, 0x00006500, 0x00007a00, 0x0000ae00, 0x00000800, 0x0000ba00, 0x00007800, 0x00002500, 0x00002e00, 0x00001c00, 0x0000a600, 0x0000b400, 0x0000c600, 0x0000e800, 0x0000dd00, 0x00007400, 0x00001f00, 0x00004b00, 0x0000bd00, 0x00008b00, 0x00008a00, 0x00007000, 0x00003e00, 0x0000b500, 0x00006600, 0x00004800, 0x00000300, 0x0000f600, 0x00000e00, 0x00006100, 0x00003500, 0x00005700, 0x0000b900, 0x00008600, 0x0000c100, 0x00001d00, 0x00009e00, 0x0000e100, 0x0000f800, 0x00009800, 0x00001100, 0x00006900, 0x0000d900, 0x00008e00, 0x00009400, 0x00009b00, 0x00001e00, 0x00008700, 0x0000e900, 0x0000ce00, 0x00005500, 0x00002800, 0x0000df00, 0x00008c00, 0x0000a100, 0x00008900, 0x00000d00, 0x0000bf00, 0x0000e600, 0x00004200, 0x00006800, 0x00004100, 0x00009900, 0x00002d00, 0x00000f00, 0x0000b000, 0x00005400, 0x0000bb00, 0x00001600, }, { 0x00630000, 0x007c0000, 0x00770000, 0x007b0000, 0x00f20000, 0x006b0000, 0x006f0000, 0x00c50000, 0x00300000, 0x00010000, 0x00670000, 0x002b0000, 0x00fe0000, 0x00d70000, 0x00ab0000, 0x00760000, 0x00ca0000, 0x00820000, 0x00c90000, 0x007d0000, 0x00fa0000, 0x00590000, 0x00470000, 0x00f00000, 0x00ad0000, 0x00d40000, 0x00a20000, 0x00af0000, 0x009c0000, 0x00a40000, 0x00720000, 0x00c00000, 0x00b70000, 0x00fd0000, 0x00930000, 0x00260000, 0x00360000, 0x003f0000, 0x00f70000, 0x00cc0000, 0x00340000, 0x00a50000, 0x00e50000, 0x00f10000, 0x00710000, 0x00d80000, 0x00310000, 0x00150000, 0x00040000, 0x00c70000, 0x00230000, 0x00c30000, 0x00180000, 0x00960000, 0x00050000, 0x009a0000, 0x00070000, 0x00120000, 0x00800000, 0x00e20000, 0x00eb0000, 0x00270000, 0x00b20000, 0x00750000, 0x00090000, 0x00830000, 0x002c0000, 0x001a0000, 0x001b0000, 0x006e0000, 0x005a0000, 0x00a00000, 0x00520000, 0x003b0000, 0x00d60000, 0x00b30000, 0x00290000, 0x00e30000, 0x002f0000, 0x00840000, 0x00530000, 0x00d10000, 0x00000000, 0x00ed0000, 0x00200000, 0x00fc0000, 0x00b10000, 0x005b0000, 0x006a0000, 0x00cb0000, 0x00be0000, 0x00390000, 0x004a0000, 0x004c0000, 0x00580000, 0x00cf0000, 0x00d00000, 0x00ef0000, 0x00aa0000, 0x00fb0000, 0x00430000, 0x004d0000, 0x00330000, 0x00850000, 0x00450000, 0x00f90000, 0x00020000, 0x007f0000, 0x00500000, 0x003c0000, 0x009f0000, 0x00a80000, 0x00510000, 0x00a30000, 0x00400000, 0x008f0000, 0x00920000, 0x009d0000, 0x00380000, 0x00f50000, 0x00bc0000, 0x00b60000, 0x00da0000, 0x00210000, 0x00100000, 0x00ff0000, 0x00f30000, 0x00d20000, 0x00cd0000, 0x000c0000, 0x00130000, 0x00ec0000, 0x005f0000, 0x00970000, 0x00440000, 0x00170000, 0x00c40000, 0x00a70000, 0x007e0000, 0x003d0000, 0x00640000, 0x005d0000, 0x00190000, 0x00730000, 0x00600000, 0x00810000, 0x004f0000, 0x00dc0000, 0x00220000, 0x002a0000, 0x00900000, 0x00880000, 0x00460000, 0x00ee0000, 0x00b80000, 0x00140000, 0x00de0000, 0x005e0000, 0x000b0000, 0x00db0000, 0x00e00000, 0x00320000, 0x003a0000, 0x000a0000, 0x00490000, 0x00060000, 0x00240000, 0x005c0000, 0x00c20000, 0x00d30000, 0x00ac0000, 0x00620000, 0x00910000, 0x00950000, 0x00e40000, 0x00790000, 0x00e70000, 0x00c80000, 0x00370000, 0x006d0000, 0x008d0000, 0x00d50000, 0x004e0000, 0x00a90000, 0x006c0000, 0x00560000, 0x00f40000, 0x00ea0000, 0x00650000, 0x007a0000, 0x00ae0000, 0x00080000, 0x00ba0000, 0x00780000, 0x00250000, 0x002e0000, 0x001c0000, 0x00a60000, 0x00b40000, 0x00c60000, 0x00e80000, 0x00dd0000, 0x00740000, 0x001f0000, 0x004b0000, 0x00bd0000, 0x008b0000, 0x008a0000, 0x00700000, 0x003e0000, 0x00b50000, 0x00660000, 0x00480000, 0x00030000, 0x00f60000, 0x000e0000, 0x00610000, 0x00350000, 0x00570000, 0x00b90000, 0x00860000, 0x00c10000, 0x001d0000, 0x009e0000, 0x00e10000, 0x00f80000, 0x00980000, 0x00110000, 0x00690000, 0x00d90000, 0x008e0000, 0x00940000, 0x009b0000, 0x001e0000, 0x00870000, 0x00e90000, 0x00ce0000, 0x00550000, 0x00280000, 0x00df0000, 0x008c0000, 0x00a10000, 0x00890000, 0x000d0000, 0x00bf0000, 0x00e60000, 0x00420000, 0x00680000, 0x00410000, 0x00990000, 0x002d0000, 0x000f0000, 0x00b00000, 0x00540000, 0x00bb0000, 0x00160000, }, { 0x63000000, 0x7c000000, 0x77000000, 0x7b000000, 0xf2000000, 0x6b000000, 0x6f000000, 0xc5000000, 0x30000000, 0x01000000, 0x67000000, 0x2b000000, 0xfe000000, 0xd7000000, 0xab000000, 0x76000000, 0xca000000, 0x82000000, 0xc9000000, 0x7d000000, 0xfa000000, 0x59000000, 0x47000000, 0xf0000000, 0xad000000, 0xd4000000, 0xa2000000, 0xaf000000, 0x9c000000, 0xa4000000, 0x72000000, 0xc0000000, 0xb7000000, 0xfd000000, 0x93000000, 0x26000000, 0x36000000, 0x3f000000, 0xf7000000, 0xcc000000, 0x34000000, 0xa5000000, 0xe5000000, 0xf1000000, 0x71000000, 0xd8000000, 0x31000000, 0x15000000, 0x04000000, 0xc7000000, 0x23000000, 0xc3000000, 0x18000000, 0x96000000, 0x05000000, 0x9a000000, 0x07000000, 0x12000000, 0x80000000, 0xe2000000, 0xeb000000, 0x27000000, 0xb2000000, 0x75000000, 0x09000000, 0x83000000, 0x2c000000, 0x1a000000, 0x1b000000, 0x6e000000, 0x5a000000, 0xa0000000, 0x52000000, 0x3b000000, 0xd6000000, 0xb3000000, 0x29000000, 0xe3000000, 0x2f000000, 0x84000000, 0x53000000, 0xd1000000, 0x00000000, 0xed000000, 0x20000000, 0xfc000000, 0xb1000000, 0x5b000000, 0x6a000000, 0xcb000000, 0xbe000000, 0x39000000, 0x4a000000, 0x4c000000, 0x58000000, 0xcf000000, 0xd0000000, 0xef000000, 0xaa000000, 0xfb000000, 0x43000000, 0x4d000000, 0x33000000, 0x85000000, 0x45000000, 0xf9000000, 0x02000000, 0x7f000000, 0x50000000, 0x3c000000, 0x9f000000, 0xa8000000, 0x51000000, 0xa3000000, 0x40000000, 0x8f000000, 0x92000000, 0x9d000000, 0x38000000, 0xf5000000, 0xbc000000, 0xb6000000, 0xda000000, 0x21000000, 0x10000000, 0xff000000, 0xf3000000, 0xd2000000, 0xcd000000, 0x0c000000, 0x13000000, 0xec000000, 0x5f000000, 0x97000000, 0x44000000, 0x17000000, 0xc4000000, 0xa7000000, 0x7e000000, 0x3d000000, 0x64000000, 0x5d000000, 0x19000000, 0x73000000, 0x60000000, 0x81000000, 0x4f000000, 0xdc000000, 0x22000000, 0x2a000000, 0x90000000, 0x88000000, 0x46000000, 0xee000000, 0xb8000000, 0x14000000, 0xde000000, 0x5e000000, 0x0b000000, 0xdb000000, 0xe0000000, 0x32000000, 0x3a000000, 0x0a000000, 0x49000000, 0x06000000, 0x24000000, 0x5c000000, 0xc2000000, 0xd3000000, 0xac000000, 0x62000000, 0x91000000, 0x95000000, 0xe4000000, 0x79000000, 0xe7000000, 0xc8000000, 0x37000000, 0x6d000000, 0x8d000000, 0xd5000000, 0x4e000000, 0xa9000000, 0x6c000000, 0x56000000, 0xf4000000, 0xea000000, 0x65000000, 0x7a000000, 0xae000000, 0x08000000, 0xba000000, 0x78000000, 0x25000000, 0x2e000000, 0x1c000000, 0xa6000000, 0xb4000000, 0xc6000000, 0xe8000000, 0xdd000000, 0x74000000, 0x1f000000, 0x4b000000, 0xbd000000, 0x8b000000, 0x8a000000, 0x70000000, 0x3e000000, 0xb5000000, 0x66000000, 0x48000000, 0x03000000, 0xf6000000, 0x0e000000, 0x61000000, 0x35000000, 0x57000000, 0xb9000000, 0x86000000, 0xc1000000, 0x1d000000, 0x9e000000, 0xe1000000, 0xf8000000, 0x98000000, 0x11000000, 0x69000000, 0xd9000000, 0x8e000000, 0x94000000, 0x9b000000, 0x1e000000, 0x87000000, 0xe9000000, 0xce000000, 0x55000000, 0x28000000, 0xdf000000, 0x8c000000, 0xa1000000, 0x89000000, 0x0d000000, 0xbf000000, 0xe6000000, 0x42000000, 0x68000000, 0x41000000, 0x99000000, 0x2d000000, 0x0f000000, 0xb0000000, 0x54000000, 0xbb000000, 0x16000000, } }; __visible const u32 crypto_it_tab[4][256] ____cacheline_aligned = { { 0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a, 0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b, 0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5, 0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5, 0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d, 0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b, 0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295, 0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e, 0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927, 0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d, 0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362, 0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9, 0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52, 0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566, 0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3, 0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed, 0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e, 0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4, 0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4, 0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd, 0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d, 0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060, 0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967, 0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879, 0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000, 0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c, 0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36, 0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624, 0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b, 0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c, 0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12, 0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14, 0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3, 0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b, 0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8, 0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684, 0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7, 0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177, 0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947, 0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322, 0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498, 0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f, 0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54, 0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382, 0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf, 0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb, 0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83, 0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef, 0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029, 0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235, 0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733, 0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117, 0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4, 0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546, 0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb, 0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d, 0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb, 0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a, 0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773, 0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478, 0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2, 0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff, 0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664, 0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0, }, { 0xa7f45150, 0x65417e53, 0xa4171ac3, 0x5e273a96, 0x6bab3bcb, 0x459d1ff1, 0x58faacab, 0x03e34b93, 0xfa302055, 0x6d76adf6, 0x76cc8891, 0x4c02f525, 0xd7e54ffc, 0xcb2ac5d7, 0x44352680, 0xa362b58f, 0x5ab1de49, 0x1bba2567, 0x0eea4598, 0xc0fe5de1, 0x752fc302, 0xf04c8112, 0x97468da3, 0xf9d36bc6, 0x5f8f03e7, 0x9c921595, 0x7a6dbfeb, 0x595295da, 0x83bed42d, 0x217458d3, 0x69e04929, 0xc8c98e44, 0x89c2756a, 0x798ef478, 0x3e58996b, 0x71b927dd, 0x4fe1beb6, 0xad88f017, 0xac20c966, 0x3ace7db4, 0x4adf6318, 0x311ae582, 0x33519760, 0x7f536245, 0x7764b1e0, 0xae6bbb84, 0xa081fe1c, 0x2b08f994, 0x68487058, 0xfd458f19, 0x6cde9487, 0xf87b52b7, 0xd373ab23, 0x024b72e2, 0x8f1fe357, 0xab55662a, 0x28ebb207, 0xc2b52f03, 0x7bc5869a, 0x0837d3a5, 0x872830f2, 0xa5bf23b2, 0x6a0302ba, 0x8216ed5c, 0x1ccf8a2b, 0xb479a792, 0xf207f3f0, 0xe2694ea1, 0xf4da65cd, 0xbe0506d5, 0x6234d11f, 0xfea6c48a, 0x532e349d, 0x55f3a2a0, 0xe18a0532, 0xebf6a475, 0xec830b39, 0xef6040aa, 0x9f715e06, 0x106ebd51, 0x8a213ef9, 0x06dd963d, 0x053eddae, 0xbde64d46, 0x8d5491b5, 0x5dc47105, 0xd406046f, 0x155060ff, 0xfb981924, 0xe9bdd697, 0x434089cc, 0x9ed96777, 0x42e8b0bd, 0x8b890788, 0x5b19e738, 0xeec879db, 0x0a7ca147, 0x0f427ce9, 0x1e84f8c9, 0x00000000, 0x86800983, 0xed2b3248, 0x70111eac, 0x725a6c4e, 0xff0efdfb, 0x38850f56, 0xd5ae3d1e, 0x392d3627, 0xd90f0a64, 0xa65c6821, 0x545b9bd1, 0x2e36243a, 0x670a0cb1, 0xe757930f, 0x96eeb4d2, 0x919b1b9e, 0xc5c0804f, 0x20dc61a2, 0x4b775a69, 0x1a121c16, 0xba93e20a, 0x2aa0c0e5, 0xe0223c43, 0x171b121d, 0x0d090e0b, 0xc78bf2ad, 0xa8b62db9, 0xa91e14c8, 0x19f15785, 0x0775af4c, 0xdd99eebb, 0x607fa3fd, 0x2601f79f, 0xf5725cbc, 0x3b6644c5, 0x7efb5b34, 0x29438b76, 0xc623cbdc, 0xfcedb668, 0xf1e4b863, 0xdc31d7ca, 0x85634210, 0x22971340, 0x11c68420, 0x244a857d, 0x3dbbd2f8, 0x32f9ae11, 0xa129c76d, 0x2f9e1d4b, 0x30b2dcf3, 0x52860dec, 0xe3c177d0, 0x16b32b6c, 0xb970a999, 0x489411fa, 0x64e94722, 0x8cfca8c4, 0x3ff0a01a, 0x2c7d56d8, 0x903322ef, 0x4e4987c7, 0xd138d9c1, 0xa2ca8cfe, 0x0bd49836, 0x81f5a6cf, 0xde7aa528, 0x8eb7da26, 0xbfad3fa4, 0x9d3a2ce4, 0x9278500d, 0xcc5f6a9b, 0x467e5462, 0x138df6c2, 0xb8d890e8, 0xf7392e5e, 0xafc382f5, 0x805d9fbe, 0x93d0697c, 0x2dd56fa9, 0x1225cfb3, 0x99acc83b, 0x7d1810a7, 0x639ce86e, 0xbb3bdb7b, 0x7826cd09, 0x18596ef4, 0xb79aec01, 0x9a4f83a8, 0x6e95e665, 0xe6ffaa7e, 0xcfbc2108, 0xe815efe6, 0x9be7bad9, 0x366f4ace, 0x099fead4, 0x7cb029d6, 0xb2a431af, 0x233f2a31, 0x94a5c630, 0x66a235c0, 0xbc4e7437, 0xca82fca6, 0xd090e0b0, 0xd8a73315, 0x9804f14a, 0xdaec41f7, 0x50cd7f0e, 0xf691172f, 0xd64d768d, 0xb0ef434d, 0x4daacc54, 0x0496e4df, 0xb5d19ee3, 0x886a4c1b, 0x1f2cc1b8, 0x5165467f, 0xea5e9d04, 0x358c015d, 0x7487fa73, 0x410bfb2e, 0x1d67b35a, 0xd2db9252, 0x5610e933, 0x47d66d13, 0x61d79a8c, 0x0ca1377a, 0x14f8598e, 0x3c13eb89, 0x27a9ceee, 0xc961b735, 0xe51ce1ed, 0xb1477a3c, 0xdfd29c59, 0x73f2553f, 0xce141879, 0x37c773bf, 0xcdf753ea, 0xaafd5f5b, 0x6f3ddf14, 0xdb447886, 0xf3afca81, 0xc468b93e, 0x3424382c, 0x40a3c25f, 0xc31d1672, 0x25e2bc0c, 0x493c288b, 0x950dff41, 0x01a83971, 0xb30c08de, 0xe4b4d89c, 0xc1566490, 0x84cb7b61, 0xb632d570, 0x5c6c4874, 0x57b8d042, }, { 0xf45150a7, 0x417e5365, 0x171ac3a4, 0x273a965e, 0xab3bcb6b, 0x9d1ff145, 0xfaacab58, 0xe34b9303, 0x302055fa, 0x76adf66d, 0xcc889176, 0x02f5254c, 0xe54ffcd7, 0x2ac5d7cb, 0x35268044, 0x62b58fa3, 0xb1de495a, 0xba25671b, 0xea45980e, 0xfe5de1c0, 0x2fc30275, 0x4c8112f0, 0x468da397, 0xd36bc6f9, 0x8f03e75f, 0x9215959c, 0x6dbfeb7a, 0x5295da59, 0xbed42d83, 0x7458d321, 0xe0492969, 0xc98e44c8, 0xc2756a89, 0x8ef47879, 0x58996b3e, 0xb927dd71, 0xe1beb64f, 0x88f017ad, 0x20c966ac, 0xce7db43a, 0xdf63184a, 0x1ae58231, 0x51976033, 0x5362457f, 0x64b1e077, 0x6bbb84ae, 0x81fe1ca0, 0x08f9942b, 0x48705868, 0x458f19fd, 0xde94876c, 0x7b52b7f8, 0x73ab23d3, 0x4b72e202, 0x1fe3578f, 0x55662aab, 0xebb20728, 0xb52f03c2, 0xc5869a7b, 0x37d3a508, 0x2830f287, 0xbf23b2a5, 0x0302ba6a, 0x16ed5c82, 0xcf8a2b1c, 0x79a792b4, 0x07f3f0f2, 0x694ea1e2, 0xda65cdf4, 0x0506d5be, 0x34d11f62, 0xa6c48afe, 0x2e349d53, 0xf3a2a055, 0x8a0532e1, 0xf6a475eb, 0x830b39ec, 0x6040aaef, 0x715e069f, 0x6ebd5110, 0x213ef98a, 0xdd963d06, 0x3eddae05, 0xe64d46bd, 0x5491b58d, 0xc471055d, 0x06046fd4, 0x5060ff15, 0x981924fb, 0xbdd697e9, 0x4089cc43, 0xd967779e, 0xe8b0bd42, 0x8907888b, 0x19e7385b, 0xc879dbee, 0x7ca1470a, 0x427ce90f, 0x84f8c91e, 0x00000000, 0x80098386, 0x2b3248ed, 0x111eac70, 0x5a6c4e72, 0x0efdfbff, 0x850f5638, 0xae3d1ed5, 0x2d362739, 0x0f0a64d9, 0x5c6821a6, 0x5b9bd154, 0x36243a2e, 0x0a0cb167, 0x57930fe7, 0xeeb4d296, 0x9b1b9e91, 0xc0804fc5, 0xdc61a220, 0x775a694b, 0x121c161a, 0x93e20aba, 0xa0c0e52a, 0x223c43e0, 0x1b121d17, 0x090e0b0d, 0x8bf2adc7, 0xb62db9a8, 0x1e14c8a9, 0xf1578519, 0x75af4c07, 0x99eebbdd, 0x7fa3fd60, 0x01f79f26, 0x725cbcf5, 0x6644c53b, 0xfb5b347e, 0x438b7629, 0x23cbdcc6, 0xedb668fc, 0xe4b863f1, 0x31d7cadc, 0x63421085, 0x97134022, 0xc6842011, 0x4a857d24, 0xbbd2f83d, 0xf9ae1132, 0x29c76da1, 0x9e1d4b2f, 0xb2dcf330, 0x860dec52, 0xc177d0e3, 0xb32b6c16, 0x70a999b9, 0x9411fa48, 0xe9472264, 0xfca8c48c, 0xf0a01a3f, 0x7d56d82c, 0x3322ef90, 0x4987c74e, 0x38d9c1d1, 0xca8cfea2, 0xd498360b, 0xf5a6cf81, 0x7aa528de, 0xb7da268e, 0xad3fa4bf, 0x3a2ce49d, 0x78500d92, 0x5f6a9bcc, 0x7e546246, 0x8df6c213, 0xd890e8b8, 0x392e5ef7, 0xc382f5af, 0x5d9fbe80, 0xd0697c93, 0xd56fa92d, 0x25cfb312, 0xacc83b99, 0x1810a77d, 0x9ce86e63, 0x3bdb7bbb, 0x26cd0978, 0x596ef418, 0x9aec01b7, 0x4f83a89a, 0x95e6656e, 0xffaa7ee6, 0xbc2108cf, 0x15efe6e8, 0xe7bad99b, 0x6f4ace36, 0x9fead409, 0xb029d67c, 0xa431afb2, 0x3f2a3123, 0xa5c63094, 0xa235c066, 0x4e7437bc, 0x82fca6ca, 0x90e0b0d0, 0xa73315d8, 0x04f14a98, 0xec41f7da, 0xcd7f0e50, 0x91172ff6, 0x4d768dd6, 0xef434db0, 0xaacc544d, 0x96e4df04, 0xd19ee3b5, 0x6a4c1b88, 0x2cc1b81f, 0x65467f51, 0x5e9d04ea, 0x8c015d35, 0x87fa7374, 0x0bfb2e41, 0x67b35a1d, 0xdb9252d2, 0x10e93356, 0xd66d1347, 0xd79a8c61, 0xa1377a0c, 0xf8598e14, 0x13eb893c, 0xa9ceee27, 0x61b735c9, 0x1ce1ede5, 0x477a3cb1, 0xd29c59df, 0xf2553f73, 0x141879ce, 0xc773bf37, 0xf753eacd, 0xfd5f5baa, 0x3ddf146f, 0x447886db, 0xafca81f3, 0x68b93ec4, 0x24382c34, 0xa3c25f40, 0x1d1672c3, 0xe2bc0c25, 0x3c288b49, 0x0dff4195, 0xa8397101, 0x0c08deb3, 0xb4d89ce4, 0x566490c1, 0xcb7b6184, 0x32d570b6, 0x6c48745c, 0xb8d04257, }, { 0x5150a7f4, 0x7e536541, 0x1ac3a417, 0x3a965e27, 0x3bcb6bab, 0x1ff1459d, 0xacab58fa, 0x4b9303e3, 0x2055fa30, 0xadf66d76, 0x889176cc, 0xf5254c02, 0x4ffcd7e5, 0xc5d7cb2a, 0x26804435, 0xb58fa362, 0xde495ab1, 0x25671bba, 0x45980eea, 0x5de1c0fe, 0xc302752f, 0x8112f04c, 0x8da39746, 0x6bc6f9d3, 0x03e75f8f, 0x15959c92, 0xbfeb7a6d, 0x95da5952, 0xd42d83be, 0x58d32174, 0x492969e0, 0x8e44c8c9, 0x756a89c2, 0xf478798e, 0x996b3e58, 0x27dd71b9, 0xbeb64fe1, 0xf017ad88, 0xc966ac20, 0x7db43ace, 0x63184adf, 0xe582311a, 0x97603351, 0x62457f53, 0xb1e07764, 0xbb84ae6b, 0xfe1ca081, 0xf9942b08, 0x70586848, 0x8f19fd45, 0x94876cde, 0x52b7f87b, 0xab23d373, 0x72e2024b, 0xe3578f1f, 0x662aab55, 0xb20728eb, 0x2f03c2b5, 0x869a7bc5, 0xd3a50837, 0x30f28728, 0x23b2a5bf, 0x02ba6a03, 0xed5c8216, 0x8a2b1ccf, 0xa792b479, 0xf3f0f207, 0x4ea1e269, 0x65cdf4da, 0x06d5be05, 0xd11f6234, 0xc48afea6, 0x349d532e, 0xa2a055f3, 0x0532e18a, 0xa475ebf6, 0x0b39ec83, 0x40aaef60, 0x5e069f71, 0xbd51106e, 0x3ef98a21, 0x963d06dd, 0xddae053e, 0x4d46bde6, 0x91b58d54, 0x71055dc4, 0x046fd406, 0x60ff1550, 0x1924fb98, 0xd697e9bd, 0x89cc4340, 0x67779ed9, 0xb0bd42e8, 0x07888b89, 0xe7385b19, 0x79dbeec8, 0xa1470a7c, 0x7ce90f42, 0xf8c91e84, 0x00000000, 0x09838680, 0x3248ed2b, 0x1eac7011, 0x6c4e725a, 0xfdfbff0e, 0x0f563885, 0x3d1ed5ae, 0x3627392d, 0x0a64d90f, 0x6821a65c, 0x9bd1545b, 0x243a2e36, 0x0cb1670a, 0x930fe757, 0xb4d296ee, 0x1b9e919b, 0x804fc5c0, 0x61a220dc, 0x5a694b77, 0x1c161a12, 0xe20aba93, 0xc0e52aa0, 0x3c43e022, 0x121d171b, 0x0e0b0d09, 0xf2adc78b, 0x2db9a8b6, 0x14c8a91e, 0x578519f1, 0xaf4c0775, 0xeebbdd99, 0xa3fd607f, 0xf79f2601, 0x5cbcf572, 0x44c53b66, 0x5b347efb, 0x8b762943, 0xcbdcc623, 0xb668fced, 0xb863f1e4, 0xd7cadc31, 0x42108563, 0x13402297, 0x842011c6, 0x857d244a, 0xd2f83dbb, 0xae1132f9, 0xc76da129, 0x1d4b2f9e, 0xdcf330b2, 0x0dec5286, 0x77d0e3c1, 0x2b6c16b3, 0xa999b970, 0x11fa4894, 0x472264e9, 0xa8c48cfc, 0xa01a3ff0, 0x56d82c7d, 0x22ef9033, 0x87c74e49, 0xd9c1d138, 0x8cfea2ca, 0x98360bd4, 0xa6cf81f5, 0xa528de7a, 0xda268eb7, 0x3fa4bfad, 0x2ce49d3a, 0x500d9278, 0x6a9bcc5f, 0x5462467e, 0xf6c2138d, 0x90e8b8d8, 0x2e5ef739, 0x82f5afc3, 0x9fbe805d, 0x697c93d0, 0x6fa92dd5, 0xcfb31225, 0xc83b99ac, 0x10a77d18, 0xe86e639c, 0xdb7bbb3b, 0xcd097826, 0x6ef41859, 0xec01b79a, 0x83a89a4f, 0xe6656e95, 0xaa7ee6ff, 0x2108cfbc, 0xefe6e815, 0xbad99be7, 0x4ace366f, 0xead4099f, 0x29d67cb0, 0x31afb2a4, 0x2a31233f, 0xc63094a5, 0x35c066a2, 0x7437bc4e, 0xfca6ca82, 0xe0b0d090, 0x3315d8a7, 0xf14a9804, 0x41f7daec, 0x7f0e50cd, 0x172ff691, 0x768dd64d, 0x434db0ef, 0xcc544daa, 0xe4df0496, 0x9ee3b5d1, 0x4c1b886a, 0xc1b81f2c, 0x467f5165, 0x9d04ea5e, 0x015d358c, 0xfa737487, 0xfb2e410b, 0xb35a1d67, 0x9252d2db, 0xe9335610, 0x6d1347d6, 0x9a8c61d7, 0x377a0ca1, 0x598e14f8, 0xeb893c13, 0xceee27a9, 0xb735c961, 0xe1ede51c, 0x7a3cb147, 0x9c59dfd2, 0x553f73f2, 0x1879ce14, 0x73bf37c7, 0x53eacdf7, 0x5f5baafd, 0xdf146f3d, 0x7886db44, 0xca81f3af, 0xb93ec468, 0x382c3424, 0xc25f40a3, 0x1672c31d, 0xbc0c25e2, 0x288b493c, 0xff41950d, 0x397101a8, 0x08deb30c, 0xd89ce4b4, 0x6490c156, 0x7b6184cb, 0xd570b632, 0x48745c6c, 0xd04257b8, } }; static const u32 crypto_il_tab[4][256] ____cacheline_aligned = { { 0x00000052, 0x00000009, 0x0000006a, 0x000000d5, 0x00000030, 0x00000036, 0x000000a5, 0x00000038, 0x000000bf, 0x00000040, 0x000000a3, 0x0000009e, 0x00000081, 0x000000f3, 0x000000d7, 0x000000fb, 0x0000007c, 0x000000e3, 0x00000039, 0x00000082, 0x0000009b, 0x0000002f, 0x000000ff, 0x00000087, 0x00000034, 0x0000008e, 0x00000043, 0x00000044, 0x000000c4, 0x000000de, 0x000000e9, 0x000000cb, 0x00000054, 0x0000007b, 0x00000094, 0x00000032, 0x000000a6, 0x000000c2, 0x00000023, 0x0000003d, 0x000000ee, 0x0000004c, 0x00000095, 0x0000000b, 0x00000042, 0x000000fa, 0x000000c3, 0x0000004e, 0x00000008, 0x0000002e, 0x000000a1, 0x00000066, 0x00000028, 0x000000d9, 0x00000024, 0x000000b2, 0x00000076, 0x0000005b, 0x000000a2, 0x00000049, 0x0000006d, 0x0000008b, 0x000000d1, 0x00000025, 0x00000072, 0x000000f8, 0x000000f6, 0x00000064, 0x00000086, 0x00000068, 0x00000098, 0x00000016, 0x000000d4, 0x000000a4, 0x0000005c, 0x000000cc, 0x0000005d, 0x00000065, 0x000000b6, 0x00000092, 0x0000006c, 0x00000070, 0x00000048, 0x00000050, 0x000000fd, 0x000000ed, 0x000000b9, 0x000000da, 0x0000005e, 0x00000015, 0x00000046, 0x00000057, 0x000000a7, 0x0000008d, 0x0000009d, 0x00000084, 0x00000090, 0x000000d8, 0x000000ab, 0x00000000, 0x0000008c, 0x000000bc, 0x000000d3, 0x0000000a, 0x000000f7, 0x000000e4, 0x00000058, 0x00000005, 0x000000b8, 0x000000b3, 0x00000045, 0x00000006, 0x000000d0, 0x0000002c, 0x0000001e, 0x0000008f, 0x000000ca, 0x0000003f, 0x0000000f, 0x00000002, 0x000000c1, 0x000000af, 0x000000bd, 0x00000003, 0x00000001, 0x00000013, 0x0000008a, 0x0000006b, 0x0000003a, 0x00000091, 0x00000011, 0x00000041, 0x0000004f, 0x00000067, 0x000000dc, 0x000000ea, 0x00000097, 0x000000f2, 0x000000cf, 0x000000ce, 0x000000f0, 0x000000b4, 0x000000e6, 0x00000073, 0x00000096, 0x000000ac, 0x00000074, 0x00000022, 0x000000e7, 0x000000ad, 0x00000035, 0x00000085, 0x000000e2, 0x000000f9, 0x00000037, 0x000000e8, 0x0000001c, 0x00000075, 0x000000df, 0x0000006e, 0x00000047, 0x000000f1, 0x0000001a, 0x00000071, 0x0000001d, 0x00000029, 0x000000c5, 0x00000089, 0x0000006f, 0x000000b7, 0x00000062, 0x0000000e, 0x000000aa, 0x00000018, 0x000000be, 0x0000001b, 0x000000fc, 0x00000056, 0x0000003e, 0x0000004b, 0x000000c6, 0x000000d2, 0x00000079, 0x00000020, 0x0000009a, 0x000000db, 0x000000c0, 0x000000fe, 0x00000078, 0x000000cd, 0x0000005a, 0x000000f4, 0x0000001f, 0x000000dd, 0x000000a8, 0x00000033, 0x00000088, 0x00000007, 0x000000c7, 0x00000031, 0x000000b1, 0x00000012, 0x00000010, 0x00000059, 0x00000027, 0x00000080, 0x000000ec, 0x0000005f, 0x00000060, 0x00000051, 0x0000007f, 0x000000a9, 0x00000019, 0x000000b5, 0x0000004a, 0x0000000d, 0x0000002d, 0x000000e5, 0x0000007a, 0x0000009f, 0x00000093, 0x000000c9, 0x0000009c, 0x000000ef, 0x000000a0, 0x000000e0, 0x0000003b, 0x0000004d, 0x000000ae, 0x0000002a, 0x000000f5, 0x000000b0, 0x000000c8, 0x000000eb, 0x000000bb, 0x0000003c, 0x00000083, 0x00000053, 0x00000099, 0x00000061, 0x00000017, 0x0000002b, 0x00000004, 0x0000007e, 0x000000ba, 0x00000077, 0x000000d6, 0x00000026, 0x000000e1, 0x00000069, 0x00000014, 0x00000063, 0x00000055, 0x00000021, 0x0000000c, 0x0000007d, }, { 0x00005200, 0x00000900, 0x00006a00, 0x0000d500, 0x00003000, 0x00003600, 0x0000a500, 0x00003800, 0x0000bf00, 0x00004000, 0x0000a300, 0x00009e00, 0x00008100, 0x0000f300, 0x0000d700, 0x0000fb00, 0x00007c00, 0x0000e300, 0x00003900, 0x00008200, 0x00009b00, 0x00002f00, 0x0000ff00, 0x00008700, 0x00003400, 0x00008e00, 0x00004300, 0x00004400, 0x0000c400, 0x0000de00, 0x0000e900, 0x0000cb00, 0x00005400, 0x00007b00, 0x00009400, 0x00003200, 0x0000a600, 0x0000c200, 0x00002300, 0x00003d00, 0x0000ee00, 0x00004c00, 0x00009500, 0x00000b00, 0x00004200, 0x0000fa00, 0x0000c300, 0x00004e00, 0x00000800, 0x00002e00, 0x0000a100, 0x00006600, 0x00002800, 0x0000d900, 0x00002400, 0x0000b200, 0x00007600, 0x00005b00, 0x0000a200, 0x00004900, 0x00006d00, 0x00008b00, 0x0000d100, 0x00002500, 0x00007200, 0x0000f800, 0x0000f600, 0x00006400, 0x00008600, 0x00006800, 0x00009800, 0x00001600, 0x0000d400, 0x0000a400, 0x00005c00, 0x0000cc00, 0x00005d00, 0x00006500, 0x0000b600, 0x00009200, 0x00006c00, 0x00007000, 0x00004800, 0x00005000, 0x0000fd00, 0x0000ed00, 0x0000b900, 0x0000da00, 0x00005e00, 0x00001500, 0x00004600, 0x00005700, 0x0000a700, 0x00008d00, 0x00009d00, 0x00008400, 0x00009000, 0x0000d800, 0x0000ab00, 0x00000000, 0x00008c00, 0x0000bc00, 0x0000d300, 0x00000a00, 0x0000f700, 0x0000e400, 0x00005800, 0x00000500, 0x0000b800, 0x0000b300, 0x00004500, 0x00000600, 0x0000d000, 0x00002c00, 0x00001e00, 0x00008f00, 0x0000ca00, 0x00003f00, 0x00000f00, 0x00000200, 0x0000c100, 0x0000af00, 0x0000bd00, 0x00000300, 0x00000100, 0x00001300, 0x00008a00, 0x00006b00, 0x00003a00, 0x00009100, 0x00001100, 0x00004100, 0x00004f00, 0x00006700, 0x0000dc00, 0x0000ea00, 0x00009700, 0x0000f200, 0x0000cf00, 0x0000ce00, 0x0000f000, 0x0000b400, 0x0000e600, 0x00007300, 0x00009600, 0x0000ac00, 0x00007400, 0x00002200, 0x0000e700, 0x0000ad00, 0x00003500, 0x00008500, 0x0000e200, 0x0000f900, 0x00003700, 0x0000e800, 0x00001c00, 0x00007500, 0x0000df00, 0x00006e00, 0x00004700, 0x0000f100, 0x00001a00, 0x00007100, 0x00001d00, 0x00002900, 0x0000c500, 0x00008900, 0x00006f00, 0x0000b700, 0x00006200, 0x00000e00, 0x0000aa00, 0x00001800, 0x0000be00, 0x00001b00, 0x0000fc00, 0x00005600, 0x00003e00, 0x00004b00, 0x0000c600, 0x0000d200, 0x00007900, 0x00002000, 0x00009a00, 0x0000db00, 0x0000c000, 0x0000fe00, 0x00007800, 0x0000cd00, 0x00005a00, 0x0000f400, 0x00001f00, 0x0000dd00, 0x0000a800, 0x00003300, 0x00008800, 0x00000700, 0x0000c700, 0x00003100, 0x0000b100, 0x00001200, 0x00001000, 0x00005900, 0x00002700, 0x00008000, 0x0000ec00, 0x00005f00, 0x00006000, 0x00005100, 0x00007f00, 0x0000a900, 0x00001900, 0x0000b500, 0x00004a00, 0x00000d00, 0x00002d00, 0x0000e500, 0x00007a00, 0x00009f00, 0x00009300, 0x0000c900, 0x00009c00, 0x0000ef00, 0x0000a000, 0x0000e000, 0x00003b00, 0x00004d00, 0x0000ae00, 0x00002a00, 0x0000f500, 0x0000b000, 0x0000c800, 0x0000eb00, 0x0000bb00, 0x00003c00, 0x00008300, 0x00005300, 0x00009900, 0x00006100, 0x00001700, 0x00002b00, 0x00000400, 0x00007e00, 0x0000ba00, 0x00007700, 0x0000d600, 0x00002600, 0x0000e100, 0x00006900, 0x00001400, 0x00006300, 0x00005500, 0x00002100, 0x00000c00, 0x00007d00, }, { 0x00520000, 0x00090000, 0x006a0000, 0x00d50000, 0x00300000, 0x00360000, 0x00a50000, 0x00380000, 0x00bf0000, 0x00400000, 0x00a30000, 0x009e0000, 0x00810000, 0x00f30000, 0x00d70000, 0x00fb0000, 0x007c0000, 0x00e30000, 0x00390000, 0x00820000, 0x009b0000, 0x002f0000, 0x00ff0000, 0x00870000, 0x00340000, 0x008e0000, 0x00430000, 0x00440000, 0x00c40000, 0x00de0000, 0x00e90000, 0x00cb0000, 0x00540000, 0x007b0000, 0x00940000, 0x00320000, 0x00a60000, 0x00c20000, 0x00230000, 0x003d0000, 0x00ee0000, 0x004c0000, 0x00950000, 0x000b0000, 0x00420000, 0x00fa0000, 0x00c30000, 0x004e0000, 0x00080000, 0x002e0000, 0x00a10000, 0x00660000, 0x00280000, 0x00d90000, 0x00240000, 0x00b20000, 0x00760000, 0x005b0000, 0x00a20000, 0x00490000, 0x006d0000, 0x008b0000, 0x00d10000, 0x00250000, 0x00720000, 0x00f80000, 0x00f60000, 0x00640000, 0x00860000, 0x00680000, 0x00980000, 0x00160000, 0x00d40000, 0x00a40000, 0x005c0000, 0x00cc0000, 0x005d0000, 0x00650000, 0x00b60000, 0x00920000, 0x006c0000, 0x00700000, 0x00480000, 0x00500000, 0x00fd0000, 0x00ed0000, 0x00b90000, 0x00da0000, 0x005e0000, 0x00150000, 0x00460000, 0x00570000, 0x00a70000, 0x008d0000, 0x009d0000, 0x00840000, 0x00900000, 0x00d80000, 0x00ab0000, 0x00000000, 0x008c0000, 0x00bc0000, 0x00d30000, 0x000a0000, 0x00f70000, 0x00e40000, 0x00580000, 0x00050000, 0x00b80000, 0x00b30000, 0x00450000, 0x00060000, 0x00d00000, 0x002c0000, 0x001e0000, 0x008f0000, 0x00ca0000, 0x003f0000, 0x000f0000, 0x00020000, 0x00c10000, 0x00af0000, 0x00bd0000, 0x00030000, 0x00010000, 0x00130000, 0x008a0000, 0x006b0000, 0x003a0000, 0x00910000, 0x00110000, 0x00410000, 0x004f0000, 0x00670000, 0x00dc0000, 0x00ea0000, 0x00970000, 0x00f20000, 0x00cf0000, 0x00ce0000, 0x00f00000, 0x00b40000, 0x00e60000, 0x00730000, 0x00960000, 0x00ac0000, 0x00740000, 0x00220000, 0x00e70000, 0x00ad0000, 0x00350000, 0x00850000, 0x00e20000, 0x00f90000, 0x00370000, 0x00e80000, 0x001c0000, 0x00750000, 0x00df0000, 0x006e0000, 0x00470000, 0x00f10000, 0x001a0000, 0x00710000, 0x001d0000, 0x00290000, 0x00c50000, 0x00890000, 0x006f0000, 0x00b70000, 0x00620000, 0x000e0000, 0x00aa0000, 0x00180000, 0x00be0000, 0x001b0000, 0x00fc0000, 0x00560000, 0x003e0000, 0x004b0000, 0x00c60000, 0x00d20000, 0x00790000, 0x00200000, 0x009a0000, 0x00db0000, 0x00c00000, 0x00fe0000, 0x00780000, 0x00cd0000, 0x005a0000, 0x00f40000, 0x001f0000, 0x00dd0000, 0x00a80000, 0x00330000, 0x00880000, 0x00070000, 0x00c70000, 0x00310000, 0x00b10000, 0x00120000, 0x00100000, 0x00590000, 0x00270000, 0x00800000, 0x00ec0000, 0x005f0000, 0x00600000, 0x00510000, 0x007f0000, 0x00a90000, 0x00190000, 0x00b50000, 0x004a0000, 0x000d0000, 0x002d0000, 0x00e50000, 0x007a0000, 0x009f0000, 0x00930000, 0x00c90000, 0x009c0000, 0x00ef0000, 0x00a00000, 0x00e00000, 0x003b0000, 0x004d0000, 0x00ae0000, 0x002a0000, 0x00f50000, 0x00b00000, 0x00c80000, 0x00eb0000, 0x00bb0000, 0x003c0000, 0x00830000, 0x00530000, 0x00990000, 0x00610000, 0x00170000, 0x002b0000, 0x00040000, 0x007e0000, 0x00ba0000, 0x00770000, 0x00d60000, 0x00260000, 0x00e10000, 0x00690000, 0x00140000, 0x00630000, 0x00550000, 0x00210000, 0x000c0000, 0x007d0000, }, { 0x52000000, 0x09000000, 0x6a000000, 0xd5000000, 0x30000000, 0x36000000, 0xa5000000, 0x38000000, 0xbf000000, 0x40000000, 0xa3000000, 0x9e000000, 0x81000000, 0xf3000000, 0xd7000000, 0xfb000000, 0x7c000000, 0xe3000000, 0x39000000, 0x82000000, 0x9b000000, 0x2f000000, 0xff000000, 0x87000000, 0x34000000, 0x8e000000, 0x43000000, 0x44000000, 0xc4000000, 0xde000000, 0xe9000000, 0xcb000000, 0x54000000, 0x7b000000, 0x94000000, 0x32000000, 0xa6000000, 0xc2000000, 0x23000000, 0x3d000000, 0xee000000, 0x4c000000, 0x95000000, 0x0b000000, 0x42000000, 0xfa000000, 0xc3000000, 0x4e000000, 0x08000000, 0x2e000000, 0xa1000000, 0x66000000, 0x28000000, 0xd9000000, 0x24000000, 0xb2000000, 0x76000000, 0x5b000000, 0xa2000000, 0x49000000, 0x6d000000, 0x8b000000, 0xd1000000, 0x25000000, 0x72000000, 0xf8000000, 0xf6000000, 0x64000000, 0x86000000, 0x68000000, 0x98000000, 0x16000000, 0xd4000000, 0xa4000000, 0x5c000000, 0xcc000000, 0x5d000000, 0x65000000, 0xb6000000, 0x92000000, 0x6c000000, 0x70000000, 0x48000000, 0x50000000, 0xfd000000, 0xed000000, 0xb9000000, 0xda000000, 0x5e000000, 0x15000000, 0x46000000, 0x57000000, 0xa7000000, 0x8d000000, 0x9d000000, 0x84000000, 0x90000000, 0xd8000000, 0xab000000, 0x00000000, 0x8c000000, 0xbc000000, 0xd3000000, 0x0a000000, 0xf7000000, 0xe4000000, 0x58000000, 0x05000000, 0xb8000000, 0xb3000000, 0x45000000, 0x06000000, 0xd0000000, 0x2c000000, 0x1e000000, 0x8f000000, 0xca000000, 0x3f000000, 0x0f000000, 0x02000000, 0xc1000000, 0xaf000000, 0xbd000000, 0x03000000, 0x01000000, 0x13000000, 0x8a000000, 0x6b000000, 0x3a000000, 0x91000000, 0x11000000, 0x41000000, 0x4f000000, 0x67000000, 0xdc000000, 0xea000000, 0x97000000, 0xf2000000, 0xcf000000, 0xce000000, 0xf0000000, 0xb4000000, 0xe6000000, 0x73000000, 0x96000000, 0xac000000, 0x74000000, 0x22000000, 0xe7000000, 0xad000000, 0x35000000, 0x85000000, 0xe2000000, 0xf9000000, 0x37000000, 0xe8000000, 0x1c000000, 0x75000000, 0xdf000000, 0x6e000000, 0x47000000, 0xf1000000, 0x1a000000, 0x71000000, 0x1d000000, 0x29000000, 0xc5000000, 0x89000000, 0x6f000000, 0xb7000000, 0x62000000, 0x0e000000, 0xaa000000, 0x18000000, 0xbe000000, 0x1b000000, 0xfc000000, 0x56000000, 0x3e000000, 0x4b000000, 0xc6000000, 0xd2000000, 0x79000000, 0x20000000, 0x9a000000, 0xdb000000, 0xc0000000, 0xfe000000, 0x78000000, 0xcd000000, 0x5a000000, 0xf4000000, 0x1f000000, 0xdd000000, 0xa8000000, 0x33000000, 0x88000000, 0x07000000, 0xc7000000, 0x31000000, 0xb1000000, 0x12000000, 0x10000000, 0x59000000, 0x27000000, 0x80000000, 0xec000000, 0x5f000000, 0x60000000, 0x51000000, 0x7f000000, 0xa9000000, 0x19000000, 0xb5000000, 0x4a000000, 0x0d000000, 0x2d000000, 0xe5000000, 0x7a000000, 0x9f000000, 0x93000000, 0xc9000000, 0x9c000000, 0xef000000, 0xa0000000, 0xe0000000, 0x3b000000, 0x4d000000, 0xae000000, 0x2a000000, 0xf5000000, 0xb0000000, 0xc8000000, 0xeb000000, 0xbb000000, 0x3c000000, 0x83000000, 0x53000000, 0x99000000, 0x61000000, 0x17000000, 0x2b000000, 0x04000000, 0x7e000000, 0xba000000, 0x77000000, 0xd6000000, 0x26000000, 0xe1000000, 0x69000000, 0x14000000, 0x63000000, 0x55000000, 0x21000000, 0x0c000000, 0x7d000000, } }; EXPORT_SYMBOL_GPL(crypto_ft_tab); EXPORT_SYMBOL_GPL(crypto_it_tab); /** * crypto_aes_set_key - Set the AES key. * @tfm: The %crypto_tfm that is used in the context. * @in_key: The input key. * @key_len: The size of the key. * * This function uses aes_expand_key() to expand the key. &crypto_aes_ctx * _must_ be the private data embedded in @tfm which is retrieved with * crypto_tfm_ctx(). * * Return: 0 on success; -EINVAL on failure (only happens for bad key lengths) */ int crypto_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) { struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); return aes_expandkey(ctx, in_key, key_len); } EXPORT_SYMBOL_GPL(crypto_aes_set_key); /* encrypt a block of text */ #define f_rn(bo, bi, n, k) do { \ bo[n] = crypto_ft_tab[0][byte(bi[n], 0)] ^ \ crypto_ft_tab[1][byte(bi[(n + 1) & 3], 1)] ^ \ crypto_ft_tab[2][byte(bi[(n + 2) & 3], 2)] ^ \ crypto_ft_tab[3][byte(bi[(n + 3) & 3], 3)] ^ *(k + n); \ } while (0) #define f_nround(bo, bi, k) do {\ f_rn(bo, bi, 0, k); \ f_rn(bo, bi, 1, k); \ f_rn(bo, bi, 2, k); \ f_rn(bo, bi, 3, k); \ k += 4; \ } while (0) #define f_rl(bo, bi, n, k) do { \ bo[n] = crypto_fl_tab[0][byte(bi[n], 0)] ^ \ crypto_fl_tab[1][byte(bi[(n + 1) & 3], 1)] ^ \ crypto_fl_tab[2][byte(bi[(n + 2) & 3], 2)] ^ \ crypto_fl_tab[3][byte(bi[(n + 3) & 3], 3)] ^ *(k + n); \ } while (0) #define f_lround(bo, bi, k) do {\ f_rl(bo, bi, 0, k); \ f_rl(bo, bi, 1, k); \ f_rl(bo, bi, 2, k); \ f_rl(bo, bi, 3, k); \ } while (0) static void crypto_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); u32 b0[4], b1[4]; const u32 *kp = ctx->key_enc + 4; const int key_len = ctx->key_length; b0[0] = ctx->key_enc[0] ^ get_unaligned_le32(in); b0[1] = ctx->key_enc[1] ^ get_unaligned_le32(in + 4); b0[2] = ctx->key_enc[2] ^ get_unaligned_le32(in + 8); b0[3] = ctx->key_enc[3] ^ get_unaligned_le32(in + 12); if (key_len > 24) { f_nround(b1, b0, kp); f_nround(b0, b1, kp); } if (key_len > 16) { f_nround(b1, b0, kp); f_nround(b0, b1, kp); } f_nround(b1, b0, kp); f_nround(b0, b1, kp); f_nround(b1, b0, kp); f_nround(b0, b1, kp); f_nround(b1, b0, kp); f_nround(b0, b1, kp); f_nround(b1, b0, kp); f_nround(b0, b1, kp); f_nround(b1, b0, kp); f_lround(b0, b1, kp); put_unaligned_le32(b0[0], out); put_unaligned_le32(b0[1], out + 4); put_unaligned_le32(b0[2], out + 8); put_unaligned_le32(b0[3], out + 12); } /* decrypt a block of text */ #define i_rn(bo, bi, n, k) do { \ bo[n] = crypto_it_tab[0][byte(bi[n], 0)] ^ \ crypto_it_tab[1][byte(bi[(n + 3) & 3], 1)] ^ \ crypto_it_tab[2][byte(bi[(n + 2) & 3], 2)] ^ \ crypto_it_tab[3][byte(bi[(n + 1) & 3], 3)] ^ *(k + n); \ } while (0) #define i_nround(bo, bi, k) do {\ i_rn(bo, bi, 0, k); \ i_rn(bo, bi, 1, k); \ i_rn(bo, bi, 2, k); \ i_rn(bo, bi, 3, k); \ k += 4; \ } while (0) #define i_rl(bo, bi, n, k) do { \ bo[n] = crypto_il_tab[0][byte(bi[n], 0)] ^ \ crypto_il_tab[1][byte(bi[(n + 3) & 3], 1)] ^ \ crypto_il_tab[2][byte(bi[(n + 2) & 3], 2)] ^ \ crypto_il_tab[3][byte(bi[(n + 1) & 3], 3)] ^ *(k + n); \ } while (0) #define i_lround(bo, bi, k) do {\ i_rl(bo, bi, 0, k); \ i_rl(bo, bi, 1, k); \ i_rl(bo, bi, 2, k); \ i_rl(bo, bi, 3, k); \ } while (0) static void crypto_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); u32 b0[4], b1[4]; const int key_len = ctx->key_length; const u32 *kp = ctx->key_dec + 4; b0[0] = ctx->key_dec[0] ^ get_unaligned_le32(in); b0[1] = ctx->key_dec[1] ^ get_unaligned_le32(in + 4); b0[2] = ctx->key_dec[2] ^ get_unaligned_le32(in + 8); b0[3] = ctx->key_dec[3] ^ get_unaligned_le32(in + 12); if (key_len > 24) { i_nround(b1, b0, kp); i_nround(b0, b1, kp); } if (key_len > 16) { i_nround(b1, b0, kp); i_nround(b0, b1, kp); } i_nround(b1, b0, kp); i_nround(b0, b1, kp); i_nround(b1, b0, kp); i_nround(b0, b1, kp); i_nround(b1, b0, kp); i_nround(b0, b1, kp); i_nround(b1, b0, kp); i_nround(b0, b1, kp); i_nround(b1, b0, kp); i_lround(b0, b1, kp); put_unaligned_le32(b0[0], out); put_unaligned_le32(b0[1], out + 4); put_unaligned_le32(b0[2], out + 8); put_unaligned_le32(b0[3], out + 12); } static struct crypto_alg aes_alg = { .cra_name = "aes", .cra_driver_name = "aes-generic", .cra_priority = 100, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypto_aes_ctx), .cra_module = THIS_MODULE, .cra_u = { .cipher = { .cia_min_keysize = AES_MIN_KEY_SIZE, .cia_max_keysize = AES_MAX_KEY_SIZE, .cia_setkey = crypto_aes_set_key, .cia_encrypt = crypto_aes_encrypt, .cia_decrypt = crypto_aes_decrypt } } }; static int __init aes_init(void) { return crypto_register_alg(&aes_alg); } static void __exit aes_fini(void) { crypto_unregister_alg(&aes_alg); } module_init(aes_init); module_exit(aes_fini); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_ALIAS_CRYPTO("aes"); MODULE_ALIAS_CRYPTO("aes-generic");
78 80 81 217 200 291 289 290 2 291 188 285 286 287 2 287 285 177 290 290 291 288 276 273 275 208 86 289 273 291 290 289 2 2 2 291 291 199 276 291 32 290 291 291 230 291 206 205 232 32 290 205 232 290 290 289 4 289 290 34 291 291 289 291 276 290 290 290 18 289 290 200 201 199 201 187 193 187 27 79 78 77 79 77 78 290 291 291 79 291 96 96 289 291 291 290 288 290 290 291 198 200 199 201 187 187 186 187 184 201 192 176 176 200 201 219 219 219 219 219 218 219 24 24 9 24 24 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2010 Red Hat, Inc. All Rights Reserved. */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_shared.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_extent_busy.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" #include "xfs_log.h" #include "xfs_log_priv.h" #include "xfs_trace.h" #include "xfs_discard.h" /* * Allocate a new ticket. Failing to get a new ticket makes it really hard to * recover, so we don't allow failure here. Also, we allocate in a context that * we don't want to be issuing transactions from, so we need to tell the * allocation code this as well. * * We don't reserve any space for the ticket - we are going to steal whatever * space we require from transactions as they commit. To ensure we reserve all * the space required, we need to set the current reservation of the ticket to * zero so that we know to steal the initial transaction overhead from the * first transaction commit. */ static struct xlog_ticket * xlog_cil_ticket_alloc( struct xlog *log) { struct xlog_ticket *tic; tic = xlog_ticket_alloc(log, 0, 1, 0); /* * set the current reservation to zero so we know to steal the basic * transaction overhead reservation from the first transaction commit. */ tic->t_curr_res = 0; tic->t_iclog_hdrs = 0; return tic; } static inline void xlog_cil_set_iclog_hdr_count(struct xfs_cil *cil) { struct xlog *log = cil->xc_log; atomic_set(&cil->xc_iclog_hdrs, (XLOG_CIL_BLOCKING_SPACE_LIMIT(log) / (log->l_iclog_size - log->l_iclog_hsize))); } /* * Check if the current log item was first committed in this sequence. * We can't rely on just the log item being in the CIL, we have to check * the recorded commit sequence number. * * Note: for this to be used in a non-racy manner, it has to be called with * CIL flushing locked out. As a result, it should only be used during the * transaction commit process when deciding what to format into the item. */ static bool xlog_item_in_current_chkpt( struct xfs_cil *cil, struct xfs_log_item *lip) { if (test_bit(XLOG_CIL_EMPTY, &cil->xc_flags)) return false; /* * li_seq is written on the first commit of a log item to record the * first checkpoint it is written to. Hence if it is different to the * current sequence, we're in a new checkpoint. */ return lip->li_seq == READ_ONCE(cil->xc_current_sequence); } bool xfs_log_item_in_current_chkpt( struct xfs_log_item *lip) { return xlog_item_in_current_chkpt(lip->li_log->l_cilp, lip); } /* * Unavoidable forward declaration - xlog_cil_push_work() calls * xlog_cil_ctx_alloc() itself. */ static void xlog_cil_push_work(struct work_struct *work); static struct xfs_cil_ctx * xlog_cil_ctx_alloc(void) { struct xfs_cil_ctx *ctx; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL | __GFP_NOFAIL); INIT_LIST_HEAD(&ctx->committing); INIT_LIST_HEAD(&ctx->busy_extents.extent_list); INIT_LIST_HEAD(&ctx->log_items); INIT_LIST_HEAD(&ctx->lv_chain); INIT_WORK(&ctx->push_work, xlog_cil_push_work); return ctx; } /* * Aggregate the CIL per cpu structures into global counts, lists, etc and * clear the percpu state ready for the next context to use. This is called * from the push code with the context lock held exclusively, hence nothing else * will be accessing or modifying the per-cpu counters. */ static void xlog_cil_push_pcp_aggregate( struct xfs_cil *cil, struct xfs_cil_ctx *ctx) { struct xlog_cil_pcp *cilpcp; int cpu; for_each_cpu(cpu, &ctx->cil_pcpmask) { cilpcp = per_cpu_ptr(cil->xc_pcp, cpu); ctx->ticket->t_curr_res += cilpcp->space_reserved; cilpcp->space_reserved = 0; if (!list_empty(&cilpcp->busy_extents)) { list_splice_init(&cilpcp->busy_extents, &ctx->busy_extents.extent_list); } if (!list_empty(&cilpcp->log_items)) list_splice_init(&cilpcp->log_items, &ctx->log_items); /* * We're in the middle of switching cil contexts. Reset the * counter we use to detect when the current context is nearing * full. */ cilpcp->space_used = 0; } } /* * Aggregate the CIL per-cpu space used counters into the global atomic value. * This is called when the per-cpu counter aggregation will first pass the soft * limit threshold so we can switch to atomic counter aggregation for accurate * detection of hard limit traversal. */ static void xlog_cil_insert_pcp_aggregate( struct xfs_cil *cil, struct xfs_cil_ctx *ctx) { int cpu; int count = 0; /* Trigger atomic updates then aggregate only for the first caller */ if (!test_and_clear_bit(XLOG_CIL_PCP_SPACE, &cil->xc_flags)) return; /* * We can race with other cpus setting cil_pcpmask. However, we've * atomically cleared PCP_SPACE which forces other threads to add to * the global space used count. cil_pcpmask is a superset of cilpcp * structures that could have a nonzero space_used. */ for_each_cpu(cpu, &ctx->cil_pcpmask) { struct xlog_cil_pcp *cilpcp = per_cpu_ptr(cil->xc_pcp, cpu); count += xchg(&cilpcp->space_used, 0); } atomic_add(count, &ctx->space_used); } static void xlog_cil_ctx_switch( struct xfs_cil *cil, struct xfs_cil_ctx *ctx) { xlog_cil_set_iclog_hdr_count(cil); set_bit(XLOG_CIL_EMPTY, &cil->xc_flags); set_bit(XLOG_CIL_PCP_SPACE, &cil->xc_flags); ctx->sequence = ++cil->xc_current_sequence; ctx->cil = cil; cil->xc_ctx = ctx; } /* * After the first stage of log recovery is done, we know where the head and * tail of the log are. We need this log initialisation done before we can * initialise the first CIL checkpoint context. * * Here we allocate a log ticket to track space usage during a CIL push. This * ticket is passed to xlog_write() directly so that we don't slowly leak log * space by failing to account for space used by log headers and additional * region headers for split regions. */ void xlog_cil_init_post_recovery( struct xlog *log) { log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log); log->l_cilp->xc_ctx->sequence = 1; xlog_cil_set_iclog_hdr_count(log->l_cilp); } static inline int xlog_cil_iovec_space( uint niovecs) { return round_up((sizeof(struct xfs_log_vec) + niovecs * sizeof(struct xfs_log_iovec)), sizeof(uint64_t)); } /* * Allocate or pin log vector buffers for CIL insertion. * * The CIL currently uses disposable buffers for copying a snapshot of the * modified items into the log during a push. The biggest problem with this is * the requirement to allocate the disposable buffer during the commit if: * a) does not exist; or * b) it is too small * * If we do this allocation within xlog_cil_insert_format_items(), it is done * under the xc_ctx_lock, which means that a CIL push cannot occur during * the memory allocation. This means that we have a potential deadlock situation * under low memory conditions when we have lots of dirty metadata pinned in * the CIL and we need a CIL commit to occur to free memory. * * To avoid this, we need to move the memory allocation outside the * xc_ctx_lock, but because the log vector buffers are disposable, that opens * up a TOCTOU race condition w.r.t. the CIL committing and removing the log * vector buffers between the check and the formatting of the item into the * log vector buffer within the xc_ctx_lock. * * Because the log vector buffer needs to be unchanged during the CIL push * process, we cannot share the buffer between the transaction commit (which * modifies the buffer) and the CIL push context that is writing the changes * into the log. This means skipping preallocation of buffer space is * unreliable, but we most definitely do not want to be allocating and freeing * buffers unnecessarily during commits when overwrites can be done safely. * * The simplest solution to this problem is to allocate a shadow buffer when a * log item is committed for the second time, and then to only use this buffer * if necessary. The buffer can remain attached to the log item until such time * it is needed, and this is the buffer that is reallocated to match the size of * the incoming modification. Then during the formatting of the item we can swap * the active buffer with the new one if we can't reuse the existing buffer. We * don't free the old buffer as it may be reused on the next modification if * it's size is right, otherwise we'll free and reallocate it at that point. * * This function builds a vector for the changes in each log item in the * transaction. It then works out the length of the buffer needed for each log * item, allocates them and attaches the vector to the log item in preparation * for the formatting step which occurs under the xc_ctx_lock. * * While this means the memory footprint goes up, it avoids the repeated * alloc/free pattern that repeated modifications of an item would otherwise * cause, and hence minimises the CPU overhead of such behaviour. */ static void xlog_cil_alloc_shadow_bufs( struct xlog *log, struct xfs_trans *tp) { struct xfs_log_item *lip; list_for_each_entry(lip, &tp->t_items, li_trans) { struct xfs_log_vec *lv; int niovecs = 0; int nbytes = 0; int alloc_size; bool ordered = false; /* Skip items which aren't dirty in this transaction. */ if (!test_bit(XFS_LI_DIRTY, &lip->li_flags)) continue; /* get number of vecs and size of data to be stored */ lip->li_ops->iop_size(lip, &niovecs, &nbytes); /* * Ordered items need to be tracked but we do not wish to write * them. We need a logvec to track the object, but we do not * need an iovec or buffer to be allocated for copying data. */ if (niovecs == XFS_LOG_VEC_ORDERED) { ordered = true; niovecs = 0; nbytes = 0; } /* * We 64-bit align the length of each iovec so that the start of * the next one is naturally aligned. We'll need to account for * that slack space here. * * We also add the xlog_op_header to each region when * formatting, but that's not accounted to the size of the item * at this point. Hence we'll need an addition number of bytes * for each vector to hold an opheader. * * Then round nbytes up to 64-bit alignment so that the initial * buffer alignment is easy to calculate and verify. */ nbytes = xlog_item_space(niovecs, nbytes); /* * The data buffer needs to start 64-bit aligned, so round up * that space to ensure we can align it appropriately and not * overrun the buffer. */ alloc_size = nbytes + xlog_cil_iovec_space(niovecs); /* * if we have no shadow buffer, or it is too small, we need to * reallocate it. */ if (!lip->li_lv_shadow || alloc_size > lip->li_lv_shadow->lv_alloc_size) { /* * We free and allocate here as a realloc would copy * unnecessary data. We don't use kvzalloc() for the * same reason - we don't need to zero the data area in * the buffer, only the log vector header and the iovec * storage. */ kvfree(lip->li_lv_shadow); lv = xlog_kvmalloc(alloc_size); memset(lv, 0, xlog_cil_iovec_space(niovecs)); INIT_LIST_HEAD(&lv->lv_list); lv->lv_item = lip; lv->lv_alloc_size = alloc_size; if (ordered) lv->lv_buf_used = XFS_LOG_VEC_ORDERED; else lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1]; lip->li_lv_shadow = lv; } else { /* same or smaller, optimise common overwrite case */ lv = lip->li_lv_shadow; if (ordered) lv->lv_buf_used = XFS_LOG_VEC_ORDERED; else lv->lv_buf_used = 0; lv->lv_bytes = 0; } /* Ensure the lv is set up according to ->iop_size */ lv->lv_niovecs = niovecs; /* The allocated data region lies beyond the iovec region */ lv->lv_buf = (char *)lv + xlog_cil_iovec_space(niovecs); } } /* * Prepare the log item for insertion into the CIL. Calculate the difference in * log space it will consume, and if it is a new item pin it as well. */ STATIC void xfs_cil_prepare_item( struct xlog *log, struct xfs_log_item *lip, struct xfs_log_vec *lv, int *diff_len) { /* Account for the new LV being passed in */ if (lv->lv_buf_used != XFS_LOG_VEC_ORDERED) *diff_len += lv->lv_bytes; /* * If there is no old LV, this is the first time we've seen the item in * this CIL context and so we need to pin it. If we are replacing the * old lv, then remove the space it accounts for and make it the shadow * buffer for later freeing. In both cases we are now switching to the * shadow buffer, so update the pointer to it appropriately. */ if (!lip->li_lv) { if (lv->lv_item->li_ops->iop_pin) lv->lv_item->li_ops->iop_pin(lv->lv_item); lv->lv_item->li_lv_shadow = NULL; } else if (lip->li_lv != lv) { ASSERT(lv->lv_buf_used != XFS_LOG_VEC_ORDERED); *diff_len -= lip->li_lv->lv_bytes; lv->lv_item->li_lv_shadow = lip->li_lv; } /* attach new log vector to log item */ lv->lv_item->li_lv = lv; /* * If this is the first time the item is being committed to the * CIL, store the sequence number on the log item so we can * tell in future commits whether this is the first checkpoint * the item is being committed into. */ if (!lv->lv_item->li_seq) lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence; } /* * Format log item into a flat buffers * * For delayed logging, we need to hold a formatted buffer containing all the * changes on the log item. This enables us to relog the item in memory and * write it out asynchronously without needing to relock the object that was * modified at the time it gets written into the iclog. * * This function takes the prepared log vectors attached to each log item, and * formats the changes into the log vector buffer. The buffer it uses is * dependent on the current state of the vector in the CIL - the shadow lv is * guaranteed to be large enough for the current modification, but we will only * use that if we can't reuse the existing lv. If we can't reuse the existing * lv, then simple swap it out for the shadow lv. We don't free it - that is * done lazily either by th enext modification or the freeing of the log item. * * We don't set up region headers during this process; we simply copy the * regions into the flat buffer. We can do this because we still have to do a * formatting step to write the regions into the iclog buffer. Writing the * ophdrs during the iclog write means that we can support splitting large * regions across iclog boundares without needing a change in the format of the * item/region encapsulation. * * Hence what we need to do now is change the rewrite the vector array to point * to the copied region inside the buffer we just allocated. This allows us to * format the regions into the iclog as though they are being formatted * directly out of the objects themselves. */ static void xlog_cil_insert_format_items( struct xlog *log, struct xfs_trans *tp, int *diff_len) { struct xfs_log_item *lip; /* Bail out if we didn't find a log item. */ if (list_empty(&tp->t_items)) { ASSERT(0); return; } list_for_each_entry(lip, &tp->t_items, li_trans) { struct xfs_log_vec *lv = lip->li_lv; struct xfs_log_vec *shadow = lip->li_lv_shadow; /* Skip items which aren't dirty in this transaction. */ if (!test_bit(XFS_LI_DIRTY, &lip->li_flags)) continue; /* * The formatting size information is already attached to * the shadow lv on the log item. */ if (shadow->lv_buf_used == XFS_LOG_VEC_ORDERED) { if (!lv) { lv = shadow; lv->lv_item = lip; } ASSERT(shadow->lv_alloc_size == lv->lv_alloc_size); xfs_cil_prepare_item(log, lip, lv, diff_len); continue; } /* Skip items that do not have any vectors for writing */ if (!shadow->lv_niovecs) continue; /* compare to existing item size */ if (lv && shadow->lv_alloc_size <= lv->lv_alloc_size) { /* same or smaller, optimise common overwrite case */ /* * set the item up as though it is a new insertion so * that the space reservation accounting is correct. */ *diff_len -= lv->lv_bytes; /* Ensure the lv is set up according to ->iop_size */ lv->lv_niovecs = shadow->lv_niovecs; /* reset the lv buffer information for new formatting */ lv->lv_buf_used = 0; lv->lv_bytes = 0; lv->lv_buf = (char *)lv + xlog_cil_iovec_space(lv->lv_niovecs); } else { /* switch to shadow buffer! */ lv = shadow; lv->lv_item = lip; } ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t))); lip->li_ops->iop_format(lip, lv); xfs_cil_prepare_item(log, lip, lv, diff_len); } } /* * The use of lockless waitqueue_active() requires that the caller has * serialised itself against the wakeup call in xlog_cil_push_work(). That * can be done by either holding the push lock or the context lock. */ static inline bool xlog_cil_over_hard_limit( struct xlog *log, int32_t space_used) { if (waitqueue_active(&log->l_cilp->xc_push_wait)) return true; if (space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log)) return true; return false; } /* * Insert the log items into the CIL and calculate the difference in space * consumed by the item. Add the space to the checkpoint ticket and calculate * if the change requires additional log metadata. If it does, take that space * as well. Remove the amount of space we added to the checkpoint ticket from * the current transaction ticket so that the accounting works out correctly. */ static void xlog_cil_insert_items( struct xlog *log, struct xfs_trans *tp, uint32_t released_space) { struct xfs_cil *cil = log->l_cilp; struct xfs_cil_ctx *ctx = cil->xc_ctx; struct xfs_log_item *lip; int len = 0; int iovhdr_res = 0, split_res = 0, ctx_res = 0; int space_used; int order; unsigned int cpu_nr; struct xlog_cil_pcp *cilpcp; ASSERT(tp); /* * We can do this safely because the context can't checkpoint until we * are done so it doesn't matter exactly how we update the CIL. */ xlog_cil_insert_format_items(log, tp, &len); /* * Subtract the space released by intent cancelation from the space we * consumed so that we remove it from the CIL space and add it back to * the current transaction reservation context. */ len -= released_space; /* * Grab the per-cpu pointer for the CIL before we start any accounting. * That ensures that we are running with pre-emption disabled and so we * can't be scheduled away between split sample/update operations that * are done without outside locking to serialise them. */ cpu_nr = get_cpu(); cilpcp = this_cpu_ptr(cil->xc_pcp); /* Tell the future push that there was work added by this CPU. */ if (!cpumask_test_cpu(cpu_nr, &ctx->cil_pcpmask)) cpumask_test_and_set_cpu(cpu_nr, &ctx->cil_pcpmask); /* * We need to take the CIL checkpoint unit reservation on the first * commit into the CIL. Test the XLOG_CIL_EMPTY bit first so we don't * unnecessarily do an atomic op in the fast path here. We can clear the * XLOG_CIL_EMPTY bit as we are under the xc_ctx_lock here and that * needs to be held exclusively to reset the XLOG_CIL_EMPTY bit. */ if (test_bit(XLOG_CIL_EMPTY, &cil->xc_flags) && test_and_clear_bit(XLOG_CIL_EMPTY, &cil->xc_flags)) ctx_res = ctx->ticket->t_unit_res; /* * Check if we need to steal iclog headers. atomic_read() is not a * locked atomic operation, so we can check the value before we do any * real atomic ops in the fast path. If we've already taken the CIL unit * reservation from this commit, we've already got one iclog header * space reserved so we have to account for that otherwise we risk * overrunning the reservation on this ticket. * * If the CIL is already at the hard limit, we might need more header * space that originally reserved. So steal more header space from every * commit that occurs once we are over the hard limit to ensure the CIL * push won't run out of reservation space. * * This can steal more than we need, but that's OK. * * The cil->xc_ctx_lock provides the serialisation necessary for safely * calling xlog_cil_over_hard_limit() in this context. */ space_used = atomic_read(&ctx->space_used) + cilpcp->space_used + len; if (atomic_read(&cil->xc_iclog_hdrs) > 0 || xlog_cil_over_hard_limit(log, space_used)) { split_res = log->l_iclog_hsize + sizeof(struct xlog_op_header); if (ctx_res) ctx_res += split_res * (tp->t_ticket->t_iclog_hdrs - 1); else ctx_res = split_res * tp->t_ticket->t_iclog_hdrs; atomic_sub(tp->t_ticket->t_iclog_hdrs, &cil->xc_iclog_hdrs); } cilpcp->space_reserved += ctx_res; /* * Accurately account when over the soft limit, otherwise fold the * percpu count into the global count if over the per-cpu threshold. */ if (!test_bit(XLOG_CIL_PCP_SPACE, &cil->xc_flags)) { atomic_add(len, &ctx->space_used); } else if (cilpcp->space_used + len > (XLOG_CIL_SPACE_LIMIT(log) / num_online_cpus())) { space_used = atomic_add_return(cilpcp->space_used + len, &ctx->space_used); cilpcp->space_used = 0; /* * If we just transitioned over the soft limit, we need to * transition to the global atomic counter. */ if (space_used >= XLOG_CIL_SPACE_LIMIT(log)) xlog_cil_insert_pcp_aggregate(cil, ctx); } else { cilpcp->space_used += len; } /* attach the transaction to the CIL if it has any busy extents */ if (!list_empty(&tp->t_busy)) list_splice_init(&tp->t_busy, &cilpcp->busy_extents); /* * Now update the order of everything modified in the transaction * and insert items into the CIL if they aren't already there. * We do this here so we only need to take the CIL lock once during * the transaction commit. */ order = atomic_inc_return(&ctx->order_id); list_for_each_entry(lip, &tp->t_items, li_trans) { /* Skip items which aren't dirty in this transaction. */ if (!test_bit(XFS_LI_DIRTY, &lip->li_flags)) continue; lip->li_order_id = order; if (!list_empty(&lip->li_cil)) continue; list_add_tail(&lip->li_cil, &cilpcp->log_items); } put_cpu(); /* * If we've overrun the reservation, dump the tx details before we move * the log items. Shutdown is imminent... */ tp->t_ticket->t_curr_res -= ctx_res + len; if (WARN_ON(tp->t_ticket->t_curr_res < 0)) { xfs_warn(log->l_mp, "Transaction log reservation overrun:"); xfs_warn(log->l_mp, " log items: %d bytes (iov hdrs: %d bytes)", len, iovhdr_res); xfs_warn(log->l_mp, " split region headers: %d bytes", split_res); xfs_warn(log->l_mp, " ctx ticket: %d bytes", ctx_res); xlog_print_trans(tp); xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); } } static inline void xlog_cil_ail_insert_batch( struct xfs_ail *ailp, struct xfs_ail_cursor *cur, struct xfs_log_item **log_items, int nr_items, xfs_lsn_t commit_lsn) { int i; spin_lock(&ailp->ail_lock); /* xfs_trans_ail_update_bulk drops ailp->ail_lock */ xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn); for (i = 0; i < nr_items; i++) { struct xfs_log_item *lip = log_items[i]; if (lip->li_ops->iop_unpin) lip->li_ops->iop_unpin(lip, 0); } } /* * Take the checkpoint's log vector chain of items and insert the attached log * items into the AIL. This uses bulk insertion techniques to minimise AIL lock * traffic. * * The AIL tracks log items via the start record LSN of the checkpoint, * not the commit record LSN. This is because we can pipeline multiple * checkpoints, and so the start record of checkpoint N+1 can be * written before the commit record of checkpoint N. i.e: * * start N commit N * +-------------+------------+----------------+ * start N+1 commit N+1 * * The tail of the log cannot be moved to the LSN of commit N when all * the items of that checkpoint are written back, because then the * start record for N+1 is no longer in the active portion of the log * and recovery will fail/corrupt the filesystem. * * Hence when all the log items in checkpoint N are written back, the * tail of the log most now only move as far forwards as the start LSN * of checkpoint N+1. * * If we are called with the aborted flag set, it is because a log write during * a CIL checkpoint commit has failed. In this case, all the items in the * checkpoint have already gone through iop_committed and iop_committing, which * means that checkpoint commit abort handling is treated exactly the same as an * iclog write error even though we haven't started any IO yet. Hence in this * case all we need to do is iop_committed processing, followed by an * iop_unpin(aborted) call. * * The AIL cursor is used to optimise the insert process. If commit_lsn is not * at the end of the AIL, the insert cursor avoids the need to walk the AIL to * find the insertion point on every xfs_log_item_batch_insert() call. This * saves a lot of needless list walking and is a net win, even though it * slightly increases that amount of AIL lock traffic to set it up and tear it * down. */ static void xlog_cil_ail_insert( struct xfs_cil_ctx *ctx, bool aborted) { #define LOG_ITEM_BATCH_SIZE 32 struct xfs_ail *ailp = ctx->cil->xc_log->l_ailp; struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE]; struct xfs_log_vec *lv; struct xfs_ail_cursor cur; xfs_lsn_t old_head; int i = 0; /* * Update the AIL head LSN with the commit record LSN of this * checkpoint. As iclogs are always completed in order, this should * always be the same (as iclogs can contain multiple commit records) or * higher LSN than the current head. We do this before insertion of the * items so that log space checks during insertion will reflect the * space that this checkpoint has already consumed. We call * xfs_ail_update_finish() so that tail space and space-based wakeups * will be recalculated appropriately. */ ASSERT(XFS_LSN_CMP(ctx->commit_lsn, ailp->ail_head_lsn) >= 0 || aborted); spin_lock(&ailp->ail_lock); xfs_trans_ail_cursor_last(ailp, &cur, ctx->start_lsn); old_head = ailp->ail_head_lsn; ailp->ail_head_lsn = ctx->commit_lsn; /* xfs_ail_update_finish() drops the ail_lock */ xfs_ail_update_finish(ailp, NULLCOMMITLSN); /* * We move the AIL head forwards to account for the space used in the * log before we remove that space from the grant heads. This prevents a * transient condition where reservation space appears to become * available on return, only for it to disappear again immediately as * the AIL head update accounts in the log tail space. */ smp_wmb(); /* paired with smp_rmb in xlog_grant_space_left */ xlog_grant_return_space(ailp->ail_log, old_head, ailp->ail_head_lsn); /* unpin all the log items */ list_for_each_entry(lv, &ctx->lv_chain, lv_list) { struct xfs_log_item *lip = lv->lv_item; xfs_lsn_t item_lsn; if (aborted) { trace_xlog_ail_insert_abort(lip); set_bit(XFS_LI_ABORTED, &lip->li_flags); } if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) { lip->li_ops->iop_release(lip); continue; } if (lip->li_ops->iop_committed) item_lsn = lip->li_ops->iop_committed(lip, ctx->start_lsn); else item_lsn = ctx->start_lsn; /* item_lsn of -1 means the item needs no further processing */ if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) continue; /* * if we are aborting the operation, no point in inserting the * object into the AIL as we are in a shutdown situation. */ if (aborted) { ASSERT(xlog_is_shutdown(ailp->ail_log)); if (lip->li_ops->iop_unpin) lip->li_ops->iop_unpin(lip, 1); continue; } if (item_lsn != ctx->start_lsn) { /* * Not a bulk update option due to unusual item_lsn. * Push into AIL immediately, rechecking the lsn once * we have the ail lock. Then unpin the item. This does * not affect the AIL cursor the bulk insert path is * using. */ spin_lock(&ailp->ail_lock); if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) xfs_trans_ail_update(ailp, lip, item_lsn); else spin_unlock(&ailp->ail_lock); if (lip->li_ops->iop_unpin) lip->li_ops->iop_unpin(lip, 0); continue; } /* Item is a candidate for bulk AIL insert. */ log_items[i++] = lv->lv_item; if (i >= LOG_ITEM_BATCH_SIZE) { xlog_cil_ail_insert_batch(ailp, &cur, log_items, LOG_ITEM_BATCH_SIZE, ctx->start_lsn); i = 0; } } /* make sure we insert the remainder! */ if (i) xlog_cil_ail_insert_batch(ailp, &cur, log_items, i, ctx->start_lsn); spin_lock(&ailp->ail_lock); xfs_trans_ail_cursor_done(&cur); spin_unlock(&ailp->ail_lock); } static void xlog_cil_free_logvec( struct list_head *lv_chain) { struct xfs_log_vec *lv; while (!list_empty(lv_chain)) { lv = list_first_entry(lv_chain, struct xfs_log_vec, lv_list); list_del_init(&lv->lv_list); kvfree(lv); } } /* * Mark all items committed and clear busy extents. We free the log vector * chains in a separate pass so that we unpin the log items as quickly as * possible. */ static void xlog_cil_committed( struct xfs_cil_ctx *ctx) { struct xfs_mount *mp = ctx->cil->xc_log->l_mp; bool abort = xlog_is_shutdown(ctx->cil->xc_log); /* * If the I/O failed, we're aborting the commit and already shutdown. * Wake any commit waiters before aborting the log items so we don't * block async log pushers on callbacks. Async log pushers explicitly do * not wait on log force completion because they may be holding locks * required to unpin items. */ if (abort) { spin_lock(&ctx->cil->xc_push_lock); wake_up_all(&ctx->cil->xc_start_wait); wake_up_all(&ctx->cil->xc_commit_wait); spin_unlock(&ctx->cil->xc_push_lock); } xlog_cil_ail_insert(ctx, abort); xfs_extent_busy_sort(&ctx->busy_extents.extent_list); xfs_extent_busy_clear(&ctx->busy_extents.extent_list, xfs_has_discard(mp) && !abort); spin_lock(&ctx->cil->xc_push_lock); list_del(&ctx->committing); spin_unlock(&ctx->cil->xc_push_lock); xlog_cil_free_logvec(&ctx->lv_chain); if (!list_empty(&ctx->busy_extents.extent_list)) { ctx->busy_extents.owner = ctx; xfs_discard_extents(mp, &ctx->busy_extents); return; } kfree(ctx); } void xlog_cil_process_committed( struct list_head *list) { struct xfs_cil_ctx *ctx; while ((ctx = list_first_entry_or_null(list, struct xfs_cil_ctx, iclog_entry))) { list_del(&ctx->iclog_entry); xlog_cil_committed(ctx); } } /* * Record the LSN of the iclog we were just granted space to start writing into. * If the context doesn't have a start_lsn recorded, then this iclog will * contain the start record for the checkpoint. Otherwise this write contains * the commit record for the checkpoint. */ void xlog_cil_set_ctx_write_state( struct xfs_cil_ctx *ctx, struct xlog_in_core *iclog) { struct xfs_cil *cil = ctx->cil; xfs_lsn_t lsn = be64_to_cpu(iclog->ic_header.h_lsn); ASSERT(!ctx->commit_lsn); if (!ctx->start_lsn) { spin_lock(&cil->xc_push_lock); /* * The LSN we need to pass to the log items on transaction * commit is the LSN reported by the first log vector write, not * the commit lsn. If we use the commit record lsn then we can * move the grant write head beyond the tail LSN and overwrite * it. */ ctx->start_lsn = lsn; wake_up_all(&cil->xc_start_wait); spin_unlock(&cil->xc_push_lock); /* * Make sure the metadata we are about to overwrite in the log * has been flushed to stable storage before this iclog is * issued. */ spin_lock(&cil->xc_log->l_icloglock); iclog->ic_flags |= XLOG_ICL_NEED_FLUSH; spin_unlock(&cil->xc_log->l_icloglock); return; } /* * Take a reference to the iclog for the context so that we still hold * it when xlog_write is done and has released it. This means the * context controls when the iclog is released for IO. */ atomic_inc(&iclog->ic_refcnt); /* * xlog_state_get_iclog_space() guarantees there is enough space in the * iclog for an entire commit record, so we can attach the context * callbacks now. This needs to be done before we make the commit_lsn * visible to waiters so that checkpoints with commit records in the * same iclog order their IO completion callbacks in the same order that * the commit records appear in the iclog. */ spin_lock(&cil->xc_log->l_icloglock); list_add_tail(&ctx->iclog_entry, &iclog->ic_callbacks); spin_unlock(&cil->xc_log->l_icloglock); /* * Now we can record the commit LSN and wake anyone waiting for this * sequence to have the ordered commit record assigned to a physical * location in the log. */ spin_lock(&cil->xc_push_lock); ctx->commit_iclog = iclog; ctx->commit_lsn = lsn; wake_up_all(&cil->xc_commit_wait); spin_unlock(&cil->xc_push_lock); } /* * Ensure that the order of log writes follows checkpoint sequence order. This * relies on the context LSN being zero until the log write has guaranteed the * LSN that the log write will start at via xlog_state_get_iclog_space(). */ enum _record_type { _START_RECORD, _COMMIT_RECORD, }; static int xlog_cil_order_write( struct xfs_cil *cil, xfs_csn_t sequence, enum _record_type record) { struct xfs_cil_ctx *ctx; restart: spin_lock(&cil->xc_push_lock); list_for_each_entry(ctx, &cil->xc_committing, committing) { /* * Avoid getting stuck in this loop because we were woken by the * shutdown, but then went back to sleep once already in the * shutdown state. */ if (xlog_is_shutdown(cil->xc_log)) { spin_unlock(&cil->xc_push_lock); return -EIO; } /* * Higher sequences will wait for this one so skip them. * Don't wait for our own sequence, either. */ if (ctx->sequence >= sequence) continue; /* Wait until the LSN for the record has been recorded. */ switch (record) { case _START_RECORD: if (!ctx->start_lsn) { xlog_wait(&cil->xc_start_wait, &cil->xc_push_lock); goto restart; } break; case _COMMIT_RECORD: if (!ctx->commit_lsn) { xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock); goto restart; } break; } } spin_unlock(&cil->xc_push_lock); return 0; } /* * Write out the log vector change now attached to the CIL context. This will * write a start record that needs to be strictly ordered in ascending CIL * sequence order so that log recovery will always use in-order start LSNs when * replaying checkpoints. */ static int xlog_cil_write_chain( struct xfs_cil_ctx *ctx, uint32_t chain_len) { struct xlog *log = ctx->cil->xc_log; int error; error = xlog_cil_order_write(ctx->cil, ctx->sequence, _START_RECORD); if (error) return error; return xlog_write(log, ctx, &ctx->lv_chain, ctx->ticket, chain_len); } /* * Write out the commit record of a checkpoint transaction to close off a * running log write. These commit records are strictly ordered in ascending CIL * sequence order so that log recovery will always replay the checkpoints in the * correct order. */ static int xlog_cil_write_commit_record( struct xfs_cil_ctx *ctx) { struct xlog *log = ctx->cil->xc_log; struct xlog_op_header ophdr = { .oh_clientid = XFS_TRANSACTION, .oh_tid = cpu_to_be32(ctx->ticket->t_tid), .oh_flags = XLOG_COMMIT_TRANS, }; struct xfs_log_iovec reg = { .i_addr = &ophdr, .i_len = sizeof(struct xlog_op_header), .i_type = XLOG_REG_TYPE_COMMIT, }; struct xfs_log_vec vec = { .lv_niovecs = 1, .lv_iovecp = &reg, }; int error; LIST_HEAD(lv_chain); list_add(&vec.lv_list, &lv_chain); if (xlog_is_shutdown(log)) return -EIO; error = xlog_cil_order_write(ctx->cil, ctx->sequence, _COMMIT_RECORD); if (error) return error; /* account for space used by record data */ ctx->ticket->t_curr_res -= reg.i_len; error = xlog_write(log, ctx, &lv_chain, ctx->ticket, reg.i_len); if (error) xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); return error; } struct xlog_cil_trans_hdr { struct xlog_op_header oph[2]; struct xfs_trans_header thdr; struct xfs_log_iovec lhdr[2]; }; /* * Build a checkpoint transaction header to begin the journal transaction. We * need to account for the space used by the transaction header here as it is * not accounted for in xlog_write(). * * This is the only place we write a transaction header, so we also build the * log opheaders that indicate the start of a log transaction and wrap the * transaction header. We keep the start record in it's own log vector rather * than compacting them into a single region as this ends up making the logic * in xlog_write() for handling empty opheaders for start, commit and unmount * records much simpler. */ static void xlog_cil_build_trans_hdr( struct xfs_cil_ctx *ctx, struct xlog_cil_trans_hdr *hdr, struct xfs_log_vec *lvhdr, int num_iovecs) { struct xlog_ticket *tic = ctx->ticket; __be32 tid = cpu_to_be32(tic->t_tid); memset(hdr, 0, sizeof(*hdr)); /* Log start record */ hdr->oph[0].oh_tid = tid; hdr->oph[0].oh_clientid = XFS_TRANSACTION; hdr->oph[0].oh_flags = XLOG_START_TRANS; /* log iovec region pointer */ hdr->lhdr[0].i_addr = &hdr->oph[0]; hdr->lhdr[0].i_len = sizeof(struct xlog_op_header); hdr->lhdr[0].i_type = XLOG_REG_TYPE_LRHEADER; /* log opheader */ hdr->oph[1].oh_tid = tid; hdr->oph[1].oh_clientid = XFS_TRANSACTION; hdr->oph[1].oh_len = cpu_to_be32(sizeof(struct xfs_trans_header)); /* transaction header in host byte order format */ hdr->thdr.th_magic = XFS_TRANS_HEADER_MAGIC; hdr->thdr.th_type = XFS_TRANS_CHECKPOINT; hdr->thdr.th_tid = tic->t_tid; hdr->thdr.th_num_items = num_iovecs; /* log iovec region pointer */ hdr->lhdr[1].i_addr = &hdr->oph[1]; hdr->lhdr[1].i_len = sizeof(struct xlog_op_header) + sizeof(struct xfs_trans_header); hdr->lhdr[1].i_type = XLOG_REG_TYPE_TRANSHDR; lvhdr->lv_niovecs = 2; lvhdr->lv_iovecp = &hdr->lhdr[0]; lvhdr->lv_bytes = hdr->lhdr[0].i_len + hdr->lhdr[1].i_len; tic->t_curr_res -= lvhdr->lv_bytes; } /* * CIL item reordering compare function. We want to order in ascending ID order, * but we want to leave items with the same ID in the order they were added to * the list. This is important for operations like reflink where we log 4 order * dependent intents in a single transaction when we overwrite an existing * shared extent with a new shared extent. i.e. BUI(unmap), CUI(drop), * CUI (inc), BUI(remap)... */ static int xlog_cil_order_cmp( void *priv, const struct list_head *a, const struct list_head *b) { struct xfs_log_vec *l1 = container_of(a, struct xfs_log_vec, lv_list); struct xfs_log_vec *l2 = container_of(b, struct xfs_log_vec, lv_list); return l1->lv_order_id > l2->lv_order_id; } /* * Pull all the log vectors off the items in the CIL, and remove the items from * the CIL. We don't need the CIL lock here because it's only needed on the * transaction commit side which is currently locked out by the flush lock. * * If a log item is marked with a whiteout, we do not need to write it to the * journal and so we just move them to the whiteout list for the caller to * dispose of appropriately. */ static void xlog_cil_build_lv_chain( struct xfs_cil_ctx *ctx, struct list_head *whiteouts, uint32_t *num_iovecs, uint32_t *num_bytes) { while (!list_empty(&ctx->log_items)) { struct xfs_log_item *item; struct xfs_log_vec *lv; item = list_first_entry(&ctx->log_items, struct xfs_log_item, li_cil); if (test_bit(XFS_LI_WHITEOUT, &item->li_flags)) { list_move(&item->li_cil, whiteouts); trace_xfs_cil_whiteout_skip(item); continue; } lv = item->li_lv; lv->lv_order_id = item->li_order_id; /* we don't write ordered log vectors */ if (lv->lv_buf_used != XFS_LOG_VEC_ORDERED) *num_bytes += lv->lv_bytes; *num_iovecs += lv->lv_niovecs; list_add_tail(&lv->lv_list, &ctx->lv_chain); list_del_init(&item->li_cil); item->li_order_id = 0; item->li_lv = NULL; } } static void xlog_cil_cleanup_whiteouts( struct list_head *whiteouts) { while (!list_empty(whiteouts)) { struct xfs_log_item *item = list_first_entry(whiteouts, struct xfs_log_item, li_cil); list_del_init(&item->li_cil); trace_xfs_cil_whiteout_unpin(item); item->li_ops->iop_unpin(item, 1); } } /* * Push the Committed Item List to the log. * * If the current sequence is the same as xc_push_seq we need to do a flush. If * xc_push_seq is less than the current sequence, then it has already been * flushed and we don't need to do anything - the caller will wait for it to * complete if necessary. * * xc_push_seq is checked unlocked against the sequence number for a match. * Hence we can allow log forces to run racily and not issue pushes for the * same sequence twice. If we get a race between multiple pushes for the same * sequence they will block on the first one and then abort, hence avoiding * needless pushes. * * This runs from a workqueue so it does not inherent any specific memory * allocation context. However, we do not want to block on memory reclaim * recursing back into the filesystem because this push may have been triggered * by memory reclaim itself. Hence we really need to run under full GFP_NOFS * contraints here. */ static void xlog_cil_push_work( struct work_struct *work) { unsigned int nofs_flags = memalloc_nofs_save(); struct xfs_cil_ctx *ctx = container_of(work, struct xfs_cil_ctx, push_work); struct xfs_cil *cil = ctx->cil; struct xlog *log = cil->xc_log; struct xfs_cil_ctx *new_ctx; int num_iovecs = 0; int num_bytes = 0; int error = 0; struct xlog_cil_trans_hdr thdr; struct xfs_log_vec lvhdr = {}; xfs_csn_t push_seq; bool push_commit_stable; LIST_HEAD (whiteouts); struct xlog_ticket *ticket; new_ctx = xlog_cil_ctx_alloc(); new_ctx->ticket = xlog_cil_ticket_alloc(log); down_write(&cil->xc_ctx_lock); spin_lock(&cil->xc_push_lock); push_seq = cil->xc_push_seq; ASSERT(push_seq <= ctx->sequence); push_commit_stable = cil->xc_push_commit_stable; cil->xc_push_commit_stable = false; /* * As we are about to switch to a new, empty CIL context, we no longer * need to throttle tasks on CIL space overruns. Wake any waiters that * the hard push throttle may have caught so they can start committing * to the new context. The ctx->xc_push_lock provides the serialisation * necessary for safely using the lockless waitqueue_active() check in * this context. */ if (waitqueue_active(&cil->xc_push_wait)) wake_up_all(&cil->xc_push_wait); xlog_cil_push_pcp_aggregate(cil, ctx); /* * Check if we've anything to push. If there is nothing, then we don't * move on to a new sequence number and so we have to be able to push * this sequence again later. */ if (test_bit(XLOG_CIL_EMPTY, &cil->xc_flags)) { cil->xc_push_seq = 0; spin_unlock(&cil->xc_push_lock); goto out_skip; } /* check for a previously pushed sequence */ if (push_seq < ctx->sequence) { spin_unlock(&cil->xc_push_lock); goto out_skip; } /* * We are now going to push this context, so add it to the committing * list before we do anything else. This ensures that anyone waiting on * this push can easily detect the difference between a "push in * progress" and "CIL is empty, nothing to do". * * IOWs, a wait loop can now check for: * the current sequence not being found on the committing list; * an empty CIL; and * an unchanged sequence number * to detect a push that had nothing to do and therefore does not need * waiting on. If the CIL is not empty, we get put on the committing * list before emptying the CIL and bumping the sequence number. Hence * an empty CIL and an unchanged sequence number means we jumped out * above after doing nothing. * * Hence the waiter will either find the commit sequence on the * committing list or the sequence number will be unchanged and the CIL * still dirty. In that latter case, the push has not yet started, and * so the waiter will have to continue trying to check the CIL * committing list until it is found. In extreme cases of delay, the * sequence may fully commit between the attempts the wait makes to wait * on the commit sequence. */ list_add(&ctx->committing, &cil->xc_committing); spin_unlock(&cil->xc_push_lock); xlog_cil_build_lv_chain(ctx, &whiteouts, &num_iovecs, &num_bytes); /* * Switch the contexts so we can drop the context lock and move out * of a shared context. We can't just go straight to the commit record, * though - we need to synchronise with previous and future commits so * that the commit records are correctly ordered in the log to ensure * that we process items during log IO completion in the correct order. * * For example, if we get an EFI in one checkpoint and the EFD in the * next (e.g. due to log forces), we do not want the checkpoint with * the EFD to be committed before the checkpoint with the EFI. Hence * we must strictly order the commit records of the checkpoints so * that: a) the checkpoint callbacks are attached to the iclogs in the * correct order; and b) the checkpoints are replayed in correct order * in log recovery. * * Hence we need to add this context to the committing context list so * that higher sequences will wait for us to write out a commit record * before they do. * * xfs_log_force_seq requires us to mirror the new sequence into the cil * structure atomically with the addition of this sequence to the * committing list. This also ensures that we can do unlocked checks * against the current sequence in log forces without risking * deferencing a freed context pointer. */ spin_lock(&cil->xc_push_lock); xlog_cil_ctx_switch(cil, new_ctx); spin_unlock(&cil->xc_push_lock); up_write(&cil->xc_ctx_lock); /* * Sort the log vector chain before we add the transaction headers. * This ensures we always have the transaction headers at the start * of the chain. */ list_sort(NULL, &ctx->lv_chain, xlog_cil_order_cmp); /* * Build a checkpoint transaction header and write it to the log to * begin the transaction. We need to account for the space used by the * transaction header here as it is not accounted for in xlog_write(). * Add the lvhdr to the head of the lv chain we pass to xlog_write() so * it gets written into the iclog first. */ xlog_cil_build_trans_hdr(ctx, &thdr, &lvhdr, num_iovecs); num_bytes += lvhdr.lv_bytes; list_add(&lvhdr.lv_list, &ctx->lv_chain); /* * Take the lvhdr back off the lv_chain immediately after calling * xlog_cil_write_chain() as it should not be passed to log IO * completion. */ error = xlog_cil_write_chain(ctx, num_bytes); list_del(&lvhdr.lv_list); if (error) goto out_abort_free_ticket; error = xlog_cil_write_commit_record(ctx); if (error) goto out_abort_free_ticket; /* * Grab the ticket from the ctx so we can ungrant it after releasing the * commit_iclog. The ctx may be freed by the time we return from * releasing the commit_iclog (i.e. checkpoint has been completed and * callback run) so we can't reference the ctx after the call to * xlog_state_release_iclog(). */ ticket = ctx->ticket; /* * If the checkpoint spans multiple iclogs, wait for all previous iclogs * to complete before we submit the commit_iclog. We can't use state * checks for this - ACTIVE can be either a past completed iclog or a * future iclog being filled, while WANT_SYNC through SYNC_DONE can be a * past or future iclog awaiting IO or ordered IO completion to be run. * In the latter case, if it's a future iclog and we wait on it, the we * will hang because it won't get processed through to ic_force_wait * wakeup until this commit_iclog is written to disk. Hence we use the * iclog header lsn and compare it to the commit lsn to determine if we * need to wait on iclogs or not. */ spin_lock(&log->l_icloglock); if (ctx->start_lsn != ctx->commit_lsn) { xfs_lsn_t plsn; plsn = be64_to_cpu(ctx->commit_iclog->ic_prev->ic_header.h_lsn); if (plsn && XFS_LSN_CMP(plsn, ctx->commit_lsn) < 0) { /* * Waiting on ic_force_wait orders the completion of * iclogs older than ic_prev. Hence we only need to wait * on the most recent older iclog here. */ xlog_wait_on_iclog(ctx->commit_iclog->ic_prev); spin_lock(&log->l_icloglock); } /* * We need to issue a pre-flush so that the ordering for this * checkpoint is correctly preserved down to stable storage. */ ctx->commit_iclog->ic_flags |= XLOG_ICL_NEED_FLUSH; } /* * The commit iclog must be written to stable storage to guarantee * journal IO vs metadata writeback IO is correctly ordered on stable * storage. * * If the push caller needs the commit to be immediately stable and the * commit_iclog is not yet marked as XLOG_STATE_WANT_SYNC to indicate it * will be written when released, switch it's state to WANT_SYNC right * now. */ ctx->commit_iclog->ic_flags |= XLOG_ICL_NEED_FUA; if (push_commit_stable && ctx->commit_iclog->ic_state == XLOG_STATE_ACTIVE) xlog_state_switch_iclogs(log, ctx->commit_iclog, 0); ticket = ctx->ticket; xlog_state_release_iclog(log, ctx->commit_iclog, ticket); /* Not safe to reference ctx now! */ spin_unlock(&log->l_icloglock); xlog_cil_cleanup_whiteouts(&whiteouts); xfs_log_ticket_ungrant(log, ticket); memalloc_nofs_restore(nofs_flags); return; out_skip: up_write(&cil->xc_ctx_lock); xfs_log_ticket_put(new_ctx->ticket); kfree(new_ctx); memalloc_nofs_restore(nofs_flags); return; out_abort_free_ticket: ASSERT(xlog_is_shutdown(log)); xlog_cil_cleanup_whiteouts(&whiteouts); if (!ctx->commit_iclog) { xfs_log_ticket_ungrant(log, ctx->ticket); xlog_cil_committed(ctx); memalloc_nofs_restore(nofs_flags); return; } spin_lock(&log->l_icloglock); ticket = ctx->ticket; xlog_state_release_iclog(log, ctx->commit_iclog, ticket); /* Not safe to reference ctx now! */ spin_unlock(&log->l_icloglock); xfs_log_ticket_ungrant(log, ticket); memalloc_nofs_restore(nofs_flags); } /* * We need to push CIL every so often so we don't cache more than we can fit in * the log. The limit really is that a checkpoint can't be more than half the * log (the current checkpoint is not allowed to overwrite the previous * checkpoint), but commit latency and memory usage limit this to a smaller * size. */ static void xlog_cil_push_background( struct xlog *log) { struct xfs_cil *cil = log->l_cilp; int space_used = atomic_read(&cil->xc_ctx->space_used); /* * The cil won't be empty because we are called while holding the * context lock so whatever we added to the CIL will still be there. */ ASSERT(!test_bit(XLOG_CIL_EMPTY, &cil->xc_flags)); /* * We are done if: * - we haven't used up all the space available yet; or * - we've already queued up a push; and * - we're not over the hard limit; and * - nothing has been over the hard limit. * * If so, we don't need to take the push lock as there's nothing to do. */ if (space_used < XLOG_CIL_SPACE_LIMIT(log) || (cil->xc_push_seq == cil->xc_current_sequence && space_used < XLOG_CIL_BLOCKING_SPACE_LIMIT(log) && !waitqueue_active(&cil->xc_push_wait))) { up_read(&cil->xc_ctx_lock); return; } spin_lock(&cil->xc_push_lock); if (cil->xc_push_seq < cil->xc_current_sequence) { cil->xc_push_seq = cil->xc_current_sequence; queue_work(cil->xc_push_wq, &cil->xc_ctx->push_work); } /* * Drop the context lock now, we can't hold that if we need to sleep * because we are over the blocking threshold. The push_lock is still * held, so blocking threshold sleep/wakeup is still correctly * serialised here. */ up_read(&cil->xc_ctx_lock); /* * If we are well over the space limit, throttle the work that is being * done until the push work on this context has begun. Enforce the hard * throttle on all transaction commits once it has been activated, even * if the committing transactions have resulted in the space usage * dipping back down under the hard limit. * * The ctx->xc_push_lock provides the serialisation necessary for safely * calling xlog_cil_over_hard_limit() in this context. */ if (xlog_cil_over_hard_limit(log, space_used)) { trace_xfs_log_cil_wait(log, cil->xc_ctx->ticket); ASSERT(space_used < log->l_logsize); xlog_wait(&cil->xc_push_wait, &cil->xc_push_lock); return; } spin_unlock(&cil->xc_push_lock); } /* * xlog_cil_push_now() is used to trigger an immediate CIL push to the sequence * number that is passed. When it returns, the work will be queued for * @push_seq, but it won't be completed. * * If the caller is performing a synchronous force, we will flush the workqueue * to get previously queued work moving to minimise the wait time they will * undergo waiting for all outstanding pushes to complete. The caller is * expected to do the required waiting for push_seq to complete. * * If the caller is performing an async push, we need to ensure that the * checkpoint is fully flushed out of the iclogs when we finish the push. If we * don't do this, then the commit record may remain sitting in memory in an * ACTIVE iclog. This then requires another full log force to push to disk, * which defeats the purpose of having an async, non-blocking CIL force * mechanism. Hence in this case we need to pass a flag to the push work to * indicate it needs to flush the commit record itself. */ static void xlog_cil_push_now( struct xlog *log, xfs_lsn_t push_seq, bool async) { struct xfs_cil *cil = log->l_cilp; if (!cil) return; ASSERT(push_seq && push_seq <= cil->xc_current_sequence); /* start on any pending background push to minimise wait time on it */ if (!async) flush_workqueue(cil->xc_push_wq); spin_lock(&cil->xc_push_lock); /* * If this is an async flush request, we always need to set the * xc_push_commit_stable flag even if something else has already queued * a push. The flush caller is asking for the CIL to be on stable * storage when the next push completes, so regardless of who has queued * the push, the flush requires stable semantics from it. */ cil->xc_push_commit_stable = async; /* * If the CIL is empty or we've already pushed the sequence then * there's no more work that we need to do. */ if (test_bit(XLOG_CIL_EMPTY, &cil->xc_flags) || push_seq <= cil->xc_push_seq) { spin_unlock(&cil->xc_push_lock); return; } cil->xc_push_seq = push_seq; queue_work(cil->xc_push_wq, &cil->xc_ctx->push_work); spin_unlock(&cil->xc_push_lock); } bool xlog_cil_empty( struct xlog *log) { struct xfs_cil *cil = log->l_cilp; bool empty = false; spin_lock(&cil->xc_push_lock); if (test_bit(XLOG_CIL_EMPTY, &cil->xc_flags)) empty = true; spin_unlock(&cil->xc_push_lock); return empty; } /* * If there are intent done items in this transaction and the related intent was * committed in the current (same) CIL checkpoint, we don't need to write either * the intent or intent done item to the journal as the change will be * journalled atomically within this checkpoint. As we cannot remove items from * the CIL here, mark the related intent with a whiteout so that the CIL push * can remove it rather than writing it to the journal. Then remove the intent * done item from the current transaction and release it so it doesn't get put * into the CIL at all. */ static uint32_t xlog_cil_process_intents( struct xfs_cil *cil, struct xfs_trans *tp) { struct xfs_log_item *lip, *ilip, *next; uint32_t len = 0; list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) { if (!(lip->li_ops->flags & XFS_ITEM_INTENT_DONE)) continue; ilip = lip->li_ops->iop_intent(lip); if (!ilip || !xlog_item_in_current_chkpt(cil, ilip)) continue; set_bit(XFS_LI_WHITEOUT, &ilip->li_flags); trace_xfs_cil_whiteout_mark(ilip); len += ilip->li_lv->lv_bytes; kvfree(ilip->li_lv); ilip->li_lv = NULL; xfs_trans_del_item(lip); lip->li_ops->iop_release(lip); } return len; } /* * Commit a transaction with the given vector to the Committed Item List. * * To do this, we need to format the item, pin it in memory if required and * account for the space used by the transaction. Once we have done that we * need to release the unused reservation for the transaction, attach the * transaction to the checkpoint context so we carry the busy extents through * to checkpoint completion, and then unlock all the items in the transaction. * * Called with the context lock already held in read mode to lock out * background commit, returns without it held once background commits are * allowed again. */ void xlog_cil_commit( struct xlog *log, struct xfs_trans *tp, xfs_csn_t *commit_seq, bool regrant) { struct xfs_cil *cil = log->l_cilp; struct xfs_log_item *lip, *next; uint32_t released_space = 0; /* * Do all necessary memory allocation before we lock the CIL. * This ensures the allocation does not deadlock with a CIL * push in memory reclaim (e.g. from kswapd). */ xlog_cil_alloc_shadow_bufs(log, tp); /* lock out background commit */ down_read(&cil->xc_ctx_lock); if (tp->t_flags & XFS_TRANS_HAS_INTENT_DONE) released_space = xlog_cil_process_intents(cil, tp); xlog_cil_insert_items(log, tp, released_space); if (regrant && !xlog_is_shutdown(log)) xfs_log_ticket_regrant(log, tp->t_ticket); else xfs_log_ticket_ungrant(log, tp->t_ticket); tp->t_ticket = NULL; xfs_trans_unreserve_and_mod_sb(tp); /* * Once all the items of the transaction have been copied to the CIL, * the items can be unlocked and possibly freed. * * This needs to be done before we drop the CIL context lock because we * have to update state in the log items and unlock them before they go * to disk. If we don't, then the CIL checkpoint can race with us and * we can run checkpoint completion before we've updated and unlocked * the log items. This affects (at least) processing of stale buffers, * inodes and EFIs. */ trace_xfs_trans_commit_items(tp, _RET_IP_); list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) { xfs_trans_del_item(lip); if (lip->li_ops->iop_committing) lip->li_ops->iop_committing(lip, cil->xc_ctx->sequence); } if (commit_seq) *commit_seq = cil->xc_ctx->sequence; /* xlog_cil_push_background() releases cil->xc_ctx_lock */ xlog_cil_push_background(log); } /* * Flush the CIL to stable storage but don't wait for it to complete. This * requires the CIL push to ensure the commit record for the push hits the disk, * but otherwise is no different to a push done from a log force. */ void xlog_cil_flush( struct xlog *log) { xfs_csn_t seq = log->l_cilp->xc_current_sequence; trace_xfs_log_force(log->l_mp, seq, _RET_IP_); xlog_cil_push_now(log, seq, true); /* * If the CIL is empty, make sure that any previous checkpoint that may * still be in an active iclog is pushed to stable storage. */ if (test_bit(XLOG_CIL_EMPTY, &log->l_cilp->xc_flags)) xfs_log_force(log->l_mp, 0); } /* * Conditionally push the CIL based on the sequence passed in. * * We only need to push if we haven't already pushed the sequence number given. * Hence the only time we will trigger a push here is if the push sequence is * the same as the current context. * * We return the current commit lsn to allow the callers to determine if a * iclog flush is necessary following this call. */ xfs_lsn_t xlog_cil_force_seq( struct xlog *log, xfs_csn_t sequence) { struct xfs_cil *cil = log->l_cilp; struct xfs_cil_ctx *ctx; xfs_lsn_t commit_lsn = NULLCOMMITLSN; ASSERT(sequence <= cil->xc_current_sequence); if (!sequence) sequence = cil->xc_current_sequence; trace_xfs_log_force(log->l_mp, sequence, _RET_IP_); /* * check to see if we need to force out the current context. * xlog_cil_push() handles racing pushes for the same sequence, * so no need to deal with it here. */ restart: xlog_cil_push_now(log, sequence, false); /* * See if we can find a previous sequence still committing. * We need to wait for all previous sequence commits to complete * before allowing the force of push_seq to go ahead. Hence block * on commits for those as well. */ spin_lock(&cil->xc_push_lock); list_for_each_entry(ctx, &cil->xc_committing, committing) { /* * Avoid getting stuck in this loop because we were woken by the * shutdown, but then went back to sleep once already in the * shutdown state. */ if (xlog_is_shutdown(log)) goto out_shutdown; if (ctx->sequence > sequence) continue; if (!ctx->commit_lsn) { /* * It is still being pushed! Wait for the push to * complete, then start again from the beginning. */ XFS_STATS_INC(log->l_mp, xs_log_force_sleep); xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock); goto restart; } if (ctx->sequence != sequence) continue; /* found it! */ commit_lsn = ctx->commit_lsn; } /* * The call to xlog_cil_push_now() executes the push in the background. * Hence by the time we have got here it our sequence may not have been * pushed yet. This is true if the current sequence still matches the * push sequence after the above wait loop and the CIL still contains * dirty objects. This is guaranteed by the push code first adding the * context to the committing list before emptying the CIL. * * Hence if we don't find the context in the committing list and the * current sequence number is unchanged then the CIL contents are * significant. If the CIL is empty, if means there was nothing to push * and that means there is nothing to wait for. If the CIL is not empty, * it means we haven't yet started the push, because if it had started * we would have found the context on the committing list. */ if (sequence == cil->xc_current_sequence && !test_bit(XLOG_CIL_EMPTY, &cil->xc_flags)) { spin_unlock(&cil->xc_push_lock); goto restart; } spin_unlock(&cil->xc_push_lock); return commit_lsn; /* * We detected a shutdown in progress. We need to trigger the log force * to pass through it's iclog state machine error handling, even though * we are already in a shutdown state. Hence we can't return * NULLCOMMITLSN here as that has special meaning to log forces (i.e. * LSN is already stable), so we return a zero LSN instead. */ out_shutdown: spin_unlock(&cil->xc_push_lock); return 0; } /* * Perform initial CIL structure initialisation. */ int xlog_cil_init( struct xlog *log) { struct xfs_cil *cil; struct xfs_cil_ctx *ctx; struct xlog_cil_pcp *cilpcp; int cpu; cil = kzalloc(sizeof(*cil), GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!cil) return -ENOMEM; /* * Limit the CIL pipeline depth to 4 concurrent works to bound the * concurrency the log spinlocks will be exposed to. */ cil->xc_push_wq = alloc_workqueue("xfs-cil/%s", XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_UNBOUND), 4, log->l_mp->m_super->s_id); if (!cil->xc_push_wq) goto out_destroy_cil; cil->xc_log = log; cil->xc_pcp = alloc_percpu(struct xlog_cil_pcp); if (!cil->xc_pcp) goto out_destroy_wq; for_each_possible_cpu(cpu) { cilpcp = per_cpu_ptr(cil->xc_pcp, cpu); INIT_LIST_HEAD(&cilpcp->busy_extents); INIT_LIST_HEAD(&cilpcp->log_items); } INIT_LIST_HEAD(&cil->xc_committing); spin_lock_init(&cil->xc_push_lock); init_waitqueue_head(&cil->xc_push_wait); init_rwsem(&cil->xc_ctx_lock); init_waitqueue_head(&cil->xc_start_wait); init_waitqueue_head(&cil->xc_commit_wait); log->l_cilp = cil; ctx = xlog_cil_ctx_alloc(); xlog_cil_ctx_switch(cil, ctx); return 0; out_destroy_wq: destroy_workqueue(cil->xc_push_wq); out_destroy_cil: kfree(cil); return -ENOMEM; } void xlog_cil_destroy( struct xlog *log) { struct xfs_cil *cil = log->l_cilp; if (cil->xc_ctx) { if (cil->xc_ctx->ticket) xfs_log_ticket_put(cil->xc_ctx->ticket); kfree(cil->xc_ctx); } ASSERT(test_bit(XLOG_CIL_EMPTY, &cil->xc_flags)); free_percpu(cil->xc_pcp); destroy_workqueue(cil->xc_push_wq); kfree(cil); }
4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/fs.h> #include <linux/qnx4_fs.h> #define QNX4_DEBUG 0 #if QNX4_DEBUG #define QNX4DEBUG(X) printk X #else #define QNX4DEBUG(X) (void) 0 #endif struct qnx4_sb_info { unsigned int Version; /* may be useful */ struct qnx4_inode_entry *BitMap; /* useful */ }; struct qnx4_inode_info { struct qnx4_inode_entry raw; loff_t mmu_private; struct inode vfs_inode; }; extern struct inode *qnx4_iget(struct super_block *, unsigned long); extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); extern unsigned long qnx4_count_free_blocks(struct super_block *sb); extern unsigned long qnx4_block_map(struct inode *inode, long iblock); extern const struct inode_operations qnx4_dir_inode_operations; extern const struct file_operations qnx4_dir_operations; extern int qnx4_is_free(struct super_block *sb, long block); static inline struct qnx4_sb_info *qnx4_sb(struct super_block *sb) { return sb->s_fs_info; } static inline struct qnx4_inode_info *qnx4_i(struct inode *inode) { return container_of(inode, struct qnx4_inode_info, vfs_inode); } static inline struct qnx4_inode_entry *qnx4_raw_inode(struct inode *inode) { return &qnx4_i(inode)->raw; } /* * A qnx4 directory entry is an inode entry or link info * depending on the status field in the last byte. The * first byte is where the name start either way, and a * zero means it's empty. * * Also, due to a bug in gcc, we don't want to use the * real (differently sized) name arrays in the inode and * link entries, but always the 'de_name[]' one in the * fake struct entry. * * See * * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99578#c6 * * for details, but basically gcc will take the size of the * 'name' array from one of the used union entries randomly. * * This use of 'de_name[]' (48 bytes) avoids the false positive * warnings that would happen if gcc decides to use 'inode.di_name' * (16 bytes) even when the pointer and size were to come from * 'link.dl_name' (48 bytes). * * In all cases the actual name pointer itself is the same, it's * only the gcc internal 'what is the size of this field' logic * that can get confused. */ union qnx4_directory_entry { struct { const char de_name[48]; u8 de_pad[15]; u8 de_status; }; struct qnx4_inode_entry inode; struct qnx4_link_info link; }; static inline const char *get_entry_fname(union qnx4_directory_entry *de, int *size) { /* Make sure the status byte is in the same place for all structs. */ BUILD_BUG_ON(offsetof(struct qnx4_inode_entry, di_status) != offsetof(struct qnx4_link_info, dl_status)); BUILD_BUG_ON(offsetof(struct qnx4_inode_entry, di_status) != offsetof(union qnx4_directory_entry, de_status)); if (!de->de_name[0]) return NULL; if (!(de->de_status & (QNX4_FILE_USED|QNX4_FILE_LINK))) return NULL; if (!(de->de_status & QNX4_FILE_LINK)) *size = sizeof(de->inode.di_fname); else *size = sizeof(de->link.dl_fname); *size = strnlen(de->de_name, *size); return de->de_name; }
1 1 1 3 3 3 5 5 5 5 13 13 13 13 6 5 5 5 5 5 5 5 5 6 1 1 1 8 8 1 1 1 2 1 3 3 2 8 8 4 4 4 4 4 4 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 // SPDX-License-Identifier: GPL-2.0-only /* * * Copyright (C) Hans Alblas PE1AYX <hans@esrac.ele.tue.nl> * Copyright (C) 2004, 05 Ralf Baechle DL5RB <ralf@linux-mips.org> * Copyright (C) 2004, 05 Thomas Osterried DL9SAU <thomas@x-berg.in-berlin.de> */ #include <linux/module.h> #include <linux/bitops.h> #include <linux/uaccess.h> #include <linux/crc16.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/in.h> #include <linux/inet.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/errno.h> #include <linux/netdevice.h> #include <linux/major.h> #include <linux/init.h> #include <linux/rtnetlink.h> #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/if_arp.h> #include <linux/jiffies.h> #include <linux/refcount.h> #include <net/ax25.h> #define AX_MTU 236 /* some arch define END as assembly function ending, just undef it */ #undef END /* SLIP/KISS protocol characters. */ #define END 0300 /* indicates end of frame */ #define ESC 0333 /* indicates byte stuffing */ #define ESC_END 0334 /* ESC ESC_END means END 'data' */ #define ESC_ESC 0335 /* ESC ESC_ESC means ESC 'data' */ struct mkiss { struct tty_struct *tty; /* ptr to TTY structure */ struct net_device *dev; /* easy for intr handling */ /* These are pointers to the malloc()ed frame buffers. */ spinlock_t buflock;/* lock for rbuf and xbuf */ unsigned char *rbuff; /* receiver buffer */ int rcount; /* received chars counter */ unsigned char *xbuff; /* transmitter buffer */ unsigned char *xhead; /* pointer to next byte to XMIT */ int xleft; /* bytes left in XMIT queue */ /* Detailed SLIP statistics. */ int mtu; /* Our mtu (to spot changes!) */ int buffsize; /* Max buffers sizes */ unsigned long flags; /* Flag values/ mode etc */ /* long req'd: used by set_bit --RR */ #define AXF_INUSE 0 /* Channel in use */ #define AXF_ESCAPE 1 /* ESC received */ #define AXF_ERROR 2 /* Parity, etc. error */ #define AXF_KEEPTEST 3 /* Keepalive test flag */ #define AXF_OUTWAIT 4 /* is outpacket was flag */ int mode; int crcmode; /* MW: for FlexNet, SMACK etc. */ int crcauto; /* CRC auto mode */ #define CRC_MODE_NONE 0 #define CRC_MODE_FLEX 1 #define CRC_MODE_SMACK 2 #define CRC_MODE_FLEX_TEST 3 #define CRC_MODE_SMACK_TEST 4 refcount_t refcnt; struct completion dead; }; /*---------------------------------------------------------------------------*/ static const unsigned short crc_flex_table[] = { 0x0f87, 0x1e0e, 0x2c95, 0x3d1c, 0x49a3, 0x582a, 0x6ab1, 0x7b38, 0x83cf, 0x9246, 0xa0dd, 0xb154, 0xc5eb, 0xd462, 0xe6f9, 0xf770, 0x1f06, 0x0e8f, 0x3c14, 0x2d9d, 0x5922, 0x48ab, 0x7a30, 0x6bb9, 0x934e, 0x82c7, 0xb05c, 0xa1d5, 0xd56a, 0xc4e3, 0xf678, 0xe7f1, 0x2e85, 0x3f0c, 0x0d97, 0x1c1e, 0x68a1, 0x7928, 0x4bb3, 0x5a3a, 0xa2cd, 0xb344, 0x81df, 0x9056, 0xe4e9, 0xf560, 0xc7fb, 0xd672, 0x3e04, 0x2f8d, 0x1d16, 0x0c9f, 0x7820, 0x69a9, 0x5b32, 0x4abb, 0xb24c, 0xa3c5, 0x915e, 0x80d7, 0xf468, 0xe5e1, 0xd77a, 0xc6f3, 0x4d83, 0x5c0a, 0x6e91, 0x7f18, 0x0ba7, 0x1a2e, 0x28b5, 0x393c, 0xc1cb, 0xd042, 0xe2d9, 0xf350, 0x87ef, 0x9666, 0xa4fd, 0xb574, 0x5d02, 0x4c8b, 0x7e10, 0x6f99, 0x1b26, 0x0aaf, 0x3834, 0x29bd, 0xd14a, 0xc0c3, 0xf258, 0xe3d1, 0x976e, 0x86e7, 0xb47c, 0xa5f5, 0x6c81, 0x7d08, 0x4f93, 0x5e1a, 0x2aa5, 0x3b2c, 0x09b7, 0x183e, 0xe0c9, 0xf140, 0xc3db, 0xd252, 0xa6ed, 0xb764, 0x85ff, 0x9476, 0x7c00, 0x6d89, 0x5f12, 0x4e9b, 0x3a24, 0x2bad, 0x1936, 0x08bf, 0xf048, 0xe1c1, 0xd35a, 0xc2d3, 0xb66c, 0xa7e5, 0x957e, 0x84f7, 0x8b8f, 0x9a06, 0xa89d, 0xb914, 0xcdab, 0xdc22, 0xeeb9, 0xff30, 0x07c7, 0x164e, 0x24d5, 0x355c, 0x41e3, 0x506a, 0x62f1, 0x7378, 0x9b0e, 0x8a87, 0xb81c, 0xa995, 0xdd2a, 0xcca3, 0xfe38, 0xefb1, 0x1746, 0x06cf, 0x3454, 0x25dd, 0x5162, 0x40eb, 0x7270, 0x63f9, 0xaa8d, 0xbb04, 0x899f, 0x9816, 0xeca9, 0xfd20, 0xcfbb, 0xde32, 0x26c5, 0x374c, 0x05d7, 0x145e, 0x60e1, 0x7168, 0x43f3, 0x527a, 0xba0c, 0xab85, 0x991e, 0x8897, 0xfc28, 0xeda1, 0xdf3a, 0xceb3, 0x3644, 0x27cd, 0x1556, 0x04df, 0x7060, 0x61e9, 0x5372, 0x42fb, 0xc98b, 0xd802, 0xea99, 0xfb10, 0x8faf, 0x9e26, 0xacbd, 0xbd34, 0x45c3, 0x544a, 0x66d1, 0x7758, 0x03e7, 0x126e, 0x20f5, 0x317c, 0xd90a, 0xc883, 0xfa18, 0xeb91, 0x9f2e, 0x8ea7, 0xbc3c, 0xadb5, 0x5542, 0x44cb, 0x7650, 0x67d9, 0x1366, 0x02ef, 0x3074, 0x21fd, 0xe889, 0xf900, 0xcb9b, 0xda12, 0xaead, 0xbf24, 0x8dbf, 0x9c36, 0x64c1, 0x7548, 0x47d3, 0x565a, 0x22e5, 0x336c, 0x01f7, 0x107e, 0xf808, 0xe981, 0xdb1a, 0xca93, 0xbe2c, 0xafa5, 0x9d3e, 0x8cb7, 0x7440, 0x65c9, 0x5752, 0x46db, 0x3264, 0x23ed, 0x1176, 0x00ff }; static unsigned short calc_crc_flex(unsigned char *cp, int size) { unsigned short crc = 0xffff; while (size--) crc = (crc << 8) ^ crc_flex_table[((crc >> 8) ^ *cp++) & 0xff]; return crc; } static int check_crc_flex(unsigned char *cp, int size) { unsigned short crc = 0xffff; if (size < 3) return -1; while (size--) crc = (crc << 8) ^ crc_flex_table[((crc >> 8) ^ *cp++) & 0xff]; if ((crc & 0xffff) != 0x7070) return -1; return 0; } static int check_crc_16(unsigned char *cp, int size) { unsigned short crc = 0x0000; if (size < 3) return -1; crc = crc16(0, cp, size); if (crc != 0x0000) return -1; return 0; } /* * Standard encapsulation */ static int kiss_esc(unsigned char *s, unsigned char *d, int len) { unsigned char *ptr = d; unsigned char c; /* * Send an initial END character to flush out any data that may have * accumulated in the receiver due to line noise. */ *ptr++ = END; while (len-- > 0) { switch (c = *s++) { case END: *ptr++ = ESC; *ptr++ = ESC_END; break; case ESC: *ptr++ = ESC; *ptr++ = ESC_ESC; break; default: *ptr++ = c; break; } } *ptr++ = END; return ptr - d; } /* * MW: * OK its ugly, but tell me a better solution without copying the * packet to a temporary buffer :-) */ static int kiss_esc_crc(unsigned char *s, unsigned char *d, unsigned short crc, int len) { unsigned char *ptr = d; unsigned char c=0; *ptr++ = END; while (len > 0) { if (len > 2) c = *s++; else if (len > 1) c = crc >> 8; else c = crc & 0xff; len--; switch (c) { case END: *ptr++ = ESC; *ptr++ = ESC_END; break; case ESC: *ptr++ = ESC; *ptr++ = ESC_ESC; break; default: *ptr++ = c; break; } } *ptr++ = END; return ptr - d; } /* Send one completely decapsulated AX.25 packet to the AX.25 layer. */ static void ax_bump(struct mkiss *ax) { struct sk_buff *skb; int count; spin_lock_bh(&ax->buflock); if (ax->rbuff[0] > 0x0f) { if (ax->rbuff[0] & 0x80) { if (check_crc_16(ax->rbuff, ax->rcount) < 0) { ax->dev->stats.rx_errors++; spin_unlock_bh(&ax->buflock); return; } if (ax->crcmode != CRC_MODE_SMACK && ax->crcauto) { printk(KERN_INFO "mkiss: %s: Switching to crc-smack\n", ax->dev->name); ax->crcmode = CRC_MODE_SMACK; } ax->rcount -= 2; *ax->rbuff &= ~0x80; } else if (ax->rbuff[0] & 0x20) { if (check_crc_flex(ax->rbuff, ax->rcount) < 0) { ax->dev->stats.rx_errors++; spin_unlock_bh(&ax->buflock); return; } if (ax->crcmode != CRC_MODE_FLEX && ax->crcauto) { printk(KERN_INFO "mkiss: %s: Switching to crc-flexnet\n", ax->dev->name); ax->crcmode = CRC_MODE_FLEX; } ax->rcount -= 2; /* * dl9sau bugfix: the trailling two bytes flexnet crc * will not be passed to the kernel. thus we have to * correct the kissparm signature, because it indicates * a crc but there's none */ *ax->rbuff &= ~0x20; } } count = ax->rcount; if ((skb = dev_alloc_skb(count)) == NULL) { printk(KERN_ERR "mkiss: %s: memory squeeze, dropping packet.\n", ax->dev->name); ax->dev->stats.rx_dropped++; spin_unlock_bh(&ax->buflock); return; } skb_put_data(skb, ax->rbuff, count); skb->protocol = ax25_type_trans(skb, ax->dev); netif_rx(skb); ax->dev->stats.rx_packets++; ax->dev->stats.rx_bytes += count; spin_unlock_bh(&ax->buflock); } static void kiss_unesc(struct mkiss *ax, unsigned char s) { switch (s) { case END: /* drop keeptest bit = VSV */ if (test_bit(AXF_KEEPTEST, &ax->flags)) clear_bit(AXF_KEEPTEST, &ax->flags); if (!test_and_clear_bit(AXF_ERROR, &ax->flags) && (ax->rcount > 2)) ax_bump(ax); clear_bit(AXF_ESCAPE, &ax->flags); ax->rcount = 0; return; case ESC: set_bit(AXF_ESCAPE, &ax->flags); return; case ESC_ESC: if (test_and_clear_bit(AXF_ESCAPE, &ax->flags)) s = ESC; break; case ESC_END: if (test_and_clear_bit(AXF_ESCAPE, &ax->flags)) s = END; break; } spin_lock_bh(&ax->buflock); if (!test_bit(AXF_ERROR, &ax->flags)) { if (ax->rcount < ax->buffsize) { ax->rbuff[ax->rcount++] = s; spin_unlock_bh(&ax->buflock); return; } ax->dev->stats.rx_over_errors++; set_bit(AXF_ERROR, &ax->flags); } spin_unlock_bh(&ax->buflock); } static int ax_set_mac_address(struct net_device *dev, void *addr) { struct sockaddr_ax25 *sa = addr; netif_tx_lock_bh(dev); netif_addr_lock(dev); __dev_addr_set(dev, &sa->sax25_call, AX25_ADDR_LEN); netif_addr_unlock(dev); netif_tx_unlock_bh(dev); return 0; } /*---------------------------------------------------------------------------*/ static void ax_changedmtu(struct mkiss *ax) { struct net_device *dev = ax->dev; unsigned char *xbuff, *rbuff, *oxbuff, *orbuff; int len; len = dev->mtu * 2; /* * allow for arrival of larger UDP packets, even if we say not to * also fixes a bug in which SunOS sends 512-byte packets even with * an MSS of 128 */ if (len < 576 * 2) len = 576 * 2; xbuff = kmalloc(len + 4, GFP_ATOMIC); rbuff = kmalloc(len + 4, GFP_ATOMIC); if (xbuff == NULL || rbuff == NULL) { printk(KERN_ERR "mkiss: %s: unable to grow ax25 buffers, " "MTU change cancelled.\n", ax->dev->name); dev->mtu = ax->mtu; kfree(xbuff); kfree(rbuff); return; } spin_lock_bh(&ax->buflock); oxbuff = ax->xbuff; ax->xbuff = xbuff; orbuff = ax->rbuff; ax->rbuff = rbuff; if (ax->xleft) { if (ax->xleft <= len) { memcpy(ax->xbuff, ax->xhead, ax->xleft); } else { ax->xleft = 0; dev->stats.tx_dropped++; } } ax->xhead = ax->xbuff; if (ax->rcount) { if (ax->rcount <= len) { memcpy(ax->rbuff, orbuff, ax->rcount); } else { ax->rcount = 0; dev->stats.rx_over_errors++; set_bit(AXF_ERROR, &ax->flags); } } ax->mtu = dev->mtu + 73; ax->buffsize = len; spin_unlock_bh(&ax->buflock); kfree(oxbuff); kfree(orbuff); } /* Encapsulate one AX.25 packet and stuff into a TTY queue. */ static void ax_encaps(struct net_device *dev, unsigned char *icp, int len) { struct mkiss *ax = netdev_priv(dev); unsigned char *p; int actual, count; if (ax->mtu != ax->dev->mtu + 73) /* Someone has been ifconfigging */ ax_changedmtu(ax); if (len > ax->mtu) { /* Sigh, shouldn't occur BUT ... */ printk(KERN_ERR "mkiss: %s: truncating oversized transmit packet!\n", ax->dev->name); dev->stats.tx_dropped++; netif_start_queue(dev); return; } p = icp; spin_lock_bh(&ax->buflock); if ((*p & 0x0f) != 0) { /* Configuration Command (kissparms(1). * Protocol spec says: never append CRC. * This fixes a very old bug in the linux * kiss driver. -- dl9sau */ switch (*p & 0xff) { case 0x85: /* command from userspace especially for us, * not for delivery to the tnc */ if (len > 1) { int cmd = (p[1] & 0xff); switch(cmd) { case 3: ax->crcmode = CRC_MODE_SMACK; break; case 2: ax->crcmode = CRC_MODE_FLEX; break; case 1: ax->crcmode = CRC_MODE_NONE; break; case 0: default: ax->crcmode = CRC_MODE_SMACK_TEST; cmd = 0; } ax->crcauto = (cmd ? 0 : 1); printk(KERN_INFO "mkiss: %s: crc mode set to %d\n", ax->dev->name, cmd); } spin_unlock_bh(&ax->buflock); netif_start_queue(dev); return; default: count = kiss_esc(p, ax->xbuff, len); } } else { unsigned short crc; switch (ax->crcmode) { case CRC_MODE_SMACK_TEST: ax->crcmode = CRC_MODE_FLEX_TEST; printk(KERN_INFO "mkiss: %s: Trying crc-smack\n", ax->dev->name); fallthrough; case CRC_MODE_SMACK: *p |= 0x80; crc = swab16(crc16(0, p, len)); count = kiss_esc_crc(p, ax->xbuff, crc, len+2); break; case CRC_MODE_FLEX_TEST: ax->crcmode = CRC_MODE_NONE; printk(KERN_INFO "mkiss: %s: Trying crc-flexnet\n", ax->dev->name); fallthrough; case CRC_MODE_FLEX: *p |= 0x20; crc = calc_crc_flex(p, len); count = kiss_esc_crc(p, ax->xbuff, crc, len+2); break; default: count = kiss_esc(p, ax->xbuff, len); } } spin_unlock_bh(&ax->buflock); set_bit(TTY_DO_WRITE_WAKEUP, &ax->tty->flags); actual = ax->tty->ops->write(ax->tty, ax->xbuff, count); dev->stats.tx_packets++; dev->stats.tx_bytes += actual; netif_trans_update(ax->dev); ax->xleft = count - actual; ax->xhead = ax->xbuff + actual; } /* Encapsulate an AX.25 packet and kick it into a TTY queue. */ static netdev_tx_t ax_xmit(struct sk_buff *skb, struct net_device *dev) { struct mkiss *ax = netdev_priv(dev); if (skb->protocol == htons(ETH_P_IP)) return ax25_ip_xmit(skb); if (!netif_running(dev)) { printk(KERN_ERR "mkiss: %s: xmit call when iface is down\n", dev->name); return NETDEV_TX_BUSY; } if (netif_queue_stopped(dev)) { /* * May be we must check transmitter timeout here ? * 14 Oct 1994 Dmitry Gorodchanin. */ if (time_before(jiffies, dev_trans_start(dev) + 20 * HZ)) { /* 20 sec timeout not reached */ return NETDEV_TX_BUSY; } printk(KERN_ERR "mkiss: %s: transmit timed out, %s?\n", dev->name, (tty_chars_in_buffer(ax->tty) || ax->xleft) ? "bad line quality" : "driver error"); ax->xleft = 0; clear_bit(TTY_DO_WRITE_WAKEUP, &ax->tty->flags); netif_start_queue(dev); } /* We were not busy, so we are now... :-) */ netif_stop_queue(dev); ax_encaps(dev, skb->data, skb->len); kfree_skb(skb); return NETDEV_TX_OK; } static int ax_open_dev(struct net_device *dev) { struct mkiss *ax = netdev_priv(dev); if (ax->tty == NULL) return -ENODEV; return 0; } /* Open the low-level part of the AX25 channel. Easy! */ static int ax_open(struct net_device *dev) { struct mkiss *ax = netdev_priv(dev); unsigned long len; if (ax->tty == NULL) return -ENODEV; /* * Allocate the frame buffers: * * rbuff Receive buffer. * xbuff Transmit buffer. */ len = dev->mtu * 2; /* * allow for arrival of larger UDP packets, even if we say not to * also fixes a bug in which SunOS sends 512-byte packets even with * an MSS of 128 */ if (len < 576 * 2) len = 576 * 2; if ((ax->rbuff = kmalloc(len + 4, GFP_KERNEL)) == NULL) goto norbuff; if ((ax->xbuff = kmalloc(len + 4, GFP_KERNEL)) == NULL) goto noxbuff; ax->mtu = dev->mtu + 73; ax->buffsize = len; ax->rcount = 0; ax->xleft = 0; ax->flags &= (1 << AXF_INUSE); /* Clear ESCAPE & ERROR flags */ spin_lock_init(&ax->buflock); return 0; noxbuff: kfree(ax->rbuff); norbuff: return -ENOMEM; } /* Close the low-level part of the AX25 channel. Easy! */ static int ax_close(struct net_device *dev) { struct mkiss *ax = netdev_priv(dev); if (ax->tty) clear_bit(TTY_DO_WRITE_WAKEUP, &ax->tty->flags); netif_stop_queue(dev); return 0; } static const struct net_device_ops ax_netdev_ops = { .ndo_open = ax_open_dev, .ndo_stop = ax_close, .ndo_start_xmit = ax_xmit, .ndo_set_mac_address = ax_set_mac_address, }; static void ax_setup(struct net_device *dev) { /* Finish setting up the DEVICE info. */ dev->mtu = AX_MTU; dev->hard_header_len = AX25_MAX_HEADER_LEN; dev->addr_len = AX25_ADDR_LEN; dev->type = ARPHRD_AX25; dev->tx_queue_len = 10; dev->header_ops = &ax25_header_ops; dev->netdev_ops = &ax_netdev_ops; memcpy(dev->broadcast, &ax25_bcast, AX25_ADDR_LEN); dev_addr_set(dev, (u8 *)&ax25_defaddr); dev->flags = IFF_BROADCAST | IFF_MULTICAST; } /* * We have a potential race on dereferencing tty->disc_data, because the tty * layer provides no locking at all - thus one cpu could be running * sixpack_receive_buf while another calls sixpack_close, which zeroes * tty->disc_data and frees the memory that sixpack_receive_buf is using. The * best way to fix this is to use a rwlock in the tty struct, but for now we * use a single global rwlock for all ttys in ppp line discipline. */ static DEFINE_RWLOCK(disc_data_lock); static struct mkiss *mkiss_get(struct tty_struct *tty) { struct mkiss *ax; read_lock(&disc_data_lock); ax = tty->disc_data; if (ax) refcount_inc(&ax->refcnt); read_unlock(&disc_data_lock); return ax; } static void mkiss_put(struct mkiss *ax) { if (refcount_dec_and_test(&ax->refcnt)) complete(&ax->dead); } static int crc_force = 0; /* Can be overridden with insmod */ static int mkiss_open(struct tty_struct *tty) { struct net_device *dev; struct mkiss *ax; int err; if (!capable(CAP_NET_ADMIN)) return -EPERM; if (tty->ops->write == NULL) return -EOPNOTSUPP; dev = alloc_netdev(sizeof(struct mkiss), "ax%d", NET_NAME_UNKNOWN, ax_setup); if (!dev) { err = -ENOMEM; goto out; } ax = netdev_priv(dev); ax->dev = dev; spin_lock_init(&ax->buflock); refcount_set(&ax->refcnt, 1); init_completion(&ax->dead); ax->tty = tty; tty->disc_data = ax; tty->receive_room = 65535; tty_driver_flush_buffer(tty); /* Restore default settings */ dev->type = ARPHRD_AX25; /* Perform the low-level AX25 initialization. */ err = ax_open(ax->dev); if (err) goto out_free_netdev; err = register_netdev(dev); if (err) goto out_free_buffers; /* after register_netdev() - because else printk smashes the kernel */ switch (crc_force) { case 3: ax->crcmode = CRC_MODE_SMACK; printk(KERN_INFO "mkiss: %s: crc mode smack forced.\n", ax->dev->name); break; case 2: ax->crcmode = CRC_MODE_FLEX; printk(KERN_INFO "mkiss: %s: crc mode flexnet forced.\n", ax->dev->name); break; case 1: ax->crcmode = CRC_MODE_NONE; printk(KERN_INFO "mkiss: %s: crc mode disabled.\n", ax->dev->name); break; case 0: default: crc_force = 0; printk(KERN_INFO "mkiss: %s: crc mode is auto.\n", ax->dev->name); ax->crcmode = CRC_MODE_SMACK_TEST; } ax->crcauto = (crc_force ? 0 : 1); netif_start_queue(dev); /* Done. We have linked the TTY line to a channel. */ return 0; out_free_buffers: kfree(ax->rbuff); kfree(ax->xbuff); out_free_netdev: free_netdev(dev); out: return err; } static void mkiss_close(struct tty_struct *tty) { struct mkiss *ax; write_lock_irq(&disc_data_lock); ax = tty->disc_data; tty->disc_data = NULL; write_unlock_irq(&disc_data_lock); if (!ax) return; /* * We have now ensured that nobody can start using ap from now on, but * we have to wait for all existing users to finish. */ if (!refcount_dec_and_test(&ax->refcnt)) wait_for_completion(&ax->dead); /* * Halt the transmit queue so that a new transmit cannot scribble * on our buffers */ netif_stop_queue(ax->dev); unregister_netdev(ax->dev); /* Free all AX25 frame buffers after unreg. */ kfree(ax->rbuff); kfree(ax->xbuff); ax->tty = NULL; free_netdev(ax->dev); } /* Perform I/O control on an active ax25 channel. */ static int mkiss_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct mkiss *ax = mkiss_get(tty); struct net_device *dev; unsigned int tmp, err; /* First make sure we're connected. */ if (ax == NULL) return -ENXIO; dev = ax->dev; switch (cmd) { case SIOCGIFNAME: err = copy_to_user((void __user *) arg, ax->dev->name, strlen(ax->dev->name) + 1) ? -EFAULT : 0; break; case SIOCGIFENCAP: err = put_user(4, (int __user *) arg); break; case SIOCSIFENCAP: if (get_user(tmp, (int __user *) arg)) { err = -EFAULT; break; } ax->mode = tmp; dev->addr_len = AX25_ADDR_LEN; dev->hard_header_len = AX25_KISS_HEADER_LEN + AX25_MAX_HEADER_LEN + 3; dev->type = ARPHRD_AX25; err = 0; break; case SIOCSIFHWADDR: { char addr[AX25_ADDR_LEN]; if (copy_from_user(&addr, (void __user *) arg, AX25_ADDR_LEN)) { err = -EFAULT; break; } netif_tx_lock_bh(dev); __dev_addr_set(dev, addr, AX25_ADDR_LEN); netif_tx_unlock_bh(dev); err = 0; break; } default: err = -ENOIOCTLCMD; } mkiss_put(ax); return err; } /* * Handle the 'receiver data ready' interrupt. * This function is called by the 'tty_io' module in the kernel when * a block of data has been received, which can now be decapsulated * and sent on to the AX.25 layer for further processing. */ static void mkiss_receive_buf(struct tty_struct *tty, const u8 *cp, const u8 *fp, size_t count) { struct mkiss *ax = mkiss_get(tty); if (!ax) return; /* * Argh! mtu change time! - costs us the packet part received * at the change */ if (ax->mtu != ax->dev->mtu + 73) ax_changedmtu(ax); /* Read the characters out of the buffer */ while (count--) { if (fp != NULL && *fp++) { if (!test_and_set_bit(AXF_ERROR, &ax->flags)) ax->dev->stats.rx_errors++; cp++; continue; } kiss_unesc(ax, *cp++); } mkiss_put(ax); tty_unthrottle(tty); } /* * Called by the driver when there's room for more data. If we have * more packets to send, we send them here. */ static void mkiss_write_wakeup(struct tty_struct *tty) { struct mkiss *ax = mkiss_get(tty); int actual; if (!ax) return; if (ax->xleft <= 0) { /* Now serial buffer is almost free & we can start * transmission of another packet */ clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); netif_wake_queue(ax->dev); goto out; } actual = tty->ops->write(tty, ax->xhead, ax->xleft); ax->xleft -= actual; ax->xhead += actual; out: mkiss_put(ax); } static struct tty_ldisc_ops ax_ldisc = { .owner = THIS_MODULE, .num = N_AX25, .name = "mkiss", .open = mkiss_open, .close = mkiss_close, .ioctl = mkiss_ioctl, .receive_buf = mkiss_receive_buf, .write_wakeup = mkiss_write_wakeup }; static const char banner[] __initconst = KERN_INFO \ "mkiss: AX.25 Multikiss, Hans Albas PE1AYX\n"; static const char msg_regfail[] __initconst = KERN_ERR \ "mkiss: can't register line discipline (err = %d)\n"; static int __init mkiss_init_driver(void) { int status; printk(banner); status = tty_register_ldisc(&ax_ldisc); if (status != 0) printk(msg_regfail, status); return status; } static void __exit mkiss_exit_driver(void) { tty_unregister_ldisc(&ax_ldisc); } MODULE_AUTHOR("Ralf Baechle DL5RB <ralf@linux-mips.org>"); MODULE_DESCRIPTION("KISS driver for AX.25 over TTYs"); module_param(crc_force, int, 0); MODULE_PARM_DESC(crc_force, "crc [0 = auto | 1 = none | 2 = flexnet | 3 = smack]"); MODULE_LICENSE("GPL"); MODULE_ALIAS_LDISC(N_AX25); module_init(mkiss_init_driver); module_exit(mkiss_exit_driver);
183 150 148 151 1 1 1 1 1 1 2 2 1 2 38 38 38 32 32 31 31 31 31 1 31 3 5 5 5 4 3 3 3 1 2 1 1 25 25 3 3 3 2 2 1 1 37 37 30 10 10 37 32 32 1 32 25 24 32 21 32 25 161 6 134 135 133 24 20 24 158 156 91 3 90 90 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 /* * Created: Tue Feb 2 08:37:54 1999 by faith@valinux.com * * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. * All Rights Reserved. * * Author Rickard E. (Rik) Faith <faith@valinux.com> * Author Gareth Hughes <gareth@valinux.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include <linux/export.h> #include <linux/slab.h> #include <drm/drm_auth.h> #include <drm/drm_drv.h> #include <drm/drm_file.h> #include <drm/drm_lease.h> #include <drm/drm_print.h> #include "drm_internal.h" /** * DOC: master and authentication * * &struct drm_master is used to track groups of clients with open * primary device nodes. For every &struct drm_file which has had at * least once successfully became the device master (either through the * SET_MASTER IOCTL, or implicitly through opening the primary device node when * no one else is the current master that time) there exists one &drm_master. * This is noted in &drm_file.is_master. All other clients have just a pointer * to the &drm_master they are associated with. * * In addition only one &drm_master can be the current master for a &drm_device. * It can be switched through the DROP_MASTER and SET_MASTER IOCTL, or * implicitly through closing/opening the primary device node. See also * drm_is_current_master(). * * Clients can authenticate against the current master (if it matches their own) * using the GETMAGIC and AUTHMAGIC IOCTLs. Together with exchanging masters, * this allows controlled access to the device for an entire group of mutually * trusted clients. */ static bool drm_is_current_master_locked(struct drm_file *fpriv) { lockdep_assert_once(lockdep_is_held(&fpriv->master_lookup_lock) || lockdep_is_held(&fpriv->minor->dev->master_mutex)); return fpriv->is_master && drm_lease_owner(fpriv->master) == fpriv->minor->dev->master; } /** * drm_is_current_master - checks whether @priv is the current master * @fpriv: DRM file private * * Checks whether @fpriv is current master on its device. This decides whether a * client is allowed to run DRM_MASTER IOCTLs. * * Most of the modern IOCTL which require DRM_MASTER are for kernel modesetting * - the current master is assumed to own the non-shareable display hardware. */ bool drm_is_current_master(struct drm_file *fpriv) { bool ret; spin_lock(&fpriv->master_lookup_lock); ret = drm_is_current_master_locked(fpriv); spin_unlock(&fpriv->master_lookup_lock); return ret; } EXPORT_SYMBOL(drm_is_current_master); int drm_getmagic(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_auth *auth = data; int ret = 0; guard(mutex)(&dev->master_mutex); if (!file_priv->magic) { ret = idr_alloc(&file_priv->master->magic_map, file_priv, 1, 0, GFP_KERNEL); if (ret >= 0) file_priv->magic = ret; } auth->magic = file_priv->magic; drm_dbg_core(dev, "%u\n", auth->magic); return ret < 0 ? ret : 0; } int drm_authmagic(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_auth *auth = data; struct drm_file *file; drm_dbg_core(dev, "%u\n", auth->magic); guard(mutex)(&dev->master_mutex); file = idr_find(&file_priv->master->magic_map, auth->magic); if (file) { file->authenticated = 1; idr_replace(&file_priv->master->magic_map, NULL, auth->magic); } return file ? 0 : -EINVAL; } struct drm_master *drm_master_create(struct drm_device *dev) { struct drm_master *master; master = kzalloc(sizeof(*master), GFP_KERNEL); if (!master) return NULL; kref_init(&master->refcount); idr_init_base(&master->magic_map, 1); master->dev = dev; /* initialize the tree of output resource lessees */ INIT_LIST_HEAD(&master->lessees); INIT_LIST_HEAD(&master->lessee_list); idr_init(&master->leases); idr_init_base(&master->lessee_idr, 1); return master; } static void drm_set_master(struct drm_device *dev, struct drm_file *fpriv, bool new_master) { dev->master = drm_master_get(fpriv->master); if (dev->driver->master_set) dev->driver->master_set(dev, fpriv, new_master); fpriv->was_master = true; } static int drm_new_set_master(struct drm_device *dev, struct drm_file *fpriv) { struct drm_master *old_master; struct drm_master *new_master; lockdep_assert_held_once(&dev->master_mutex); WARN_ON(fpriv->is_master); old_master = fpriv->master; new_master = drm_master_create(dev); if (!new_master) return -ENOMEM; spin_lock(&fpriv->master_lookup_lock); fpriv->master = new_master; spin_unlock(&fpriv->master_lookup_lock); fpriv->is_master = 1; fpriv->authenticated = 1; drm_set_master(dev, fpriv, true); if (old_master) drm_master_put(&old_master); return 0; } /* * In the olden days the SET/DROP_MASTER ioctls used to return EACCES when * CAP_SYS_ADMIN was not set. This was used to prevent rogue applications * from becoming master and/or failing to release it. * * At the same time, the first client (for a given VT) is _always_ master. * Thus in order for the ioctls to succeed, one had to _explicitly_ run the * application as root or flip the setuid bit. * * If the CAP_SYS_ADMIN was missing, no other client could become master... * EVER :-( Leading to a) the graphics session dying badly or b) a completely * locked session. * * * As some point systemd-logind was introduced to orchestrate and delegate * master as applicable. It does so by opening the fd and passing it to users * while in itself logind a) does the set/drop master per users' request and * b) * implicitly drops master on VT switch. * * Even though logind looks like the future, there are a few issues: * - some platforms don't have equivalent (Android, CrOS, some BSDs) so * root is required _solely_ for SET/DROP MASTER. * - applications may not be updated to use it, * - any client which fails to drop master* can DoS the application using * logind, to a varying degree. * * * Either due missing CAP_SYS_ADMIN or simply not calling DROP_MASTER. * * * Here we implement the next best thing: * - ensure the logind style of fd passing works unchanged, and * - allow a client to drop/set master, iff it is/was master at a given point * in time. * * Note: DROP_MASTER cannot be free for all, as an arbitrator user could: * - DoS/crash the arbitrator - details would be implementation specific * - open the node, become master implicitly and cause issues * * As a result this fixes the following when using root-less build w/o logind * - startx * - weston * - various compositors based on wlroots */ static int drm_master_check_perm(struct drm_device *dev, struct drm_file *file_priv) { if (file_priv->was_master && rcu_access_pointer(file_priv->pid) == task_tgid(current)) return 0; if (!capable(CAP_SYS_ADMIN)) return -EACCES; return 0; } int drm_setmaster_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { int ret; guard(mutex)(&dev->master_mutex); ret = drm_master_check_perm(dev, file_priv); if (ret) return ret; if (drm_is_current_master_locked(file_priv)) return ret; if (dev->master) return -EBUSY; if (!file_priv->master) return -EINVAL; if (!file_priv->is_master) return drm_new_set_master(dev, file_priv); if (file_priv->master->lessor != NULL) { drm_dbg_lease(dev, "Attempt to set lessee %d as master\n", file_priv->master->lessee_id); return -EINVAL; } drm_set_master(dev, file_priv, false); return ret; } static void drm_drop_master(struct drm_device *dev, struct drm_file *fpriv) { if (dev->driver->master_drop) dev->driver->master_drop(dev, fpriv); drm_master_put(&dev->master); } int drm_dropmaster_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { int ret; guard(mutex)(&dev->master_mutex); ret = drm_master_check_perm(dev, file_priv); if (ret) return ret; if (!drm_is_current_master_locked(file_priv)) return -EINVAL; if (!dev->master) return -EINVAL; if (file_priv->master->lessor != NULL) { drm_dbg_lease(dev, "Attempt to drop lessee %d as master\n", file_priv->master->lessee_id); return -EINVAL; } drm_drop_master(dev, file_priv); return ret; } int drm_master_open(struct drm_file *file_priv) { struct drm_device *dev = file_priv->minor->dev; int ret = 0; /* if there is no current master make this fd it, but do not create * any master object for render clients */ guard(mutex)(&dev->master_mutex); if (!dev->master) { ret = drm_new_set_master(dev, file_priv); } else { spin_lock(&file_priv->master_lookup_lock); file_priv->master = drm_master_get(dev->master); spin_unlock(&file_priv->master_lookup_lock); } return ret; } void drm_master_release(struct drm_file *file_priv) { struct drm_device *dev = file_priv->minor->dev; struct drm_master *master; guard(mutex)(&dev->master_mutex); master = file_priv->master; if (file_priv->magic) idr_remove(&file_priv->master->magic_map, file_priv->magic); if (!drm_is_current_master_locked(file_priv)) goto out; if (dev->master == file_priv->master) drm_drop_master(dev, file_priv); out: if (drm_core_check_feature(dev, DRIVER_MODESET) && file_priv->is_master) { /* Revoke any leases held by this or lessees, but only if * this is the "real" master */ drm_lease_revoke(master); } /* drop the master reference held by the file priv */ if (file_priv->master) drm_master_put(&file_priv->master); } /** * drm_master_get - reference a master pointer * @master: &struct drm_master * * Increments the reference count of @master and returns a pointer to @master. */ struct drm_master *drm_master_get(struct drm_master *master) { kref_get(&master->refcount); return master; } EXPORT_SYMBOL(drm_master_get); /** * drm_file_get_master - reference &drm_file.master of @file_priv * @file_priv: DRM file private * * Increments the reference count of @file_priv's &drm_file.master and returns * the &drm_file.master. If @file_priv has no &drm_file.master, returns NULL. * * Master pointers returned from this function should be unreferenced using * drm_master_put(). */ struct drm_master *drm_file_get_master(struct drm_file *file_priv) { struct drm_master *master = NULL; spin_lock(&file_priv->master_lookup_lock); if (!file_priv->master) goto unlock; master = drm_master_get(file_priv->master); unlock: spin_unlock(&file_priv->master_lookup_lock); return master; } EXPORT_SYMBOL(drm_file_get_master); static void drm_master_destroy(struct kref *kref) { struct drm_master *master = container_of(kref, struct drm_master, refcount); struct drm_device *dev = master->dev; if (drm_core_check_feature(dev, DRIVER_MODESET)) drm_lease_destroy(master); idr_destroy(&master->magic_map); idr_destroy(&master->leases); idr_destroy(&master->lessee_idr); kfree(master->unique); kfree(master); } /** * drm_master_put - unreference and clear a master pointer * @master: pointer to a pointer of &struct drm_master * * This decrements the &drm_master behind @master and sets it to NULL. */ void drm_master_put(struct drm_master **master) { kref_put(&(*master)->refcount, drm_master_destroy); *master = NULL; } EXPORT_SYMBOL(drm_master_put); /* Used by drm_client and drm_fb_helper */ bool drm_master_internal_acquire(struct drm_device *dev) { mutex_lock(&dev->master_mutex); if (dev->master) { mutex_unlock(&dev->master_mutex); return false; } return true; } EXPORT_SYMBOL(drm_master_internal_acquire); /* Used by drm_client and drm_fb_helper */ void drm_master_internal_release(struct drm_device *dev) { mutex_unlock(&dev->master_mutex); } EXPORT_SYMBOL(drm_master_internal_release);
5 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 // SPDX-License-Identifier: GPL-2.0-or-later /* * mount.c - operations for initializing and mounting configfs. * * Based on sysfs: * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel * * configfs Copyright (C) 2005 Oracle. All rights reserved. */ #include <linux/fs.h> #include <linux/module.h> #include <linux/mount.h> #include <linux/fs_context.h> #include <linux/pagemap.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/configfs.h> #include "configfs_internal.h" /* Random magic number */ #define CONFIGFS_MAGIC 0x62656570 static struct vfsmount *configfs_mount = NULL; struct kmem_cache *configfs_dir_cachep; static int configfs_mnt_count = 0; static void configfs_free_inode(struct inode *inode) { if (S_ISLNK(inode->i_mode)) kfree(inode->i_link); free_inode_nonrcu(inode); } static const struct super_operations configfs_ops = { .statfs = simple_statfs, .drop_inode = inode_just_drop, .free_inode = configfs_free_inode, }; static struct config_group configfs_root_group = { .cg_item = { .ci_namebuf = "root", .ci_name = configfs_root_group.cg_item.ci_namebuf, }, }; int configfs_is_root(struct config_item *item) { return item == &configfs_root_group.cg_item; } static struct configfs_dirent configfs_root = { .s_sibling = LIST_HEAD_INIT(configfs_root.s_sibling), .s_children = LIST_HEAD_INIT(configfs_root.s_children), .s_element = &configfs_root_group.cg_item, .s_type = CONFIGFS_ROOT, .s_iattr = NULL, }; static int configfs_fill_super(struct super_block *sb, struct fs_context *fc) { struct inode *inode; struct dentry *root; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = CONFIGFS_MAGIC; sb->s_op = &configfs_ops; sb->s_time_gran = 1; inode = configfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, &configfs_root, sb); if (inode) { inode->i_op = &configfs_root_inode_operations; inode->i_fop = &configfs_dir_operations; /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); } else { pr_debug("could not get root inode\n"); return -ENOMEM; } root = d_make_root(inode); if (!root) { pr_debug("%s: could not get root dentry!\n",__func__); return -ENOMEM; } config_group_init(&configfs_root_group); configfs_root_group.cg_item.ci_dentry = root; root->d_fsdata = &configfs_root; sb->s_root = root; set_default_d_op(sb, &configfs_dentry_ops); /* the rest get that */ sb->s_d_flags |= DCACHE_DONTCACHE; return 0; } static int configfs_get_tree(struct fs_context *fc) { return get_tree_single(fc, configfs_fill_super); } static const struct fs_context_operations configfs_context_ops = { .get_tree = configfs_get_tree, }; static int configfs_init_fs_context(struct fs_context *fc) { fc->ops = &configfs_context_ops; return 0; } static struct file_system_type configfs_fs_type = { .owner = THIS_MODULE, .name = "configfs", .init_fs_context = configfs_init_fs_context, .kill_sb = kill_litter_super, }; MODULE_ALIAS_FS("configfs"); struct dentry *configfs_pin_fs(void) { int err = simple_pin_fs(&configfs_fs_type, &configfs_mount, &configfs_mnt_count); return err ? ERR_PTR(err) : configfs_mount->mnt_root; } void configfs_release_fs(void) { simple_release_fs(&configfs_mount, &configfs_mnt_count); } static int __init configfs_init(void) { int err = -ENOMEM; configfs_dir_cachep = kmem_cache_create("configfs_dir_cache", sizeof(struct configfs_dirent), 0, 0, NULL); if (!configfs_dir_cachep) goto out; err = sysfs_create_mount_point(kernel_kobj, "config"); if (err) goto out2; err = register_filesystem(&configfs_fs_type); if (err) goto out3; return 0; out3: pr_err("Unable to register filesystem!\n"); sysfs_remove_mount_point(kernel_kobj, "config"); out2: kmem_cache_destroy(configfs_dir_cachep); configfs_dir_cachep = NULL; out: return err; } static void __exit configfs_exit(void) { unregister_filesystem(&configfs_fs_type); sysfs_remove_mount_point(kernel_kobj, "config"); kmem_cache_destroy(configfs_dir_cachep); configfs_dir_cachep = NULL; } MODULE_AUTHOR("Oracle"); MODULE_LICENSE("GPL"); MODULE_VERSION("0.0.2"); MODULE_DESCRIPTION("Simple RAM filesystem for user driven kernel subsystem configuration."); core_initcall(configfs_init); module_exit(configfs_exit);
459 25 46 13803 2014 1358 13 479 1192 1 269 930 1557 4 15891 15233 11486 11494 9405 11304 2810 2123 3 9409 41 42 14 330 34 93 87 87 7 7 11 182 1331 23 152 145 203 451 289 167 75 114 8872 80 15645 12818 11 238 15349 37 19 2070 14 32 29 31 24 13 30 10 8 30 10 6 2 29 86 10 29 29 2055 9320 816 567 151 151 230 162 199 35 2987 549 730 4 8 28 6 28 93 19 48 46 87 36 9 1 49 5688 219 26 26 16 15 16 1117 857 8 3 251 552 73 349 822 61 36 21 1 73 15 35 95 157 17 979 17 978 1744 828 1399 1219 71 26 26 10251 7485 6 16 48 61 61 148 257 66 485 486 484 59 425 425 425 14965 129 113 11611 5092 5595 3 67 22 179 1 2 5 912 1504 87 8897 10835 10836 10616 239 6506 290 136 6500 660 654 1605 8940 3078 125 130 127 1 34 68 218 516 19 126 19 1267 7 3 8 8 12 53 6 1901 1681 1930 167 721 1106 36 35 773 1598 2355 111 1135 10212 88 602 2470 2489 22 19 1609 86 10321 30 264 302 1331 915 927 43 1110 744 991 1299 50 383 36 1696 1812 82 1795 182 68 122 109 245 4 2 80 11 84 3734 1711 484 3780 19 11610 19 1248 8 238 193 11 11 825 965 359 140 58 58 503 90 13 13 31 112 12 120 42 64 18 166 5 5 5 212 215 144 93 225 36 177 45 14 2 1 1 27 3 56 565 47 5 27 25 24 27 25 479 156 1 1 2 1 2 942 93 8811 68 520 673 1767 295 5 398 1117 456 8812 23 222 1 1 161 842 279 7 460 743 772 819 518 29 29 16 100 1058 17 569 464 1 245 17 15 2 1 4 14 11 1 1 11 10 5 584 46 3 147 47 17 544 178 25 1 15233 1265 15 15086 11651 23 24 23 122 59 9615 195 16 9599 12 209 22 931 945 11612 56 121 120 121 76 510 8953 149 131 291 145 1139 8843 8966 35 5 2 2 42 1 2 1123 654 372 75 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Definitions for the 'struct sk_buff' memory handlers. * * Authors: * Alan Cox, <gw4pts@gw4pts.ampr.org> * Florian La Roche, <rzsfl@rz.uni-sb.de> */ #ifndef _LINUX_SKBUFF_H #define _LINUX_SKBUFF_H #include <linux/kernel.h> #include <linux/compiler.h> #include <linux/time.h> #include <linux/bug.h> #include <linux/bvec.h> #include <linux/cache.h> #include <linux/rbtree.h> #include <linux/socket.h> #include <linux/refcount.h> #include <linux/atomic.h> #include <asm/types.h> #include <linux/spinlock.h> #include <net/checksum.h> #include <linux/rcupdate.h> #include <linux/dma-mapping.h> #include <linux/netdev_features.h> #include <net/flow_dissector.h> #include <linux/in6.h> #include <linux/if_packet.h> #include <linux/llist.h> #include <linux/page_frag_cache.h> #include <net/flow.h> #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <linux/netfilter/nf_conntrack_common.h> #endif #include <net/net_debug.h> #include <net/dropreason-core.h> #include <net/netmem.h> /** * DOC: skb checksums * * The interface for checksum offload between the stack and networking drivers * is as follows... * * IP checksum related features * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * Drivers advertise checksum offload capabilities in the features of a device. * From the stack's point of view these are capabilities offered by the driver. * A driver typically only advertises features that it is capable of offloading * to its device. * * .. flat-table:: Checksum related device features * :widths: 1 10 * * * - %NETIF_F_HW_CSUM * - The driver (or its device) is able to compute one * IP (one's complement) checksum for any combination * of protocols or protocol layering. The checksum is * computed and set in a packet per the CHECKSUM_PARTIAL * interface (see below). * * * - %NETIF_F_IP_CSUM * - Driver (device) is only able to checksum plain * TCP or UDP packets over IPv4. These are specifically * unencapsulated packets of the form IPv4|TCP or * IPv4|UDP where the Protocol field in the IPv4 header * is TCP or UDP. The IPv4 header may contain IP options. * This feature cannot be set in features for a device * with NETIF_F_HW_CSUM also set. This feature is being * DEPRECATED (see below). * * * - %NETIF_F_IPV6_CSUM * - Driver (device) is only able to checksum plain * TCP or UDP packets over IPv6. These are specifically * unencapsulated packets of the form IPv6|TCP or * IPv6|UDP where the Next Header field in the IPv6 * header is either TCP or UDP. IPv6 extension headers * are not supported with this feature. This feature * cannot be set in features for a device with * NETIF_F_HW_CSUM also set. This feature is being * DEPRECATED (see below). * * * - %NETIF_F_RXCSUM * - Driver (device) performs receive checksum offload. * This flag is only used to disable the RX checksum * feature for a device. The stack will accept receive * checksum indication in packets received on a device * regardless of whether NETIF_F_RXCSUM is set. * * Checksumming of received packets by device * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * Indication of checksum verification is set in &sk_buff.ip_summed. * Possible values are: * * - %CHECKSUM_NONE * * Device did not checksum this packet e.g. due to lack of capabilities. * The packet contains full (though not verified) checksum in packet but * not in skb->csum. Thus, skb->csum is undefined in this case. * * - %CHECKSUM_UNNECESSARY * * The hardware you're dealing with doesn't calculate the full checksum * (as in %CHECKSUM_COMPLETE), but it does parse headers and verify checksums * for specific protocols. For such packets it will set %CHECKSUM_UNNECESSARY * if their checksums are okay. &sk_buff.csum is still undefined in this case * though. A driver or device must never modify the checksum field in the * packet even if checksum is verified. * * %CHECKSUM_UNNECESSARY is applicable to following protocols: * * - TCP: IPv6 and IPv4. * - UDP: IPv4 and IPv6. A device may apply CHECKSUM_UNNECESSARY to a * zero UDP checksum for either IPv4 or IPv6, the networking stack * may perform further validation in this case. * - GRE: only if the checksum is present in the header. * - SCTP: indicates the CRC in SCTP header has been validated. * - FCOE: indicates the CRC in FC frame has been validated. * * &sk_buff.csum_level indicates the number of consecutive checksums found in * the packet minus one that have been verified as %CHECKSUM_UNNECESSARY. * For instance if a device receives an IPv6->UDP->GRE->IPv4->TCP packet * and a device is able to verify the checksums for UDP (possibly zero), * GRE (checksum flag is set) and TCP, &sk_buff.csum_level would be set to * two. If the device were only able to verify the UDP checksum and not * GRE, either because it doesn't support GRE checksum or because GRE * checksum is bad, skb->csum_level would be set to zero (TCP checksum is * not considered in this case). * * - %CHECKSUM_COMPLETE * * This is the most generic way. The device supplied checksum of the _whole_ * packet as seen by netif_rx() and fills in &sk_buff.csum. This means the * hardware doesn't need to parse L3/L4 headers to implement this. * * Notes: * * - Even if device supports only some protocols, but is able to produce * skb->csum, it MUST use CHECKSUM_COMPLETE, not CHECKSUM_UNNECESSARY. * - CHECKSUM_COMPLETE is not applicable to SCTP and FCoE protocols. * * - %CHECKSUM_PARTIAL * * A checksum is set up to be offloaded to a device as described in the * output description for CHECKSUM_PARTIAL. This may occur on a packet * received directly from another Linux OS, e.g., a virtualized Linux kernel * on the same host, or it may be set in the input path in GRO or remote * checksum offload. For the purposes of checksum verification, the checksum * referred to by skb->csum_start + skb->csum_offset and any preceding * checksums in the packet are considered verified. Any checksums in the * packet that are after the checksum being offloaded are not considered to * be verified. * * Checksumming on transmit for non-GSO * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * The stack requests checksum offload in the &sk_buff.ip_summed for a packet. * Values are: * * - %CHECKSUM_PARTIAL * * The driver is required to checksum the packet as seen by hard_start_xmit() * from &sk_buff.csum_start up to the end, and to record/write the checksum at * offset &sk_buff.csum_start + &sk_buff.csum_offset. * A driver may verify that the * csum_start and csum_offset values are valid values given the length and * offset of the packet, but it should not attempt to validate that the * checksum refers to a legitimate transport layer checksum -- it is the * purview of the stack to validate that csum_start and csum_offset are set * correctly. * * When the stack requests checksum offload for a packet, the driver MUST * ensure that the checksum is set correctly. A driver can either offload the * checksum calculation to the device, or call skb_checksum_help (in the case * that the device does not support offload for a particular checksum). * * %NETIF_F_IP_CSUM and %NETIF_F_IPV6_CSUM are being deprecated in favor of * %NETIF_F_HW_CSUM. New devices should use %NETIF_F_HW_CSUM to indicate * checksum offload capability. * skb_csum_hwoffload_help() can be called to resolve %CHECKSUM_PARTIAL based * on network device checksumming capabilities: if a packet does not match * them, skb_checksum_help() or skb_crc32c_help() (depending on the value of * &sk_buff.csum_not_inet, see :ref:`crc`) * is called to resolve the checksum. * * - %CHECKSUM_NONE * * The skb was already checksummed by the protocol, or a checksum is not * required. * * - %CHECKSUM_UNNECESSARY * * This has the same meaning as CHECKSUM_NONE for checksum offload on * output. * * - %CHECKSUM_COMPLETE * * Not used in checksum output. If a driver observes a packet with this value * set in skbuff, it should treat the packet as if %CHECKSUM_NONE were set. * * .. _crc: * * Non-IP checksum (CRC) offloads * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * .. flat-table:: * :widths: 1 10 * * * - %NETIF_F_SCTP_CRC * - This feature indicates that a device is capable of * offloading the SCTP CRC in a packet. To perform this offload the stack * will set csum_start and csum_offset accordingly, set ip_summed to * %CHECKSUM_PARTIAL and set csum_not_inet to 1, to provide an indication * in the skbuff that the %CHECKSUM_PARTIAL refers to CRC32c. * A driver that supports both IP checksum offload and SCTP CRC32c offload * must verify which offload is configured for a packet by testing the * value of &sk_buff.csum_not_inet; skb_crc32c_csum_help() is provided to * resolve %CHECKSUM_PARTIAL on skbs where csum_not_inet is set to 1. * * * - %NETIF_F_FCOE_CRC * - This feature indicates that a device is capable of offloading the FCOE * CRC in a packet. To perform this offload the stack will set ip_summed * to %CHECKSUM_PARTIAL and set csum_start and csum_offset * accordingly. Note that there is no indication in the skbuff that the * %CHECKSUM_PARTIAL refers to an FCOE checksum, so a driver that supports * both IP checksum offload and FCOE CRC offload must verify which offload * is configured for a packet, presumably by inspecting packet headers. * * Checksumming on output with GSO * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * In the case of a GSO packet (skb_is_gso() is true), checksum offload * is implied by the SKB_GSO_* flags in gso_type. Most obviously, if the * gso_type is %SKB_GSO_TCPV4 or %SKB_GSO_TCPV6, TCP checksum offload as * part of the GSO operation is implied. If a checksum is being offloaded * with GSO then ip_summed is %CHECKSUM_PARTIAL, and both csum_start and * csum_offset are set to refer to the outermost checksum being offloaded * (two offloaded checksums are possible with UDP encapsulation). */ /* Don't change this without changing skb_csum_unnecessary! */ #define CHECKSUM_NONE 0 #define CHECKSUM_UNNECESSARY 1 #define CHECKSUM_COMPLETE 2 #define CHECKSUM_PARTIAL 3 /* Maximum value in skb->csum_level */ #define SKB_MAX_CSUM_LEVEL 3 #define SKB_DATA_ALIGN(X) ALIGN(X, SMP_CACHE_BYTES) #define SKB_WITH_OVERHEAD(X) \ ((X) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) /* For X bytes available in skb->head, what is the minimal * allocation needed, knowing struct skb_shared_info needs * to be aligned. */ #define SKB_HEAD_ALIGN(X) (SKB_DATA_ALIGN(X) + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) #define SKB_MAX_ORDER(X, ORDER) \ SKB_WITH_OVERHEAD((PAGE_SIZE << (ORDER)) - (X)) #define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0)) #define SKB_MAX_ALLOC (SKB_MAX_ORDER(0, 2)) /* return minimum truesize of one skb containing X bytes of data */ #define SKB_TRUESIZE(X) ((X) + \ SKB_DATA_ALIGN(sizeof(struct sk_buff)) + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) struct net_device; struct scatterlist; struct pipe_inode_info; struct iov_iter; struct napi_struct; struct bpf_prog; union bpf_attr; struct skb_ext; struct ts_config; #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) struct nf_bridge_info { enum { BRNF_PROTO_UNCHANGED, BRNF_PROTO_8021Q, BRNF_PROTO_PPPOE } orig_proto:8; u8 pkt_otherhost:1; u8 in_prerouting:1; u8 bridged_dnat:1; u8 sabotage_in_done:1; __u16 frag_max_size; int physinif; /* always valid & non-NULL from FORWARD on, for physdev match */ struct net_device *physoutdev; union { /* prerouting: detect dnat in orig/reply direction */ __be32 ipv4_daddr; struct in6_addr ipv6_daddr; /* after prerouting + nat detected: store original source * mac since neigh resolution overwrites it, only used while * skb is out in neigh layer. */ char neigh_header[8]; }; }; #endif #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) /* Chain in tc_skb_ext will be used to share the tc chain with * ovs recirc_id. It will be set to the current chain by tc * and read by ovs to recirc_id. */ struct tc_skb_ext { union { u64 act_miss_cookie; __u32 chain; }; __u16 mru; __u16 zone; u8 post_ct:1; u8 post_ct_snat:1; u8 post_ct_dnat:1; u8 act_miss:1; /* Set if act_miss_cookie is used */ u8 l2_miss:1; /* Set by bridge upon FDB or MDB miss */ }; #endif struct sk_buff_head { /* These two members must be first to match sk_buff. */ struct_group_tagged(sk_buff_list, list, struct sk_buff *next; struct sk_buff *prev; ); __u32 qlen; spinlock_t lock; }; struct sk_buff; #ifndef CONFIG_MAX_SKB_FRAGS # define CONFIG_MAX_SKB_FRAGS 17 #endif #define MAX_SKB_FRAGS CONFIG_MAX_SKB_FRAGS /* Set skb_shinfo(skb)->gso_size to this in case you want skb_segment to * segment using its current segmentation instead. */ #define GSO_BY_FRAGS 0xFFFF typedef struct skb_frag { netmem_ref netmem; unsigned int len; unsigned int offset; } skb_frag_t; /** * skb_frag_size() - Returns the size of a skb fragment * @frag: skb fragment */ static inline unsigned int skb_frag_size(const skb_frag_t *frag) { return frag->len; } /** * skb_frag_size_set() - Sets the size of a skb fragment * @frag: skb fragment * @size: size of fragment */ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size) { frag->len = size; } /** * skb_frag_size_add() - Increments the size of a skb fragment by @delta * @frag: skb fragment * @delta: value to add */ static inline void skb_frag_size_add(skb_frag_t *frag, int delta) { frag->len += delta; } /** * skb_frag_size_sub() - Decrements the size of a skb fragment by @delta * @frag: skb fragment * @delta: value to subtract */ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) { frag->len -= delta; } /** * skb_frag_must_loop - Test if %p is a high memory page * @p: fragment's page */ static inline bool skb_frag_must_loop(struct page *p) { #if defined(CONFIG_HIGHMEM) if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP) || PageHighMem(p)) return true; #endif return false; } /** * skb_frag_foreach_page - loop over pages in a fragment * * @f: skb frag to operate on * @f_off: offset from start of f->netmem * @f_len: length from f_off to loop over * @p: (temp var) current page * @p_off: (temp var) offset from start of current page, * non-zero only on first page. * @p_len: (temp var) length in current page, * < PAGE_SIZE only on first and last page. * @copied: (temp var) length so far, excluding current p_len. * * A fragment can hold a compound page, in which case per-page * operations, notably kmap_atomic, must be called for each * regular page. */ #define skb_frag_foreach_page(f, f_off, f_len, p, p_off, p_len, copied) \ for (p = skb_frag_page(f) + ((f_off) >> PAGE_SHIFT), \ p_off = (f_off) & (PAGE_SIZE - 1), \ p_len = skb_frag_must_loop(p) ? \ min_t(u32, f_len, PAGE_SIZE - p_off) : f_len, \ copied = 0; \ copied < f_len; \ copied += p_len, p++, p_off = 0, \ p_len = min_t(u32, f_len - copied, PAGE_SIZE)) \ /** * struct skb_shared_hwtstamps - hardware time stamps * @hwtstamp: hardware time stamp transformed into duration * since arbitrary point in time * @netdev_data: address/cookie of network device driver used as * reference to actual hardware time stamp * * Software time stamps generated by ktime_get_real() are stored in * skb->tstamp. * * hwtstamps can only be compared against other hwtstamps from * the same device. * * This structure is attached to packets as part of the * &skb_shared_info. Use skb_hwtstamps() to get a pointer. */ struct skb_shared_hwtstamps { union { ktime_t hwtstamp; void *netdev_data; }; }; /* Definitions for tx_flags in struct skb_shared_info */ enum { /* generate hardware time stamp */ SKBTX_HW_TSTAMP_NOBPF = 1 << 0, /* generate software time stamp when queueing packet to NIC */ SKBTX_SW_TSTAMP = 1 << 1, /* device driver is going to provide hardware time stamp */ SKBTX_IN_PROGRESS = 1 << 2, /* generate software time stamp on packet tx completion */ SKBTX_COMPLETION_TSTAMP = 1 << 3, /* determine hardware time stamp based on time or cycles */ SKBTX_HW_TSTAMP_NETDEV = 1 << 5, /* generate software time stamp when entering packet scheduling */ SKBTX_SCHED_TSTAMP = 1 << 6, /* used for bpf extension when a bpf program is loaded */ SKBTX_BPF = 1 << 7, }; #define SKBTX_HW_TSTAMP (SKBTX_HW_TSTAMP_NOBPF | SKBTX_BPF) #define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | \ SKBTX_SCHED_TSTAMP | \ SKBTX_BPF | \ SKBTX_COMPLETION_TSTAMP) #define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | \ SKBTX_ANY_SW_TSTAMP) /* Definitions for flags in struct skb_shared_info */ enum { /* use zcopy routines */ SKBFL_ZEROCOPY_ENABLE = BIT(0), /* This indicates at least one fragment might be overwritten * (as in vmsplice(), sendfile() ...) * If we need to compute a TX checksum, we'll need to copy * all frags to avoid possible bad checksum */ SKBFL_SHARED_FRAG = BIT(1), /* segment contains only zerocopy data and should not be * charged to the kernel memory. */ SKBFL_PURE_ZEROCOPY = BIT(2), SKBFL_DONT_ORPHAN = BIT(3), /* page references are managed by the ubuf_info, so it's safe to * use frags only up until ubuf_info is released */ SKBFL_MANAGED_FRAG_REFS = BIT(4), }; #define SKBFL_ZEROCOPY_FRAG (SKBFL_ZEROCOPY_ENABLE | SKBFL_SHARED_FRAG) #define SKBFL_ALL_ZEROCOPY (SKBFL_ZEROCOPY_FRAG | SKBFL_PURE_ZEROCOPY | \ SKBFL_DONT_ORPHAN | SKBFL_MANAGED_FRAG_REFS) struct ubuf_info_ops { void (*complete)(struct sk_buff *, struct ubuf_info *, bool zerocopy_success); /* has to be compatible with skb_zcopy_set() */ int (*link_skb)(struct sk_buff *skb, struct ubuf_info *uarg); }; /* * The callback notifies userspace to release buffers when skb DMA is done in * lower device, the skb last reference should be 0 when calling this. * The zerocopy_success argument is true if zero copy transmit occurred, * false on data copy or out of memory error caused by data copy attempt. * The ctx field is used to track device context. * The desc field is used to track userspace buffer index. */ struct ubuf_info { const struct ubuf_info_ops *ops; refcount_t refcnt; u8 flags; }; struct ubuf_info_msgzc { struct ubuf_info ubuf; union { struct { unsigned long desc; void *ctx; }; struct { u32 id; u16 len; u16 zerocopy:1; u32 bytelen; }; }; struct mmpin { struct user_struct *user; unsigned int num_pg; } mmp; }; #define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg)) #define uarg_to_msgzc(ubuf_ptr) container_of((ubuf_ptr), struct ubuf_info_msgzc, \ ubuf) int mm_account_pinned_pages(struct mmpin *mmp, size_t size); void mm_unaccount_pinned_pages(struct mmpin *mmp); /* Preserve some data across TX submission and completion. * * Note, this state is stored in the driver. Extending the layout * might need some special care. */ struct xsk_tx_metadata_compl { __u64 *tx_timestamp; }; /* This data is invariant across clones and lives at * the end of the header data, ie. at skb->end. */ struct skb_shared_info { __u8 flags; __u8 meta_len; __u8 nr_frags; __u8 tx_flags; unsigned short gso_size; /* Warning: this field is not always filled in (UFO)! */ unsigned short gso_segs; struct sk_buff *frag_list; union { struct skb_shared_hwtstamps hwtstamps; struct xsk_tx_metadata_compl xsk_meta; }; unsigned int gso_type; u32 tskey; /* * Warning : all fields before dataref are cleared in __alloc_skb() */ atomic_t dataref; union { struct { u32 xdp_frags_size; u32 xdp_frags_truesize; }; /* * Intermediate layers must ensure that destructor_arg * remains valid until skb destructor. */ void *destructor_arg; }; /* must be last field, see pskb_expand_head() */ skb_frag_t frags[MAX_SKB_FRAGS]; }; /** * DOC: dataref and headerless skbs * * Transport layers send out clones of payload skbs they hold for * retransmissions. To allow lower layers of the stack to prepend their headers * we split &skb_shared_info.dataref into two halves. * The lower 16 bits count the overall number of references. * The higher 16 bits indicate how many of the references are payload-only. * skb_header_cloned() checks if skb is allowed to add / write the headers. * * The creator of the skb (e.g. TCP) marks its skb as &sk_buff.nohdr * (via __skb_header_release()). Any clone created from marked skb will get * &sk_buff.hdr_len populated with the available headroom. * If there's the only clone in existence it's able to modify the headroom * at will. The sequence of calls inside the transport layer is:: * * <alloc skb> * skb_reserve() * __skb_header_release() * skb_clone() * // send the clone down the stack * * This is not a very generic construct and it depends on the transport layers * doing the right thing. In practice there's usually only one payload-only skb. * Having multiple payload-only skbs with different lengths of hdr_len is not * possible. The payload-only skbs should never leave their owner. */ #define SKB_DATAREF_SHIFT 16 #define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1) enum { SKB_FCLONE_UNAVAILABLE, /* skb has no fclone (from head_cache) */ SKB_FCLONE_ORIG, /* orig skb (from fclone_cache) */ SKB_FCLONE_CLONE, /* companion fclone skb (from fclone_cache) */ }; enum { SKB_GSO_TCPV4 = 1 << 0, /* This indicates the skb is from an untrusted source. */ SKB_GSO_DODGY = 1 << 1, /* This indicates the tcp segment has CWR set. */ SKB_GSO_TCP_ECN = 1 << 2, __SKB_GSO_TCP_FIXEDID = 1 << 3, SKB_GSO_TCPV6 = 1 << 4, SKB_GSO_FCOE = 1 << 5, SKB_GSO_GRE = 1 << 6, SKB_GSO_GRE_CSUM = 1 << 7, SKB_GSO_IPXIP4 = 1 << 8, SKB_GSO_IPXIP6 = 1 << 9, SKB_GSO_UDP_TUNNEL = 1 << 10, SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11, SKB_GSO_PARTIAL = 1 << 12, SKB_GSO_TUNNEL_REMCSUM = 1 << 13, SKB_GSO_SCTP = 1 << 14, SKB_GSO_ESP = 1 << 15, SKB_GSO_UDP = 1 << 16, SKB_GSO_UDP_L4 = 1 << 17, SKB_GSO_FRAGLIST = 1 << 18, SKB_GSO_TCP_ACCECN = 1 << 19, /* These indirectly map onto the same netdev feature. * If NETIF_F_TSO_MANGLEID is set it may mangle both inner and outer IDs. */ SKB_GSO_TCP_FIXEDID = 1 << 30, SKB_GSO_TCP_FIXEDID_INNER = 1 << 31, }; #if BITS_PER_LONG > 32 #define NET_SKBUFF_DATA_USES_OFFSET 1 #endif #ifdef NET_SKBUFF_DATA_USES_OFFSET typedef unsigned int sk_buff_data_t; #else typedef unsigned char *sk_buff_data_t; #endif enum skb_tstamp_type { SKB_CLOCK_REALTIME, SKB_CLOCK_MONOTONIC, SKB_CLOCK_TAI, __SKB_CLOCK_MAX = SKB_CLOCK_TAI, }; /** * DOC: Basic sk_buff geometry * * struct sk_buff itself is a metadata structure and does not hold any packet * data. All the data is held in associated buffers. * * &sk_buff.head points to the main "head" buffer. The head buffer is divided * into two parts: * * - data buffer, containing headers and sometimes payload; * this is the part of the skb operated on by the common helpers * such as skb_put() or skb_pull(); * - shared info (struct skb_shared_info) which holds an array of pointers * to read-only data in the (page, offset, length) format. * * Optionally &skb_shared_info.frag_list may point to another skb. * * Basic diagram may look like this:: * * --------------- * | sk_buff | * --------------- * ,--------------------------- + head * / ,----------------- + data * / / ,----------- + tail * | | | , + end * | | | | * v v v v * ----------------------------------------------- * | headroom | data | tailroom | skb_shared_info | * ----------------------------------------------- * + [page frag] * + [page frag] * + [page frag] * + [page frag] --------- * + frag_list --> | sk_buff | * --------- * */ /** * struct sk_buff - socket buffer * @next: Next buffer in list * @prev: Previous buffer in list * @tstamp: Time we arrived/left * @skb_mstamp_ns: (aka @tstamp) earliest departure time; start point * for retransmit timer * @rbnode: RB tree node, alternative to next/prev for netem/tcp * @list: queue head * @ll_node: anchor in an llist (eg socket defer_list) * @sk: Socket we are owned by * @dev: Device we arrived on/are leaving by * @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL * @cb: Control buffer. Free for use by every layer. Put private vars here * @_skb_refdst: destination entry (with norefcount bit) * @len: Length of actual data * @data_len: Data length * @mac_len: Length of link layer header * @hdr_len: writable header length of cloned skb * @csum: Checksum (must include start/offset pair) * @csum_start: Offset from skb->head where checksumming should start * @csum_offset: Offset from csum_start where checksum should be stored * @priority: Packet queueing priority * @ignore_df: allow local fragmentation * @cloned: Head may be cloned (check refcnt to be sure) * @ip_summed: Driver fed us an IP checksum * @nohdr: Payload reference only, must not modify header * @pkt_type: Packet class * @fclone: skbuff clone status * @ipvs_property: skbuff is owned by ipvs * @inner_protocol_type: whether the inner protocol is * ENCAP_TYPE_ETHER or ENCAP_TYPE_IPPROTO * @remcsum_offload: remote checksum offload is enabled * @offload_fwd_mark: Packet was L2-forwarded in hardware * @offload_l3_fwd_mark: Packet was L3-forwarded in hardware * @tc_skip_classify: do not classify packet. set by IFB device * @tc_at_ingress: used within tc_classify to distinguish in/egress * @redirected: packet was redirected by packet classifier * @from_ingress: packet was redirected from the ingress path * @nf_skip_egress: packet shall skip nf egress - see netfilter_netdev.h * @peeked: this packet has been seen already, so stats have been * done for it, don't do them again * @nf_trace: netfilter packet trace flag * @protocol: Packet protocol from driver * @destructor: Destruct function * @tcp_tsorted_anchor: list structure for TCP (tp->tsorted_sent_queue) * @_sk_redir: socket redirection information for skmsg * @_nfct: Associated connection, if any (with nfctinfo bits) * @skb_iif: ifindex of device we arrived on * @tc_index: Traffic control index * @hash: the packet hash * @queue_mapping: Queue mapping for multiqueue devices * @head_frag: skb was allocated from page fragments, * not allocated by kmalloc() or vmalloc(). * @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves * @pp_recycle: mark the packet for recycling instead of freeing (implies * page_pool support on driver) * @active_extensions: active extensions (skb_ext_id types) * @ndisc_nodetype: router type (from link layer) * @ooo_okay: allow the mapping of a socket to a queue to be changed * @l4_hash: indicate hash is a canonical 4-tuple hash over transport * ports. * @sw_hash: indicates hash was computed in software stack * @wifi_acked_valid: wifi_acked was set * @wifi_acked: whether frame was acked on wifi or not * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS * @encapsulation: indicates the inner headers in the skbuff are valid * @encap_hdr_csum: software checksum is needed * @csum_valid: checksum is already valid * @csum_not_inet: use CRC32c to resolve CHECKSUM_PARTIAL * @csum_complete_sw: checksum was completed by software * @csum_level: indicates the number of consecutive checksums found in * the packet minus one that have been verified as * CHECKSUM_UNNECESSARY (max 3) * @unreadable: indicates that at least 1 of the fragments in this skb is * unreadable. * @dst_pending_confirm: need to confirm neighbour * @decrypted: Decrypted SKB * @slow_gro: state present at GRO time, slower prepare step required * @tstamp_type: When set, skb->tstamp has the * delivery_time clock base of skb->tstamp. * @napi_id: id of the NAPI struct this skb came from * @sender_cpu: (aka @napi_id) source CPU in XPS * @alloc_cpu: CPU which did the skb allocation. * @secmark: security marking * @mark: Generic packet mark * @reserved_tailroom: (aka @mark) number of bytes of free space available * at the tail of an sk_buff * @vlan_all: vlan fields (proto & tci) * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information * @inner_protocol: Protocol (encapsulation) * @inner_ipproto: (aka @inner_protocol) stores ipproto when * skb->inner_protocol_type == ENCAP_TYPE_IPPROTO; * @inner_transport_header: Inner transport layer header (encapsulation) * @inner_network_header: Network layer header (encapsulation) * @inner_mac_header: Link layer header (encapsulation) * @transport_header: Transport layer header * @network_header: Network layer header * @mac_header: Link layer header * @kcov_handle: KCOV remote handle for remote coverage collection * @tail: Tail pointer * @end: End pointer * @head: Head of buffer * @data: Data head pointer * @truesize: Buffer size * @users: User count - see {datagram,tcp}.c * @extensions: allocated extensions, valid if active_extensions is nonzero */ struct sk_buff { union { struct { /* These two members must be first to match sk_buff_head. */ struct sk_buff *next; struct sk_buff *prev; union { struct net_device *dev; /* Some protocols might use this space to store information, * while device pointer would be NULL. * UDP receive path is one user. */ unsigned long dev_scratch; }; }; struct rb_node rbnode; /* used in netem, ip4 defrag, and tcp stack */ struct list_head list; struct llist_node ll_node; }; struct sock *sk; union { ktime_t tstamp; u64 skb_mstamp_ns; /* earliest departure time */ }; /* * This is the control buffer. It is free to use for every * layer. Please put your private variables there. If you * want to keep them across layers you have to do a skb_clone() * first. This is owned by whoever has the skb queued ATM. */ char cb[48] __aligned(8); union { struct { unsigned long _skb_refdst; void (*destructor)(struct sk_buff *skb); }; struct list_head tcp_tsorted_anchor; #ifdef CONFIG_NET_SOCK_MSG unsigned long _sk_redir; #endif }; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) unsigned long _nfct; #endif unsigned int len, data_len; __u16 mac_len, hdr_len; /* Following fields are _not_ copied in __copy_skb_header() * Note that queue_mapping is here mostly to fill a hole. */ __u16 queue_mapping; /* if you move cloned around you also must adapt those constants */ #ifdef __BIG_ENDIAN_BITFIELD #define CLONED_MASK (1 << 7) #else #define CLONED_MASK 1 #endif #define CLONED_OFFSET offsetof(struct sk_buff, __cloned_offset) /* private: */ __u8 __cloned_offset[0]; /* public: */ __u8 cloned:1, nohdr:1, fclone:2, peeked:1, head_frag:1, pfmemalloc:1, pp_recycle:1; /* page_pool recycle indicator */ #ifdef CONFIG_SKB_EXTENSIONS __u8 active_extensions; #endif /* Fields enclosed in headers group are copied * using a single memcpy() in __copy_skb_header() */ struct_group(headers, /* private: */ __u8 __pkt_type_offset[0]; /* public: */ __u8 pkt_type:3; /* see PKT_TYPE_MAX */ __u8 ignore_df:1; __u8 dst_pending_confirm:1; __u8 ip_summed:2; __u8 ooo_okay:1; /* private: */ __u8 __mono_tc_offset[0]; /* public: */ __u8 tstamp_type:2; /* See skb_tstamp_type */ #ifdef CONFIG_NET_XGRESS __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */ __u8 tc_skip_classify:1; #endif __u8 remcsum_offload:1; __u8 csum_complete_sw:1; __u8 csum_level:2; __u8 inner_protocol_type:1; __u8 l4_hash:1; __u8 sw_hash:1; #ifdef CONFIG_WIRELESS __u8 wifi_acked_valid:1; __u8 wifi_acked:1; #endif __u8 no_fcs:1; /* Indicates the inner headers are valid in the skbuff. */ __u8 encapsulation:1; __u8 encap_hdr_csum:1; __u8 csum_valid:1; #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif #if IS_ENABLED(CONFIG_IP_VS) __u8 ipvs_property:1; #endif #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || IS_ENABLED(CONFIG_NF_TABLES) __u8 nf_trace:1; #endif #ifdef CONFIG_NET_SWITCHDEV __u8 offload_fwd_mark:1; __u8 offload_l3_fwd_mark:1; #endif __u8 redirected:1; #ifdef CONFIG_NET_REDIRECT __u8 from_ingress:1; #endif #ifdef CONFIG_NETFILTER_SKIP_EGRESS __u8 nf_skip_egress:1; #endif #ifdef CONFIG_SKB_DECRYPTED __u8 decrypted:1; #endif __u8 slow_gro:1; #if IS_ENABLED(CONFIG_IP_SCTP) __u8 csum_not_inet:1; #endif __u8 unreadable:1; #if defined(CONFIG_NET_SCHED) || defined(CONFIG_NET_XGRESS) __u16 tc_index; /* traffic control index */ #endif u16 alloc_cpu; union { __wsum csum; struct { __u16 csum_start; __u16 csum_offset; }; }; __u32 priority; int skb_iif; __u32 hash; union { u32 vlan_all; struct { __be16 vlan_proto; __u16 vlan_tci; }; }; #if defined(CONFIG_NET_RX_BUSY_POLL) || defined(CONFIG_XPS) union { unsigned int napi_id; unsigned int sender_cpu; }; #endif #ifdef CONFIG_NETWORK_SECMARK __u32 secmark; #endif union { __u32 mark; __u32 reserved_tailroom; }; union { __be16 inner_protocol; __u8 inner_ipproto; }; __u16 inner_transport_header; __u16 inner_network_header; __u16 inner_mac_header; __be16 protocol; __u16 transport_header; __u16 network_header; __u16 mac_header; #ifdef CONFIG_KCOV u64 kcov_handle; #endif ); /* end headers group */ /* These elements must be at the end, see alloc_skb() for details. */ sk_buff_data_t tail; sk_buff_data_t end; unsigned char *head, *data; unsigned int truesize; refcount_t users; #ifdef CONFIG_SKB_EXTENSIONS /* only usable after checking ->active_extensions != 0 */ struct skb_ext *extensions; #endif }; /* if you move pkt_type around you also must adapt those constants */ #ifdef __BIG_ENDIAN_BITFIELD #define PKT_TYPE_MAX (7 << 5) #else #define PKT_TYPE_MAX 7 #endif #define PKT_TYPE_OFFSET offsetof(struct sk_buff, __pkt_type_offset) /* if you move tc_at_ingress or tstamp_type * around, you also must adapt these constants. */ #ifdef __BIG_ENDIAN_BITFIELD #define SKB_TSTAMP_TYPE_MASK (3 << 6) #define SKB_TSTAMP_TYPE_RSHIFT (6) #define TC_AT_INGRESS_MASK (1 << 5) #else #define SKB_TSTAMP_TYPE_MASK (3) #define TC_AT_INGRESS_MASK (1 << 2) #endif #define SKB_BF_MONO_TC_OFFSET offsetof(struct sk_buff, __mono_tc_offset) #ifdef __KERNEL__ /* * Handling routines are only of interest to the kernel */ #define SKB_ALLOC_FCLONE 0x01 #define SKB_ALLOC_RX 0x02 #define SKB_ALLOC_NAPI 0x04 /** * skb_pfmemalloc - Test if the skb was allocated from PFMEMALLOC reserves * @skb: buffer */ static inline bool skb_pfmemalloc(const struct sk_buff *skb) { return unlikely(skb->pfmemalloc); } /* * skb might have a dst pointer attached, refcounted or not. * _skb_refdst low order bit is set if refcount was _not_ taken */ #define SKB_DST_NOREF 1UL #define SKB_DST_PTRMASK ~(SKB_DST_NOREF) /** * skb_dst - returns skb dst_entry * @skb: buffer * * Returns: skb dst_entry, regardless of reference taken or not. */ static inline struct dst_entry *skb_dst(const struct sk_buff *skb) { /* If refdst was not refcounted, check we still are in a * rcu_read_lock section */ WARN_ON((skb->_skb_refdst & SKB_DST_NOREF) && !rcu_read_lock_held() && !rcu_read_lock_bh_held()); return (struct dst_entry *)(skb->_skb_refdst & SKB_DST_PTRMASK); } static inline void skb_dst_check_unset(struct sk_buff *skb) { DEBUG_NET_WARN_ON_ONCE((skb->_skb_refdst & SKB_DST_PTRMASK) && !(skb->_skb_refdst & SKB_DST_NOREF)); } /** * skb_dstref_steal() - return current dst_entry value and clear it * @skb: buffer * * Resets skb dst_entry without adjusting its reference count. Useful in * cases where dst_entry needs to be temporarily reset and restored. * Note that the returned value cannot be used directly because it * might contain SKB_DST_NOREF bit. * * When in doubt, prefer skb_dst_drop() over skb_dstref_steal() to correctly * handle dst_entry reference counting. * * Returns: original skb dst_entry. */ static inline unsigned long skb_dstref_steal(struct sk_buff *skb) { unsigned long refdst = skb->_skb_refdst; skb->_skb_refdst = 0; return refdst; } /** * skb_dstref_restore() - restore skb dst_entry removed via skb_dstref_steal() * @skb: buffer * @refdst: dst entry from a call to skb_dstref_steal() */ static inline void skb_dstref_restore(struct sk_buff *skb, unsigned long refdst) { skb_dst_check_unset(skb); skb->_skb_refdst = refdst; } /** * skb_dst_set - sets skb dst * @skb: buffer * @dst: dst entry * * Sets skb dst, assuming a reference was taken on dst and should * be released by skb_dst_drop() */ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) { skb_dst_check_unset(skb); skb->slow_gro |= !!dst; skb->_skb_refdst = (unsigned long)dst; } /** * skb_dst_set_noref - sets skb dst, hopefully, without taking reference * @skb: buffer * @dst: dst entry * * Sets skb dst, assuming a reference was not taken on dst. * If dst entry is cached, we do not take reference and dst_release * will be avoided by refdst_drop. If dst entry is not cached, we take * reference, so that last dst_release can destroy the dst immediately. */ static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) { skb_dst_check_unset(skb); WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); skb->slow_gro |= !!dst; skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; } /** * skb_dst_is_noref - Test if skb dst isn't refcounted * @skb: buffer */ static inline bool skb_dst_is_noref(const struct sk_buff *skb) { return (skb->_skb_refdst & SKB_DST_NOREF) && skb_dst(skb); } /* For mangling skb->pkt_type from user space side from applications * such as nft, tc, etc, we only allow a conservative subset of * possible pkt_types to be set. */ static inline bool skb_pkt_type_ok(u32 ptype) { return ptype <= PACKET_OTHERHOST; } /** * skb_napi_id - Returns the skb's NAPI id * @skb: buffer */ static inline unsigned int skb_napi_id(const struct sk_buff *skb) { #ifdef CONFIG_NET_RX_BUSY_POLL return skb->napi_id; #else return 0; #endif } static inline bool skb_wifi_acked_valid(const struct sk_buff *skb) { #ifdef CONFIG_WIRELESS return skb->wifi_acked_valid; #else return 0; #endif } /** * skb_unref - decrement the skb's reference count * @skb: buffer * * Returns: true if we can free the skb. */ static inline bool skb_unref(struct sk_buff *skb) { if (unlikely(!skb)) return false; if (!IS_ENABLED(CONFIG_DEBUG_NET) && likely(refcount_read(&skb->users) == 1)) smp_rmb(); else if (likely(!refcount_dec_and_test(&skb->users))) return false; return true; } static inline bool skb_data_unref(const struct sk_buff *skb, struct skb_shared_info *shinfo) { int bias; if (!skb->cloned) return true; bias = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1; if (atomic_read(&shinfo->dataref) == bias) smp_rmb(); else if (atomic_sub_return(bias, &shinfo->dataref)) return false; return true; } void __fix_address sk_skb_reason_drop(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason reason); static inline void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) { sk_skb_reason_drop(NULL, skb, reason); } /** * kfree_skb - free an sk_buff with 'NOT_SPECIFIED' reason * @skb: buffer to free */ static inline void kfree_skb(struct sk_buff *skb) { kfree_skb_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED); } void skb_release_head_state(struct sk_buff *skb); void kfree_skb_list_reason(struct sk_buff *segs, enum skb_drop_reason reason); void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt); void skb_tx_error(struct sk_buff *skb); static inline void kfree_skb_list(struct sk_buff *segs) { kfree_skb_list_reason(segs, SKB_DROP_REASON_NOT_SPECIFIED); } #ifdef CONFIG_TRACEPOINTS void consume_skb(struct sk_buff *skb); #else static inline void consume_skb(struct sk_buff *skb) { return kfree_skb(skb); } #endif void __consume_stateless_skb(struct sk_buff *skb); void __kfree_skb(struct sk_buff *skb); void kfree_skb_partial(struct sk_buff *skb, bool head_stolen); bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, bool *fragstolen, int *delta_truesize); struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags, int node); struct sk_buff *__build_skb(void *data, unsigned int frag_size); struct sk_buff *build_skb(void *data, unsigned int frag_size); struct sk_buff *build_skb_around(struct sk_buff *skb, void *data, unsigned int frag_size); void skb_attempt_defer_free(struct sk_buff *skb); u32 napi_skb_cache_get_bulk(void **skbs, u32 n); struct sk_buff *napi_build_skb(void *data, unsigned int frag_size); struct sk_buff *slab_build_skb(void *data); /** * alloc_skb - allocate a network buffer * @size: size to allocate * @priority: allocation mask * * This function is a convenient wrapper around __alloc_skb(). */ static inline struct sk_buff *alloc_skb(unsigned int size, gfp_t priority) { return __alloc_skb(size, priority, 0, NUMA_NO_NODE); } struct sk_buff *alloc_skb_with_frags(unsigned long header_len, unsigned long data_len, int max_page_order, int *errcode, gfp_t gfp_mask); struct sk_buff *alloc_skb_for_msg(struct sk_buff *first); /* Layout of fast clones : [skb1][skb2][fclone_ref] */ struct sk_buff_fclones { struct sk_buff skb1; struct sk_buff skb2; refcount_t fclone_ref; }; /** * skb_fclone_busy - check if fclone is busy * @sk: socket * @skb: buffer * * Returns: true if skb is a fast clone, and its clone is not freed. * Some drivers call skb_orphan() in their ndo_start_xmit(), * so we also check that didn't happen. */ static inline bool skb_fclone_busy(const struct sock *sk, const struct sk_buff *skb) { const struct sk_buff_fclones *fclones; fclones = container_of(skb, struct sk_buff_fclones, skb1); return skb->fclone == SKB_FCLONE_ORIG && refcount_read(&fclones->fclone_ref) > 1 && READ_ONCE(fclones->skb2.sk) == sk; } /** * alloc_skb_fclone - allocate a network buffer from fclone cache * @size: size to allocate * @priority: allocation mask * * This function is a convenient wrapper around __alloc_skb(). */ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, gfp_t priority) { return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE); } struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); void skb_headers_offset_update(struct sk_buff *skb, int off); int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask); struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); void skb_copy_header(struct sk_buff *new, const struct sk_buff *old); struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t priority); struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom, gfp_t gfp_mask, bool fclone); static inline struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask) { return __pskb_copy_fclone(skb, headroom, gfp_mask, false); } int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask); struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom); struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom); struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, gfp_t priority); int __must_check skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg, int offset, int len); int __must_check skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len); int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer); int __skb_pad(struct sk_buff *skb, int pad, bool free_on_error); /** * skb_pad - zero pad the tail of an skb * @skb: buffer to pad * @pad: space to pad * * Ensure that a buffer is followed by a padding area that is zero * filled. Used by network drivers which may DMA or transfer data * beyond the buffer end onto the wire. * * May return error in out of memory cases. The skb is freed on error. */ static inline int skb_pad(struct sk_buff *skb, int pad) { return __skb_pad(skb, pad, true); } #define dev_kfree_skb(a) consume_skb(a) int skb_append_pagefrags(struct sk_buff *skb, struct page *page, int offset, size_t size, size_t max_frags); struct skb_seq_state { __u32 lower_offset; __u32 upper_offset; __u32 frag_idx; __u32 stepped_offset; struct sk_buff *root_skb; struct sk_buff *cur_skb; __u8 *frag_data; __u32 frag_off; }; void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, unsigned int to, struct skb_seq_state *st); unsigned int skb_seq_read(unsigned int consumed, const u8 **data, struct skb_seq_state *st); void skb_abort_seq_read(struct skb_seq_state *st); int skb_copy_seq_read(struct skb_seq_state *st, int offset, void *to, int len); unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, unsigned int to, struct ts_config *config); /* * Packet hash types specify the type of hash in skb_set_hash. * * Hash types refer to the protocol layer addresses which are used to * construct a packet's hash. The hashes are used to differentiate or identify * flows of the protocol layer for the hash type. Hash types are either * layer-2 (L2), layer-3 (L3), or layer-4 (L4). * * Properties of hashes: * * 1) Two packets in different flows have different hash values * 2) Two packets in the same flow should have the same hash value * * A hash at a higher layer is considered to be more specific. A driver should * set the most specific hash possible. * * A driver cannot indicate a more specific hash than the layer at which a hash * was computed. For instance an L3 hash cannot be set as an L4 hash. * * A driver may indicate a hash level which is less specific than the * actual layer the hash was computed on. For instance, a hash computed * at L4 may be considered an L3 hash. This should only be done if the * driver can't unambiguously determine that the HW computed the hash at * the higher layer. Note that the "should" in the second property above * permits this. */ enum pkt_hash_types { PKT_HASH_TYPE_NONE, /* Undefined type */ PKT_HASH_TYPE_L2, /* Input: src_MAC, dest_MAC */ PKT_HASH_TYPE_L3, /* Input: src_IP, dst_IP */ PKT_HASH_TYPE_L4, /* Input: src_IP, dst_IP, src_port, dst_port */ }; static inline void skb_clear_hash(struct sk_buff *skb) { skb->hash = 0; skb->sw_hash = 0; skb->l4_hash = 0; } static inline void skb_clear_hash_if_not_l4(struct sk_buff *skb) { if (!skb->l4_hash) skb_clear_hash(skb); } static inline void __skb_set_hash(struct sk_buff *skb, __u32 hash, bool is_sw, bool is_l4) { skb->l4_hash = is_l4; skb->sw_hash = is_sw; skb->hash = hash; } static inline void skb_set_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type) { /* Used by drivers to set hash from HW */ __skb_set_hash(skb, hash, false, type == PKT_HASH_TYPE_L4); } static inline void __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4) { __skb_set_hash(skb, hash, true, is_l4); } u32 __skb_get_hash_symmetric_net(const struct net *net, const struct sk_buff *skb); static inline u32 __skb_get_hash_symmetric(const struct sk_buff *skb) { return __skb_get_hash_symmetric_net(NULL, skb); } void __skb_get_hash_net(const struct net *net, struct sk_buff *skb); u32 skb_get_poff(const struct sk_buff *skb); u32 __skb_get_poff(const struct sk_buff *skb, const void *data, const struct flow_keys_basic *keys, int hlen); __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, const void *data, int hlen_proto); void skb_flow_dissector_init(struct flow_dissector *flow_dissector, const struct flow_dissector_key *key, unsigned int key_count); struct bpf_flow_dissector; u32 bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, __be16 proto, int nhoff, int hlen, unsigned int flags); bool __skb_flow_dissect(const struct net *net, const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, const void *data, __be16 proto, int nhoff, int hlen, unsigned int flags); static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, unsigned int flags) { return __skb_flow_dissect(NULL, skb, flow_dissector, target_container, NULL, 0, 0, 0, flags); } static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb, struct flow_keys *flow, unsigned int flags) { memset(flow, 0, sizeof(*flow)); return __skb_flow_dissect(NULL, skb, &flow_keys_dissector, flow, NULL, 0, 0, 0, flags); } static inline bool skb_flow_dissect_flow_keys_basic(const struct net *net, const struct sk_buff *skb, struct flow_keys_basic *flow, const void *data, __be16 proto, int nhoff, int hlen, unsigned int flags) { memset(flow, 0, sizeof(*flow)); return __skb_flow_dissect(net, skb, &flow_keys_basic_dissector, flow, data, proto, nhoff, hlen, flags); } void skb_flow_dissect_meta(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container); /* Gets a skb connection tracking info, ctinfo map should be a * map of mapsize to translate enum ip_conntrack_info states * to user states. */ void skb_flow_dissect_ct(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, u16 *ctinfo_map, size_t mapsize, bool post_ct, u16 zone); void skb_flow_dissect_tunnel_info(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container); void skb_flow_dissect_hash(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container); static inline __u32 skb_get_hash_net(const struct net *net, struct sk_buff *skb) { if (!skb->l4_hash && !skb->sw_hash) __skb_get_hash_net(net, skb); return skb->hash; } static inline __u32 skb_get_hash(struct sk_buff *skb) { if (!skb->l4_hash && !skb->sw_hash) __skb_get_hash_net(NULL, skb); return skb->hash; } static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6) { if (!skb->l4_hash && !skb->sw_hash) { struct flow_keys keys; __u32 hash = __get_hash_from_flowi6(fl6, &keys); __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys)); } return skb->hash; } __u32 skb_get_hash_perturb(const struct sk_buff *skb, const siphash_key_t *perturb); static inline __u32 skb_get_hash_raw(const struct sk_buff *skb) { return skb->hash; } static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from) { to->hash = from->hash; to->sw_hash = from->sw_hash; to->l4_hash = from->l4_hash; }; static inline int skb_cmp_decrypted(const struct sk_buff *skb1, const struct sk_buff *skb2) { #ifdef CONFIG_SKB_DECRYPTED return skb2->decrypted - skb1->decrypted; #else return 0; #endif } static inline bool skb_is_decrypted(const struct sk_buff *skb) { #ifdef CONFIG_SKB_DECRYPTED return skb->decrypted; #else return false; #endif } static inline void skb_copy_decrypted(struct sk_buff *to, const struct sk_buff *from) { #ifdef CONFIG_SKB_DECRYPTED to->decrypted = from->decrypted; #endif } #ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { return skb->head + skb->end; } static inline unsigned int skb_end_offset(const struct sk_buff *skb) { return skb->end; } static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) { skb->end = offset; } #else static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { return skb->end; } static inline unsigned int skb_end_offset(const struct sk_buff *skb) { return skb->end - skb->head; } static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) { skb->end = skb->head + offset; } #endif extern const struct ubuf_info_ops msg_zerocopy_ubuf_ops; struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, struct ubuf_info *uarg, bool devmem); void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref); struct net_devmem_dmabuf_binding; int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, struct iov_iter *from, size_t length, struct net_devmem_dmabuf_binding *binding); int zerocopy_fill_skb_from_iter(struct sk_buff *skb, struct iov_iter *from, size_t length); static inline int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len) { return __zerocopy_sg_from_iter(msg, skb->sk, skb, &msg->msg_iter, len, NULL); } int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, struct msghdr *msg, int len, struct ubuf_info *uarg, struct net_devmem_dmabuf_binding *binding); /* Internal */ #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB))) static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb) { return &skb_shinfo(skb)->hwtstamps; } static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb) { bool is_zcopy = skb && skb_shinfo(skb)->flags & SKBFL_ZEROCOPY_ENABLE; return is_zcopy ? skb_uarg(skb) : NULL; } static inline bool skb_zcopy_pure(const struct sk_buff *skb) { return skb_shinfo(skb)->flags & SKBFL_PURE_ZEROCOPY; } static inline bool skb_zcopy_managed(const struct sk_buff *skb) { return skb_shinfo(skb)->flags & SKBFL_MANAGED_FRAG_REFS; } static inline bool skb_pure_zcopy_same(const struct sk_buff *skb1, const struct sk_buff *skb2) { return skb_zcopy_pure(skb1) == skb_zcopy_pure(skb2); } static inline void net_zcopy_get(struct ubuf_info *uarg) { refcount_inc(&uarg->refcnt); } static inline void skb_zcopy_init(struct sk_buff *skb, struct ubuf_info *uarg) { skb_shinfo(skb)->destructor_arg = uarg; skb_shinfo(skb)->flags |= uarg->flags; } static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg, bool *have_ref) { if (skb && uarg && !skb_zcopy(skb)) { if (unlikely(have_ref && *have_ref)) *have_ref = false; else net_zcopy_get(uarg); skb_zcopy_init(skb, uarg); } } static inline void skb_zcopy_set_nouarg(struct sk_buff *skb, void *val) { skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t) val | 0x1UL); skb_shinfo(skb)->flags |= SKBFL_ZEROCOPY_FRAG; } static inline bool skb_zcopy_is_nouarg(struct sk_buff *skb) { return (uintptr_t) skb_shinfo(skb)->destructor_arg & 0x1UL; } static inline void *skb_zcopy_get_nouarg(struct sk_buff *skb) { return (void *)((uintptr_t) skb_shinfo(skb)->destructor_arg & ~0x1UL); } static inline void net_zcopy_put(struct ubuf_info *uarg) { if (uarg) uarg->ops->complete(NULL, uarg, true); } static inline void net_zcopy_put_abort(struct ubuf_info *uarg, bool have_uref) { if (uarg) { if (uarg->ops == &msg_zerocopy_ubuf_ops) msg_zerocopy_put_abort(uarg, have_uref); else if (have_uref) net_zcopy_put(uarg); } } /* Release a reference on a zerocopy structure */ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy_success) { struct ubuf_info *uarg = skb_zcopy(skb); if (uarg) { if (!skb_zcopy_is_nouarg(skb)) uarg->ops->complete(skb, uarg, zerocopy_success); skb_shinfo(skb)->flags &= ~SKBFL_ALL_ZEROCOPY; } } void __skb_zcopy_downgrade_managed(struct sk_buff *skb); static inline void skb_zcopy_downgrade_managed(struct sk_buff *skb) { if (unlikely(skb_zcopy_managed(skb))) __skb_zcopy_downgrade_managed(skb); } /* Return true if frags in this skb are readable by the host. */ static inline bool skb_frags_readable(const struct sk_buff *skb) { return !skb->unreadable; } static inline void skb_mark_not_on_list(struct sk_buff *skb) { skb->next = NULL; } static inline void skb_poison_list(struct sk_buff *skb) { #ifdef CONFIG_DEBUG_NET skb->next = SKB_LIST_POISON_NEXT; #endif } /* Iterate through singly-linked GSO fragments of an skb. */ #define skb_list_walk_safe(first, skb, next_skb) \ for ((skb) = (first), (next_skb) = (skb) ? (skb)->next : NULL; (skb); \ (skb) = (next_skb), (next_skb) = (skb) ? (skb)->next : NULL) static inline void skb_list_del_init(struct sk_buff *skb) { __list_del_entry(&skb->list); skb_mark_not_on_list(skb); } /** * skb_queue_empty - check if a queue is empty * @list: queue head * * Returns true if the queue is empty, false otherwise. */ static inline int skb_queue_empty(const struct sk_buff_head *list) { return list->next == (const struct sk_buff *) list; } /** * skb_queue_empty_lockless - check if a queue is empty * @list: queue head * * Returns true if the queue is empty, false otherwise. * This variant can be used in lockless contexts. */ static inline bool skb_queue_empty_lockless(const struct sk_buff_head *list) { return READ_ONCE(list->next) == (const struct sk_buff *) list; } /** * skb_queue_is_last - check if skb is the last entry in the queue * @list: queue head * @skb: buffer * * Returns true if @skb is the last buffer on the list. */ static inline bool skb_queue_is_last(const struct sk_buff_head *list, const struct sk_buff *skb) { return skb->next == (const struct sk_buff *) list; } /** * skb_queue_is_first - check if skb is the first entry in the queue * @list: queue head * @skb: buffer * * Returns true if @skb is the first buffer on the list. */ static inline bool skb_queue_is_first(const struct sk_buff_head *list, const struct sk_buff *skb) { return skb->prev == (const struct sk_buff *) list; } /** * skb_queue_next - return the next packet in the queue * @list: queue head * @skb: current buffer * * Return the next packet in @list after @skb. It is only valid to * call this if skb_queue_is_last() evaluates to false. */ static inline struct sk_buff *skb_queue_next(const struct sk_buff_head *list, const struct sk_buff *skb) { /* This BUG_ON may seem severe, but if we just return then we * are going to dereference garbage. */ BUG_ON(skb_queue_is_last(list, skb)); return skb->next; } /** * skb_queue_prev - return the prev packet in the queue * @list: queue head * @skb: current buffer * * Return the prev packet in @list before @skb. It is only valid to * call this if skb_queue_is_first() evaluates to false. */ static inline struct sk_buff *skb_queue_prev(const struct sk_buff_head *list, const struct sk_buff *skb) { /* This BUG_ON may seem severe, but if we just return then we * are going to dereference garbage. */ BUG_ON(skb_queue_is_first(list, skb)); return skb->prev; } /** * skb_get - reference buffer * @skb: buffer to reference * * Makes another reference to a socket buffer and returns a pointer * to the buffer. */ static inline struct sk_buff *skb_get(struct sk_buff *skb) { refcount_inc(&skb->users); return skb; } /* * If users == 1, we are the only owner and can avoid redundant atomic changes. */ /** * skb_cloned - is the buffer a clone * @skb: buffer to check * * Returns true if the buffer was generated with skb_clone() and is * one of multiple shared copies of the buffer. Cloned buffers are * shared data so must not be written to under normal circumstances. */ static inline int skb_cloned(const struct sk_buff *skb) { return skb->cloned && (atomic_read(&skb_shinfo(skb)->dataref) & SKB_DATAREF_MASK) != 1; } static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) { might_sleep_if(gfpflags_allow_blocking(pri)); if (skb_cloned(skb)) return pskb_expand_head(skb, 0, 0, pri); return 0; } /* This variant of skb_unclone() makes sure skb->truesize * and skb_end_offset() are not changed, whenever a new skb->head is needed. * * Indeed there is no guarantee that ksize(kmalloc(X)) == ksize(kmalloc(X)) * when various debugging features are in place. */ int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri); static inline int skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) { might_sleep_if(gfpflags_allow_blocking(pri)); if (skb_cloned(skb)) return __skb_unclone_keeptruesize(skb, pri); return 0; } /** * skb_header_cloned - is the header a clone * @skb: buffer to check * * Returns true if modifying the header part of the buffer requires * the data to be copied. */ static inline int skb_header_cloned(const struct sk_buff *skb) { int dataref; if (!skb->cloned) return 0; dataref = atomic_read(&skb_shinfo(skb)->dataref); dataref = (dataref & SKB_DATAREF_MASK) - (dataref >> SKB_DATAREF_SHIFT); return dataref != 1; } static inline int skb_header_unclone(struct sk_buff *skb, gfp_t pri) { might_sleep_if(gfpflags_allow_blocking(pri)); if (skb_header_cloned(skb)) return pskb_expand_head(skb, 0, 0, pri); return 0; } /** * __skb_header_release() - allow clones to use the headroom * @skb: buffer to operate on * * See "DOC: dataref and headerless skbs". */ static inline void __skb_header_release(struct sk_buff *skb) { skb->nohdr = 1; atomic_set(&skb_shinfo(skb)->dataref, 1 + (1 << SKB_DATAREF_SHIFT)); } /** * skb_shared - is the buffer shared * @skb: buffer to check * * Returns true if more than one person has a reference to this * buffer. */ static inline int skb_shared(const struct sk_buff *skb) { return refcount_read(&skb->users) != 1; } /** * skb_share_check - check if buffer is shared and if so clone it * @skb: buffer to check * @pri: priority for memory allocation * * If the buffer is shared the buffer is cloned and the old copy * drops a reference. A new clone with a single reference is returned. * If the buffer is not shared the original buffer is returned. When * being called from interrupt status or with spinlocks held pri must * be GFP_ATOMIC. * * NULL is returned on a memory allocation failure. */ static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri) { might_sleep_if(gfpflags_allow_blocking(pri)); if (skb_shared(skb)) { struct sk_buff *nskb = skb_clone(skb, pri); if (likely(nskb)) consume_skb(skb); else kfree_skb(skb); skb = nskb; } return skb; } /* * Copy shared buffers into a new sk_buff. We effectively do COW on * packets to handle cases where we have a local reader and forward * and a couple of other messy ones. The normal one is tcpdumping * a packet that's being forwarded. */ /** * skb_unshare - make a copy of a shared buffer * @skb: buffer to check * @pri: priority for memory allocation * * If the socket buffer is a clone then this function creates a new * copy of the data, drops a reference count on the old copy and returns * the new copy with the reference count at 1. If the buffer is not a clone * the original buffer is returned. When called with a spinlock held or * from interrupt state @pri must be %GFP_ATOMIC * * %NULL is returned on a memory allocation failure. */ static inline struct sk_buff *skb_unshare(struct sk_buff *skb, gfp_t pri) { might_sleep_if(gfpflags_allow_blocking(pri)); if (skb_cloned(skb)) { struct sk_buff *nskb = skb_copy(skb, pri); /* Free our shared copy */ if (likely(nskb)) consume_skb(skb); else kfree_skb(skb); skb = nskb; } return skb; } /** * skb_peek - peek at the head of an &sk_buff_head * @list_: list to peek at * * Peek an &sk_buff. Unlike most other operations you _MUST_ * be careful with this one. A peek leaves the buffer on the * list and someone else may run off with it. You must hold * the appropriate locks or have a private queue to do this. * * Returns %NULL for an empty list or a pointer to the head element. * The reference count is not incremented and the reference is therefore * volatile. Use with caution. */ static inline struct sk_buff *skb_peek(const struct sk_buff_head *list_) { struct sk_buff *skb = list_->next; if (skb == (struct sk_buff *)list_) skb = NULL; return skb; } /** * __skb_peek - peek at the head of a non-empty &sk_buff_head * @list_: list to peek at * * Like skb_peek(), but the caller knows that the list is not empty. */ static inline struct sk_buff *__skb_peek(const struct sk_buff_head *list_) { return list_->next; } /** * skb_peek_next - peek skb following the given one from a queue * @skb: skb to start from * @list_: list to peek at * * Returns %NULL when the end of the list is met or a pointer to the * next element. The reference count is not incremented and the * reference is therefore volatile. Use with caution. */ static inline struct sk_buff *skb_peek_next(struct sk_buff *skb, const struct sk_buff_head *list_) { struct sk_buff *next = skb->next; if (next == (struct sk_buff *)list_) next = NULL; return next; } /** * skb_peek_tail - peek at the tail of an &sk_buff_head * @list_: list to peek at * * Peek an &sk_buff. Unlike most other operations you _MUST_ * be careful with this one. A peek leaves the buffer on the * list and someone else may run off with it. You must hold * the appropriate locks or have a private queue to do this. * * Returns %NULL for an empty list or a pointer to the tail element. * The reference count is not incremented and the reference is therefore * volatile. Use with caution. */ static inline struct sk_buff *skb_peek_tail(const struct sk_buff_head *list_) { struct sk_buff *skb = READ_ONCE(list_->prev); if (skb == (struct sk_buff *)list_) skb = NULL; return skb; } /** * skb_queue_len - get queue length * @list_: list to measure * * Return the length of an &sk_buff queue. */ static inline __u32 skb_queue_len(const struct sk_buff_head *list_) { return list_->qlen; } /** * skb_queue_len_lockless - get queue length * @list_: list to measure * * Return the length of an &sk_buff queue. * This variant can be used in lockless contexts. */ static inline __u32 skb_queue_len_lockless(const struct sk_buff_head *list_) { return READ_ONCE(list_->qlen); } /** * __skb_queue_head_init - initialize non-spinlock portions of sk_buff_head * @list: queue to initialize * * This initializes only the list and queue length aspects of * an sk_buff_head object. This allows to initialize the list * aspects of an sk_buff_head without reinitializing things like * the spinlock. It can also be used for on-stack sk_buff_head * objects where the spinlock is known to not be used. */ static inline void __skb_queue_head_init(struct sk_buff_head *list) { list->prev = list->next = (struct sk_buff *)list; list->qlen = 0; } /* * This function creates a split out lock class for each invocation; * this is needed for now since a whole lot of users of the skb-queue * infrastructure in drivers have different locking usage (in hardirq) * than the networking core (in softirq only). In the long run either the * network layer or drivers should need annotation to consolidate the * main types of usage into 3 classes. */ static inline void skb_queue_head_init(struct sk_buff_head *list) { spin_lock_init(&list->lock); __skb_queue_head_init(list); } static inline void skb_queue_head_init_class(struct sk_buff_head *list, struct lock_class_key *class) { skb_queue_head_init(list); lockdep_set_class(&list->lock, class); } /* * Insert an sk_buff on a list. * * The "__skb_xxxx()" functions are the non-atomic ones that * can only be called with interrupts disabled. */ static inline void __skb_insert(struct sk_buff *newsk, struct sk_buff *prev, struct sk_buff *next, struct sk_buff_head *list) { /* See skb_queue_empty_lockless() and skb_peek_tail() * for the opposite READ_ONCE() */ WRITE_ONCE(newsk->next, next); WRITE_ONCE(newsk->prev, prev); WRITE_ONCE(((struct sk_buff_list *)next)->prev, newsk); WRITE_ONCE(((struct sk_buff_list *)prev)->next, newsk); WRITE_ONCE(list->qlen, list->qlen + 1); } static inline void __skb_queue_splice(const struct sk_buff_head *list, struct sk_buff *prev, struct sk_buff *next) { struct sk_buff *first = list->next; struct sk_buff *last = list->prev; WRITE_ONCE(first->prev, prev); WRITE_ONCE(prev->next, first); WRITE_ONCE(last->next, next); WRITE_ONCE(next->prev, last); } /** * skb_queue_splice - join two skb lists, this is designed for stacks * @list: the new list to add * @head: the place to add it in the first list */ static inline void skb_queue_splice(const struct sk_buff_head *list, struct sk_buff_head *head) { if (!skb_queue_empty(list)) { __skb_queue_splice(list, (struct sk_buff *) head, head->next); head->qlen += list->qlen; } } /** * skb_queue_splice_init - join two skb lists and reinitialise the emptied list * @list: the new list to add * @head: the place to add it in the first list * * The list at @list is reinitialised */ static inline void skb_queue_splice_init(struct sk_buff_head *list, struct sk_buff_head *head) { if (!skb_queue_empty(list)) { __skb_queue_splice(list, (struct sk_buff *) head, head->next); head->qlen += list->qlen; __skb_queue_head_init(list); } } /** * skb_queue_splice_tail - join two skb lists, each list being a queue * @list: the new list to add * @head: the place to add it in the first list */ static inline void skb_queue_splice_tail(const struct sk_buff_head *list, struct sk_buff_head *head) { if (!skb_queue_empty(list)) { __skb_queue_splice(list, head->prev, (struct sk_buff *) head); head->qlen += list->qlen; } } /** * skb_queue_splice_tail_init - join two skb lists and reinitialise the emptied list * @list: the new list to add * @head: the place to add it in the first list * * Each of the lists is a queue. * The list at @list is reinitialised */ static inline void skb_queue_splice_tail_init(struct sk_buff_head *list, struct sk_buff_head *head) { if (!skb_queue_empty(list)) { __skb_queue_splice(list, head->prev, (struct sk_buff *) head); head->qlen += list->qlen; __skb_queue_head_init(list); } } /** * __skb_queue_after - queue a buffer at the list head * @list: list to use * @prev: place after this buffer * @newsk: buffer to queue * * Queue a buffer int the middle of a list. This function takes no locks * and you must therefore hold required locks before calling it. * * A buffer cannot be placed on two lists at the same time. */ static inline void __skb_queue_after(struct sk_buff_head *list, struct sk_buff *prev, struct sk_buff *newsk) { __skb_insert(newsk, prev, ((struct sk_buff_list *)prev)->next, list); } void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list); static inline void __skb_queue_before(struct sk_buff_head *list, struct sk_buff *next, struct sk_buff *newsk) { __skb_insert(newsk, ((struct sk_buff_list *)next)->prev, next, list); } /** * __skb_queue_head - queue a buffer at the list head * @list: list to use * @newsk: buffer to queue * * Queue a buffer at the start of a list. This function takes no locks * and you must therefore hold required locks before calling it. * * A buffer cannot be placed on two lists at the same time. */ static inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) { __skb_queue_after(list, (struct sk_buff *)list, newsk); } void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk); /** * __skb_queue_tail - queue a buffer at the list tail * @list: list to use * @newsk: buffer to queue * * Queue a buffer at the end of a list. This function takes no locks * and you must therefore hold required locks before calling it. * * A buffer cannot be placed on two lists at the same time. */ static inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) { __skb_queue_before(list, (struct sk_buff *)list, newsk); } void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk); /* * remove sk_buff from list. _Must_ be called atomically, and with * the list known.. */ void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list); static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) { struct sk_buff *next, *prev; WRITE_ONCE(list->qlen, list->qlen - 1); next = skb->next; prev = skb->prev; skb->next = skb->prev = NULL; WRITE_ONCE(next->prev, prev); WRITE_ONCE(prev->next, next); } /** * __skb_dequeue - remove from the head of the queue * @list: list to dequeue from * * Remove the head of the list. This function does not take any locks * so must be used with appropriate locks held only. The head item is * returned or %NULL if the list is empty. */ static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) { struct sk_buff *skb = skb_peek(list); if (skb) __skb_unlink(skb, list); return skb; } struct sk_buff *skb_dequeue(struct sk_buff_head *list); /** * __skb_dequeue_tail - remove from the tail of the queue * @list: list to dequeue from * * Remove the tail of the list. This function does not take any locks * so must be used with appropriate locks held only. The tail item is * returned or %NULL if the list is empty. */ static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list) { struct sk_buff *skb = skb_peek_tail(list); if (skb) __skb_unlink(skb, list); return skb; } struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list); static inline bool skb_is_nonlinear(const struct sk_buff *skb) { return skb->data_len; } static inline unsigned int skb_headlen(const struct sk_buff *skb) { return skb->len - skb->data_len; } static inline unsigned int __skb_pagelen(const struct sk_buff *skb) { unsigned int i, len = 0; for (i = skb_shinfo(skb)->nr_frags - 1; (int)i >= 0; i--) len += skb_frag_size(&skb_shinfo(skb)->frags[i]); return len; } static inline unsigned int skb_pagelen(const struct sk_buff *skb) { return skb_headlen(skb) + __skb_pagelen(skb); } static inline void skb_frag_fill_netmem_desc(skb_frag_t *frag, netmem_ref netmem, int off, int size) { frag->netmem = netmem; frag->offset = off; skb_frag_size_set(frag, size); } static inline void skb_frag_fill_page_desc(skb_frag_t *frag, struct page *page, int off, int size) { skb_frag_fill_netmem_desc(frag, page_to_netmem(page), off, size); } static inline void __skb_fill_netmem_desc_noacc(struct skb_shared_info *shinfo, int i, netmem_ref netmem, int off, int size) { skb_frag_t *frag = &shinfo->frags[i]; skb_frag_fill_netmem_desc(frag, netmem, off, size); } static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo, int i, struct page *page, int off, int size) { __skb_fill_netmem_desc_noacc(shinfo, i, page_to_netmem(page), off, size); } /** * skb_len_add - adds a number to len fields of skb * @skb: buffer to add len to * @delta: number of bytes to add */ static inline void skb_len_add(struct sk_buff *skb, int delta) { skb->len += delta; skb->data_len += delta; skb->truesize += delta; } /** * __skb_fill_netmem_desc - initialise a fragment in an skb * @skb: buffer containing fragment to be initialised * @i: fragment index to initialise * @netmem: the netmem to use for this fragment * @off: the offset to the data with @page * @size: the length of the data * * Initialises the @i'th fragment of @skb to point to &size bytes at * offset @off within @page. * * Does not take any additional reference on the fragment. */ static inline void __skb_fill_netmem_desc(struct sk_buff *skb, int i, netmem_ref netmem, int off, int size) { struct page *page; __skb_fill_netmem_desc_noacc(skb_shinfo(skb), i, netmem, off, size); if (netmem_is_net_iov(netmem)) { skb->unreadable = true; return; } page = netmem_to_page(netmem); /* Propagate page pfmemalloc to the skb if we can. The problem is * that not all callers have unique ownership of the page but rely * on page_is_pfmemalloc doing the right thing(tm). */ page = compound_head(page); if (page_is_pfmemalloc(page)) skb->pfmemalloc = true; } static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size) { __skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size); } static inline void skb_fill_netmem_desc(struct sk_buff *skb, int i, netmem_ref netmem, int off, int size) { __skb_fill_netmem_desc(skb, i, netmem, off, size); skb_shinfo(skb)->nr_frags = i + 1; } /** * skb_fill_page_desc - initialise a paged fragment in an skb * @skb: buffer containing fragment to be initialised * @i: paged fragment index to initialise * @page: the page to use for this fragment * @off: the offset to the data with @page * @size: the length of the data * * As per __skb_fill_page_desc() -- initialises the @i'th fragment of * @skb to point to @size bytes at offset @off within @page. In * addition updates @skb such that @i is the last fragment. * * Does not take any additional reference on the fragment. */ static inline void skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size) { skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size); } /** * skb_fill_page_desc_noacc - initialise a paged fragment in an skb * @skb: buffer containing fragment to be initialised * @i: paged fragment index to initialise * @page: the page to use for this fragment * @off: the offset to the data with @page * @size: the length of the data * * Variant of skb_fill_page_desc() which does not deal with * pfmemalloc, if page is not owned by us. */ static inline void skb_fill_page_desc_noacc(struct sk_buff *skb, int i, struct page *page, int off, int size) { struct skb_shared_info *shinfo = skb_shinfo(skb); __skb_fill_page_desc_noacc(shinfo, i, page, off, size); shinfo->nr_frags = i + 1; } void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem, int off, int size, unsigned int truesize); static inline void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, int size, unsigned int truesize) { skb_add_rx_frag_netmem(skb, i, page_to_netmem(page), off, size, truesize); } void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, unsigned int truesize); #define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb)) #ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb) { return skb->head + skb->tail; } static inline void skb_reset_tail_pointer(struct sk_buff *skb) { skb->tail = skb->data - skb->head; } static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset) { skb_reset_tail_pointer(skb); skb->tail += offset; } #else /* NET_SKBUFF_DATA_USES_OFFSET */ static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb) { return skb->tail; } static inline void skb_reset_tail_pointer(struct sk_buff *skb) { skb->tail = skb->data; } static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset) { skb->tail = skb->data + offset; } #endif /* NET_SKBUFF_DATA_USES_OFFSET */ static inline void skb_assert_len(struct sk_buff *skb) { #ifdef CONFIG_DEBUG_NET if (WARN_ONCE(!skb->len, "%s\n", __func__)) DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false); #endif /* CONFIG_DEBUG_NET */ } #if defined(CONFIG_FAIL_SKB_REALLOC) void skb_might_realloc(struct sk_buff *skb); #else static inline void skb_might_realloc(struct sk_buff *skb) {} #endif /* * Add data to an sk_buff */ void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len); void *skb_put(struct sk_buff *skb, unsigned int len); static inline void *__skb_put(struct sk_buff *skb, unsigned int len) { void *tmp = skb_tail_pointer(skb); SKB_LINEAR_ASSERT(skb); skb->tail += len; skb->len += len; return tmp; } static inline void *__skb_put_zero(struct sk_buff *skb, unsigned int len) { void *tmp = __skb_put(skb, len); memset(tmp, 0, len); return tmp; } static inline void *__skb_put_data(struct sk_buff *skb, const void *data, unsigned int len) { void *tmp = __skb_put(skb, len); memcpy(tmp, data, len); return tmp; } static inline void __skb_put_u8(struct sk_buff *skb, u8 val) { *(u8 *)__skb_put(skb, 1) = val; } static inline void *skb_put_zero(struct sk_buff *skb, unsigned int len) { void *tmp = skb_put(skb, len); memset(tmp, 0, len); return tmp; } static inline void *skb_put_data(struct sk_buff *skb, const void *data, unsigned int len) { void *tmp = skb_put(skb, len); memcpy(tmp, data, len); return tmp; } static inline void skb_put_u8(struct sk_buff *skb, u8 val) { *(u8 *)skb_put(skb, 1) = val; } void *skb_push(struct sk_buff *skb, unsigned int len); static inline void *__skb_push(struct sk_buff *skb, unsigned int len) { DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); skb->data -= len; skb->len += len; return skb->data; } void *skb_pull(struct sk_buff *skb, unsigned int len); static inline void *__skb_pull(struct sk_buff *skb, unsigned int len) { DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); skb->len -= len; if (unlikely(skb->len < skb->data_len)) { #if defined(CONFIG_DEBUG_NET) skb->len += len; pr_err("__skb_pull(len=%u)\n", len); skb_dump(KERN_ERR, skb, false); #endif BUG(); } return skb->data += len; } static inline void *skb_pull_inline(struct sk_buff *skb, unsigned int len) { return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len); } void *skb_pull_data(struct sk_buff *skb, size_t len); void *__pskb_pull_tail(struct sk_buff *skb, int delta); static inline enum skb_drop_reason pskb_may_pull_reason(struct sk_buff *skb, unsigned int len) { DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); skb_might_realloc(skb); if (likely(len <= skb_headlen(skb))) return SKB_NOT_DROPPED_YET; if (unlikely(len > skb->len)) return SKB_DROP_REASON_PKT_TOO_SMALL; if (unlikely(!__pskb_pull_tail(skb, len - skb_headlen(skb)))) return SKB_DROP_REASON_NOMEM; return SKB_NOT_DROPPED_YET; } static inline bool pskb_may_pull(struct sk_buff *skb, unsigned int len) { return pskb_may_pull_reason(skb, len) == SKB_NOT_DROPPED_YET; } static inline void *pskb_pull(struct sk_buff *skb, unsigned int len) { if (!pskb_may_pull(skb, len)) return NULL; skb->len -= len; return skb->data += len; } void skb_condense(struct sk_buff *skb); /** * skb_headroom - bytes at buffer head * @skb: buffer to check * * Return the number of bytes of free space at the head of an &sk_buff. */ static inline unsigned int skb_headroom(const struct sk_buff *skb) { return skb->data - skb->head; } /** * skb_tailroom - bytes at buffer end * @skb: buffer to check * * Return the number of bytes of free space at the tail of an sk_buff */ static inline int skb_tailroom(const struct sk_buff *skb) { return skb_is_nonlinear(skb) ? 0 : skb->end - skb->tail; } /** * skb_availroom - bytes at buffer end * @skb: buffer to check * * Return the number of bytes of free space at the tail of an sk_buff * allocated by sk_stream_alloc() */ static inline int skb_availroom(const struct sk_buff *skb) { if (skb_is_nonlinear(skb)) return 0; return skb->end - skb->tail - skb->reserved_tailroom; } /** * skb_reserve - adjust headroom * @skb: buffer to alter * @len: bytes to move * * Increase the headroom of an empty &sk_buff by reducing the tail * room. This is only allowed for an empty buffer. */ static inline void skb_reserve(struct sk_buff *skb, int len) { skb->data += len; skb->tail += len; } /** * skb_tailroom_reserve - adjust reserved_tailroom * @skb: buffer to alter * @mtu: maximum amount of headlen permitted * @needed_tailroom: minimum amount of reserved_tailroom * * Set reserved_tailroom so that headlen can be as large as possible but * not larger than mtu and tailroom cannot be smaller than * needed_tailroom. * The required headroom should already have been reserved before using * this function. */ static inline void skb_tailroom_reserve(struct sk_buff *skb, unsigned int mtu, unsigned int needed_tailroom) { SKB_LINEAR_ASSERT(skb); if (mtu < skb_tailroom(skb) - needed_tailroom) /* use at most mtu */ skb->reserved_tailroom = skb_tailroom(skb) - mtu; else /* use up to all available space */ skb->reserved_tailroom = needed_tailroom; } #define ENCAP_TYPE_ETHER 0 #define ENCAP_TYPE_IPPROTO 1 static inline void skb_set_inner_protocol(struct sk_buff *skb, __be16 protocol) { skb->inner_protocol = protocol; skb->inner_protocol_type = ENCAP_TYPE_ETHER; } static inline void skb_set_inner_ipproto(struct sk_buff *skb, __u8 ipproto) { skb->inner_ipproto = ipproto; skb->inner_protocol_type = ENCAP_TYPE_IPPROTO; } static inline void skb_reset_inner_headers(struct sk_buff *skb) { skb->inner_mac_header = skb->mac_header; skb->inner_network_header = skb->network_header; skb->inner_transport_header = skb->transport_header; } static inline int skb_mac_header_was_set(const struct sk_buff *skb) { return skb->mac_header != (typeof(skb->mac_header))~0U; } static inline void skb_reset_mac_len(struct sk_buff *skb) { if (!skb_mac_header_was_set(skb)) { DEBUG_NET_WARN_ON_ONCE(1); skb->mac_len = 0; } else { skb->mac_len = skb->network_header - skb->mac_header; } } static inline unsigned char *skb_inner_transport_header(const struct sk_buff *skb) { return skb->head + skb->inner_transport_header; } static inline int skb_inner_transport_offset(const struct sk_buff *skb) { return skb_inner_transport_header(skb) - skb->data; } static inline void skb_reset_inner_transport_header(struct sk_buff *skb) { long offset = skb->data - skb->head; DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->inner_transport_header))offset); skb->inner_transport_header = offset; } static inline void skb_set_inner_transport_header(struct sk_buff *skb, const int offset) { skb_reset_inner_transport_header(skb); skb->inner_transport_header += offset; } static inline unsigned char *skb_inner_network_header(const struct sk_buff *skb) { return skb->head + skb->inner_network_header; } static inline void skb_reset_inner_network_header(struct sk_buff *skb) { long offset = skb->data - skb->head; DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->inner_network_header))offset); skb->inner_network_header = offset; } static inline void skb_set_inner_network_header(struct sk_buff *skb, const int offset) { skb_reset_inner_network_header(skb); skb->inner_network_header += offset; } static inline bool skb_inner_network_header_was_set(const struct sk_buff *skb) { return skb->inner_network_header > 0; } static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb) { return skb->head + skb->inner_mac_header; } static inline void skb_reset_inner_mac_header(struct sk_buff *skb) { long offset = skb->data - skb->head; DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->inner_mac_header))offset); skb->inner_mac_header = offset; } static inline void skb_set_inner_mac_header(struct sk_buff *skb, const int offset) { skb_reset_inner_mac_header(skb); skb->inner_mac_header += offset; } static inline bool skb_transport_header_was_set(const struct sk_buff *skb) { return skb->transport_header != (typeof(skb->transport_header))~0U; } static inline unsigned char *skb_transport_header(const struct sk_buff *skb) { DEBUG_NET_WARN_ON_ONCE(!skb_transport_header_was_set(skb)); return skb->head + skb->transport_header; } static inline void skb_reset_transport_header(struct sk_buff *skb) { long offset = skb->data - skb->head; DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->transport_header))offset); skb->transport_header = offset; } /** * skb_reset_transport_header_careful - conditionally reset transport header * @skb: buffer to alter * * Hardened version of skb_reset_transport_header(). * * Returns: true if the operation was a success. */ static inline bool __must_check skb_reset_transport_header_careful(struct sk_buff *skb) { long offset = skb->data - skb->head; if (unlikely(offset != (typeof(skb->transport_header))offset)) return false; if (unlikely(offset == (typeof(skb->transport_header))~0U)) return false; skb->transport_header = offset; return true; } static inline void skb_set_transport_header(struct sk_buff *skb, const int offset) { skb_reset_transport_header(skb); skb->transport_header += offset; } static inline unsigned char *skb_network_header(const struct sk_buff *skb) { return skb->head + skb->network_header; } static inline void skb_reset_network_header(struct sk_buff *skb) { long offset = skb->data - skb->head; DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->network_header))offset); skb->network_header = offset; } static inline void skb_set_network_header(struct sk_buff *skb, const int offset) { skb_reset_network_header(skb); skb->network_header += offset; } static inline unsigned char *skb_mac_header(const struct sk_buff *skb) { DEBUG_NET_WARN_ON_ONCE(!skb_mac_header_was_set(skb)); return skb->head + skb->mac_header; } static inline int skb_mac_offset(const struct sk_buff *skb) { return skb_mac_header(skb) - skb->data; } static inline u32 skb_mac_header_len(const struct sk_buff *skb) { DEBUG_NET_WARN_ON_ONCE(!skb_mac_header_was_set(skb)); return skb->network_header - skb->mac_header; } static inline void skb_unset_mac_header(struct sk_buff *skb) { skb->mac_header = (typeof(skb->mac_header))~0U; } static inline void skb_reset_mac_header(struct sk_buff *skb) { long offset = skb->data - skb->head; DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->mac_header))offset); skb->mac_header = offset; } static inline void skb_set_mac_header(struct sk_buff *skb, const int offset) { skb_reset_mac_header(skb); skb->mac_header += offset; } static inline void skb_pop_mac_header(struct sk_buff *skb) { skb->mac_header = skb->network_header; } static inline void skb_probe_transport_header(struct sk_buff *skb) { struct flow_keys_basic keys; if (skb_transport_header_was_set(skb)) return; if (skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, NULL, 0, 0, 0, 0)) skb_set_transport_header(skb, keys.control.thoff); } static inline void skb_mac_header_rebuild(struct sk_buff *skb) { if (skb_mac_header_was_set(skb)) { const unsigned char *old_mac = skb_mac_header(skb); skb_set_mac_header(skb, -skb->mac_len); memmove(skb_mac_header(skb), old_mac, skb->mac_len); } } /* Move the full mac header up to current network_header. * Leaves skb->data pointing at offset skb->mac_len into the mac_header. * Must be provided the complete mac header length. */ static inline void skb_mac_header_rebuild_full(struct sk_buff *skb, u32 full_mac_len) { if (skb_mac_header_was_set(skb)) { const unsigned char *old_mac = skb_mac_header(skb); skb_set_mac_header(skb, -full_mac_len); memmove(skb_mac_header(skb), old_mac, full_mac_len); __skb_push(skb, full_mac_len - skb->mac_len); } } static inline int skb_checksum_start_offset(const struct sk_buff *skb) { return skb->csum_start - skb_headroom(skb); } static inline unsigned char *skb_checksum_start(const struct sk_buff *skb) { return skb->head + skb->csum_start; } static inline int skb_transport_offset(const struct sk_buff *skb) { return skb_transport_header(skb) - skb->data; } static inline u32 skb_network_header_len(const struct sk_buff *skb) { DEBUG_NET_WARN_ON_ONCE(!skb_transport_header_was_set(skb)); return skb->transport_header - skb->network_header; } static inline u32 skb_inner_network_header_len(const struct sk_buff *skb) { return skb->inner_transport_header - skb->inner_network_header; } static inline int skb_network_offset(const struct sk_buff *skb) { return skb_network_header(skb) - skb->data; } static inline int skb_inner_network_offset(const struct sk_buff *skb) { return skb_inner_network_header(skb) - skb->data; } static inline enum skb_drop_reason pskb_network_may_pull_reason(struct sk_buff *skb, unsigned int len) { return pskb_may_pull_reason(skb, skb_network_offset(skb) + len); } static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len) { return pskb_network_may_pull_reason(skb, len) == SKB_NOT_DROPPED_YET; } /* * CPUs often take a performance hit when accessing unaligned memory * locations. The actual performance hit varies, it can be small if the * hardware handles it or large if we have to take an exception and fix it * in software. * * Since an ethernet header is 14 bytes network drivers often end up with * the IP header at an unaligned offset. The IP header can be aligned by * shifting the start of the packet by 2 bytes. Drivers should do this * with: * * skb_reserve(skb, NET_IP_ALIGN); * * The downside to this alignment of the IP header is that the DMA is now * unaligned. On some architectures the cost of an unaligned DMA is high * and this cost outweighs the gains made by aligning the IP header. * * Since this trade off varies between architectures, we allow NET_IP_ALIGN * to be overridden. */ #ifndef NET_IP_ALIGN #define NET_IP_ALIGN 2 #endif /* * The networking layer reserves some headroom in skb data (via * dev_alloc_skb). This is used to avoid having to reallocate skb data when * the header has to grow. In the default case, if the header has to grow * 32 bytes or less we avoid the reallocation. * * Unfortunately this headroom changes the DMA alignment of the resulting * network packet. As for NET_IP_ALIGN, this unaligned DMA is expensive * on some architectures. An architecture can override this value, * perhaps setting it to a cacheline in size (since that will maintain * cacheline alignment of the DMA). It must be a power of 2. * * Various parts of the networking layer expect at least 32 bytes of * headroom, you should not reduce this. * * Using max(32, L1_CACHE_BYTES) makes sense (especially with RPS) * to reduce average number of cache lines per packet. * get_rps_cpu() for example only access one 64 bytes aligned block : * NET_IP_ALIGN(2) + ethernet_header(14) + IP_header(20/40) + ports(8) */ #ifndef NET_SKB_PAD #define NET_SKB_PAD max(32, L1_CACHE_BYTES) #endif int ___pskb_trim(struct sk_buff *skb, unsigned int len); static inline void __skb_set_length(struct sk_buff *skb, unsigned int len) { if (WARN_ON(skb_is_nonlinear(skb))) return; skb->len = len; skb_set_tail_pointer(skb, len); } static inline void __skb_trim(struct sk_buff *skb, unsigned int len) { __skb_set_length(skb, len); } void skb_trim(struct sk_buff *skb, unsigned int len); static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) { if (skb->data_len) return ___pskb_trim(skb, len); __skb_trim(skb, len); return 0; } static inline int pskb_trim(struct sk_buff *skb, unsigned int len) { skb_might_realloc(skb); return (len < skb->len) ? __pskb_trim(skb, len) : 0; } /** * pskb_trim_unique - remove end from a paged unique (not cloned) buffer * @skb: buffer to alter * @len: new length * * This is identical to pskb_trim except that the caller knows that * the skb is not cloned so we should never get an error due to out- * of-memory. */ static inline void pskb_trim_unique(struct sk_buff *skb, unsigned int len) { int err = pskb_trim(skb, len); BUG_ON(err); } static inline int __skb_grow(struct sk_buff *skb, unsigned int len) { unsigned int diff = len - skb->len; if (skb_tailroom(skb) < diff) { int ret = pskb_expand_head(skb, 0, diff - skb_tailroom(skb), GFP_ATOMIC); if (ret) return ret; } __skb_set_length(skb, len); return 0; } /** * skb_orphan - orphan a buffer * @skb: buffer to orphan * * If a buffer currently has an owner then we call the owner's * destructor function and make the @skb unowned. The buffer continues * to exist but is no longer charged to its former owner. */ static inline void skb_orphan(struct sk_buff *skb) { if (skb->destructor) { skb->destructor(skb); skb->destructor = NULL; skb->sk = NULL; } else { BUG_ON(skb->sk); } } /** * skb_orphan_frags - orphan the frags contained in a buffer * @skb: buffer to orphan frags from * @gfp_mask: allocation mask for replacement pages * * For each frag in the SKB which needs a destructor (i.e. has an * owner) create a copy of that frag and release the original * page by calling the destructor. */ static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask) { if (likely(!skb_zcopy(skb))) return 0; if (skb_shinfo(skb)->flags & SKBFL_DONT_ORPHAN) return 0; return skb_copy_ubufs(skb, gfp_mask); } /* Frags must be orphaned, even if refcounted, if skb might loop to rx path */ static inline int skb_orphan_frags_rx(struct sk_buff *skb, gfp_t gfp_mask) { if (likely(!skb_zcopy(skb))) return 0; return skb_copy_ubufs(skb, gfp_mask); } /** * __skb_queue_purge_reason - empty a list * @list: list to empty * @reason: drop reason * * Delete all buffers on an &sk_buff list. Each buffer is removed from * the list and one reference dropped. This function does not take the * list lock and the caller must hold the relevant locks to use it. */ static inline void __skb_queue_purge_reason(struct sk_buff_head *list, enum skb_drop_reason reason) { struct sk_buff *skb; while ((skb = __skb_dequeue(list)) != NULL) kfree_skb_reason(skb, reason); } static inline void __skb_queue_purge(struct sk_buff_head *list) { __skb_queue_purge_reason(list, SKB_DROP_REASON_QUEUE_PURGE); } void skb_queue_purge_reason(struct sk_buff_head *list, enum skb_drop_reason reason); static inline void skb_queue_purge(struct sk_buff_head *list) { skb_queue_purge_reason(list, SKB_DROP_REASON_QUEUE_PURGE); } unsigned int skb_rbtree_purge(struct rb_root *root); void skb_errqueue_purge(struct sk_buff_head *list); void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask); /** * netdev_alloc_frag - allocate a page fragment * @fragsz: fragment size * * Allocates a frag from a page for receive buffer. * Uses GFP_ATOMIC allocations. */ static inline void *netdev_alloc_frag(unsigned int fragsz) { return __netdev_alloc_frag_align(fragsz, ~0u); } static inline void *netdev_alloc_frag_align(unsigned int fragsz, unsigned int align) { WARN_ON_ONCE(!is_power_of_2(align)); return __netdev_alloc_frag_align(fragsz, -align); } struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, gfp_t gfp_mask); /** * netdev_alloc_skb - allocate an skbuff for rx on a specific device * @dev: network device to receive on * @length: length to allocate * * Allocate a new &sk_buff and assign it a usage count of one. The * buffer has unspecified headroom built in. Users should allocate * the headroom they think they need without accounting for the * built in space. The built in space is used for optimisations. * * %NULL is returned if there is no free memory. Although this function * allocates memory it can be called from an interrupt. */ static inline struct sk_buff *netdev_alloc_skb(struct net_device *dev, unsigned int length) { return __netdev_alloc_skb(dev, length, GFP_ATOMIC); } /* legacy helper around __netdev_alloc_skb() */ static inline struct sk_buff *__dev_alloc_skb(unsigned int length, gfp_t gfp_mask) { return __netdev_alloc_skb(NULL, length, gfp_mask); } /* legacy helper around netdev_alloc_skb() */ static inline struct sk_buff *dev_alloc_skb(unsigned int length) { return netdev_alloc_skb(NULL, length); } static inline struct sk_buff *__netdev_alloc_skb_ip_align(struct net_device *dev, unsigned int length, gfp_t gfp) { struct sk_buff *skb = __netdev_alloc_skb(dev, length + NET_IP_ALIGN, gfp); if (NET_IP_ALIGN && skb) skb_reserve(skb, NET_IP_ALIGN); return skb; } static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev, unsigned int length) { return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC); } static inline void skb_free_frag(void *addr) { page_frag_free(addr); } void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask); static inline void *napi_alloc_frag(unsigned int fragsz) { return __napi_alloc_frag_align(fragsz, ~0u); } static inline void *napi_alloc_frag_align(unsigned int fragsz, unsigned int align) { WARN_ON_ONCE(!is_power_of_2(align)); return __napi_alloc_frag_align(fragsz, -align); } struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int length); void napi_consume_skb(struct sk_buff *skb, int budget); void napi_skb_free_stolen_head(struct sk_buff *skb); void __napi_kfree_skb(struct sk_buff *skb, enum skb_drop_reason reason); /** * __dev_alloc_pages - allocate page for network Rx * @gfp_mask: allocation priority. Set __GFP_NOMEMALLOC if not for network Rx * @order: size of the allocation * * Allocate a new page. * * %NULL is returned if there is no free memory. */ static inline struct page *__dev_alloc_pages_noprof(gfp_t gfp_mask, unsigned int order) { /* This piece of code contains several assumptions. * 1. This is for device Rx, therefore a cold page is preferred. * 2. The expectation is the user wants a compound page. * 3. If requesting a order 0 page it will not be compound * due to the check to see if order has a value in prep_new_page * 4. __GFP_MEMALLOC is ignored if __GFP_NOMEMALLOC is set due to * code in gfp_to_alloc_flags that should be enforcing this. */ gfp_mask |= __GFP_COMP | __GFP_MEMALLOC; return alloc_pages_node_noprof(NUMA_NO_NODE, gfp_mask, order); } #define __dev_alloc_pages(...) alloc_hooks(__dev_alloc_pages_noprof(__VA_ARGS__)) /* * This specialized allocator has to be a macro for its allocations to be * accounted separately (to have a separate alloc_tag). */ #define dev_alloc_pages(_order) __dev_alloc_pages(GFP_ATOMIC | __GFP_NOWARN, _order) /** * __dev_alloc_page - allocate a page for network Rx * @gfp_mask: allocation priority. Set __GFP_NOMEMALLOC if not for network Rx * * Allocate a new page. * * %NULL is returned if there is no free memory. */ static inline struct page *__dev_alloc_page_noprof(gfp_t gfp_mask) { return __dev_alloc_pages_noprof(gfp_mask, 0); } #define __dev_alloc_page(...) alloc_hooks(__dev_alloc_page_noprof(__VA_ARGS__)) /* * This specialized allocator has to be a macro for its allocations to be * accounted separately (to have a separate alloc_tag). */ #define dev_alloc_page() dev_alloc_pages(0) /** * dev_page_is_reusable - check whether a page can be reused for network Rx * @page: the page to test * * A page shouldn't be considered for reusing/recycling if it was allocated * under memory pressure or at a distant memory node. * * Returns: false if this page should be returned to page allocator, true * otherwise. */ static inline bool dev_page_is_reusable(const struct page *page) { return likely(page_to_nid(page) == numa_mem_id() && !page_is_pfmemalloc(page)); } /** * skb_propagate_pfmemalloc - Propagate pfmemalloc if skb is allocated after RX page * @page: The page that was allocated from skb_alloc_page * @skb: The skb that may need pfmemalloc set */ static inline void skb_propagate_pfmemalloc(const struct page *page, struct sk_buff *skb) { if (page_is_pfmemalloc(page)) skb->pfmemalloc = true; } /** * skb_frag_off() - Returns the offset of a skb fragment * @frag: the paged fragment */ static inline unsigned int skb_frag_off(const skb_frag_t *frag) { return frag->offset; } /** * skb_frag_off_add() - Increments the offset of a skb fragment by @delta * @frag: skb fragment * @delta: value to add */ static inline void skb_frag_off_add(skb_frag_t *frag, int delta) { frag->offset += delta; } /** * skb_frag_off_set() - Sets the offset of a skb fragment * @frag: skb fragment * @offset: offset of fragment */ static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset) { frag->offset = offset; } /** * skb_frag_off_copy() - Sets the offset of a skb fragment from another fragment * @fragto: skb fragment where offset is set * @fragfrom: skb fragment offset is copied from */ static inline void skb_frag_off_copy(skb_frag_t *fragto, const skb_frag_t *fragfrom) { fragto->offset = fragfrom->offset; } /* Return: true if the skb_frag contains a net_iov. */ static inline bool skb_frag_is_net_iov(const skb_frag_t *frag) { return netmem_is_net_iov(frag->netmem); } /** * skb_frag_net_iov - retrieve the net_iov referred to by fragment * @frag: the fragment * * Return: the &struct net_iov associated with @frag. Returns NULL if this * frag has no associated net_iov. */ static inline struct net_iov *skb_frag_net_iov(const skb_frag_t *frag) { if (!skb_frag_is_net_iov(frag)) return NULL; return netmem_to_net_iov(frag->netmem); } /** * skb_frag_page - retrieve the page referred to by a paged fragment * @frag: the paged fragment * * Return: the &struct page associated with @frag. Returns NULL if this frag * has no associated page. */ static inline struct page *skb_frag_page(const skb_frag_t *frag) { if (skb_frag_is_net_iov(frag)) return NULL; return netmem_to_page(frag->netmem); } /** * skb_frag_netmem - retrieve the netmem referred to by a fragment * @frag: the fragment * * Return: the &netmem_ref associated with @frag. */ static inline netmem_ref skb_frag_netmem(const skb_frag_t *frag) { return frag->netmem; } int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, unsigned int headroom); int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, const struct bpf_prog *prog); /** * skb_frag_address - gets the address of the data contained in a paged fragment * @frag: the paged fragment buffer * * Returns: the address of the data within @frag. The page must already * be mapped. */ static inline void *skb_frag_address(const skb_frag_t *frag) { if (!skb_frag_page(frag)) return NULL; return page_address(skb_frag_page(frag)) + skb_frag_off(frag); } /** * skb_frag_address_safe - gets the address of the data contained in a paged fragment * @frag: the paged fragment buffer * * Returns: the address of the data within @frag. Checks that the page * is mapped and returns %NULL otherwise. */ static inline void *skb_frag_address_safe(const skb_frag_t *frag) { struct page *page = skb_frag_page(frag); void *ptr; if (!page) return NULL; ptr = page_address(page); if (unlikely(!ptr)) return NULL; return ptr + skb_frag_off(frag); } /** * skb_frag_page_copy() - sets the page in a fragment from another fragment * @fragto: skb fragment where page is set * @fragfrom: skb fragment page is copied from */ static inline void skb_frag_page_copy(skb_frag_t *fragto, const skb_frag_t *fragfrom) { fragto->netmem = fragfrom->netmem; } bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio); /** * __skb_frag_dma_map - maps a paged fragment via the DMA API * @dev: the device to map the fragment to * @frag: the paged fragment to map * @offset: the offset within the fragment (starting at the * fragment's own offset) * @size: the number of bytes to map * @dir: the direction of the mapping (``PCI_DMA_*``) * * Maps the page associated with @frag to @device. */ static inline dma_addr_t __skb_frag_dma_map(struct device *dev, const skb_frag_t *frag, size_t offset, size_t size, enum dma_data_direction dir) { if (skb_frag_is_net_iov(frag)) { return netmem_to_net_iov(frag->netmem)->dma_addr + offset + frag->offset; } return dma_map_page(dev, skb_frag_page(frag), skb_frag_off(frag) + offset, size, dir); } #define skb_frag_dma_map(dev, frag, ...) \ CONCATENATE(_skb_frag_dma_map, \ COUNT_ARGS(__VA_ARGS__))(dev, frag, ##__VA_ARGS__) #define __skb_frag_dma_map1(dev, frag, offset, uf, uo) ({ \ const skb_frag_t *uf = (frag); \ size_t uo = (offset); \ \ __skb_frag_dma_map(dev, uf, uo, skb_frag_size(uf) - uo, \ DMA_TO_DEVICE); \ }) #define _skb_frag_dma_map1(dev, frag, offset) \ __skb_frag_dma_map1(dev, frag, offset, __UNIQUE_ID(frag_), \ __UNIQUE_ID(offset_)) #define _skb_frag_dma_map0(dev, frag) \ _skb_frag_dma_map1(dev, frag, 0) #define _skb_frag_dma_map2(dev, frag, offset, size) \ __skb_frag_dma_map(dev, frag, offset, size, DMA_TO_DEVICE) #define _skb_frag_dma_map3(dev, frag, offset, size, dir) \ __skb_frag_dma_map(dev, frag, offset, size, dir) static inline struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) { return __pskb_copy(skb, skb_headroom(skb), gfp_mask); } static inline struct sk_buff *pskb_copy_for_clone(struct sk_buff *skb, gfp_t gfp_mask) { return __pskb_copy_fclone(skb, skb_headroom(skb), gfp_mask, true); } /** * skb_clone_writable - is the header of a clone writable * @skb: buffer to check * @len: length up to which to write * * Returns true if modifying the header part of the cloned buffer * does not requires the data to be copied. */ static inline int skb_clone_writable(const struct sk_buff *skb, unsigned int len) { return !skb_header_cloned(skb) && skb_headroom(skb) + len <= skb->hdr_len; } static inline int skb_try_make_writable(struct sk_buff *skb, unsigned int write_len) { return skb_cloned(skb) && !skb_clone_writable(skb, write_len) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC); } static inline int __skb_cow(struct sk_buff *skb, unsigned int headroom, int cloned) { int delta = 0; if (headroom > skb_headroom(skb)) delta = headroom - skb_headroom(skb); if (delta || cloned) return pskb_expand_head(skb, ALIGN(delta, NET_SKB_PAD), 0, GFP_ATOMIC); return 0; } /** * skb_cow - copy header of skb when it is required * @skb: buffer to cow * @headroom: needed headroom * * If the skb passed lacks sufficient headroom or its data part * is shared, data is reallocated. If reallocation fails, an error * is returned and original skb is not changed. * * The result is skb with writable area skb->head...skb->tail * and at least @headroom of space at head. */ static inline int skb_cow(struct sk_buff *skb, unsigned int headroom) { return __skb_cow(skb, headroom, skb_cloned(skb)); } /** * skb_cow_head - skb_cow but only making the head writable * @skb: buffer to cow * @headroom: needed headroom * * This function is identical to skb_cow except that we replace the * skb_cloned check by skb_header_cloned. It should be used when * you only need to push on some header and do not need to modify * the data. */ static inline int skb_cow_head(struct sk_buff *skb, unsigned int headroom) { return __skb_cow(skb, headroom, skb_header_cloned(skb)); } /** * skb_padto - pad an skbuff up to a minimal size * @skb: buffer to pad * @len: minimal length * * Pads up a buffer to ensure the trailing bytes exist and are * blanked. If the buffer already contains sufficient data it * is untouched. Otherwise it is extended. Returns zero on * success. The skb is freed on error. */ static inline int skb_padto(struct sk_buff *skb, unsigned int len) { unsigned int size = skb->len; if (likely(size >= len)) return 0; return skb_pad(skb, len - size); } /** * __skb_put_padto - increase size and pad an skbuff up to a minimal size * @skb: buffer to pad * @len: minimal length * @free_on_error: free buffer on error * * Pads up a buffer to ensure the trailing bytes exist and are * blanked. If the buffer already contains sufficient data it * is untouched. Otherwise it is extended. Returns zero on * success. The skb is freed on error if @free_on_error is true. */ static inline int __must_check __skb_put_padto(struct sk_buff *skb, unsigned int len, bool free_on_error) { unsigned int size = skb->len; if (unlikely(size < len)) { len -= size; if (__skb_pad(skb, len, free_on_error)) return -ENOMEM; __skb_put(skb, len); } return 0; } /** * skb_put_padto - increase size and pad an skbuff up to a minimal size * @skb: buffer to pad * @len: minimal length * * Pads up a buffer to ensure the trailing bytes exist and are * blanked. If the buffer already contains sufficient data it * is untouched. Otherwise it is extended. Returns zero on * success. The skb is freed on error. */ static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int len) { return __skb_put_padto(skb, len, true); } bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i) __must_check; static inline bool skb_can_coalesce_netmem(struct sk_buff *skb, int i, netmem_ref netmem, int off) { if (skb_zcopy(skb)) return false; if (i) { const skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1]; return netmem == skb_frag_netmem(frag) && off == skb_frag_off(frag) + skb_frag_size(frag); } return false; } static inline bool skb_can_coalesce(struct sk_buff *skb, int i, const struct page *page, int off) { return skb_can_coalesce_netmem(skb, i, page_to_netmem(page), off); } static inline int __skb_linearize(struct sk_buff *skb) { return __pskb_pull_tail(skb, skb->data_len) ? 0 : -ENOMEM; } /** * skb_linearize - convert paged skb to linear one * @skb: buffer to linarize * * If there is no free memory -ENOMEM is returned, otherwise zero * is returned and the old skb data released. */ static inline int skb_linearize(struct sk_buff *skb) { return skb_is_nonlinear(skb) ? __skb_linearize(skb) : 0; } /** * skb_has_shared_frag - can any frag be overwritten * @skb: buffer to test * * Return: true if the skb has at least one frag that might be modified * by an external entity (as in vmsplice()/sendfile()) */ static inline bool skb_has_shared_frag(const struct sk_buff *skb) { return skb_is_nonlinear(skb) && skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG; } /** * skb_linearize_cow - make sure skb is linear and writable * @skb: buffer to process * * If there is no free memory -ENOMEM is returned, otherwise zero * is returned and the old skb data released. */ static inline int skb_linearize_cow(struct sk_buff *skb) { return skb_is_nonlinear(skb) || skb_cloned(skb) ? __skb_linearize(skb) : 0; } static __always_inline void __skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len, unsigned int off) { if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_block_sub(skb->csum, csum_partial(start, len, 0), off); else if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_start_offset(skb) < 0) skb->ip_summed = CHECKSUM_NONE; } /** * skb_postpull_rcsum - update checksum for received skb after pull * @skb: buffer to update * @start: start of data before pull * @len: length of data pulled * * After doing a pull on a received packet, you need to call this to * update the CHECKSUM_COMPLETE checksum, or set ip_summed to * CHECKSUM_NONE so that it can be recomputed from scratch. */ static inline void skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len) { if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = wsum_negate(csum_partial(start, len, wsum_negate(skb->csum))); else if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_start_offset(skb) < 0) skb->ip_summed = CHECKSUM_NONE; } static __always_inline void __skb_postpush_rcsum(struct sk_buff *skb, const void *start, unsigned int len, unsigned int off) { if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_block_add(skb->csum, csum_partial(start, len, 0), off); } /** * skb_postpush_rcsum - update checksum for received skb after push * @skb: buffer to update * @start: start of data after push * @len: length of data pushed * * After doing a push on a received packet, you need to call this to * update the CHECKSUM_COMPLETE checksum. */ static inline void skb_postpush_rcsum(struct sk_buff *skb, const void *start, unsigned int len) { __skb_postpush_rcsum(skb, start, len, 0); } void *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); /** * skb_push_rcsum - push skb and update receive checksum * @skb: buffer to update * @len: length of data pulled * * This function performs an skb_push on the packet and updates * the CHECKSUM_COMPLETE checksum. It should be used on * receive path processing instead of skb_push unless you know * that the checksum difference is zero (e.g., a valid IP header) * or you are setting ip_summed to CHECKSUM_NONE. */ static inline void *skb_push_rcsum(struct sk_buff *skb, unsigned int len) { skb_push(skb, len); skb_postpush_rcsum(skb, skb->data, len); return skb->data; } int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len); /** * pskb_trim_rcsum - trim received skb and update checksum * @skb: buffer to trim * @len: new length * * This is exactly the same as pskb_trim except that it ensures the * checksum of received packets are still valid after the operation. * It can change skb pointers. */ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) { skb_might_realloc(skb); if (likely(len >= skb->len)) return 0; return pskb_trim_rcsum_slow(skb, len); } static inline int __skb_trim_rcsum(struct sk_buff *skb, unsigned int len) { if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; __skb_trim(skb, len); return 0; } static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len) { if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; return __skb_grow(skb, len); } #define rb_to_skb(rb) rb_entry_safe(rb, struct sk_buff, rbnode) #define skb_rb_first(root) rb_to_skb(rb_first(root)) #define skb_rb_last(root) rb_to_skb(rb_last(root)) #define skb_rb_next(skb) rb_to_skb(rb_next(&(skb)->rbnode)) #define skb_rb_prev(skb) rb_to_skb(rb_prev(&(skb)->rbnode)) #define skb_queue_walk(queue, skb) \ for (skb = (queue)->next; \ skb != (struct sk_buff *)(queue); \ skb = skb->next) #define skb_queue_walk_safe(queue, skb, tmp) \ for (skb = (queue)->next, tmp = skb->next; \ skb != (struct sk_buff *)(queue); \ skb = tmp, tmp = skb->next) #define skb_queue_walk_from(queue, skb) \ for (; skb != (struct sk_buff *)(queue); \ skb = skb->next) #define skb_rbtree_walk(skb, root) \ for (skb = skb_rb_first(root); skb != NULL; \ skb = skb_rb_next(skb)) #define skb_rbtree_walk_from(skb) \ for (; skb != NULL; \ skb = skb_rb_next(skb)) #define skb_rbtree_walk_from_safe(skb, tmp) \ for (; tmp = skb ? skb_rb_next(skb) : NULL, (skb != NULL); \ skb = tmp) #define skb_queue_walk_from_safe(queue, skb, tmp) \ for (tmp = skb->next; \ skb != (struct sk_buff *)(queue); \ skb = tmp, tmp = skb->next) #define skb_queue_reverse_walk(queue, skb) \ for (skb = (queue)->prev; \ skb != (struct sk_buff *)(queue); \ skb = skb->prev) #define skb_queue_reverse_walk_safe(queue, skb, tmp) \ for (skb = (queue)->prev, tmp = skb->prev; \ skb != (struct sk_buff *)(queue); \ skb = tmp, tmp = skb->prev) #define skb_queue_reverse_walk_from_safe(queue, skb, tmp) \ for (tmp = skb->prev; \ skb != (struct sk_buff *)(queue); \ skb = tmp, tmp = skb->prev) static inline bool skb_has_frag_list(const struct sk_buff *skb) { return skb_shinfo(skb)->frag_list != NULL; } static inline void skb_frag_list_init(struct sk_buff *skb) { skb_shinfo(skb)->frag_list = NULL; } #define skb_walk_frags(skb, iter) \ for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next) int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue, int *err, long *timeo_p, const struct sk_buff *skb); struct sk_buff *__skb_try_recv_from_queue(struct sk_buff_head *queue, unsigned int flags, int *off, int *err, struct sk_buff **last); struct sk_buff *__skb_try_recv_datagram(struct sock *sk, struct sk_buff_head *queue, unsigned int flags, int *off, int *err, struct sk_buff **last); struct sk_buff *__skb_recv_datagram(struct sock *sk, struct sk_buff_head *sk_queue, unsigned int flags, int *off, int *err); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, int *err); __poll_t datagram_poll_queue(struct file *file, struct socket *sock, struct poll_table_struct *wait, struct sk_buff_head *rcv_queue); __poll_t datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); int skb_copy_datagram_iter(const struct sk_buff *from, int offset, struct iov_iter *to, int size); static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset, struct msghdr *msg, int size) { return skb_copy_datagram_iter(from, offset, &msg->msg_iter, size); } int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen, struct msghdr *msg); int skb_copy_and_crc32c_datagram_iter(const struct sk_buff *skb, int offset, struct iov_iter *to, int len, u32 *crcp); int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, struct iov_iter *from, int len); int skb_copy_datagram_from_iter_full(struct sk_buff *skb, int offset, struct iov_iter *from, int len); int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm); void skb_free_datagram(struct sock *sk, struct sk_buff *skb); int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags); int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len); __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len); int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, struct pipe_inode_info *pipe, unsigned int len, unsigned int flags); int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset, int len); int skb_send_sock_locked_with_flags(struct sock *sk, struct sk_buff *skb, int offset, int len, int flags); int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); unsigned int skb_zerocopy_headlen(const struct sk_buff *from); int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen); void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); void skb_scrub_packet(struct sk_buff *skb, bool xnet); struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); struct sk_buff *skb_segment_list(struct sk_buff *skb, netdev_features_t features, unsigned int offset); struct sk_buff *skb_vlan_untag(struct sk_buff *skb); int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len); int skb_ensure_writable_head_tail(struct sk_buff *skb, struct net_device *dev); int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci); int skb_vlan_pop(struct sk_buff *skb); int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); int skb_eth_pop(struct sk_buff *skb); int skb_eth_push(struct sk_buff *skb, const unsigned char *dst, const unsigned char *src); int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto, int mac_len, bool ethernet); int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len, bool ethernet); int skb_mpls_update_lse(struct sk_buff *skb, __be32 mpls_lse); int skb_mpls_dec_ttl(struct sk_buff *skb); struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy, gfp_t gfp); static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len) { return copy_from_iter_full(data, len, &msg->msg_iter) ? 0 : -EFAULT; } static inline int memcpy_to_msg(struct msghdr *msg, void *data, int len) { return copy_to_iter(data, len, &msg->msg_iter) == len ? 0 : -EFAULT; } __wsum skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum); u32 skb_crc32c(const struct sk_buff *skb, int offset, int len, u32 crc); static inline void * __must_check __skb_header_pointer(const struct sk_buff *skb, int offset, int len, const void *data, int hlen, void *buffer) { if (likely(hlen - offset >= len)) return (void *)data + offset; if (!skb || unlikely(skb_copy_bits(skb, offset, buffer, len) < 0)) return NULL; return buffer; } static inline void * __must_check skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) { return __skb_header_pointer(skb, offset, len, skb->data, skb_headlen(skb), buffer); } static inline void * __must_check skb_pointer_if_linear(const struct sk_buff *skb, int offset, int len) { if (likely(skb_headlen(skb) - offset >= len)) return skb->data + offset; return NULL; } /** * skb_needs_linearize - check if we need to linearize a given skb * depending on the given device features. * @skb: socket buffer to check * @features: net device features * * Returns true if either: * 1. skb has frag_list and the device doesn't support FRAGLIST, or * 2. skb is fragmented and the device does not support SG. */ static inline bool skb_needs_linearize(struct sk_buff *skb, netdev_features_t features) { return skb_is_nonlinear(skb) && ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) || (skb_shinfo(skb)->nr_frags && !(features & NETIF_F_SG))); } static inline void skb_copy_from_linear_data(const struct sk_buff *skb, void *to, const unsigned int len) { memcpy(to, skb->data, len); } static inline void skb_copy_from_linear_data_offset(const struct sk_buff *skb, const int offset, void *to, const unsigned int len) { memcpy(to, skb->data + offset, len); } static inline void skb_copy_to_linear_data(struct sk_buff *skb, const void *from, const unsigned int len) { memcpy(skb->data, from, len); } static inline void skb_copy_to_linear_data_offset(struct sk_buff *skb, const int offset, const void *from, const unsigned int len) { memcpy(skb->data + offset, from, len); } void skb_init(void); static inline ktime_t skb_get_ktime(const struct sk_buff *skb) { return skb->tstamp; } /** * skb_get_timestamp - get timestamp from a skb * @skb: skb to get stamp from * @stamp: pointer to struct __kernel_old_timeval to store stamp in * * Timestamps are stored in the skb as offsets to a base timestamp. * This function converts the offset back to a struct timeval and stores * it in stamp. */ static inline void skb_get_timestamp(const struct sk_buff *skb, struct __kernel_old_timeval *stamp) { *stamp = ns_to_kernel_old_timeval(skb->tstamp); } static inline void skb_get_new_timestamp(const struct sk_buff *skb, struct __kernel_sock_timeval *stamp) { struct timespec64 ts = ktime_to_timespec64(skb->tstamp); stamp->tv_sec = ts.tv_sec; stamp->tv_usec = ts.tv_nsec / 1000; } static inline void skb_get_timestampns(const struct sk_buff *skb, struct __kernel_old_timespec *stamp) { struct timespec64 ts = ktime_to_timespec64(skb->tstamp); stamp->tv_sec = ts.tv_sec; stamp->tv_nsec = ts.tv_nsec; } static inline void skb_get_new_timestampns(const struct sk_buff *skb, struct __kernel_timespec *stamp) { struct timespec64 ts = ktime_to_timespec64(skb->tstamp); stamp->tv_sec = ts.tv_sec; stamp->tv_nsec = ts.tv_nsec; } static inline void __net_timestamp(struct sk_buff *skb) { skb->tstamp = ktime_get_real(); skb->tstamp_type = SKB_CLOCK_REALTIME; } static inline ktime_t net_timedelta(ktime_t t) { return ktime_sub(ktime_get_real(), t); } static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt, u8 tstamp_type) { skb->tstamp = kt; if (kt) skb->tstamp_type = tstamp_type; else skb->tstamp_type = SKB_CLOCK_REALTIME; } static inline void skb_set_delivery_type_by_clockid(struct sk_buff *skb, ktime_t kt, clockid_t clockid) { u8 tstamp_type = SKB_CLOCK_REALTIME; switch (clockid) { case CLOCK_REALTIME: break; case CLOCK_MONOTONIC: tstamp_type = SKB_CLOCK_MONOTONIC; break; case CLOCK_TAI: tstamp_type = SKB_CLOCK_TAI; break; default: WARN_ON_ONCE(1); kt = 0; } skb_set_delivery_time(skb, kt, tstamp_type); } DECLARE_STATIC_KEY_FALSE(netstamp_needed_key); /* It is used in the ingress path to clear the delivery_time. * If needed, set the skb->tstamp to the (rcv) timestamp. */ static inline void skb_clear_delivery_time(struct sk_buff *skb) { if (skb->tstamp_type) { skb->tstamp_type = SKB_CLOCK_REALTIME; if (static_branch_unlikely(&netstamp_needed_key)) skb->tstamp = ktime_get_real(); else skb->tstamp = 0; } } static inline void skb_clear_tstamp(struct sk_buff *skb) { if (skb->tstamp_type) return; skb->tstamp = 0; } static inline ktime_t skb_tstamp(const struct sk_buff *skb) { if (skb->tstamp_type) return 0; return skb->tstamp; } static inline ktime_t skb_tstamp_cond(const struct sk_buff *skb, bool cond) { if (skb->tstamp_type != SKB_CLOCK_MONOTONIC && skb->tstamp) return skb->tstamp; if (static_branch_unlikely(&netstamp_needed_key) || cond) return ktime_get_real(); return 0; } static inline u8 skb_metadata_len(const struct sk_buff *skb) { return skb_shinfo(skb)->meta_len; } static inline void *skb_metadata_end(const struct sk_buff *skb) { return skb_mac_header(skb); } static inline bool __skb_metadata_differs(const struct sk_buff *skb_a, const struct sk_buff *skb_b, u8 meta_len) { const void *a = skb_metadata_end(skb_a); const void *b = skb_metadata_end(skb_b); u64 diffs = 0; if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || BITS_PER_LONG != 64) goto slow; /* Using more efficient variant than plain call to memcmp(). */ switch (meta_len) { #define __it(x, op) (x -= sizeof(u##op)) #define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op)) case 32: diffs |= __it_diff(a, b, 64); fallthrough; case 24: diffs |= __it_diff(a, b, 64); fallthrough; case 16: diffs |= __it_diff(a, b, 64); fallthrough; case 8: diffs |= __it_diff(a, b, 64); break; case 28: diffs |= __it_diff(a, b, 64); fallthrough; case 20: diffs |= __it_diff(a, b, 64); fallthrough; case 12: diffs |= __it_diff(a, b, 64); fallthrough; case 4: diffs |= __it_diff(a, b, 32); break; default: slow: return memcmp(a - meta_len, b - meta_len, meta_len); } return diffs; } static inline bool skb_metadata_differs(const struct sk_buff *skb_a, const struct sk_buff *skb_b) { u8 len_a = skb_metadata_len(skb_a); u8 len_b = skb_metadata_len(skb_b); if (!(len_a | len_b)) return false; return len_a != len_b ? true : __skb_metadata_differs(skb_a, skb_b, len_a); } static inline void skb_metadata_set(struct sk_buff *skb, u8 meta_len) { skb_shinfo(skb)->meta_len = meta_len; } static inline void skb_metadata_clear(struct sk_buff *skb) { skb_metadata_set(skb, 0); } struct sk_buff *skb_clone_sk(struct sk_buff *skb); #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING void skb_clone_tx_timestamp(struct sk_buff *skb); bool skb_defer_rx_timestamp(struct sk_buff *skb); #else /* CONFIG_NETWORK_PHY_TIMESTAMPING */ static inline void skb_clone_tx_timestamp(struct sk_buff *skb) { } static inline bool skb_defer_rx_timestamp(struct sk_buff *skb) { return false; } #endif /* !CONFIG_NETWORK_PHY_TIMESTAMPING */ /** * skb_complete_tx_timestamp() - deliver cloned skb with tx timestamps * * PHY drivers may accept clones of transmitted packets for * timestamping via their phy_driver.txtstamp method. These drivers * must call this function to return the skb back to the stack with a * timestamp. * * @skb: clone of the original outgoing packet * @hwtstamps: hardware time stamps * */ void skb_complete_tx_timestamp(struct sk_buff *skb, struct skb_shared_hwtstamps *hwtstamps); void __skb_tstamp_tx(struct sk_buff *orig_skb, const struct sk_buff *ack_skb, struct skb_shared_hwtstamps *hwtstamps, struct sock *sk, int tstype); /** * skb_tstamp_tx - queue clone of skb with send time stamps * @orig_skb: the original outgoing packet * @hwtstamps: hardware time stamps, may be NULL if not available * * If the skb has a socket associated, then this function clones the * skb (thus sharing the actual data and optional structures), stores * the optional hardware time stamping information (if non NULL) or * generates a software time stamp (otherwise), then queues the clone * to the error queue of the socket. Errors are silently ignored. */ void skb_tstamp_tx(struct sk_buff *orig_skb, struct skb_shared_hwtstamps *hwtstamps); /** * skb_tx_timestamp() - Driver hook for transmit timestamping * * Ethernet MAC Drivers should call this function in their hard_xmit() * function immediately before giving the sk_buff to the MAC hardware. * * Specifically, one should make absolutely sure that this function is * called before TX completion of this packet can trigger. Otherwise * the packet could potentially already be freed. * * @skb: A socket buffer. */ static inline void skb_tx_timestamp(struct sk_buff *skb) { skb_clone_tx_timestamp(skb); if (skb_shinfo(skb)->tx_flags & (SKBTX_SW_TSTAMP | SKBTX_BPF)) skb_tstamp_tx(skb, NULL); } /** * skb_complete_wifi_ack - deliver skb with wifi status * * @skb: the original outgoing packet * @acked: ack status * */ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked); __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len); __sum16 __skb_checksum_complete(struct sk_buff *skb); static inline int skb_csum_unnecessary(const struct sk_buff *skb) { return ((skb->ip_summed == CHECKSUM_UNNECESSARY) || skb->csum_valid || (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_start_offset(skb) >= 0)); } /** * skb_checksum_complete - Calculate checksum of an entire packet * @skb: packet to process * * This function calculates the checksum over the entire packet plus * the value of skb->csum. The latter can be used to supply the * checksum of a pseudo header as used by TCP/UDP. It returns the * checksum. * * For protocols that contain complete checksums such as ICMP/TCP/UDP, * this function can be used to verify that checksum on received * packets. In that case the function should return zero if the * checksum is correct. In particular, this function will return zero * if skb->ip_summed is CHECKSUM_UNNECESSARY which indicates that the * hardware has already verified the correctness of the checksum. */ static inline __sum16 skb_checksum_complete(struct sk_buff *skb) { return skb_csum_unnecessary(skb) ? 0 : __skb_checksum_complete(skb); } static inline void __skb_decr_checksum_unnecessary(struct sk_buff *skb) { if (skb->ip_summed == CHECKSUM_UNNECESSARY) { if (skb->csum_level == 0) skb->ip_summed = CHECKSUM_NONE; else skb->csum_level--; } } static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb) { if (skb->ip_summed == CHECKSUM_UNNECESSARY) { if (skb->csum_level < SKB_MAX_CSUM_LEVEL) skb->csum_level++; } else if (skb->ip_summed == CHECKSUM_NONE) { skb->ip_summed = CHECKSUM_UNNECESSARY; skb->csum_level = 0; } } static inline void __skb_reset_checksum_unnecessary(struct sk_buff *skb) { if (skb->ip_summed == CHECKSUM_UNNECESSARY) { skb->ip_summed = CHECKSUM_NONE; skb->csum_level = 0; } } /* Check if we need to perform checksum complete validation. * * Returns: true if checksum complete is needed, false otherwise * (either checksum is unnecessary or zero checksum is allowed). */ static inline bool __skb_checksum_validate_needed(struct sk_buff *skb, bool zero_okay, __sum16 check) { if (skb_csum_unnecessary(skb) || (zero_okay && !check)) { skb->csum_valid = 1; __skb_decr_checksum_unnecessary(skb); return false; } return true; } /* For small packets <= CHECKSUM_BREAK perform checksum complete directly * in checksum_init. */ #define CHECKSUM_BREAK 76 /* Unset checksum-complete * * Unset checksum complete can be done when packet is being modified * (uncompressed for instance) and checksum-complete value is * invalidated. */ static inline void skb_checksum_complete_unset(struct sk_buff *skb) { if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; } /* Validate (init) checksum based on checksum complete. * * Return values: * 0: checksum is validated or try to in skb_checksum_complete. In the latter * case the ip_summed will not be CHECKSUM_UNNECESSARY and the pseudo * checksum is stored in skb->csum for use in __skb_checksum_complete * non-zero: value of invalid checksum * */ static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb, bool complete, __wsum psum) { if (skb->ip_summed == CHECKSUM_COMPLETE) { if (!csum_fold(csum_add(psum, skb->csum))) { skb->csum_valid = 1; return 0; } } skb->csum = psum; if (complete || skb->len <= CHECKSUM_BREAK) { __sum16 csum; csum = __skb_checksum_complete(skb); skb->csum_valid = !csum; return csum; } return 0; } static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto) { return 0; } /* Perform checksum validate (init). Note that this is a macro since we only * want to calculate the pseudo header which is an input function if necessary. * First we try to validate without any computation (checksum unnecessary) and * then calculate based on checksum complete calling the function to compute * pseudo header. * * Return values: * 0: checksum is validated or try to in skb_checksum_complete * non-zero: value of invalid checksum */ #define __skb_checksum_validate(skb, proto, complete, \ zero_okay, check, compute_pseudo) \ ({ \ __sum16 __ret = 0; \ skb->csum_valid = 0; \ if (__skb_checksum_validate_needed(skb, zero_okay, check)) \ __ret = __skb_checksum_validate_complete(skb, \ complete, compute_pseudo(skb, proto)); \ __ret; \ }) #define skb_checksum_init(skb, proto, compute_pseudo) \ __skb_checksum_validate(skb, proto, false, false, 0, compute_pseudo) #define skb_checksum_init_zero_check(skb, proto, check, compute_pseudo) \ __skb_checksum_validate(skb, proto, false, true, check, compute_pseudo) #define skb_checksum_validate(skb, proto, compute_pseudo) \ __skb_checksum_validate(skb, proto, true, false, 0, compute_pseudo) #define skb_checksum_validate_zero_check(skb, proto, check, \ compute_pseudo) \ __skb_checksum_validate(skb, proto, true, true, check, compute_pseudo) #define skb_checksum_simple_validate(skb) \ __skb_checksum_validate(skb, 0, true, false, 0, null_compute_pseudo) static inline bool __skb_checksum_convert_check(struct sk_buff *skb) { return (skb->ip_summed == CHECKSUM_NONE && skb->csum_valid); } static inline void __skb_checksum_convert(struct sk_buff *skb, __wsum pseudo) { skb->csum = ~pseudo; skb->ip_summed = CHECKSUM_COMPLETE; } #define skb_checksum_try_convert(skb, proto, compute_pseudo) \ do { \ if (__skb_checksum_convert_check(skb)) \ __skb_checksum_convert(skb, compute_pseudo(skb, proto)); \ } while (0) static inline void skb_remcsum_adjust_partial(struct sk_buff *skb, void *ptr, u16 start, u16 offset) { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = ((unsigned char *)ptr + start) - skb->head; skb->csum_offset = offset - start; } /* Update skbuf and packet to reflect the remote checksum offload operation. * When called, ptr indicates the starting point for skb->csum when * ip_summed is CHECKSUM_COMPLETE. If we need create checksum complete * here, skb_postpull_rcsum is done so skb->csum start is ptr. */ static inline void skb_remcsum_process(struct sk_buff *skb, void *ptr, int start, int offset, bool nopartial) { __wsum delta; if (!nopartial) { skb_remcsum_adjust_partial(skb, ptr, start, offset); return; } if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) { __skb_checksum_complete(skb); skb_postpull_rcsum(skb, skb->data, ptr - (void *)skb->data); } delta = remcsum_adjust(ptr, skb->csum, start, offset); /* Adjust skb->csum since we changed the packet */ skb->csum = csum_add(skb->csum, delta); } static inline struct nf_conntrack *skb_nfct(const struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) return (void *)(skb->_nfct & NFCT_PTRMASK); #else return NULL; #endif } static inline unsigned long skb_get_nfct(const struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) return skb->_nfct; #else return 0UL; #endif } static inline void skb_set_nfct(struct sk_buff *skb, unsigned long nfct) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) skb->slow_gro |= !!nfct; skb->_nfct = nfct; #endif } #ifdef CONFIG_SKB_EXTENSIONS enum skb_ext_id { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) SKB_EXT_BRIDGE_NF, #endif #ifdef CONFIG_XFRM SKB_EXT_SEC_PATH, #endif #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) TC_SKB_EXT, #endif #if IS_ENABLED(CONFIG_MPTCP) SKB_EXT_MPTCP, #endif #if IS_ENABLED(CONFIG_MCTP_FLOWS) SKB_EXT_MCTP, #endif #if IS_ENABLED(CONFIG_INET_PSP) SKB_EXT_PSP, #endif SKB_EXT_NUM, /* must be last */ }; /** * struct skb_ext - sk_buff extensions * @refcnt: 1 on allocation, deallocated on 0 * @offset: offset to add to @data to obtain extension address * @chunks: size currently allocated, stored in SKB_EXT_ALIGN_SHIFT units * @data: start of extension data, variable sized * * Note: offsets/lengths are stored in chunks of 8 bytes, this allows * to use 'u8' types while allowing up to 2kb worth of extension data. */ struct skb_ext { refcount_t refcnt; u8 offset[SKB_EXT_NUM]; /* in chunks of 8 bytes */ u8 chunks; /* same */ char data[] __aligned(8); }; struct skb_ext *__skb_ext_alloc(gfp_t flags); void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id, struct skb_ext *ext); void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id); void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id); void __skb_ext_put(struct skb_ext *ext); static inline void skb_ext_put(struct sk_buff *skb) { if (skb->active_extensions) __skb_ext_put(skb->extensions); } static inline void __skb_ext_copy(struct sk_buff *dst, const struct sk_buff *src) { dst->active_extensions = src->active_extensions; if (src->active_extensions) { struct skb_ext *ext = src->extensions; refcount_inc(&ext->refcnt); dst->extensions = ext; } } static inline void skb_ext_copy(struct sk_buff *dst, const struct sk_buff *src) { skb_ext_put(dst); __skb_ext_copy(dst, src); } static inline bool __skb_ext_exist(const struct skb_ext *ext, enum skb_ext_id i) { return !!ext->offset[i]; } static inline bool skb_ext_exist(const struct sk_buff *skb, enum skb_ext_id id) { return skb->active_extensions & (1 << id); } static inline void skb_ext_del(struct sk_buff *skb, enum skb_ext_id id) { if (skb_ext_exist(skb, id)) __skb_ext_del(skb, id); } static inline void *skb_ext_find(const struct sk_buff *skb, enum skb_ext_id id) { if (skb_ext_exist(skb, id)) { struct skb_ext *ext = skb->extensions; return (void *)ext + (ext->offset[id] << 3); } return NULL; } static inline void skb_ext_reset(struct sk_buff *skb) { if (unlikely(skb->active_extensions)) { __skb_ext_put(skb->extensions); skb->active_extensions = 0; } } static inline bool skb_has_extensions(struct sk_buff *skb) { return unlikely(skb->active_extensions); } #else static inline void skb_ext_put(struct sk_buff *skb) {} static inline void skb_ext_reset(struct sk_buff *skb) {} static inline void skb_ext_del(struct sk_buff *skb, int unused) {} static inline void __skb_ext_copy(struct sk_buff *d, const struct sk_buff *s) {} static inline void skb_ext_copy(struct sk_buff *dst, const struct sk_buff *s) {} static inline bool skb_has_extensions(struct sk_buff *skb) { return false; } #endif /* CONFIG_SKB_EXTENSIONS */ static inline void nf_reset_ct(struct sk_buff *skb) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_conntrack_put(skb_nfct(skb)); skb->_nfct = 0; #endif } static inline void nf_reset_trace(struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || IS_ENABLED(CONFIG_NF_TABLES) skb->nf_trace = 0; #endif } static inline void ipvs_reset(struct sk_buff *skb) { #if IS_ENABLED(CONFIG_IP_VS) skb->ipvs_property = 0; #endif } /* Note: This doesn't put any conntrack info in dst. */ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src, bool copy) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) dst->_nfct = src->_nfct; nf_conntrack_get(skb_nfct(src)); #endif #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || IS_ENABLED(CONFIG_NF_TABLES) if (copy) dst->nf_trace = src->nf_trace; #endif } static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_conntrack_put(skb_nfct(dst)); #endif dst->slow_gro = src->slow_gro; __nf_copy(dst, src, true); } #ifdef CONFIG_NETWORK_SECMARK static inline void skb_copy_secmark(struct sk_buff *to, const struct sk_buff *from) { to->secmark = from->secmark; } static inline void skb_init_secmark(struct sk_buff *skb) { skb->secmark = 0; } #else static inline void skb_copy_secmark(struct sk_buff *to, const struct sk_buff *from) { } static inline void skb_init_secmark(struct sk_buff *skb) { } #endif static inline int secpath_exists(const struct sk_buff *skb) { #ifdef CONFIG_XFRM return skb_ext_exist(skb, SKB_EXT_SEC_PATH); #else return 0; #endif } static inline bool skb_irq_freeable(const struct sk_buff *skb) { return !skb->destructor && !secpath_exists(skb) && !skb_nfct(skb) && !skb->_skb_refdst && !skb_has_frag_list(skb); } static inline void skb_set_queue_mapping(struct sk_buff *skb, u16 queue_mapping) { skb->queue_mapping = queue_mapping; } static inline u16 skb_get_queue_mapping(const struct sk_buff *skb) { return skb->queue_mapping; } static inline void skb_copy_queue_mapping(struct sk_buff *to, const struct sk_buff *from) { to->queue_mapping = from->queue_mapping; } static inline void skb_record_rx_queue(struct sk_buff *skb, u16 rx_queue) { skb->queue_mapping = rx_queue + 1; } static inline u16 skb_get_rx_queue(const struct sk_buff *skb) { return skb->queue_mapping - 1; } static inline bool skb_rx_queue_recorded(const struct sk_buff *skb) { return skb->queue_mapping != 0; } static inline void skb_set_dst_pending_confirm(struct sk_buff *skb, u32 val) { skb->dst_pending_confirm = val; } static inline bool skb_get_dst_pending_confirm(const struct sk_buff *skb) { return skb->dst_pending_confirm != 0; } static inline struct sec_path *skb_sec_path(const struct sk_buff *skb) { #ifdef CONFIG_XFRM return skb_ext_find(skb, SKB_EXT_SEC_PATH); #else return NULL; #endif } static inline bool skb_is_gso(const struct sk_buff *skb) { return skb_shinfo(skb)->gso_size; } /* Note: Should be called only if skb_is_gso(skb) is true */ static inline bool skb_is_gso_v6(const struct sk_buff *skb) { return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6; } /* Note: Should be called only if skb_is_gso(skb) is true */ static inline bool skb_is_gso_sctp(const struct sk_buff *skb) { return skb_shinfo(skb)->gso_type & SKB_GSO_SCTP; } /* Note: Should be called only if skb_is_gso(skb) is true */ static inline bool skb_is_gso_tcp(const struct sk_buff *skb) { return skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6); } static inline void skb_gso_reset(struct sk_buff *skb) { skb_shinfo(skb)->gso_size = 0; skb_shinfo(skb)->gso_segs = 0; skb_shinfo(skb)->gso_type = 0; } static inline void skb_increase_gso_size(struct skb_shared_info *shinfo, u16 increment) { if (WARN_ON_ONCE(shinfo->gso_size == GSO_BY_FRAGS)) return; shinfo->gso_size += increment; } static inline void skb_decrease_gso_size(struct skb_shared_info *shinfo, u16 decrement) { if (WARN_ON_ONCE(shinfo->gso_size == GSO_BY_FRAGS)) return; shinfo->gso_size -= decrement; } void __skb_warn_lro_forwarding(const struct sk_buff *skb); static inline bool skb_warn_if_lro(const struct sk_buff *skb) { /* LRO sets gso_size but not gso_type, whereas if GSO is really * wanted then gso_type will be set. */ const struct skb_shared_info *shinfo = skb_shinfo(skb); if (skb_is_nonlinear(skb) && shinfo->gso_size != 0 && unlikely(shinfo->gso_type == 0)) { __skb_warn_lro_forwarding(skb); return true; } return false; } static inline void skb_forward_csum(struct sk_buff *skb) { /* Unfortunately we don't support this one. Any brave souls? */ if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; } /** * skb_checksum_none_assert - make sure skb ip_summed is CHECKSUM_NONE * @skb: skb to check * * fresh skbs have their ip_summed set to CHECKSUM_NONE. * Instead of forcing ip_summed to CHECKSUM_NONE, we can * use this helper, to document places where we make this assertion. */ static inline void skb_checksum_none_assert(const struct sk_buff *skb) { DEBUG_NET_WARN_ON_ONCE(skb->ip_summed != CHECKSUM_NONE); } bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); int skb_checksum_setup(struct sk_buff *skb, bool recalculate); struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb, unsigned int transport_len, __sum16(*skb_chkf)(struct sk_buff *skb)); /** * skb_head_is_locked - Determine if the skb->head is locked down * @skb: skb to check * * The head on skbs build around a head frag can be removed if they are * not cloned. This function returns true if the skb head is locked down * due to either being allocated via kmalloc, or by being a clone with * multiple references to the head. */ static inline bool skb_head_is_locked(const struct sk_buff *skb) { return !skb->head_frag || skb_cloned(skb); } /* Local Checksum Offload. * Compute outer checksum based on the assumption that the * inner checksum will be offloaded later. * See Documentation/networking/checksum-offloads.rst for * explanation of how this works. * Fill in outer checksum adjustment (e.g. with sum of outer * pseudo-header) before calling. * Also ensure that inner checksum is in linear data area. */ static inline __wsum lco_csum(struct sk_buff *skb) { unsigned char *csum_start = skb_checksum_start(skb); unsigned char *l4_hdr = skb_transport_header(skb); __wsum partial; /* Start with complement of inner checksum adjustment */ partial = ~csum_unfold(*(__force __sum16 *)(csum_start + skb->csum_offset)); /* Add in checksum of our headers (incl. outer checksum * adjustment filled in by caller) and return result. */ return csum_partial(l4_hdr, csum_start - l4_hdr, partial); } static inline bool skb_is_redirected(const struct sk_buff *skb) { return skb->redirected; } static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress) { skb->redirected = 1; #ifdef CONFIG_NET_REDIRECT skb->from_ingress = from_ingress; if (skb->from_ingress) skb_clear_tstamp(skb); #endif } static inline void skb_reset_redirect(struct sk_buff *skb) { skb->redirected = 0; } static inline void skb_set_redirected_noclear(struct sk_buff *skb, bool from_ingress) { skb->redirected = 1; #ifdef CONFIG_NET_REDIRECT skb->from_ingress = from_ingress; #endif } static inline bool skb_csum_is_sctp(struct sk_buff *skb) { #if IS_ENABLED(CONFIG_IP_SCTP) return skb->csum_not_inet; #else return 0; #endif } static inline void skb_reset_csum_not_inet(struct sk_buff *skb) { skb->ip_summed = CHECKSUM_NONE; #if IS_ENABLED(CONFIG_IP_SCTP) skb->csum_not_inet = 0; #endif } static inline void skb_set_kcov_handle(struct sk_buff *skb, const u64 kcov_handle) { #ifdef CONFIG_KCOV skb->kcov_handle = kcov_handle; #endif } static inline u64 skb_get_kcov_handle(struct sk_buff *skb) { #ifdef CONFIG_KCOV return skb->kcov_handle; #else return 0; #endif } static inline void skb_mark_for_recycle(struct sk_buff *skb) { #ifdef CONFIG_PAGE_POOL skb->pp_recycle = 1; #endif } ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter, ssize_t maxsize); #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */
7 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2001 Silicon Graphics, Inc. All Rights Reserved. */ #ifndef __XFS_ITABLE_H__ #define __XFS_ITABLE_H__ /* In-memory representation of a userspace request for batch inode data. */ struct xfs_ibulk { struct xfs_mount *mp; struct mnt_idmap *idmap; void __user *ubuffer; /* user output buffer */ xfs_ino_t startino; /* start with this inode */ unsigned int icount; /* number of elements in ubuffer */ unsigned int ocount; /* number of records returned */ unsigned int flags; /* XFS_IBULK_FLAG_* */ unsigned int iwalk_flags; /* XFS_IWALK_FLAG_* */ }; /* Fill out the bs_extents64 field if set. */ #define XFS_IBULK_NREXT64 (1U << 0) /* Signal that we can return metadata directories. */ #define XFS_IBULK_METADIR (1U << 1) /* * Advance the user buffer pointer by one record of the given size. If the * buffer is now full, return the appropriate error code. */ static inline int xfs_ibulk_advance( struct xfs_ibulk *breq, size_t bytes) { char __user *b = breq->ubuffer; breq->ubuffer = b + bytes; breq->ocount++; return breq->ocount == breq->icount ? -ECANCELED : 0; } /* * Return stat information in bulk (by-inode) for the filesystem. */ /* * Return codes for the formatter function are 0 to continue iterating, and * non-zero to stop iterating. Any non-zero value will be passed up to the * bulkstat/inumbers caller. The special value -ECANCELED can be used to stop * iteration, as neither bulkstat nor inumbers will ever generate that error * code on their own. */ typedef int (*bulkstat_one_fmt_pf)(struct xfs_ibulk *breq, const struct xfs_bulkstat *bstat); int xfs_bulkstat_one(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter); int xfs_bulkstat(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter); void xfs_bulkstat_to_bstat(struct xfs_mount *mp, struct xfs_bstat *bs1, const struct xfs_bulkstat *bstat); typedef int (*inumbers_fmt_pf)(struct xfs_ibulk *breq, const struct xfs_inumbers *igrp); int xfs_inumbers(struct xfs_ibulk *breq, inumbers_fmt_pf formatter); void xfs_inumbers_to_inogrp(struct xfs_inogrp *ig1, const struct xfs_inumbers *ig); #endif /* __XFS_ITABLE_H__ */
33 34 164 34 33 34 34 34 34 34 165 34 34 34 33 2 34 34 34 34 34 34 34 34 34 34 34 34 33 34 34 34 34 34 1 33 2 34 33 34 33 34 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. * All Rights Reserved. */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_bit.h" #include "xfs_shared.h" #include "xfs_mount.h" #include "xfs_ag.h" #include "xfs_defer.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" #include "xfs_extfree_item.h" #include "xfs_log.h" #include "xfs_btree.h" #include "xfs_rmap.h" #include "xfs_alloc.h" #include "xfs_bmap.h" #include "xfs_trace.h" #include "xfs_error.h" #include "xfs_log_priv.h" #include "xfs_log_recover.h" #include "xfs_rtalloc.h" #include "xfs_inode.h" #include "xfs_rtbitmap.h" #include "xfs_rtgroup.h" #include "xfs_zone_alloc.h" struct kmem_cache *xfs_efi_cache; struct kmem_cache *xfs_efd_cache; static const struct xfs_item_ops xfs_efi_item_ops; static inline struct xfs_efi_log_item *EFI_ITEM(struct xfs_log_item *lip) { return container_of(lip, struct xfs_efi_log_item, efi_item); } STATIC void xfs_efi_item_free( struct xfs_efi_log_item *efip) { kvfree(efip->efi_item.li_lv_shadow); if (efip->efi_format.efi_nextents > XFS_EFI_MAX_FAST_EXTENTS) kfree(efip); else kmem_cache_free(xfs_efi_cache, efip); } /* * Freeing the efi requires that we remove it from the AIL if it has already * been placed there. However, the EFI may not yet have been placed in the AIL * when called by xfs_efi_release() from EFD processing due to the ordering of * committed vs unpin operations in bulk insert operations. Hence the reference * count to ensure only the last caller frees the EFI. */ STATIC void xfs_efi_release( struct xfs_efi_log_item *efip) { ASSERT(atomic_read(&efip->efi_refcount) > 0); if (!atomic_dec_and_test(&efip->efi_refcount)) return; xfs_trans_ail_delete(&efip->efi_item, 0); xfs_efi_item_free(efip); } STATIC void xfs_efi_item_size( struct xfs_log_item *lip, int *nvecs, int *nbytes) { struct xfs_efi_log_item *efip = EFI_ITEM(lip); *nvecs += 1; *nbytes += xfs_efi_log_format_sizeof(efip->efi_format.efi_nextents); } unsigned int xfs_efi_log_space(unsigned int nr) { return xlog_item_space(1, xfs_efi_log_format_sizeof(nr)); } /* * This is called to fill in the vector of log iovecs for the * given efi log item. We use only 1 iovec, and we point that * at the efi_log_format structure embedded in the efi item. * It is at this point that we assert that all of the extent * slots in the efi item have been filled. */ STATIC void xfs_efi_item_format( struct xfs_log_item *lip, struct xfs_log_vec *lv) { struct xfs_efi_log_item *efip = EFI_ITEM(lip); struct xfs_log_iovec *vecp = NULL; ASSERT(atomic_read(&efip->efi_next_extent) == efip->efi_format.efi_nextents); ASSERT(lip->li_type == XFS_LI_EFI || lip->li_type == XFS_LI_EFI_RT); efip->efi_format.efi_type = lip->li_type; efip->efi_format.efi_size = 1; xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFI_FORMAT, &efip->efi_format, xfs_efi_log_format_sizeof(efip->efi_format.efi_nextents)); } /* * The unpin operation is the last place an EFI is manipulated in the log. It is * either inserted in the AIL or aborted in the event of a log I/O error. In * either case, the EFI transaction has been successfully committed to make it * this far. Therefore, we expect whoever committed the EFI to either construct * and commit the EFD or drop the EFD's reference in the event of error. Simply * drop the log's EFI reference now that the log is done with it. */ STATIC void xfs_efi_item_unpin( struct xfs_log_item *lip, int remove) { struct xfs_efi_log_item *efip = EFI_ITEM(lip); xfs_efi_release(efip); } /* * The EFI has been either committed or aborted if the transaction has been * cancelled. If the transaction was cancelled, an EFD isn't going to be * constructed and thus we free the EFI here directly. */ STATIC void xfs_efi_item_release( struct xfs_log_item *lip) { xfs_efi_release(EFI_ITEM(lip)); } /* * Allocate and initialize an efi item with the given number of extents. */ STATIC struct xfs_efi_log_item * xfs_efi_init( struct xfs_mount *mp, unsigned short item_type, uint nextents) { struct xfs_efi_log_item *efip; ASSERT(item_type == XFS_LI_EFI || item_type == XFS_LI_EFI_RT); ASSERT(nextents > 0); if (nextents > XFS_EFI_MAX_FAST_EXTENTS) { efip = kzalloc(xfs_efi_log_item_sizeof(nextents), GFP_KERNEL | __GFP_NOFAIL); } else { efip = kmem_cache_zalloc(xfs_efi_cache, GFP_KERNEL | __GFP_NOFAIL); } xfs_log_item_init(mp, &efip->efi_item, item_type, &xfs_efi_item_ops); efip->efi_format.efi_nextents = nextents; efip->efi_format.efi_id = (uintptr_t)(void *)efip; atomic_set(&efip->efi_next_extent, 0); atomic_set(&efip->efi_refcount, 2); return efip; } /* * Copy an EFI format buffer from the given buf, and into the destination * EFI format structure. * The given buffer can be in 32 bit or 64 bit form (which has different padding), * one of which will be the native format for this kernel. * It will handle the conversion of formats if necessary. */ STATIC int xfs_efi_copy_format( struct kvec *buf, struct xfs_efi_log_format *dst_efi_fmt) { struct xfs_efi_log_format *src_efi_fmt = buf->iov_base; uint len, len32, len64, i; len = xfs_efi_log_format_sizeof(src_efi_fmt->efi_nextents); len32 = xfs_efi_log_format32_sizeof(src_efi_fmt->efi_nextents); len64 = xfs_efi_log_format64_sizeof(src_efi_fmt->efi_nextents); if (buf->iov_len == len) { memcpy(dst_efi_fmt, src_efi_fmt, offsetof(struct xfs_efi_log_format, efi_extents)); for (i = 0; i < src_efi_fmt->efi_nextents; i++) memcpy(&dst_efi_fmt->efi_extents[i], &src_efi_fmt->efi_extents[i], sizeof(struct xfs_extent)); return 0; } else if (buf->iov_len == len32) { struct xfs_efi_log_format_32 *src_efi_fmt_32 = buf->iov_base; dst_efi_fmt->efi_type = src_efi_fmt_32->efi_type; dst_efi_fmt->efi_size = src_efi_fmt_32->efi_size; dst_efi_fmt->efi_nextents = src_efi_fmt_32->efi_nextents; dst_efi_fmt->efi_id = src_efi_fmt_32->efi_id; for (i = 0; i < dst_efi_fmt->efi_nextents; i++) { dst_efi_fmt->efi_extents[i].ext_start = src_efi_fmt_32->efi_extents[i].ext_start; dst_efi_fmt->efi_extents[i].ext_len = src_efi_fmt_32->efi_extents[i].ext_len; } return 0; } else if (buf->iov_len == len64) { struct xfs_efi_log_format_64 *src_efi_fmt_64 = buf->iov_base; dst_efi_fmt->efi_type = src_efi_fmt_64->efi_type; dst_efi_fmt->efi_size = src_efi_fmt_64->efi_size; dst_efi_fmt->efi_nextents = src_efi_fmt_64->efi_nextents; dst_efi_fmt->efi_id = src_efi_fmt_64->efi_id; for (i = 0; i < dst_efi_fmt->efi_nextents; i++) { dst_efi_fmt->efi_extents[i].ext_start = src_efi_fmt_64->efi_extents[i].ext_start; dst_efi_fmt->efi_extents[i].ext_len = src_efi_fmt_64->efi_extents[i].ext_len; } return 0; } XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, NULL, buf->iov_base, buf->iov_len); return -EFSCORRUPTED; } static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip) { return container_of(lip, struct xfs_efd_log_item, efd_item); } STATIC void xfs_efd_item_free(struct xfs_efd_log_item *efdp) { kvfree(efdp->efd_item.li_lv_shadow); if (efdp->efd_format.efd_nextents > XFS_EFD_MAX_FAST_EXTENTS) kfree(efdp); else kmem_cache_free(xfs_efd_cache, efdp); } STATIC void xfs_efd_item_size( struct xfs_log_item *lip, int *nvecs, int *nbytes) { struct xfs_efd_log_item *efdp = EFD_ITEM(lip); *nvecs += 1; *nbytes += xfs_efd_log_format_sizeof(efdp->efd_format.efd_nextents); } unsigned int xfs_efd_log_space(unsigned int nr) { return xlog_item_space(1, xfs_efd_log_format_sizeof(nr)); } /* * This is called to fill in the vector of log iovecs for the * given efd log item. We use only 1 iovec, and we point that * at the efd_log_format structure embedded in the efd item. * It is at this point that we assert that all of the extent * slots in the efd item have been filled. */ STATIC void xfs_efd_item_format( struct xfs_log_item *lip, struct xfs_log_vec *lv) { struct xfs_efd_log_item *efdp = EFD_ITEM(lip); struct xfs_log_iovec *vecp = NULL; ASSERT(efdp->efd_next_extent == efdp->efd_format.efd_nextents); ASSERT(lip->li_type == XFS_LI_EFD || lip->li_type == XFS_LI_EFD_RT); efdp->efd_format.efd_type = lip->li_type; efdp->efd_format.efd_size = 1; xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFD_FORMAT, &efdp->efd_format, xfs_efd_log_format_sizeof(efdp->efd_format.efd_nextents)); } /* * The EFD is either committed or aborted if the transaction is cancelled. If * the transaction is cancelled, drop our reference to the EFI and free the EFD. */ STATIC void xfs_efd_item_release( struct xfs_log_item *lip) { struct xfs_efd_log_item *efdp = EFD_ITEM(lip); xfs_efi_release(efdp->efd_efip); xfs_efd_item_free(efdp); } static struct xfs_log_item * xfs_efd_item_intent( struct xfs_log_item *lip) { return &EFD_ITEM(lip)->efd_efip->efi_item; } static const struct xfs_item_ops xfs_efd_item_ops = { .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED | XFS_ITEM_INTENT_DONE, .iop_size = xfs_efd_item_size, .iop_format = xfs_efd_item_format, .iop_release = xfs_efd_item_release, .iop_intent = xfs_efd_item_intent, }; static inline struct xfs_extent_free_item *xefi_entry(const struct list_head *e) { return list_entry(e, struct xfs_extent_free_item, xefi_list); } static inline bool xfs_efi_item_isrt(const struct xfs_log_item *lip) { ASSERT(lip->li_type == XFS_LI_EFI || lip->li_type == XFS_LI_EFI_RT); return lip->li_type == XFS_LI_EFI_RT; } /* * Fill the EFD with all extents from the EFI when we need to roll the * transaction and continue with a new EFI. * * This simply copies all the extents in the EFI to the EFD rather than make * assumptions about which extents in the EFI have already been processed. We * currently keep the xefi list in the same order as the EFI extent list, but * that may not always be the case. Copying everything avoids leaving a landmine * were we fail to cancel all the extents in an EFI if the xefi list is * processed in a different order to the extents in the EFI. */ static void xfs_efd_from_efi( struct xfs_efd_log_item *efdp) { struct xfs_efi_log_item *efip = efdp->efd_efip; uint i; ASSERT(efip->efi_format.efi_nextents > 0); ASSERT(efdp->efd_next_extent < efip->efi_format.efi_nextents); for (i = 0; i < efip->efi_format.efi_nextents; i++) { efdp->efd_format.efd_extents[i] = efip->efi_format.efi_extents[i]; } efdp->efd_next_extent = efip->efi_format.efi_nextents; } static void xfs_efd_add_extent( struct xfs_efd_log_item *efdp, struct xfs_extent_free_item *xefi) { struct xfs_extent *extp; ASSERT(efdp->efd_next_extent < efdp->efd_format.efd_nextents); extp = &efdp->efd_format.efd_extents[efdp->efd_next_extent]; extp->ext_start = xefi->xefi_startblock; extp->ext_len = xefi->xefi_blockcount; efdp->efd_next_extent++; } /* Sort bmap items by AG. */ static int xfs_extent_free_diff_items( void *priv, const struct list_head *a, const struct list_head *b) { struct xfs_extent_free_item *ra = xefi_entry(a); struct xfs_extent_free_item *rb = xefi_entry(b); return ra->xefi_group->xg_gno - rb->xefi_group->xg_gno; } /* Log a free extent to the intent item. */ STATIC void xfs_extent_free_log_item( struct xfs_trans *tp, struct xfs_efi_log_item *efip, struct xfs_extent_free_item *xefi) { uint next_extent; struct xfs_extent *extp; /* * atomic_inc_return gives us the value after the increment; * we want to use it as an array index so we need to subtract 1 from * it. */ next_extent = atomic_inc_return(&efip->efi_next_extent) - 1; ASSERT(next_extent < efip->efi_format.efi_nextents); extp = &efip->efi_format.efi_extents[next_extent]; extp->ext_start = xefi->xefi_startblock; extp->ext_len = xefi->xefi_blockcount; } static struct xfs_log_item * __xfs_extent_free_create_intent( struct xfs_trans *tp, struct list_head *items, unsigned int count, bool sort, unsigned short item_type) { struct xfs_mount *mp = tp->t_mountp; struct xfs_efi_log_item *efip; struct xfs_extent_free_item *xefi; ASSERT(count > 0); efip = xfs_efi_init(mp, item_type, count); if (sort) list_sort(mp, items, xfs_extent_free_diff_items); list_for_each_entry(xefi, items, xefi_list) xfs_extent_free_log_item(tp, efip, xefi); return &efip->efi_item; } static struct xfs_log_item * xfs_extent_free_create_intent( struct xfs_trans *tp, struct list_head *items, unsigned int count, bool sort) { return __xfs_extent_free_create_intent(tp, items, count, sort, XFS_LI_EFI); } static inline unsigned short xfs_efd_type_from_efi(const struct xfs_efi_log_item *efip) { return xfs_efi_item_isrt(&efip->efi_item) ? XFS_LI_EFD_RT : XFS_LI_EFD; } /* Get an EFD so we can process all the free extents. */ static struct xfs_log_item * xfs_extent_free_create_done( struct xfs_trans *tp, struct xfs_log_item *intent, unsigned int count) { struct xfs_efi_log_item *efip = EFI_ITEM(intent); struct xfs_efd_log_item *efdp; ASSERT(count > 0); if (count > XFS_EFD_MAX_FAST_EXTENTS) { efdp = kzalloc(xfs_efd_log_item_sizeof(count), GFP_KERNEL | __GFP_NOFAIL); } else { efdp = kmem_cache_zalloc(xfs_efd_cache, GFP_KERNEL | __GFP_NOFAIL); } xfs_log_item_init(tp->t_mountp, &efdp->efd_item, xfs_efd_type_from_efi(efip), &xfs_efd_item_ops); efdp->efd_efip = efip; efdp->efd_format.efd_nextents = count; efdp->efd_format.efd_efi_id = efip->efi_format.efi_id; return &efdp->efd_item; } static inline const struct xfs_defer_op_type * xefi_ops( struct xfs_extent_free_item *xefi) { if (xfs_efi_is_realtime(xefi)) return &xfs_rtextent_free_defer_type; if (xefi->xefi_agresv == XFS_AG_RESV_AGFL) return &xfs_agfl_free_defer_type; return &xfs_extent_free_defer_type; } /* Add this deferred EFI to the transaction. */ void xfs_extent_free_defer_add( struct xfs_trans *tp, struct xfs_extent_free_item *xefi, struct xfs_defer_pending **dfpp) { struct xfs_mount *mp = tp->t_mountp; xefi->xefi_group = xfs_group_intent_get(mp, xefi->xefi_startblock, xfs_efi_is_realtime(xefi) ? XG_TYPE_RTG : XG_TYPE_AG); trace_xfs_extent_free_defer(mp, xefi); *dfpp = xfs_defer_add(tp, &xefi->xefi_list, xefi_ops(xefi)); } /* Cancel a free extent. */ STATIC void xfs_extent_free_cancel_item( struct list_head *item) { struct xfs_extent_free_item *xefi = xefi_entry(item); xfs_group_intent_put(xefi->xefi_group); kmem_cache_free(xfs_extfree_item_cache, xefi); } /* Process a free extent. */ STATIC int xfs_extent_free_finish_item( struct xfs_trans *tp, struct xfs_log_item *done, struct list_head *item, struct xfs_btree_cur **state) { struct xfs_owner_info oinfo = { }; struct xfs_extent_free_item *xefi = xefi_entry(item); struct xfs_efd_log_item *efdp = EFD_ITEM(done); struct xfs_mount *mp = tp->t_mountp; xfs_agblock_t agbno; int error = 0; agbno = XFS_FSB_TO_AGBNO(mp, xefi->xefi_startblock); oinfo.oi_owner = xefi->xefi_owner; if (xefi->xefi_flags & XFS_EFI_ATTR_FORK) oinfo.oi_flags |= XFS_OWNER_INFO_ATTR_FORK; if (xefi->xefi_flags & XFS_EFI_BMBT_BLOCK) oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK; trace_xfs_extent_free_deferred(mp, xefi); /* * If we need a new transaction to make progress, the caller will log a * new EFI with the current contents. It will also log an EFD to cancel * the existing EFI, and so we need to copy all the unprocessed extents * in this EFI to the EFD so this works correctly. */ if (!(xefi->xefi_flags & XFS_EFI_CANCELLED)) error = __xfs_free_extent(tp, to_perag(xefi->xefi_group), agbno, xefi->xefi_blockcount, &oinfo, xefi->xefi_agresv, xefi->xefi_flags & XFS_EFI_SKIP_DISCARD); if (error == -EAGAIN) { xfs_efd_from_efi(efdp); return error; } xfs_efd_add_extent(efdp, xefi); xfs_extent_free_cancel_item(item); return error; } /* Abort all pending EFIs. */ STATIC void xfs_extent_free_abort_intent( struct xfs_log_item *intent) { xfs_efi_release(EFI_ITEM(intent)); } /* * AGFL blocks are accounted differently in the reserve pools and are not * inserted into the busy extent list. */ STATIC int xfs_agfl_free_finish_item( struct xfs_trans *tp, struct xfs_log_item *done, struct list_head *item, struct xfs_btree_cur **state) { struct xfs_owner_info oinfo = { }; struct xfs_mount *mp = tp->t_mountp; struct xfs_efd_log_item *efdp = EFD_ITEM(done); struct xfs_extent_free_item *xefi = xefi_entry(item); struct xfs_buf *agbp; int error; xfs_agblock_t agbno; ASSERT(xefi->xefi_blockcount == 1); agbno = XFS_FSB_TO_AGBNO(mp, xefi->xefi_startblock); oinfo.oi_owner = xefi->xefi_owner; trace_xfs_agfl_free_deferred(mp, xefi); error = xfs_alloc_read_agf(to_perag(xefi->xefi_group), tp, 0, &agbp); if (!error) error = xfs_free_ag_extent(tp, agbp, agbno, 1, &oinfo, XFS_AG_RESV_AGFL); xfs_efd_add_extent(efdp, xefi); xfs_extent_free_cancel_item(&xefi->xefi_list); return error; } /* Is this recovered EFI ok? */ static inline bool xfs_efi_validate_ext( struct xfs_mount *mp, bool isrt, struct xfs_extent *extp) { if (isrt) return xfs_verify_rtbext(mp, extp->ext_start, extp->ext_len); return xfs_verify_fsbext(mp, extp->ext_start, extp->ext_len); } static inline void xfs_efi_recover_work( struct xfs_mount *mp, struct xfs_defer_pending *dfp, bool isrt, struct xfs_extent *extp) { struct xfs_extent_free_item *xefi; xefi = kmem_cache_zalloc(xfs_extfree_item_cache, GFP_KERNEL | __GFP_NOFAIL); xefi->xefi_startblock = extp->ext_start; xefi->xefi_blockcount = extp->ext_len; xefi->xefi_agresv = XFS_AG_RESV_NONE; xefi->xefi_owner = XFS_RMAP_OWN_UNKNOWN; xefi->xefi_group = xfs_group_intent_get(mp, extp->ext_start, isrt ? XG_TYPE_RTG : XG_TYPE_AG); if (isrt) xefi->xefi_flags |= XFS_EFI_REALTIME; xfs_defer_add_item(dfp, &xefi->xefi_list); } /* * Process an extent free intent item that was recovered from * the log. We need to free the extents that it describes. */ STATIC int xfs_extent_free_recover_work( struct xfs_defer_pending *dfp, struct list_head *capture_list) { struct xfs_trans_res resv; struct xfs_log_item *lip = dfp->dfp_intent; struct xfs_efi_log_item *efip = EFI_ITEM(lip); struct xfs_mount *mp = lip->li_log->l_mp; struct xfs_trans *tp; int i; int error = 0; bool isrt = xfs_efi_item_isrt(lip); /* * First check the validity of the extents described by the EFI. If * any are bad, then assume that all are bad and just toss the EFI. * Mixing RT and non-RT extents in the same EFI item is not allowed. */ for (i = 0; i < efip->efi_format.efi_nextents; i++) { if (!xfs_efi_validate_ext(mp, isrt, &efip->efi_format.efi_extents[i])) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, &efip->efi_format, sizeof(efip->efi_format)); return -EFSCORRUPTED; } xfs_efi_recover_work(mp, dfp, isrt, &efip->efi_format.efi_extents[i]); } resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate); error = xfs_trans_alloc(mp, &resv, 0, 0, 0, &tp); if (error) return error; error = xlog_recover_finish_intent(tp, dfp); if (error == -EFSCORRUPTED) XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, &efip->efi_format, sizeof(efip->efi_format)); if (error) goto abort_error; return xfs_defer_ops_capture_and_commit(tp, capture_list); abort_error: xfs_trans_cancel(tp); return error; } /* Relog an intent item to push the log tail forward. */ static struct xfs_log_item * xfs_extent_free_relog_intent( struct xfs_trans *tp, struct xfs_log_item *intent, struct xfs_log_item *done_item) { struct xfs_efd_log_item *efdp = EFD_ITEM(done_item); struct xfs_efi_log_item *efip; struct xfs_extent *extp; unsigned int count; count = EFI_ITEM(intent)->efi_format.efi_nextents; extp = EFI_ITEM(intent)->efi_format.efi_extents; ASSERT(intent->li_type == XFS_LI_EFI || intent->li_type == XFS_LI_EFI_RT); efdp->efd_next_extent = count; memcpy(efdp->efd_format.efd_extents, extp, count * sizeof(*extp)); efip = xfs_efi_init(tp->t_mountp, intent->li_type, count); memcpy(efip->efi_format.efi_extents, extp, count * sizeof(*extp)); atomic_set(&efip->efi_next_extent, count); return &efip->efi_item; } const struct xfs_defer_op_type xfs_extent_free_defer_type = { .name = "extent_free", .max_items = XFS_EFI_MAX_FAST_EXTENTS, .create_intent = xfs_extent_free_create_intent, .abort_intent = xfs_extent_free_abort_intent, .create_done = xfs_extent_free_create_done, .finish_item = xfs_extent_free_finish_item, .cancel_item = xfs_extent_free_cancel_item, .recover_work = xfs_extent_free_recover_work, .relog_intent = xfs_extent_free_relog_intent, }; /* sub-type with special handling for AGFL deferred frees */ const struct xfs_defer_op_type xfs_agfl_free_defer_type = { .name = "agfl_free", .max_items = XFS_EFI_MAX_FAST_EXTENTS, .create_intent = xfs_extent_free_create_intent, .abort_intent = xfs_extent_free_abort_intent, .create_done = xfs_extent_free_create_done, .finish_item = xfs_agfl_free_finish_item, .cancel_item = xfs_extent_free_cancel_item, .recover_work = xfs_extent_free_recover_work, .relog_intent = xfs_extent_free_relog_intent, }; #ifdef CONFIG_XFS_RT /* Create a realtime extent freeing */ static struct xfs_log_item * xfs_rtextent_free_create_intent( struct xfs_trans *tp, struct list_head *items, unsigned int count, bool sort) { return __xfs_extent_free_create_intent(tp, items, count, sort, XFS_LI_EFI_RT); } /* Process a free realtime extent. */ STATIC int xfs_rtextent_free_finish_item( struct xfs_trans *tp, struct xfs_log_item *done, struct list_head *item, struct xfs_btree_cur **state) { struct xfs_mount *mp = tp->t_mountp; struct xfs_extent_free_item *xefi = xefi_entry(item); struct xfs_efd_log_item *efdp = EFD_ITEM(done); struct xfs_rtgroup **rtgp = (struct xfs_rtgroup **)state; int error = 0; trace_xfs_extent_free_deferred(mp, xefi); if (xefi->xefi_flags & XFS_EFI_CANCELLED) goto done; if (*rtgp != to_rtg(xefi->xefi_group)) { unsigned int lock_flags; if (xfs_has_zoned(mp)) lock_flags = XFS_RTGLOCK_RMAP; else lock_flags = XFS_RTGLOCK_BITMAP; *rtgp = to_rtg(xefi->xefi_group); xfs_rtgroup_lock(*rtgp, lock_flags); xfs_rtgroup_trans_join(tp, *rtgp, lock_flags); } if (xfs_has_zoned(mp)) { error = xfs_zone_free_blocks(tp, *rtgp, xefi->xefi_startblock, xefi->xefi_blockcount); } else { error = xfs_rtfree_blocks(tp, *rtgp, xefi->xefi_startblock, xefi->xefi_blockcount); } if (error == -EAGAIN) { xfs_efd_from_efi(efdp); return error; } done: xfs_efd_add_extent(efdp, xefi); xfs_extent_free_cancel_item(item); return error; } const struct xfs_defer_op_type xfs_rtextent_free_defer_type = { .name = "rtextent_free", .max_items = XFS_EFI_MAX_FAST_EXTENTS, .create_intent = xfs_rtextent_free_create_intent, .abort_intent = xfs_extent_free_abort_intent, .create_done = xfs_extent_free_create_done, .finish_item = xfs_rtextent_free_finish_item, .cancel_item = xfs_extent_free_cancel_item, .recover_work = xfs_extent_free_recover_work, .relog_intent = xfs_extent_free_relog_intent, }; #else const struct xfs_defer_op_type xfs_rtextent_free_defer_type = { .name = "rtextent_free", }; #endif /* CONFIG_XFS_RT */ STATIC bool xfs_efi_item_match( struct xfs_log_item *lip, uint64_t intent_id) { return EFI_ITEM(lip)->efi_format.efi_id == intent_id; } static const struct xfs_item_ops xfs_efi_item_ops = { .flags = XFS_ITEM_INTENT, .iop_size = xfs_efi_item_size, .iop_format = xfs_efi_item_format, .iop_unpin = xfs_efi_item_unpin, .iop_release = xfs_efi_item_release, .iop_match = xfs_efi_item_match, }; /* * This routine is called to create an in-core extent free intent * item from the efi format structure which was logged on disk. * It allocates an in-core efi, copies the extents from the format * structure into it, and adds the efi to the AIL with the given * LSN. */ STATIC int xlog_recover_efi_commit_pass2( struct xlog *log, struct list_head *buffer_list, struct xlog_recover_item *item, xfs_lsn_t lsn) { struct xfs_mount *mp = log->l_mp; struct xfs_efi_log_item *efip; struct xfs_efi_log_format *efi_formatp; int error; efi_formatp = item->ri_buf[0].iov_base; if (item->ri_buf[0].iov_len < xfs_efi_log_format_sizeof(0)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, item->ri_buf[0].iov_base, item->ri_buf[0].iov_len); return -EFSCORRUPTED; } efip = xfs_efi_init(mp, ITEM_TYPE(item), efi_formatp->efi_nextents); error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format); if (error) { xfs_efi_item_free(efip); return error; } atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents); xlog_recover_intent_item(log, &efip->efi_item, lsn, &xfs_extent_free_defer_type); return 0; } const struct xlog_recover_item_ops xlog_efi_item_ops = { .item_type = XFS_LI_EFI, .commit_pass2 = xlog_recover_efi_commit_pass2, }; #ifdef CONFIG_XFS_RT STATIC int xlog_recover_rtefi_commit_pass2( struct xlog *log, struct list_head *buffer_list, struct xlog_recover_item *item, xfs_lsn_t lsn) { struct xfs_mount *mp = log->l_mp; struct xfs_efi_log_item *efip; struct xfs_efi_log_format *efi_formatp; int error; efi_formatp = item->ri_buf[0].iov_base; if (item->ri_buf[0].iov_len < xfs_efi_log_format_sizeof(0)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, item->ri_buf[0].iov_base, item->ri_buf[0].iov_len); return -EFSCORRUPTED; } efip = xfs_efi_init(mp, ITEM_TYPE(item), efi_formatp->efi_nextents); error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format); if (error) { xfs_efi_item_free(efip); return error; } atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents); xlog_recover_intent_item(log, &efip->efi_item, lsn, &xfs_rtextent_free_defer_type); return 0; } #else STATIC int xlog_recover_rtefi_commit_pass2( struct xlog *log, struct list_head *buffer_list, struct xlog_recover_item *item, xfs_lsn_t lsn) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, item->ri_buf[0].iov_base, item->ri_buf[0].iov_len); return -EFSCORRUPTED; } #endif const struct xlog_recover_item_ops xlog_rtefi_item_ops = { .item_type = XFS_LI_EFI_RT, .commit_pass2 = xlog_recover_rtefi_commit_pass2, }; /* * This routine is called when an EFD format structure is found in a committed * transaction in the log. Its purpose is to cancel the corresponding EFI if it * was still in the log. To do this it searches the AIL for the EFI with an id * equal to that in the EFD format structure. If we find it we drop the EFD * reference, which removes the EFI from the AIL and frees it. */ STATIC int xlog_recover_efd_commit_pass2( struct xlog *log, struct list_head *buffer_list, struct xlog_recover_item *item, xfs_lsn_t lsn) { struct xfs_efd_log_format *efd_formatp; int buflen = item->ri_buf[0].iov_len; efd_formatp = item->ri_buf[0].iov_base; if (buflen < sizeof(struct xfs_efd_log_format)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, efd_formatp, buflen); return -EFSCORRUPTED; } if (item->ri_buf[0].iov_len != xfs_efd_log_format32_sizeof( efd_formatp->efd_nextents) && item->ri_buf[0].iov_len != xfs_efd_log_format64_sizeof( efd_formatp->efd_nextents)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, efd_formatp, buflen); return -EFSCORRUPTED; } xlog_recover_release_intent(log, XFS_LI_EFI, efd_formatp->efd_efi_id); return 0; } const struct xlog_recover_item_ops xlog_efd_item_ops = { .item_type = XFS_LI_EFD, .commit_pass2 = xlog_recover_efd_commit_pass2, }; #ifdef CONFIG_XFS_RT STATIC int xlog_recover_rtefd_commit_pass2( struct xlog *log, struct list_head *buffer_list, struct xlog_recover_item *item, xfs_lsn_t lsn) { struct xfs_efd_log_format *efd_formatp; int buflen = item->ri_buf[0].iov_len; efd_formatp = item->ri_buf[0].iov_base; if (buflen < sizeof(struct xfs_efd_log_format)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, efd_formatp, buflen); return -EFSCORRUPTED; } if (item->ri_buf[0].iov_len != xfs_efd_log_format32_sizeof( efd_formatp->efd_nextents) && item->ri_buf[0].iov_len != xfs_efd_log_format64_sizeof( efd_formatp->efd_nextents)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, efd_formatp, buflen); return -EFSCORRUPTED; } xlog_recover_release_intent(log, XFS_LI_EFI_RT, efd_formatp->efd_efi_id); return 0; } #else # define xlog_recover_rtefd_commit_pass2 xlog_recover_rtefi_commit_pass2 #endif const struct xlog_recover_item_ops xlog_rtefd_item_ops = { .item_type = XFS_LI_EFD_RT, .commit_pass2 = xlog_recover_rtefd_commit_pass2, };
2 5 2 2 2 5 5 1 1 1 1 16 14 14 10 4 4 12 16 1 4 3 4 3 2 1 1 8 8 6 1 19 15 1 2 3 2 7 19 10 6 9 6 2 2 4 4 3 4 4 2 2 2 2 2 2 2 2 1 2 2 2 2 1 2 18 18 18 18 18 17 2 1 16 1 15 15 14 15 11 10 10 11 11 9 9 3 11 3 2 1 2 2 8 8 8 8 8 8 8 8 8 8 7 8 8 15 8 8 8 8 18 21 23 23 4 23 19 23 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 // SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/hpfs/super.c * * Mikulas Patocka (mikulas@artax.karlin.mff.cuni.cz), 1998-1999 * * mounting, unmounting, error handling */ #include "hpfs_fn.h" #include <linux/module.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> #include <linux/init.h> #include <linux/statfs.h> #include <linux/magic.h> #include <linux/sched.h> #include <linux/bitmap.h> #include <linux/slab.h> #include <linux/seq_file.h> /* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */ static void mark_dirty(struct super_block *s, int remount) { if (hpfs_sb(s)->sb_chkdsk && (remount || !sb_rdonly(s))) { struct buffer_head *bh; struct hpfs_spare_block *sb; if ((sb = hpfs_map_sector(s, 17, &bh, 0))) { sb->dirty = 1; sb->old_wrote = 0; mark_buffer_dirty(bh); sync_dirty_buffer(bh); brelse(bh); } } } /* Mark the filesystem clean (mark it dirty for chkdsk if chkdsk==2 or if there were errors) */ static void unmark_dirty(struct super_block *s) { struct buffer_head *bh; struct hpfs_spare_block *sb; if (sb_rdonly(s)) return; sync_blockdev(s->s_bdev); if ((sb = hpfs_map_sector(s, 17, &bh, 0))) { sb->dirty = hpfs_sb(s)->sb_chkdsk > 1 - hpfs_sb(s)->sb_was_error; sb->old_wrote = hpfs_sb(s)->sb_chkdsk >= 2 && !hpfs_sb(s)->sb_was_error; mark_buffer_dirty(bh); sync_dirty_buffer(bh); brelse(bh); } } /* Filesystem error... */ void hpfs_error(struct super_block *s, const char *fmt, ...) { struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; pr_err("filesystem error: %pV", &vaf); va_end(args); if (!hpfs_sb(s)->sb_was_error) { if (hpfs_sb(s)->sb_err == 2) { pr_cont("; crashing the system because you wanted it\n"); mark_dirty(s, 0); panic("HPFS panic"); } else if (hpfs_sb(s)->sb_err == 1) { if (sb_rdonly(s)) pr_cont("; already mounted read-only\n"); else { pr_cont("; remounting read-only\n"); mark_dirty(s, 0); s->s_flags |= SB_RDONLY; } } else if (sb_rdonly(s)) pr_cont("; going on - but anything won't be destroyed because it's read-only\n"); else pr_cont("; corrupted filesystem mounted read/write - your computer will explode within 20 seconds ... but you wanted it so!\n"); } else pr_cont("\n"); hpfs_sb(s)->sb_was_error = 1; } /* * A little trick to detect cycles in many hpfs structures and don't let the * kernel crash on corrupted filesystem. When first called, set c2 to 0. * * BTW. chkdsk doesn't detect cycles correctly. When I had 2 lost directories * nested each in other, chkdsk locked up happilly. */ int hpfs_stop_cycles(struct super_block *s, int key, int *c1, int *c2, char *msg) { if (*c2 && *c1 == key) { hpfs_error(s, "cycle detected on key %08x in %s", key, msg); return 1; } (*c2)++; if (!((*c2 - 1) & *c2)) *c1 = key; return 0; } static void free_sbi(struct hpfs_sb_info *sbi) { kfree(sbi->sb_cp_table); kfree(sbi->sb_bmp_dir); kfree(sbi); } static void lazy_free_sbi(struct rcu_head *rcu) { free_sbi(container_of(rcu, struct hpfs_sb_info, rcu)); } static void hpfs_put_super(struct super_block *s) { hpfs_lock(s); unmark_dirty(s); hpfs_unlock(s); call_rcu(&hpfs_sb(s)->rcu, lazy_free_sbi); } static unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno) { struct quad_buffer_head qbh; unsigned long *bits; unsigned count; bits = hpfs_map_4sectors(s, secno, &qbh, 0); if (!bits) return (unsigned)-1; count = bitmap_weight(bits, 2048 * BITS_PER_BYTE); hpfs_brelse4(&qbh); return count; } static unsigned count_bitmaps(struct super_block *s) { unsigned n, count, n_bands; n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14; count = 0; for (n = 0; n < COUNT_RD_AHEAD; n++) { hpfs_prefetch_bitmap(s, n); } for (n = 0; n < n_bands; n++) { unsigned c; hpfs_prefetch_bitmap(s, n + COUNT_RD_AHEAD); c = hpfs_count_one_bitmap(s, le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[n])); if (c != (unsigned)-1) count += c; } return count; } unsigned hpfs_get_free_dnodes(struct super_block *s) { struct hpfs_sb_info *sbi = hpfs_sb(s); if (sbi->sb_n_free_dnodes == (unsigned)-1) { unsigned c = hpfs_count_one_bitmap(s, sbi->sb_dmap); if (c == (unsigned)-1) return 0; sbi->sb_n_free_dnodes = c; } return sbi->sb_n_free_dnodes; } static int hpfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *s = dentry->d_sb; struct hpfs_sb_info *sbi = hpfs_sb(s); u64 id = huge_encode_dev(s->s_bdev->bd_dev); hpfs_lock(s); if (sbi->sb_n_free == (unsigned)-1) sbi->sb_n_free = count_bitmaps(s); buf->f_type = s->s_magic; buf->f_bsize = 512; buf->f_blocks = sbi->sb_fs_size; buf->f_bfree = sbi->sb_n_free; buf->f_bavail = sbi->sb_n_free; buf->f_files = sbi->sb_dirband_size / 4; buf->f_ffree = hpfs_get_free_dnodes(s); buf->f_fsid = u64_to_fsid(id); buf->f_namelen = 254; hpfs_unlock(s); return 0; } long hpfs_ioctl(struct file *file, unsigned cmd, unsigned long arg) { switch (cmd) { case FITRIM: { struct fstrim_range range; secno n_trimmed; int r; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range))) return -EFAULT; r = hpfs_trim_fs(file_inode(file)->i_sb, range.start >> 9, (range.start + range.len) >> 9, (range.minlen + 511) >> 9, &n_trimmed); if (r) return r; range.len = (u64)n_trimmed << 9; if (copy_to_user((struct fstrim_range __user *)arg, &range, sizeof(range))) return -EFAULT; return 0; } default: { return -ENOIOCTLCMD; } } } static struct kmem_cache * hpfs_inode_cachep; static struct inode *hpfs_alloc_inode(struct super_block *sb) { struct hpfs_inode_info *ei; ei = alloc_inode_sb(sb, hpfs_inode_cachep, GFP_NOFS); if (!ei) return NULL; return &ei->vfs_inode; } static void hpfs_free_inode(struct inode *inode) { kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode)); } static void init_once(void *foo) { struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo; inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) { hpfs_inode_cachep = kmem_cache_create("hpfs_inode_cache", sizeof(struct hpfs_inode_info), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_ACCOUNT), init_once); if (hpfs_inode_cachep == NULL) return -ENOMEM; return 0; } static void destroy_inodecache(void) { /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. */ rcu_barrier(); kmem_cache_destroy(hpfs_inode_cachep); } enum { Opt_help, Opt_uid, Opt_gid, Opt_umask, Opt_case, Opt_check, Opt_err, Opt_eas, Opt_chkdsk, Opt_timeshift, }; static const struct constant_table hpfs_param_case[] = { {"asis", 0}, {"lower", 1}, {} }; static const struct constant_table hpfs_param_check[] = { {"none", 0}, {"normal", 1}, {"strict", 2}, {} }; static const struct constant_table hpfs_param_err[] = { {"continue", 0}, {"remount-ro", 1}, {"panic", 2}, {} }; static const struct constant_table hpfs_param_eas[] = { {"no", 0}, {"ro", 1}, {"rw", 2}, {} }; static const struct constant_table hpfs_param_chkdsk[] = { {"no", 0}, {"errors", 1}, {"always", 2}, {} }; static const struct fs_parameter_spec hpfs_param_spec[] = { fsparam_flag ("help", Opt_help), fsparam_uid ("uid", Opt_uid), fsparam_gid ("gid", Opt_gid), fsparam_u32oct ("umask", Opt_umask), fsparam_enum ("case", Opt_case, hpfs_param_case), fsparam_enum ("check", Opt_check, hpfs_param_check), fsparam_enum ("errors", Opt_err, hpfs_param_err), fsparam_enum ("eas", Opt_eas, hpfs_param_eas), fsparam_enum ("chkdsk", Opt_chkdsk, hpfs_param_chkdsk), fsparam_s32 ("timeshift", Opt_timeshift), {} }; struct hpfs_fc_context { kuid_t uid; kgid_t gid; umode_t umask; int lowercase; int eas; int chk; int errs; int chkdsk; int timeshift; }; static inline void hpfs_help(void) { pr_info("\n\ HPFS filesystem options:\n\ help do not mount and display this text\n\ uid=xxx set uid of files that don't have uid specified in eas\n\ gid=xxx set gid of files that don't have gid specified in eas\n\ umask=xxx set mode of files that don't have mode specified in eas\n\ case=lower lowercase all files\n\ case=asis do not lowercase files (default)\n\ check=none no fs checks - kernel may crash on corrupted filesystem\n\ check=normal do some checks - it should not crash (default)\n\ check=strict do extra time-consuming checks, used for debugging\n\ errors=continue continue on errors\n\ errors=remount-ro remount read-only if errors found (default)\n\ errors=panic panic on errors\n\ chkdsk=no do not mark fs for chkdsking even if there were errors\n\ chkdsk=errors mark fs dirty if errors found (default)\n\ chkdsk=always always mark fs dirty - used for debugging\n\ eas=no ignore extended attributes\n\ eas=ro read but do not write extended attributes\n\ eas=rw r/w eas => enables chmod, chown, mknod, ln -s (default)\n\ timeshift=nnn add nnn seconds to file times\n\ \n"); } static int hpfs_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct hpfs_fc_context *ctx = fc->fs_private; struct fs_parse_result result; int opt; opt = fs_parse(fc, hpfs_param_spec, param, &result); if (opt < 0) return opt; switch (opt) { case Opt_help: hpfs_help(); return -EINVAL; case Opt_uid: ctx->uid = result.uid; break; case Opt_gid: ctx->gid = result.gid; break; case Opt_umask: ctx->umask = result.uint_32; break; case Opt_case: ctx->lowercase = result.uint_32; break; case Opt_check: ctx->chk = result.uint_32; break; case Opt_err: ctx->errs = result.uint_32; break; case Opt_eas: ctx->eas = result.uint_32; break; case Opt_chkdsk: ctx->chkdsk = result.uint_32; break; case Opt_timeshift: { char *rhs = param->string; int timeshift; if (kstrtoint(rhs, 0, &timeshift)) return -EINVAL; ctx->timeshift = timeshift; break; } default: return -EINVAL; } return 0; } static int hpfs_reconfigure(struct fs_context *fc) { struct hpfs_fc_context *ctx = fc->fs_private; struct super_block *s = fc->root->d_sb; struct hpfs_sb_info *sbi = hpfs_sb(s); sync_filesystem(s); fc->sb_flags |= SB_NOATIME; hpfs_lock(s); if (ctx->timeshift != sbi->sb_timeshift) { pr_err("timeshift can't be changed using remount.\n"); goto out_err; } unmark_dirty(s); sbi->sb_uid = ctx->uid; sbi->sb_gid = ctx->gid; sbi->sb_mode = 0777 & ~ctx->umask; sbi->sb_lowercase = ctx->lowercase; sbi->sb_eas = ctx->eas; sbi->sb_chk = ctx->chk; sbi->sb_chkdsk = ctx->chkdsk; sbi->sb_err = ctx->errs; sbi->sb_timeshift = ctx->timeshift; if (!(fc->sb_flags & SB_RDONLY)) mark_dirty(s, 1); hpfs_unlock(s); return 0; out_err: hpfs_unlock(s); return -EINVAL; } static int hpfs_show_options(struct seq_file *seq, struct dentry *root) { struct hpfs_sb_info *sbi = hpfs_sb(root->d_sb); seq_printf(seq, ",uid=%u", from_kuid_munged(&init_user_ns, sbi->sb_uid)); seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, sbi->sb_gid)); seq_printf(seq, ",umask=%03o", (~sbi->sb_mode & 0777)); if (sbi->sb_lowercase) seq_printf(seq, ",case=lower"); if (!sbi->sb_chk) seq_printf(seq, ",check=none"); if (sbi->sb_chk == 2) seq_printf(seq, ",check=strict"); if (!sbi->sb_err) seq_printf(seq, ",errors=continue"); if (sbi->sb_err == 2) seq_printf(seq, ",errors=panic"); if (!sbi->sb_chkdsk) seq_printf(seq, ",chkdsk=no"); if (sbi->sb_chkdsk == 2) seq_printf(seq, ",chkdsk=always"); if (!sbi->sb_eas) seq_printf(seq, ",eas=no"); if (sbi->sb_eas == 1) seq_printf(seq, ",eas=ro"); if (sbi->sb_timeshift) seq_printf(seq, ",timeshift=%d", sbi->sb_timeshift); return 0; } /* Super operations */ static const struct super_operations hpfs_sops = { .alloc_inode = hpfs_alloc_inode, .free_inode = hpfs_free_inode, .evict_inode = hpfs_evict_inode, .put_super = hpfs_put_super, .statfs = hpfs_statfs, .show_options = hpfs_show_options, }; static int hpfs_fill_super(struct super_block *s, struct fs_context *fc) { struct hpfs_fc_context *ctx = fc->fs_private; struct buffer_head *bh0, *bh1, *bh2; struct hpfs_boot_block *bootblock; struct hpfs_super_block *superblock; struct hpfs_spare_block *spareblock; struct hpfs_sb_info *sbi; struct inode *root; int silent = fc->sb_flags & SB_SILENT; dnode_secno root_dno; struct hpfs_dirent *de = NULL; struct quad_buffer_head qbh; sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) { return -ENOMEM; } s->s_fs_info = sbi; mutex_init(&sbi->hpfs_mutex); hpfs_lock(s); /*sbi->sb_mounting = 1;*/ sb_set_blocksize(s, 512); sbi->sb_fs_size = -1; if (!(bootblock = hpfs_map_sector(s, 0, &bh0, 0))) goto bail1; if (!(superblock = hpfs_map_sector(s, 16, &bh1, 1))) goto bail2; if (!(spareblock = hpfs_map_sector(s, 17, &bh2, 0))) goto bail3; /* Check magics */ if (/*le16_to_cpu(bootblock->magic) != BB_MAGIC ||*/ le32_to_cpu(superblock->magic) != SB_MAGIC || le32_to_cpu(spareblock->magic) != SP_MAGIC) { if (!silent) pr_err("Bad magic ... probably not HPFS\n"); goto bail4; } /* Check version */ if (!sb_rdonly(s) && superblock->funcversion != 2 && superblock->funcversion != 3) { pr_err("Bad version %d,%d. Mount readonly to go around\n", (int)superblock->version, (int)superblock->funcversion); pr_err("please try recent version of HPFS driver at http://artax.karlin.mff.cuni.cz/~mikulas/vyplody/hpfs/index-e.cgi and if it still can't understand this format, contact author - mikulas@artax.karlin.mff.cuni.cz\n"); goto bail4; } s->s_flags |= SB_NOATIME; /* Fill superblock stuff */ s->s_magic = HPFS_SUPER_MAGIC; s->s_op = &hpfs_sops; set_default_d_op(s, &hpfs_dentry_operations); s->s_time_min = local_to_gmt(s, 0); s->s_time_max = local_to_gmt(s, U32_MAX); sbi->sb_root = le32_to_cpu(superblock->root); sbi->sb_fs_size = le32_to_cpu(superblock->n_sectors); sbi->sb_bitmaps = le32_to_cpu(superblock->bitmaps); sbi->sb_dirband_start = le32_to_cpu(superblock->dir_band_start); sbi->sb_dirband_size = le32_to_cpu(superblock->n_dir_band); sbi->sb_dmap = le32_to_cpu(superblock->dir_band_bitmap); sbi->sb_uid = ctx->uid; sbi->sb_gid = ctx->gid; sbi->sb_mode = 0777 & ~ctx->umask; sbi->sb_n_free = -1; sbi->sb_n_free_dnodes = -1; sbi->sb_lowercase = ctx->lowercase; sbi->sb_eas = ctx->eas; sbi->sb_chk = ctx->chk; sbi->sb_chkdsk = ctx->chkdsk; sbi->sb_err = ctx->errs; sbi->sb_timeshift = ctx->timeshift; sbi->sb_was_error = 0; sbi->sb_cp_table = NULL; sbi->sb_c_bitmap = -1; sbi->sb_max_fwd_alloc = 0xffffff; if (sbi->sb_fs_size >= 0x80000000) { hpfs_error(s, "invalid size in superblock: %08x", (unsigned)sbi->sb_fs_size); goto bail4; } if (spareblock->n_spares_used) hpfs_load_hotfix_map(s, spareblock); /* Load bitmap directory */ if (!(sbi->sb_bmp_dir = hpfs_load_bitmap_directory(s, le32_to_cpu(superblock->bitmaps)))) goto bail4; /* Check for general fs errors*/ if (spareblock->dirty && !spareblock->old_wrote) { if (sbi->sb_err == 2) { pr_err("Improperly stopped, not mounted\n"); goto bail4; } hpfs_error(s, "improperly stopped"); } if (!sb_rdonly(s)) { spareblock->dirty = 1; spareblock->old_wrote = 0; mark_buffer_dirty(bh2); } if (le32_to_cpu(spareblock->n_dnode_spares) != le32_to_cpu(spareblock->n_dnode_spares_free)) { if (sbi->sb_err >= 2) { pr_err("Spare dnodes used, try chkdsk\n"); mark_dirty(s, 0); goto bail4; } hpfs_error(s, "warning: spare dnodes used, try chkdsk"); if (sbi->sb_err == 0) pr_err("Proceeding, but your filesystem could be corrupted if you delete files or directories\n"); } if (sbi->sb_chk) { unsigned a; if (le32_to_cpu(superblock->dir_band_end) - le32_to_cpu(superblock->dir_band_start) + 1 != le32_to_cpu(superblock->n_dir_band) || le32_to_cpu(superblock->dir_band_end) < le32_to_cpu(superblock->dir_band_start) || le32_to_cpu(superblock->n_dir_band) > 0x4000) { hpfs_error(s, "dir band size mismatch: dir_band_start==%08x, dir_band_end==%08x, n_dir_band==%08x", le32_to_cpu(superblock->dir_band_start), le32_to_cpu(superblock->dir_band_end), le32_to_cpu(superblock->n_dir_band)); goto bail4; } a = sbi->sb_dirband_size; sbi->sb_dirband_size = 0; if (hpfs_chk_sectors(s, le32_to_cpu(superblock->dir_band_start), le32_to_cpu(superblock->n_dir_band), "dir_band") || hpfs_chk_sectors(s, le32_to_cpu(superblock->dir_band_bitmap), 4, "dir_band_bitmap") || hpfs_chk_sectors(s, le32_to_cpu(superblock->bitmaps), 4, "bitmaps")) { mark_dirty(s, 0); goto bail4; } sbi->sb_dirband_size = a; } else pr_err("You really don't want any checks? You are crazy...\n"); /* Load code page table */ if (le32_to_cpu(spareblock->n_code_pages)) if (!(sbi->sb_cp_table = hpfs_load_code_page(s, le32_to_cpu(spareblock->code_page_dir)))) pr_err("code page support is disabled\n"); brelse(bh2); brelse(bh1); brelse(bh0); root = iget_locked(s, sbi->sb_root); if (!root) goto bail0; hpfs_init_inode(root); hpfs_read_inode(root); unlock_new_inode(root); s->s_root = d_make_root(root); if (!s->s_root) goto bail0; /* * find the root directory's . pointer & finish filling in the inode */ root_dno = hpfs_fnode_dno(s, sbi->sb_root); if (root_dno) de = map_dirent(root, root_dno, "\001\001", 2, NULL, &qbh); if (!de) hpfs_error(s, "unable to find root dir"); else { inode_set_atime(root, local_to_gmt(s, le32_to_cpu(de->read_date)), 0); inode_set_mtime(root, local_to_gmt(s, le32_to_cpu(de->write_date)), 0); inode_set_ctime(root, local_to_gmt(s, le32_to_cpu(de->creation_date)), 0); hpfs_i(root)->i_ea_size = le32_to_cpu(de->ea_size); hpfs_i(root)->i_parent_dir = root->i_ino; if (root->i_size == -1) root->i_size = 2048; if (root->i_blocks == -1) root->i_blocks = 5; hpfs_brelse4(&qbh); } hpfs_unlock(s); return 0; bail4: brelse(bh2); bail3: brelse(bh1); bail2: brelse(bh0); bail1: bail0: hpfs_unlock(s); free_sbi(sbi); return -EINVAL; } static int hpfs_get_tree(struct fs_context *fc) { return get_tree_bdev(fc, hpfs_fill_super); } static void hpfs_free_fc(struct fs_context *fc) { kfree(fc->fs_private); } static const struct fs_context_operations hpfs_fc_context_ops = { .parse_param = hpfs_parse_param, .get_tree = hpfs_get_tree, .reconfigure = hpfs_reconfigure, .free = hpfs_free_fc, }; static int hpfs_init_fs_context(struct fs_context *fc) { struct hpfs_fc_context *ctx; ctx = kzalloc(sizeof(struct hpfs_fc_context), GFP_KERNEL); if (!ctx) return -ENOMEM; if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { struct super_block *sb = fc->root->d_sb; struct hpfs_sb_info *sbi = hpfs_sb(sb); ctx->uid = sbi->sb_uid; ctx->gid = sbi->sb_gid; ctx->umask = 0777 & ~sbi->sb_mode; ctx->lowercase = sbi->sb_lowercase; ctx->eas = sbi->sb_eas; ctx->chk = sbi->sb_chk; ctx->chkdsk = sbi->sb_chkdsk; ctx->errs = sbi->sb_err; ctx->timeshift = sbi->sb_timeshift; } else { ctx->uid = current_uid(); ctx->gid = current_gid(); ctx->umask = current_umask(); ctx->lowercase = 0; ctx->eas = 2; ctx->chk = 1; ctx->errs = 1; ctx->chkdsk = 1; ctx->timeshift = 0; } fc->fs_private = ctx; fc->ops = &hpfs_fc_context_ops; return 0; }; static struct file_system_type hpfs_fs_type = { .owner = THIS_MODULE, .name = "hpfs", .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, .init_fs_context = hpfs_init_fs_context, .parameters = hpfs_param_spec, }; MODULE_ALIAS_FS("hpfs"); static int __init init_hpfs_fs(void) { int err = init_inodecache(); if (err) goto out1; err = register_filesystem(&hpfs_fs_type); if (err) goto out; return 0; out: destroy_inodecache(); out1: return err; } static void __exit exit_hpfs_fs(void) { unregister_filesystem(&hpfs_fs_type); destroy_inodecache(); } module_init(init_hpfs_fs) module_exit(exit_hpfs_fs) MODULE_DESCRIPTION("OS/2 HPFS file system support"); MODULE_LICENSE("GPL");
18 18 18 18 18 18 18 41 20 20 20 20 11 10 12 9 9 9 12 12 9 44 48 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */ #include "peer.h" #include "device.h" #include "queueing.h" #include "timers.h" #include "peerlookup.h" #include "noise.h" #include <linux/kref.h> #include <linux/lockdep.h> #include <linux/rcupdate.h> #include <linux/list.h> static struct kmem_cache *peer_cache; static atomic64_t peer_counter = ATOMIC64_INIT(0); struct wg_peer *wg_peer_create(struct wg_device *wg, const u8 public_key[NOISE_PUBLIC_KEY_LEN], const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]) { struct wg_peer *peer; int ret = -ENOMEM; lockdep_assert_held(&wg->device_update_lock); if (wg->num_peers >= MAX_PEERS_PER_DEVICE) return ERR_PTR(ret); peer = kmem_cache_zalloc(peer_cache, GFP_KERNEL); if (unlikely(!peer)) return ERR_PTR(ret); if (unlikely(dst_cache_init(&peer->endpoint_cache, GFP_KERNEL))) goto err; peer->device = wg; wg_noise_handshake_init(&peer->handshake, &wg->static_identity, public_key, preshared_key, peer); peer->internal_id = atomic64_inc_return(&peer_counter); peer->serial_work_cpu = nr_cpumask_bits; wg_cookie_init(&peer->latest_cookie); wg_timers_init(peer); wg_cookie_checker_precompute_peer_keys(peer); spin_lock_init(&peer->keypairs.keypair_update_lock); INIT_WORK(&peer->transmit_handshake_work, wg_packet_handshake_send_worker); INIT_WORK(&peer->transmit_packet_work, wg_packet_tx_worker); wg_prev_queue_init(&peer->tx_queue); wg_prev_queue_init(&peer->rx_queue); rwlock_init(&peer->endpoint_lock); kref_init(&peer->refcount); skb_queue_head_init(&peer->staged_packet_queue); wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake); set_bit(NAPI_STATE_NO_BUSY_POLL, &peer->napi.state); netif_napi_add(wg->dev, &peer->napi, wg_packet_rx_poll); napi_enable(&peer->napi); list_add_tail(&peer->peer_list, &wg->peer_list); INIT_LIST_HEAD(&peer->allowedips_list); wg_pubkey_hashtable_add(wg->peer_hashtable, peer); ++wg->num_peers; pr_debug("%s: Peer %llu created\n", wg->dev->name, peer->internal_id); return peer; err: kmem_cache_free(peer_cache, peer); return ERR_PTR(ret); } struct wg_peer *wg_peer_get_maybe_zero(struct wg_peer *peer) { RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(), "Taking peer reference without holding the RCU read lock"); if (unlikely(!peer || !kref_get_unless_zero(&peer->refcount))) return NULL; return peer; } static void peer_make_dead(struct wg_peer *peer) { /* Remove from configuration-time lookup structures. */ list_del_init(&peer->peer_list); wg_allowedips_remove_by_peer(&peer->device->peer_allowedips, peer, &peer->device->device_update_lock); wg_pubkey_hashtable_remove(peer->device->peer_hashtable, peer); /* Mark as dead, so that we don't allow jumping contexts after. */ WRITE_ONCE(peer->is_dead, true); /* The caller must now synchronize_net() for this to take effect. */ } static void peer_remove_after_dead(struct wg_peer *peer) { WARN_ON(!peer->is_dead); /* No more keypairs can be created for this peer, since is_dead protects * add_new_keypair, so we can now destroy existing ones. */ wg_noise_keypairs_clear(&peer->keypairs); /* Destroy all ongoing timers that were in-flight at the beginning of * this function. */ wg_timers_stop(peer); /* The transition between packet encryption/decryption queues isn't * guarded by is_dead, but each reference's life is strictly bounded by * two generations: once for parallel crypto and once for serial * ingestion, so we can simply flush twice, and be sure that we no * longer have references inside these queues. */ /* a) For encrypt/decrypt. */ flush_workqueue(peer->device->packet_crypt_wq); /* b.1) For send (but not receive, since that's napi). */ flush_workqueue(peer->device->packet_crypt_wq); /* b.2.1) For receive (but not send, since that's wq). */ napi_disable(&peer->napi); /* b.2.1) It's now safe to remove the napi struct, which must be done * here from process context. */ netif_napi_del(&peer->napi); /* Ensure any workstructs we own (like transmit_handshake_work or * clear_peer_work) no longer are in use. */ flush_workqueue(peer->device->handshake_send_wq); /* After the above flushes, a peer might still be active in a few * different contexts: 1) from xmit(), before hitting is_dead and * returning, 2) from wg_packet_consume_data(), before hitting is_dead * and returning, 3) from wg_receive_handshake_packet() after a point * where it has processed an incoming handshake packet, but where * all calls to pass it off to timers fails because of is_dead. We won't * have new references in (1) eventually, because we're removed from * allowedips; we won't have new references in (2) eventually, because * wg_index_hashtable_lookup will always return NULL, since we removed * all existing keypairs and no more can be created; we won't have new * references in (3) eventually, because we're removed from the pubkey * hash table, which allows for a maximum of one handshake response, * via the still-uncleared index hashtable entry, but not more than one, * and in wg_cookie_message_consume, the lookup eventually gets a peer * with a refcount of zero, so no new reference is taken. */ --peer->device->num_peers; wg_peer_put(peer); } /* We have a separate "remove" function make sure that all active places where * a peer is currently operating will eventually come to an end and not pass * their reference onto another context. */ void wg_peer_remove(struct wg_peer *peer) { if (unlikely(!peer)) return; lockdep_assert_held(&peer->device->device_update_lock); peer_make_dead(peer); synchronize_net(); peer_remove_after_dead(peer); } void wg_peer_remove_all(struct wg_device *wg) { struct wg_peer *peer, *temp; LIST_HEAD(dead_peers); lockdep_assert_held(&wg->device_update_lock); /* Avoid having to traverse individually for each one. */ wg_allowedips_free(&wg->peer_allowedips, &wg->device_update_lock); list_for_each_entry_safe(peer, temp, &wg->peer_list, peer_list) { peer_make_dead(peer); list_add_tail(&peer->peer_list, &dead_peers); } synchronize_net(); list_for_each_entry_safe(peer, temp, &dead_peers, peer_list) peer_remove_after_dead(peer); } static void rcu_release(struct rcu_head *rcu) { struct wg_peer *peer = container_of(rcu, struct wg_peer, rcu); dst_cache_destroy(&peer->endpoint_cache); WARN_ON(wg_prev_queue_peek(&peer->tx_queue) || wg_prev_queue_peek(&peer->rx_queue)); /* The final zeroing takes care of clearing any remaining handshake key * material and other potentially sensitive information. */ memzero_explicit(peer, sizeof(*peer)); kmem_cache_free(peer_cache, peer); } static void kref_release(struct kref *refcount) { struct wg_peer *peer = container_of(refcount, struct wg_peer, refcount); pr_debug("%s: Peer %llu (%pISpfsc) destroyed\n", peer->device->dev->name, peer->internal_id, &peer->endpoint.addr); /* Remove ourself from dynamic runtime lookup structures, now that the * last reference is gone. */ wg_index_hashtable_remove(peer->device->index_hashtable, &peer->handshake.entry); /* Remove any lingering packets that didn't have a chance to be * transmitted. */ wg_packet_purge_staged_packets(peer); /* Free the memory used. */ call_rcu(&peer->rcu, rcu_release); } void wg_peer_put(struct wg_peer *peer) { if (unlikely(!peer)) return; kref_put(&peer->refcount, kref_release); } int __init wg_peer_init(void) { peer_cache = KMEM_CACHE(wg_peer, 0); return peer_cache ? 0 : -ENOMEM; } void wg_peer_uninit(void) { kmem_cache_destroy(peer_cache); }
11 11 10 11 6 4 3 3 3 2 3 3 3 3 3 3 3 3 2 3 7 4 4 4 4 3 1 2 2 1 2 29 30 1 29 13 13 3 26 17 12 29 5 4 10 7 2 2 2 4 1 3 1 2 1 4 2 1 2 4 2 4 2 2 1 73 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C)2003,2004 USAGI/WIDE Project * * Author: * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> */ #include <linux/types.h> #include <linux/timer.h> #include <linux/module.h> #include <linux/netfilter.h> #include <linux/in6.h> #include <linux/icmpv6.h> #include <linux/ipv6.h> #include <net/ipv6.h> #include <net/ip6_checksum.h> #include <linux/seq_file.h> #include <linux/netfilter_ipv6.h> #include <net/netfilter/nf_conntrack_tuple.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_timeout.h> #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_log.h> #include "nf_internals.h" static const unsigned int nf_ct_icmpv6_timeout = 30*HZ; bool icmpv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct net *net, struct nf_conntrack_tuple *tuple) { const struct icmp6hdr *hp; struct icmp6hdr _hdr; hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hp == NULL) return false; tuple->dst.u.icmp.type = hp->icmp6_type; tuple->src.u.icmp.id = hp->icmp6_identifier; tuple->dst.u.icmp.code = hp->icmp6_code; return true; } /* Add 1; spaces filled with 0. */ static const u_int8_t invmap[] = { [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1, [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1, [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_REPLY + 1, [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_QUERY + 1 }; static const u_int8_t noct_valid_new[] = { [ICMPV6_MGM_QUERY - 130] = 1, [ICMPV6_MGM_REPORT - 130] = 1, [ICMPV6_MGM_REDUCTION - 130] = 1, [NDISC_ROUTER_SOLICITATION - 130] = 1, [NDISC_ROUTER_ADVERTISEMENT - 130] = 1, [NDISC_NEIGHBOUR_SOLICITATION - 130] = 1, [NDISC_NEIGHBOUR_ADVERTISEMENT - 130] = 1, [ICMPV6_MLD2_REPORT - 130] = 1, [ICMPV6_MRDISC_ADV - 130] = 1, [ICMPV6_MRDISC_SOL - 130] = 1 }; bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple, const struct nf_conntrack_tuple *orig) { int type = orig->dst.u.icmp.type - 128; if (type < 0 || type >= sizeof(invmap) || !invmap[type]) return false; tuple->src.u.icmp.id = orig->src.u.icmp.id; tuple->dst.u.icmp.type = invmap[type] - 1; tuple->dst.u.icmp.code = orig->dst.u.icmp.code; return true; } static unsigned int *icmpv6_get_timeouts(struct net *net) { return &nf_icmpv6_pernet(net)->timeout; } /* Returns verdict for packet, or -1 for invalid. */ int nf_conntrack_icmpv6_packet(struct nf_conn *ct, struct sk_buff *skb, enum ip_conntrack_info ctinfo, const struct nf_hook_state *state) { unsigned int *timeout = nf_ct_timeout_lookup(ct); static const u8 valid_new[] = { [ICMPV6_ECHO_REQUEST - 128] = 1, [ICMPV6_NI_QUERY - 128] = 1 }; if (state->pf != NFPROTO_IPV6) return -NF_ACCEPT; if (!nf_ct_is_confirmed(ct)) { int type = ct->tuplehash[0].tuple.dst.u.icmp.type - 128; if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) { /* Can't create a new ICMPv6 `conn' with this. */ pr_debug("icmpv6: can't create new conn with type %u\n", type + 128); nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple); return -NF_ACCEPT; } } if (!timeout) timeout = icmpv6_get_timeouts(nf_ct_net(ct)); /* Do not immediately delete the connection after the first successful reply to avoid excessive conntrackd traffic and also to handle correctly ICMP echo reply duplicates. */ nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); return NF_ACCEPT; } static void icmpv6_error_log(const struct sk_buff *skb, const struct nf_hook_state *state, const char *msg) { nf_l4proto_log_invalid(skb, state, IPPROTO_ICMPV6, "%s", msg); } static noinline_for_stack int nf_conntrack_icmpv6_redirect(struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, const struct nf_hook_state *state) { u8 hl = ipv6_hdr(skb)->hop_limit; union nf_inet_addr outer_daddr; union { struct nd_opt_hdr nd_opt; struct rd_msg rd_msg; } tmp; const struct nd_opt_hdr *nd_opt; const struct rd_msg *rd_msg; rd_msg = skb_header_pointer(skb, dataoff, sizeof(*rd_msg), &tmp.rd_msg); if (!rd_msg) { icmpv6_error_log(skb, state, "short redirect"); return -NF_ACCEPT; } if (rd_msg->icmph.icmp6_code != 0) return NF_ACCEPT; if (hl != 255 || !(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { icmpv6_error_log(skb, state, "invalid saddr or hoplimit for redirect"); return -NF_ACCEPT; } dataoff += sizeof(*rd_msg); /* warning: rd_msg no longer usable after this call */ nd_opt = skb_header_pointer(skb, dataoff, sizeof(*nd_opt), &tmp.nd_opt); if (!nd_opt || nd_opt->nd_opt_len == 0) { icmpv6_error_log(skb, state, "redirect without options"); return -NF_ACCEPT; } /* We could call ndisc_parse_options(), but it would need * skb_linearize() and a bit more work. */ if (nd_opt->nd_opt_type != ND_OPT_REDIRECT_HDR) return NF_ACCEPT; memcpy(&outer_daddr.ip6, &ipv6_hdr(skb)->daddr, sizeof(outer_daddr.ip6)); dataoff += 8; return nf_conntrack_inet_error(tmpl, skb, dataoff, state, IPPROTO_ICMPV6, &outer_daddr); } int nf_conntrack_icmpv6_error(struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, const struct nf_hook_state *state) { union nf_inet_addr outer_daddr; const struct icmp6hdr *icmp6h; struct icmp6hdr _ih; int type; icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); if (icmp6h == NULL) { icmpv6_error_log(skb, state, "short packet"); return -NF_ACCEPT; } if (state->hook == NF_INET_PRE_ROUTING && state->net->ct.sysctl_checksum && nf_ip6_checksum(skb, state->hook, dataoff, IPPROTO_ICMPV6)) { icmpv6_error_log(skb, state, "ICMPv6 checksum failed"); return -NF_ACCEPT; } type = icmp6h->icmp6_type - 130; if (type >= 0 && type < sizeof(noct_valid_new) && noct_valid_new[type]) { nf_ct_set(skb, NULL, IP_CT_UNTRACKED); return NF_ACCEPT; } if (icmp6h->icmp6_type == NDISC_REDIRECT) return nf_conntrack_icmpv6_redirect(tmpl, skb, dataoff, state); /* is not error message ? */ if (icmp6h->icmp6_type >= 128) return NF_ACCEPT; memcpy(&outer_daddr.ip6, &ipv6_hdr(skb)->daddr, sizeof(outer_daddr.ip6)); dataoff += sizeof(*icmp6h); return nf_conntrack_inet_error(tmpl, skb, dataoff, state, IPPROTO_ICMPV6, &outer_daddr); } #if IS_ENABLED(CONFIG_NF_CT_NETLINK) #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> static int icmpv6_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *t) { if (nla_put_be16(skb, CTA_PROTO_ICMPV6_ID, t->src.u.icmp.id) || nla_put_u8(skb, CTA_PROTO_ICMPV6_TYPE, t->dst.u.icmp.type) || nla_put_u8(skb, CTA_PROTO_ICMPV6_CODE, t->dst.u.icmp.code)) goto nla_put_failure; return 0; nla_put_failure: return -1; } static const struct nla_policy icmpv6_nla_policy[CTA_PROTO_MAX+1] = { [CTA_PROTO_ICMPV6_TYPE] = { .type = NLA_U8 }, [CTA_PROTO_ICMPV6_CODE] = { .type = NLA_U8 }, [CTA_PROTO_ICMPV6_ID] = { .type = NLA_U16 }, }; static int icmpv6_nlattr_to_tuple(struct nlattr *tb[], struct nf_conntrack_tuple *tuple, u_int32_t flags) { if (flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_TYPE)) { if (!tb[CTA_PROTO_ICMPV6_TYPE]) return -EINVAL; tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMPV6_TYPE]); if (tuple->dst.u.icmp.type < 128 || tuple->dst.u.icmp.type - 128 >= sizeof(invmap) || !invmap[tuple->dst.u.icmp.type - 128]) return -EINVAL; } if (flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_CODE)) { if (!tb[CTA_PROTO_ICMPV6_CODE]) return -EINVAL; tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMPV6_CODE]); } if (flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_ID)) { if (!tb[CTA_PROTO_ICMPV6_ID]) return -EINVAL; tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMPV6_ID]); } return 0; } static unsigned int icmpv6_nlattr_tuple_size(void) { static unsigned int size __read_mostly; if (!size) size = nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1); return size; } #endif #ifdef CONFIG_NF_CONNTRACK_TIMEOUT #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_cttimeout.h> static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { unsigned int *timeout = data; struct nf_icmp_net *in = nf_icmpv6_pernet(net); if (!timeout) timeout = icmpv6_get_timeouts(net); if (tb[CTA_TIMEOUT_ICMPV6_TIMEOUT]) { *timeout = ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMPV6_TIMEOUT])) * HZ; } else { /* Set default ICMPv6 timeout. */ *timeout = in->timeout; } return 0; } static int icmpv6_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) { const unsigned int *timeout = data; if (nla_put_be32(skb, CTA_TIMEOUT_ICMPV6_TIMEOUT, htonl(*timeout / HZ))) goto nla_put_failure; return 0; nla_put_failure: return -ENOSPC; } static const struct nla_policy icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = { [CTA_TIMEOUT_ICMPV6_TIMEOUT] = { .type = NLA_U32 }, }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ void nf_conntrack_icmpv6_init_net(struct net *net) { struct nf_icmp_net *in = nf_icmpv6_pernet(net); in->timeout = nf_ct_icmpv6_timeout; } const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 = { .l4proto = IPPROTO_ICMPV6, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = icmpv6_tuple_to_nlattr, .nlattr_tuple_size = icmpv6_nlattr_tuple_size, .nlattr_to_tuple = icmpv6_nlattr_to_tuple, .nla_policy = icmpv6_nla_policy, #endif #ifdef CONFIG_NF_CONNTRACK_TIMEOUT .ctnl_timeout = { .nlattr_to_obj = icmpv6_timeout_nlattr_to_obj, .obj_to_nlattr = icmpv6_timeout_obj_to_nlattr, .nlattr_max = CTA_TIMEOUT_ICMP_MAX, .obj_size = sizeof(unsigned int), .nla_policy = icmpv6_timeout_nla_policy, }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ };
515 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_CPU_ENTRY_AREA_H #define _ASM_X86_CPU_ENTRY_AREA_H #include <linux/percpu-defs.h> #include <asm/processor.h> #include <asm/intel_ds.h> #include <asm/pgtable_areas.h> #ifdef CONFIG_X86_64 #ifdef CONFIG_AMD_MEM_ENCRYPT #define VC_EXCEPTION_STKSZ EXCEPTION_STKSZ #else #define VC_EXCEPTION_STKSZ 0 #endif /* Macro to enforce the same ordering and stack sizes */ #define ESTACKS_MEMBERS(guardsize, optional_stack_size) \ char DF_stack_guard[guardsize]; \ char DF_stack[EXCEPTION_STKSZ]; \ char NMI_stack_guard[guardsize]; \ char NMI_stack[EXCEPTION_STKSZ]; \ char DB_stack_guard[guardsize]; \ char DB_stack[EXCEPTION_STKSZ]; \ char MCE_stack_guard[guardsize]; \ char MCE_stack[EXCEPTION_STKSZ]; \ char VC_stack_guard[guardsize]; \ char VC_stack[optional_stack_size]; \ char VC2_stack_guard[guardsize]; \ char VC2_stack[optional_stack_size]; \ char IST_top_guard[guardsize]; \ /* The exception stacks' physical storage. No guard pages required */ struct exception_stacks { ESTACKS_MEMBERS(0, VC_EXCEPTION_STKSZ) }; /* The effective cpu entry area mapping with guard pages. */ struct cea_exception_stacks { ESTACKS_MEMBERS(PAGE_SIZE, EXCEPTION_STKSZ) }; /* * The exception stack ordering in [cea_]exception_stacks */ enum exception_stack_ordering { ESTACK_DF, ESTACK_NMI, ESTACK_DB, ESTACK_MCE, ESTACK_VC, ESTACK_VC2, N_EXCEPTION_STACKS }; #define CEA_ESTACK_SIZE(st) \ sizeof(((struct cea_exception_stacks *)0)->st## _stack) #define CEA_ESTACK_BOT(ceastp, st) \ ((unsigned long)&(ceastp)->st## _stack) #define CEA_ESTACK_TOP(ceastp, st) \ (CEA_ESTACK_BOT(ceastp, st) + CEA_ESTACK_SIZE(st)) #define CEA_ESTACK_OFFS(st) \ offsetof(struct cea_exception_stacks, st## _stack) #define CEA_ESTACK_PAGES \ (sizeof(struct cea_exception_stacks) / PAGE_SIZE) #endif #ifdef CONFIG_X86_32 struct doublefault_stack { unsigned long stack[(PAGE_SIZE - sizeof(struct x86_hw_tss)) / sizeof(unsigned long)]; struct x86_hw_tss tss; } __aligned(PAGE_SIZE); #endif /* * cpu_entry_area is a percpu region that contains things needed by the CPU * and early entry/exit code. Real types aren't used for all fields here * to avoid circular header dependencies. * * Every field is a virtual alias of some other allocated backing store. * There is no direct allocation of a struct cpu_entry_area. */ struct cpu_entry_area { char gdt[PAGE_SIZE]; /* * The GDT is just below entry_stack and thus serves (on x86_64) as * a read-only guard page. On 32-bit the GDT must be writeable, so * it needs an extra guard page. */ #ifdef CONFIG_X86_32 char guard_entry_stack[PAGE_SIZE]; #endif struct entry_stack_page entry_stack_page; #ifdef CONFIG_X86_32 char guard_doublefault_stack[PAGE_SIZE]; struct doublefault_stack doublefault_stack; #endif /* * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because * we need task switches to work, and task switches write to the TSS. */ struct tss_struct tss; #ifdef CONFIG_X86_64 /* * Exception stacks used for IST entries with guard pages. */ struct cea_exception_stacks estacks; #endif /* * Per CPU debug store for Intel performance monitoring. Wastes a * full page at the moment. */ struct debug_store cpu_debug_store; /* * The actual PEBS/BTS buffers must be mapped to user space * Reserve enough fixmap PTEs. */ struct debug_store_buffers cpu_debug_buffers; }; #define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks); extern void setup_cpu_entry_areas(void); extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags); extern struct cpu_entry_area *get_cpu_entry_area(int cpu); static __always_inline struct entry_stack *cpu_entry_stack(int cpu) { return &get_cpu_entry_area(cpu)->entry_stack_page.stack; } #define __this_cpu_ist_top_va(name) \ CEA_ESTACK_TOP(__this_cpu_read(cea_exception_stacks), name) #define __this_cpu_ist_bottom_va(name) \ CEA_ESTACK_BOT(__this_cpu_read(cea_exception_stacks), name) #endif
2 2 2 2 2 2 2 1 1 1 1 4 4 4 4 4 4 4 14 3 14 19 20 20 14 2 2 4 4 1 4 1 1 1 16 14 15 8 5 8 7 8 8 7 8 22 23 23 23 22 21 21 20 20 20 20 16 16 13 14 8 7 7 1 7 12 15 22 18 18 25 8 7 6 1 5 27 18 25 26 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 // SPDX-License-Identifier: GPL-2.0 #include <linux/cred.h> #include <linux/device.h> #include <linux/dma-buf.h> #include <linux/dma-resv.h> #include <linux/highmem.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/memfd.h> #include <linux/miscdevice.h> #include <linux/module.h> #include <linux/shmem_fs.h> #include <linux/hugetlb.h> #include <linux/slab.h> #include <linux/udmabuf.h> #include <linux/vmalloc.h> #include <linux/iosys-map.h> static int list_limit = 1024; module_param(list_limit, int, 0644); MODULE_PARM_DESC(list_limit, "udmabuf_create_list->count limit. Default is 1024."); static int size_limit_mb = 64; module_param(size_limit_mb, int, 0644); MODULE_PARM_DESC(size_limit_mb, "Max size of a dmabuf, in megabytes. Default is 64."); struct udmabuf { pgoff_t pagecount; struct folio **folios; /** * Unlike folios, pinned_folios is only used for unpin. * So, nr_pinned is not the same to pagecount, the pinned_folios * only set each folio which already pinned when udmabuf_create. * Note that, since a folio may be pinned multiple times, each folio * can be added to pinned_folios multiple times, depending on how many * times the folio has been pinned when create. */ pgoff_t nr_pinned; struct folio **pinned_folios; struct sg_table *sg; struct miscdevice *device; pgoff_t *offsets; }; static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct udmabuf *ubuf = vma->vm_private_data; pgoff_t pgoff = vmf->pgoff; unsigned long addr, pfn; vm_fault_t ret; if (pgoff >= ubuf->pagecount) return VM_FAULT_SIGBUS; pfn = folio_pfn(ubuf->folios[pgoff]); pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT; ret = vmf_insert_pfn(vma, vmf->address, pfn); if (ret & VM_FAULT_ERROR) return ret; /* pre fault */ pgoff = vma->vm_pgoff; addr = vma->vm_start; for (; addr < vma->vm_end; pgoff++, addr += PAGE_SIZE) { if (addr == vmf->address) continue; if (WARN_ON(pgoff >= ubuf->pagecount)) break; pfn = folio_pfn(ubuf->folios[pgoff]); pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT; /** * If the below vmf_insert_pfn() fails, we do not return an * error here during this pre-fault step. However, an error * will be returned if the failure occurs when the addr is * truly accessed. */ if (vmf_insert_pfn(vma, addr, pfn) & VM_FAULT_ERROR) break; } return ret; } static const struct vm_operations_struct udmabuf_vm_ops = { .fault = udmabuf_vm_fault, }; static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct *vma) { struct udmabuf *ubuf = buf->priv; if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0) return -EINVAL; vma->vm_ops = &udmabuf_vm_ops; vma->vm_private_data = ubuf; vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP); return 0; } static int vmap_udmabuf(struct dma_buf *buf, struct iosys_map *map) { struct udmabuf *ubuf = buf->priv; struct page **pages; void *vaddr; pgoff_t pg; dma_resv_assert_held(buf->resv); pages = kvmalloc_array(ubuf->pagecount, sizeof(*pages), GFP_KERNEL); if (!pages) return -ENOMEM; for (pg = 0; pg < ubuf->pagecount; pg++) pages[pg] = folio_page(ubuf->folios[pg], ubuf->offsets[pg] >> PAGE_SHIFT); vaddr = vm_map_ram(pages, ubuf->pagecount, -1); kvfree(pages); if (!vaddr) return -EINVAL; iosys_map_set_vaddr(map, vaddr); return 0; } static void vunmap_udmabuf(struct dma_buf *buf, struct iosys_map *map) { struct udmabuf *ubuf = buf->priv; dma_resv_assert_held(buf->resv); vm_unmap_ram(map->vaddr, ubuf->pagecount); } static struct sg_table *get_sg_table(struct device *dev, struct dma_buf *buf, enum dma_data_direction direction) { struct udmabuf *ubuf = buf->priv; struct sg_table *sg; struct scatterlist *sgl; unsigned int i = 0; int ret; sg = kzalloc(sizeof(*sg), GFP_KERNEL); if (!sg) return ERR_PTR(-ENOMEM); ret = sg_alloc_table(sg, ubuf->pagecount, GFP_KERNEL); if (ret < 0) goto err_alloc; for_each_sg(sg->sgl, sgl, ubuf->pagecount, i) sg_set_folio(sgl, ubuf->folios[i], PAGE_SIZE, ubuf->offsets[i]); ret = dma_map_sgtable(dev, sg, direction, 0); if (ret < 0) goto err_map; return sg; err_map: sg_free_table(sg); err_alloc: kfree(sg); return ERR_PTR(ret); } static void put_sg_table(struct device *dev, struct sg_table *sg, enum dma_data_direction direction) { dma_unmap_sgtable(dev, sg, direction, 0); sg_free_table(sg); kfree(sg); } static struct sg_table *map_udmabuf(struct dma_buf_attachment *at, enum dma_data_direction direction) { return get_sg_table(at->dev, at->dmabuf, direction); } static void unmap_udmabuf(struct dma_buf_attachment *at, struct sg_table *sg, enum dma_data_direction direction) { return put_sg_table(at->dev, sg, direction); } static void unpin_all_folios(struct udmabuf *ubuf) { pgoff_t i; for (i = 0; i < ubuf->nr_pinned; ++i) unpin_folio(ubuf->pinned_folios[i]); kvfree(ubuf->pinned_folios); } static __always_inline int init_udmabuf(struct udmabuf *ubuf, pgoff_t pgcnt) { ubuf->folios = kvmalloc_array(pgcnt, sizeof(*ubuf->folios), GFP_KERNEL); if (!ubuf->folios) return -ENOMEM; ubuf->offsets = kvcalloc(pgcnt, sizeof(*ubuf->offsets), GFP_KERNEL); if (!ubuf->offsets) return -ENOMEM; ubuf->pinned_folios = kvmalloc_array(pgcnt, sizeof(*ubuf->pinned_folios), GFP_KERNEL); if (!ubuf->pinned_folios) return -ENOMEM; return 0; } static __always_inline void deinit_udmabuf(struct udmabuf *ubuf) { unpin_all_folios(ubuf); kvfree(ubuf->offsets); kvfree(ubuf->folios); } static void release_udmabuf(struct dma_buf *buf) { struct udmabuf *ubuf = buf->priv; struct device *dev = ubuf->device->this_device; if (ubuf->sg) put_sg_table(dev, ubuf->sg, DMA_BIDIRECTIONAL); deinit_udmabuf(ubuf); kfree(ubuf); } static int begin_cpu_udmabuf(struct dma_buf *buf, enum dma_data_direction direction) { struct udmabuf *ubuf = buf->priv; struct device *dev = ubuf->device->this_device; int ret = 0; if (!ubuf->sg) { ubuf->sg = get_sg_table(dev, buf, direction); if (IS_ERR(ubuf->sg)) { ret = PTR_ERR(ubuf->sg); ubuf->sg = NULL; } } else { dma_sync_sgtable_for_cpu(dev, ubuf->sg, direction); } return ret; } static int end_cpu_udmabuf(struct dma_buf *buf, enum dma_data_direction direction) { struct udmabuf *ubuf = buf->priv; struct device *dev = ubuf->device->this_device; if (!ubuf->sg) return -EINVAL; dma_sync_sgtable_for_device(dev, ubuf->sg, direction); return 0; } static const struct dma_buf_ops udmabuf_ops = { .map_dma_buf = map_udmabuf, .unmap_dma_buf = unmap_udmabuf, .release = release_udmabuf, .mmap = mmap_udmabuf, .vmap = vmap_udmabuf, .vunmap = vunmap_udmabuf, .begin_cpu_access = begin_cpu_udmabuf, .end_cpu_access = end_cpu_udmabuf, }; #define SEALS_WANTED (F_SEAL_SHRINK) #define SEALS_DENIED (F_SEAL_WRITE|F_SEAL_FUTURE_WRITE) static int check_memfd_seals(struct file *memfd) { int seals; if (!shmem_file(memfd) && !is_file_hugepages(memfd)) return -EBADFD; seals = memfd_fcntl(memfd, F_GET_SEALS, 0); if (seals == -EINVAL) return -EBADFD; if ((seals & SEALS_WANTED) != SEALS_WANTED || (seals & SEALS_DENIED) != 0) return -EINVAL; return 0; } static struct dma_buf *export_udmabuf(struct udmabuf *ubuf, struct miscdevice *device) { DEFINE_DMA_BUF_EXPORT_INFO(exp_info); ubuf->device = device; exp_info.ops = &udmabuf_ops; exp_info.size = ubuf->pagecount << PAGE_SHIFT; exp_info.priv = ubuf; exp_info.flags = O_RDWR; return dma_buf_export(&exp_info); } static long udmabuf_pin_folios(struct udmabuf *ubuf, struct file *memfd, loff_t start, loff_t size, struct folio **folios) { pgoff_t nr_pinned = ubuf->nr_pinned; pgoff_t upgcnt = ubuf->pagecount; u32 cur_folio, cur_pgcnt; pgoff_t pgoff, pgcnt; long nr_folios; loff_t end; pgcnt = size >> PAGE_SHIFT; end = start + (pgcnt << PAGE_SHIFT) - 1; nr_folios = memfd_pin_folios(memfd, start, end, folios, pgcnt, &pgoff); if (nr_folios <= 0) return nr_folios ? nr_folios : -EINVAL; cur_pgcnt = 0; for (cur_folio = 0; cur_folio < nr_folios; ++cur_folio) { pgoff_t subpgoff = pgoff; size_t fsize = folio_size(folios[cur_folio]); ubuf->pinned_folios[nr_pinned++] = folios[cur_folio]; for (; subpgoff < fsize; subpgoff += PAGE_SIZE) { ubuf->folios[upgcnt] = folios[cur_folio]; ubuf->offsets[upgcnt] = subpgoff; ++upgcnt; if (++cur_pgcnt >= pgcnt) goto end; } /** * In a given range, only the first subpage of the first folio * has an offset, that is returned by memfd_pin_folios(). * The first subpages of other folios (in the range) have an * offset of 0. */ pgoff = 0; } end: ubuf->pagecount = upgcnt; ubuf->nr_pinned = nr_pinned; return 0; } static long udmabuf_create(struct miscdevice *device, struct udmabuf_create_list *head, struct udmabuf_create_item *list) { unsigned long max_nr_folios = 0; struct folio **folios = NULL; pgoff_t pgcnt = 0, pglimit; struct udmabuf *ubuf; struct dma_buf *dmabuf; long ret = -EINVAL; u32 i, flags; ubuf = kzalloc(sizeof(*ubuf), GFP_KERNEL); if (!ubuf) return -ENOMEM; pglimit = ((u64)size_limit_mb * 1024 * 1024) >> PAGE_SHIFT; for (i = 0; i < head->count; i++) { pgoff_t subpgcnt; if (!PAGE_ALIGNED(list[i].offset)) goto err_noinit; if (!PAGE_ALIGNED(list[i].size)) goto err_noinit; subpgcnt = list[i].size >> PAGE_SHIFT; pgcnt += subpgcnt; if (pgcnt > pglimit) goto err_noinit; max_nr_folios = max_t(unsigned long, subpgcnt, max_nr_folios); } if (!pgcnt) goto err_noinit; ret = init_udmabuf(ubuf, pgcnt); if (ret) goto err; folios = kvmalloc_array(max_nr_folios, sizeof(*folios), GFP_KERNEL); if (!folios) { ret = -ENOMEM; goto err; } for (i = 0; i < head->count; i++) { struct file *memfd = fget(list[i].memfd); if (!memfd) { ret = -EBADFD; goto err; } /* * Take the inode lock to protect against concurrent * memfd_add_seals(), which takes this lock in write mode. */ inode_lock_shared(file_inode(memfd)); ret = check_memfd_seals(memfd); if (ret) goto out_unlock; ret = udmabuf_pin_folios(ubuf, memfd, list[i].offset, list[i].size, folios); out_unlock: inode_unlock_shared(file_inode(memfd)); fput(memfd); if (ret) goto err; } flags = head->flags & UDMABUF_FLAGS_CLOEXEC ? O_CLOEXEC : 0; dmabuf = export_udmabuf(ubuf, device); if (IS_ERR(dmabuf)) { ret = PTR_ERR(dmabuf); goto err; } /* * Ownership of ubuf is held by the dmabuf from here. * If the following dma_buf_fd() fails, dma_buf_put() cleans up both the * dmabuf and the ubuf (through udmabuf_ops.release). */ ret = dma_buf_fd(dmabuf, flags); if (ret < 0) dma_buf_put(dmabuf); kvfree(folios); return ret; err: deinit_udmabuf(ubuf); err_noinit: kfree(ubuf); kvfree(folios); return ret; } static long udmabuf_ioctl_create(struct file *filp, unsigned long arg) { struct udmabuf_create create; struct udmabuf_create_list head; struct udmabuf_create_item list; if (copy_from_user(&create, (void __user *)arg, sizeof(create))) return -EFAULT; head.flags = create.flags; head.count = 1; list.memfd = create.memfd; list.offset = create.offset; list.size = create.size; return udmabuf_create(filp->private_data, &head, &list); } static long udmabuf_ioctl_create_list(struct file *filp, unsigned long arg) { struct udmabuf_create_list head; struct udmabuf_create_item *list; int ret = -EINVAL; u32 lsize; if (copy_from_user(&head, (void __user *)arg, sizeof(head))) return -EFAULT; if (head.count > list_limit) return -EINVAL; lsize = sizeof(struct udmabuf_create_item) * head.count; list = memdup_user((void __user *)(arg + sizeof(head)), lsize); if (IS_ERR(list)) return PTR_ERR(list); ret = udmabuf_create(filp->private_data, &head, list); kfree(list); return ret; } static long udmabuf_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { long ret; switch (ioctl) { case UDMABUF_CREATE: ret = udmabuf_ioctl_create(filp, arg); break; case UDMABUF_CREATE_LIST: ret = udmabuf_ioctl_create_list(filp, arg); break; default: ret = -ENOTTY; break; } return ret; } static const struct file_operations udmabuf_fops = { .owner = THIS_MODULE, .unlocked_ioctl = udmabuf_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = udmabuf_ioctl, #endif }; static struct miscdevice udmabuf_misc = { .minor = MISC_DYNAMIC_MINOR, .name = "udmabuf", .fops = &udmabuf_fops, }; static int __init udmabuf_dev_init(void) { int ret; ret = misc_register(&udmabuf_misc); if (ret < 0) { pr_err("Could not initialize udmabuf device\n"); return ret; } ret = dma_coerce_mask_and_coherent(udmabuf_misc.this_device, DMA_BIT_MASK(64)); if (ret < 0) { pr_err("Could not setup DMA mask for udmabuf device\n"); misc_deregister(&udmabuf_misc); return ret; } return 0; } static void __exit udmabuf_dev_exit(void) { misc_deregister(&udmabuf_misc); } module_init(udmabuf_dev_init) module_exit(udmabuf_dev_exit) MODULE_AUTHOR("Gerd Hoffmann <kraxel@redhat.com>");
9 9 342 345 271 2 8245 2120 5792 214 1852 1 1199 7 59 360 10 528 6 7 24 937 1 723 20 723 1313 33 21 279 936 3 1 946 942 5 946 517 10 713 39 24 14 53 20 9 28 413 582 315 310 312 49 326 205 1232 4 9639 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SCHED_SIGNAL_H #define _LINUX_SCHED_SIGNAL_H #include <linux/rculist.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/sched/jobctl.h> #include <linux/sched/task.h> #include <linux/cred.h> #include <linux/refcount.h> #include <linux/pid.h> #include <linux/posix-timers.h> #include <linux/mm_types.h> #include <asm/ptrace.h> /* * Types defining task->signal and task->sighand and APIs using them: */ struct sighand_struct { spinlock_t siglock; refcount_t count; wait_queue_head_t signalfd_wqh; struct k_sigaction action[_NSIG]; }; /* * Per-process accounting stats: */ struct pacct_struct { int ac_flag; long ac_exitcode; unsigned long ac_mem; u64 ac_utime, ac_stime; unsigned long ac_minflt, ac_majflt; }; struct cpu_itimer { u64 expires; u64 incr; }; /* * This is the atomic variant of task_cputime, which can be used for * storing and updating task_cputime statistics without locking. */ struct task_cputime_atomic { atomic64_t utime; atomic64_t stime; atomic64_t sum_exec_runtime; }; #define INIT_CPUTIME_ATOMIC \ (struct task_cputime_atomic) { \ .utime = ATOMIC64_INIT(0), \ .stime = ATOMIC64_INIT(0), \ .sum_exec_runtime = ATOMIC64_INIT(0), \ } /** * struct thread_group_cputimer - thread group interval timer counts * @cputime_atomic: atomic thread group interval timers. * * This structure contains the version of task_cputime, above, that is * used for thread group CPU timer calculations. */ struct thread_group_cputimer { struct task_cputime_atomic cputime_atomic; }; struct multiprocess_signals { sigset_t signal; struct hlist_node node; }; struct core_thread { struct task_struct *task; struct core_thread *next; }; struct core_state { atomic_t nr_threads; struct core_thread dumper; struct completion startup; }; /* * NOTE! "signal_struct" does not have its own * locking, because a shared signal_struct always * implies a shared sighand_struct, so locking * sighand_struct is always a proper superset of * the locking of signal_struct. */ struct signal_struct { refcount_t sigcnt; atomic_t live; int nr_threads; int quick_threads; struct list_head thread_head; wait_queue_head_t wait_chldexit; /* for wait4() */ /* current thread group signal load-balancing target: */ struct task_struct *curr_target; /* shared signal handling: */ struct sigpending shared_pending; /* For collecting multiprocess signals during fork */ struct hlist_head multiprocess; /* thread group exit support */ int group_exit_code; /* notify group_exec_task when notify_count is less or equal to 0 */ int notify_count; struct task_struct *group_exec_task; /* thread group stop support, overloads group_exit_code too */ int group_stop_count; unsigned int flags; /* see SIGNAL_* flags below */ struct core_state *core_state; /* coredumping support */ /* * PR_SET_CHILD_SUBREAPER marks a process, like a service * manager, to re-parent orphan (double-forking) child processes * to this process instead of 'init'. The service manager is * able to receive SIGCHLD signals and is able to investigate * the process until it calls wait(). All children of this * process will inherit a flag if they should look for a * child_subreaper process at exit. */ unsigned int is_child_subreaper:1; unsigned int has_child_subreaper:1; #ifdef CONFIG_POSIX_TIMERS /* POSIX.1b Interval Timers */ unsigned int timer_create_restore_ids:1; atomic_t next_posix_timer_id; struct hlist_head posix_timers; struct hlist_head ignored_posix_timers; /* ITIMER_REAL timer for the process */ struct hrtimer real_timer; ktime_t it_real_incr; /* * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these * values are defined to 0 and 1 respectively */ struct cpu_itimer it[2]; /* * Thread group totals for process CPU timers. * See thread_group_cputimer(), et al, for details. */ struct thread_group_cputimer cputimer; #endif /* Empty if CONFIG_POSIX_TIMERS=n */ struct posix_cputimers posix_cputimers; /* PID/PID hash table linkage. */ struct pid *pids[PIDTYPE_MAX]; #ifdef CONFIG_NO_HZ_FULL atomic_t tick_dep_mask; #endif struct pid *tty_old_pgrp; /* boolean value for session group leader */ int leader; struct tty_struct *tty; /* NULL if no tty */ #ifdef CONFIG_SCHED_AUTOGROUP struct autogroup *autogroup; #endif /* * Cumulative resource counters for dead threads in the group, * and for reaped dead child processes forked by this group. * Live threads maintain their own counters and add to these * in __exit_signal, except for the group leader. */ seqlock_t stats_lock; u64 utime, stime, cutime, cstime; u64 gtime; u64 cgtime; struct prev_cputime prev_cputime; unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; unsigned long inblock, oublock, cinblock, coublock; unsigned long maxrss, cmaxrss; struct task_io_accounting ioac; /* * Cumulative ns of schedule CPU time fo dead threads in the * group, not including a zombie group leader, (This only differs * from jiffies_to_ns(utime + stime) if sched_clock uses something * other than jiffies.) */ unsigned long long sum_sched_runtime; /* * We don't bother to synchronize most readers of this at all, * because there is no reader checking a limit that actually needs * to get both rlim_cur and rlim_max atomically, and either one * alone is a single word that can safely be read normally. * getrlimit/setrlimit use task_lock(current->group_leader) to * protect this instead of the siglock, because they really * have no need to disable irqs. */ struct rlimit rlim[RLIM_NLIMITS]; #ifdef CONFIG_BSD_PROCESS_ACCT struct pacct_struct pacct; /* per-process accounting information */ #endif #ifdef CONFIG_TASKSTATS struct taskstats *stats; #endif #ifdef CONFIG_AUDIT unsigned audit_tty; struct tty_audit_buf *tty_audit_buf; #endif #ifdef CONFIG_CGROUPS struct rw_semaphore cgroup_threadgroup_rwsem; #endif /* * Thread is the potential origin of an oom condition; kill first on * oom */ bool oom_flag_origin; short oom_score_adj; /* OOM kill score adjustment */ short oom_score_adj_min; /* OOM kill score adjustment min value. * Only settable by CAP_SYS_RESOURCE. */ struct mm_struct *oom_mm; /* recorded mm when the thread group got * killed by the oom killer */ struct mutex cred_guard_mutex; /* guard against foreign influences on * credential calculations * (notably. ptrace) * Deprecated do not use in new code. * Use exec_update_lock instead. */ struct rw_semaphore exec_update_lock; /* Held while task_struct is * being updated during exec, * and may have inconsistent * permissions. */ } __randomize_layout; /* * Bits in flags field of signal_struct. */ #define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ #define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */ #define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */ /* * Pending notifications to parent. */ #define SIGNAL_CLD_STOPPED 0x00000010 #define SIGNAL_CLD_CONTINUED 0x00000020 #define SIGNAL_CLD_MASK (SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED) #define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */ #define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \ SIGNAL_STOP_CONTINUED) static inline void signal_set_stop_flags(struct signal_struct *sig, unsigned int flags) { WARN_ON(sig->flags & SIGNAL_GROUP_EXIT); sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags; } extern void flush_signals(struct task_struct *); extern void ignore_signals(struct task_struct *); extern void flush_signal_handlers(struct task_struct *, int force_default); extern int dequeue_signal(sigset_t *mask, kernel_siginfo_t *info, enum pid_type *type); static inline int kernel_dequeue_signal(void) { struct task_struct *task = current; kernel_siginfo_t __info; enum pid_type __type; int ret; spin_lock_irq(&task->sighand->siglock); ret = dequeue_signal(&task->blocked, &__info, &__type); spin_unlock_irq(&task->sighand->siglock); return ret; } static inline void kernel_signal_stop(void) { spin_lock_irq(&current->sighand->siglock); if (current->jobctl & JOBCTL_STOP_DEQUEUED) { current->jobctl |= JOBCTL_STOPPED; set_special_state(TASK_STOPPED); } spin_unlock_irq(&current->sighand->siglock); schedule(); } int force_sig_fault_to_task(int sig, int code, void __user *addr, struct task_struct *t); int force_sig_fault(int sig, int code, void __user *addr); int send_sig_fault(int sig, int code, void __user *addr, struct task_struct *t); int force_sig_mceerr(int code, void __user *, short); int send_sig_mceerr(int code, void __user *, short, struct task_struct *); int force_sig_bnderr(void __user *addr, void __user *lower, void __user *upper); int force_sig_pkuerr(void __user *addr, u32 pkey); int send_sig_perf(void __user *addr, u32 type, u64 sig_data); int force_sig_ptrace_errno_trap(int errno, void __user *addr); int force_sig_fault_trapno(int sig, int code, void __user *addr, int trapno); int send_sig_fault_trapno(int sig, int code, void __user *addr, int trapno, struct task_struct *t); int force_sig_seccomp(int syscall, int reason, bool force_coredump); extern int send_sig_info(int, struct kernel_siginfo *, struct task_struct *); extern void force_sigsegv(int sig); extern int force_sig_info(struct kernel_siginfo *); extern int __kill_pgrp_info(int sig, struct kernel_siginfo *info, struct pid *pgrp); extern int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid); extern int kill_pid_usb_asyncio(int sig, int errno, sigval_t addr, struct pid *, const struct cred *); extern int kill_pgrp(struct pid *pid, int sig, int priv); extern int kill_pid(struct pid *pid, int sig, int priv); extern __must_check bool do_notify_parent(struct task_struct *, int); extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); extern void force_sig(int); extern void force_fatal_sig(int); extern void force_exit_sig(int); extern int send_sig(int, struct task_struct *, int); extern int zap_other_threads(struct task_struct *p); extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); static inline void clear_notify_signal(void) { clear_thread_flag(TIF_NOTIFY_SIGNAL); smp_mb__after_atomic(); } /* * Returns 'true' if kick_process() is needed to force a transition from * user -> kernel to guarantee expedient run of TWA_SIGNAL based task_work. */ static inline bool __set_notify_signal(struct task_struct *task) { return !test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) && !wake_up_state(task, TASK_INTERRUPTIBLE); } /* * Called to break out of interruptible wait loops, and enter the * exit_to_user_mode_loop(). */ static inline void set_notify_signal(struct task_struct *task) { if (__set_notify_signal(task)) kick_process(task); } static inline int restart_syscall(void) { set_tsk_thread_flag(current, TIF_SIGPENDING); return -ERESTARTNOINTR; } static inline int task_sigpending(struct task_struct *p) { return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); } static inline int signal_pending(struct task_struct *p) { /* * TIF_NOTIFY_SIGNAL isn't really a signal, but it requires the same * behavior in terms of ensuring that we break out of wait loops * so that notify signal callbacks can be processed. */ if (unlikely(test_tsk_thread_flag(p, TIF_NOTIFY_SIGNAL))) return 1; return task_sigpending(p); } static inline int __fatal_signal_pending(struct task_struct *p) { return unlikely(sigismember(&p->pending.signal, SIGKILL)); } static inline int fatal_signal_pending(struct task_struct *p) { return task_sigpending(p) && __fatal_signal_pending(p); } static inline int signal_pending_state(unsigned int state, struct task_struct *p) { if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL))) return 0; if (!signal_pending(p)) return 0; return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); } /* * This should only be used in fault handlers to decide whether we * should stop the current fault routine to handle the signals * instead, especially with the case where we've got interrupted with * a VM_FAULT_RETRY. */ static inline bool fault_signal_pending(vm_fault_t fault_flags, struct pt_regs *regs) { return unlikely((fault_flags & VM_FAULT_RETRY) && (fatal_signal_pending(current) || (user_mode(regs) && signal_pending(current)))); } /* * Reevaluate whether the task has signals pending delivery. * Wake the task if so. * This is required every time the blocked sigset_t changes. * callers must hold sighand->siglock. */ extern void recalc_sigpending(void); extern void calculate_sigpending(void); extern void signal_wake_up_state(struct task_struct *t, unsigned int state); static inline void signal_wake_up(struct task_struct *t, bool fatal) { unsigned int state = 0; if (fatal && !(t->jobctl & JOBCTL_PTRACE_FROZEN)) { t->jobctl &= ~(JOBCTL_STOPPED | JOBCTL_TRACED); state = TASK_WAKEKILL | __TASK_TRACED; } signal_wake_up_state(t, state); } static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) { unsigned int state = 0; if (resume) { t->jobctl &= ~JOBCTL_TRACED; state = __TASK_TRACED; } signal_wake_up_state(t, state); } void task_join_group_stop(struct task_struct *task); #ifdef TIF_RESTORE_SIGMASK /* * Legacy restore_sigmask accessors. These are inefficient on * SMP architectures because they require atomic operations. */ /** * set_restore_sigmask() - make sure saved_sigmask processing gets done * * This sets TIF_RESTORE_SIGMASK and ensures that the arch signal code * will run before returning to user mode, to process the flag. For * all callers, TIF_SIGPENDING is already set or it's no harm to set * it. TIF_RESTORE_SIGMASK need not be in the set of bits that the * arch code will notice on return to user mode, in case those bits * are scarce. We set TIF_SIGPENDING here to ensure that the arch * signal code always gets run when TIF_RESTORE_SIGMASK is set. */ static inline void set_restore_sigmask(void) { set_thread_flag(TIF_RESTORE_SIGMASK); } static inline void clear_tsk_restore_sigmask(struct task_struct *task) { clear_tsk_thread_flag(task, TIF_RESTORE_SIGMASK); } static inline void clear_restore_sigmask(void) { clear_thread_flag(TIF_RESTORE_SIGMASK); } static inline bool test_tsk_restore_sigmask(struct task_struct *task) { return test_tsk_thread_flag(task, TIF_RESTORE_SIGMASK); } static inline bool test_restore_sigmask(void) { return test_thread_flag(TIF_RESTORE_SIGMASK); } static inline bool test_and_clear_restore_sigmask(void) { return test_and_clear_thread_flag(TIF_RESTORE_SIGMASK); } #else /* TIF_RESTORE_SIGMASK */ /* Higher-quality implementation, used if TIF_RESTORE_SIGMASK doesn't exist. */ static inline void set_restore_sigmask(void) { current->restore_sigmask = true; } static inline void clear_tsk_restore_sigmask(struct task_struct *task) { task->restore_sigmask = false; } static inline void clear_restore_sigmask(void) { current->restore_sigmask = false; } static inline bool test_restore_sigmask(void) { return current->restore_sigmask; } static inline bool test_tsk_restore_sigmask(struct task_struct *task) { return task->restore_sigmask; } static inline bool test_and_clear_restore_sigmask(void) { if (!current->restore_sigmask) return false; current->restore_sigmask = false; return true; } #endif static inline void restore_saved_sigmask(void) { if (test_and_clear_restore_sigmask()) __set_current_blocked(&current->saved_sigmask); } extern int set_user_sigmask(const sigset_t __user *umask, size_t sigsetsize); static inline void restore_saved_sigmask_unless(bool interrupted) { if (interrupted) WARN_ON(!signal_pending(current)); else restore_saved_sigmask(); } static inline sigset_t *sigmask_to_save(void) { sigset_t *res = &current->blocked; if (unlikely(test_restore_sigmask())) res = &current->saved_sigmask; return res; } static inline int kill_cad_pid(int sig, int priv) { return kill_pid(cad_pid, sig, priv); } /* These can be the second arg to send_sig_info/send_group_sig_info. */ #define SEND_SIG_NOINFO ((struct kernel_siginfo *) 0) #define SEND_SIG_PRIV ((struct kernel_siginfo *) 1) static inline int __on_sig_stack(unsigned long sp) { #ifdef CONFIG_STACK_GROWSUP return sp >= current->sas_ss_sp && sp - current->sas_ss_sp < current->sas_ss_size; #else return sp > current->sas_ss_sp && sp - current->sas_ss_sp <= current->sas_ss_size; #endif } /* * True if we are on the alternate signal stack. */ static inline int on_sig_stack(unsigned long sp) { /* * If the signal stack is SS_AUTODISARM then, by construction, we * can't be on the signal stack unless user code deliberately set * SS_AUTODISARM when we were already on it. * * This improves reliability: if user state gets corrupted such that * the stack pointer points very close to the end of the signal stack, * then this check will enable the signal to be handled anyway. */ if (current->sas_ss_flags & SS_AUTODISARM) return 0; return __on_sig_stack(sp); } static inline int sas_ss_flags(unsigned long sp) { if (!current->sas_ss_size) return SS_DISABLE; return on_sig_stack(sp) ? SS_ONSTACK : 0; } static inline void sas_ss_reset(struct task_struct *p) { p->sas_ss_sp = 0; p->sas_ss_size = 0; p->sas_ss_flags = SS_DISABLE; } static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig) { if (unlikely((ksig->ka.sa.sa_flags & SA_ONSTACK)) && ! sas_ss_flags(sp)) #ifdef CONFIG_STACK_GROWSUP return current->sas_ss_sp; #else return current->sas_ss_sp + current->sas_ss_size; #endif return sp; } extern void __cleanup_sighand(struct sighand_struct *); extern void flush_itimer_signals(void); #define tasklist_empty() \ list_empty(&init_task.tasks) #define next_task(p) \ list_entry_rcu((p)->tasks.next, struct task_struct, tasks) #define for_each_process(p) \ for (p = &init_task ; (p = next_task(p)) != &init_task ; ) extern bool current_is_single_threaded(void); /* * Without tasklist/siglock it is only rcu-safe if g can't exit/exec, * otherwise next_thread(t) will never reach g after list_del_rcu(g). */ #define while_each_thread(g, t) \ while ((t = next_thread(t)) != g) #define for_other_threads(p, t) \ for (t = p; (t = next_thread(t)) != p; ) #define __for_each_thread(signal, t) \ list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node, \ lockdep_is_held(&tasklist_lock)) #define for_each_thread(p, t) \ __for_each_thread((p)->signal, t) /* Careful: this is a double loop, 'break' won't work as expected. */ #define for_each_process_thread(p, t) \ for_each_process(p) for_each_thread(p, t) typedef int (*proc_visitor)(struct task_struct *p, void *data); void walk_process_tree(struct task_struct *top, proc_visitor, void *); static inline struct pid *task_pid_type(struct task_struct *task, enum pid_type type) { struct pid *pid; if (type == PIDTYPE_PID) pid = task_pid(task); else pid = task->signal->pids[type]; return pid; } static inline struct pid *task_tgid(struct task_struct *task) { return task->signal->pids[PIDTYPE_TGID]; } /* * Without tasklist or RCU lock it is not safe to dereference * the result of task_pgrp/task_session even if task == current, * we can race with another thread doing sys_setsid/sys_setpgid. */ static inline struct pid *task_pgrp(struct task_struct *task) { return task->signal->pids[PIDTYPE_PGID]; } static inline struct pid *task_session(struct task_struct *task) { return task->signal->pids[PIDTYPE_SID]; } static inline int get_nr_threads(struct task_struct *task) { return task->signal->nr_threads; } static inline bool thread_group_leader(struct task_struct *p) { return p->exit_signal >= 0; } static inline bool same_thread_group(struct task_struct *p1, struct task_struct *p2) { return p1->signal == p2->signal; } /* * returns NULL if p is the last thread in the thread group */ static inline struct task_struct *__next_thread(struct task_struct *p) { return list_next_or_null_rcu(&p->signal->thread_head, &p->thread_node, struct task_struct, thread_node); } static inline struct task_struct *next_thread(struct task_struct *p) { return __next_thread(p) ?: p->group_leader; } static inline int thread_group_empty(struct task_struct *p) { return thread_group_leader(p) && list_is_last(&p->thread_node, &p->signal->thread_head); } #define delay_group_leader(p) \ (thread_group_leader(p) && !thread_group_empty(p)) extern struct sighand_struct *__lock_task_sighand(struct task_struct *task, unsigned long *flags); static inline struct sighand_struct *lock_task_sighand(struct task_struct *task, unsigned long *flags) { struct sighand_struct *ret; ret = __lock_task_sighand(task, flags); (void)__cond_lock(&task->sighand->siglock, ret); return ret; } static inline void unlock_task_sighand(struct task_struct *task, unsigned long *flags) { spin_unlock_irqrestore(&task->sighand->siglock, *flags); } #ifdef CONFIG_LOCKDEP extern void lockdep_assert_task_sighand_held(struct task_struct *task); #else static inline void lockdep_assert_task_sighand_held(struct task_struct *task) { } #endif static inline unsigned long task_rlimit(const struct task_struct *task, unsigned int limit) { return READ_ONCE(task->signal->rlim[limit].rlim_cur); } static inline unsigned long task_rlimit_max(const struct task_struct *task, unsigned int limit) { return READ_ONCE(task->signal->rlim[limit].rlim_max); } static inline unsigned long rlimit(unsigned int limit) { return task_rlimit(current, limit); } static inline unsigned long rlimit_max(unsigned int limit) { return task_rlimit_max(current, limit); } #endif /* _LINUX_SCHED_SIGNAL_H */
1 185 1 102 1 129 129 2 3 3 3 3 3 302 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef BTRFS_MISC_H #define BTRFS_MISC_H #include <linux/types.h> #include <linux/bitmap.h> #include <linux/sched.h> #include <linux/wait.h> #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/math64.h> #include <linux/rbtree.h> #include <linux/bio.h> /* * Enumerate bits using enum autoincrement. Define the @name as the n-th bit. */ #define ENUM_BIT(name) \ __ ## name ## _BIT, \ name = (1U << __ ## name ## _BIT), \ __ ## name ## _SEQ = __ ## name ## _BIT static inline phys_addr_t bio_iter_phys(struct bio *bio, struct bvec_iter *iter) { struct bio_vec bv = bio_iter_iovec(bio, *iter); return bvec_phys(&bv); } /* * Iterate bio using btrfs block size. * * This will handle large folio and highmem. * * @paddr: Physical memory address of each iteration * @bio: The bio to iterate * @iter: The bvec_iter (pointer) to use. * @blocksize: The blocksize to iterate. * * This requires all folios in the bio to cover at least one block. */ #define btrfs_bio_for_each_block(paddr, bio, iter, blocksize) \ for (; (iter)->bi_size && \ (paddr = bio_iter_phys((bio), (iter)), 1); \ bio_advance_iter_single((bio), (iter), (blocksize))) /* Initialize a bvec_iter to the size of the specified bio. */ static inline struct bvec_iter init_bvec_iter_for_bio(struct bio *bio) { struct bio_vec *bvec; u32 bio_size = 0; int i; bio_for_each_bvec_all(bvec, bio, i) bio_size += bvec->bv_len; return (struct bvec_iter) { .bi_sector = 0, .bi_size = bio_size, .bi_idx = 0, .bi_bvec_done = 0, }; } #define btrfs_bio_for_each_block_all(paddr, bio, blocksize) \ for (struct bvec_iter iter = init_bvec_iter_for_bio(bio); \ (iter).bi_size && \ (paddr = bio_iter_phys((bio), &(iter)), 1); \ bio_advance_iter_single((bio), &(iter), (blocksize))) static inline void cond_wake_up(struct wait_queue_head *wq) { /* * This implies a full smp_mb barrier, see comments for * waitqueue_active why. */ if (wq_has_sleeper(wq)) wake_up(wq); } static inline void cond_wake_up_nomb(struct wait_queue_head *wq) { /* * Special case for conditional wakeup where the barrier required for * waitqueue_active is implied by some of the preceding code. Eg. one * of such atomic operations (atomic_dec_and_return, ...), or a * unlock/lock sequence, etc. */ if (waitqueue_active(wq)) wake_up(wq); } static inline u64 mult_perc(u64 num, u32 percent) { return div_u64(num * percent, 100); } /* Copy of is_power_of_two that is 64bit safe */ static inline bool is_power_of_two_u64(u64 n) { return n != 0 && (n & (n - 1)) == 0; } static inline bool has_single_bit_set(u64 n) { return is_power_of_two_u64(n); } /* * Simple bytenr based rb_tree relate structures * * Any structure wants to use bytenr as single search index should have their * structure start with these members. */ struct rb_simple_node { struct rb_node rb_node; u64 bytenr; }; static inline struct rb_node *rb_simple_search(const struct rb_root *root, u64 bytenr) { struct rb_node *node = root->rb_node; struct rb_simple_node *entry; while (node) { entry = rb_entry(node, struct rb_simple_node, rb_node); if (bytenr < entry->bytenr) node = node->rb_left; else if (bytenr > entry->bytenr) node = node->rb_right; else return node; } return NULL; } /* * Search @root from an entry that starts or comes after @bytenr. * * @root: the root to search. * @bytenr: bytenr to search from. * * Return the rb_node that start at or after @bytenr. If there is no entry at * or after @bytner return NULL. */ static inline struct rb_node *rb_simple_search_first(const struct rb_root *root, u64 bytenr) { struct rb_node *node = root->rb_node, *ret = NULL; struct rb_simple_node *entry, *ret_entry = NULL; while (node) { entry = rb_entry(node, struct rb_simple_node, rb_node); if (bytenr < entry->bytenr) { if (!ret || entry->bytenr < ret_entry->bytenr) { ret = node; ret_entry = entry; } node = node->rb_left; } else if (bytenr > entry->bytenr) { node = node->rb_right; } else { return node; } } return ret; } static int rb_simple_node_bytenr_cmp(struct rb_node *new, const struct rb_node *existing) { struct rb_simple_node *new_entry = rb_entry(new, struct rb_simple_node, rb_node); struct rb_simple_node *existing_entry = rb_entry(existing, struct rb_simple_node, rb_node); if (new_entry->bytenr < existing_entry->bytenr) return -1; else if (new_entry->bytenr > existing_entry->bytenr) return 1; return 0; } static inline struct rb_node *rb_simple_insert(struct rb_root *root, struct rb_simple_node *simple_node) { return rb_find_add(&simple_node->rb_node, root, rb_simple_node_bytenr_cmp); } static inline bool bitmap_test_range_all_set(const unsigned long *addr, unsigned long start, unsigned long nbits) { unsigned long found_zero; found_zero = find_next_zero_bit(addr, start + nbits, start); return (found_zero == start + nbits); } static inline bool bitmap_test_range_all_zero(const unsigned long *addr, unsigned long start, unsigned long nbits) { unsigned long found_set; found_set = find_next_bit(addr, start + nbits, start); return (found_set == start + nbits); } static inline u64 folio_end(struct folio *folio) { return folio_pos(folio) + folio_size(folio); } #endif
240 240 240 241 241 240 24 24 45 24 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2012 Fusion-io All rights reserved. * Copyright (C) 2012 Intel Corp. All rights reserved. */ #include <linux/sched.h> #include <linux/bio.h> #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/raid/pq.h> #include <linux/hash.h> #include <linux/list_sort.h> #include <linux/raid/xor.h> #include <linux/mm.h> #include "messages.h" #include "ctree.h" #include "disk-io.h" #include "volumes.h" #include "raid56.h" #include "async-thread.h" #include "file-item.h" #include "btrfs_inode.h" /* set when additional merges to this rbio are not allowed */ #define RBIO_RMW_LOCKED_BIT 1 /* * set when this rbio is sitting in the hash, but it is just a cache * of past RMW */ #define RBIO_CACHE_BIT 2 /* * set when it is safe to trust the stripe_pages for caching */ #define RBIO_CACHE_READY_BIT 3 #define RBIO_CACHE_SIZE 1024 #define BTRFS_STRIPE_HASH_TABLE_BITS 11 static void dump_bioc(const struct btrfs_fs_info *fs_info, const struct btrfs_io_context *bioc) { if (unlikely(!bioc)) { btrfs_crit(fs_info, "bioc=NULL"); return; } btrfs_crit(fs_info, "bioc logical=%llu full_stripe=%llu size=%llu map_type=0x%llx mirror=%u replace_nr_stripes=%u replace_stripe_src=%d num_stripes=%u", bioc->logical, bioc->full_stripe_logical, bioc->size, bioc->map_type, bioc->mirror_num, bioc->replace_nr_stripes, bioc->replace_stripe_src, bioc->num_stripes); for (int i = 0; i < bioc->num_stripes; i++) { btrfs_crit(fs_info, " nr=%d devid=%llu physical=%llu", i, bioc->stripes[i].dev->devid, bioc->stripes[i].physical); } } static void btrfs_dump_rbio(const struct btrfs_fs_info *fs_info, const struct btrfs_raid_bio *rbio) { if (!IS_ENABLED(CONFIG_BTRFS_ASSERT)) return; dump_bioc(fs_info, rbio->bioc); btrfs_crit(fs_info, "rbio flags=0x%lx nr_sectors=%u nr_data=%u real_stripes=%u stripe_nsectors=%u scrubp=%u dbitmap=0x%lx", rbio->flags, rbio->nr_sectors, rbio->nr_data, rbio->real_stripes, rbio->stripe_nsectors, rbio->scrubp, rbio->dbitmap); } #define ASSERT_RBIO(expr, rbio) \ ({ \ if (IS_ENABLED(CONFIG_BTRFS_ASSERT) && unlikely(!(expr))) { \ const struct btrfs_fs_info *__fs_info = (rbio)->bioc ? \ (rbio)->bioc->fs_info : NULL; \ \ btrfs_dump_rbio(__fs_info, (rbio)); \ } \ ASSERT((expr)); \ }) #define ASSERT_RBIO_STRIPE(expr, rbio, stripe_nr) \ ({ \ if (IS_ENABLED(CONFIG_BTRFS_ASSERT) && unlikely(!(expr))) { \ const struct btrfs_fs_info *__fs_info = (rbio)->bioc ? \ (rbio)->bioc->fs_info : NULL; \ \ btrfs_dump_rbio(__fs_info, (rbio)); \ btrfs_crit(__fs_info, "stripe_nr=%d", (stripe_nr)); \ } \ ASSERT((expr)); \ }) #define ASSERT_RBIO_SECTOR(expr, rbio, sector_nr) \ ({ \ if (IS_ENABLED(CONFIG_BTRFS_ASSERT) && unlikely(!(expr))) { \ const struct btrfs_fs_info *__fs_info = (rbio)->bioc ? \ (rbio)->bioc->fs_info : NULL; \ \ btrfs_dump_rbio(__fs_info, (rbio)); \ btrfs_crit(__fs_info, "sector_nr=%d", (sector_nr)); \ } \ ASSERT((expr)); \ }) #define ASSERT_RBIO_LOGICAL(expr, rbio, logical) \ ({ \ if (IS_ENABLED(CONFIG_BTRFS_ASSERT) && unlikely(!(expr))) { \ const struct btrfs_fs_info *__fs_info = (rbio)->bioc ? \ (rbio)->bioc->fs_info : NULL; \ \ btrfs_dump_rbio(__fs_info, (rbio)); \ btrfs_crit(__fs_info, "logical=%llu", (logical)); \ } \ ASSERT((expr)); \ }) /* Used by the raid56 code to lock stripes for read/modify/write */ struct btrfs_stripe_hash { struct list_head hash_list; spinlock_t lock; }; /* Used by the raid56 code to lock stripes for read/modify/write */ struct btrfs_stripe_hash_table { struct list_head stripe_cache; spinlock_t cache_lock; int cache_size; struct btrfs_stripe_hash table[]; }; /* * A structure to present a sector inside a page, the length is fixed to * sectorsize; */ struct sector_ptr { /* * Blocks from the bio list can still be highmem. * So here we use physical address to present a page and the offset inside it. */ phys_addr_t paddr; bool has_paddr; bool uptodate; }; static void rmw_rbio_work(struct work_struct *work); static void rmw_rbio_work_locked(struct work_struct *work); static void index_rbio_pages(struct btrfs_raid_bio *rbio); static int alloc_rbio_pages(struct btrfs_raid_bio *rbio); static int finish_parity_scrub(struct btrfs_raid_bio *rbio); static void scrub_rbio_work_locked(struct work_struct *work); static void free_raid_bio_pointers(struct btrfs_raid_bio *rbio) { bitmap_free(rbio->error_bitmap); kfree(rbio->stripe_pages); kfree(rbio->bio_sectors); kfree(rbio->stripe_sectors); kfree(rbio->finish_pointers); } static void free_raid_bio(struct btrfs_raid_bio *rbio) { int i; if (!refcount_dec_and_test(&rbio->refs)) return; WARN_ON(!list_empty(&rbio->stripe_cache)); WARN_ON(!list_empty(&rbio->hash_list)); WARN_ON(!bio_list_empty(&rbio->bio_list)); for (i = 0; i < rbio->nr_pages; i++) { if (rbio->stripe_pages[i]) { __free_page(rbio->stripe_pages[i]); rbio->stripe_pages[i] = NULL; } } btrfs_put_bioc(rbio->bioc); free_raid_bio_pointers(rbio); kfree(rbio); } static void start_async_work(struct btrfs_raid_bio *rbio, work_func_t work_func) { INIT_WORK(&rbio->work, work_func); queue_work(rbio->bioc->fs_info->rmw_workers, &rbio->work); } /* * the stripe hash table is used for locking, and to collect * bios in hopes of making a full stripe */ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info) { struct btrfs_stripe_hash_table *table; struct btrfs_stripe_hash_table *x; struct btrfs_stripe_hash *cur; struct btrfs_stripe_hash *h; unsigned int num_entries = 1U << BTRFS_STRIPE_HASH_TABLE_BITS; if (info->stripe_hash_table) return 0; /* * The table is large, starting with order 4 and can go as high as * order 7 in case lock debugging is turned on. * * Try harder to allocate and fallback to vmalloc to lower the chance * of a failing mount. */ table = kvzalloc(struct_size(table, table, num_entries), GFP_KERNEL); if (!table) return -ENOMEM; spin_lock_init(&table->cache_lock); INIT_LIST_HEAD(&table->stripe_cache); h = table->table; for (unsigned int i = 0; i < num_entries; i++) { cur = h + i; INIT_LIST_HEAD(&cur->hash_list); spin_lock_init(&cur->lock); } x = cmpxchg(&info->stripe_hash_table, NULL, table); kvfree(x); return 0; } static void memcpy_sectors(const struct sector_ptr *dst, const struct sector_ptr *src, u32 blocksize) { memcpy_page(phys_to_page(dst->paddr), offset_in_page(dst->paddr), phys_to_page(src->paddr), offset_in_page(src->paddr), blocksize); } /* * caching an rbio means to copy anything from the * bio_sectors array into the stripe_pages array. We * use the page uptodate bit in the stripe cache array * to indicate if it has valid data * * once the caching is done, we set the cache ready * bit. */ static void cache_rbio_pages(struct btrfs_raid_bio *rbio) { int i; int ret; ret = alloc_rbio_pages(rbio); if (ret) return; for (i = 0; i < rbio->nr_sectors; i++) { /* Some range not covered by bio (partial write), skip it */ if (!rbio->bio_sectors[i].has_paddr) { /* * Even if the sector is not covered by bio, if it is * a data sector it should still be uptodate as it is * read from disk. */ if (i < rbio->nr_data * rbio->stripe_nsectors) ASSERT(rbio->stripe_sectors[i].uptodate); continue; } memcpy_sectors(&rbio->stripe_sectors[i], &rbio->bio_sectors[i], rbio->bioc->fs_info->sectorsize); rbio->stripe_sectors[i].uptodate = 1; } set_bit(RBIO_CACHE_READY_BIT, &rbio->flags); } /* * we hash on the first logical address of the stripe */ static int rbio_bucket(struct btrfs_raid_bio *rbio) { u64 num = rbio->bioc->full_stripe_logical; /* * we shift down quite a bit. We're using byte * addressing, and most of the lower bits are zeros. * This tends to upset hash_64, and it consistently * returns just one or two different values. * * shifting off the lower bits fixes things. */ return hash_64(num >> 16, BTRFS_STRIPE_HASH_TABLE_BITS); } static bool full_page_sectors_uptodate(struct btrfs_raid_bio *rbio, unsigned int page_nr) { const u32 sectorsize = rbio->bioc->fs_info->sectorsize; const u32 sectors_per_page = PAGE_SIZE / sectorsize; int i; ASSERT(page_nr < rbio->nr_pages); for (i = sectors_per_page * page_nr; i < sectors_per_page * page_nr + sectors_per_page; i++) { if (!rbio->stripe_sectors[i].uptodate) return false; } return true; } /* * Update the stripe_sectors[] array to use correct page and pgoff * * Should be called every time any page pointer in stripes_pages[] got modified. */ static void index_stripe_sectors(struct btrfs_raid_bio *rbio) { const u32 sectorsize = rbio->bioc->fs_info->sectorsize; u32 offset; int i; for (i = 0, offset = 0; i < rbio->nr_sectors; i++, offset += sectorsize) { int page_index = offset >> PAGE_SHIFT; ASSERT(page_index < rbio->nr_pages); if (!rbio->stripe_pages[page_index]) continue; rbio->stripe_sectors[i].has_paddr = true; rbio->stripe_sectors[i].paddr = page_to_phys(rbio->stripe_pages[page_index]) + offset_in_page(offset); } } static void steal_rbio_page(struct btrfs_raid_bio *src, struct btrfs_raid_bio *dest, int page_nr) { const u32 sectorsize = src->bioc->fs_info->sectorsize; const u32 sectors_per_page = PAGE_SIZE / sectorsize; int i; if (dest->stripe_pages[page_nr]) __free_page(dest->stripe_pages[page_nr]); dest->stripe_pages[page_nr] = src->stripe_pages[page_nr]; src->stripe_pages[page_nr] = NULL; /* Also update the sector->uptodate bits. */ for (i = sectors_per_page * page_nr; i < sectors_per_page * page_nr + sectors_per_page; i++) dest->stripe_sectors[i].uptodate = true; } static bool is_data_stripe_page(struct btrfs_raid_bio *rbio, int page_nr) { const int sector_nr = (page_nr << PAGE_SHIFT) >> rbio->bioc->fs_info->sectorsize_bits; /* * We have ensured PAGE_SIZE is aligned with sectorsize, thus * we won't have a page which is half data half parity. * * Thus if the first sector of the page belongs to data stripes, then * the full page belongs to data stripes. */ return (sector_nr < rbio->nr_data * rbio->stripe_nsectors); } /* * Stealing an rbio means taking all the uptodate pages from the stripe array * in the source rbio and putting them into the destination rbio. * * This will also update the involved stripe_sectors[] which are referring to * the old pages. */ static void steal_rbio(struct btrfs_raid_bio *src, struct btrfs_raid_bio *dest) { int i; if (!test_bit(RBIO_CACHE_READY_BIT, &src->flags)) return; for (i = 0; i < dest->nr_pages; i++) { struct page *p = src->stripe_pages[i]; /* * We don't need to steal P/Q pages as they will always be * regenerated for RMW or full write anyway. */ if (!is_data_stripe_page(src, i)) continue; /* * If @src already has RBIO_CACHE_READY_BIT, it should have * all data stripe pages present and uptodate. */ ASSERT(p); ASSERT(full_page_sectors_uptodate(src, i)); steal_rbio_page(src, dest, i); } index_stripe_sectors(dest); index_stripe_sectors(src); } /* * merging means we take the bio_list from the victim and * splice it into the destination. The victim should * be discarded afterwards. * * must be called with dest->rbio_list_lock held */ static void merge_rbio(struct btrfs_raid_bio *dest, struct btrfs_raid_bio *victim) { bio_list_merge_init(&dest->bio_list, &victim->bio_list); dest->bio_list_bytes += victim->bio_list_bytes; /* Also inherit the bitmaps from @victim. */ bitmap_or(&dest->dbitmap, &victim->dbitmap, &dest->dbitmap, dest->stripe_nsectors); } /* * used to prune items that are in the cache. The caller * must hold the hash table lock. */ static void __remove_rbio_from_cache(struct btrfs_raid_bio *rbio) { int bucket = rbio_bucket(rbio); struct btrfs_stripe_hash_table *table; struct btrfs_stripe_hash *h; int freeit = 0; /* * check the bit again under the hash table lock. */ if (!test_bit(RBIO_CACHE_BIT, &rbio->flags)) return; table = rbio->bioc->fs_info->stripe_hash_table; h = table->table + bucket; /* hold the lock for the bucket because we may be * removing it from the hash table */ spin_lock(&h->lock); /* * hold the lock for the bio list because we need * to make sure the bio list is empty */ spin_lock(&rbio->bio_list_lock); if (test_and_clear_bit(RBIO_CACHE_BIT, &rbio->flags)) { list_del_init(&rbio->stripe_cache); table->cache_size -= 1; freeit = 1; /* if the bio list isn't empty, this rbio is * still involved in an IO. We take it out * of the cache list, and drop the ref that * was held for the list. * * If the bio_list was empty, we also remove * the rbio from the hash_table, and drop * the corresponding ref */ if (bio_list_empty(&rbio->bio_list)) { if (!list_empty(&rbio->hash_list)) { list_del_init(&rbio->hash_list); refcount_dec(&rbio->refs); BUG_ON(!list_empty(&rbio->plug_list)); } } } spin_unlock(&rbio->bio_list_lock); spin_unlock(&h->lock); if (freeit) free_raid_bio(rbio); } /* * prune a given rbio from the cache */ static void remove_rbio_from_cache(struct btrfs_raid_bio *rbio) { struct btrfs_stripe_hash_table *table; if (!test_bit(RBIO_CACHE_BIT, &rbio->flags)) return; table = rbio->bioc->fs_info->stripe_hash_table; spin_lock(&table->cache_lock); __remove_rbio_from_cache(rbio); spin_unlock(&table->cache_lock); } /* * remove everything in the cache */ static void btrfs_clear_rbio_cache(struct btrfs_fs_info *info) { struct btrfs_stripe_hash_table *table; struct btrfs_raid_bio *rbio; table = info->stripe_hash_table; spin_lock(&table->cache_lock); while (!list_empty(&table->stripe_cache)) { rbio = list_first_entry(&table->stripe_cache, struct btrfs_raid_bio, stripe_cache); __remove_rbio_from_cache(rbio); } spin_unlock(&table->cache_lock); } /* * remove all cached entries and free the hash table * used by unmount */ void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info) { if (!info->stripe_hash_table) return; btrfs_clear_rbio_cache(info); kvfree(info->stripe_hash_table); info->stripe_hash_table = NULL; } /* * insert an rbio into the stripe cache. It * must have already been prepared by calling * cache_rbio_pages * * If this rbio was already cached, it gets * moved to the front of the lru. * * If the size of the rbio cache is too big, we * prune an item. */ static void cache_rbio(struct btrfs_raid_bio *rbio) { struct btrfs_stripe_hash_table *table; if (!test_bit(RBIO_CACHE_READY_BIT, &rbio->flags)) return; table = rbio->bioc->fs_info->stripe_hash_table; spin_lock(&table->cache_lock); spin_lock(&rbio->bio_list_lock); /* bump our ref if we were not in the list before */ if (!test_and_set_bit(RBIO_CACHE_BIT, &rbio->flags)) refcount_inc(&rbio->refs); if (!list_empty(&rbio->stripe_cache)){ list_move(&rbio->stripe_cache, &table->stripe_cache); } else { list_add(&rbio->stripe_cache, &table->stripe_cache); table->cache_size += 1; } spin_unlock(&rbio->bio_list_lock); if (table->cache_size > RBIO_CACHE_SIZE) { struct btrfs_raid_bio *found; found = list_last_entry(&table->stripe_cache, struct btrfs_raid_bio, stripe_cache); if (found != rbio) __remove_rbio_from_cache(found); } spin_unlock(&table->cache_lock); } /* * helper function to run the xor_blocks api. It is only * able to do MAX_XOR_BLOCKS at a time, so we need to * loop through. */ static void run_xor(void **pages, int src_cnt, ssize_t len) { int src_off = 0; int xor_src_cnt = 0; void *dest = pages[src_cnt]; while(src_cnt > 0) { xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS); xor_blocks(xor_src_cnt, len, dest, pages + src_off); src_cnt -= xor_src_cnt; src_off += xor_src_cnt; } } /* * Returns true if the bio list inside this rbio covers an entire stripe (no * rmw required). */ static int rbio_is_full(struct btrfs_raid_bio *rbio) { unsigned long size = rbio->bio_list_bytes; int ret = 1; spin_lock(&rbio->bio_list_lock); if (size != rbio->nr_data * BTRFS_STRIPE_LEN) ret = 0; BUG_ON(size > rbio->nr_data * BTRFS_STRIPE_LEN); spin_unlock(&rbio->bio_list_lock); return ret; } /* * returns 1 if it is safe to merge two rbios together. * The merging is safe if the two rbios correspond to * the same stripe and if they are both going in the same * direction (read vs write), and if neither one is * locked for final IO * * The caller is responsible for locking such that * rmw_locked is safe to test */ static int rbio_can_merge(struct btrfs_raid_bio *last, struct btrfs_raid_bio *cur) { if (test_bit(RBIO_RMW_LOCKED_BIT, &last->flags) || test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags)) return 0; /* * we can't merge with cached rbios, since the * idea is that when we merge the destination * rbio is going to run our IO for us. We can * steal from cached rbios though, other functions * handle that. */ if (test_bit(RBIO_CACHE_BIT, &last->flags) || test_bit(RBIO_CACHE_BIT, &cur->flags)) return 0; if (last->bioc->full_stripe_logical != cur->bioc->full_stripe_logical) return 0; /* we can't merge with different operations */ if (last->operation != cur->operation) return 0; /* * We've need read the full stripe from the drive. * check and repair the parity and write the new results. * * We're not allowed to add any new bios to the * bio list here, anyone else that wants to * change this stripe needs to do their own rmw. */ if (last->operation == BTRFS_RBIO_PARITY_SCRUB) return 0; if (last->operation == BTRFS_RBIO_READ_REBUILD) return 0; return 1; } static unsigned int rbio_stripe_sector_index(const struct btrfs_raid_bio *rbio, unsigned int stripe_nr, unsigned int sector_nr) { ASSERT_RBIO_STRIPE(stripe_nr < rbio->real_stripes, rbio, stripe_nr); ASSERT_RBIO_SECTOR(sector_nr < rbio->stripe_nsectors, rbio, sector_nr); return stripe_nr * rbio->stripe_nsectors + sector_nr; } /* Return a sector from rbio->stripe_sectors, not from the bio list */ static struct sector_ptr *rbio_stripe_sector(const struct btrfs_raid_bio *rbio, unsigned int stripe_nr, unsigned int sector_nr) { return &rbio->stripe_sectors[rbio_stripe_sector_index(rbio, stripe_nr, sector_nr)]; } /* Grab a sector inside P stripe */ static struct sector_ptr *rbio_pstripe_sector(const struct btrfs_raid_bio *rbio, unsigned int sector_nr) { return rbio_stripe_sector(rbio, rbio->nr_data, sector_nr); } /* Grab a sector inside Q stripe, return NULL if not RAID6 */ static struct sector_ptr *rbio_qstripe_sector(const struct btrfs_raid_bio *rbio, unsigned int sector_nr) { if (rbio->nr_data + 1 == rbio->real_stripes) return NULL; return rbio_stripe_sector(rbio, rbio->nr_data + 1, sector_nr); } /* * The first stripe in the table for a logical address * has the lock. rbios are added in one of three ways: * * 1) Nobody has the stripe locked yet. The rbio is given * the lock and 0 is returned. The caller must start the IO * themselves. * * 2) Someone has the stripe locked, but we're able to merge * with the lock owner. The rbio is freed and the IO will * start automatically along with the existing rbio. 1 is returned. * * 3) Someone has the stripe locked, but we're not able to merge. * The rbio is added to the lock owner's plug list, or merged into * an rbio already on the plug list. When the lock owner unlocks, * the next rbio on the list is run and the IO is started automatically. * 1 is returned * * If we return 0, the caller still owns the rbio and must continue with * IO submission. If we return 1, the caller must assume the rbio has * already been freed. */ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio) { struct btrfs_stripe_hash *h; struct btrfs_raid_bio *cur; struct btrfs_raid_bio *pending; struct btrfs_raid_bio *freeit = NULL; struct btrfs_raid_bio *cache_drop = NULL; int ret = 0; h = rbio->bioc->fs_info->stripe_hash_table->table + rbio_bucket(rbio); spin_lock(&h->lock); list_for_each_entry(cur, &h->hash_list, hash_list) { if (cur->bioc->full_stripe_logical != rbio->bioc->full_stripe_logical) continue; spin_lock(&cur->bio_list_lock); /* Can we steal this cached rbio's pages? */ if (bio_list_empty(&cur->bio_list) && list_empty(&cur->plug_list) && test_bit(RBIO_CACHE_BIT, &cur->flags) && !test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags)) { list_del_init(&cur->hash_list); refcount_dec(&cur->refs); steal_rbio(cur, rbio); cache_drop = cur; spin_unlock(&cur->bio_list_lock); goto lockit; } /* Can we merge into the lock owner? */ if (rbio_can_merge(cur, rbio)) { merge_rbio(cur, rbio); spin_unlock(&cur->bio_list_lock); freeit = rbio; ret = 1; goto out; } /* * We couldn't merge with the running rbio, see if we can merge * with the pending ones. We don't have to check for rmw_locked * because there is no way they are inside finish_rmw right now */ list_for_each_entry(pending, &cur->plug_list, plug_list) { if (rbio_can_merge(pending, rbio)) { merge_rbio(pending, rbio); spin_unlock(&cur->bio_list_lock); freeit = rbio; ret = 1; goto out; } } /* * No merging, put us on the tail of the plug list, our rbio * will be started with the currently running rbio unlocks */ list_add_tail(&rbio->plug_list, &cur->plug_list); spin_unlock(&cur->bio_list_lock); ret = 1; goto out; } lockit: refcount_inc(&rbio->refs); list_add(&rbio->hash_list, &h->hash_list); out: spin_unlock(&h->lock); if (cache_drop) remove_rbio_from_cache(cache_drop); if (freeit) free_raid_bio(freeit); return ret; } static void recover_rbio_work_locked(struct work_struct *work); /* * called as rmw or parity rebuild is completed. If the plug list has more * rbios waiting for this stripe, the next one on the list will be started */ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio) { int bucket; struct btrfs_stripe_hash *h; int keep_cache = 0; bucket = rbio_bucket(rbio); h = rbio->bioc->fs_info->stripe_hash_table->table + bucket; if (list_empty(&rbio->plug_list)) cache_rbio(rbio); spin_lock(&h->lock); spin_lock(&rbio->bio_list_lock); if (!list_empty(&rbio->hash_list)) { /* * if we're still cached and there is no other IO * to perform, just leave this rbio here for others * to steal from later */ if (list_empty(&rbio->plug_list) && test_bit(RBIO_CACHE_BIT, &rbio->flags)) { keep_cache = 1; clear_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); BUG_ON(!bio_list_empty(&rbio->bio_list)); goto done; } list_del_init(&rbio->hash_list); refcount_dec(&rbio->refs); /* * we use the plug list to hold all the rbios * waiting for the chance to lock this stripe. * hand the lock over to one of them. */ if (!list_empty(&rbio->plug_list)) { struct btrfs_raid_bio *next; struct list_head *head = rbio->plug_list.next; next = list_entry(head, struct btrfs_raid_bio, plug_list); list_del_init(&rbio->plug_list); list_add(&next->hash_list, &h->hash_list); refcount_inc(&next->refs); spin_unlock(&rbio->bio_list_lock); spin_unlock(&h->lock); if (next->operation == BTRFS_RBIO_READ_REBUILD) { start_async_work(next, recover_rbio_work_locked); } else if (next->operation == BTRFS_RBIO_WRITE) { steal_rbio(rbio, next); start_async_work(next, rmw_rbio_work_locked); } else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) { steal_rbio(rbio, next); start_async_work(next, scrub_rbio_work_locked); } goto done_nolock; } } done: spin_unlock(&rbio->bio_list_lock); spin_unlock(&h->lock); done_nolock: if (!keep_cache) remove_rbio_from_cache(rbio); } static void rbio_endio_bio_list(struct bio *cur, blk_status_t status) { struct bio *next; while (cur) { next = cur->bi_next; cur->bi_next = NULL; cur->bi_status = status; bio_endio(cur); cur = next; } } /* * this frees the rbio and runs through all the bios in the * bio_list and calls end_io on them */ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t status) { struct bio *cur = bio_list_get(&rbio->bio_list); struct bio *extra; kfree(rbio->csum_buf); bitmap_free(rbio->csum_bitmap); rbio->csum_buf = NULL; rbio->csum_bitmap = NULL; /* * Clear the data bitmap, as the rbio may be cached for later usage. * do this before before unlock_stripe() so there will be no new bio * for this bio. */ bitmap_clear(&rbio->dbitmap, 0, rbio->stripe_nsectors); /* * At this moment, rbio->bio_list is empty, however since rbio does not * always have RBIO_RMW_LOCKED_BIT set and rbio is still linked on the * hash list, rbio may be merged with others so that rbio->bio_list * becomes non-empty. * Once unlock_stripe() is done, rbio->bio_list will not be updated any * more and we can call bio_endio() on all queued bios. */ unlock_stripe(rbio); extra = bio_list_get(&rbio->bio_list); free_raid_bio(rbio); rbio_endio_bio_list(cur, status); if (extra) rbio_endio_bio_list(extra, status); } /* * Get a sector pointer specified by its @stripe_nr and @sector_nr. * * @rbio: The raid bio * @stripe_nr: Stripe number, valid range [0, real_stripe) * @sector_nr: Sector number inside the stripe, * valid range [0, stripe_nsectors) * @bio_list_only: Whether to use sectors inside the bio list only. * * The read/modify/write code wants to reuse the original bio page as much * as possible, and only use stripe_sectors as fallback. */ static struct sector_ptr *sector_in_rbio(struct btrfs_raid_bio *rbio, int stripe_nr, int sector_nr, bool bio_list_only) { struct sector_ptr *sector; int index; ASSERT_RBIO_STRIPE(stripe_nr >= 0 && stripe_nr < rbio->real_stripes, rbio, stripe_nr); ASSERT_RBIO_SECTOR(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors, rbio, sector_nr); index = stripe_nr * rbio->stripe_nsectors + sector_nr; ASSERT(index >= 0 && index < rbio->nr_sectors); spin_lock(&rbio->bio_list_lock); sector = &rbio->bio_sectors[index]; if (sector->has_paddr || bio_list_only) { /* Don't return sector without a valid page pointer */ if (!sector->has_paddr) sector = NULL; spin_unlock(&rbio->bio_list_lock); return sector; } spin_unlock(&rbio->bio_list_lock); return &rbio->stripe_sectors[index]; } /* * allocation and initial setup for the btrfs_raid_bio. Not * this does not allocate any pages for rbio->pages. */ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info, struct btrfs_io_context *bioc) { const unsigned int real_stripes = bioc->num_stripes - bioc->replace_nr_stripes; const unsigned int stripe_npages = BTRFS_STRIPE_LEN >> PAGE_SHIFT; const unsigned int num_pages = stripe_npages * real_stripes; const unsigned int stripe_nsectors = BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits; const unsigned int num_sectors = stripe_nsectors * real_stripes; struct btrfs_raid_bio *rbio; /* PAGE_SIZE must also be aligned to sectorsize for subpage support */ ASSERT(IS_ALIGNED(PAGE_SIZE, fs_info->sectorsize)); /* * Our current stripe len should be fixed to 64k thus stripe_nsectors * (at most 16) should be no larger than BITS_PER_LONG. */ ASSERT(stripe_nsectors <= BITS_PER_LONG); /* * Real stripes must be between 2 (2 disks RAID5, aka RAID1) and 256 * (limited by u8). */ ASSERT(real_stripes >= 2); ASSERT(real_stripes <= U8_MAX); rbio = kzalloc(sizeof(*rbio), GFP_NOFS); if (!rbio) return ERR_PTR(-ENOMEM); rbio->stripe_pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS); rbio->bio_sectors = kcalloc(num_sectors, sizeof(struct sector_ptr), GFP_NOFS); rbio->stripe_sectors = kcalloc(num_sectors, sizeof(struct sector_ptr), GFP_NOFS); rbio->finish_pointers = kcalloc(real_stripes, sizeof(void *), GFP_NOFS); rbio->error_bitmap = bitmap_zalloc(num_sectors, GFP_NOFS); if (!rbio->stripe_pages || !rbio->bio_sectors || !rbio->stripe_sectors || !rbio->finish_pointers || !rbio->error_bitmap) { free_raid_bio_pointers(rbio); kfree(rbio); return ERR_PTR(-ENOMEM); } bio_list_init(&rbio->bio_list); init_waitqueue_head(&rbio->io_wait); INIT_LIST_HEAD(&rbio->plug_list); spin_lock_init(&rbio->bio_list_lock); INIT_LIST_HEAD(&rbio->stripe_cache); INIT_LIST_HEAD(&rbio->hash_list); btrfs_get_bioc(bioc); rbio->bioc = bioc; rbio->nr_pages = num_pages; rbio->nr_sectors = num_sectors; rbio->real_stripes = real_stripes; rbio->stripe_npages = stripe_npages; rbio->stripe_nsectors = stripe_nsectors; refcount_set(&rbio->refs, 1); atomic_set(&rbio->stripes_pending, 0); ASSERT(btrfs_nr_parity_stripes(bioc->map_type)); rbio->nr_data = real_stripes - btrfs_nr_parity_stripes(bioc->map_type); ASSERT(rbio->nr_data > 0); return rbio; } /* allocate pages for all the stripes in the bio, including parity */ static int alloc_rbio_pages(struct btrfs_raid_bio *rbio) { int ret; ret = btrfs_alloc_page_array(rbio->nr_pages, rbio->stripe_pages, false); if (ret < 0) return ret; /* Mapping all sectors */ index_stripe_sectors(rbio); return 0; } /* only allocate pages for p/q stripes */ static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio) { const int data_pages = rbio->nr_data * rbio->stripe_npages; int ret; ret = btrfs_alloc_page_array(rbio->nr_pages - data_pages, rbio->stripe_pages + data_pages, false); if (ret < 0) return ret; index_stripe_sectors(rbio); return 0; } /* * Return the total number of errors found in the vertical stripe of @sector_nr. * * @faila and @failb will also be updated to the first and second stripe * number of the errors. */ static int get_rbio_veritical_errors(struct btrfs_raid_bio *rbio, int sector_nr, int *faila, int *failb) { int stripe_nr; int found_errors = 0; if (faila || failb) { /* * Both @faila and @failb should be valid pointers if any of * them is specified. */ ASSERT(faila && failb); *faila = -1; *failb = -1; } for (stripe_nr = 0; stripe_nr < rbio->real_stripes; stripe_nr++) { int total_sector_nr = stripe_nr * rbio->stripe_nsectors + sector_nr; if (test_bit(total_sector_nr, rbio->error_bitmap)) { found_errors++; if (faila) { /* Update faila and failb. */ if (*faila < 0) *faila = stripe_nr; else if (*failb < 0) *failb = stripe_nr; } } } return found_errors; } /* * Add a single sector @sector into our list of bios for IO. * * Return 0 if everything went well. * Return <0 for error. */ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio, struct bio_list *bio_list, struct sector_ptr *sector, unsigned int stripe_nr, unsigned int sector_nr, enum req_op op) { const u32 sectorsize = rbio->bioc->fs_info->sectorsize; struct bio *last = bio_list->tail; int ret; struct bio *bio; struct btrfs_io_stripe *stripe; u64 disk_start; /* * Note: here stripe_nr has taken device replace into consideration, * thus it can be larger than rbio->real_stripe. * So here we check against bioc->num_stripes, not rbio->real_stripes. */ ASSERT_RBIO_STRIPE(stripe_nr >= 0 && stripe_nr < rbio->bioc->num_stripes, rbio, stripe_nr); ASSERT_RBIO_SECTOR(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors, rbio, sector_nr); ASSERT(sector->has_paddr); stripe = &rbio->bioc->stripes[stripe_nr]; disk_start = stripe->physical + sector_nr * sectorsize; /* if the device is missing, just fail this stripe */ if (!stripe->dev->bdev) { int found_errors; set_bit(stripe_nr * rbio->stripe_nsectors + sector_nr, rbio->error_bitmap); /* Check if we have reached tolerance early. */ found_errors = get_rbio_veritical_errors(rbio, sector_nr, NULL, NULL); if (unlikely(found_errors > rbio->bioc->max_errors)) return -EIO; return 0; } /* see if we can add this page onto our existing bio */ if (last) { u64 last_end = last->bi_iter.bi_sector << SECTOR_SHIFT; last_end += last->bi_iter.bi_size; /* * we can't merge these if they are from different * devices or if they are not contiguous */ if (last_end == disk_start && !last->bi_status && last->bi_bdev == stripe->dev->bdev) { ret = bio_add_page(last, phys_to_page(sector->paddr), sectorsize, offset_in_page(sector->paddr)); if (ret == sectorsize) return 0; } } /* put a new bio on the list */ bio = bio_alloc(stripe->dev->bdev, max(BTRFS_STRIPE_LEN >> PAGE_SHIFT, 1), op, GFP_NOFS); bio->bi_iter.bi_sector = disk_start >> SECTOR_SHIFT; bio->bi_private = rbio; __bio_add_page(bio, phys_to_page(sector->paddr), sectorsize, offset_in_page(sector->paddr)); bio_list_add(bio_list, bio); return 0; } static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio) { const u32 sectorsize = rbio->bioc->fs_info->sectorsize; const u32 sectorsize_bits = rbio->bioc->fs_info->sectorsize_bits; struct bvec_iter iter = bio->bi_iter; phys_addr_t paddr; u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) - rbio->bioc->full_stripe_logical; btrfs_bio_for_each_block(paddr, bio, &iter, sectorsize) { unsigned int index = (offset >> sectorsize_bits); struct sector_ptr *sector = &rbio->bio_sectors[index]; sector->has_paddr = true; sector->paddr = paddr; offset += sectorsize; } } /* * helper function to walk our bio list and populate the bio_pages array with * the result. This seems expensive, but it is faster than constantly * searching through the bio list as we setup the IO in finish_rmw or stripe * reconstruction. * * This must be called before you trust the answers from page_in_rbio */ static void index_rbio_pages(struct btrfs_raid_bio *rbio) { struct bio *bio; spin_lock(&rbio->bio_list_lock); bio_list_for_each(bio, &rbio->bio_list) index_one_bio(rbio, bio); spin_unlock(&rbio->bio_list_lock); } static void bio_get_trace_info(struct btrfs_raid_bio *rbio, struct bio *bio, struct raid56_bio_trace_info *trace_info) { const struct btrfs_io_context *bioc = rbio->bioc; int i; ASSERT(bioc); /* We rely on bio->bi_bdev to find the stripe number. */ if (!bio->bi_bdev) goto not_found; for (i = 0; i < bioc->num_stripes; i++) { if (bio->bi_bdev != bioc->stripes[i].dev->bdev) continue; trace_info->stripe_nr = i; trace_info->devid = bioc->stripes[i].dev->devid; trace_info->offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) - bioc->stripes[i].physical; return; } not_found: trace_info->devid = -1; trace_info->offset = -1; trace_info->stripe_nr = -1; } static inline void bio_list_put(struct bio_list *bio_list) { struct bio *bio; while ((bio = bio_list_pop(bio_list))) bio_put(bio); } static void assert_rbio(struct btrfs_raid_bio *rbio) { if (!IS_ENABLED(CONFIG_BTRFS_ASSERT)) return; /* * At least two stripes (2 disks RAID5), and since real_stripes is U8, * we won't go beyond 256 disks anyway. */ ASSERT_RBIO(rbio->real_stripes >= 2, rbio); ASSERT_RBIO(rbio->nr_data > 0, rbio); /* * This is another check to make sure nr data stripes is smaller * than total stripes. */ ASSERT_RBIO(rbio->nr_data < rbio->real_stripes, rbio); } static inline void *kmap_local_sector(const struct sector_ptr *sector) { /* The sector pointer must have a page mapped to it. */ ASSERT(sector->has_paddr); return kmap_local_page(phys_to_page(sector->paddr)) + offset_in_page(sector->paddr); } /* Generate PQ for one vertical stripe. */ static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr) { void **pointers = rbio->finish_pointers; const u32 sectorsize = rbio->bioc->fs_info->sectorsize; struct sector_ptr *sector; int stripe; const bool has_qstripe = rbio->bioc->map_type & BTRFS_BLOCK_GROUP_RAID6; /* First collect one sector from each data stripe */ for (stripe = 0; stripe < rbio->nr_data; stripe++) { sector = sector_in_rbio(rbio, stripe, sectornr, 0); pointers[stripe] = kmap_local_sector(sector); } /* Then add the parity stripe */ sector = rbio_pstripe_sector(rbio, sectornr); sector->uptodate = 1; pointers[stripe++] = kmap_local_sector(sector); if (has_qstripe) { /* * RAID6, add the qstripe and call the library function * to fill in our p/q */ sector = rbio_qstripe_sector(rbio, sectornr); sector->uptodate = 1; pointers[stripe++] = kmap_local_sector(sector); assert_rbio(rbio); raid6_call.gen_syndrome(rbio->real_stripes, sectorsize, pointers); } else { /* raid5 */ memcpy(pointers[rbio->nr_data], pointers[0], sectorsize); run_xor(pointers + 1, rbio->nr_data - 1, sectorsize); } for (stripe = stripe - 1; stripe >= 0; stripe--) kunmap_local(pointers[stripe]); } static int rmw_assemble_write_bios(struct btrfs_raid_bio *rbio, struct bio_list *bio_list) { /* The total sector number inside the full stripe. */ int total_sector_nr; int sectornr; int stripe; int ret; ASSERT(bio_list_size(bio_list) == 0); /* We should have at least one data sector. */ ASSERT(bitmap_weight(&rbio->dbitmap, rbio->stripe_nsectors)); /* * Reset errors, as we may have errors inherited from from degraded * write. */ bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors); /* * Start assembly. Make bios for everything from the higher layers (the * bio_list in our rbio) and our P/Q. Ignore everything else. */ for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors; total_sector_nr++) { struct sector_ptr *sector; stripe = total_sector_nr / rbio->stripe_nsectors; sectornr = total_sector_nr % rbio->stripe_nsectors; /* This vertical stripe has no data, skip it. */ if (!test_bit(sectornr, &rbio->dbitmap)) continue; if (stripe < rbio->nr_data) { sector = sector_in_rbio(rbio, stripe, sectornr, 1); if (!sector) continue; } else { sector = rbio_stripe_sector(rbio, stripe, sectornr); } ret = rbio_add_io_sector(rbio, bio_list, sector, stripe, sectornr, REQ_OP_WRITE); if (ret) goto error; } if (likely(!rbio->bioc->replace_nr_stripes)) return 0; /* * Make a copy for the replace target device. * * Thus the source stripe number (in replace_stripe_src) should be valid. */ ASSERT(rbio->bioc->replace_stripe_src >= 0); for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors; total_sector_nr++) { struct sector_ptr *sector; stripe = total_sector_nr / rbio->stripe_nsectors; sectornr = total_sector_nr % rbio->stripe_nsectors; /* * For RAID56, there is only one device that can be replaced, * and replace_stripe_src[0] indicates the stripe number we * need to copy from. */ if (stripe != rbio->bioc->replace_stripe_src) { /* * We can skip the whole stripe completely, note * total_sector_nr will be increased by one anyway. */ ASSERT(sectornr == 0); total_sector_nr += rbio->stripe_nsectors - 1; continue; } /* This vertical stripe has no data, skip it. */ if (!test_bit(sectornr, &rbio->dbitmap)) continue; if (stripe < rbio->nr_data) { sector = sector_in_rbio(rbio, stripe, sectornr, 1); if (!sector) continue; } else { sector = rbio_stripe_sector(rbio, stripe, sectornr); } ret = rbio_add_io_sector(rbio, bio_list, sector, rbio->real_stripes, sectornr, REQ_OP_WRITE); if (ret) goto error; } return 0; error: bio_list_put(bio_list); return -EIO; } static void set_rbio_range_error(struct btrfs_raid_bio *rbio, struct bio *bio) { struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) - rbio->bioc->full_stripe_logical; int total_nr_sector = offset >> fs_info->sectorsize_bits; ASSERT(total_nr_sector < rbio->nr_data * rbio->stripe_nsectors); bitmap_set(rbio->error_bitmap, total_nr_sector, bio->bi_iter.bi_size >> fs_info->sectorsize_bits); /* * Special handling for raid56_alloc_missing_rbio() used by * scrub/replace. Unlike call path in raid56_parity_recover(), they * pass an empty bio here. Thus we have to find out the missing device * and mark the stripe error instead. */ if (bio->bi_iter.bi_size == 0) { bool found_missing = false; int stripe_nr; for (stripe_nr = 0; stripe_nr < rbio->real_stripes; stripe_nr++) { if (!rbio->bioc->stripes[stripe_nr].dev->bdev) { found_missing = true; bitmap_set(rbio->error_bitmap, stripe_nr * rbio->stripe_nsectors, rbio->stripe_nsectors); } } ASSERT(found_missing); } } /* * For subpage case, we can no longer set page Up-to-date directly for * stripe_pages[], thus we need to locate the sector. */ static struct sector_ptr *find_stripe_sector(struct btrfs_raid_bio *rbio, phys_addr_t paddr) { int i; for (i = 0; i < rbio->nr_sectors; i++) { struct sector_ptr *sector = &rbio->stripe_sectors[i]; if (sector->has_paddr && sector->paddr == paddr) return sector; } return NULL; } /* * this sets each page in the bio uptodate. It should only be used on private * rbio pages, nothing that comes in from the higher layers */ static void set_bio_pages_uptodate(struct btrfs_raid_bio *rbio, struct bio *bio) { const u32 blocksize = rbio->bioc->fs_info->sectorsize; phys_addr_t paddr; ASSERT(!bio_flagged(bio, BIO_CLONED)); btrfs_bio_for_each_block_all(paddr, bio, blocksize) { struct sector_ptr *sector = find_stripe_sector(rbio, paddr); ASSERT(sector); if (sector) sector->uptodate = 1; } } static int get_bio_sector_nr(struct btrfs_raid_bio *rbio, struct bio *bio) { phys_addr_t bvec_paddr = bvec_phys(bio_first_bvec_all(bio)); int i; for (i = 0; i < rbio->nr_sectors; i++) { if (rbio->stripe_sectors[i].paddr == bvec_paddr) break; if (rbio->bio_sectors[i].has_paddr && rbio->bio_sectors[i].paddr == bvec_paddr) break; } ASSERT(i < rbio->nr_sectors); return i; } static void rbio_update_error_bitmap(struct btrfs_raid_bio *rbio, struct bio *bio) { int total_sector_nr = get_bio_sector_nr(rbio, bio); u32 bio_size = 0; struct bio_vec *bvec; int i; bio_for_each_bvec_all(bvec, bio, i) bio_size += bvec->bv_len; /* * Since we can have multiple bios touching the error_bitmap, we cannot * call bitmap_set() without protection. * * Instead use set_bit() for each bit, as set_bit() itself is atomic. */ for (i = total_sector_nr; i < total_sector_nr + (bio_size >> rbio->bioc->fs_info->sectorsize_bits); i++) set_bit(i, rbio->error_bitmap); } /* Verify the data sectors at read time. */ static void verify_bio_data_sectors(struct btrfs_raid_bio *rbio, struct bio *bio) { struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; int total_sector_nr = get_bio_sector_nr(rbio, bio); phys_addr_t paddr; /* No data csum for the whole stripe, no need to verify. */ if (!rbio->csum_bitmap || !rbio->csum_buf) return; /* P/Q stripes, they have no data csum to verify against. */ if (total_sector_nr >= rbio->nr_data * rbio->stripe_nsectors) return; btrfs_bio_for_each_block_all(paddr, bio, fs_info->sectorsize) { u8 csum_buf[BTRFS_CSUM_SIZE]; u8 *expected_csum = rbio->csum_buf + total_sector_nr * fs_info->csum_size; int ret; /* No csum for this sector, skip to the next sector. */ if (!test_bit(total_sector_nr, rbio->csum_bitmap)) continue; ret = btrfs_check_block_csum(fs_info, paddr, csum_buf, expected_csum); if (ret < 0) set_bit(total_sector_nr, rbio->error_bitmap); total_sector_nr++; } } static void raid_wait_read_end_io(struct bio *bio) { struct btrfs_raid_bio *rbio = bio->bi_private; if (bio->bi_status) { rbio_update_error_bitmap(rbio, bio); } else { set_bio_pages_uptodate(rbio, bio); verify_bio_data_sectors(rbio, bio); } bio_put(bio); if (atomic_dec_and_test(&rbio->stripes_pending)) wake_up(&rbio->io_wait); } static void submit_read_wait_bio_list(struct btrfs_raid_bio *rbio, struct bio_list *bio_list) { struct bio *bio; atomic_set(&rbio->stripes_pending, bio_list_size(bio_list)); while ((bio = bio_list_pop(bio_list))) { bio->bi_end_io = raid_wait_read_end_io; if (trace_raid56_read_enabled()) { struct raid56_bio_trace_info trace_info = { 0 }; bio_get_trace_info(rbio, bio, &trace_info); trace_raid56_read(rbio, bio, &trace_info); } submit_bio(bio); } wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0); } static int alloc_rbio_data_pages(struct btrfs_raid_bio *rbio) { const int data_pages = rbio->nr_data * rbio->stripe_npages; int ret; ret = btrfs_alloc_page_array(data_pages, rbio->stripe_pages, false); if (ret < 0) return ret; index_stripe_sectors(rbio); return 0; } /* * We use plugging call backs to collect full stripes. * Any time we get a partial stripe write while plugged * we collect it into a list. When the unplug comes down, * we sort the list by logical block number and merge * everything we can into the same rbios */ struct btrfs_plug_cb { struct blk_plug_cb cb; struct btrfs_fs_info *info; struct list_head rbio_list; }; /* * rbios on the plug list are sorted for easier merging. */ static int plug_cmp(void *priv, const struct list_head *a, const struct list_head *b) { const struct btrfs_raid_bio *ra = container_of(a, struct btrfs_raid_bio, plug_list); const struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio, plug_list); u64 a_sector = ra->bio_list.head->bi_iter.bi_sector; u64 b_sector = rb->bio_list.head->bi_iter.bi_sector; if (a_sector < b_sector) return -1; if (a_sector > b_sector) return 1; return 0; } static void raid_unplug(struct blk_plug_cb *cb, bool from_schedule) { struct btrfs_plug_cb *plug = container_of(cb, struct btrfs_plug_cb, cb); struct btrfs_raid_bio *cur; struct btrfs_raid_bio *last = NULL; list_sort(NULL, &plug->rbio_list, plug_cmp); while (!list_empty(&plug->rbio_list)) { cur = list_first_entry(&plug->rbio_list, struct btrfs_raid_bio, plug_list); list_del_init(&cur->plug_list); if (rbio_is_full(cur)) { /* We have a full stripe, queue it down. */ start_async_work(cur, rmw_rbio_work); continue; } if (last) { if (rbio_can_merge(last, cur)) { merge_rbio(last, cur); free_raid_bio(cur); continue; } start_async_work(last, rmw_rbio_work); } last = cur; } if (last) start_async_work(last, rmw_rbio_work); kfree(plug); } /* Add the original bio into rbio->bio_list, and update rbio::dbitmap. */ static void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio) { const struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; const u64 orig_logical = orig_bio->bi_iter.bi_sector << SECTOR_SHIFT; const u64 full_stripe_start = rbio->bioc->full_stripe_logical; const u32 orig_len = orig_bio->bi_iter.bi_size; const u32 sectorsize = fs_info->sectorsize; u64 cur_logical; ASSERT_RBIO_LOGICAL(orig_logical >= full_stripe_start && orig_logical + orig_len <= full_stripe_start + rbio->nr_data * BTRFS_STRIPE_LEN, rbio, orig_logical); bio_list_add(&rbio->bio_list, orig_bio); rbio->bio_list_bytes += orig_bio->bi_iter.bi_size; /* Update the dbitmap. */ for (cur_logical = orig_logical; cur_logical < orig_logical + orig_len; cur_logical += sectorsize) { int bit = ((u32)(cur_logical - full_stripe_start) >> fs_info->sectorsize_bits) % rbio->stripe_nsectors; set_bit(bit, &rbio->dbitmap); } } /* * our main entry point for writes from the rest of the FS. */ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc) { struct btrfs_fs_info *fs_info = bioc->fs_info; struct btrfs_raid_bio *rbio; struct btrfs_plug_cb *plug = NULL; struct blk_plug_cb *cb; rbio = alloc_rbio(fs_info, bioc); if (IS_ERR(rbio)) { bio->bi_status = errno_to_blk_status(PTR_ERR(rbio)); bio_endio(bio); return; } rbio->operation = BTRFS_RBIO_WRITE; rbio_add_bio(rbio, bio); /* * Don't plug on full rbios, just get them out the door * as quickly as we can */ if (!rbio_is_full(rbio)) { cb = blk_check_plugged(raid_unplug, fs_info, sizeof(*plug)); if (cb) { plug = container_of(cb, struct btrfs_plug_cb, cb); if (!plug->info) { plug->info = fs_info; INIT_LIST_HEAD(&plug->rbio_list); } list_add_tail(&rbio->plug_list, &plug->rbio_list); return; } } /* * Either we don't have any existing plug, or we're doing a full stripe, * queue the rmw work now. */ start_async_work(rbio, rmw_rbio_work); } static int verify_one_sector(struct btrfs_raid_bio *rbio, int stripe_nr, int sector_nr) { struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; struct sector_ptr *sector; u8 csum_buf[BTRFS_CSUM_SIZE]; u8 *csum_expected; int ret; if (!rbio->csum_bitmap || !rbio->csum_buf) return 0; /* No way to verify P/Q as they are not covered by data csum. */ if (stripe_nr >= rbio->nr_data) return 0; /* * If we're rebuilding a read, we have to use pages from the * bio list if possible. */ if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { sector = sector_in_rbio(rbio, stripe_nr, sector_nr, 0); } else { sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr); } csum_expected = rbio->csum_buf + (stripe_nr * rbio->stripe_nsectors + sector_nr) * fs_info->csum_size; ret = btrfs_check_block_csum(fs_info, sector->paddr, csum_buf, csum_expected); return ret; } /* * Recover a vertical stripe specified by @sector_nr. * @*pointers are the pre-allocated pointers by the caller, so we don't * need to allocate/free the pointers again and again. */ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr, void **pointers, void **unmap_array) { struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; struct sector_ptr *sector; const u32 sectorsize = fs_info->sectorsize; int found_errors; int faila; int failb; int stripe_nr; int ret = 0; /* * Now we just use bitmap to mark the horizontal stripes in * which we have data when doing parity scrub. */ if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB && !test_bit(sector_nr, &rbio->dbitmap)) return 0; found_errors = get_rbio_veritical_errors(rbio, sector_nr, &faila, &failb); /* * No errors in the vertical stripe, skip it. Can happen for recovery * which only part of a stripe failed csum check. */ if (!found_errors) return 0; if (unlikely(found_errors > rbio->bioc->max_errors)) return -EIO; /* * Setup our array of pointers with sectors from each stripe * * NOTE: store a duplicate array of pointers to preserve the * pointer order. */ for (stripe_nr = 0; stripe_nr < rbio->real_stripes; stripe_nr++) { /* * If we're rebuilding a read, we have to use pages from the * bio list if possible. */ if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { sector = sector_in_rbio(rbio, stripe_nr, sector_nr, 0); } else { sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr); } pointers[stripe_nr] = kmap_local_sector(sector); unmap_array[stripe_nr] = pointers[stripe_nr]; } /* All raid6 handling here */ if (rbio->bioc->map_type & BTRFS_BLOCK_GROUP_RAID6) { /* Single failure, rebuild from parity raid5 style */ if (failb < 0) { if (faila == rbio->nr_data) /* * Just the P stripe has failed, without * a bad data or Q stripe. * We have nothing to do, just skip the * recovery for this stripe. */ goto cleanup; /* * a single failure in raid6 is rebuilt * in the pstripe code below */ goto pstripe; } /* * If the q stripe is failed, do a pstripe reconstruction from * the xors. * If both the q stripe and the P stripe are failed, we're * here due to a crc mismatch and we can't give them the * data they want. */ if (failb == rbio->real_stripes - 1) { if (faila == rbio->real_stripes - 2) /* * Only P and Q are corrupted. * We only care about data stripes recovery, * can skip this vertical stripe. */ goto cleanup; /* * Otherwise we have one bad data stripe and * a good P stripe. raid5! */ goto pstripe; } if (failb == rbio->real_stripes - 2) { raid6_datap_recov(rbio->real_stripes, sectorsize, faila, pointers); } else { raid6_2data_recov(rbio->real_stripes, sectorsize, faila, failb, pointers); } } else { void *p; /* Rebuild from P stripe here (raid5 or raid6). */ ASSERT(failb == -1); pstripe: /* Copy parity block into failed block to start with */ memcpy(pointers[faila], pointers[rbio->nr_data], sectorsize); /* Rearrange the pointer array */ p = pointers[faila]; for (stripe_nr = faila; stripe_nr < rbio->nr_data - 1; stripe_nr++) pointers[stripe_nr] = pointers[stripe_nr + 1]; pointers[rbio->nr_data - 1] = p; /* Xor in the rest */ run_xor(pointers, rbio->nr_data - 1, sectorsize); } /* * No matter if this is a RMW or recovery, we should have all * failed sectors repaired in the vertical stripe, thus they are now * uptodate. * Especially if we determine to cache the rbio, we need to * have at least all data sectors uptodate. * * If possible, also check if the repaired sector matches its data * checksum. */ if (faila >= 0) { ret = verify_one_sector(rbio, faila, sector_nr); if (ret < 0) goto cleanup; sector = rbio_stripe_sector(rbio, faila, sector_nr); sector->uptodate = 1; } if (failb >= 0) { ret = verify_one_sector(rbio, failb, sector_nr); if (ret < 0) goto cleanup; sector = rbio_stripe_sector(rbio, failb, sector_nr); sector->uptodate = 1; } cleanup: for (stripe_nr = rbio->real_stripes - 1; stripe_nr >= 0; stripe_nr--) kunmap_local(unmap_array[stripe_nr]); return ret; } static int recover_sectors(struct btrfs_raid_bio *rbio) { void **pointers = NULL; void **unmap_array = NULL; int sectornr; int ret = 0; /* * @pointers array stores the pointer for each sector. * * @unmap_array stores copy of pointers that does not get reordered * during reconstruction so that kunmap_local works. */ pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS); unmap_array = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS); if (!pointers || !unmap_array) { ret = -ENOMEM; goto out; } if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { spin_lock(&rbio->bio_list_lock); set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); spin_unlock(&rbio->bio_list_lock); } index_rbio_pages(rbio); for (sectornr = 0; sectornr < rbio->stripe_nsectors; sectornr++) { ret = recover_vertical(rbio, sectornr, pointers, unmap_array); if (ret < 0) break; } out: kfree(pointers); kfree(unmap_array); return ret; } static void recover_rbio(struct btrfs_raid_bio *rbio) { struct bio_list bio_list = BIO_EMPTY_LIST; int total_sector_nr; int ret = 0; /* * Either we're doing recover for a read failure or degraded write, * caller should have set error bitmap correctly. */ ASSERT(bitmap_weight(rbio->error_bitmap, rbio->nr_sectors)); /* For recovery, we need to read all sectors including P/Q. */ ret = alloc_rbio_pages(rbio); if (ret < 0) goto out; index_rbio_pages(rbio); /* * Read everything that hasn't failed. However this time we will * not trust any cached sector. * As we may read out some stale data but higher layer is not reading * that stale part. * * So here we always re-read everything in recovery path. */ for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors; total_sector_nr++) { int stripe = total_sector_nr / rbio->stripe_nsectors; int sectornr = total_sector_nr % rbio->stripe_nsectors; struct sector_ptr *sector; /* * Skip the range which has error. It can be a range which is * marked error (for csum mismatch), or it can be a missing * device. */ if (!rbio->bioc->stripes[stripe].dev->bdev || test_bit(total_sector_nr, rbio->error_bitmap)) { /* * Also set the error bit for missing device, which * may not yet have its error bit set. */ set_bit(total_sector_nr, rbio->error_bitmap); continue; } sector = rbio_stripe_sector(rbio, stripe, sectornr); ret = rbio_add_io_sector(rbio, &bio_list, sector, stripe, sectornr, REQ_OP_READ); if (ret < 0) { bio_list_put(&bio_list); goto out; } } submit_read_wait_bio_list(rbio, &bio_list); ret = recover_sectors(rbio); out: rbio_orig_end_io(rbio, errno_to_blk_status(ret)); } static void recover_rbio_work(struct work_struct *work) { struct btrfs_raid_bio *rbio; rbio = container_of(work, struct btrfs_raid_bio, work); if (!lock_stripe_add(rbio)) recover_rbio(rbio); } static void recover_rbio_work_locked(struct work_struct *work) { recover_rbio(container_of(work, struct btrfs_raid_bio, work)); } static void set_rbio_raid6_extra_error(struct btrfs_raid_bio *rbio, int mirror_num) { bool found = false; int sector_nr; /* * This is for RAID6 extra recovery tries, thus mirror number should * be large than 2. * Mirror 1 means read from data stripes. Mirror 2 means rebuild using * RAID5 methods. */ ASSERT(mirror_num > 2); for (sector_nr = 0; sector_nr < rbio->stripe_nsectors; sector_nr++) { int found_errors; int faila; int failb; found_errors = get_rbio_veritical_errors(rbio, sector_nr, &faila, &failb); /* This vertical stripe doesn't have errors. */ if (!found_errors) continue; /* * If we found errors, there should be only one error marked * by previous set_rbio_range_error(). */ ASSERT(found_errors == 1); found = true; /* Now select another stripe to mark as error. */ failb = rbio->real_stripes - (mirror_num - 1); if (failb <= faila) failb--; /* Set the extra bit in error bitmap. */ if (failb >= 0) set_bit(failb * rbio->stripe_nsectors + sector_nr, rbio->error_bitmap); } /* We should found at least one vertical stripe with error.*/ ASSERT(found); } /* * the main entry point for reads from the higher layers. This * is really only called when the normal read path had a failure, * so we assume the bio they send down corresponds to a failed part * of the drive. */ void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc, int mirror_num) { struct btrfs_fs_info *fs_info = bioc->fs_info; struct btrfs_raid_bio *rbio; rbio = alloc_rbio(fs_info, bioc); if (IS_ERR(rbio)) { bio->bi_status = errno_to_blk_status(PTR_ERR(rbio)); bio_endio(bio); return; } rbio->operation = BTRFS_RBIO_READ_REBUILD; rbio_add_bio(rbio, bio); set_rbio_range_error(rbio, bio); /* * Loop retry: * for 'mirror == 2', reconstruct from all other stripes. * for 'mirror_num > 2', select a stripe to fail on every retry. */ if (mirror_num > 2) set_rbio_raid6_extra_error(rbio, mirror_num); start_async_work(rbio, recover_rbio_work); } static void fill_data_csums(struct btrfs_raid_bio *rbio) { struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; struct btrfs_root *csum_root = btrfs_csum_root(fs_info, rbio->bioc->full_stripe_logical); const u64 start = rbio->bioc->full_stripe_logical; const u32 len = (rbio->nr_data * rbio->stripe_nsectors) << fs_info->sectorsize_bits; int ret; /* The rbio should not have its csum buffer initialized. */ ASSERT(!rbio->csum_buf && !rbio->csum_bitmap); /* * Skip the csum search if: * * - The rbio doesn't belong to data block groups * Then we are doing IO for tree blocks, no need to search csums. * * - The rbio belongs to mixed block groups * This is to avoid deadlock, as we're already holding the full * stripe lock, if we trigger a metadata read, and it needs to do * raid56 recovery, we will deadlock. */ if (!(rbio->bioc->map_type & BTRFS_BLOCK_GROUP_DATA) || rbio->bioc->map_type & BTRFS_BLOCK_GROUP_METADATA) return; rbio->csum_buf = kzalloc(rbio->nr_data * rbio->stripe_nsectors * fs_info->csum_size, GFP_NOFS); rbio->csum_bitmap = bitmap_zalloc(rbio->nr_data * rbio->stripe_nsectors, GFP_NOFS); if (!rbio->csum_buf || !rbio->csum_bitmap) { ret = -ENOMEM; goto error; } ret = btrfs_lookup_csums_bitmap(csum_root, NULL, start, start + len - 1, rbio->csum_buf, rbio->csum_bitmap); if (ret < 0) goto error; if (bitmap_empty(rbio->csum_bitmap, len >> fs_info->sectorsize_bits)) goto no_csum; return; error: /* * We failed to allocate memory or grab the csum, but it's not fatal, * we can still continue. But better to warn users that RMW is no * longer safe for this particular sub-stripe write. */ btrfs_warn_rl(fs_info, "sub-stripe write for full stripe %llu is not safe, failed to get csum: %d", rbio->bioc->full_stripe_logical, ret); no_csum: kfree(rbio->csum_buf); bitmap_free(rbio->csum_bitmap); rbio->csum_buf = NULL; rbio->csum_bitmap = NULL; } static int rmw_read_wait_recover(struct btrfs_raid_bio *rbio) { struct bio_list bio_list = BIO_EMPTY_LIST; int total_sector_nr; int ret = 0; /* * Fill the data csums we need for data verification. We need to fill * the csum_bitmap/csum_buf first, as our endio function will try to * verify the data sectors. */ fill_data_csums(rbio); /* * Build a list of bios to read all sectors (including data and P/Q). * * This behavior is to compensate the later csum verification and recovery. */ for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors; total_sector_nr++) { struct sector_ptr *sector; int stripe = total_sector_nr / rbio->stripe_nsectors; int sectornr = total_sector_nr % rbio->stripe_nsectors; sector = rbio_stripe_sector(rbio, stripe, sectornr); ret = rbio_add_io_sector(rbio, &bio_list, sector, stripe, sectornr, REQ_OP_READ); if (ret) { bio_list_put(&bio_list); return ret; } } /* * We may or may not have any corrupted sectors (including missing dev * and csum mismatch), just let recover_sectors() to handle them all. */ submit_read_wait_bio_list(rbio, &bio_list); return recover_sectors(rbio); } static void raid_wait_write_end_io(struct bio *bio) { struct btrfs_raid_bio *rbio = bio->bi_private; if (bio->bi_status) rbio_update_error_bitmap(rbio, bio); bio_put(bio); if (atomic_dec_and_test(&rbio->stripes_pending)) wake_up(&rbio->io_wait); } static void submit_write_bios(struct btrfs_raid_bio *rbio, struct bio_list *bio_list) { struct bio *bio; atomic_set(&rbio->stripes_pending, bio_list_size(bio_list)); while ((bio = bio_list_pop(bio_list))) { bio->bi_end_io = raid_wait_write_end_io; if (trace_raid56_write_enabled()) { struct raid56_bio_trace_info trace_info = { 0 }; bio_get_trace_info(rbio, bio, &trace_info); trace_raid56_write(rbio, bio, &trace_info); } submit_bio(bio); } } /* * To determine if we need to read any sector from the disk. * Should only be utilized in RMW path, to skip cached rbio. */ static bool need_read_stripe_sectors(struct btrfs_raid_bio *rbio) { int i; for (i = 0; i < rbio->nr_data * rbio->stripe_nsectors; i++) { struct sector_ptr *sector = &rbio->stripe_sectors[i]; /* * We have a sector which doesn't have page nor uptodate, * thus this rbio can not be cached one, as cached one must * have all its data sectors present and uptodate. */ if (!sector->has_paddr || !sector->uptodate) return true; } return false; } static void rmw_rbio(struct btrfs_raid_bio *rbio) { struct bio_list bio_list; int sectornr; int ret = 0; /* * Allocate the pages for parity first, as P/Q pages will always be * needed for both full-stripe and sub-stripe writes. */ ret = alloc_rbio_parity_pages(rbio); if (ret < 0) goto out; /* * Either full stripe write, or we have every data sector already * cached, can go to write path immediately. */ if (!rbio_is_full(rbio) && need_read_stripe_sectors(rbio)) { /* * Now we're doing sub-stripe write, also need all data stripes * to do the full RMW. */ ret = alloc_rbio_data_pages(rbio); if (ret < 0) goto out; index_rbio_pages(rbio); ret = rmw_read_wait_recover(rbio); if (ret < 0) goto out; } /* * At this stage we're not allowed to add any new bios to the * bio list any more, anyone else that wants to change this stripe * needs to do their own rmw. */ spin_lock(&rbio->bio_list_lock); set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); spin_unlock(&rbio->bio_list_lock); bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors); index_rbio_pages(rbio); /* * We don't cache full rbios because we're assuming * the higher layers are unlikely to use this area of * the disk again soon. If they do use it again, * hopefully they will send another full bio. */ if (!rbio_is_full(rbio)) cache_rbio_pages(rbio); else clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); for (sectornr = 0; sectornr < rbio->stripe_nsectors; sectornr++) generate_pq_vertical(rbio, sectornr); bio_list_init(&bio_list); ret = rmw_assemble_write_bios(rbio, &bio_list); if (ret < 0) goto out; /* We should have at least one bio assembled. */ ASSERT(bio_list_size(&bio_list)); submit_write_bios(rbio, &bio_list); wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0); /* We may have more errors than our tolerance during the read. */ for (sectornr = 0; sectornr < rbio->stripe_nsectors; sectornr++) { int found_errors; found_errors = get_rbio_veritical_errors(rbio, sectornr, NULL, NULL); if (unlikely(found_errors > rbio->bioc->max_errors)) { ret = -EIO; break; } } out: rbio_orig_end_io(rbio, errno_to_blk_status(ret)); } static void rmw_rbio_work(struct work_struct *work) { struct btrfs_raid_bio *rbio; rbio = container_of(work, struct btrfs_raid_bio, work); if (lock_stripe_add(rbio) == 0) rmw_rbio(rbio); } static void rmw_rbio_work_locked(struct work_struct *work) { rmw_rbio(container_of(work, struct btrfs_raid_bio, work)); } /* * The following code is used to scrub/replace the parity stripe * * Caller must have already increased bio_counter for getting @bioc. * * Note: We need make sure all the pages that add into the scrub/replace * raid bio are correct and not be changed during the scrub/replace. That * is those pages just hold metadata or file data with checksum. */ struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio, struct btrfs_io_context *bioc, struct btrfs_device *scrub_dev, unsigned long *dbitmap, int stripe_nsectors) { struct btrfs_fs_info *fs_info = bioc->fs_info; struct btrfs_raid_bio *rbio; int i; rbio = alloc_rbio(fs_info, bioc); if (IS_ERR(rbio)) return NULL; bio_list_add(&rbio->bio_list, bio); /* * This is a special bio which is used to hold the completion handler * and make the scrub rbio is similar to the other types */ ASSERT(!bio->bi_iter.bi_size); rbio->operation = BTRFS_RBIO_PARITY_SCRUB; /* * After mapping bioc with BTRFS_MAP_WRITE, parities have been sorted * to the end position, so this search can start from the first parity * stripe. */ for (i = rbio->nr_data; i < rbio->real_stripes; i++) { if (bioc->stripes[i].dev == scrub_dev) { rbio->scrubp = i; break; } } ASSERT_RBIO_STRIPE(i < rbio->real_stripes, rbio, i); bitmap_copy(&rbio->dbitmap, dbitmap, stripe_nsectors); return rbio; } /* * We just scrub the parity that we have correct data on the same horizontal, * so we needn't allocate all pages for all the stripes. */ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio) { const u32 sectorsize = rbio->bioc->fs_info->sectorsize; int total_sector_nr; for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors; total_sector_nr++) { struct page *page; int sectornr = total_sector_nr % rbio->stripe_nsectors; int index = (total_sector_nr * sectorsize) >> PAGE_SHIFT; if (!test_bit(sectornr, &rbio->dbitmap)) continue; if (rbio->stripe_pages[index]) continue; page = alloc_page(GFP_NOFS); if (!page) return -ENOMEM; rbio->stripe_pages[index] = page; } index_stripe_sectors(rbio); return 0; } static int finish_parity_scrub(struct btrfs_raid_bio *rbio) { struct btrfs_io_context *bioc = rbio->bioc; const u32 sectorsize = bioc->fs_info->sectorsize; void **pointers = rbio->finish_pointers; unsigned long *pbitmap = &rbio->finish_pbitmap; int nr_data = rbio->nr_data; int stripe; int sectornr; bool has_qstripe; struct page *page; struct sector_ptr p_sector = { 0 }; struct sector_ptr q_sector = { 0 }; struct bio_list bio_list; int is_replace = 0; int ret; bio_list_init(&bio_list); if (rbio->real_stripes - rbio->nr_data == 1) has_qstripe = false; else if (rbio->real_stripes - rbio->nr_data == 2) has_qstripe = true; else BUG(); /* * Replace is running and our P/Q stripe is being replaced, then we * need to duplicate the final write to replace target. */ if (bioc->replace_nr_stripes && bioc->replace_stripe_src == rbio->scrubp) { is_replace = 1; bitmap_copy(pbitmap, &rbio->dbitmap, rbio->stripe_nsectors); } /* * Because the higher layers(scrubber) are unlikely to * use this area of the disk again soon, so don't cache * it. */ clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); page = alloc_page(GFP_NOFS); if (!page) return -ENOMEM; p_sector.has_paddr = true; p_sector.paddr = page_to_phys(page); p_sector.uptodate = 1; page = NULL; if (has_qstripe) { /* RAID6, allocate and map temp space for the Q stripe */ page = alloc_page(GFP_NOFS); if (!page) { __free_page(phys_to_page(p_sector.paddr)); p_sector.has_paddr = false; return -ENOMEM; } q_sector.has_paddr = true; q_sector.paddr = page_to_phys(page); q_sector.uptodate = 1; page = NULL; pointers[rbio->real_stripes - 1] = kmap_local_sector(&q_sector); } bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors); /* Map the parity stripe just once */ pointers[nr_data] = kmap_local_sector(&p_sector); for_each_set_bit(sectornr, &rbio->dbitmap, rbio->stripe_nsectors) { struct sector_ptr *sector; void *parity; /* first collect one page from each data stripe */ for (stripe = 0; stripe < nr_data; stripe++) { sector = sector_in_rbio(rbio, stripe, sectornr, 0); pointers[stripe] = kmap_local_sector(sector); } if (has_qstripe) { assert_rbio(rbio); /* RAID6, call the library function to fill in our P/Q */ raid6_call.gen_syndrome(rbio->real_stripes, sectorsize, pointers); } else { /* raid5 */ memcpy(pointers[nr_data], pointers[0], sectorsize); run_xor(pointers + 1, nr_data - 1, sectorsize); } /* Check scrubbing parity and repair it */ sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr); parity = kmap_local_sector(sector); if (memcmp(parity, pointers[rbio->scrubp], sectorsize) != 0) memcpy(parity, pointers[rbio->scrubp], sectorsize); else /* Parity is right, needn't writeback */ bitmap_clear(&rbio->dbitmap, sectornr, 1); kunmap_local(parity); for (stripe = nr_data - 1; stripe >= 0; stripe--) kunmap_local(pointers[stripe]); } kunmap_local(pointers[nr_data]); __free_page(phys_to_page(p_sector.paddr)); p_sector.has_paddr = false; if (q_sector.has_paddr) { __free_page(phys_to_page(q_sector.paddr)); q_sector.has_paddr = false; } /* * time to start writing. Make bios for everything from the * higher layers (the bio_list in our rbio) and our p/q. Ignore * everything else. */ for_each_set_bit(sectornr, &rbio->dbitmap, rbio->stripe_nsectors) { struct sector_ptr *sector; sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr); ret = rbio_add_io_sector(rbio, &bio_list, sector, rbio->scrubp, sectornr, REQ_OP_WRITE); if (ret) goto cleanup; } if (!is_replace) goto submit_write; /* * Replace is running and our parity stripe needs to be duplicated to * the target device. Check we have a valid source stripe number. */ ASSERT_RBIO(rbio->bioc->replace_stripe_src >= 0, rbio); for_each_set_bit(sectornr, pbitmap, rbio->stripe_nsectors) { struct sector_ptr *sector; sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr); ret = rbio_add_io_sector(rbio, &bio_list, sector, rbio->real_stripes, sectornr, REQ_OP_WRITE); if (ret) goto cleanup; } submit_write: submit_write_bios(rbio, &bio_list); return 0; cleanup: bio_list_put(&bio_list); return ret; } static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe) { if (stripe >= 0 && stripe < rbio->nr_data) return 1; return 0; } static int recover_scrub_rbio(struct btrfs_raid_bio *rbio) { void **pointers = NULL; void **unmap_array = NULL; int sector_nr; int ret = 0; /* * @pointers array stores the pointer for each sector. * * @unmap_array stores copy of pointers that does not get reordered * during reconstruction so that kunmap_local works. */ pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS); unmap_array = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS); if (!pointers || !unmap_array) { ret = -ENOMEM; goto out; } for (sector_nr = 0; sector_nr < rbio->stripe_nsectors; sector_nr++) { int dfail = 0, failp = -1; int faila; int failb; int found_errors; found_errors = get_rbio_veritical_errors(rbio, sector_nr, &faila, &failb); if (unlikely(found_errors > rbio->bioc->max_errors)) { ret = -EIO; goto out; } if (found_errors == 0) continue; /* We should have at least one error here. */ ASSERT(faila >= 0 || failb >= 0); if (is_data_stripe(rbio, faila)) dfail++; else if (is_parity_stripe(faila)) failp = faila; if (is_data_stripe(rbio, failb)) dfail++; else if (is_parity_stripe(failb)) failp = failb; /* * Because we can not use a scrubbing parity to repair the * data, so the capability of the repair is declined. (In the * case of RAID5, we can not repair anything.) */ if (unlikely(dfail > rbio->bioc->max_errors - 1)) { ret = -EIO; goto out; } /* * If all data is good, only parity is correctly, just repair * the parity, no need to recover data stripes. */ if (dfail == 0) continue; /* * Here means we got one corrupted data stripe and one * corrupted parity on RAID6, if the corrupted parity is * scrubbing parity, luckily, use the other one to repair the * data, or we can not repair the data stripe. */ if (unlikely(failp != rbio->scrubp)) { ret = -EIO; goto out; } ret = recover_vertical(rbio, sector_nr, pointers, unmap_array); if (ret < 0) goto out; } out: kfree(pointers); kfree(unmap_array); return ret; } static int scrub_assemble_read_bios(struct btrfs_raid_bio *rbio) { struct bio_list bio_list = BIO_EMPTY_LIST; int total_sector_nr; int ret = 0; /* Build a list of bios to read all the missing parts. */ for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors; total_sector_nr++) { int sectornr = total_sector_nr % rbio->stripe_nsectors; int stripe = total_sector_nr / rbio->stripe_nsectors; struct sector_ptr *sector; /* No data in the vertical stripe, no need to read. */ if (!test_bit(sectornr, &rbio->dbitmap)) continue; /* * We want to find all the sectors missing from the rbio and * read them from the disk. If sector_in_rbio() finds a sector * in the bio list we don't need to read it off the stripe. */ sector = sector_in_rbio(rbio, stripe, sectornr, 1); if (sector) continue; sector = rbio_stripe_sector(rbio, stripe, sectornr); /* * The bio cache may have handed us an uptodate sector. If so, * use it. */ if (sector->uptodate) continue; ret = rbio_add_io_sector(rbio, &bio_list, sector, stripe, sectornr, REQ_OP_READ); if (ret) { bio_list_put(&bio_list); return ret; } } submit_read_wait_bio_list(rbio, &bio_list); return 0; } static void scrub_rbio(struct btrfs_raid_bio *rbio) { int sector_nr; int ret; ret = alloc_rbio_essential_pages(rbio); if (ret) goto out; bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors); ret = scrub_assemble_read_bios(rbio); if (ret < 0) goto out; /* We may have some failures, recover the failed sectors first. */ ret = recover_scrub_rbio(rbio); if (ret < 0) goto out; /* * We have every sector properly prepared. Can finish the scrub * and writeback the good content. */ ret = finish_parity_scrub(rbio); wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0); for (sector_nr = 0; sector_nr < rbio->stripe_nsectors; sector_nr++) { int found_errors; found_errors = get_rbio_veritical_errors(rbio, sector_nr, NULL, NULL); if (unlikely(found_errors > rbio->bioc->max_errors)) { ret = -EIO; break; } } out: rbio_orig_end_io(rbio, errno_to_blk_status(ret)); } static void scrub_rbio_work_locked(struct work_struct *work) { scrub_rbio(container_of(work, struct btrfs_raid_bio, work)); } void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio) { if (!lock_stripe_add(rbio)) start_async_work(rbio, scrub_rbio_work_locked); } /* * This is for scrub call sites where we already have correct data contents. * This allows us to avoid reading data stripes again. * * Unfortunately here we have to do folio copy, other than reusing the pages. * This is due to the fact rbio has its own page management for its cache. */ void raid56_parity_cache_data_folios(struct btrfs_raid_bio *rbio, struct folio **data_folios, u64 data_logical) { struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; const u64 offset_in_full_stripe = data_logical - rbio->bioc->full_stripe_logical; unsigned int findex = 0; unsigned int foffset = 0; int ret; /* We shouldn't hit RAID56 for bs > ps cases for now. */ ASSERT(fs_info->sectorsize <= PAGE_SIZE); /* * If we hit ENOMEM temporarily, but later at * raid56_parity_submit_scrub_rbio() time it succeeded, we just do * the extra read, not a big deal. * * If we hit ENOMEM later at raid56_parity_submit_scrub_rbio() time, * the bio would got proper error number set. */ ret = alloc_rbio_data_pages(rbio); if (ret < 0) return; /* data_logical must be at stripe boundary and inside the full stripe. */ ASSERT(IS_ALIGNED(offset_in_full_stripe, BTRFS_STRIPE_LEN)); ASSERT(offset_in_full_stripe < (rbio->nr_data << BTRFS_STRIPE_LEN_SHIFT)); for (unsigned int cur_off = offset_in_full_stripe; cur_off < offset_in_full_stripe + BTRFS_STRIPE_LEN; cur_off += PAGE_SIZE) { const unsigned int pindex = cur_off >> PAGE_SHIFT; void *kaddr; kaddr = kmap_local_page(rbio->stripe_pages[pindex]); memcpy_from_folio(kaddr, data_folios[findex], foffset, PAGE_SIZE); kunmap_local(kaddr); foffset += PAGE_SIZE; ASSERT(foffset <= folio_size(data_folios[findex])); if (foffset == folio_size(data_folios[findex])) { findex++; foffset = 0; } } for (unsigned int sector_nr = offset_in_full_stripe >> fs_info->sectorsize_bits; sector_nr < (offset_in_full_stripe + BTRFS_STRIPE_LEN) >> fs_info->sectorsize_bits; sector_nr++) rbio->stripe_sectors[sector_nr].uptodate = true; }
13 14 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 // SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter */ /* * This file implements UBIFS shrinker which evicts clean znodes from the TNC * tree when Linux VM needs more RAM. * * We do not implement any LRU lists to find oldest znodes to free because it * would add additional overhead to the file system fast paths. So the shrinker * just walks the TNC tree when searching for znodes to free. * * If the root of a TNC sub-tree is clean and old enough, then the children are * also clean and old enough. So the shrinker walks the TNC in level order and * dumps entire sub-trees. * * The age of znodes is just the time-stamp when they were last looked at. * The current shrinker first tries to evict old znodes, then young ones. * * Since the shrinker is global, it has to protect against races with FS * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'. */ #include "ubifs.h" /* List of all UBIFS file-system instances */ LIST_HEAD(ubifs_infos); /* * We number each shrinker run and record the number on the ubifs_info structure * so that we can easily work out which ubifs_info structures have already been * done by the current run. */ static unsigned int shrinker_run_no; /* Protects 'ubifs_infos' list */ DEFINE_SPINLOCK(ubifs_infos_lock); /* Global clean znode counter (for all mounted UBIFS instances) */ atomic_long_t ubifs_clean_zn_cnt; /** * shrink_tnc - shrink TNC tree. * @c: UBIFS file-system description object * @nr: number of znodes to free * @age: the age of znodes to free * @contention: if any contention, this is set to %1 * * This function traverses TNC tree and frees clean znodes. It does not free * clean znodes which younger then @age. Returns number of freed znodes. */ static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention) { int total_freed = 0; struct ubifs_znode *znode, *zprev; time64_t time = ktime_get_seconds(); ubifs_assert(c, mutex_is_locked(&c->umount_mutex)); ubifs_assert(c, mutex_is_locked(&c->tnc_mutex)); if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0) return 0; /* * Traverse the TNC tree in levelorder manner, so that it is possible * to destroy large sub-trees. Indeed, if a znode is old, then all its * children are older or of the same age. * * Note, we are holding 'c->tnc_mutex', so we do not have to lock the * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is * changed only when the 'c->tnc_mutex' is held. */ zprev = NULL; znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL); while (znode && total_freed < nr && atomic_long_read(&c->clean_zn_cnt) > 0) { int freed; /* * If the znode is clean, but it is in the 'c->cnext' list, this * means that this znode has just been written to flash as a * part of commit and was marked clean. They will be removed * from the list at end commit. We cannot change the list, * because it is not protected by any mutex (design decision to * make commit really independent and parallel to main I/O). So * we just skip these znodes. * * Note, the 'clean_zn_cnt' counters are not updated until * after the commit, so the UBIFS shrinker does not report * the znodes which are in the 'c->cnext' list as freeable. * * Also note, if the root of a sub-tree is not in 'c->cnext', * then the whole sub-tree is not in 'c->cnext' as well, so it * is safe to dump whole sub-tree. */ if (znode->cnext) { /* * Very soon these znodes will be removed from the list * and become freeable. */ *contention = 1; } else if (!ubifs_zn_dirty(znode) && abs(time - znode->time) >= age) { if (znode->parent) znode->parent->zbranch[znode->iip].znode = NULL; else c->zroot.znode = NULL; freed = ubifs_destroy_tnc_subtree(c, znode); atomic_long_sub(freed, &ubifs_clean_zn_cnt); atomic_long_sub(freed, &c->clean_zn_cnt); total_freed += freed; znode = zprev; } if (unlikely(!c->zroot.znode)) break; zprev = znode; znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode); cond_resched(); } return total_freed; } /** * shrink_tnc_trees - shrink UBIFS TNC trees. * @nr: number of znodes to free * @age: the age of znodes to free * @contention: if any contention, this is set to %1 * * This function walks the list of mounted UBIFS file-systems and frees clean * znodes which are older than @age, until at least @nr znodes are freed. * Returns the number of freed znodes. */ static int shrink_tnc_trees(int nr, int age, int *contention) { struct ubifs_info *c; struct list_head *p; unsigned int run_no; int freed = 0; spin_lock(&ubifs_infos_lock); do { run_no = ++shrinker_run_no; } while (run_no == 0); /* Iterate over all mounted UBIFS file-systems and try to shrink them */ p = ubifs_infos.next; while (p != &ubifs_infos) { c = list_entry(p, struct ubifs_info, infos_list); /* * We move the ones we do to the end of the list, so we stop * when we see one we have already done. */ if (c->shrinker_run_no == run_no) break; if (!mutex_trylock(&c->umount_mutex)) { /* Some un-mount is in progress, try next FS */ *contention = 1; p = p->next; continue; } /* * We're holding 'c->umount_mutex', so the file-system won't go * away. */ if (!mutex_trylock(&c->tnc_mutex)) { mutex_unlock(&c->umount_mutex); *contention = 1; p = p->next; continue; } spin_unlock(&ubifs_infos_lock); /* * OK, now we have TNC locked, the file-system cannot go away - * it is safe to reap the cache. */ c->shrinker_run_no = run_no; freed += shrink_tnc(c, nr, age, contention); mutex_unlock(&c->tnc_mutex); spin_lock(&ubifs_infos_lock); /* Get the next list element before we move this one */ p = p->next; /* * Move this one to the end of the list to provide some * fairness. */ list_move_tail(&c->infos_list, &ubifs_infos); mutex_unlock(&c->umount_mutex); if (freed >= nr) break; } spin_unlock(&ubifs_infos_lock); return freed; } /** * kick_a_thread - kick a background thread to start commit. * * This function kicks a background thread to start background commit. Returns * %-1 if a thread was kicked or there is another reason to assume the memory * will soon be freed or become freeable. If there are no dirty znodes, returns * %0. */ static int kick_a_thread(void) { int i; struct ubifs_info *c; /* * Iterate over all mounted UBIFS file-systems and find out if there is * already an ongoing commit operation there. If no, then iterate for * the second time and initiate background commit. */ spin_lock(&ubifs_infos_lock); for (i = 0; i < 2; i++) { list_for_each_entry(c, &ubifs_infos, infos_list) { long dirty_zn_cnt; if (!mutex_trylock(&c->umount_mutex)) { /* * Some un-mount is in progress, it will * certainly free memory, so just return. */ spin_unlock(&ubifs_infos_lock); return -1; } dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt); if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN || c->ro_mount || c->ro_error) { mutex_unlock(&c->umount_mutex); continue; } if (c->cmt_state != COMMIT_RESTING) { spin_unlock(&ubifs_infos_lock); mutex_unlock(&c->umount_mutex); return -1; } if (i == 1) { list_move_tail(&c->infos_list, &ubifs_infos); spin_unlock(&ubifs_infos_lock); ubifs_request_bg_commit(c); mutex_unlock(&c->umount_mutex); return -1; } mutex_unlock(&c->umount_mutex); } } spin_unlock(&ubifs_infos_lock); return 0; } unsigned long ubifs_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); /* * Due to the way UBIFS updates the clean znode counter it may * temporarily be negative. */ return clean_zn_cnt >= 0 ? clean_zn_cnt : 1; } unsigned long ubifs_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { unsigned long nr = sc->nr_to_scan; int contention = 0; unsigned long freed; long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); if (!clean_zn_cnt) { /* * No clean znodes, nothing to reap. All we can do in this case * is to kick background threads to start commit, which will * probably make clean znodes which, in turn, will be freeable. * And we return -1 which means will make VM call us again * later. */ dbg_tnc("no clean znodes, kick a thread"); return kick_a_thread(); } freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention); if (freed >= nr) goto out; dbg_tnc("not enough old znodes, try to free young ones"); freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention); if (freed >= nr) goto out; dbg_tnc("not enough young znodes, free all"); freed += shrink_tnc_trees(nr - freed, 0, &contention); if (!freed && contention) { dbg_tnc("freed nothing, but contention"); return SHRINK_STOP; } out: dbg_tnc("%lu znodes were freed, requested %lu", freed, nr); return freed; }
2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 1 2 2 2 2 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 // SPDX-License-Identifier: GPL-2.0-or-later /* * HID driver for the Prodikeys PC-MIDI Keyboard * providing midi & extra multimedia keys functionality * * Copyright (c) 2009 Don Prince <dhprince.devel@yahoo.co.uk> * * Controls for Octave Shift Up/Down, Channel, and * Sustain Duration available via sysfs. */ /* */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/device.h> #include <linux/module.h> #include <linux/usb.h> #include <linux/mutex.h> #include <linux/hid.h> #include <sound/core.h> #include <sound/initval.h> #include <sound/rawmidi.h> #include "hid-ids.h" #define pk_debug(format, arg...) \ pr_debug("hid-prodikeys: " format "\n" , ## arg) #define pk_error(format, arg...) \ pr_err("hid-prodikeys: " format "\n" , ## arg) struct pcmidi_snd; struct pcmidi_sustain { unsigned long in_use; struct pcmidi_snd *pm; struct timer_list timer; unsigned char status; unsigned char note; unsigned char velocity; }; #define PCMIDI_SUSTAINED_MAX 32 struct pcmidi_snd { struct hid_device *hdev; unsigned short ifnum; struct hid_report *pcmidi_report6; struct input_dev *input_ep82; unsigned short midi_mode; unsigned short midi_sustain_mode; unsigned short midi_sustain; unsigned short midi_channel; short midi_octave; struct pcmidi_sustain sustained_notes[PCMIDI_SUSTAINED_MAX]; unsigned short fn_state; unsigned short last_key[24]; spinlock_t rawmidi_in_lock; struct snd_card *card; struct snd_rawmidi *rwmidi; struct snd_rawmidi_substream *in_substream; unsigned long in_triggered; }; #define PK_QUIRK_NOGET 0x00010000 #define PCMIDI_MIDDLE_C 60 #define PCMIDI_CHANNEL_MIN 0 #define PCMIDI_CHANNEL_MAX 15 #define PCMIDI_OCTAVE_MIN (-2) #define PCMIDI_OCTAVE_MAX 2 #define PCMIDI_SUSTAIN_MIN 0 #define PCMIDI_SUSTAIN_MAX 5000 static const char shortname[] = "PC-MIDI"; static const char longname[] = "Prodikeys PC-MIDI Keyboard"; static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; static bool enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP; module_param_array(index, int, NULL, 0444); module_param_array(id, charp, NULL, 0444); module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(index, "Index value for the PC-MIDI virtual audio driver"); MODULE_PARM_DESC(id, "ID string for the PC-MIDI virtual audio driver"); MODULE_PARM_DESC(enable, "Enable for the PC-MIDI virtual audio driver"); /* Output routine for the sysfs channel file */ static ssize_t show_channel(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct pcmidi_snd *pm = hid_get_drvdata(hdev); dbg_hid("pcmidi sysfs read channel=%u\n", pm->midi_channel); return sprintf(buf, "%u (min:%u, max:%u)\n", pm->midi_channel, PCMIDI_CHANNEL_MIN, PCMIDI_CHANNEL_MAX); } /* Input routine for the sysfs channel file */ static ssize_t store_channel(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct pcmidi_snd *pm = hid_get_drvdata(hdev); unsigned channel = 0; if (sscanf(buf, "%u", &channel) > 0 && channel <= PCMIDI_CHANNEL_MAX) { dbg_hid("pcmidi sysfs write channel=%u\n", channel); pm->midi_channel = channel; return strlen(buf); } return -EINVAL; } static DEVICE_ATTR(channel, S_IRUGO | S_IWUSR | S_IWGRP , show_channel, store_channel); static struct device_attribute *sysfs_device_attr_channel = { &dev_attr_channel, }; /* Output routine for the sysfs sustain file */ static ssize_t show_sustain(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct pcmidi_snd *pm = hid_get_drvdata(hdev); dbg_hid("pcmidi sysfs read sustain=%u\n", pm->midi_sustain); return sprintf(buf, "%u (off:%u, max:%u (ms))\n", pm->midi_sustain, PCMIDI_SUSTAIN_MIN, PCMIDI_SUSTAIN_MAX); } /* Input routine for the sysfs sustain file */ static ssize_t store_sustain(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct pcmidi_snd *pm = hid_get_drvdata(hdev); unsigned sustain = 0; if (sscanf(buf, "%u", &sustain) > 0 && sustain <= PCMIDI_SUSTAIN_MAX) { dbg_hid("pcmidi sysfs write sustain=%u\n", sustain); pm->midi_sustain = sustain; pm->midi_sustain_mode = (0 == sustain || !pm->midi_mode) ? 0 : 1; return strlen(buf); } return -EINVAL; } static DEVICE_ATTR(sustain, S_IRUGO | S_IWUSR | S_IWGRP, show_sustain, store_sustain); static struct device_attribute *sysfs_device_attr_sustain = { &dev_attr_sustain, }; /* Output routine for the sysfs octave file */ static ssize_t show_octave(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct pcmidi_snd *pm = hid_get_drvdata(hdev); dbg_hid("pcmidi sysfs read octave=%d\n", pm->midi_octave); return sprintf(buf, "%d (min:%d, max:%d)\n", pm->midi_octave, PCMIDI_OCTAVE_MIN, PCMIDI_OCTAVE_MAX); } /* Input routine for the sysfs octave file */ static ssize_t store_octave(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct pcmidi_snd *pm = hid_get_drvdata(hdev); int octave = 0; if (sscanf(buf, "%d", &octave) > 0 && octave >= PCMIDI_OCTAVE_MIN && octave <= PCMIDI_OCTAVE_MAX) { dbg_hid("pcmidi sysfs write octave=%d\n", octave); pm->midi_octave = octave; return strlen(buf); } return -EINVAL; } static DEVICE_ATTR(octave, S_IRUGO | S_IWUSR | S_IWGRP, show_octave, store_octave); static struct device_attribute *sysfs_device_attr_octave = { &dev_attr_octave, }; static void pcmidi_send_note(struct pcmidi_snd *pm, unsigned char status, unsigned char note, unsigned char velocity) { unsigned long flags; unsigned char buffer[3]; buffer[0] = status; buffer[1] = note; buffer[2] = velocity; spin_lock_irqsave(&pm->rawmidi_in_lock, flags); if (!pm->in_substream) goto drop_note; if (!test_bit(pm->in_substream->number, &pm->in_triggered)) goto drop_note; snd_rawmidi_receive(pm->in_substream, buffer, 3); drop_note: spin_unlock_irqrestore(&pm->rawmidi_in_lock, flags); return; } static void pcmidi_sustained_note_release(struct timer_list *t) { struct pcmidi_sustain *pms = timer_container_of(pms, t, timer); pcmidi_send_note(pms->pm, pms->status, pms->note, pms->velocity); pms->in_use = 0; } static void init_sustain_timers(struct pcmidi_snd *pm) { struct pcmidi_sustain *pms; unsigned i; for (i = 0; i < PCMIDI_SUSTAINED_MAX; i++) { pms = &pm->sustained_notes[i]; pms->in_use = 0; pms->pm = pm; timer_setup(&pms->timer, pcmidi_sustained_note_release, 0); } } static void stop_sustain_timers(struct pcmidi_snd *pm) { struct pcmidi_sustain *pms; unsigned i; for (i = 0; i < PCMIDI_SUSTAINED_MAX; i++) { pms = &pm->sustained_notes[i]; pms->in_use = 1; timer_delete_sync(&pms->timer); } } static int pcmidi_get_output_report(struct pcmidi_snd *pm) { struct hid_device *hdev = pm->hdev; struct hid_report *report; list_for_each_entry(report, &hdev->report_enum[HID_OUTPUT_REPORT].report_list, list) { if (!(6 == report->id)) continue; if (report->maxfield < 1) { hid_err(hdev, "output report is empty\n"); break; } if (report->field[0]->report_count != 2) { hid_err(hdev, "field count too low\n"); break; } pm->pcmidi_report6 = report; return 0; } /* should never get here */ return -ENODEV; } static void pcmidi_submit_output_report(struct pcmidi_snd *pm, int state) { struct hid_device *hdev = pm->hdev; struct hid_report *report = pm->pcmidi_report6; report->field[0]->value[0] = 0x01; report->field[0]->value[1] = state; hid_hw_request(hdev, report, HID_REQ_SET_REPORT); } static int pcmidi_handle_report1(struct pcmidi_snd *pm, u8 *data) { u32 bit_mask; bit_mask = data[1]; bit_mask = (bit_mask << 8) | data[2]; bit_mask = (bit_mask << 8) | data[3]; dbg_hid("pcmidi mode: %d\n", pm->midi_mode); /*KEY_MAIL or octave down*/ if (pm->midi_mode && bit_mask == 0x004000) { /* octave down */ pm->midi_octave--; if (pm->midi_octave < -2) pm->midi_octave = -2; dbg_hid("pcmidi mode: %d octave: %d\n", pm->midi_mode, pm->midi_octave); return 1; } /*KEY_WWW or sustain*/ else if (pm->midi_mode && bit_mask == 0x000004) { /* sustain on/off*/ pm->midi_sustain_mode ^= 0x1; return 1; } return 0; /* continue key processing */ } static int pcmidi_handle_report3(struct pcmidi_snd *pm, u8 *data, int size) { struct pcmidi_sustain *pms; unsigned i, j; unsigned char status, note, velocity; unsigned num_notes = (size-1)/2; for (j = 0; j < num_notes; j++) { note = data[j*2+1]; velocity = data[j*2+2]; if (note < 0x81) { /* note on */ status = 128 + 16 + pm->midi_channel; /* 1001nnnn */ note = note - 0x54 + PCMIDI_MIDDLE_C + (pm->midi_octave * 12); if (0 == velocity) velocity = 1; /* force note on */ } else { /* note off */ status = 128 + pm->midi_channel; /* 1000nnnn */ note = note - 0x94 + PCMIDI_MIDDLE_C + (pm->midi_octave*12); if (pm->midi_sustain_mode) { for (i = 0; i < PCMIDI_SUSTAINED_MAX; i++) { pms = &pm->sustained_notes[i]; if (!pms->in_use) { pms->status = status; pms->note = note; pms->velocity = velocity; pms->in_use = 1; mod_timer(&pms->timer, jiffies + msecs_to_jiffies(pm->midi_sustain)); return 1; } } } } pcmidi_send_note(pm, status, note, velocity); } return 1; } static int pcmidi_handle_report4(struct pcmidi_snd *pm, u8 *data) { unsigned key; u32 bit_mask; u32 bit_index; bit_mask = data[1]; bit_mask = (bit_mask << 8) | data[2]; bit_mask = (bit_mask << 8) | data[3]; /* break keys */ for (bit_index = 0; bit_index < 24; bit_index++) { if (!((0x01 << bit_index) & bit_mask)) { input_event(pm->input_ep82, EV_KEY, pm->last_key[bit_index], 0); pm->last_key[bit_index] = 0; } } /* make keys */ for (bit_index = 0; bit_index < 24; bit_index++) { key = 0; switch ((0x01 << bit_index) & bit_mask) { case 0x000010: /* Fn lock*/ pm->fn_state ^= 0x000010; if (pm->fn_state) pcmidi_submit_output_report(pm, 0xc5); else pcmidi_submit_output_report(pm, 0xc6); continue; case 0x020000: /* midi launcher..send a key (qwerty) or not? */ pcmidi_submit_output_report(pm, 0xc1); pm->midi_mode ^= 0x01; dbg_hid("pcmidi mode: %d\n", pm->midi_mode); continue; case 0x100000: /* KEY_MESSENGER or octave up */ dbg_hid("pcmidi mode: %d\n", pm->midi_mode); if (pm->midi_mode) { pm->midi_octave++; if (pm->midi_octave > 2) pm->midi_octave = 2; dbg_hid("pcmidi mode: %d octave: %d\n", pm->midi_mode, pm->midi_octave); continue; } else key = KEY_MESSENGER; break; case 0x400000: key = KEY_CALENDAR; break; case 0x080000: key = KEY_ADDRESSBOOK; break; case 0x040000: key = KEY_DOCUMENTS; break; case 0x800000: key = KEY_WORDPROCESSOR; break; case 0x200000: key = KEY_SPREADSHEET; break; case 0x010000: key = KEY_COFFEE; break; case 0x000100: key = KEY_HELP; break; case 0x000200: key = KEY_SEND; break; case 0x000400: key = KEY_REPLY; break; case 0x000800: key = KEY_FORWARDMAIL; break; case 0x001000: key = KEY_NEW; break; case 0x002000: key = KEY_OPEN; break; case 0x004000: key = KEY_CLOSE; break; case 0x008000: key = KEY_SAVE; break; case 0x000001: key = KEY_UNDO; break; case 0x000002: key = KEY_REDO; break; case 0x000004: key = KEY_SPELLCHECK; break; case 0x000008: key = KEY_PRINT; break; } if (key) { input_event(pm->input_ep82, EV_KEY, key, 1); pm->last_key[bit_index] = key; } } return 1; } static int pcmidi_handle_report( struct pcmidi_snd *pm, unsigned report_id, u8 *data, int size) { int ret = 0; switch (report_id) { case 0x01: /* midi keys (qwerty)*/ ret = pcmidi_handle_report1(pm, data); break; case 0x03: /* midi keyboard (musical)*/ ret = pcmidi_handle_report3(pm, data, size); break; case 0x04: /* multimedia/midi keys (qwerty)*/ ret = pcmidi_handle_report4(pm, data); break; } return ret; } static void pcmidi_setup_extra_keys( struct pcmidi_snd *pm, struct input_dev *input) { /* reassigned functionality for N/A keys MY PICTURES => KEY_WORDPROCESSOR MY MUSIC=> KEY_SPREADSHEET */ static const unsigned int keys[] = { KEY_FN, KEY_MESSENGER, KEY_CALENDAR, KEY_ADDRESSBOOK, KEY_DOCUMENTS, KEY_WORDPROCESSOR, KEY_SPREADSHEET, KEY_COFFEE, KEY_HELP, KEY_SEND, KEY_REPLY, KEY_FORWARDMAIL, KEY_NEW, KEY_OPEN, KEY_CLOSE, KEY_SAVE, KEY_UNDO, KEY_REDO, KEY_SPELLCHECK, KEY_PRINT, 0 }; const unsigned int *pkeys = &keys[0]; unsigned short i; if (pm->ifnum != 1) /* only set up ONCE for interace 1 */ return; pm->input_ep82 = input; for (i = 0; i < 24; i++) pm->last_key[i] = 0; while (*pkeys != 0) { set_bit(*pkeys, pm->input_ep82->keybit); ++pkeys; } } static int pcmidi_set_operational(struct pcmidi_snd *pm) { int rc; if (pm->ifnum != 1) return 0; /* only set up ONCE for interace 1 */ rc = pcmidi_get_output_report(pm); if (rc < 0) return rc; pcmidi_submit_output_report(pm, 0xc1); return 0; } static int pcmidi_snd_free(struct snd_device *dev) { return 0; } static int pcmidi_in_open(struct snd_rawmidi_substream *substream) { struct pcmidi_snd *pm = substream->rmidi->private_data; dbg_hid("pcmidi in open\n"); pm->in_substream = substream; return 0; } static int pcmidi_in_close(struct snd_rawmidi_substream *substream) { dbg_hid("pcmidi in close\n"); return 0; } static void pcmidi_in_trigger(struct snd_rawmidi_substream *substream, int up) { struct pcmidi_snd *pm = substream->rmidi->private_data; dbg_hid("pcmidi in trigger %d\n", up); pm->in_triggered = up; } static const struct snd_rawmidi_ops pcmidi_in_ops = { .open = pcmidi_in_open, .close = pcmidi_in_close, .trigger = pcmidi_in_trigger }; static int pcmidi_snd_initialise(struct pcmidi_snd *pm) { static int dev; struct snd_card *card; struct snd_rawmidi *rwmidi; int err; static struct snd_device_ops ops = { .dev_free = pcmidi_snd_free, }; if (pm->ifnum != 1) return 0; /* only set up midi device ONCE for interace 1 */ if (dev >= SNDRV_CARDS) return -ENODEV; if (!enable[dev]) { dev++; return -ENOENT; } /* Setup sound card */ err = snd_card_new(&pm->hdev->dev, index[dev], id[dev], THIS_MODULE, 0, &card); if (err < 0) { pk_error("failed to create pc-midi sound card\n"); err = -ENOMEM; goto fail; } pm->card = card; /* Setup sound device */ err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, pm, &ops); if (err < 0) { pk_error("failed to create pc-midi sound device: error %d\n", err); goto fail; } strscpy(card->driver, shortname, sizeof(card->driver)); strscpy(card->shortname, shortname, sizeof(card->shortname)); strscpy(card->longname, longname, sizeof(card->longname)); /* Set up rawmidi */ err = snd_rawmidi_new(card, card->shortname, 0, 0, 1, &rwmidi); if (err < 0) { pk_error("failed to create pc-midi rawmidi device: error %d\n", err); goto fail; } pm->rwmidi = rwmidi; strscpy(rwmidi->name, card->shortname, sizeof(rwmidi->name)); rwmidi->info_flags = SNDRV_RAWMIDI_INFO_INPUT; rwmidi->private_data = pm; snd_rawmidi_set_ops(rwmidi, SNDRV_RAWMIDI_STREAM_INPUT, &pcmidi_in_ops); /* create sysfs variables */ err = device_create_file(&pm->hdev->dev, sysfs_device_attr_channel); if (err < 0) { pk_error("failed to create sysfs attribute channel: error %d\n", err); goto fail; } err = device_create_file(&pm->hdev->dev, sysfs_device_attr_sustain); if (err < 0) { pk_error("failed to create sysfs attribute sustain: error %d\n", err); goto fail_attr_sustain; } err = device_create_file(&pm->hdev->dev, sysfs_device_attr_octave); if (err < 0) { pk_error("failed to create sysfs attribute octave: error %d\n", err); goto fail_attr_octave; } spin_lock_init(&pm->rawmidi_in_lock); init_sustain_timers(pm); err = pcmidi_set_operational(pm); if (err < 0) { pk_error("failed to find output report\n"); goto fail_register; } /* register it */ err = snd_card_register(card); if (err < 0) { pk_error("failed to register pc-midi sound card: error %d\n", err); goto fail_register; } dbg_hid("pcmidi_snd_initialise finished ok\n"); return 0; fail_register: stop_sustain_timers(pm); device_remove_file(&pm->hdev->dev, sysfs_device_attr_octave); fail_attr_octave: device_remove_file(&pm->hdev->dev, sysfs_device_attr_sustain); fail_attr_sustain: device_remove_file(&pm->hdev->dev, sysfs_device_attr_channel); fail: if (pm->card) { snd_card_free(pm->card); pm->card = NULL; } return err; } static int pcmidi_snd_terminate(struct pcmidi_snd *pm) { if (pm->card) { stop_sustain_timers(pm); device_remove_file(&pm->hdev->dev, sysfs_device_attr_channel); device_remove_file(&pm->hdev->dev, sysfs_device_attr_sustain); device_remove_file(&pm->hdev->dev, sysfs_device_attr_octave); snd_card_disconnect(pm->card); snd_card_free_when_closed(pm->card); } return 0; } /* * PC-MIDI report descriptor for report id is wrong. */ static const __u8 *pk_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { if (*rsize == 178 && rdesc[111] == 0x06 && rdesc[112] == 0x00 && rdesc[113] == 0xff) { hid_info(hdev, "fixing up pc-midi keyboard report descriptor\n"); rdesc[144] = 0x18; /* report 4: was 0x10 report count */ } return rdesc; } static int pk_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { struct pcmidi_snd *pm = hid_get_drvdata(hdev); if (HID_UP_MSVENDOR == (usage->hid & HID_USAGE_PAGE) && 1 == pm->ifnum) { pcmidi_setup_extra_keys(pm, hi->input); return 0; } return 0; } static int pk_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct pcmidi_snd *pm = hid_get_drvdata(hdev); int ret = 0; if (1 == pm->ifnum) { if (report->id == data[0]) switch (report->id) { case 0x01: /* midi keys (qwerty)*/ case 0x03: /* midi keyboard (musical)*/ case 0x04: /* extra/midi keys (qwerty)*/ ret = pcmidi_handle_report(pm, report->id, data, size); break; } } return ret; } static int pk_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; struct usb_interface *intf; unsigned short ifnum; unsigned long quirks = id->driver_data; struct pcmidi_snd *pm; if (!hid_is_usb(hdev)) return -EINVAL; intf = to_usb_interface(hdev->dev.parent); ifnum = intf->cur_altsetting->desc.bInterfaceNumber; pm = kzalloc(sizeof(*pm), GFP_KERNEL); if (pm == NULL) { hid_err(hdev, "can't alloc descriptor\n"); return -ENOMEM; } pm->hdev = hdev; pm->ifnum = ifnum; hid_set_drvdata(hdev, pm); ret = hid_parse(hdev); if (ret) { hid_err(hdev, "hid parse failed\n"); goto err_free; } if (quirks & PK_QUIRK_NOGET) { /* hid_parse cleared all the quirks */ hdev->quirks |= HID_QUIRK_NOGET; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (ret) { hid_err(hdev, "hw start failed\n"); goto err_free; } ret = pcmidi_snd_initialise(pm); if (ret < 0) goto err_stop; return 0; err_stop: hid_hw_stop(hdev); err_free: kfree(pm); return ret; } static void pk_remove(struct hid_device *hdev) { struct pcmidi_snd *pm = hid_get_drvdata(hdev); pcmidi_snd_terminate(pm); hid_hw_stop(hdev); kfree(pm); } static const struct hid_device_id pk_devices[] = { {HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_PRODIKEYS_PCMIDI), .driver_data = PK_QUIRK_NOGET}, { } }; MODULE_DEVICE_TABLE(hid, pk_devices); static struct hid_driver pk_driver = { .name = "prodikeys", .id_table = pk_devices, .report_fixup = pk_report_fixup, .input_mapping = pk_input_mapping, .raw_event = pk_raw_event, .probe = pk_probe, .remove = pk_remove, }; module_hid_driver(pk_driver); MODULE_DESCRIPTION("HID driver for the Prodikeys PC-MIDI Keyboard"); MODULE_LICENSE("GPL");
10 10 10 7 7 10 8 5 3 3 3 8 3 3 3 3 3 29 29 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 // SPDX-License-Identifier: GPL-2.0-only /* Helper handling for netfilter. */ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> * (C) 2006-2012 Patrick McHardy <kaber@trash.net> */ #include <linux/types.h> #include <linux/netfilter.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/vmalloc.h> #include <linux/stddef.h> #include <linux/random.h> #include <linux/err.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/rculist.h> #include <linux/rtnetlink.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netfilter/nf_log.h> #include <net/ip.h> static DEFINE_MUTEX(nf_ct_helper_mutex); struct hlist_head *nf_ct_helper_hash __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_helper_hash); unsigned int nf_ct_helper_hsize __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_helper_hsize); static unsigned int nf_ct_helper_count __read_mostly; static DEFINE_MUTEX(nf_ct_nat_helpers_mutex); static struct list_head nf_ct_nat_helpers __read_mostly; /* Stupid hash, but collision free for the default registrations of the * helpers currently in the kernel. */ static unsigned int helper_hash(const struct nf_conntrack_tuple *tuple) { return (((tuple->src.l3num << 8) | tuple->dst.protonum) ^ (__force __u16)tuple->src.u.all) % nf_ct_helper_hsize; } struct nf_conntrack_helper * __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum) { struct nf_conntrack_helper *h; unsigned int i; for (i = 0; i < nf_ct_helper_hsize; i++) { hlist_for_each_entry_rcu(h, &nf_ct_helper_hash[i], hnode) { if (strcmp(h->name, name)) continue; if (h->tuple.src.l3num != NFPROTO_UNSPEC && h->tuple.src.l3num != l3num) continue; if (h->tuple.dst.protonum == protonum) return h; } } return NULL; } EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find); struct nf_conntrack_helper * nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum) { struct nf_conntrack_helper *h; rcu_read_lock(); h = __nf_conntrack_helper_find(name, l3num, protonum); #ifdef CONFIG_MODULES if (h == NULL) { rcu_read_unlock(); if (request_module("nfct-helper-%s", name) == 0) { rcu_read_lock(); h = __nf_conntrack_helper_find(name, l3num, protonum); } else { return h; } } #endif if (h != NULL && !try_module_get(h->me)) h = NULL; if (h != NULL && !refcount_inc_not_zero(&h->refcnt)) { module_put(h->me); h = NULL; } rcu_read_unlock(); return h; } EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get); void nf_conntrack_helper_put(struct nf_conntrack_helper *helper) { refcount_dec(&helper->refcnt); module_put(helper->me); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_put); static struct nf_conntrack_nat_helper * nf_conntrack_nat_helper_find(const char *mod_name) { struct nf_conntrack_nat_helper *cur; bool found = false; list_for_each_entry_rcu(cur, &nf_ct_nat_helpers, list) { if (!strcmp(cur->mod_name, mod_name)) { found = true; break; } } return found ? cur : NULL; } int nf_nat_helper_try_module_get(const char *name, u16 l3num, u8 protonum) { struct nf_conntrack_helper *h; struct nf_conntrack_nat_helper *nat; char mod_name[NF_CT_HELPER_NAME_LEN]; int ret = 0; rcu_read_lock(); h = __nf_conntrack_helper_find(name, l3num, protonum); if (!h) { rcu_read_unlock(); return -ENOENT; } nat = nf_conntrack_nat_helper_find(h->nat_mod_name); if (!nat) { snprintf(mod_name, sizeof(mod_name), "%s", h->nat_mod_name); rcu_read_unlock(); request_module("%s", mod_name); rcu_read_lock(); nat = nf_conntrack_nat_helper_find(mod_name); if (!nat) { rcu_read_unlock(); return -ENOENT; } } if (!try_module_get(nat->module)) ret = -ENOENT; rcu_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(nf_nat_helper_try_module_get); void nf_nat_helper_put(struct nf_conntrack_helper *helper) { struct nf_conntrack_nat_helper *nat; nat = nf_conntrack_nat_helper_find(helper->nat_mod_name); if (WARN_ON_ONCE(!nat)) return; module_put(nat->module); } EXPORT_SYMBOL_GPL(nf_nat_helper_put); struct nf_conn_help * nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) { struct nf_conn_help *help; help = nf_ct_ext_add(ct, NF_CT_EXT_HELPER, gfp); if (help) INIT_HLIST_HEAD(&help->expectations); else pr_debug("failed to add helper extension area"); return help; } EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add); int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, gfp_t flags) { struct nf_conntrack_helper *helper = NULL; struct nf_conn_help *help; /* We already got a helper explicitly attached (e.g. nft_ct) */ if (test_bit(IPS_HELPER_BIT, &ct->status)) return 0; if (WARN_ON_ONCE(!tmpl)) return 0; help = nfct_help(tmpl); if (help != NULL) { helper = rcu_dereference(help->helper); set_bit(IPS_HELPER_BIT, &ct->status); } help = nfct_help(ct); if (helper == NULL) { if (help) RCU_INIT_POINTER(help->helper, NULL); return 0; } if (help == NULL) { help = nf_ct_helper_ext_add(ct, flags); if (help == NULL) return -ENOMEM; } else { /* We only allow helper re-assignment of the same sort since * we cannot reallocate the helper extension area. */ struct nf_conntrack_helper *tmp = rcu_dereference(help->helper); if (tmp && tmp->help != helper->help) { RCU_INIT_POINTER(help->helper, NULL); return 0; } } rcu_assign_pointer(help->helper, helper); return 0; } EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper); /* appropriate ct lock protecting must be taken by caller */ static int unhelp(struct nf_conn *ct, void *me) { struct nf_conn_help *help = nfct_help(ct); if (help && rcu_dereference_raw(help->helper) == me) { nf_conntrack_event(IPCT_HELPER, ct); RCU_INIT_POINTER(help->helper, NULL); } /* We are not intended to delete this conntrack. */ return 0; } void nf_ct_helper_destroy(struct nf_conn *ct) { struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_helper *helper; if (help) { rcu_read_lock(); helper = rcu_dereference(help->helper); if (helper && helper->destroy) helper->destroy(ct); rcu_read_unlock(); } } static LIST_HEAD(nf_ct_helper_expectfn_list); void nf_ct_helper_expectfn_register(struct nf_ct_helper_expectfn *n) { spin_lock_bh(&nf_conntrack_expect_lock); list_add_rcu(&n->head, &nf_ct_helper_expectfn_list); spin_unlock_bh(&nf_conntrack_expect_lock); } EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_register); void nf_ct_helper_expectfn_unregister(struct nf_ct_helper_expectfn *n) { spin_lock_bh(&nf_conntrack_expect_lock); list_del_rcu(&n->head); spin_unlock_bh(&nf_conntrack_expect_lock); } EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_unregister); /* Caller should hold the rcu lock */ struct nf_ct_helper_expectfn * nf_ct_helper_expectfn_find_by_name(const char *name) { struct nf_ct_helper_expectfn *cur; bool found = false; list_for_each_entry_rcu(cur, &nf_ct_helper_expectfn_list, head) { if (!strcmp(cur->name, name)) { found = true; break; } } return found ? cur : NULL; } EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_name); /* Caller should hold the rcu lock */ struct nf_ct_helper_expectfn * nf_ct_helper_expectfn_find_by_symbol(const void *symbol) { struct nf_ct_helper_expectfn *cur; bool found = false; list_for_each_entry_rcu(cur, &nf_ct_helper_expectfn_list, head) { if (cur->expectfn == symbol) { found = true; break; } } return found ? cur : NULL; } EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_symbol); __printf(3, 4) void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct, const char *fmt, ...) { const struct nf_conn_help *help; const struct nf_conntrack_helper *helper; struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; /* Called from the helper function, this call never fails */ help = nfct_help(ct); /* rcu_read_lock()ed by nf_hook_thresh */ helper = rcu_dereference(help->helper); nf_log_packet(nf_ct_net(ct), nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, "nf_ct_%s: dropping packet: %pV ", helper->name, &vaf); va_end(args); } EXPORT_SYMBOL_GPL(nf_ct_helper_log); int nf_conntrack_helper_register(struct nf_conntrack_helper *me) { struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) }; unsigned int h = helper_hash(&me->tuple); struct nf_conntrack_helper *cur; int ret = 0, i; BUG_ON(me->expect_policy == NULL); BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES); BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1); if (!nf_ct_helper_hash) return -ENOENT; if (me->expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT) return -EINVAL; mutex_lock(&nf_ct_helper_mutex); for (i = 0; i < nf_ct_helper_hsize; i++) { hlist_for_each_entry(cur, &nf_ct_helper_hash[i], hnode) { if (!strcmp(cur->name, me->name) && (cur->tuple.src.l3num == NFPROTO_UNSPEC || cur->tuple.src.l3num == me->tuple.src.l3num) && cur->tuple.dst.protonum == me->tuple.dst.protonum) { ret = -EBUSY; goto out; } } } /* avoid unpredictable behaviour for auto_assign_helper */ if (!(me->flags & NF_CT_HELPER_F_USERSPACE)) { hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) { if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple, &mask)) { ret = -EBUSY; goto out; } } } refcount_set(&me->refcnt, 1); hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]); nf_ct_helper_count++; out: mutex_unlock(&nf_ct_helper_mutex); return ret; } EXPORT_SYMBOL_GPL(nf_conntrack_helper_register); static bool expect_iter_me(struct nf_conntrack_expect *exp, void *data) { struct nf_conn_help *help = nfct_help(exp->master); const struct nf_conntrack_helper *me = data; const struct nf_conntrack_helper *this; if (exp->helper == me) return true; this = rcu_dereference_protected(help->helper, lockdep_is_held(&nf_conntrack_expect_lock)); return this == me; } void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) { mutex_lock(&nf_ct_helper_mutex); hlist_del_rcu(&me->hnode); nf_ct_helper_count--; mutex_unlock(&nf_ct_helper_mutex); /* Make sure every nothing is still using the helper unless its a * connection in the hash. */ synchronize_rcu(); nf_ct_expect_iterate_destroy(expect_iter_me, NULL); nf_ct_iterate_destroy(unhelp, me); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); void nf_ct_helper_init(struct nf_conntrack_helper *helper, u16 l3num, u16 protonum, const char *name, u16 default_port, u16 spec_port, u32 id, const struct nf_conntrack_expect_policy *exp_pol, u32 expect_class_max, int (*help)(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo), int (*from_nlattr)(struct nlattr *attr, struct nf_conn *ct), struct module *module) { helper->tuple.src.l3num = l3num; helper->tuple.dst.protonum = protonum; helper->tuple.src.u.all = htons(spec_port); helper->expect_policy = exp_pol; helper->expect_class_max = expect_class_max; helper->help = help; helper->from_nlattr = from_nlattr; helper->me = module; snprintf(helper->nat_mod_name, sizeof(helper->nat_mod_name), NF_NAT_HELPER_PREFIX "%s", name); if (spec_port == default_port) snprintf(helper->name, sizeof(helper->name), "%s", name); else snprintf(helper->name, sizeof(helper->name), "%s-%u", name, id); } EXPORT_SYMBOL_GPL(nf_ct_helper_init); int nf_conntrack_helpers_register(struct nf_conntrack_helper *helper, unsigned int n) { unsigned int i; int err = 0; for (i = 0; i < n; i++) { err = nf_conntrack_helper_register(&helper[i]); if (err < 0) goto err; } return err; err: if (i > 0) nf_conntrack_helpers_unregister(helper, i); return err; } EXPORT_SYMBOL_GPL(nf_conntrack_helpers_register); void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *helper, unsigned int n) { while (n-- > 0) nf_conntrack_helper_unregister(&helper[n]); } EXPORT_SYMBOL_GPL(nf_conntrack_helpers_unregister); void nf_nat_helper_register(struct nf_conntrack_nat_helper *nat) { mutex_lock(&nf_ct_nat_helpers_mutex); list_add_rcu(&nat->list, &nf_ct_nat_helpers); mutex_unlock(&nf_ct_nat_helpers_mutex); } EXPORT_SYMBOL_GPL(nf_nat_helper_register); void nf_nat_helper_unregister(struct nf_conntrack_nat_helper *nat) { mutex_lock(&nf_ct_nat_helpers_mutex); list_del_rcu(&nat->list); mutex_unlock(&nf_ct_nat_helpers_mutex); } EXPORT_SYMBOL_GPL(nf_nat_helper_unregister); int nf_conntrack_helper_init(void) { nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0); if (!nf_ct_helper_hash) return -ENOMEM; INIT_LIST_HEAD(&nf_ct_nat_helpers); return 0; } void nf_conntrack_helper_fini(void) { kvfree(nf_ct_helper_hash); nf_ct_helper_hash = NULL; }
10 9 9 10 13 1 1 4 4 4 4 3 4 1 4 3 1 4 4 4 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 // SPDX-License-Identifier: GPL-2.0-only /* * Input device TTY line discipline * * Copyright (c) 1999-2002 Vojtech Pavlik * * This is a module that converts a tty line into a much simpler * 'serial io port' abstraction that the input device drivers use. */ #include <linux/uaccess.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/init.h> #include <linux/serio.h> #include <linux/tty.h> #include <linux/compat.h> MODULE_AUTHOR("Vojtech Pavlik <vojtech@ucw.cz>"); MODULE_DESCRIPTION("Input device TTY line discipline"); MODULE_LICENSE("GPL"); MODULE_ALIAS_LDISC(N_MOUSE); #define SERPORT_BUSY 1 #define SERPORT_ACTIVE 2 #define SERPORT_DEAD 3 struct serport { struct tty_struct *tty; wait_queue_head_t wait; struct serio *serio; struct serio_device_id id; spinlock_t lock; unsigned long flags; }; /* * Callback functions from the serio code. */ static int serport_serio_write(struct serio *serio, unsigned char data) { struct serport *serport = serio->port_data; return -(serport->tty->ops->write(serport->tty, &data, 1) != 1); } static int serport_serio_open(struct serio *serio) { struct serport *serport = serio->port_data; guard(spinlock_irqsave)(&serport->lock); set_bit(SERPORT_ACTIVE, &serport->flags); return 0; } static void serport_serio_close(struct serio *serio) { struct serport *serport = serio->port_data; guard(spinlock_irqsave)(&serport->lock); clear_bit(SERPORT_ACTIVE, &serport->flags); } /* * serport_ldisc_open() is the routine that is called upon setting our line * discipline on a tty. It prepares the serio struct. */ static int serport_ldisc_open(struct tty_struct *tty) { struct serport *serport; if (!capable(CAP_SYS_ADMIN)) return -EPERM; serport = kzalloc(sizeof(*serport), GFP_KERNEL); if (!serport) return -ENOMEM; serport->tty = tty; spin_lock_init(&serport->lock); init_waitqueue_head(&serport->wait); tty->disc_data = serport; tty->receive_room = 256; set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); return 0; } /* * serport_ldisc_close() is the opposite of serport_ldisc_open() */ static void serport_ldisc_close(struct tty_struct *tty) { struct serport *serport = tty->disc_data; kfree(serport); } /* * serport_ldisc_receive() is called by the low level tty driver when characters * are ready for us. We forward the characters and flags, one by one to the * 'interrupt' routine. */ static void serport_ldisc_receive(struct tty_struct *tty, const u8 *cp, const u8 *fp, size_t count) { struct serport *serport = tty->disc_data; unsigned int ch_flags = 0; int i; guard(spinlock_irqsave)(&serport->lock); if (!test_bit(SERPORT_ACTIVE, &serport->flags)) return; for (i = 0; i < count; i++) { if (fp) { switch (fp[i]) { case TTY_FRAME: ch_flags = SERIO_FRAME; break; case TTY_PARITY: ch_flags = SERIO_PARITY; break; default: ch_flags = 0; break; } } serio_interrupt(serport->serio, cp[i], ch_flags); } } /* * serport_ldisc_read() just waits indefinitely if everything goes well. * However, when the serio driver closes the serio port, it finishes, * returning 0 characters. */ static ssize_t serport_ldisc_read(struct tty_struct * tty, struct file * file, u8 *kbuf, size_t nr, void **cookie, unsigned long offset) { struct serport *serport = tty->disc_data; struct serio *serio; if (test_and_set_bit(SERPORT_BUSY, &serport->flags)) return -EBUSY; serport->serio = serio = kzalloc(sizeof(*serio), GFP_KERNEL); if (!serio) return -ENOMEM; strscpy(serio->name, "Serial port", sizeof(serio->name)); snprintf(serio->phys, sizeof(serio->phys), "%s/serio0", tty_name(tty)); serio->id = serport->id; serio->id.type = SERIO_RS232; serio->write = serport_serio_write; serio->open = serport_serio_open; serio->close = serport_serio_close; serio->port_data = serport; serio->dev.parent = tty->dev; serio_register_port(serport->serio); printk(KERN_INFO "serio: Serial port %s\n", tty_name(tty)); wait_event_interruptible(serport->wait, test_bit(SERPORT_DEAD, &serport->flags)); serio_unregister_port(serport->serio); serport->serio = NULL; clear_bit(SERPORT_DEAD, &serport->flags); clear_bit(SERPORT_BUSY, &serport->flags); return 0; } static void serport_set_type(struct tty_struct *tty, unsigned long type) { struct serport *serport = tty->disc_data; serport->id.proto = type & 0x000000ff; serport->id.id = (type & 0x0000ff00) >> 8; serport->id.extra = (type & 0x00ff0000) >> 16; } /* * serport_ldisc_ioctl() allows to set the port protocol, and device ID */ static int serport_ldisc_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { if (cmd == SPIOCSTYPE) { unsigned long type; if (get_user(type, (unsigned long __user *) arg)) return -EFAULT; serport_set_type(tty, type); return 0; } return -EINVAL; } #ifdef CONFIG_COMPAT #define COMPAT_SPIOCSTYPE _IOW('q', 0x01, compat_ulong_t) static int serport_ldisc_compat_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { if (cmd == COMPAT_SPIOCSTYPE) { void __user *uarg = compat_ptr(arg); compat_ulong_t compat_type; if (get_user(compat_type, (compat_ulong_t __user *)uarg)) return -EFAULT; serport_set_type(tty, compat_type); return 0; } return -EINVAL; } #endif static void serport_ldisc_hangup(struct tty_struct *tty) { struct serport *serport = tty->disc_data; scoped_guard(spinlock_irqsave, &serport->lock) set_bit(SERPORT_DEAD, &serport->flags); wake_up_interruptible(&serport->wait); } static void serport_ldisc_write_wakeup(struct tty_struct * tty) { struct serport *serport = tty->disc_data; guard(spinlock_irqsave)(&serport->lock); if (test_bit(SERPORT_ACTIVE, &serport->flags)) serio_drv_write_wakeup(serport->serio); } /* * The line discipline structure. */ static struct tty_ldisc_ops serport_ldisc = { .owner = THIS_MODULE, .num = N_MOUSE, .name = "input", .open = serport_ldisc_open, .close = serport_ldisc_close, .read = serport_ldisc_read, .ioctl = serport_ldisc_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = serport_ldisc_compat_ioctl, #endif .receive_buf = serport_ldisc_receive, .hangup = serport_ldisc_hangup, .write_wakeup = serport_ldisc_write_wakeup }; /* * The functions for insering/removing us as a module. */ static int __init serport_init(void) { int retval; retval = tty_register_ldisc(&serport_ldisc); if (retval) printk(KERN_ERR "serport.c: Error registering line discipline.\n"); return retval; } static void __exit serport_exit(void) { tty_unregister_ldisc(&serport_ldisc); } module_init(serport_init); module_exit(serport_exit);
2339 2196 1678 1678 1631 1626 1623 1624 1663 1658 1661 77 76 1655 1663 1658 1657 1658 1661 1660 1635 1631 1378 1380 1575 1579 1572 1578 2181 2183 2187 20 20 20 20 20 20 20 20 20 20 20 19 20 13 13 944 3141 1867 1871 13 13 13 2228 1966 431 2227 2227 1028 1026 1026 3211 842 4426 4434 2679 4416 4280 3567 4406 1839 3557 3566 4409 1839 1623 1835 4415 2814 2814 12 2815 2817 2828 4425 2953 4414 4413 1836 1650 4422 2308 4419 1698 4414 1771 1725 3153 2107 3152 2062 3148 3148 3157 20 3145 1591 3152 2184 2192 2189 1758 2189 1192 3162 3172 3171 3160 3163 2342 2341 2342 1760 1753 1660 1663 3244 3255 1339 743 3524 802 801 3149 1339 862 2111 2049 659 1877 1417 2050 804 2052 3153 1737 188 164 1594 2196 1196 1197 1196 1139 1139 2196 1139 741 1134 3154 3145 2101 2112 822 2114 2109 1190 1192 1192 1195 771 1186 1194 2814 1741 62 1750 1745 1740 1742 1626 1625 1623 1628 3151 3149 3160 3152 3160 3157 2 3145 3159 1523 3151 3134 3138 1759 3144 3141 3157 3157 3146 3148 3147 1741 1746 1741 3151 20 20 20 3149 20 20 3147 1756 3147 2109 2105 3149 3152 4 4 4 1727 3821 5110 6002 2189 2188 2189 2182 14 14 2182 1662 2184 1623 1630 1625 2132 1188 1 1 1 1191 2192 2181 2191 2190 1658 1800 1805 1 1 155 149 2819 2498 2495 2821 1886 744 1750 148 149 149 149 148 149 149 1 1 1 1 1 1 1 9 9 9 9 9 9 542 879 878 876 878 878 870 22 519 518 29 29 29 29 29 29 29 29 29 29 1858 1756 1860 1869 1691 1692 1767 3 1770 1866 1 1820 1821 1 42 977 1425 1320 1689 6 6 6 6 1582 1570 1568 1569 23 20 22 23 331 331 31 719 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 // SPDX-License-Identifier: GPL-2.0 /* * drivers/base/core.c - core driver model code (device registration, etc) * * Copyright (c) 2002-3 Patrick Mochel * Copyright (c) 2002-3 Open Source Development Labs * Copyright (c) 2006 Greg Kroah-Hartman <gregkh@suse.de> * Copyright (c) 2006 Novell, Inc. */ #include <linux/acpi.h> #include <linux/blkdev.h> #include <linux/cleanup.h> #include <linux/cpufreq.h> #include <linux/device.h> #include <linux/dma-map-ops.h> /* for dma_default_coherent */ #include <linux/err.h> #include <linux/fwnode.h> #include <linux/init.h> #include <linux/kdev_t.h> #include <linux/kstrtox.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/netdevice.h> #include <linux/notifier.h> #include <linux/of.h> #include <linux/of_device.h> #include <linux/pm_runtime.h> #include <linux/sched/mm.h> #include <linux/sched/signal.h> #include <linux/slab.h> #include <linux/string_helpers.h> #include <linux/swiotlb.h> #include <linux/sysfs.h> #include "base.h" #include "physical_location.h" #include "power/power.h" /* Device links support. */ static LIST_HEAD(deferred_sync); static unsigned int defer_sync_state_count = 1; static DEFINE_MUTEX(fwnode_link_lock); static bool fw_devlink_is_permissive(void); static void __fw_devlink_link_to_consumers(struct device *dev); static bool fw_devlink_drv_reg_done; static bool fw_devlink_best_effort; static struct workqueue_struct *device_link_wq; /** * __fwnode_link_add - Create a link between two fwnode_handles. * @con: Consumer end of the link. * @sup: Supplier end of the link. * @flags: Link flags. * * Create a fwnode link between fwnode handles @con and @sup. The fwnode link * represents the detail that the firmware lists @sup fwnode as supplying a * resource to @con. * * The driver core will use the fwnode link to create a device link between the * two device objects corresponding to @con and @sup when they are created. The * driver core will automatically delete the fwnode link between @con and @sup * after doing that. * * Attempts to create duplicate links between the same pair of fwnode handles * are ignored and there is no reference counting. */ static int __fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup, u8 flags) { struct fwnode_link *link; list_for_each_entry(link, &sup->consumers, s_hook) if (link->consumer == con) { link->flags |= flags; return 0; } link = kzalloc(sizeof(*link), GFP_KERNEL); if (!link) return -ENOMEM; link->supplier = sup; INIT_LIST_HEAD(&link->s_hook); link->consumer = con; INIT_LIST_HEAD(&link->c_hook); link->flags = flags; list_add(&link->s_hook, &sup->consumers); list_add(&link->c_hook, &con->suppliers); pr_debug("%pfwf Linked as a fwnode consumer to %pfwf\n", con, sup); return 0; } int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup, u8 flags) { guard(mutex)(&fwnode_link_lock); return __fwnode_link_add(con, sup, flags); } /** * __fwnode_link_del - Delete a link between two fwnode_handles. * @link: the fwnode_link to be deleted * * The fwnode_link_lock needs to be held when this function is called. */ static void __fwnode_link_del(struct fwnode_link *link) { pr_debug("%pfwf Dropping the fwnode link to %pfwf\n", link->consumer, link->supplier); list_del(&link->s_hook); list_del(&link->c_hook); kfree(link); } /** * __fwnode_link_cycle - Mark a fwnode link as being part of a cycle. * @link: the fwnode_link to be marked * * The fwnode_link_lock needs to be held when this function is called. */ static void __fwnode_link_cycle(struct fwnode_link *link) { pr_debug("%pfwf: cycle: depends on %pfwf\n", link->consumer, link->supplier); link->flags |= FWLINK_FLAG_CYCLE; } /** * fwnode_links_purge_suppliers - Delete all supplier links of fwnode_handle. * @fwnode: fwnode whose supplier links need to be deleted * * Deletes all supplier links connecting directly to @fwnode. */ static void fwnode_links_purge_suppliers(struct fwnode_handle *fwnode) { struct fwnode_link *link, *tmp; guard(mutex)(&fwnode_link_lock); list_for_each_entry_safe(link, tmp, &fwnode->suppliers, c_hook) __fwnode_link_del(link); } /** * fwnode_links_purge_consumers - Delete all consumer links of fwnode_handle. * @fwnode: fwnode whose consumer links need to be deleted * * Deletes all consumer links connecting directly to @fwnode. */ static void fwnode_links_purge_consumers(struct fwnode_handle *fwnode) { struct fwnode_link *link, *tmp; guard(mutex)(&fwnode_link_lock); list_for_each_entry_safe(link, tmp, &fwnode->consumers, s_hook) __fwnode_link_del(link); } /** * fwnode_links_purge - Delete all links connected to a fwnode_handle. * @fwnode: fwnode whose links needs to be deleted * * Deletes all links connecting directly to a fwnode. */ void fwnode_links_purge(struct fwnode_handle *fwnode) { fwnode_links_purge_suppliers(fwnode); fwnode_links_purge_consumers(fwnode); } void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode) { struct fwnode_handle *child; /* Don't purge consumer links of an added child */ if (fwnode->dev) return; fwnode->flags |= FWNODE_FLAG_NOT_DEVICE; fwnode_links_purge_consumers(fwnode); fwnode_for_each_available_child_node(fwnode, child) fw_devlink_purge_absent_suppliers(child); } EXPORT_SYMBOL_GPL(fw_devlink_purge_absent_suppliers); /** * __fwnode_links_move_consumers - Move consumer from @from to @to fwnode_handle * @from: move consumers away from this fwnode * @to: move consumers to this fwnode * * Move all consumer links from @from fwnode to @to fwnode. */ static void __fwnode_links_move_consumers(struct fwnode_handle *from, struct fwnode_handle *to) { struct fwnode_link *link, *tmp; list_for_each_entry_safe(link, tmp, &from->consumers, s_hook) { __fwnode_link_add(link->consumer, to, link->flags); __fwnode_link_del(link); } } /** * __fw_devlink_pickup_dangling_consumers - Pick up dangling consumers * @fwnode: fwnode from which to pick up dangling consumers * @new_sup: fwnode of new supplier * * If the @fwnode has a corresponding struct device and the device supports * probing (that is, added to a bus), then we want to let fw_devlink create * MANAGED device links to this device, so leave @fwnode and its descendant's * fwnode links alone. * * Otherwise, move its consumers to the new supplier @new_sup. */ static void __fw_devlink_pickup_dangling_consumers(struct fwnode_handle *fwnode, struct fwnode_handle *new_sup) { struct fwnode_handle *child; if (fwnode->dev && fwnode->dev->bus) return; fwnode->flags |= FWNODE_FLAG_NOT_DEVICE; __fwnode_links_move_consumers(fwnode, new_sup); fwnode_for_each_available_child_node(fwnode, child) __fw_devlink_pickup_dangling_consumers(child, new_sup); } static DEFINE_MUTEX(device_links_lock); DEFINE_STATIC_SRCU(device_links_srcu); static inline void device_links_write_lock(void) { mutex_lock(&device_links_lock); } static inline void device_links_write_unlock(void) { mutex_unlock(&device_links_lock); } int device_links_read_lock(void) __acquires(&device_links_srcu) { return srcu_read_lock(&device_links_srcu); } void device_links_read_unlock(int idx) __releases(&device_links_srcu) { srcu_read_unlock(&device_links_srcu, idx); } int device_links_read_lock_held(void) { return srcu_read_lock_held(&device_links_srcu); } static void device_link_synchronize_removal(void) { synchronize_srcu(&device_links_srcu); } static void device_link_remove_from_lists(struct device_link *link) { list_del_rcu(&link->s_node); list_del_rcu(&link->c_node); } static bool device_is_ancestor(struct device *dev, struct device *target) { while (target->parent) { target = target->parent; if (dev == target) return true; } return false; } #define DL_MARKER_FLAGS (DL_FLAG_INFERRED | \ DL_FLAG_CYCLE | \ DL_FLAG_MANAGED) bool device_link_flag_is_sync_state_only(u32 flags) { return (flags & ~DL_MARKER_FLAGS) == DL_FLAG_SYNC_STATE_ONLY; } /** * device_is_dependent - Check if one device depends on another one * @dev: Device to check dependencies for. * @target: Device to check against. * * Check if @target depends on @dev or any device dependent on it (its child or * its consumer etc). Return 1 if that is the case or 0 otherwise. */ static int device_is_dependent(struct device *dev, void *target) { struct device_link *link; int ret; /* * The "ancestors" check is needed to catch the case when the target * device has not been completely initialized yet and it is still * missing from the list of children of its parent device. */ if (dev == target || device_is_ancestor(dev, target)) return 1; ret = device_for_each_child(dev, target, device_is_dependent); if (ret) return ret; list_for_each_entry(link, &dev->links.consumers, s_node) { if (device_link_flag_is_sync_state_only(link->flags)) continue; if (link->consumer == target) return 1; ret = device_is_dependent(link->consumer, target); if (ret) break; } return ret; } static void device_link_init_status(struct device_link *link, struct device *consumer, struct device *supplier) { switch (supplier->links.status) { case DL_DEV_PROBING: switch (consumer->links.status) { case DL_DEV_PROBING: /* * A consumer driver can create a link to a supplier * that has not completed its probing yet as long as it * knows that the supplier is already functional (for * example, it has just acquired some resources from the * supplier). */ link->status = DL_STATE_CONSUMER_PROBE; break; default: link->status = DL_STATE_DORMANT; break; } break; case DL_DEV_DRIVER_BOUND: switch (consumer->links.status) { case DL_DEV_PROBING: link->status = DL_STATE_CONSUMER_PROBE; break; case DL_DEV_DRIVER_BOUND: link->status = DL_STATE_ACTIVE; break; default: link->status = DL_STATE_AVAILABLE; break; } break; case DL_DEV_UNBINDING: link->status = DL_STATE_SUPPLIER_UNBIND; break; default: link->status = DL_STATE_DORMANT; break; } } static int device_reorder_to_tail(struct device *dev, void *not_used) { struct device_link *link; /* * Devices that have not been registered yet will be put to the ends * of the lists during the registration, so skip them here. */ if (device_is_registered(dev)) devices_kset_move_last(dev); if (device_pm_initialized(dev)) device_pm_move_last(dev); device_for_each_child(dev, NULL, device_reorder_to_tail); list_for_each_entry(link, &dev->links.consumers, s_node) { if (device_link_flag_is_sync_state_only(link->flags)) continue; device_reorder_to_tail(link->consumer, NULL); } return 0; } /** * device_pm_move_to_tail - Move set of devices to the end of device lists * @dev: Device to move * * This is a device_reorder_to_tail() wrapper taking the requisite locks. * * It moves the @dev along with all of its children and all of its consumers * to the ends of the device_kset and dpm_list, recursively. */ void device_pm_move_to_tail(struct device *dev) { int idx; idx = device_links_read_lock(); device_pm_lock(); device_reorder_to_tail(dev, NULL); device_pm_unlock(); device_links_read_unlock(idx); } #define to_devlink(dev) container_of((dev), struct device_link, link_dev) static ssize_t status_show(struct device *dev, struct device_attribute *attr, char *buf) { const char *output; switch (to_devlink(dev)->status) { case DL_STATE_NONE: output = "not tracked"; break; case DL_STATE_DORMANT: output = "dormant"; break; case DL_STATE_AVAILABLE: output = "available"; break; case DL_STATE_CONSUMER_PROBE: output = "consumer probing"; break; case DL_STATE_ACTIVE: output = "active"; break; case DL_STATE_SUPPLIER_UNBIND: output = "supplier unbinding"; break; default: output = "unknown"; break; } return sysfs_emit(buf, "%s\n", output); } static DEVICE_ATTR_RO(status); static ssize_t auto_remove_on_show(struct device *dev, struct device_attribute *attr, char *buf) { struct device_link *link = to_devlink(dev); const char *output; if (device_link_test(link, DL_FLAG_AUTOREMOVE_SUPPLIER)) output = "supplier unbind"; else if (device_link_test(link, DL_FLAG_AUTOREMOVE_CONSUMER)) output = "consumer unbind"; else output = "never"; return sysfs_emit(buf, "%s\n", output); } static DEVICE_ATTR_RO(auto_remove_on); static ssize_t runtime_pm_show(struct device *dev, struct device_attribute *attr, char *buf) { struct device_link *link = to_devlink(dev); return sysfs_emit(buf, "%d\n", device_link_test(link, DL_FLAG_PM_RUNTIME)); } static DEVICE_ATTR_RO(runtime_pm); static ssize_t sync_state_only_show(struct device *dev, struct device_attribute *attr, char *buf) { struct device_link *link = to_devlink(dev); return sysfs_emit(buf, "%d\n", device_link_test(link, DL_FLAG_SYNC_STATE_ONLY)); } static DEVICE_ATTR_RO(sync_state_only); static struct attribute *devlink_attrs[] = { &dev_attr_status.attr, &dev_attr_auto_remove_on.attr, &dev_attr_runtime_pm.attr, &dev_attr_sync_state_only.attr, NULL, }; ATTRIBUTE_GROUPS(devlink); static void device_link_release_fn(struct work_struct *work) { struct device_link *link = container_of(work, struct device_link, rm_work); /* Ensure that all references to the link object have been dropped. */ device_link_synchronize_removal(); pm_runtime_release_supplier(link); /* * If supplier_preactivated is set, the link has been dropped between * the pm_runtime_get_suppliers() and pm_runtime_put_suppliers() calls * in __driver_probe_device(). In that case, drop the supplier's * PM-runtime usage counter to remove the reference taken by * pm_runtime_get_suppliers(). */ if (link->supplier_preactivated) pm_runtime_put_noidle(link->supplier); pm_request_idle(link->supplier); put_device(link->consumer); put_device(link->supplier); kfree(link); } static void devlink_dev_release(struct device *dev) { struct device_link *link = to_devlink(dev); INIT_WORK(&link->rm_work, device_link_release_fn); /* * It may take a while to complete this work because of the SRCU * synchronization in device_link_release_fn() and if the consumer or * supplier devices get deleted when it runs, so put it into the * dedicated workqueue. */ queue_work(device_link_wq, &link->rm_work); } /** * device_link_wait_removal - Wait for ongoing devlink removal jobs to terminate */ void device_link_wait_removal(void) { /* * devlink removal jobs are queued in the dedicated work queue. * To be sure that all removal jobs are terminated, ensure that any * scheduled work has run to completion. */ flush_workqueue(device_link_wq); } EXPORT_SYMBOL_GPL(device_link_wait_removal); static const struct class devlink_class = { .name = "devlink", .dev_groups = devlink_groups, .dev_release = devlink_dev_release, }; static int devlink_add_symlinks(struct device *dev) { char *buf_con __free(kfree) = NULL, *buf_sup __free(kfree) = NULL; int ret; struct device_link *link = to_devlink(dev); struct device *sup = link->supplier; struct device *con = link->consumer; ret = sysfs_create_link(&link->link_dev.kobj, &sup->kobj, "supplier"); if (ret) goto out; ret = sysfs_create_link(&link->link_dev.kobj, &con->kobj, "consumer"); if (ret) goto err_con; buf_con = kasprintf(GFP_KERNEL, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); if (!buf_con) { ret = -ENOMEM; goto err_con_dev; } ret = sysfs_create_link(&sup->kobj, &link->link_dev.kobj, buf_con); if (ret) goto err_con_dev; buf_sup = kasprintf(GFP_KERNEL, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); if (!buf_sup) { ret = -ENOMEM; goto err_sup_dev; } ret = sysfs_create_link(&con->kobj, &link->link_dev.kobj, buf_sup); if (ret) goto err_sup_dev; goto out; err_sup_dev: sysfs_remove_link(&sup->kobj, buf_con); err_con_dev: sysfs_remove_link(&link->link_dev.kobj, "consumer"); err_con: sysfs_remove_link(&link->link_dev.kobj, "supplier"); out: return ret; } static void devlink_remove_symlinks(struct device *dev) { char *buf_con __free(kfree) = NULL, *buf_sup __free(kfree) = NULL; struct device_link *link = to_devlink(dev); struct device *sup = link->supplier; struct device *con = link->consumer; sysfs_remove_link(&link->link_dev.kobj, "consumer"); sysfs_remove_link(&link->link_dev.kobj, "supplier"); if (device_is_registered(con)) { buf_sup = kasprintf(GFP_KERNEL, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); if (!buf_sup) goto out; sysfs_remove_link(&con->kobj, buf_sup); } buf_con = kasprintf(GFP_KERNEL, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); if (!buf_con) goto out; sysfs_remove_link(&sup->kobj, buf_con); return; out: WARN(1, "Unable to properly free device link symlinks!\n"); } static struct class_interface devlink_class_intf = { .class = &devlink_class, .add_dev = devlink_add_symlinks, .remove_dev = devlink_remove_symlinks, }; static int __init devlink_class_init(void) { int ret; ret = class_register(&devlink_class); if (ret) return ret; ret = class_interface_register(&devlink_class_intf); if (ret) class_unregister(&devlink_class); return ret; } postcore_initcall(devlink_class_init); #define DL_MANAGED_LINK_FLAGS (DL_FLAG_AUTOREMOVE_CONSUMER | \ DL_FLAG_AUTOREMOVE_SUPPLIER | \ DL_FLAG_AUTOPROBE_CONSUMER | \ DL_FLAG_SYNC_STATE_ONLY | \ DL_FLAG_INFERRED | \ DL_FLAG_CYCLE) #define DL_ADD_VALID_FLAGS (DL_MANAGED_LINK_FLAGS | DL_FLAG_STATELESS | \ DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE) /** * device_link_add - Create a link between two devices. * @consumer: Consumer end of the link. * @supplier: Supplier end of the link. * @flags: Link flags. * * Return: On success, a device_link struct will be returned. * On error or invalid flag settings, NULL will be returned. * * The caller is responsible for the proper synchronization of the link creation * with runtime PM. First, setting the DL_FLAG_PM_RUNTIME flag will cause the * runtime PM framework to take the link into account. Second, if the * DL_FLAG_RPM_ACTIVE flag is set in addition to it, the supplier devices will * be forced into the active meta state and reference-counted upon the creation * of the link. If DL_FLAG_PM_RUNTIME is not set, DL_FLAG_RPM_ACTIVE will be * ignored. * * If DL_FLAG_STATELESS is set in @flags, the caller of this function is * expected to release the link returned by it directly with the help of either * device_link_del() or device_link_remove(). * * If that flag is not set, however, the caller of this function is handing the * management of the link over to the driver core entirely and its return value * can only be used to check whether or not the link is present. In that case, * the DL_FLAG_AUTOREMOVE_CONSUMER and DL_FLAG_AUTOREMOVE_SUPPLIER device link * flags can be used to indicate to the driver core when the link can be safely * deleted. Namely, setting one of them in @flags indicates to the driver core * that the link is not going to be used (by the given caller of this function) * after unbinding the consumer or supplier driver, respectively, from its * device, so the link can be deleted at that point. If none of them is set, * the link will be maintained until one of the devices pointed to by it (either * the consumer or the supplier) is unregistered. * * Also, if DL_FLAG_STATELESS, DL_FLAG_AUTOREMOVE_CONSUMER and * DL_FLAG_AUTOREMOVE_SUPPLIER are not set in @flags (that is, a persistent * managed device link is being added), the DL_FLAG_AUTOPROBE_CONSUMER flag can * be used to request the driver core to automatically probe for a consumer * driver after successfully binding a driver to the supplier device. * * The combination of DL_FLAG_STATELESS and one of DL_FLAG_AUTOREMOVE_CONSUMER, * DL_FLAG_AUTOREMOVE_SUPPLIER, or DL_FLAG_AUTOPROBE_CONSUMER set in @flags at * the same time is invalid and will cause NULL to be returned upfront. * However, if a device link between the given @consumer and @supplier pair * exists already when this function is called for them, the existing link will * be returned regardless of its current type and status (the link's flags may * be modified then). The caller of this function is then expected to treat * the link as though it has just been created, so (in particular) if * DL_FLAG_STATELESS was passed in @flags, the link needs to be released * explicitly when not needed any more (as stated above). * * A side effect of the link creation is re-ordering of dpm_list and the * devices_kset list by moving the consumer device and all devices depending * on it to the ends of these lists (that does not happen to devices that have * not been registered when this function is called). * * The supplier device is required to be registered when this function is called * and NULL will be returned if that is not the case. The consumer device need * not be registered, however. */ struct device_link *device_link_add(struct device *consumer, struct device *supplier, u32 flags) { struct device_link *link; if (!consumer || !supplier || consumer == supplier || flags & ~DL_ADD_VALID_FLAGS || (flags & DL_FLAG_STATELESS && flags & DL_MANAGED_LINK_FLAGS) || (flags & DL_FLAG_AUTOPROBE_CONSUMER && flags & (DL_FLAG_AUTOREMOVE_CONSUMER | DL_FLAG_AUTOREMOVE_SUPPLIER))) return NULL; if (flags & DL_FLAG_PM_RUNTIME && flags & DL_FLAG_RPM_ACTIVE) { if (pm_runtime_get_sync(supplier) < 0) { pm_runtime_put_noidle(supplier); return NULL; } } if (!(flags & DL_FLAG_STATELESS)) flags |= DL_FLAG_MANAGED; if (flags & DL_FLAG_SYNC_STATE_ONLY && !device_link_flag_is_sync_state_only(flags)) return NULL; device_links_write_lock(); device_pm_lock(); /* * If the supplier has not been fully registered yet or there is a * reverse (non-SYNC_STATE_ONLY) dependency between the consumer and * the supplier already in the graph, return NULL. If the link is a * SYNC_STATE_ONLY link, we don't check for reverse dependencies * because it only affects sync_state() callbacks. */ if (!device_pm_initialized(supplier) || (!(flags & DL_FLAG_SYNC_STATE_ONLY) && device_is_dependent(consumer, supplier))) { link = NULL; goto out; } /* * SYNC_STATE_ONLY links are useless once a consumer device has probed. * So, only create it if the consumer hasn't probed yet. */ if (flags & DL_FLAG_SYNC_STATE_ONLY && consumer->links.status != DL_DEV_NO_DRIVER && consumer->links.status != DL_DEV_PROBING) { link = NULL; goto out; } /* * DL_FLAG_AUTOREMOVE_SUPPLIER indicates that the link will be needed * longer than for DL_FLAG_AUTOREMOVE_CONSUMER and setting them both * together doesn't make sense, so prefer DL_FLAG_AUTOREMOVE_SUPPLIER. */ if (flags & DL_FLAG_AUTOREMOVE_SUPPLIER) flags &= ~DL_FLAG_AUTOREMOVE_CONSUMER; list_for_each_entry(link, &supplier->links.consumers, s_node) { if (link->consumer != consumer) continue; if (device_link_test(link, DL_FLAG_INFERRED) && !(flags & DL_FLAG_INFERRED)) link->flags &= ~DL_FLAG_INFERRED; if (flags & DL_FLAG_PM_RUNTIME) { if (!device_link_test(link, DL_FLAG_PM_RUNTIME)) { pm_runtime_new_link(consumer); link->flags |= DL_FLAG_PM_RUNTIME; } if (flags & DL_FLAG_RPM_ACTIVE) refcount_inc(&link->rpm_active); } if (flags & DL_FLAG_STATELESS) { kref_get(&link->kref); if (device_link_test(link, DL_FLAG_SYNC_STATE_ONLY) && !device_link_test(link, DL_FLAG_STATELESS)) { link->flags |= DL_FLAG_STATELESS; goto reorder; } else { link->flags |= DL_FLAG_STATELESS; goto out; } } /* * If the life time of the link following from the new flags is * longer than indicated by the flags of the existing link, * update the existing link to stay around longer. */ if (flags & DL_FLAG_AUTOREMOVE_SUPPLIER) { if (device_link_test(link, DL_FLAG_AUTOREMOVE_CONSUMER)) { link->flags &= ~DL_FLAG_AUTOREMOVE_CONSUMER; link->flags |= DL_FLAG_AUTOREMOVE_SUPPLIER; } } else if (!(flags & DL_FLAG_AUTOREMOVE_CONSUMER)) { link->flags &= ~(DL_FLAG_AUTOREMOVE_CONSUMER | DL_FLAG_AUTOREMOVE_SUPPLIER); } if (!device_link_test(link, DL_FLAG_MANAGED)) { kref_get(&link->kref); link->flags |= DL_FLAG_MANAGED; device_link_init_status(link, consumer, supplier); } if (device_link_test(link, DL_FLAG_SYNC_STATE_ONLY) && !(flags & DL_FLAG_SYNC_STATE_ONLY)) { link->flags &= ~DL_FLAG_SYNC_STATE_ONLY; goto reorder; } goto out; } link = kzalloc(sizeof(*link), GFP_KERNEL); if (!link) goto out; refcount_set(&link->rpm_active, 1); get_device(supplier); link->supplier = supplier; INIT_LIST_HEAD(&link->s_node); get_device(consumer); link->consumer = consumer; INIT_LIST_HEAD(&link->c_node); link->flags = flags; kref_init(&link->kref); link->link_dev.class = &devlink_class; device_set_pm_not_required(&link->link_dev); dev_set_name(&link->link_dev, "%s:%s--%s:%s", dev_bus_name(supplier), dev_name(supplier), dev_bus_name(consumer), dev_name(consumer)); if (device_register(&link->link_dev)) { put_device(&link->link_dev); link = NULL; goto out; } if (flags & DL_FLAG_PM_RUNTIME) { if (flags & DL_FLAG_RPM_ACTIVE) refcount_inc(&link->rpm_active); pm_runtime_new_link(consumer); } /* Determine the initial link state. */ if (flags & DL_FLAG_STATELESS) link->status = DL_STATE_NONE; else device_link_init_status(link, consumer, supplier); /* * Some callers expect the link creation during consumer driver probe to * resume the supplier even without DL_FLAG_RPM_ACTIVE. */ if (link->status == DL_STATE_CONSUMER_PROBE && flags & DL_FLAG_PM_RUNTIME) pm_runtime_resume(supplier); list_add_tail_rcu(&link->s_node, &supplier->links.consumers); list_add_tail_rcu(&link->c_node, &consumer->links.suppliers); if (flags & DL_FLAG_SYNC_STATE_ONLY) { dev_dbg(consumer, "Linked as a sync state only consumer to %s\n", dev_name(supplier)); goto out; } reorder: /* * Move the consumer and all of the devices depending on it to the end * of dpm_list and the devices_kset list. * * It is necessary to hold dpm_list locked throughout all that or else * we may end up suspending with a wrong ordering of it. */ device_reorder_to_tail(consumer, NULL); dev_dbg(consumer, "Linked as a consumer to %s\n", dev_name(supplier)); out: device_pm_unlock(); device_links_write_unlock(); if ((flags & DL_FLAG_PM_RUNTIME && flags & DL_FLAG_RPM_ACTIVE) && !link) pm_runtime_put(supplier); return link; } EXPORT_SYMBOL_GPL(device_link_add); static void __device_link_del(struct kref *kref) { struct device_link *link = container_of(kref, struct device_link, kref); dev_dbg(link->consumer, "Dropping the link to %s\n", dev_name(link->supplier)); pm_runtime_drop_link(link); device_link_remove_from_lists(link); device_unregister(&link->link_dev); } static void device_link_put_kref(struct device_link *link) { if (device_link_test(link, DL_FLAG_STATELESS)) kref_put(&link->kref, __device_link_del); else if (!device_is_registered(link->consumer)) __device_link_del(&link->kref); else WARN(1, "Unable to drop a managed device link reference\n"); } /** * device_link_del - Delete a stateless link between two devices. * @link: Device link to delete. * * The caller must ensure proper synchronization of this function with runtime * PM. If the link was added multiple times, it needs to be deleted as often. * Care is required for hotplugged devices: Their links are purged on removal * and calling device_link_del() is then no longer allowed. */ void device_link_del(struct device_link *link) { device_links_write_lock(); device_link_put_kref(link); device_links_write_unlock(); } EXPORT_SYMBOL_GPL(device_link_del); /** * device_link_remove - Delete a stateless link between two devices. * @consumer: Consumer end of the link. * @supplier: Supplier end of the link. * * The caller must ensure proper synchronization of this function with runtime * PM. */ void device_link_remove(void *consumer, struct device *supplier) { struct device_link *link; if (WARN_ON(consumer == supplier)) return; device_links_write_lock(); list_for_each_entry(link, &supplier->links.consumers, s_node) { if (link->consumer == consumer) { device_link_put_kref(link); break; } } device_links_write_unlock(); } EXPORT_SYMBOL_GPL(device_link_remove); static void device_links_missing_supplier(struct device *dev) { struct device_link *link; list_for_each_entry(link, &dev->links.suppliers, c_node) { if (link->status != DL_STATE_CONSUMER_PROBE) continue; if (link->supplier->links.status == DL_DEV_DRIVER_BOUND) { WRITE_ONCE(link->status, DL_STATE_AVAILABLE); } else { WARN_ON(!device_link_test(link, DL_FLAG_SYNC_STATE_ONLY)); WRITE_ONCE(link->status, DL_STATE_DORMANT); } } } static bool dev_is_best_effort(struct device *dev) { return (fw_devlink_best_effort && dev->can_match) || (dev->fwnode && (dev->fwnode->flags & FWNODE_FLAG_BEST_EFFORT)); } static struct fwnode_handle *fwnode_links_check_suppliers( struct fwnode_handle *fwnode) { struct fwnode_link *link; if (!fwnode || fw_devlink_is_permissive()) return NULL; list_for_each_entry(link, &fwnode->suppliers, c_hook) if (!(link->flags & (FWLINK_FLAG_CYCLE | FWLINK_FLAG_IGNORE))) return link->supplier; return NULL; } /** * device_links_check_suppliers - Check presence of supplier drivers. * @dev: Consumer device. * * Check links from this device to any suppliers. Walk the list of the device's * links to suppliers and see if all of them are available. If not, simply * return -EPROBE_DEFER. * * We need to guarantee that the supplier will not go away after the check has * been positive here. It only can go away in __device_release_driver() and * that function checks the device's links to consumers. This means we need to * mark the link as "consumer probe in progress" to make the supplier removal * wait for us to complete (or bad things may happen). * * Links without the DL_FLAG_MANAGED flag set are ignored. */ int device_links_check_suppliers(struct device *dev) { struct device_link *link; int ret = 0, fwnode_ret = 0; struct fwnode_handle *sup_fw; /* * Device waiting for supplier to become available is not allowed to * probe. */ scoped_guard(mutex, &fwnode_link_lock) { sup_fw = fwnode_links_check_suppliers(dev->fwnode); if (sup_fw) { if (dev_is_best_effort(dev)) fwnode_ret = -EAGAIN; else return dev_err_probe(dev, -EPROBE_DEFER, "wait for supplier %pfwf\n", sup_fw); } } device_links_write_lock(); list_for_each_entry(link, &dev->links.suppliers, c_node) { if (!device_link_test(link, DL_FLAG_MANAGED)) continue;