// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * dlmfs.c
 *
 * Code which implements the kernel side of a minimal userspace
 * interface to our DLM. This file handles the virtual file system
 * used for communication with userspace. Credit should go to ramfs,
 * which was a template for the fs side of this module.
 *
 * Copyright (C) 2003, 2004 Oracle. All rights reserved.
 */

/* Simple VFS hooks based on: */
/*
 * Resizable simple ram filesystem for Linux.
 *
 * Copyright (C) 2000 Linus Torvalds.
 *               2000 Transmeta Corp.
 */

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/fs_context.h>
#include <linux/pagemap.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/poll.h>

#include <linux/uaccess.h>

#include "../stackglue.h"
#include "userdlm.h"

#define MLOG_MASK_PREFIX ML_DLMFS
#include "../cluster/masklog.h"

static const struct super_operations dlmfs_ops;
static const struct file_operations dlmfs_file_operations;
static const struct inode_operations dlmfs_dir_inode_operations;
static const struct inode_operations dlmfs_root_inode_operations;
static const struct inode_operations dlmfs_file_inode_operations;
static struct kmem_cache *dlmfs_inode_cache;

struct workqueue_struct *user_dlm_worker;

/*
 * These are the ABI capabilities of dlmfs.
 *
 * Over time, dlmfs has added some features that were not part of the
 * initial ABI. Unfortunately, some of these features are not detectable
 * via standard usage. For example, Linux's default poll always returns
 * EPOLLIN, so there is no way for a caller of poll(2) to know when dlmfs
 * added poll support. Instead, we provide this list of new capabilities.
 *
 * Capabilities is a read-only attribute. We do it as a module parameter
 * so we can discover it whether dlmfs is built in, loaded, or even not
 * loaded.
 *
 * The ABI features are local to this machine's dlmfs mount. This is
 * distinct from the locking protocol, which is concerned with inter-node
 * interaction.
 *
 * Capabilities:
 * - bast	: EPOLLIN against the file descriptor of a held lock
 *		  signifies a bast fired on the lock.
 */
#define DLMFS_CAPABILITIES "bast stackglue"
static int param_set_dlmfs_capabilities(const char *val,
					const struct kernel_param *kp)
{
	printk(KERN_ERR "%s: readonly parameter\n", kp->name);
	return -EINVAL;
}
static int param_get_dlmfs_capabilities(char *buffer,
					const struct kernel_param *kp)
{
	return sysfs_emit(buffer, DLMFS_CAPABILITIES);
}
module_param_call(capabilities, param_set_dlmfs_capabilities,
		  param_get_dlmfs_capabilities, NULL, 0444);
MODULE_PARM_DESC(capabilities, DLMFS_CAPABILITIES);


/*
 * decodes a set of open flags into a valid lock level and a set of flags.
 * returns < 0 if we have invalid flags
 * flags which mean something to us:
 * O_RDONLY -> PRMODE level
 * O_WRONLY -> EXMODE level
 *
 * O_NONBLOCK -> NOQUEUE
 */
static int dlmfs_decode_open_flags(int open_flags,
				   int *level,
				   int *flags)
{
	if (open_flags & (O_WRONLY|O_RDWR))
		*level = DLM_LOCK_EX;
	else
		*level = DLM_LOCK_PR;

	*flags = 0;
	if (open_flags & O_NONBLOCK)
		*flags |= DLM_LKF_NOQUEUE;

	return 0;
}

static int dlmfs_file_open(struct inode *inode,
			   struct file *file)
{
	int status, level, flags;
	struct dlmfs_filp_private *fp = NULL;
	struct dlmfs_inode_private *ip;

	if (S_ISDIR(inode->i_mode))
		BUG();

	mlog(0, "open called on inode %lu, flags 0x%x\n", inode->i_ino,
		file->f_flags);

	status = dlmfs_decode_open_flags(file->f_flags, &level, &flags);
	if (status < 0)
		goto bail;

	/* We don't want to honor O_APPEND at read/write time as it
	 * doesn't make sense for LVB writes. */
	file->f_flags &= ~O_APPEND;

	fp = kmalloc(sizeof(*fp), GFP_NOFS);
	if (!fp) {
		status = -ENOMEM;
		goto bail;
	}
	fp->fp_lock_level = level;

	ip = DLMFS_I(inode);

	status = user_dlm_cluster_lock(&ip->ip_lockres, level, flags);
	if (status < 0) {
		/* this is a strange error to return here but I want
		 * userspace to be able to distinguish a valid lock
		 * request from one that simply couldn't be granted. */
		if (flags & DLM_LKF_NOQUEUE && status == -EAGAIN)
			status = -ETXTBSY;
		kfree(fp);
		goto bail;
	}

	file->private_data = fp;
bail:
	return status;
}

static int dlmfs_file_release(struct inode *inode,
			      struct file *file)
{
	int level;
	struct dlmfs_inode_private *ip = DLMFS_I(inode);
	struct dlmfs_filp_private *fp = file->private_data;

	if (S_ISDIR(inode->i_mode))
		BUG();

	mlog(0, "close called on inode %lu\n", inode->i_ino);

	if (fp) {
		level = fp->fp_lock_level;
		if (level != DLM_LOCK_IV)
			user_dlm_cluster_unlock(&ip->ip_lockres, level);

		kfree(fp);
		file->private_data = NULL;
	}

	return 0;
}

/*
 * We do ->setattr() just to override size changes. Our size is the size
 * of the LVB and nothing else.
 */
static int dlmfs_file_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
			      struct iattr *attr)
{
	int error;
	struct inode *inode = d_inode(dentry);

	attr->ia_valid &= ~ATTR_SIZE;
	error = setattr_prepare(&nop_mnt_idmap, dentry, attr);
	if (error)
		return error;

	setattr_copy(&nop_mnt_idmap, inode, attr);
	mark_inode_dirty(inode);
	return 0;
}

static __poll_t dlmfs_file_poll(struct file *file, poll_table *wait)
{
	__poll_t event = 0;
	struct inode *inode = file_inode(file);
	struct dlmfs_inode_private *ip = DLMFS_I(inode);

	poll_wait(file, &ip->ip_lockres.l_event, wait);

	spin_lock(&ip->ip_lockres.l_lock);
	if (ip->ip_lockres.l_flags & USER_LOCK_BLOCKED)
		event = EPOLLIN | EPOLLRDNORM;
	spin_unlock(&ip->ip_lockres.l_lock);

	return event;
}

static ssize_t dlmfs_file_read(struct file *file,
			       char __user *buf,
			       size_t count,
			       loff_t *ppos)
{
	char lvb[DLM_LVB_LEN];

	if (!user_dlm_read_lvb(file_inode(file), lvb))
		return 0;

	return simple_read_from_buffer(buf, count, ppos, lvb, sizeof(lvb));
}

static ssize_t dlmfs_file_write(struct file *filp,
				const char __user *buf,
				size_t count,
				loff_t *ppos)
{
	char lvb_buf[DLM_LVB_LEN];
	int bytes_left;
	struct inode *inode = file_inode(filp);

	mlog(0, "inode %lu, count = %zu, *ppos = %llu\n",
		inode->i_ino, count, *ppos);

	if (*ppos >= DLM_LVB_LEN)
		return -ENOSPC;

	/* don't write past the lvb */
	if (count > DLM_LVB_LEN - *ppos)
		count = DLM_LVB_LEN - *ppos;

	if (!count)
		return 0;

	bytes_left = copy_from_user(lvb_buf, buf, count);
	count -= bytes_left;
	if (count)
		user_dlm_write_lvb(inode, lvb_buf, count);

	*ppos = *ppos + count;
	mlog(0, "wrote %zu bytes\n", count);
	return count;
}

static void dlmfs_init_once(void *foo)
{
	struct dlmfs_inode_private *ip =
		(struct dlmfs_inode_private *) foo;

	ip->ip_conn = NULL;
	ip->ip_parent = NULL;

	inode_init_once(&ip->ip_vfs_inode);
}

static struct inode *dlmfs_alloc_inode(struct super_block *sb)
{
	struct dlmfs_inode_private *ip;

	ip = alloc_inode_sb(sb, dlmfs_inode_cache, GFP_NOFS);
	if (!ip)
		return NULL;

	return &ip->ip_vfs_inode;
}

static void dlmfs_free_inode(struct inode *inode)
{
	kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode));
}

static void dlmfs_evict_inode(struct inode *inode)
{
	int status;
	struct dlmfs_inode_private *ip;
	struct user_lock_res *lockres;
	int teardown;

	clear_inode(inode);

	mlog(0, "inode %lu\n", inode->i_ino);

	ip = DLMFS_I(inode);
	lockres = &ip->ip_lockres;

	if (S_ISREG(inode->i_mode)) {
		spin_lock(&lockres->l_lock);
		teardown = !!(lockres->l_flags & USER_LOCK_IN_TEARDOWN);
		spin_unlock(&lockres->l_lock);
		if (!teardown) {
			status = user_dlm_destroy_lock(lockres);
			if (status < 0)
				mlog_errno(status);
		}
		iput(ip->ip_parent);
		goto clear_fields;
	}

	mlog(0, "we're a directory, ip->ip_conn = 0x%p\n", ip->ip_conn);
	/* we must be a directory. If required, let's unregister the
	 * dlm context now. */
	if (ip->ip_conn)
		user_dlm_unregister(ip->ip_conn);
clear_fields:
	ip->ip_parent = NULL;
	ip->ip_conn = NULL;
}

static struct inode *dlmfs_get_root_inode(struct super_block *sb)
{
	struct inode *inode = new_inode(sb);
	umode_t mode = S_IFDIR | 0755;

	if (inode) {
		inode->i_ino = get_next_ino();
		inode_init_owner(&nop_mnt_idmap, inode, NULL, mode);
		simple_inode_init_ts(inode);
		inc_nlink(inode);

		inode->i_fop = &simple_dir_operations;
		inode->i_op = &dlmfs_root_inode_operations;
	}

	return inode;
}

static struct inode *dlmfs_get_inode(struct inode *parent,
				     struct dentry *dentry,
				     umode_t mode)
{
	struct super_block *sb = parent->i_sb;
	struct inode * inode = new_inode(sb);
	struct dlmfs_inode_private *ip;

	if (!inode)
		return NULL;

	inode->i_ino = get_next_ino();
	inode_init_owner(&nop_mnt_idmap, inode, parent, mode);
	simple_inode_init_ts(inode);

	ip = DLMFS_I(inode);
	ip->ip_conn = DLMFS_I(parent)->ip_conn;

	switch (mode & S_IFMT) {
	default:
		/* for now we don't support anything other than
		 * directories and regular files. */
		BUG();
		break;
	case S_IFREG:
		inode->i_op = &dlmfs_file_inode_operations;
		inode->i_fop = &dlmfs_file_operations;

		i_size_write(inode, DLM_LVB_LEN);

		user_dlm_lock_res_init(&ip->ip_lockres, dentry);

		/* released at clear_inode time, this ensures that we
		 * get to drop the dlm reference on each lock *before*
		 * we call the unregister code for releasing parent
		 * directories. */
		ip->ip_parent = igrab(parent);
		BUG_ON(!ip->ip_parent);
		break;
	case S_IFDIR:
		inode->i_op = &dlmfs_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;

		/* directory inodes start off with i_nlink ==
		 * 2 (for "." entry) */
		inc_nlink(inode);
		break;
	}

	return inode;
}

/*
 * File creation. Allocate an inode, and we're done..
 */
/* SMP-safe */
static struct dentry *dlmfs_mkdir(struct mnt_idmap * idmap,
				  struct inode * dir,
				  struct dentry * dentry,
				  umode_t mode)
{
	int status;
	struct inode *inode = NULL;
	const struct qstr *domain = &dentry->d_name;
	struct dlmfs_inode_private *ip;
	struct ocfs2_cluster_connection *conn;

	mlog(0, "mkdir %.*s\n", domain->len, domain->name);

	/* verify that we have a proper domain */
	if (domain->len >= GROUP_NAME_MAX) {
		status = -EINVAL;
		mlog(ML_ERROR, "invalid domain name for directory.\n");
		goto bail;
	}

	inode = dlmfs_get_inode(dir, dentry, mode | S_IFDIR);
	if (!inode) {
		status = -ENOMEM;
		mlog_errno(status);
		goto bail;
	}

	ip = DLMFS_I(inode);

	conn = user_dlm_register(domain);
	if (IS_ERR(conn)) {
		status = PTR_ERR(conn);
		mlog(ML_ERROR, "Error %d could not register domain \"%.*s\"\n",
		     status, domain->len, domain->name);
		goto bail;
	}
	ip->ip_conn = conn;

	inc_nlink(dir);
	d_instantiate(dentry, inode);
	dget(dentry);	/* Extra count - pin the dentry in core */

	status = 0;
bail:
	if (status < 0)
		iput(inode);
	return ERR_PTR(status);
}

static int dlmfs_create(struct mnt_idmap *idmap,
			struct inode *dir,
			struct dentry *dentry,
			umode_t mode,
			bool excl)
{
	int status = 0;
	struct inode *inode;
	const struct qstr *name = &dentry->d_name;

	mlog(0, "create %.*s\n", name->len, name->name);

	/* verify name is valid and doesn't contain any dlm reserved
	 * characters */
	if (name->len >= USER_DLM_LOCK_ID_MAX_LEN ||
	    name->name[0] == '$') {
		status = -EINVAL;
		mlog(ML_ERROR, "invalid lock name, %.*s\n", name->len,
		     name->name);
		goto bail;
	}

	inode = dlmfs_get_inode(dir, dentry, mode | S_IFREG);
	if (!inode) {
		status = -ENOMEM;
		mlog_errno(status);
		goto bail;
	}

	d_instantiate(dentry, inode);
	dget(dentry);	/* Extra count - pin the dentry in core */
bail:
	return status;
}

static int dlmfs_unlink(struct inode *dir,
			struct dentry *dentry)
{
	int status;
	struct inode *inode = d_inode(dentry);

	mlog(0, "unlink inode %lu\n", inode->i_ino);

	/* if there are no current holders, or none that are waiting
	 * to acquire a lock, this basically destroys our lockres. */
	status = user_dlm_destroy_lock(&DLMFS_I(inode)->ip_lockres);
	if (status < 0) {
		mlog(ML_ERROR, "unlink %pd, error %d from destroy\n",
		     dentry, status);
		goto bail;
	}
	status = simple_unlink(dir, dentry);
bail:
	return status;
}

static int dlmfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_blocksize = PAGE_SIZE;
	sb->s_blocksize_bits = PAGE_SHIFT;
	sb->s_magic = DLMFS_MAGIC;
	sb->s_op = &dlmfs_ops;
	sb->s_root = d_make_root(dlmfs_get_root_inode(sb));
	if (!sb->s_root)
		return -ENOMEM;
	return 0;
}

static const struct file_operations dlmfs_file_operations = {
	.open		= dlmfs_file_open,
	.release	= dlmfs_file_release,
	.poll		= dlmfs_file_poll,
	.read		= dlmfs_file_read,
	.write		= dlmfs_file_write,
	.llseek		= default_llseek,
};

static const struct inode_operations dlmfs_dir_inode_operations = {
	.create		= dlmfs_create,
	.lookup		= simple_lookup,
	.unlink		= dlmfs_unlink,
};

/* this way we can restrict mkdir to only the toplevel of the fs. */
static const struct inode_operations dlmfs_root_inode_operations = {
	.lookup		= simple_lookup,
	.mkdir		= dlmfs_mkdir,
	.rmdir		= simple_rmdir,
};

static const struct super_operations dlmfs_ops = {
	.statfs		= simple_statfs,
	.alloc_inode	= dlmfs_alloc_inode,
	.free_inode	= dlmfs_free_inode,
	.evict_inode	= dlmfs_evict_inode,
	.drop_inode	= generic_delete_inode,
};

static const struct inode_operations dlmfs_file_inode_operations = {
	.getattr	= simple_getattr,
	.setattr	= dlmfs_file_setattr,
};

static int dlmfs_get_tree(struct fs_context *fc)
{
	return get_tree_nodev(fc, dlmfs_fill_super);
}

static const struct fs_context_operations dlmfs_context_ops = {
	.get_tree	= dlmfs_get_tree,
};

static int dlmfs_init_fs_context(struct fs_context *fc)
{
	fc->ops = &dlmfs_context_ops;
	return 0;
}

static struct file_system_type dlmfs_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "ocfs2_dlmfs",
	.kill_sb	= kill_litter_super,
	.init_fs_context = dlmfs_init_fs_context,
};
MODULE_ALIAS_FS("ocfs2_dlmfs");

static int __init init_dlmfs_fs(void)
{
	int status;
	int cleanup_inode = 0, cleanup_worker = 0;

	dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache",
				sizeof(struct dlmfs_inode_private),
				0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
					SLAB_ACCOUNT),
				dlmfs_init_once);
	if (!dlmfs_inode_cache) {
		status = -ENOMEM;
		goto bail;
	}
	cleanup_inode = 1;

	user_dlm_worker = alloc_workqueue("user_dlm", WQ_MEM_RECLAIM, 0);
	if (!user_dlm_worker) {
		status = -ENOMEM;
		goto bail;
	}
	cleanup_worker = 1;

	user_dlm_set_locking_protocol();
	status = register_filesystem(&dlmfs_fs_type);
bail:
	if (status) {
		if (cleanup_inode)
			kmem_cache_destroy(dlmfs_inode_cache);
		if (cleanup_worker)
			destroy_workqueue(user_dlm_worker);
	} else
		printk("OCFS2 User DLM kernel interface loaded\n");
	return status;
}

static void __exit exit_dlmfs_fs(void)
{
	unregister_filesystem(&dlmfs_fs_type);

	destroy_workqueue(user_dlm_worker);

	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();
	kmem_cache_destroy(dlmfs_inode_cache);
}

MODULE_AUTHOR("Oracle");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("OCFS2 DLM-Filesystem");

module_init(init_dlmfs_fs)
module_exit(exit_dlmfs_fs)
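/*
 * Illustrative userspace sketch (not part of the original file): taking a
 * shared (O_RDONLY -> PRMODE) lock through dlmfs and reading its LVB, per
 * the open-flag mapping in dlmfs_decode_open_flags() above. The mount point
 * and domain/lock names below are hypothetical.
 */
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>

int try_shared_lock(void)
{
	char lvb[64];
	/* O_NONBLOCK maps to DLM_LKF_NOQUEUE; an ungranted lock then
	 * fails with ETXTBSY instead of blocking. */
	int fd = open("/dlm/mydomain/mylock", O_RDONLY | O_NONBLOCK);

	if (fd < 0)
		return errno == ETXTBSY ? 1 : -1;	/* 1: lock busy */
	read(fd, lvb, sizeof(lvb));	/* reads the lock value block */
	close(fd);			/* release drops the cluster lock */
	return 0;
}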
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/parser.h>
#include <linux/errno.h>
#include <linux/stringhash.h>

#include "utf8n.h"

int utf8_validate(const struct unicode_map *um, const struct qstr *str)
{
	if (utf8nlen(um, UTF8_NFDI, str->name, str->len) < 0)
		return -1;
	return 0;
}
EXPORT_SYMBOL(utf8_validate);

int utf8_strncmp(const struct unicode_map *um,
		 const struct qstr *s1, const struct qstr *s2)
{
	struct utf8cursor cur1, cur2;
	int c1, c2;

	if (utf8ncursor(&cur1, um, UTF8_NFDI, s1->name, s1->len) < 0)
		return -EINVAL;

	if (utf8ncursor(&cur2, um, UTF8_NFDI, s2->name, s2->len) < 0)
		return -EINVAL;

	do {
		c1 = utf8byte(&cur1);
		c2 = utf8byte(&cur2);

		if (c1 < 0 || c2 < 0)
			return -EINVAL;
		if (c1 != c2)
			return 1;
	} while (c1);

	return 0;
}
EXPORT_SYMBOL(utf8_strncmp);

int utf8_strncasecmp(const struct unicode_map *um,
		     const struct qstr *s1, const struct qstr *s2)
{
	struct utf8cursor cur1, cur2;
	int c1, c2;

	if (utf8ncursor(&cur1, um, UTF8_NFDICF, s1->name, s1->len) < 0)
		return -EINVAL;

	if (utf8ncursor(&cur2, um, UTF8_NFDICF, s2->name, s2->len) < 0)
		return -EINVAL;

	do {
		c1 = utf8byte(&cur1);
		c2 = utf8byte(&cur2);

		if (c1 < 0 || c2 < 0)
			return -EINVAL;
		if (c1 != c2)
			return 1;
	} while (c1);

	return 0;
}
EXPORT_SYMBOL(utf8_strncasecmp);

/* String cf is expected to be a valid UTF-8 casefolded
 * string.
 */
int utf8_strncasecmp_folded(const struct unicode_map *um,
			    const struct qstr *cf,
			    const struct qstr *s1)
{
	struct utf8cursor cur1;
	int c1, c2;
	int i = 0;

	if (utf8ncursor(&cur1, um, UTF8_NFDICF, s1->name, s1->len) < 0)
		return -EINVAL;

	do {
		c1 = utf8byte(&cur1);
		c2 = cf->name[i++];

		if (c1 < 0)
			return -EINVAL;
		if (c1 != c2)
			return 1;
	} while (c1);

	return 0;
}
EXPORT_SYMBOL(utf8_strncasecmp_folded);

int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
		  unsigned char *dest, size_t dlen)
{
	struct utf8cursor cur;
	size_t nlen = 0;

	if (utf8ncursor(&cur, um, UTF8_NFDICF, str->name, str->len) < 0)
		return -EINVAL;

	for (nlen = 0; nlen < dlen; nlen++) {
		int c = utf8byte(&cur);

		dest[nlen] = c;
		if (!c)
			return nlen;
		if (c == -1)
			break;
	}

	return -EINVAL;
}
EXPORT_SYMBOL(utf8_casefold);

int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
		       struct qstr *str)
{
	struct utf8cursor cur;
	int c;
	unsigned long hash = init_name_hash(salt);

	if (utf8ncursor(&cur, um, UTF8_NFDICF, str->name, str->len) < 0)
		return -EINVAL;

	while ((c = utf8byte(&cur))) {
		if (c < 0)
			return -EINVAL;
		hash = partial_name_hash((unsigned char)c, hash);
	}
	str->hash = end_name_hash(hash);
	return 0;
}
EXPORT_SYMBOL(utf8_casefold_hash);

int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
		   unsigned char *dest, size_t dlen)
{
	struct utf8cursor cur;
	ssize_t nlen = 0;

	if (utf8ncursor(&cur, um, UTF8_NFDI, str->name, str->len) < 0)
		return -EINVAL;

	for (nlen = 0; nlen < dlen; nlen++) {
		int c = utf8byte(&cur);

		dest[nlen] = c;
		if (!c)
			return nlen;
		if (c == -1)
			break;
	}

	return -EINVAL;
}
EXPORT_SYMBOL(utf8_normalize);

static const struct utf8data *find_table_version(const struct utf8data *table,
		size_t nr_entries, unsigned int version)
{
	size_t i = nr_entries - 1;

	while (version < table[i].maxage)
		i--;
	if (version > table[i].maxage)
		return NULL;
	return &table[i];
}

struct unicode_map *utf8_load(unsigned int version)
{
	struct unicode_map *um;

	um = kzalloc(sizeof(struct unicode_map), GFP_KERNEL);
	if (!um)
		return ERR_PTR(-ENOMEM);
	um->version = version;

	um->tables = symbol_request(utf8_data_table);
	if (!um->tables)
		goto out_free_um;

	if (!utf8version_is_supported(um, version))
		goto out_symbol_put;
	um->ntab[UTF8_NFDI] = find_table_version(um->tables->utf8nfdidata,
			um->tables->utf8nfdidata_size, um->version);
	if (!um->ntab[UTF8_NFDI])
		goto out_symbol_put;
	um->ntab[UTF8_NFDICF] = find_table_version(um->tables->utf8nfdicfdata,
			um->tables->utf8nfdicfdata_size, um->version);
	if (!um->ntab[UTF8_NFDICF])
		goto out_symbol_put;
	return um;

out_symbol_put:
	symbol_put(utf8_data_table);
out_free_um:
	kfree(um);
	return ERR_PTR(-EINVAL);
}
EXPORT_SYMBOL(utf8_load);

void utf8_unload(struct unicode_map *um)
{
	if (um) {
		symbol_put(utf8_data_table);
		kfree(um);
	}
}
EXPORT_SYMBOL(utf8_unload);

/**
 * utf8_parse_version - Parse a UTF-8 version number from a string
 *
 * @version: input string
 *
 * Returns the parsed version on success, negative code on error
 */
int utf8_parse_version(char *version)
{
	substring_t args[3];
	unsigned int maj, min, rev;
	static const struct match_token token[] = {
		{1, "%d.%d.%d"},
		{0, NULL}
	};

	if (match_token(version, token, args) != 1)
		return -EINVAL;

	if (match_int(&args[0], &maj) || match_int(&args[1], &min) ||
	    match_int(&args[2], &rev))
		return -EINVAL;

	return UNICODE_AGE(maj, min, rev);
}
EXPORT_SYMBOL(utf8_parse_version);
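/*
 * Illustrative in-kernel sketch (not part of the original file): how a
 * filesystem might combine the API above for a case-insensitive name
 * compare. The version string and sample names are hypothetical; under
 * full casefolding, "Straße" and "strasse" should compare equal.
 */
static int example_casefold_cmp(void)
{
	struct unicode_map *um;
	const struct qstr a = QSTR_INIT("Straße", 7);
	const struct qstr b = QSTR_INIT("strasse", 7);
	char ver[] = "12.1.0";
	int age = utf8_parse_version(ver);	/* -> UNICODE_AGE(12, 1, 0) */
	int ret;

	if (age < 0)
		return age;
	um = utf8_load(age);
	if (IS_ERR(um))
		return PTR_ERR(um);
	/* 0: casefolded forms match, 1: they differ, <0: invalid input */
	ret = utf8_strncasecmp(um, &a, &b);
	utf8_unload(um);
	return ret;
}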
// SPDX-License-Identifier: GPL-2.0-only
/* tnum: tracked (or tristate) numbers
 *
 * A tnum tracks knowledge about the bits of a value. Each bit can be either
 * known (0 or 1), or unknown (x). Arithmetic operations on tnums will
 * propagate the unknown bits such that the tnum result represents all the
 * possible results for possible values of the operands.
 */
#include <linux/kernel.h>
#include <linux/tnum.h>

#define TNUM(_v, _m)	(struct tnum){.value = _v, .mask = _m}
/* A completely unknown value */
const struct tnum tnum_unknown = { .value = 0, .mask = -1 };

struct tnum tnum_const(u64 value)
{
	return TNUM(value, 0);
}

struct tnum tnum_range(u64 min, u64 max)
{
	u64 chi = min ^ max, delta;
	u8 bits = fls64(chi);

	/* special case, needed because 1ULL << 64 is undefined */
	if (bits > 63)
		return tnum_unknown;
	/* e.g. if chi = 4, bits = 3, delta = (1<<3) - 1 = 7.
	 * if chi = 0, bits = 0, delta = (1<<0) - 1 = 0, so we return
	 * constant min (since min == max).
	 */
	delta = (1ULL << bits) - 1;
	return TNUM(min & ~delta, delta);
}

struct tnum tnum_lshift(struct tnum a, u8 shift)
{
	return TNUM(a.value << shift, a.mask << shift);
}

struct tnum tnum_rshift(struct tnum a, u8 shift)
{
	return TNUM(a.value >> shift, a.mask >> shift);
}

struct tnum tnum_arshift(struct tnum a, u8 min_shift, u8 insn_bitness)
{
	/* if a.value is negative, arithmetic shifting by minimum shift
	 * will have larger negative offset compared to more shifting.
	 * If a.value is nonnegative, arithmetic shifting by minimum shift
	 * will have larger positive offset compared to more shifting.
	 */
	if (insn_bitness == 32)
		return TNUM((u32)(((s32)a.value) >> min_shift),
			    (u32)(((s32)a.mask) >> min_shift));
	else
		return TNUM((s64)a.value >> min_shift,
			    (s64)a.mask >> min_shift);
}

struct tnum tnum_add(struct tnum a, struct tnum b)
{
	u64 sm, sv, sigma, chi, mu;

	sm = a.mask + b.mask;
	sv = a.value + b.value;
	sigma = sm + sv;
	chi = sigma ^ sv;
	mu = chi | a.mask | b.mask;
	return TNUM(sv & ~mu, mu);
}

struct tnum tnum_sub(struct tnum a, struct tnum b)
{
	u64 dv, alpha, beta, chi, mu;

	dv = a.value - b.value;
	alpha = dv + a.mask;
	beta = dv - b.mask;
	chi = alpha ^ beta;
	mu = chi | a.mask | b.mask;
	return TNUM(dv & ~mu, mu);
}

struct tnum tnum_and(struct tnum a, struct tnum b)
{
	u64 alpha, beta, v;

	alpha = a.value | a.mask;
	beta = b.value | b.mask;
	v = a.value & b.value;
	return TNUM(v, alpha & beta & ~v);
}

struct tnum tnum_or(struct tnum a, struct tnum b)
{
	u64 v, mu;

	v = a.value | b.value;
	mu = a.mask | b.mask;
	return TNUM(v, mu & ~v);
}

struct tnum tnum_xor(struct tnum a, struct tnum b)
{
	u64 v, mu;

	v = a.value ^ b.value;
	mu = a.mask | b.mask;
	return TNUM(v & ~mu, mu);
}

/* Generate partial products by multiplying each bit in the multiplier (tnum a)
 * with the multiplicand (tnum b), and add the partial products after
 * appropriately bit-shifting them. Instead of directly performing tnum addition
 * on the generated partial products, equivalently, decompose each partial
 * product into two tnums, consisting of the value-sum (acc_v) and the
 * mask-sum (acc_m) and then perform tnum addition on them. The following paper
 * explains the algorithm in more detail: https://arxiv.org/abs/2105.05398.
 */
struct tnum tnum_mul(struct tnum a, struct tnum b)
{
	u64 acc_v = a.value * b.value;
	struct tnum acc_m = TNUM(0, 0);

	while (a.value || a.mask) {
		/* LSB of tnum a is a certain 1 */
		if (a.value & 1)
			acc_m = tnum_add(acc_m, TNUM(0, b.mask));
		/* LSB of tnum a is uncertain */
		else if (a.mask & 1)
			acc_m = tnum_add(acc_m, TNUM(0, b.value | b.mask));
		/* Note: no case for LSB is certain 0 */
		a = tnum_rshift(a, 1);
		b = tnum_lshift(b, 1);
	}
	return tnum_add(TNUM(acc_v, 0), acc_m);
}

/* Note that if a and b disagree - i.e. one has a 'known 1' where the other has
 * a 'known 0' - this will return a 'known 1' for that bit.
 */
struct tnum tnum_intersect(struct tnum a, struct tnum b)
{
	u64 v, mu;

	v = a.value | b.value;
	mu = a.mask & b.mask;
	return TNUM(v & ~mu, mu);
}

struct tnum tnum_cast(struct tnum a, u8 size)
{
	a.value &= (1ULL << (size * 8)) - 1;
	a.mask &= (1ULL << (size * 8)) - 1;
	return a;
}

bool tnum_is_aligned(struct tnum a, u64 size)
{
	if (!size)
		return true;
	return !((a.value | a.mask) & (size - 1));
}

bool tnum_in(struct tnum a, struct tnum b)
{
	if (b.mask & ~a.mask)
		return false;
	b.value &= ~a.mask;
	return a.value == b.value;
}

int tnum_sbin(char *str, size_t size, struct tnum a)
{
	size_t n;

	for (n = 64; n; n--) {
		if (n < size) {
			if (a.mask & 1)
				str[n - 1] = 'x';
			else if (a.value & 1)
				str[n - 1] = '1';
			else
				str[n - 1] = '0';
		}
		a.mask >>= 1;
		a.value >>= 1;
	}
	str[min(size - 1, (size_t)64)] = 0;
	return 64;
}

struct tnum tnum_subreg(struct tnum a)
{
	return tnum_cast(a, 4);
}

struct tnum tnum_clear_subreg(struct tnum a)
{
	return tnum_lshift(tnum_rshift(a, 32), 32);
}

struct tnum tnum_with_subreg(struct tnum reg, struct tnum subreg)
{
	return tnum_or(tnum_clear_subreg(reg), tnum_subreg(subreg));
}

struct tnum tnum_const_subreg(struct tnum a, u32 value)
{
	return tnum_with_subreg(a, tnum_const(value));
}
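/*
 * Illustrative standalone sketch (not part of the original file): the
 * value/mask algebra of tnum_add() above, replayed on plain uint64_t.
 * With a = const 5 and b = {value 0, mask 3} (low two bits unknown, i.e.
 * 0..3), the possible sums 5..8 straddle bit 3, so all four low bits of
 * the result become unknown: {value 0, mask 0xf}.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t av = 5, am = 0;	/* known constant 5 */
	uint64_t bv = 0, bm = 3;	/* bits 0-1 unknown */
	uint64_t sm = am + bm;
	uint64_t sv = av + bv;
	uint64_t sigma = sm + sv;
	uint64_t chi = sigma ^ sv;	/* bits a carry could disturb */
	uint64_t mu = chi | am | bm;	/* all uncertain bits of the sum */

	printf("value=%#llx mask=%#llx\n",
	       (unsigned long long)(sv & ~mu), (unsigned long long)mu);
	return 0;	/* prints: value=0 mask=0xf */
}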
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Driver for Tascam US-X2Y USB soundcards
 *
 * FPGA Loader + ALSA Startup
 *
 * Copyright (c) 2003 by Karsten Wiese <annabellesgarden@yahoo.de>
 */

#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/usb.h>
#include <sound/core.h>
#include <sound/memalloc.h>
#include <sound/pcm.h>
#include <sound/hwdep.h>
#include "usx2y.h"
#include "usbusx2y.h"
#include "usX2Yhwdep.h"

static vm_fault_t snd_us428ctls_vm_fault(struct vm_fault *vmf)
{
	unsigned long offset;
	struct page *page;
	void *vaddr;

	offset = vmf->pgoff << PAGE_SHIFT;
	vaddr = (char *)((struct usx2ydev *)vmf->vma->vm_private_data)->us428ctls_sharedmem + offset;
	page = virt_to_page(vaddr);
	get_page(page);
	vmf->page = page;
	return 0;
}

static const struct vm_operations_struct us428ctls_vm_ops = {
	.fault = snd_us428ctls_vm_fault,
};

static int snd_us428ctls_mmap(struct snd_hwdep *hw, struct file *filp,
			      struct vm_area_struct *area)
{
	unsigned long size = (unsigned long)(area->vm_end - area->vm_start);
	struct usx2ydev *us428 = hw->private_data;

	// FIXME: this hwdep interface is used twice: FPGA download and mmap
	// for controlling lights etc. Maybe better to use two hwdep devices?
	// So, as long as the device isn't fully initialised yet, we return
	// -EBUSY here.
	if (!(us428->chip_status & USX2Y_STAT_CHIP_INIT))
		return -EBUSY;

	/* if userspace tries to mmap beyond end of our buffer, fail */
	if (size > US428_SHAREDMEM_PAGES) {
		dev_dbg(hw->card->dev, "%s: mmap size %lu > %lu\n",
			__func__, size, (unsigned long)US428_SHAREDMEM_PAGES);
		return -EINVAL;
	}

	area->vm_ops = &us428ctls_vm_ops;
	vm_flags_set(area, VM_DONTEXPAND | VM_DONTDUMP);
	area->vm_private_data = hw->private_data;
	return 0;
}

static __poll_t snd_us428ctls_poll(struct snd_hwdep *hw, struct file *file,
				   poll_table *wait)
{
	__poll_t mask = 0;
	struct usx2ydev *us428 = hw->private_data;
	struct us428ctls_sharedmem *shm = us428->us428ctls_sharedmem;

	if (us428->chip_status & USX2Y_STAT_CHIP_HUP)
		return EPOLLHUP;

	poll_wait(file, &us428->us428ctls_wait_queue_head, wait);

	if (shm && shm->ctl_snapshot_last != shm->ctl_snapshot_red)
		mask |= EPOLLIN;

	return mask;
}

static int snd_usx2y_hwdep_dsp_status(struct snd_hwdep *hw,
				      struct snd_hwdep_dsp_status *info)
{
	static const char * const type_ids[USX2Y_TYPE_NUMS] = {
		[USX2Y_TYPE_122] = "us122",
		[USX2Y_TYPE_224] = "us224",
		[USX2Y_TYPE_428] = "us428",
	};
	struct usx2ydev *us428 = hw->private_data;
	int id = -1;

	switch (le16_to_cpu(us428->dev->descriptor.idProduct)) {
	case USB_ID_US122:
		id = USX2Y_TYPE_122;
		break;
	case USB_ID_US224:
		id = USX2Y_TYPE_224;
		break;
	case USB_ID_US428:
		id = USX2Y_TYPE_428;
		break;
	}
	if (id < 0)
		return -ENODEV;
	strcpy(info->id, type_ids[id]);
	info->num_dsps = 2;	// 0: Prepad Data, 1: FPGA Code
	if (us428->chip_status & USX2Y_STAT_CHIP_INIT)
		info->chip_ready = 1;
	info->version = USX2Y_DRIVER_VERSION;
	return 0;
}

static int usx2y_create_usbmidi(struct snd_card *card)
{
	static const struct snd_usb_midi_endpoint_info quirk_data_1 = {
		.out_ep = 0x06,
		.in_ep = 0x06,
		.out_cables = 0x001,
		.in_cables = 0x001
	};
	static const struct snd_usb_audio_quirk quirk_1 = {
		.vendor_name = "TASCAM",
		.product_name = NAME_ALLCAPS,
		.ifnum = 0,
		.type = QUIRK_MIDI_FIXED_ENDPOINT,
		.data = &quirk_data_1
	};
	static const struct snd_usb_midi_endpoint_info quirk_data_2 = {
		.out_ep = 0x06,
		.in_ep = 0x06,
		.out_cables = 0x003,
		.in_cables = 0x003
	};
	static const struct snd_usb_audio_quirk quirk_2 = {
		.vendor_name = "TASCAM",
		.product_name = "US428",
		.ifnum = 0,
		.type = QUIRK_MIDI_FIXED_ENDPOINT,
		.data = &quirk_data_2
	};
	struct usb_device *dev = usx2y(card)->dev;
	struct usb_interface *iface = usb_ifnum_to_if(dev, 0);
	const struct snd_usb_audio_quirk *quirk =
		le16_to_cpu(dev->descriptor.idProduct) == USB_ID_US428 ?
		&quirk_2 : &quirk_1;

	return snd_usbmidi_create(card, iface, &usx2y(card)->midi_list, quirk);
}

static int usx2y_create_alsa_devices(struct snd_card *card)
{
	int err;

	err = usx2y_create_usbmidi(card);
	if (err < 0) {
		dev_err(card->dev, "%s: usx2y_create_usbmidi error %i\n",
			__func__, err);
		return err;
	}
	err = usx2y_audio_create(card);
	if (err < 0)
		return err;
	err = usx2y_hwdep_pcm_new(card);
	if (err < 0)
		return err;
	err = snd_card_register(card);
	if (err < 0)
		return err;
	return 0;
}

static int snd_usx2y_hwdep_dsp_load(struct snd_hwdep *hw,
				    struct snd_hwdep_dsp_image *dsp)
{
	struct usx2ydev *priv = hw->private_data;
	struct usb_device *dev = priv->dev;
	int lret, err;
	char *buf;

	buf = memdup_user(dsp->image, dsp->length);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	err = usb_set_interface(dev, 0, 1);
	if (err)
		dev_err(&dev->dev, "usb_set_interface error\n");
	else
		err = usb_bulk_msg(dev, usb_sndbulkpipe(dev, 2), buf,
				   dsp->length, &lret, 6000);
	kfree(buf);
	if (err)
		return err;
	if (dsp->index == 1) {
		msleep(250);	// give the device some time
		err = usx2y_async_seq04_init(priv);
		if (err) {
			dev_err(&dev->dev, "usx2y_async_seq04_init error\n");
			return err;
		}
		err = usx2y_in04_init(priv);
		if (err) {
			dev_err(&dev->dev, "usx2y_in04_init error\n");
			return err;
		}
		err = usx2y_create_alsa_devices(hw->card);
		if (err) {
			dev_err(&dev->dev, "usx2y_create_alsa_devices error %i\n",
				err);
			return err;
		}
		priv->chip_status |= USX2Y_STAT_CHIP_INIT;
	}
	return err;
}

int usx2y_hwdep_new(struct snd_card *card, struct usb_device *device)
{
	int err;
	struct snd_hwdep *hw;
	struct usx2ydev *us428 = usx2y(card);

	err = snd_hwdep_new(card, SND_USX2Y_LOADER_ID, 0, &hw);
	if (err < 0)
		return err;

	hw->iface = SNDRV_HWDEP_IFACE_USX2Y;
	hw->private_data = us428;
	hw->ops.dsp_status = snd_usx2y_hwdep_dsp_status;
	hw->ops.dsp_load = snd_usx2y_hwdep_dsp_load;
	hw->ops.mmap = snd_us428ctls_mmap;
	hw->ops.poll = snd_us428ctls_poll;
	hw->exclusive = 1;
	sprintf(hw->name, "/dev/bus/usb/%03d/%03d", device->bus->busnum,
		device->devnum);

	us428->us428ctls_sharedmem = alloc_pages_exact(US428_SHAREDMEM_PAGES,
						       GFP_KERNEL);
	if (!us428->us428ctls_sharedmem)
		return -ENOMEM;
	memset(us428->us428ctls_sharedmem, -1, US428_SHAREDMEM_PAGES);
	us428->us428ctls_sharedmem->ctl_snapshot_last = -2;
	return 0;
}
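/*
 * Illustrative userspace sketch (not part of the original file): feeding the
 * two DSP images (index 0: prepad data, index 1: FPGA code, as reported by
 * snd_usx2y_hwdep_dsp_status() above) through the raw ALSA hwdep DSP-load
 * ioctl. The device path and firmware buffer are hypothetical, and the
 * SNDRV_HWDEP_IOCTL_DSP_LOAD interface is assumed from <sound/asound.h>.
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sound/asound.h>

static int load_dsp_image(const char *hwdep_path, unsigned int index,
			  unsigned char *fw, size_t len)
{
	struct snd_hwdep_dsp_image img;
	int fd = open(hwdep_path, O_RDWR);	/* e.g. "/dev/snd/hwC0D0" */

	if (fd < 0)
		return -1;
	memset(&img, 0, sizeof(img));
	img.index = index;	/* 0: prepad data, 1: FPGA code */
	img.image = fw;
	img.length = len;
	if (ioctl(fd, SNDRV_HWDEP_IOCTL_DSP_LOAD, &img) < 0) {
		close(fd);
		return -1;
	}
	close(fd);	/* after index 1, the driver registers the ALSA devices */
	return 0;
}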
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_UTSNAME_H
#define _LINUX_UTSNAME_H


#include <linux/sched.h>
#include <linux/nsproxy.h>
#include <linux/ns_common.h>
#include <linux/err.h>
#include <uapi/linux/utsname.h>

enum uts_proc {
	UTS_PROC_ARCH,
	UTS_PROC_OSTYPE,
	UTS_PROC_OSRELEASE,
	UTS_PROC_VERSION,
	UTS_PROC_HOSTNAME,
	UTS_PROC_DOMAINNAME,
};

struct user_namespace;
extern struct user_namespace init_user_ns;

struct uts_namespace {
	struct new_utsname name;
	struct user_namespace *user_ns;
	struct ucounts *ucounts;
	struct ns_common ns;
} __randomize_layout;
extern struct uts_namespace init_uts_ns;

#ifdef CONFIG_UTS_NS
static inline void get_uts_ns(struct uts_namespace *ns)
{
	refcount_inc(&ns->ns.count);
}

extern struct uts_namespace *copy_utsname(unsigned long flags,
	struct user_namespace *user_ns, struct uts_namespace *old_ns);
extern void free_uts_ns(struct uts_namespace *ns);

static inline void put_uts_ns(struct uts_namespace *ns)
{
	if (refcount_dec_and_test(&ns->ns.count))
		free_uts_ns(ns);
}

void uts_ns_init(void);
#else
static inline void get_uts_ns(struct uts_namespace *ns)
{
}

static inline void put_uts_ns(struct uts_namespace *ns)
{
}

static inline struct uts_namespace *copy_utsname(unsigned long flags,
	struct user_namespace *user_ns, struct uts_namespace *old_ns)
{
	if (flags & CLONE_NEWUTS)
		return ERR_PTR(-EINVAL);

	return old_ns;
}

static inline void uts_ns_init(void)
{
}
#endif

#ifdef CONFIG_PROC_SYSCTL
extern void uts_proc_notify(enum uts_proc proc);
#else
static inline void uts_proc_notify(enum uts_proc proc)
{
}
#endif

static inline struct new_utsname *utsname(void)
{
	return &current->nsproxy->uts_ns->name;
}

static inline struct new_utsname *init_utsname(void)
{
	return &init_uts_ns.name;
}

extern struct rw_semaphore uts_sem;

#endif /* _LINUX_UTSNAME_H */
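/*
 * Illustrative in-kernel sketch (not part of the original file): reading
 * fields of the current task's UTS namespace through the utsname() helper
 * declared above. uts_sem serializes against sethostname()/setdomainname()
 * updates, mirroring what the gethostname() syscall path does.
 */
static void example_print_uts(void)
{
	down_read(&uts_sem);
	pr_info("node=%s release=%s\n",
		utsname()->nodename, utsname()->release);
	up_read(&uts_sem);
}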
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_FPU_XCR_H
#define _ASM_X86_FPU_XCR_H

#define XCR_XFEATURE_ENABLED_MASK	0x00000000
#define XCR_XFEATURE_IN_USE_MASK	0x00000001

static __always_inline u64 xgetbv(u32 index)
{
	u32 eax, edx;

	asm volatile("xgetbv" : "=a" (eax), "=d" (edx) : "c" (index));
	return eax + ((u64)edx << 32);
}

static inline void xsetbv(u32 index, u64 value)
{
	u32 eax = value;
	u32 edx = value >> 32;

	asm volatile("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
}

/*
 * Return a mask of xfeatures which are currently being tracked
 * by the processor as being in the initial configuration.
 *
 * Callers should check X86_FEATURE_XGETBV1.
 */
static __always_inline u64 xfeatures_in_use(void)
{
	return xgetbv(XCR_XFEATURE_IN_USE_MASK);
}

#endif /* _ASM_X86_FPU_XCR_H */
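/*
 * Illustrative sketch (not part of the original file): using xgetbv() above
 * to check which xfeatures the OS has enabled in XCR0. The bit positions are
 * from the architectural XCR0 layout (bit 1: SSE/XMM state, bit 2: AVX/YMM
 * state); BIT_ULL() is assumed from <linux/bits.h>.
 */
static inline bool example_avx_state_enabled(void)
{
	/* XCR0 is readable whenever CR4.OSXSAVE is set */
	u64 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);

	return (xcr0 & (BIT_ULL(1) | BIT_ULL(2))) == (BIT_ULL(1) | BIT_ULL(2));
}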
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org>
 * (C) 2011 Intra2net AG <https://www.intra2net.com>
 */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/atomic.h>
#include <linux/refcount.h>
#include <linux/netlink.h>
#include <linux/rculist.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <net/netlink.h>
#include <net/sock.h>
#include <net/netns/generic.h>

#include <linux/netfilter.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_acct.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure");

struct nf_acct {
	atomic64_t		pkts;
	atomic64_t		bytes;
	unsigned long		flags;
	struct list_head	head;
	refcount_t		refcnt;
	char			name[NFACCT_NAME_MAX];
	struct rcu_head		rcu_head;
	char			data[];
};

struct nfacct_filter {
	u32 value;
	u32 mask;
};

struct nfnl_acct_net {
	struct list_head nfnl_acct_list;
};

static unsigned int nfnl_acct_net_id __read_mostly;

static inline struct nfnl_acct_net *nfnl_acct_pernet(struct net *net)
{
	return net_generic(net, nfnl_acct_net_id);
}

#define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES)
#define NFACCT_OVERQUOTA_BIT	2	/* NFACCT_F_OVERQUOTA */

static int nfnl_acct_new(struct sk_buff *skb, const struct nfnl_info *info,
			 const struct nlattr * const tb[])
{
	struct nfnl_acct_net *nfnl_acct_net = nfnl_acct_pernet(info->net);
	struct nf_acct *nfacct, *matching = NULL;
	unsigned int size = 0;
	char *acct_name;
	u32 flags = 0;

	if (!tb[NFACCT_NAME])
		return -EINVAL;

	acct_name = nla_data(tb[NFACCT_NAME]);
	if (strlen(acct_name) == 0)
		return -EINVAL;

	list_for_each_entry(nfacct, &nfnl_acct_net->nfnl_acct_list, head) {
		if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0)
			continue;

		if (info->nlh->nlmsg_flags & NLM_F_EXCL)
			return -EEXIST;

		matching = nfacct;
		break;
	}

	if (matching) {
		if (info->nlh->nlmsg_flags & NLM_F_REPLACE) {
			/* reset counters if you request a replacement. */
			atomic64_set(&matching->pkts, 0);
			atomic64_set(&matching->bytes, 0);
			smp_mb__before_atomic();
			/* reset overquota flag if quota is enabled. */
			if ((matching->flags & NFACCT_F_QUOTA))
				clear_bit(NFACCT_OVERQUOTA_BIT,
					  &matching->flags);
			return 0;
		}
		return -EBUSY;
	}

	if (tb[NFACCT_FLAGS]) {
		flags = ntohl(nla_get_be32(tb[NFACCT_FLAGS]));
		if (flags & ~NFACCT_F_QUOTA)
			return -EOPNOTSUPP;
		if ((flags & NFACCT_F_QUOTA) == NFACCT_F_QUOTA)
			return -EINVAL;
		if (flags & NFACCT_F_OVERQUOTA)
			return -EINVAL;
		if ((flags & NFACCT_F_QUOTA) && !tb[NFACCT_QUOTA])
			return -EINVAL;

		size += sizeof(u64);
	}

	nfacct = kzalloc(sizeof(struct nf_acct) + size, GFP_KERNEL);
	if (nfacct == NULL)
		return -ENOMEM;

	if (flags & NFACCT_F_QUOTA) {
		u64 *quota = (u64 *)nfacct->data;

		*quota = be64_to_cpu(nla_get_be64(tb[NFACCT_QUOTA]));
		nfacct->flags = flags;
	}

	nla_strscpy(nfacct->name, tb[NFACCT_NAME], NFACCT_NAME_MAX);

	if (tb[NFACCT_BYTES]) {
		atomic64_set(&nfacct->bytes,
			     be64_to_cpu(nla_get_be64(tb[NFACCT_BYTES])));
	}
	if (tb[NFACCT_PKTS]) {
		atomic64_set(&nfacct->pkts,
			     be64_to_cpu(nla_get_be64(tb[NFACCT_PKTS])));
	}
	refcount_set(&nfacct->refcnt, 1);
	list_add_tail_rcu(&nfacct->head, &nfnl_acct_net->nfnl_acct_list);
	return 0;
}

static int nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
			       u32 type, int event, struct nf_acct *acct)
{
	struct nlmsghdr *nlh;
	unsigned int flags = portid ? NLM_F_MULTI : 0;
	u64 pkts, bytes;
	u32 old_flags;

	event = nfnl_msg_type(NFNL_SUBSYS_ACCT, event);
	nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
			   NFNETLINK_V0, 0);
	if (!nlh)
		goto nlmsg_failure;

	if (nla_put_string(skb, NFACCT_NAME, acct->name))
		goto nla_put_failure;

	old_flags = acct->flags;
	if (type == NFNL_MSG_ACCT_GET_CTRZERO) {
		pkts = atomic64_xchg(&acct->pkts, 0);
		bytes = atomic64_xchg(&acct->bytes, 0);
		smp_mb__before_atomic();
		if (acct->flags & NFACCT_F_QUOTA)
			clear_bit(NFACCT_OVERQUOTA_BIT, &acct->flags);
	} else {
		pkts = atomic64_read(&acct->pkts);
		bytes = atomic64_read(&acct->bytes);
	}
	if (nla_put_be64(skb, NFACCT_PKTS, cpu_to_be64(pkts), NFACCT_PAD) ||
	    nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes), NFACCT_PAD) ||
	    nla_put_be32(skb, NFACCT_USE, htonl(refcount_read(&acct->refcnt))))
		goto nla_put_failure;
	if (acct->flags & NFACCT_F_QUOTA) {
		u64 *quota = (u64 *)acct->data;

		if (nla_put_be32(skb, NFACCT_FLAGS, htonl(old_flags)) ||
		    nla_put_be64(skb, NFACCT_QUOTA, cpu_to_be64(*quota),
				 NFACCT_PAD))
			goto nla_put_failure;
	}
	nlmsg_end(skb, nlh);
	return skb->len;

nlmsg_failure:
nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -1;
}

static int nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct nfnl_acct_net *nfnl_acct_net = nfnl_acct_pernet(net);
	struct nf_acct *cur, *last;
	const struct nfacct_filter *filter = cb->data;

	if (cb->args[2])
		return 0;

	last = (struct nf_acct *)cb->args[1];
	if (cb->args[1])
		cb->args[1] = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(cur, &nfnl_acct_net->nfnl_acct_list, head) {
		if (last) {
			if (cur != last)
				continue;

			last = NULL;
		}

		if (filter && (cur->flags & filter->mask) != filter->value)
			continue;

		if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).portid,
					cb->nlh->nlmsg_seq,
					NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
					NFNL_MSG_ACCT_NEW, cur) < 0) {
			cb->args[1] = (unsigned long)cur;
			break;
		}
	}
	if (!cb->args[1])
		cb->args[2] = 1;
	rcu_read_unlock();
	return skb->len;
}

static int nfnl_acct_done(struct netlink_callback *cb)
{
	kfree(cb->data);
	return 0;
}

static const struct nla_policy filter_policy[NFACCT_FILTER_MAX + 1] = {
	[NFACCT_FILTER_MASK]	= { .type = NLA_U32 },
	[NFACCT_FILTER_VALUE]	= { .type = NLA_U32 },
};

static int nfnl_acct_start(struct netlink_callback *cb)
{
	const struct nlattr *const attr = cb->data;
	struct nlattr *tb[NFACCT_FILTER_MAX + 1];
	struct nfacct_filter *filter;
	int err;

	if (!attr)
		return 0;

	err = nla_parse_nested_deprecated(tb, NFACCT_FILTER_MAX, attr,
					  filter_policy, NULL);
	if (err < 0)
		return err;

	if (!tb[NFACCT_FILTER_MASK] || !tb[NFACCT_FILTER_VALUE])
		return -EINVAL;

	filter = kzalloc(sizeof(struct nfacct_filter), GFP_KERNEL);
	if (!filter)
		return -ENOMEM;

	filter->mask = ntohl(nla_get_be32(tb[NFACCT_FILTER_MASK]));
	filter->value = ntohl(nla_get_be32(tb[NFACCT_FILTER_VALUE]));
	cb->data = filter;

	return 0;
}

static int nfnl_acct_get(struct sk_buff *skb, const struct nfnl_info *info,
			 const struct nlattr * const tb[])
{
	struct nfnl_acct_net *nfnl_acct_net = nfnl_acct_pernet(info->net);
	int ret = -ENOENT;
	struct nf_acct *cur;
	char *acct_name;

	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
		struct netlink_dump_control c = {
			.dump = nfnl_acct_dump,
			.start = nfnl_acct_start,
			.done = nfnl_acct_done,
			.data = (void *)tb[NFACCT_FILTER],
		};

		return netlink_dump_start(info->sk, skb, info->nlh, &c);
	}

	if (!tb[NFACCT_NAME])
		return -EINVAL;
	acct_name = nla_data(tb[NFACCT_NAME]);

	list_for_each_entry(cur, &nfnl_acct_net->nfnl_acct_list, head) {
		struct sk_buff *skb2;

		if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0)
			continue;

		skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
		if (skb2 == NULL) {
			ret = -ENOMEM;
			break;
		}

		ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).portid,
					  info->nlh->nlmsg_seq,
					  NFNL_MSG_TYPE(info->nlh->nlmsg_type),
					  NFNL_MSG_ACCT_NEW, cur);
		if (ret <= 0) {
			kfree_skb(skb2);
			break;
		}

		ret = nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
		break;
	}

	return ret;
}

/* try to delete object, fail if it is still in use. */
static int nfnl_acct_try_del(struct nf_acct *cur)
{
	int ret = 0;

	/* We want to avoid races with nfnl_acct_put. So only when the current
	 * refcnt is 1, we decrease it to 0.
	 */
	if (refcount_dec_if_one(&cur->refcnt)) {
		/* We are protected by nfnl mutex. */
		list_del_rcu(&cur->head);
		kfree_rcu(cur, rcu_head);
	} else {
		ret = -EBUSY;
	}
	return ret;
}

static int nfnl_acct_del(struct sk_buff *skb, const struct nfnl_info *info,
			 const struct nlattr * const tb[])
{
	struct nfnl_acct_net *nfnl_acct_net = nfnl_acct_pernet(info->net);
	struct nf_acct *cur, *tmp;
	int ret = -ENOENT;
	char *acct_name;

	if (!tb[NFACCT_NAME]) {
		list_for_each_entry_safe(cur, tmp,
					 &nfnl_acct_net->nfnl_acct_list, head)
			nfnl_acct_try_del(cur);

		return 0;
	}
	acct_name = nla_data(tb[NFACCT_NAME]);

	list_for_each_entry(cur, &nfnl_acct_net->nfnl_acct_list, head) {
		if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0)
			continue;

		ret = nfnl_acct_try_del(cur);
		if (ret < 0)
			return ret;

		break;
	}
	return ret;
}

static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = {
	[NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 },
	[NFACCT_BYTES] = { .type = NLA_U64 },
	[NFACCT_PKTS] = { .type = NLA_U64 },
	[NFACCT_FLAGS] = { .type = NLA_U32 },
	[NFACCT_QUOTA] = { .type = NLA_U64 },
	[NFACCT_FILTER] = { .type = NLA_NESTED },
};

static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = {
	[NFNL_MSG_ACCT_NEW] = {
		.call		= nfnl_acct_new,
		.type		= NFNL_CB_MUTEX,
		.attr_count	= NFACCT_MAX,
		.policy		= nfnl_acct_policy
	},
	[NFNL_MSG_ACCT_GET] = {
		.call		= nfnl_acct_get,
		.type		= NFNL_CB_MUTEX,
		.attr_count	= NFACCT_MAX,
		.policy		= nfnl_acct_policy
	},
	[NFNL_MSG_ACCT_GET_CTRZERO] = {
		.call		= nfnl_acct_get,
		.type		= NFNL_CB_MUTEX,
		.attr_count	= NFACCT_MAX,
		.policy		= nfnl_acct_policy
	},
	[NFNL_MSG_ACCT_DEL] = {
		.call		= nfnl_acct_del,
		.type		= NFNL_CB_MUTEX,
		.attr_count	= NFACCT_MAX,
		.policy		= nfnl_acct_policy
	},
};

static const struct nfnetlink_subsystem nfnl_acct_subsys = {
	.name		= "acct",
	.subsys_id	= NFNL_SUBSYS_ACCT,
	.cb_count	= NFNL_MSG_ACCT_MAX,
	.cb		= nfnl_acct_cb,
};

MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ACCT);

struct nf_acct *nfnl_acct_find_get(struct net *net, const char *acct_name)
{
	struct nfnl_acct_net *nfnl_acct_net = nfnl_acct_pernet(net);
	struct nf_acct *cur, *acct = NULL;

	rcu_read_lock();
	list_for_each_entry_rcu(cur, &nfnl_acct_net->nfnl_acct_list, head) {
		if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0)
			continue;

		if (!try_module_get(THIS_MODULE))
			goto err;

		if (!refcount_inc_not_zero(&cur->refcnt)) {
			module_put(THIS_MODULE);
			goto err;
		}

		acct = cur;
		break;
	}
err:
	rcu_read_unlock();
	return acct;
}
EXPORT_SYMBOL_GPL(nfnl_acct_find_get);

void nfnl_acct_put(struct nf_acct *acct)
{
	if (refcount_dec_and_test(&acct->refcnt))
		kfree_rcu(acct, rcu_head);

	module_put(THIS_MODULE);
}
EXPORT_SYMBOL_GPL(nfnl_acct_put);

void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct)
{
	atomic64_inc(&nfacct->pkts);
	atomic64_add(skb->len, &nfacct->bytes);
}
EXPORT_SYMBOL_GPL(nfnl_acct_update);

static void nfnl_overquota_report(struct net *net, struct nf_acct *nfacct)
{
	int ret;
	struct sk_buff *skb;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
	if (skb == NULL)
		return;

	ret = nfnl_acct_fill_info(skb, 0, 0, NFNL_MSG_ACCT_OVERQUOTA, 0,
				  nfacct);
	if (ret <= 0) {
		kfree_skb(skb);
		return;
	}
	nfnetlink_broadcast(net, skb, 0, NFNLGRP_ACCT_QUOTA, GFP_ATOMIC);
}

int nfnl_acct_overquota(struct net *net, struct nf_acct *nfacct)
{
	u64 now;
	u64 *quota;
	int ret = NFACCT_UNDERQUOTA;

	/* no place here if we don't have a quota */
	if (!(nfacct->flags & NFACCT_F_QUOTA))
		return NFACCT_NO_QUOTA;

	quota = (u64 *)nfacct->data;
	now = (nfacct->flags & NFACCT_F_QUOTA_PKTS) ?
		atomic64_read(&nfacct->pkts) : atomic64_read(&nfacct->bytes);

	ret = now > *quota;

	if (now >= *quota &&
	    !test_and_set_bit(NFACCT_OVERQUOTA_BIT, &nfacct->flags)) {
		nfnl_overquota_report(net, nfacct);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(nfnl_acct_overquota);

static int __net_init nfnl_acct_net_init(struct net *net)
{
	INIT_LIST_HEAD(&nfnl_acct_pernet(net)->nfnl_acct_list);

	return 0;
}

static void __net_exit nfnl_acct_net_exit(struct net *net)
{
	struct nfnl_acct_net *nfnl_acct_net = nfnl_acct_pernet(net);
	struct nf_acct *cur, *tmp;

	list_for_each_entry_safe(cur, tmp, &nfnl_acct_net->nfnl_acct_list, head) {
		list_del_rcu(&cur->head);

		if (refcount_dec_and_test(&cur->refcnt))
			kfree_rcu(cur, rcu_head);
	}
}

static struct pernet_operations nfnl_acct_ops = {
	.init	= nfnl_acct_net_init,
	.exit	= nfnl_acct_net_exit,
	.id	= &nfnl_acct_net_id,
	.size	= sizeof(struct nfnl_acct_net),
};

static int __init nfnl_acct_init(void)
{
	int ret;

	ret = register_pernet_subsys(&nfnl_acct_ops);
	if (ret < 0) {
		pr_err("nfnl_acct_init: failed to register pernet ops\n");
		goto err_out;
	}

	ret = nfnetlink_subsys_register(&nfnl_acct_subsys);
	if (ret < 0) {
		pr_err("nfnl_acct_init: cannot register with nfnetlink.\n");
		goto cleanup_pernet;
	}
	return 0;

cleanup_pernet:
	unregister_pernet_subsys(&nfnl_acct_ops);
err_out:
	return ret;
}

static void __exit nfnl_acct_exit(void)
{
	nfnetlink_subsys_unregister(&nfnl_acct_subsys);
	unregister_pernet_subsys(&nfnl_acct_ops);
}

module_init(nfnl_acct_init);
module_exit(nfnl_acct_exit);
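/*
 * Illustrative standalone sketch (not part of the original file): the two
 * predicates used above, replayed with plain integers. The dump path keeps
 * an entry when (flags & filter->mask) == filter->value, and the quota path
 * returns over-quota once the counter exceeds the limit, reporting once at
 * the first time the limit is reached. The flag bit value mirrors
 * NFACCT_F_QUOTA_PKTS and the sample numbers are hypothetical.
 */
#include <stdio.h>
#include <stdint.h>

#define F_QUOTA_PKTS	0x1	/* mirrors NFACCT_F_QUOTA_PKTS (bit 0) */

int main(void)
{
	uint32_t flags = F_QUOTA_PKTS;
	uint32_t mask = F_QUOTA_PKTS, value = F_QUOTA_PKTS;
	uint64_t pkts = 1000, quota = 1000;

	/* nfnl_acct_dump(): filter keeps only packet-quota counters */
	printf("dumped: %d\n", (flags & mask) == value);
	/* nfnl_acct_overquota(): ret = now > *quota; report at now >= *quota */
	printf("overquota ret=%d report=%d\n",
	       pkts > quota, pkts >= quota);	/* ret=0 report=1 */
	return 0;
}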
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#ifndef RXE_PARAM_H
#define RXE_PARAM_H

#include <uapi/rdma/rdma_user_rxe.h>

#define DEFAULT_MAX_VALUE (1 << 20)

static inline enum ib_mtu rxe_mtu_int_to_enum(int mtu)
{
	if (mtu < 256)
		return 0;
	else if (mtu < 512)
		return IB_MTU_256;
	else if (mtu < 1024)
		return IB_MTU_512;
	else if (mtu < 2048)
		return IB_MTU_1024;
	else if (mtu < 4096)
		return IB_MTU_2048;
	else
		return IB_MTU_4096;
}

/* Find the IB mtu for a given network MTU. */
static inline enum ib_mtu eth_mtu_int_to_enum(int mtu)
{
	mtu -= RXE_MAX_HDR_LENGTH;

	return rxe_mtu_int_to_enum(mtu);
}

/* default/initial rxe device parameter settings */
enum rxe_device_param {
	RXE_MAX_MR_SIZE			= -1ull,
	RXE_PAGE_SIZE_CAP		= 0xfffff000,
	RXE_MAX_QP_WR			= DEFAULT_MAX_VALUE,
	RXE_DEVICE_CAP_FLAGS		= IB_DEVICE_BAD_PKEY_CNTR
					| IB_DEVICE_BAD_QKEY_CNTR
					| IB_DEVICE_AUTO_PATH_MIG
					| IB_DEVICE_CHANGE_PHY_PORT
					| IB_DEVICE_UD_AV_PORT_ENFORCE
					| IB_DEVICE_PORT_ACTIVE_EVENT
					| IB_DEVICE_SYS_IMAGE_GUID
					| IB_DEVICE_RC_RNR_NAK_GEN
					| IB_DEVICE_SRQ_RESIZE
					| IB_DEVICE_MEM_MGT_EXTENSIONS
					| IB_DEVICE_MEM_WINDOW
					| IB_DEVICE_FLUSH_GLOBAL
					| IB_DEVICE_FLUSH_PERSISTENT
					| IB_DEVICE_MEM_WINDOW_TYPE_2B
					| IB_DEVICE_ATOMIC_WRITE,
	RXE_MAX_SGE			= 32,
	RXE_MAX_WQE_SIZE		= sizeof(struct rxe_send_wqe) +
					  sizeof(struct ib_sge) * RXE_MAX_SGE,
	RXE_MAX_INLINE_DATA		= RXE_MAX_WQE_SIZE -
					  sizeof(struct rxe_send_wqe),
	RXE_MAX_SGE_RD			= 32,
	RXE_MAX_CQ			= DEFAULT_MAX_VALUE,
	RXE_MAX_LOG_CQE			= 15,
	RXE_MAX_PD			= DEFAULT_MAX_VALUE,
	RXE_MAX_QP_RD_ATOM		= 128,
	RXE_MAX_RES_RD_ATOM		= 0x3f000,
	RXE_MAX_QP_INIT_RD_ATOM		= 128,
	RXE_MAX_MCAST_GRP		= 8192,
	RXE_MAX_MCAST_QP_ATTACH		= 56,
	RXE_MAX_TOT_MCAST_QP_ATTACH	= 0x70000,
	RXE_MAX_AH			= (1<<15) - 1,	/* 32Ki - 1 */
	RXE_MIN_AH_INDEX		= 1,
	RXE_MAX_AH_INDEX		= RXE_MAX_AH,
	RXE_MAX_SRQ_WR			= DEFAULT_MAX_VALUE,
	RXE_MIN_SRQ_WR			= 1,
	RXE_MAX_SRQ_SGE			= 27,
	RXE_MIN_SRQ_SGE			= 1,
	RXE_MAX_FMR_PAGE_LIST_LEN	= 512,
	RXE_MAX_PKEYS			= 64,
	RXE_LOCAL_CA_ACK_DELAY		= 15,

	RXE_MAX_UCONTEXT		= DEFAULT_MAX_VALUE,

	RXE_NUM_PORT			= 1,

	RXE_MIN_QP_INDEX		= 16,
	RXE_MAX_QP_INDEX		= DEFAULT_MAX_VALUE,
	RXE_MAX_QP			= DEFAULT_MAX_VALUE - RXE_MIN_QP_INDEX,

	RXE_MIN_SRQ_INDEX		= 0x00020001,
	RXE_MAX_SRQ_INDEX		= DEFAULT_MAX_VALUE,
	RXE_MAX_SRQ			= DEFAULT_MAX_VALUE - RXE_MIN_SRQ_INDEX,

	RXE_MIN_MR_INDEX		= 0x00000001,
	RXE_MAX_MR_INDEX		= DEFAULT_MAX_VALUE >> 1,
	RXE_MAX_MR			= RXE_MAX_MR_INDEX - RXE_MIN_MR_INDEX,
	RXE_MIN_MW_INDEX		= RXE_MAX_MR_INDEX + 1,
	RXE_MAX_MW_INDEX		= DEFAULT_MAX_VALUE,
	RXE_MAX_MW			= RXE_MAX_MW_INDEX - RXE_MIN_MW_INDEX,

	RXE_MAX_PKT_PER_ACK		= 64,

	RXE_MAX_UNACKED_PSNS		= 128,

	/* Max inflight SKBs per queue pair */
	RXE_INFLIGHT_SKBS_PER_QP_HIGH	= 64,
	RXE_INFLIGHT_SKBS_PER_QP_LOW	= 16,

	/* Max number of iterations of each work item
	 * before yielding the cpu to let other
	 * work make progress
	 */
	RXE_MAX_ITERATIONS		= 1024,

	/* Delay before calling arbiter timer */
	RXE_NSEC_ARB_TIMER_DELAY	= 200,

	/* IBTA v1.4 A3.3.1 VENDOR INFORMATION section */
	RXE_VENDOR_ID			= 0XFFFFFF,
};

/* default/initial rxe port parameters */
enum rxe_port_param {
	RXE_PORT_GID_TBL_LEN		= 1024,
	RXE_PORT_PORT_CAP_FLAGS		= IB_PORT_CM_SUP,
	RXE_PORT_MAX_MSG_SZ		= (1UL << 31),
	RXE_PORT_BAD_PKEY_CNTR		= 0,
	RXE_PORT_QKEY_VIOL_CNTR		= 0,
	RXE_PORT_LID			= 0,
	RXE_PORT_SM_LID			= 0,
	RXE_PORT_SM_SL			= 0,
	RXE_PORT_LMC			= 0,
	RXE_PORT_MAX_VL_NUM		= 1,
	RXE_PORT_SUBNET_TIMEOUT		= 0,
	RXE_PORT_INIT_TYPE_REPLY	= 0,
	RXE_PORT_ACTIVE_WIDTH		= IB_WIDTH_1X,
	RXE_PORT_ACTIVE_SPEED		= 1,
	RXE_PORT_PKEY_TBL_LEN		= 1,
	RXE_PORT_PHYS_STATE		= IB_PORT_PHYS_STATE_POLLING,
	RXE_PORT_SUBNET_PREFIX		= 0xfe80000000000000ULL,
};

/* default/initial port info parameters */
enum rxe_port_info_param {
	RXE_PORT_INFO_VL_CAP		= 4,	/* 1-8 */
	RXE_PORT_INFO_MTU_CAP		= 5,	/* 4096 */
	RXE_PORT_INFO_OPER_VL		= 1,	/* 1 */
};

#endif /* RXE_PARAM_H */
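/*
 * Illustrative standalone sketch (not part of the original file): the
 * threshold mapping of rxe_mtu_int_to_enum() above, replayed with plain
 * integers. For a standard 1500-byte Ethernet MTU, subtracting the RXE
 * header budget (RXE_MAX_HDR_LENGTH; the value 80 below is an assumption,
 * not the kernel constant) leaves room for IB_MTU_1024.
 */
#include <stdio.h>

static const char *ib_mtu_name(int mtu)
{
	if (mtu < 256)
		return "invalid";
	if (mtu < 512)
		return "IB_MTU_256";
	if (mtu < 1024)
		return "IB_MTU_512";
	if (mtu < 2048)
		return "IB_MTU_1024";
	if (mtu < 4096)
		return "IB_MTU_2048";
	return "IB_MTU_4096";
}

int main(void)
{
	int hdr = 80;	/* assumed header budget */

	printf("eth 1500 -> %s\n", ib_mtu_name(1500 - hdr)); /* IB_MTU_1024 */
	printf("eth 9000 -> %s\n", ib_mtu_name(9000 - hdr)); /* IB_MTU_4096 */
	return 0;
}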
// SPDX-License-Identifier: GPL-2.0 #include "blk-rq-qos.h" __read_mostly DEFINE_STATIC_KEY_FALSE(block_rq_qos); /* * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded, * false if 'v' + 1 would be bigger than 'below'. */ static bool atomic_inc_below(atomic_t *v, unsigned int below) { unsigned int cur = atomic_read(v); do { if (cur >= below) return false; } while (!atomic_try_cmpxchg(v, &cur, cur + 1)); return true; } bool rq_wait_inc_below(struct rq_wait *rq_wait, unsigned int limit) { return atomic_inc_below(&rq_wait->inflight, limit); } void __rq_qos_cleanup(struct rq_qos *rqos, struct bio *bio) { do { if (rqos->ops->cleanup) rqos->ops->cleanup(rqos, bio); rqos = rqos->next; } while (rqos); } void __rq_qos_done(struct rq_qos *rqos, struct request *rq) { do { if (rqos->ops->done) rqos->ops->done(rqos, rq); rqos = rqos->next; } while (rqos); } void __rq_qos_issue(struct rq_qos *rqos, struct request *rq) { do { if (rqos->ops->issue) rqos->ops->issue(rqos, rq); rqos = rqos->next; } while (rqos); } void __rq_qos_requeue(struct rq_qos *rqos, struct request *rq) { do { if (rqos->ops->requeue) rqos->ops->requeue(rqos, rq); rqos = rqos->next; } while (rqos); } void __rq_qos_throttle(struct rq_qos *rqos, struct bio *bio) { do { if (rqos->ops->throttle) rqos->ops->throttle(rqos, bio); rqos = rqos->next; } while (rqos); } void __rq_qos_track(struct rq_qos *rqos, struct request *rq, struct bio *bio) { do { if (rqos->ops->track) rqos->ops->track(rqos, rq, bio); rqos = rqos->next; } while (rqos); } void __rq_qos_merge(struct rq_qos *rqos, struct request *rq, struct bio *bio) { do { if (rqos->ops->merge) rqos->ops->merge(rqos, rq, bio); rqos = rqos->next; } while (rqos); } void __rq_qos_done_bio(struct rq_qos *rqos, struct bio *bio) { do { if (rqos->ops->done_bio) rqos->ops->done_bio(rqos, bio); rqos = rqos->next; } while (rqos); } void __rq_qos_queue_depth_changed(struct rq_qos *rqos) { do { if (rqos->ops->queue_depth_changed) rqos->ops->queue_depth_changed(rqos); rqos = rqos->next; } while (rqos); } /* * Return true, if we can't increase the depth further by scaling */
bool rq_depth_calc_max_depth(struct rq_depth *rqd) { unsigned int depth; bool ret = false; /* * For QD=1 devices, this is a special case. It's important for those * to have one request ready when one completes, so force a depth of * 2 for those devices. On the backend, it'll be a depth of 1 anyway, * since the device can't have more than that in flight. If we're * scaling down, then keep a setting of 1/1/1. */ if (rqd->queue_depth == 1) { if (rqd->scale_step > 0) rqd->max_depth = 1; else { rqd->max_depth = 2; ret = true; } } else { /* * scale_step == 0 is our default state. If we have suffered * latency spikes, step will be > 0, and we shrink the * allowed write depths. If step is < 0, we're only doing * writes, and we allow a temporarily higher depth to * increase performance. */ depth = min_t(unsigned int, rqd->default_depth, rqd->queue_depth); if (rqd->scale_step > 0) depth = 1 + ((depth - 1) >> min(31, rqd->scale_step)); else if (rqd->scale_step < 0) { unsigned int maxd = 3 * rqd->queue_depth / 4; depth = 1 + ((depth - 1) << -rqd->scale_step); if (depth > maxd) { depth = maxd; ret = true; } } rqd->max_depth = depth; } return ret; } /* Returns true on success and false if scaling up wasn't possible */ bool rq_depth_scale_up(struct rq_depth *rqd) { /* * Hit max in previous round, stop here */ if (rqd->scaled_max) return false; rqd->scale_step--; rqd->scaled_max = rq_depth_calc_max_depth(rqd); return true; } /* * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we * had a latency violation. Returns true on success and returns false if * scaling down wasn't possible. */ bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle) { /* * Stop scaling down when we've hit the limit. This also prevents * ->scale_step from going to crazy values, if the device can't * keep up. */ if (rqd->max_depth == 1) return false; if (rqd->scale_step < 0 && hard_throttle) rqd->scale_step = 0; else rqd->scale_step++; rqd->scaled_max = false; rq_depth_calc_max_depth(rqd); return true; } struct rq_qos_wait_data { struct wait_queue_entry wq; struct rq_wait *rqw; acquire_inflight_cb_t *cb; void *private_data; bool got_token; }; static int rq_qos_wake_function(struct wait_queue_entry *curr, unsigned int mode, int wake_flags, void *key) { struct rq_qos_wait_data *data = container_of(curr, struct rq_qos_wait_data, wq); /* * If we fail to get a budget, return -1 to interrupt the wake up loop * in __wake_up_common. */ if (!data->cb(data->rqw, data->private_data)) return -1; data->got_token = true; /* * autoremove_wake_function() removes the wait entry only when it * actually changed the task state. We want the wait always removed. * Remove explicitly and use default_wake_function(). */ default_wake_function(curr, mode, wake_flags, key); /* * Note that the order of operations is important as finish_wait() * tests whether @curr is removed without grabbing the lock. This * should be the last thing to do to make sure we will not have a * UAF access to @data. And the semantics of memory barrier in it * also make sure the waiter will see the latest @data->got_token * once list_empty_careful() in finish_wait() returns true. 
*/ list_del_init_careful(&curr->entry); return 1; } /** * rq_qos_wait - throttle on a rqw if we need to * @rqw: rqw to throttle on * @private_data: caller provided specific data * @acquire_inflight_cb: inc the rqw->inflight counter if we can * @cleanup_cb: the callback to cleanup in case we race with a waker * * This provides a uniform place for the rq_qos users to do their throttling. * Since you can end up with a lot of things sleeping at once, this manages the * waking up based on the resources available. The acquire_inflight_cb should * inc the rqw->inflight if we have the ability to do so, or return false if not * and then we will sleep until the room becomes available. * * cleanup_cb is in case that we race with a waker and need to cleanup the * inflight count accordingly. */ void rq_qos_wait(struct rq_wait *rqw, void *private_data, acquire_inflight_cb_t *acquire_inflight_cb, cleanup_cb_t *cleanup_cb) { struct rq_qos_wait_data data = { .rqw = rqw, .cb = acquire_inflight_cb, .private_data = private_data, .got_token = false, }; bool first_waiter; /* * If there are no waiters in the waiting queue, try to increase the * inflight counter if we can. Otherwise, prepare for adding ourselves * to the waiting queue. */ if (!waitqueue_active(&rqw->wait) && acquire_inflight_cb(rqw, private_data)) return; init_wait_func(&data.wq, rq_qos_wake_function); first_waiter = prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE); /* * Make sure there is at least one inflight process; otherwise, waiters * will never be woken up. Since there may be no inflight process before * adding ourselves to the waiting queue above, we need to try to * increase the inflight counter for ourselves. And it is sufficient to * guarantee that at least the first waiter to enter the waiting queue * will re-check the waiting condition before going to sleep, thus * ensuring forward progress. */ if (!data.got_token && first_waiter && acquire_inflight_cb(rqw, private_data)) { finish_wait(&rqw->wait, &data.wq); /* * We raced with rq_qos_wake_function() getting a token, * which means we now have two. Put our local token * and wake anyone else potentially waiting for one. * * Enough memory barrier in list_empty_careful() in * finish_wait() is paired with list_del_init_careful() * in rq_qos_wake_function() to make sure we will see * the latest @data->got_token. */ if (data.got_token) cleanup_cb(rqw, private_data); return; } /* we are now relying on the waker to increase our inflight counter. */ do { if (data.got_token) break; io_schedule(); set_current_state(TASK_UNINTERRUPTIBLE); } while (1); finish_wait(&rqw->wait, &data.wq); } void rq_qos_exit(struct request_queue *q) { mutex_lock(&q->rq_qos_mutex); while (q->rq_qos) { struct rq_qos *rqos = q->rq_qos; q->rq_qos = rqos->next; rqos->ops->exit(rqos); static_branch_dec(&block_rq_qos); } mutex_unlock(&q->rq_qos_mutex); } int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id, const struct rq_qos_ops *ops) { struct request_queue *q = disk->queue; unsigned int memflags; lockdep_assert_held(&q->rq_qos_mutex); rqos->disk = disk; rqos->id = id; rqos->ops = ops; /* * No IO can be in-flight when adding rqos, so freeze queue, which * is fine since we only support rq_qos for blk-mq queue. 
*/ memflags = blk_mq_freeze_queue(q); if (rq_qos_id(q, rqos->id)) goto ebusy; rqos->next = q->rq_qos; q->rq_qos = rqos; static_branch_inc(&block_rq_qos); blk_mq_unfreeze_queue(q, memflags); if (rqos->ops->debugfs_attrs) { mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_register_rqos(rqos); mutex_unlock(&q->debugfs_mutex); } return 0; ebusy: blk_mq_unfreeze_queue(q, memflags); return -EBUSY; } void rq_qos_del(struct rq_qos *rqos) { struct request_queue *q = rqos->disk->queue; struct rq_qos **cur; unsigned int memflags; lockdep_assert_held(&q->rq_qos_mutex); memflags = blk_mq_freeze_queue(q); for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) { if (*cur == rqos) { *cur = rqos->next; break; } } blk_mq_unfreeze_queue(q, memflags); mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_rqos(rqos); mutex_unlock(&q->debugfs_mutex); }
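/*
 * Illustrative sketch (standalone userspace, not kernel code): mirrors
 * the scale_step arithmetic of rq_depth_calc_max_depth() above so the
 * shrink/grow behaviour can be seen directly. The default_depth and
 * queue_depth values below are made up for the example.
 */
#include <stdio.h>

static unsigned int calc_max_depth(unsigned int default_depth,
				   unsigned int queue_depth, int scale_step)
{
	unsigned int depth = default_depth < queue_depth ?
				default_depth : queue_depth;

	if (scale_step > 0) {
		/* scaling down: roughly halve the depth per step */
		depth = 1 + ((depth - 1) >> (scale_step < 31 ? scale_step : 31));
	} else if (scale_step < 0) {
		/* scaling up: grow per step, capped at 3/4 of queue depth */
		unsigned int maxd = 3 * queue_depth / 4;

		depth = 1 + ((depth - 1) << -scale_step);
		if (depth > maxd)
			depth = maxd;
	}
	return depth;
}

int main(void)
{
	int step;

	/* e.g. default_depth = 64 on a queue_depth = 128 device */
	for (step = -2; step <= 3; step++)
		printf("scale_step %2d -> max_depth %u\n",
		       step, calc_max_depth(64, 128, step));
	/* prints 96, 96, 64, 32, 16, 8 */
	return 0;
}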
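/*
 * Illustrative kernel-style sketch (not from the kernel tree): a minimal
 * acquire_inflight_cb/cleanup_cb pair for rq_qos_wait(), assuming a
 * made-up policy that caps inflight bios at a fixed limit of 32. The
 * demo_* names are hypothetical; rq_wait_inc_below() and rq_qos_wait()
 * are the real interfaces shown above.
 */
static bool demo_inflight_cb(struct rq_wait *rqw, void *private_data)
{
	/* Take a slot if one is free; returning false makes the caller
	 * sleep on rqw->wait until a waker hands it a token.
	 */
	return rq_wait_inc_below(rqw, 32);
}

static void demo_cleanup_cb(struct rq_wait *rqw, void *private_data)
{
	/* We raced with a waker and hold an extra token: put it back
	 * and pass the wakeup along to any remaining waiter.
	 */
	atomic_dec(&rqw->inflight);
	if (waitqueue_active(&rqw->wait))
		wake_up(&rqw->wait);
}

/* Throttle path of the hypothetical policy: */
static void demo_throttle(struct rq_wait *rqw, struct bio *bio)
{
	rq_qos_wait(rqw, bio, demo_inflight_cb, demo_cleanup_cb);
}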
// SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2020 Facebook */ #include <linux/fs.h> #include <linux/anon_inodes.h> #include <linux/filter.h> #include <linux/bpf.h> #include <linux/rcupdate_trace.h> struct bpf_iter_target_info { struct list_head list; const
struct bpf_iter_reg *reg_info; u32 btf_id; /* cached value */ }; struct bpf_iter_link { struct bpf_link link; struct bpf_iter_aux_info aux; struct bpf_iter_target_info *tinfo; }; struct bpf_iter_priv_data { struct bpf_iter_target_info *tinfo; const struct bpf_iter_seq_info *seq_info; struct bpf_prog *prog; u64 session_id; u64 seq_num; bool done_stop; u8 target_private[] __aligned(8); }; static struct list_head targets = LIST_HEAD_INIT(targets); static DEFINE_MUTEX(targets_mutex); /* protect bpf_iter_link changes */ static DEFINE_MUTEX(link_mutex); /* incremented on every opened seq_file */ static atomic64_t session_id; static int prepare_seq_file(struct file *file, struct bpf_iter_link *link, const struct bpf_iter_seq_info *seq_info); static void bpf_iter_inc_seq_num(struct seq_file *seq) { struct bpf_iter_priv_data *iter_priv; iter_priv = container_of(seq->private, struct bpf_iter_priv_data, target_private); iter_priv->seq_num++; } static void bpf_iter_dec_seq_num(struct seq_file *seq) { struct bpf_iter_priv_data *iter_priv; iter_priv = container_of(seq->private, struct bpf_iter_priv_data, target_private); iter_priv->seq_num--; } static void bpf_iter_done_stop(struct seq_file *seq) { struct bpf_iter_priv_data *iter_priv; iter_priv = container_of(seq->private, struct bpf_iter_priv_data, target_private); iter_priv->done_stop = true; } static inline bool bpf_iter_target_support_resched(const struct bpf_iter_target_info *tinfo) { return tinfo->reg_info->feature & BPF_ITER_RESCHED; } static bool bpf_iter_support_resched(struct seq_file *seq) { struct bpf_iter_priv_data *iter_priv; iter_priv = container_of(seq->private, struct bpf_iter_priv_data, target_private); return bpf_iter_target_support_resched(iter_priv->tinfo); } /* maximum visited objects before bailing out */ #define MAX_ITER_OBJECTS 1000000 /* bpf_seq_read, a customized and simpler version for bpf iterator. * The following are differences from seq_read(): * . fixed buffer size (PAGE_SIZE << 3) * . assuming NULL ->llseek() * . stop() may call bpf program, handling potential overflow there */ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct seq_file *seq = file->private_data; size_t n, offs, copied = 0; int err = 0, num_objs = 0; bool can_resched; void *p; mutex_lock(&seq->lock); if (!seq->buf) { seq->size = PAGE_SIZE << 3; seq->buf = kvmalloc(seq->size, GFP_KERNEL); if (!seq->buf) { err = -ENOMEM; goto done; } } if (seq->count) { n = min(seq->count, size); err = copy_to_user(buf, seq->buf + seq->from, n); if (err) { err = -EFAULT; goto done; } seq->count -= n; seq->from += n; copied = n; goto done; } seq->from = 0; p = seq->op->start(seq, &seq->index); if (!p) goto stop; if (IS_ERR(p)) { err = PTR_ERR(p); seq->op->stop(seq, p); seq->count = 0; goto done; } err = seq->op->show(seq, p); if (err > 0) { /* object is skipped, decrease seq_num, so next * valid object can reuse the same seq_num.
*/ bpf_iter_dec_seq_num(seq); seq->count = 0; } else if (err < 0 || seq_has_overflowed(seq)) { if (!err) err = -E2BIG; seq->op->stop(seq, p); seq->count = 0; goto done; } can_resched = bpf_iter_support_resched(seq); while (1) { loff_t pos = seq->index; num_objs++; offs = seq->count; p = seq->op->next(seq, p, &seq->index); if (pos == seq->index) { pr_info_ratelimited("buggy seq_file .next function %ps " "did not update position index\n", seq->op->next); seq->index++; } if (IS_ERR_OR_NULL(p)) break; /* got a valid next object, increase seq_num */ bpf_iter_inc_seq_num(seq); if (seq->count >= size) break; if (num_objs >= MAX_ITER_OBJECTS) { if (offs == 0) { err = -EAGAIN; seq->op->stop(seq, p); goto done; } break; } err = seq->op->show(seq, p); if (err > 0) { bpf_iter_dec_seq_num(seq); seq->count = offs; } else if (err < 0 || seq_has_overflowed(seq)) { seq->count = offs; if (offs == 0) { if (!err) err = -E2BIG; seq->op->stop(seq, p); goto done; } break; } if (can_resched) cond_resched(); } stop: offs = seq->count; if (IS_ERR(p)) { seq->op->stop(seq, NULL); err = PTR_ERR(p); goto done; } /* bpf program called if !p */ seq->op->stop(seq, p); if (!p) { if (!seq_has_overflowed(seq)) { bpf_iter_done_stop(seq); } else { seq->count = offs; if (offs == 0) { err = -E2BIG; goto done; } } } n = min(seq->count, size); err = copy_to_user(buf, seq->buf, n); if (err) { err = -EFAULT; goto done; } copied = n; seq->count -= n; seq->from = n; done: if (!copied) copied = err; else *ppos += copied; mutex_unlock(&seq->lock); return copied; } static const struct bpf_iter_seq_info * __get_seq_info(struct bpf_iter_link *link) { const struct bpf_iter_seq_info *seq_info; if (link->aux.map) { seq_info = link->aux.map->ops->iter_seq_info; if (seq_info) return seq_info; } return link->tinfo->reg_info->seq_info; } static int iter_open(struct inode *inode, struct file *file) { struct bpf_iter_link *link = inode->i_private; return prepare_seq_file(file, link, __get_seq_info(link)); } static int iter_release(struct inode *inode, struct file *file) { struct bpf_iter_priv_data *iter_priv; struct seq_file *seq; seq = file->private_data; if (!seq) return 0; iter_priv = container_of(seq->private, struct bpf_iter_priv_data, target_private); if (iter_priv->seq_info->fini_seq_private) iter_priv->seq_info->fini_seq_private(seq->private); bpf_prog_put(iter_priv->prog); seq->private = iter_priv; return seq_release_private(inode, file); } const struct file_operations bpf_iter_fops = { .open = iter_open, .read = bpf_seq_read, .release = iter_release, }; /* The argument reg_info will be cached in bpf_iter_target_info. * The common practice is to declare target reg_info as * a const static variable and pass it as an argument to * bpf_iter_reg_target().
*/ int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info) { struct bpf_iter_target_info *tinfo; tinfo = kzalloc(sizeof(*tinfo), GFP_KERNEL); if (!tinfo) return -ENOMEM; tinfo->reg_info = reg_info; INIT_LIST_HEAD(&tinfo->list); mutex_lock(&targets_mutex); list_add(&tinfo->list, &targets); mutex_unlock(&targets_mutex); return 0; } void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info) { struct bpf_iter_target_info *tinfo; bool found = false; mutex_lock(&targets_mutex); list_for_each_entry(tinfo, &targets, list) { if (reg_info == tinfo->reg_info) { list_del(&tinfo->list); kfree(tinfo); found = true; break; } } mutex_unlock(&targets_mutex); WARN_ON(found == false); } static void cache_btf_id(struct bpf_iter_target_info *tinfo, struct bpf_prog *prog) { tinfo->btf_id = prog->aux->attach_btf_id; } int bpf_iter_prog_supported(struct bpf_prog *prog) { const char *attach_fname = prog->aux->attach_func_name; struct bpf_iter_target_info *tinfo = NULL, *iter; u32 prog_btf_id = prog->aux->attach_btf_id; const char *prefix = BPF_ITER_FUNC_PREFIX; int prefix_len = strlen(prefix); if (strncmp(attach_fname, prefix, prefix_len)) return -EINVAL; mutex_lock(&targets_mutex); list_for_each_entry(iter, &targets, list) { if (iter->btf_id && iter->btf_id == prog_btf_id) { tinfo = iter; break; } if (!strcmp(attach_fname + prefix_len, iter->reg_info->target)) { cache_btf_id(iter, prog); tinfo = iter; break; } } mutex_unlock(&targets_mutex); if (!tinfo) return -EINVAL; return bpf_prog_ctx_arg_info_init(prog, tinfo->reg_info->ctx_arg_info, tinfo->reg_info->ctx_arg_info_size); } const struct bpf_func_proto * bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { const struct bpf_iter_target_info *tinfo; const struct bpf_func_proto *fn = NULL; mutex_lock(&targets_mutex); list_for_each_entry(tinfo, &targets, list) { if (tinfo->btf_id == prog->aux->attach_btf_id) { const struct bpf_iter_reg *reg_info; reg_info = tinfo->reg_info; if (reg_info->get_func_proto) fn = reg_info->get_func_proto(func_id, prog); break; } } mutex_unlock(&targets_mutex); return fn; } static void bpf_iter_link_release(struct bpf_link *link) { struct bpf_iter_link *iter_link = container_of(link, struct bpf_iter_link, link); if (iter_link->tinfo->reg_info->detach_target) iter_link->tinfo->reg_info->detach_target(&iter_link->aux); } static void bpf_iter_link_dealloc(struct bpf_link *link) { struct bpf_iter_link *iter_link = container_of(link, struct bpf_iter_link, link); kfree(iter_link); } static int bpf_iter_link_replace(struct bpf_link *link, struct bpf_prog *new_prog, struct bpf_prog *old_prog) { int ret = 0; mutex_lock(&link_mutex); if (old_prog && link->prog != old_prog) { ret = -EPERM; goto out_unlock; } if (link->prog->type != new_prog->type || link->prog->expected_attach_type != new_prog->expected_attach_type || link->prog->aux->attach_btf_id != new_prog->aux->attach_btf_id) { ret = -EINVAL; goto out_unlock; } old_prog = xchg(&link->prog, new_prog); bpf_prog_put(old_prog); out_unlock: mutex_unlock(&link_mutex); return ret; } static void bpf_iter_link_show_fdinfo(const struct bpf_link *link, struct seq_file *seq) { struct bpf_iter_link *iter_link = container_of(link, struct bpf_iter_link, link); bpf_iter_show_fdinfo_t show_fdinfo; seq_printf(seq, "target_name:\t%s\n", iter_link->tinfo->reg_info->target); show_fdinfo = iter_link->tinfo->reg_info->show_fdinfo; if (show_fdinfo) show_fdinfo(&iter_link->aux, seq); } static int bpf_iter_link_fill_link_info(const struct bpf_link *link, struct bpf_link_info *info) 
{ struct bpf_iter_link *iter_link = container_of(link, struct bpf_iter_link, link); char __user *ubuf = u64_to_user_ptr(info->iter.target_name); bpf_iter_fill_link_info_t fill_link_info; u32 ulen = info->iter.target_name_len; const char *target_name; u32 target_len; if (!ulen ^ !ubuf) return -EINVAL; target_name = iter_link->tinfo->reg_info->target; target_len = strlen(target_name); info->iter.target_name_len = target_len + 1; if (ubuf) { if (ulen >= target_len + 1) { if (copy_to_user(ubuf, target_name, target_len + 1)) return -EFAULT; } else { char zero = '\0'; if (copy_to_user(ubuf, target_name, ulen - 1)) return -EFAULT; if (put_user(zero, ubuf + ulen - 1)) return -EFAULT; return -ENOSPC; } } fill_link_info = iter_link->tinfo->reg_info->fill_link_info; if (fill_link_info) return fill_link_info(&iter_link->aux, info); return 0; } static const struct bpf_link_ops bpf_iter_link_lops = { .release = bpf_iter_link_release, .dealloc = bpf_iter_link_dealloc, .update_prog = bpf_iter_link_replace, .show_fdinfo = bpf_iter_link_show_fdinfo, .fill_link_info = bpf_iter_link_fill_link_info, }; bool bpf_link_is_iter(struct bpf_link *link) { return link->ops == &bpf_iter_link_lops; } int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_prog *prog) { struct bpf_iter_target_info *tinfo = NULL, *iter; struct bpf_link_primer link_primer; union bpf_iter_link_info linfo; struct bpf_iter_link *link; u32 prog_btf_id, linfo_len; bpfptr_t ulinfo; int err; if (attr->link_create.target_fd || attr->link_create.flags) return -EINVAL; memset(&linfo, 0, sizeof(union bpf_iter_link_info)); ulinfo = make_bpfptr(attr->link_create.iter_info, uattr.is_kernel); linfo_len = attr->link_create.iter_info_len; if (bpfptr_is_null(ulinfo) ^ !linfo_len) return -EINVAL; if (!bpfptr_is_null(ulinfo)) { err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo), linfo_len); if (err) return err; linfo_len = min_t(u32, linfo_len, sizeof(linfo)); if (copy_from_bpfptr(&linfo, ulinfo, linfo_len)) return -EFAULT; } prog_btf_id = prog->aux->attach_btf_id; mutex_lock(&targets_mutex); list_for_each_entry(iter, &targets, list) { if (iter->btf_id == prog_btf_id) { tinfo = iter; break; } } mutex_unlock(&targets_mutex); if (!tinfo) return -ENOENT; /* Only allow sleepable program for resched-able iterator */ if (prog->sleepable && !bpf_iter_target_support_resched(tinfo)) return -EINVAL; link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN); if (!link) return -ENOMEM; bpf_link_init(&link->link, BPF_LINK_TYPE_ITER, &bpf_iter_link_lops, prog); link->tinfo = tinfo; err = bpf_link_prime(&link->link, &link_primer); if (err) { kfree(link); return err; } if (tinfo->reg_info->attach_target) { err = tinfo->reg_info->attach_target(prog, &linfo, &link->aux); if (err) { bpf_link_cleanup(&link_primer); return err; } } return bpf_link_settle(&link_primer); } static void init_seq_meta(struct bpf_iter_priv_data *priv_data, struct bpf_iter_target_info *tinfo, const struct bpf_iter_seq_info *seq_info, struct bpf_prog *prog) { priv_data->tinfo = tinfo; priv_data->seq_info = seq_info; priv_data->prog = prog; priv_data->session_id = atomic64_inc_return(&session_id); priv_data->seq_num = 0; priv_data->done_stop = false; } static int prepare_seq_file(struct file *file, struct bpf_iter_link *link, const struct bpf_iter_seq_info *seq_info) { struct bpf_iter_priv_data *priv_data; struct bpf_iter_target_info *tinfo; struct bpf_prog *prog; u32 total_priv_dsize; struct seq_file *seq; int err = 0; mutex_lock(&link_mutex); prog = link->link.prog; 
bpf_prog_inc(prog); mutex_unlock(&link_mutex); tinfo = link->tinfo; total_priv_dsize = offsetof(struct bpf_iter_priv_data, target_private) + seq_info->seq_priv_size; priv_data = __seq_open_private(file, seq_info->seq_ops, total_priv_dsize); if (!priv_data) { err = -ENOMEM; goto release_prog; } if (seq_info->init_seq_private) { err = seq_info->init_seq_private(priv_data->target_private, &link->aux); if (err) goto release_seq_file; } init_seq_meta(priv_data, tinfo, seq_info, prog); seq = file->private_data; seq->private = priv_data->target_private; return 0; release_seq_file: seq_release_private(file->f_inode, file); file->private_data = NULL; release_prog: bpf_prog_put(prog); return err; } int bpf_iter_new_fd(struct bpf_link *link) { struct bpf_iter_link *iter_link; struct file *file; unsigned int flags; int err, fd; if (link->ops != &bpf_iter_link_lops) return -EINVAL; flags = O_RDONLY | O_CLOEXEC; fd = get_unused_fd_flags(flags); if (fd < 0) return fd; file = anon_inode_getfile("bpf_iter", &bpf_iter_fops, NULL, flags); if (IS_ERR(file)) { err = PTR_ERR(file); goto free_fd; } iter_link = container_of(link, struct bpf_iter_link, link); err = prepare_seq_file(file, iter_link, __get_seq_info(iter_link)); if (err) goto free_file; fd_install(fd, file); return fd; free_file: fput(file); free_fd: put_unused_fd(fd); return err; } struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop) { struct bpf_iter_priv_data *iter_priv; struct seq_file *seq; void *seq_priv; seq = meta->seq; if (seq->file->f_op != &bpf_iter_fops) return NULL; seq_priv = seq->private; iter_priv = container_of(seq_priv, struct bpf_iter_priv_data, target_private); if (in_stop && iter_priv->done_stop) return NULL; meta->session_id = iter_priv->session_id; meta->seq_num = iter_priv->seq_num; return iter_priv->prog; } int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx) { struct bpf_run_ctx run_ctx, *old_run_ctx; int ret; if (prog->sleepable) { rcu_read_lock_trace(); migrate_disable(); might_fault(); old_run_ctx = bpf_set_run_ctx(&run_ctx); ret = bpf_prog_run(prog, ctx); bpf_reset_run_ctx(old_run_ctx); migrate_enable(); rcu_read_unlock_trace(); } else { rcu_read_lock(); migrate_disable(); old_run_ctx = bpf_set_run_ctx(&run_ctx); ret = bpf_prog_run(prog, ctx); bpf_reset_run_ctx(old_run_ctx); migrate_enable(); rcu_read_unlock(); } /* bpf program can only return 0 or 1: * 0 : okay * 1 : retry the same object * The bpf_iter_run_prog() return value * will be seq_ops->show() return value. */ return ret == 0 ? 0 : -EAGAIN; } BPF_CALL_4(bpf_for_each_map_elem, struct bpf_map *, map, void *, callback_fn, void *, callback_ctx, u64, flags) { return map->ops->map_for_each_callback(map, callback_fn, callback_ctx, flags); } const struct bpf_func_proto bpf_for_each_map_elem_proto = { .func = bpf_for_each_map_elem, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_FUNC, .arg3_type = ARG_PTR_TO_STACK_OR_NULL, .arg4_type = ARG_ANYTHING, }; BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx, u64, flags) { bpf_callback_t callback = (bpf_callback_t)callback_fn; u64 ret; u32 i; /* Note: these safety checks are also verified when bpf_loop * is inlined, be careful to modify this code in sync. See * function verifier.c:inline_bpf_loop. 
*/ if (flags) return -EINVAL; if (nr_loops > BPF_MAX_LOOPS) return -E2BIG; for (i = 0; i < nr_loops; i++) { ret = callback((u64)i, (u64)(long)callback_ctx, 0, 0, 0); /* return value: 0 - continue, 1 - stop and return */ if (ret) return i + 1; } return i; } const struct bpf_func_proto bpf_loop_proto = { .func = bpf_loop, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_ANYTHING, .arg2_type = ARG_PTR_TO_FUNC, .arg3_type = ARG_PTR_TO_STACK_OR_NULL, .arg4_type = ARG_ANYTHING, }; struct bpf_iter_num_kern { int cur; /* current value, inclusive */ int end; /* final value, exclusive */ } __aligned(8); __bpf_kfunc_start_defs(); __bpf_kfunc int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) { struct bpf_iter_num_kern *s = (void *)it; BUILD_BUG_ON(sizeof(struct bpf_iter_num_kern) != sizeof(struct bpf_iter_num)); BUILD_BUG_ON(__alignof__(struct bpf_iter_num_kern) != __alignof__(struct bpf_iter_num)); /* start == end is legit, it's an empty range and we'll just get NULL * on first (and any subsequent) bpf_iter_num_next() call */ if (start > end) { s->cur = s->end = 0; return -EINVAL; } /* avoid overflows, e.g., if start == INT_MIN and end == INT_MAX */ if ((s64)end - (s64)start > BPF_MAX_LOOPS) { s->cur = s->end = 0; return -E2BIG; } /* user will call bpf_iter_num_next() first, * which will set s->cur to exactly start value; * underflow shouldn't matter */ s->cur = start - 1; s->end = end; return 0; } __bpf_kfunc int *bpf_iter_num_next(struct bpf_iter_num *it) { struct bpf_iter_num_kern *s = (void *)it; /* check failed initialization or if we are done (same behavior); * need to be careful about overflow, so convert to s64 for checks, * e.g., if s->cur == s->end == INT_MAX, we can't just do * s->cur + 1 >= s->end */ if ((s64)(s->cur + 1) >= s->end) { s->cur = s->end = 0; return NULL; } s->cur++; return &s->cur; } __bpf_kfunc void bpf_iter_num_destroy(struct bpf_iter_num *it) { struct bpf_iter_num_kern *s = (void *)it; s->cur = s->end = 0; } __bpf_kfunc_end_defs();
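/*
 * Illustrative BPF program (not from the kernel tree) exercising both
 * bounded-iteration facilities defined above: the bpf_loop() helper,
 * whose callback returns 0 to continue or 1 to stop early, and the
 * open-coded numeric iterator kfuncs bpf_iter_num_new()/_next()/
 * _destroy() over the half-open range [start, end). Assumes a libbpf
 * build with vmlinux.h; section and symbol names are made up.
 */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

char LICENSE[] SEC("license") = "GPL";

/* kfunc prototypes; newer vmlinux.h dumps may already provide these */
extern int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) __ksym;
extern int *bpf_iter_num_next(struct bpf_iter_num *it) __ksym;
extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __ksym;

static long add_index(u64 index, void *ctx)
{
	*(u64 *)ctx += index;
	return 0;	/* 0 - continue, 1 - stop early */
}

SEC("raw_tp/sys_enter")
int demo_bounded_loops(void *ctx)
{
	struct bpf_iter_num it;
	u64 sum = 0;
	int *v;

	/* bpf_loop(): runs the callback nr_loops times (capped at
	 * BPF_MAX_LOOPS); flags must currently be zero. Sums 0..9 here.
	 */
	bpf_loop(10, add_index, &sum, 0);

	/* Open-coded numeric iterator over [0, 5): always pair _new()
	 * with _destroy(); a failed _new() just makes _next() yield NULL.
	 */
	bpf_iter_num_new(&it, 0, 5);
	while ((v = bpf_iter_num_next(&it)))
		sum += *v;	/* visits 0,1,2,3,4 */
	bpf_iter_num_destroy(&it);

	return sum == 55;	/* 45 from bpf_loop + 10 from the iterator */
}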
/* * Cryptographic API. * * Whirlpool Hashing Algorithm * * The Whirlpool algorithm was developed by Paulo S. L. M. Barreto and * Vincent Rijmen. It has been selected as one of the cryptographic * primitives by the NESSIE project http://www.cryptonessie.org/ * * The original authors have disclaimed all copyright interest in this * code and thus put it in the public domain. The subsequent authors * have put this under the GNU General Public License. * * By Aaron Grothe ajgrothe@yahoo.com, August 23, 2004 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * */ #include <crypto/internal/hash.h> #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> #include <asm/byteorder.h> #include <linux/types.h> #define WP512_DIGEST_SIZE 64 #define WP384_DIGEST_SIZE 48 #define WP256_DIGEST_SIZE 32 #define WP512_BLOCK_SIZE 64 #define WP512_LENGTHBYTES 32 #define WHIRLPOOL_ROUNDS 10 struct wp512_ctx { u8 bitLength[WP512_LENGTHBYTES]; u8 buffer[WP512_BLOCK_SIZE]; int bufferBits; int bufferPos; u64 hash[WP512_DIGEST_SIZE/8]; }; /* * Though Whirlpool is endianness-neutral, the encryption tables are listed * in BIG-ENDIAN format, which is adopted throughout this implementation * (but little-endian notation would be equally suitable if consistently * employed).
*/ static const u64 C0[256] = { 0x18186018c07830d8ULL, 0x23238c2305af4626ULL, 0xc6c63fc67ef991b8ULL, 0xe8e887e8136fcdfbULL, 0x878726874ca113cbULL, 0xb8b8dab8a9626d11ULL, 0x0101040108050209ULL, 0x4f4f214f426e9e0dULL, 0x3636d836adee6c9bULL, 0xa6a6a2a6590451ffULL, 0xd2d26fd2debdb90cULL, 0xf5f5f3f5fb06f70eULL, 0x7979f979ef80f296ULL, 0x6f6fa16f5fcede30ULL, 0x91917e91fcef3f6dULL, 0x52525552aa07a4f8ULL, 0x60609d6027fdc047ULL, 0xbcbccabc89766535ULL, 0x9b9b569baccd2b37ULL, 0x8e8e028e048c018aULL, 0xa3a3b6a371155bd2ULL, 0x0c0c300c603c186cULL, 0x7b7bf17bff8af684ULL, 0x3535d435b5e16a80ULL, 0x1d1d741de8693af5ULL, 0xe0e0a7e05347ddb3ULL, 0xd7d77bd7f6acb321ULL, 0xc2c22fc25eed999cULL, 0x2e2eb82e6d965c43ULL, 0x4b4b314b627a9629ULL, 0xfefedffea321e15dULL, 0x575741578216aed5ULL, 0x15155415a8412abdULL, 0x7777c1779fb6eee8ULL, 0x3737dc37a5eb6e92ULL, 0xe5e5b3e57b56d79eULL, 0x9f9f469f8cd92313ULL, 0xf0f0e7f0d317fd23ULL, 0x4a4a354a6a7f9420ULL, 0xdada4fda9e95a944ULL, 0x58587d58fa25b0a2ULL, 0xc9c903c906ca8fcfULL, 0x2929a429558d527cULL, 0x0a0a280a5022145aULL, 0xb1b1feb1e14f7f50ULL, 0xa0a0baa0691a5dc9ULL, 0x6b6bb16b7fdad614ULL, 0x85852e855cab17d9ULL, 0xbdbdcebd8173673cULL, 0x5d5d695dd234ba8fULL, 0x1010401080502090ULL, 0xf4f4f7f4f303f507ULL, 0xcbcb0bcb16c08bddULL, 0x3e3ef83eedc67cd3ULL, 0x0505140528110a2dULL, 0x676781671fe6ce78ULL, 0xe4e4b7e47353d597ULL, 0x27279c2725bb4e02ULL, 0x4141194132588273ULL, 0x8b8b168b2c9d0ba7ULL, 0xa7a7a6a7510153f6ULL, 0x7d7de97dcf94fab2ULL, 0x95956e95dcfb3749ULL, 0xd8d847d88e9fad56ULL, 0xfbfbcbfb8b30eb70ULL, 0xeeee9fee2371c1cdULL, 0x7c7ced7cc791f8bbULL, 0x6666856617e3cc71ULL, 0xdddd53dda68ea77bULL, 0x17175c17b84b2eafULL, 0x4747014702468e45ULL, 0x9e9e429e84dc211aULL, 0xcaca0fca1ec589d4ULL, 0x2d2db42d75995a58ULL, 0xbfbfc6bf9179632eULL, 0x07071c07381b0e3fULL, 0xadad8ead012347acULL, 0x5a5a755aea2fb4b0ULL, 0x838336836cb51befULL, 0x3333cc3385ff66b6ULL, 0x636391633ff2c65cULL, 0x02020802100a0412ULL, 0xaaaa92aa39384993ULL, 0x7171d971afa8e2deULL, 0xc8c807c80ecf8dc6ULL, 0x19196419c87d32d1ULL, 0x494939497270923bULL, 0xd9d943d9869aaf5fULL, 0xf2f2eff2c31df931ULL, 0xe3e3abe34b48dba8ULL, 0x5b5b715be22ab6b9ULL, 0x88881a8834920dbcULL, 0x9a9a529aa4c8293eULL, 0x262698262dbe4c0bULL, 0x3232c8328dfa64bfULL, 0xb0b0fab0e94a7d59ULL, 0xe9e983e91b6acff2ULL, 0x0f0f3c0f78331e77ULL, 0xd5d573d5e6a6b733ULL, 0x80803a8074ba1df4ULL, 0xbebec2be997c6127ULL, 0xcdcd13cd26de87ebULL, 0x3434d034bde46889ULL, 0x48483d487a759032ULL, 0xffffdbffab24e354ULL, 0x7a7af57af78ff48dULL, 0x90907a90f4ea3d64ULL, 0x5f5f615fc23ebe9dULL, 0x202080201da0403dULL, 0x6868bd6867d5d00fULL, 0x1a1a681ad07234caULL, 0xaeae82ae192c41b7ULL, 0xb4b4eab4c95e757dULL, 0x54544d549a19a8ceULL, 0x93937693ece53b7fULL, 0x222288220daa442fULL, 0x64648d6407e9c863ULL, 0xf1f1e3f1db12ff2aULL, 0x7373d173bfa2e6ccULL, 0x12124812905a2482ULL, 0x40401d403a5d807aULL, 0x0808200840281048ULL, 0xc3c32bc356e89b95ULL, 0xecec97ec337bc5dfULL, 0xdbdb4bdb9690ab4dULL, 0xa1a1bea1611f5fc0ULL, 0x8d8d0e8d1c830791ULL, 0x3d3df43df5c97ac8ULL, 0x97976697ccf1335bULL, 0x0000000000000000ULL, 0xcfcf1bcf36d483f9ULL, 0x2b2bac2b4587566eULL, 0x7676c57697b3ece1ULL, 0x8282328264b019e6ULL, 0xd6d67fd6fea9b128ULL, 0x1b1b6c1bd87736c3ULL, 0xb5b5eeb5c15b7774ULL, 0xafaf86af112943beULL, 0x6a6ab56a77dfd41dULL, 0x50505d50ba0da0eaULL, 0x45450945124c8a57ULL, 0xf3f3ebf3cb18fb38ULL, 0x3030c0309df060adULL, 0xefef9bef2b74c3c4ULL, 0x3f3ffc3fe5c37edaULL, 0x55554955921caac7ULL, 0xa2a2b2a2791059dbULL, 0xeaea8fea0365c9e9ULL, 0x656589650fecca6aULL, 0xbabad2bab9686903ULL, 0x2f2fbc2f65935e4aULL, 0xc0c027c04ee79d8eULL, 0xdede5fdebe81a160ULL, 
0x1c1c701ce06c38fcULL, 0xfdfdd3fdbb2ee746ULL, 0x4d4d294d52649a1fULL, 0x92927292e4e03976ULL, 0x7575c9758fbceafaULL, 0x06061806301e0c36ULL, 0x8a8a128a249809aeULL, 0xb2b2f2b2f940794bULL, 0xe6e6bfe66359d185ULL, 0x0e0e380e70361c7eULL, 0x1f1f7c1ff8633ee7ULL, 0x6262956237f7c455ULL, 0xd4d477d4eea3b53aULL, 0xa8a89aa829324d81ULL, 0x96966296c4f43152ULL, 0xf9f9c3f99b3aef62ULL, 0xc5c533c566f697a3ULL, 0x2525942535b14a10ULL, 0x59597959f220b2abULL, 0x84842a8454ae15d0ULL, 0x7272d572b7a7e4c5ULL, 0x3939e439d5dd72ecULL, 0x4c4c2d4c5a619816ULL, 0x5e5e655eca3bbc94ULL, 0x7878fd78e785f09fULL, 0x3838e038ddd870e5ULL, 0x8c8c0a8c14860598ULL, 0xd1d163d1c6b2bf17ULL, 0xa5a5aea5410b57e4ULL, 0xe2e2afe2434dd9a1ULL, 0x616199612ff8c24eULL, 0xb3b3f6b3f1457b42ULL, 0x2121842115a54234ULL, 0x9c9c4a9c94d62508ULL, 0x1e1e781ef0663ceeULL, 0x4343114322528661ULL, 0xc7c73bc776fc93b1ULL, 0xfcfcd7fcb32be54fULL, 0x0404100420140824ULL, 0x51515951b208a2e3ULL, 0x99995e99bcc72f25ULL, 0x6d6da96d4fc4da22ULL, 0x0d0d340d68391a65ULL, 0xfafacffa8335e979ULL, 0xdfdf5bdfb684a369ULL, 0x7e7ee57ed79bfca9ULL, 0x242490243db44819ULL, 0x3b3bec3bc5d776feULL, 0xabab96ab313d4b9aULL, 0xcece1fce3ed181f0ULL, 0x1111441188552299ULL, 0x8f8f068f0c890383ULL, 0x4e4e254e4a6b9c04ULL, 0xb7b7e6b7d1517366ULL, 0xebeb8beb0b60cbe0ULL, 0x3c3cf03cfdcc78c1ULL, 0x81813e817cbf1ffdULL, 0x94946a94d4fe3540ULL, 0xf7f7fbf7eb0cf31cULL, 0xb9b9deb9a1676f18ULL, 0x13134c13985f268bULL, 0x2c2cb02c7d9c5851ULL, 0xd3d36bd3d6b8bb05ULL, 0xe7e7bbe76b5cd38cULL, 0x6e6ea56e57cbdc39ULL, 0xc4c437c46ef395aaULL, 0x03030c03180f061bULL, 0x565645568a13acdcULL, 0x44440d441a49885eULL, 0x7f7fe17fdf9efea0ULL, 0xa9a99ea921374f88ULL, 0x2a2aa82a4d825467ULL, 0xbbbbd6bbb16d6b0aULL, 0xc1c123c146e29f87ULL, 0x53535153a202a6f1ULL, 0xdcdc57dcae8ba572ULL, 0x0b0b2c0b58271653ULL, 0x9d9d4e9d9cd32701ULL, 0x6c6cad6c47c1d82bULL, 0x3131c43195f562a4ULL, 0x7474cd7487b9e8f3ULL, 0xf6f6fff6e309f115ULL, 0x464605460a438c4cULL, 0xacac8aac092645a5ULL, 0x89891e893c970fb5ULL, 0x14145014a04428b4ULL, 0xe1e1a3e15b42dfbaULL, 0x16165816b04e2ca6ULL, 0x3a3ae83acdd274f7ULL, 0x6969b9696fd0d206ULL, 0x09092409482d1241ULL, 0x7070dd70a7ade0d7ULL, 0xb6b6e2b6d954716fULL, 0xd0d067d0ceb7bd1eULL, 0xeded93ed3b7ec7d6ULL, 0xcccc17cc2edb85e2ULL, 0x424215422a578468ULL, 0x98985a98b4c22d2cULL, 0xa4a4aaa4490e55edULL, 0x2828a0285d885075ULL, 0x5c5c6d5cda31b886ULL, 0xf8f8c7f8933fed6bULL, 0x8686228644a411c2ULL, }; static const u64 C1[256] = { 0xd818186018c07830ULL, 0x2623238c2305af46ULL, 0xb8c6c63fc67ef991ULL, 0xfbe8e887e8136fcdULL, 0xcb878726874ca113ULL, 0x11b8b8dab8a9626dULL, 0x0901010401080502ULL, 0x0d4f4f214f426e9eULL, 0x9b3636d836adee6cULL, 0xffa6a6a2a6590451ULL, 0x0cd2d26fd2debdb9ULL, 0x0ef5f5f3f5fb06f7ULL, 0x967979f979ef80f2ULL, 0x306f6fa16f5fcedeULL, 0x6d91917e91fcef3fULL, 0xf852525552aa07a4ULL, 0x4760609d6027fdc0ULL, 0x35bcbccabc897665ULL, 0x379b9b569baccd2bULL, 0x8a8e8e028e048c01ULL, 0xd2a3a3b6a371155bULL, 0x6c0c0c300c603c18ULL, 0x847b7bf17bff8af6ULL, 0x803535d435b5e16aULL, 0xf51d1d741de8693aULL, 0xb3e0e0a7e05347ddULL, 0x21d7d77bd7f6acb3ULL, 0x9cc2c22fc25eed99ULL, 0x432e2eb82e6d965cULL, 0x294b4b314b627a96ULL, 0x5dfefedffea321e1ULL, 0xd5575741578216aeULL, 0xbd15155415a8412aULL, 0xe87777c1779fb6eeULL, 0x923737dc37a5eb6eULL, 0x9ee5e5b3e57b56d7ULL, 0x139f9f469f8cd923ULL, 0x23f0f0e7f0d317fdULL, 0x204a4a354a6a7f94ULL, 0x44dada4fda9e95a9ULL, 0xa258587d58fa25b0ULL, 0xcfc9c903c906ca8fULL, 0x7c2929a429558d52ULL, 0x5a0a0a280a502214ULL, 0x50b1b1feb1e14f7fULL, 0xc9a0a0baa0691a5dULL, 0x146b6bb16b7fdad6ULL, 0xd985852e855cab17ULL, 0x3cbdbdcebd817367ULL, 0x8f5d5d695dd234baULL, 
0x9010104010805020ULL, 0x07f4f4f7f4f303f5ULL, 0xddcbcb0bcb16c08bULL, 0xd33e3ef83eedc67cULL, 0x2d0505140528110aULL, 0x78676781671fe6ceULL, 0x97e4e4b7e47353d5ULL, 0x0227279c2725bb4eULL, 0x7341411941325882ULL, 0xa78b8b168b2c9d0bULL, 0xf6a7a7a6a7510153ULL, 0xb27d7de97dcf94faULL, 0x4995956e95dcfb37ULL, 0x56d8d847d88e9fadULL, 0x70fbfbcbfb8b30ebULL, 0xcdeeee9fee2371c1ULL, 0xbb7c7ced7cc791f8ULL, 0x716666856617e3ccULL, 0x7bdddd53dda68ea7ULL, 0xaf17175c17b84b2eULL, 0x454747014702468eULL, 0x1a9e9e429e84dc21ULL, 0xd4caca0fca1ec589ULL, 0x582d2db42d75995aULL, 0x2ebfbfc6bf917963ULL, 0x3f07071c07381b0eULL, 0xacadad8ead012347ULL, 0xb05a5a755aea2fb4ULL, 0xef838336836cb51bULL, 0xb63333cc3385ff66ULL, 0x5c636391633ff2c6ULL, 0x1202020802100a04ULL, 0x93aaaa92aa393849ULL, 0xde7171d971afa8e2ULL, 0xc6c8c807c80ecf8dULL, 0xd119196419c87d32ULL, 0x3b49493949727092ULL, 0x5fd9d943d9869aafULL, 0x31f2f2eff2c31df9ULL, 0xa8e3e3abe34b48dbULL, 0xb95b5b715be22ab6ULL, 0xbc88881a8834920dULL, 0x3e9a9a529aa4c829ULL, 0x0b262698262dbe4cULL, 0xbf3232c8328dfa64ULL, 0x59b0b0fab0e94a7dULL, 0xf2e9e983e91b6acfULL, 0x770f0f3c0f78331eULL, 0x33d5d573d5e6a6b7ULL, 0xf480803a8074ba1dULL, 0x27bebec2be997c61ULL, 0xebcdcd13cd26de87ULL, 0x893434d034bde468ULL, 0x3248483d487a7590ULL, 0x54ffffdbffab24e3ULL, 0x8d7a7af57af78ff4ULL, 0x6490907a90f4ea3dULL, 0x9d5f5f615fc23ebeULL, 0x3d202080201da040ULL, 0x0f6868bd6867d5d0ULL, 0xca1a1a681ad07234ULL, 0xb7aeae82ae192c41ULL, 0x7db4b4eab4c95e75ULL, 0xce54544d549a19a8ULL, 0x7f93937693ece53bULL, 0x2f222288220daa44ULL, 0x6364648d6407e9c8ULL, 0x2af1f1e3f1db12ffULL, 0xcc7373d173bfa2e6ULL, 0x8212124812905a24ULL, 0x7a40401d403a5d80ULL, 0x4808082008402810ULL, 0x95c3c32bc356e89bULL, 0xdfecec97ec337bc5ULL, 0x4ddbdb4bdb9690abULL, 0xc0a1a1bea1611f5fULL, 0x918d8d0e8d1c8307ULL, 0xc83d3df43df5c97aULL, 0x5b97976697ccf133ULL, 0x0000000000000000ULL, 0xf9cfcf1bcf36d483ULL, 0x6e2b2bac2b458756ULL, 0xe17676c57697b3ecULL, 0xe68282328264b019ULL, 0x28d6d67fd6fea9b1ULL, 0xc31b1b6c1bd87736ULL, 0x74b5b5eeb5c15b77ULL, 0xbeafaf86af112943ULL, 0x1d6a6ab56a77dfd4ULL, 0xea50505d50ba0da0ULL, 0x5745450945124c8aULL, 0x38f3f3ebf3cb18fbULL, 0xad3030c0309df060ULL, 0xc4efef9bef2b74c3ULL, 0xda3f3ffc3fe5c37eULL, 0xc755554955921caaULL, 0xdba2a2b2a2791059ULL, 0xe9eaea8fea0365c9ULL, 0x6a656589650feccaULL, 0x03babad2bab96869ULL, 0x4a2f2fbc2f65935eULL, 0x8ec0c027c04ee79dULL, 0x60dede5fdebe81a1ULL, 0xfc1c1c701ce06c38ULL, 0x46fdfdd3fdbb2ee7ULL, 0x1f4d4d294d52649aULL, 0x7692927292e4e039ULL, 0xfa7575c9758fbceaULL, 0x3606061806301e0cULL, 0xae8a8a128a249809ULL, 0x4bb2b2f2b2f94079ULL, 0x85e6e6bfe66359d1ULL, 0x7e0e0e380e70361cULL, 0xe71f1f7c1ff8633eULL, 0x556262956237f7c4ULL, 0x3ad4d477d4eea3b5ULL, 0x81a8a89aa829324dULL, 0x5296966296c4f431ULL, 0x62f9f9c3f99b3aefULL, 0xa3c5c533c566f697ULL, 0x102525942535b14aULL, 0xab59597959f220b2ULL, 0xd084842a8454ae15ULL, 0xc57272d572b7a7e4ULL, 0xec3939e439d5dd72ULL, 0x164c4c2d4c5a6198ULL, 0x945e5e655eca3bbcULL, 0x9f7878fd78e785f0ULL, 0xe53838e038ddd870ULL, 0x988c8c0a8c148605ULL, 0x17d1d163d1c6b2bfULL, 0xe4a5a5aea5410b57ULL, 0xa1e2e2afe2434dd9ULL, 0x4e616199612ff8c2ULL, 0x42b3b3f6b3f1457bULL, 0x342121842115a542ULL, 0x089c9c4a9c94d625ULL, 0xee1e1e781ef0663cULL, 0x6143431143225286ULL, 0xb1c7c73bc776fc93ULL, 0x4ffcfcd7fcb32be5ULL, 0x2404041004201408ULL, 0xe351515951b208a2ULL, 0x2599995e99bcc72fULL, 0x226d6da96d4fc4daULL, 0x650d0d340d68391aULL, 0x79fafacffa8335e9ULL, 0x69dfdf5bdfb684a3ULL, 0xa97e7ee57ed79bfcULL, 0x19242490243db448ULL, 0xfe3b3bec3bc5d776ULL, 0x9aabab96ab313d4bULL, 0xf0cece1fce3ed181ULL, 0x9911114411885522ULL, 
0x838f8f068f0c8903ULL, 0x044e4e254e4a6b9cULL, 0x66b7b7e6b7d15173ULL, 0xe0ebeb8beb0b60cbULL, 0xc13c3cf03cfdcc78ULL, 0xfd81813e817cbf1fULL, 0x4094946a94d4fe35ULL, 0x1cf7f7fbf7eb0cf3ULL, 0x18b9b9deb9a1676fULL, 0x8b13134c13985f26ULL, 0x512c2cb02c7d9c58ULL, 0x05d3d36bd3d6b8bbULL, 0x8ce7e7bbe76b5cd3ULL, 0x396e6ea56e57cbdcULL, 0xaac4c437c46ef395ULL, 0x1b03030c03180f06ULL, 0xdc565645568a13acULL, 0x5e44440d441a4988ULL, 0xa07f7fe17fdf9efeULL, 0x88a9a99ea921374fULL, 0x672a2aa82a4d8254ULL, 0x0abbbbd6bbb16d6bULL, 0x87c1c123c146e29fULL, 0xf153535153a202a6ULL, 0x72dcdc57dcae8ba5ULL, 0x530b0b2c0b582716ULL, 0x019d9d4e9d9cd327ULL, 0x2b6c6cad6c47c1d8ULL, 0xa43131c43195f562ULL, 0xf37474cd7487b9e8ULL, 0x15f6f6fff6e309f1ULL, 0x4c464605460a438cULL, 0xa5acac8aac092645ULL, 0xb589891e893c970fULL, 0xb414145014a04428ULL, 0xbae1e1a3e15b42dfULL, 0xa616165816b04e2cULL, 0xf73a3ae83acdd274ULL, 0x066969b9696fd0d2ULL, 0x4109092409482d12ULL, 0xd77070dd70a7ade0ULL, 0x6fb6b6e2b6d95471ULL, 0x1ed0d067d0ceb7bdULL, 0xd6eded93ed3b7ec7ULL, 0xe2cccc17cc2edb85ULL, 0x68424215422a5784ULL, 0x2c98985a98b4c22dULL, 0xeda4a4aaa4490e55ULL, 0x752828a0285d8850ULL, 0x865c5c6d5cda31b8ULL, 0x6bf8f8c7f8933fedULL, 0xc28686228644a411ULL, }; static const u64 C2[256] = { 0x30d818186018c078ULL, 0x462623238c2305afULL, 0x91b8c6c63fc67ef9ULL, 0xcdfbe8e887e8136fULL, 0x13cb878726874ca1ULL, 0x6d11b8b8dab8a962ULL, 0x0209010104010805ULL, 0x9e0d4f4f214f426eULL, 0x6c9b3636d836adeeULL, 0x51ffa6a6a2a65904ULL, 0xb90cd2d26fd2debdULL, 0xf70ef5f5f3f5fb06ULL, 0xf2967979f979ef80ULL, 0xde306f6fa16f5fceULL, 0x3f6d91917e91fcefULL, 0xa4f852525552aa07ULL, 0xc04760609d6027fdULL, 0x6535bcbccabc8976ULL, 0x2b379b9b569baccdULL, 0x018a8e8e028e048cULL, 0x5bd2a3a3b6a37115ULL, 0x186c0c0c300c603cULL, 0xf6847b7bf17bff8aULL, 0x6a803535d435b5e1ULL, 0x3af51d1d741de869ULL, 0xddb3e0e0a7e05347ULL, 0xb321d7d77bd7f6acULL, 0x999cc2c22fc25eedULL, 0x5c432e2eb82e6d96ULL, 0x96294b4b314b627aULL, 0xe15dfefedffea321ULL, 0xaed5575741578216ULL, 0x2abd15155415a841ULL, 0xeee87777c1779fb6ULL, 0x6e923737dc37a5ebULL, 0xd79ee5e5b3e57b56ULL, 0x23139f9f469f8cd9ULL, 0xfd23f0f0e7f0d317ULL, 0x94204a4a354a6a7fULL, 0xa944dada4fda9e95ULL, 0xb0a258587d58fa25ULL, 0x8fcfc9c903c906caULL, 0x527c2929a429558dULL, 0x145a0a0a280a5022ULL, 0x7f50b1b1feb1e14fULL, 0x5dc9a0a0baa0691aULL, 0xd6146b6bb16b7fdaULL, 0x17d985852e855cabULL, 0x673cbdbdcebd8173ULL, 0xba8f5d5d695dd234ULL, 0x2090101040108050ULL, 0xf507f4f4f7f4f303ULL, 0x8bddcbcb0bcb16c0ULL, 0x7cd33e3ef83eedc6ULL, 0x0a2d050514052811ULL, 0xce78676781671fe6ULL, 0xd597e4e4b7e47353ULL, 0x4e0227279c2725bbULL, 0x8273414119413258ULL, 0x0ba78b8b168b2c9dULL, 0x53f6a7a7a6a75101ULL, 0xfab27d7de97dcf94ULL, 0x374995956e95dcfbULL, 0xad56d8d847d88e9fULL, 0xeb70fbfbcbfb8b30ULL, 0xc1cdeeee9fee2371ULL, 0xf8bb7c7ced7cc791ULL, 0xcc716666856617e3ULL, 0xa77bdddd53dda68eULL, 0x2eaf17175c17b84bULL, 0x8e45474701470246ULL, 0x211a9e9e429e84dcULL, 0x89d4caca0fca1ec5ULL, 0x5a582d2db42d7599ULL, 0x632ebfbfc6bf9179ULL, 0x0e3f07071c07381bULL, 0x47acadad8ead0123ULL, 0xb4b05a5a755aea2fULL, 0x1bef838336836cb5ULL, 0x66b63333cc3385ffULL, 0xc65c636391633ff2ULL, 0x041202020802100aULL, 0x4993aaaa92aa3938ULL, 0xe2de7171d971afa8ULL, 0x8dc6c8c807c80ecfULL, 0x32d119196419c87dULL, 0x923b494939497270ULL, 0xaf5fd9d943d9869aULL, 0xf931f2f2eff2c31dULL, 0xdba8e3e3abe34b48ULL, 0xb6b95b5b715be22aULL, 0x0dbc88881a883492ULL, 0x293e9a9a529aa4c8ULL, 0x4c0b262698262dbeULL, 0x64bf3232c8328dfaULL, 0x7d59b0b0fab0e94aULL, 0xcff2e9e983e91b6aULL, 0x1e770f0f3c0f7833ULL, 0xb733d5d573d5e6a6ULL, 0x1df480803a8074baULL, 0x6127bebec2be997cULL, 
0x87ebcdcd13cd26deULL, 0x68893434d034bde4ULL, 0x903248483d487a75ULL, 0xe354ffffdbffab24ULL, 0xf48d7a7af57af78fULL, 0x3d6490907a90f4eaULL, 0xbe9d5f5f615fc23eULL, 0x403d202080201da0ULL, 0xd00f6868bd6867d5ULL, 0x34ca1a1a681ad072ULL, 0x41b7aeae82ae192cULL, 0x757db4b4eab4c95eULL, 0xa8ce54544d549a19ULL, 0x3b7f93937693ece5ULL, 0x442f222288220daaULL, 0xc86364648d6407e9ULL, 0xff2af1f1e3f1db12ULL, 0xe6cc7373d173bfa2ULL, 0x248212124812905aULL, 0x807a40401d403a5dULL, 0x1048080820084028ULL, 0x9b95c3c32bc356e8ULL, 0xc5dfecec97ec337bULL, 0xab4ddbdb4bdb9690ULL, 0x5fc0a1a1bea1611fULL, 0x07918d8d0e8d1c83ULL, 0x7ac83d3df43df5c9ULL, 0x335b97976697ccf1ULL, 0x0000000000000000ULL, 0x83f9cfcf1bcf36d4ULL, 0x566e2b2bac2b4587ULL, 0xece17676c57697b3ULL, 0x19e68282328264b0ULL, 0xb128d6d67fd6fea9ULL, 0x36c31b1b6c1bd877ULL, 0x7774b5b5eeb5c15bULL, 0x43beafaf86af1129ULL, 0xd41d6a6ab56a77dfULL, 0xa0ea50505d50ba0dULL, 0x8a5745450945124cULL, 0xfb38f3f3ebf3cb18ULL, 0x60ad3030c0309df0ULL, 0xc3c4efef9bef2b74ULL, 0x7eda3f3ffc3fe5c3ULL, 0xaac755554955921cULL, 0x59dba2a2b2a27910ULL, 0xc9e9eaea8fea0365ULL, 0xca6a656589650fecULL, 0x6903babad2bab968ULL, 0x5e4a2f2fbc2f6593ULL, 0x9d8ec0c027c04ee7ULL, 0xa160dede5fdebe81ULL, 0x38fc1c1c701ce06cULL, 0xe746fdfdd3fdbb2eULL, 0x9a1f4d4d294d5264ULL, 0x397692927292e4e0ULL, 0xeafa7575c9758fbcULL, 0x0c3606061806301eULL, 0x09ae8a8a128a2498ULL, 0x794bb2b2f2b2f940ULL, 0xd185e6e6bfe66359ULL, 0x1c7e0e0e380e7036ULL, 0x3ee71f1f7c1ff863ULL, 0xc4556262956237f7ULL, 0xb53ad4d477d4eea3ULL, 0x4d81a8a89aa82932ULL, 0x315296966296c4f4ULL, 0xef62f9f9c3f99b3aULL, 0x97a3c5c533c566f6ULL, 0x4a102525942535b1ULL, 0xb2ab59597959f220ULL, 0x15d084842a8454aeULL, 0xe4c57272d572b7a7ULL, 0x72ec3939e439d5ddULL, 0x98164c4c2d4c5a61ULL, 0xbc945e5e655eca3bULL, 0xf09f7878fd78e785ULL, 0x70e53838e038ddd8ULL, 0x05988c8c0a8c1486ULL, 0xbf17d1d163d1c6b2ULL, 0x57e4a5a5aea5410bULL, 0xd9a1e2e2afe2434dULL, 0xc24e616199612ff8ULL, 0x7b42b3b3f6b3f145ULL, 0x42342121842115a5ULL, 0x25089c9c4a9c94d6ULL, 0x3cee1e1e781ef066ULL, 0x8661434311432252ULL, 0x93b1c7c73bc776fcULL, 0xe54ffcfcd7fcb32bULL, 0x0824040410042014ULL, 0xa2e351515951b208ULL, 0x2f2599995e99bcc7ULL, 0xda226d6da96d4fc4ULL, 0x1a650d0d340d6839ULL, 0xe979fafacffa8335ULL, 0xa369dfdf5bdfb684ULL, 0xfca97e7ee57ed79bULL, 0x4819242490243db4ULL, 0x76fe3b3bec3bc5d7ULL, 0x4b9aabab96ab313dULL, 0x81f0cece1fce3ed1ULL, 0x2299111144118855ULL, 0x03838f8f068f0c89ULL, 0x9c044e4e254e4a6bULL, 0x7366b7b7e6b7d151ULL, 0xcbe0ebeb8beb0b60ULL, 0x78c13c3cf03cfdccULL, 0x1ffd81813e817cbfULL, 0x354094946a94d4feULL, 0xf31cf7f7fbf7eb0cULL, 0x6f18b9b9deb9a167ULL, 0x268b13134c13985fULL, 0x58512c2cb02c7d9cULL, 0xbb05d3d36bd3d6b8ULL, 0xd38ce7e7bbe76b5cULL, 0xdc396e6ea56e57cbULL, 0x95aac4c437c46ef3ULL, 0x061b03030c03180fULL, 0xacdc565645568a13ULL, 0x885e44440d441a49ULL, 0xfea07f7fe17fdf9eULL, 0x4f88a9a99ea92137ULL, 0x54672a2aa82a4d82ULL, 0x6b0abbbbd6bbb16dULL, 0x9f87c1c123c146e2ULL, 0xa6f153535153a202ULL, 0xa572dcdc57dcae8bULL, 0x16530b0b2c0b5827ULL, 0x27019d9d4e9d9cd3ULL, 0xd82b6c6cad6c47c1ULL, 0x62a43131c43195f5ULL, 0xe8f37474cd7487b9ULL, 0xf115f6f6fff6e309ULL, 0x8c4c464605460a43ULL, 0x45a5acac8aac0926ULL, 0x0fb589891e893c97ULL, 0x28b414145014a044ULL, 0xdfbae1e1a3e15b42ULL, 0x2ca616165816b04eULL, 0x74f73a3ae83acdd2ULL, 0xd2066969b9696fd0ULL, 0x124109092409482dULL, 0xe0d77070dd70a7adULL, 0x716fb6b6e2b6d954ULL, 0xbd1ed0d067d0ceb7ULL, 0xc7d6eded93ed3b7eULL, 0x85e2cccc17cc2edbULL, 0x8468424215422a57ULL, 0x2d2c98985a98b4c2ULL, 0x55eda4a4aaa4490eULL, 0x50752828a0285d88ULL, 0xb8865c5c6d5cda31ULL, 0xed6bf8f8c7f8933fULL, 
0x11c28686228644a4ULL, }; static const u64 C3[256] = { 0x7830d818186018c0ULL, 0xaf462623238c2305ULL, 0xf991b8c6c63fc67eULL, 0x6fcdfbe8e887e813ULL, 0xa113cb878726874cULL, 0x626d11b8b8dab8a9ULL, 0x0502090101040108ULL, 0x6e9e0d4f4f214f42ULL, 0xee6c9b3636d836adULL, 0x0451ffa6a6a2a659ULL, 0xbdb90cd2d26fd2deULL, 0x06f70ef5f5f3f5fbULL, 0x80f2967979f979efULL, 0xcede306f6fa16f5fULL, 0xef3f6d91917e91fcULL, 0x07a4f852525552aaULL, 0xfdc04760609d6027ULL, 0x766535bcbccabc89ULL, 0xcd2b379b9b569bacULL, 0x8c018a8e8e028e04ULL, 0x155bd2a3a3b6a371ULL, 0x3c186c0c0c300c60ULL, 0x8af6847b7bf17bffULL, 0xe16a803535d435b5ULL, 0x693af51d1d741de8ULL, 0x47ddb3e0e0a7e053ULL, 0xacb321d7d77bd7f6ULL, 0xed999cc2c22fc25eULL, 0x965c432e2eb82e6dULL, 0x7a96294b4b314b62ULL, 0x21e15dfefedffea3ULL, 0x16aed55757415782ULL, 0x412abd15155415a8ULL, 0xb6eee87777c1779fULL, 0xeb6e923737dc37a5ULL, 0x56d79ee5e5b3e57bULL, 0xd923139f9f469f8cULL, 0x17fd23f0f0e7f0d3ULL, 0x7f94204a4a354a6aULL, 0x95a944dada4fda9eULL, 0x25b0a258587d58faULL, 0xca8fcfc9c903c906ULL, 0x8d527c2929a42955ULL, 0x22145a0a0a280a50ULL, 0x4f7f50b1b1feb1e1ULL, 0x1a5dc9a0a0baa069ULL, 0xdad6146b6bb16b7fULL, 0xab17d985852e855cULL, 0x73673cbdbdcebd81ULL, 0x34ba8f5d5d695dd2ULL, 0x5020901010401080ULL, 0x03f507f4f4f7f4f3ULL, 0xc08bddcbcb0bcb16ULL, 0xc67cd33e3ef83eedULL, 0x110a2d0505140528ULL, 0xe6ce78676781671fULL, 0x53d597e4e4b7e473ULL, 0xbb4e0227279c2725ULL, 0x5882734141194132ULL, 0x9d0ba78b8b168b2cULL, 0x0153f6a7a7a6a751ULL, 0x94fab27d7de97dcfULL, 0xfb374995956e95dcULL, 0x9fad56d8d847d88eULL, 0x30eb70fbfbcbfb8bULL, 0x71c1cdeeee9fee23ULL, 0x91f8bb7c7ced7cc7ULL, 0xe3cc716666856617ULL, 0x8ea77bdddd53dda6ULL, 0x4b2eaf17175c17b8ULL, 0x468e454747014702ULL, 0xdc211a9e9e429e84ULL, 0xc589d4caca0fca1eULL, 0x995a582d2db42d75ULL, 0x79632ebfbfc6bf91ULL, 0x1b0e3f07071c0738ULL, 0x2347acadad8ead01ULL, 0x2fb4b05a5a755aeaULL, 0xb51bef838336836cULL, 0xff66b63333cc3385ULL, 0xf2c65c636391633fULL, 0x0a04120202080210ULL, 0x384993aaaa92aa39ULL, 0xa8e2de7171d971afULL, 0xcf8dc6c8c807c80eULL, 0x7d32d119196419c8ULL, 0x70923b4949394972ULL, 0x9aaf5fd9d943d986ULL, 0x1df931f2f2eff2c3ULL, 0x48dba8e3e3abe34bULL, 0x2ab6b95b5b715be2ULL, 0x920dbc88881a8834ULL, 0xc8293e9a9a529aa4ULL, 0xbe4c0b262698262dULL, 0xfa64bf3232c8328dULL, 0x4a7d59b0b0fab0e9ULL, 0x6acff2e9e983e91bULL, 0x331e770f0f3c0f78ULL, 0xa6b733d5d573d5e6ULL, 0xba1df480803a8074ULL, 0x7c6127bebec2be99ULL, 0xde87ebcdcd13cd26ULL, 0xe468893434d034bdULL, 0x75903248483d487aULL, 0x24e354ffffdbffabULL, 0x8ff48d7a7af57af7ULL, 0xea3d6490907a90f4ULL, 0x3ebe9d5f5f615fc2ULL, 0xa0403d202080201dULL, 0xd5d00f6868bd6867ULL, 0x7234ca1a1a681ad0ULL, 0x2c41b7aeae82ae19ULL, 0x5e757db4b4eab4c9ULL, 0x19a8ce54544d549aULL, 0xe53b7f93937693ecULL, 0xaa442f222288220dULL, 0xe9c86364648d6407ULL, 0x12ff2af1f1e3f1dbULL, 0xa2e6cc7373d173bfULL, 0x5a24821212481290ULL, 0x5d807a40401d403aULL, 0x2810480808200840ULL, 0xe89b95c3c32bc356ULL, 0x7bc5dfecec97ec33ULL, 0x90ab4ddbdb4bdb96ULL, 0x1f5fc0a1a1bea161ULL, 0x8307918d8d0e8d1cULL, 0xc97ac83d3df43df5ULL, 0xf1335b97976697ccULL, 0x0000000000000000ULL, 0xd483f9cfcf1bcf36ULL, 0x87566e2b2bac2b45ULL, 0xb3ece17676c57697ULL, 0xb019e68282328264ULL, 0xa9b128d6d67fd6feULL, 0x7736c31b1b6c1bd8ULL, 0x5b7774b5b5eeb5c1ULL, 0x2943beafaf86af11ULL, 0xdfd41d6a6ab56a77ULL, 0x0da0ea50505d50baULL, 0x4c8a574545094512ULL, 0x18fb38f3f3ebf3cbULL, 0xf060ad3030c0309dULL, 0x74c3c4efef9bef2bULL, 0xc37eda3f3ffc3fe5ULL, 0x1caac75555495592ULL, 0x1059dba2a2b2a279ULL, 0x65c9e9eaea8fea03ULL, 0xecca6a656589650fULL, 0x686903babad2bab9ULL, 0x935e4a2f2fbc2f65ULL, 0xe79d8ec0c027c04eULL, 
0x81a160dede5fdebeULL, 0x6c38fc1c1c701ce0ULL, 0x2ee746fdfdd3fdbbULL, 0x649a1f4d4d294d52ULL, 0xe0397692927292e4ULL, 0xbceafa7575c9758fULL, 0x1e0c360606180630ULL, 0x9809ae8a8a128a24ULL, 0x40794bb2b2f2b2f9ULL, 0x59d185e6e6bfe663ULL, 0x361c7e0e0e380e70ULL, 0x633ee71f1f7c1ff8ULL, 0xf7c4556262956237ULL, 0xa3b53ad4d477d4eeULL, 0x324d81a8a89aa829ULL, 0xf4315296966296c4ULL, 0x3aef62f9f9c3f99bULL, 0xf697a3c5c533c566ULL, 0xb14a102525942535ULL, 0x20b2ab59597959f2ULL, 0xae15d084842a8454ULL, 0xa7e4c57272d572b7ULL, 0xdd72ec3939e439d5ULL, 0x6198164c4c2d4c5aULL, 0x3bbc945e5e655ecaULL, 0x85f09f7878fd78e7ULL, 0xd870e53838e038ddULL, 0x8605988c8c0a8c14ULL, 0xb2bf17d1d163d1c6ULL, 0x0b57e4a5a5aea541ULL, 0x4dd9a1e2e2afe243ULL, 0xf8c24e616199612fULL, 0x457b42b3b3f6b3f1ULL, 0xa542342121842115ULL, 0xd625089c9c4a9c94ULL, 0x663cee1e1e781ef0ULL, 0x5286614343114322ULL, 0xfc93b1c7c73bc776ULL, 0x2be54ffcfcd7fcb3ULL, 0x1408240404100420ULL, 0x08a2e351515951b2ULL, 0xc72f2599995e99bcULL, 0xc4da226d6da96d4fULL, 0x391a650d0d340d68ULL, 0x35e979fafacffa83ULL, 0x84a369dfdf5bdfb6ULL, 0x9bfca97e7ee57ed7ULL, 0xb44819242490243dULL, 0xd776fe3b3bec3bc5ULL, 0x3d4b9aabab96ab31ULL, 0xd181f0cece1fce3eULL, 0x5522991111441188ULL, 0x8903838f8f068f0cULL, 0x6b9c044e4e254e4aULL, 0x517366b7b7e6b7d1ULL, 0x60cbe0ebeb8beb0bULL, 0xcc78c13c3cf03cfdULL, 0xbf1ffd81813e817cULL, 0xfe354094946a94d4ULL, 0x0cf31cf7f7fbf7ebULL, 0x676f18b9b9deb9a1ULL, 0x5f268b13134c1398ULL, 0x9c58512c2cb02c7dULL, 0xb8bb05d3d36bd3d6ULL, 0x5cd38ce7e7bbe76bULL, 0xcbdc396e6ea56e57ULL, 0xf395aac4c437c46eULL, 0x0f061b03030c0318ULL, 0x13acdc565645568aULL, 0x49885e44440d441aULL, 0x9efea07f7fe17fdfULL, 0x374f88a9a99ea921ULL, 0x8254672a2aa82a4dULL, 0x6d6b0abbbbd6bbb1ULL, 0xe29f87c1c123c146ULL, 0x02a6f153535153a2ULL, 0x8ba572dcdc57dcaeULL, 0x2716530b0b2c0b58ULL, 0xd327019d9d4e9d9cULL, 0xc1d82b6c6cad6c47ULL, 0xf562a43131c43195ULL, 0xb9e8f37474cd7487ULL, 0x09f115f6f6fff6e3ULL, 0x438c4c464605460aULL, 0x2645a5acac8aac09ULL, 0x970fb589891e893cULL, 0x4428b414145014a0ULL, 0x42dfbae1e1a3e15bULL, 0x4e2ca616165816b0ULL, 0xd274f73a3ae83acdULL, 0xd0d2066969b9696fULL, 0x2d12410909240948ULL, 0xade0d77070dd70a7ULL, 0x54716fb6b6e2b6d9ULL, 0xb7bd1ed0d067d0ceULL, 0x7ec7d6eded93ed3bULL, 0xdb85e2cccc17cc2eULL, 0x578468424215422aULL, 0xc22d2c98985a98b4ULL, 0x0e55eda4a4aaa449ULL, 0x8850752828a0285dULL, 0x31b8865c5c6d5cdaULL, 0x3fed6bf8f8c7f893ULL, 0xa411c28686228644ULL, }; static const u64 C4[256] = { 0xc07830d818186018ULL, 0x05af462623238c23ULL, 0x7ef991b8c6c63fc6ULL, 0x136fcdfbe8e887e8ULL, 0x4ca113cb87872687ULL, 0xa9626d11b8b8dab8ULL, 0x0805020901010401ULL, 0x426e9e0d4f4f214fULL, 0xadee6c9b3636d836ULL, 0x590451ffa6a6a2a6ULL, 0xdebdb90cd2d26fd2ULL, 0xfb06f70ef5f5f3f5ULL, 0xef80f2967979f979ULL, 0x5fcede306f6fa16fULL, 0xfcef3f6d91917e91ULL, 0xaa07a4f852525552ULL, 0x27fdc04760609d60ULL, 0x89766535bcbccabcULL, 0xaccd2b379b9b569bULL, 0x048c018a8e8e028eULL, 0x71155bd2a3a3b6a3ULL, 0x603c186c0c0c300cULL, 0xff8af6847b7bf17bULL, 0xb5e16a803535d435ULL, 0xe8693af51d1d741dULL, 0x5347ddb3e0e0a7e0ULL, 0xf6acb321d7d77bd7ULL, 0x5eed999cc2c22fc2ULL, 0x6d965c432e2eb82eULL, 0x627a96294b4b314bULL, 0xa321e15dfefedffeULL, 0x8216aed557574157ULL, 0xa8412abd15155415ULL, 0x9fb6eee87777c177ULL, 0xa5eb6e923737dc37ULL, 0x7b56d79ee5e5b3e5ULL, 0x8cd923139f9f469fULL, 0xd317fd23f0f0e7f0ULL, 0x6a7f94204a4a354aULL, 0x9e95a944dada4fdaULL, 0xfa25b0a258587d58ULL, 0x06ca8fcfc9c903c9ULL, 0x558d527c2929a429ULL, 0x5022145a0a0a280aULL, 0xe14f7f50b1b1feb1ULL, 0x691a5dc9a0a0baa0ULL, 0x7fdad6146b6bb16bULL, 0x5cab17d985852e85ULL, 0x8173673cbdbdcebdULL, 
0xd234ba8f5d5d695dULL, 0x8050209010104010ULL, 0xf303f507f4f4f7f4ULL, 0x16c08bddcbcb0bcbULL, 0xedc67cd33e3ef83eULL, 0x28110a2d05051405ULL, 0x1fe6ce7867678167ULL, 0x7353d597e4e4b7e4ULL, 0x25bb4e0227279c27ULL, 0x3258827341411941ULL, 0x2c9d0ba78b8b168bULL, 0x510153f6a7a7a6a7ULL, 0xcf94fab27d7de97dULL, 0xdcfb374995956e95ULL, 0x8e9fad56d8d847d8ULL, 0x8b30eb70fbfbcbfbULL, 0x2371c1cdeeee9feeULL, 0xc791f8bb7c7ced7cULL, 0x17e3cc7166668566ULL, 0xa68ea77bdddd53ddULL, 0xb84b2eaf17175c17ULL, 0x02468e4547470147ULL, 0x84dc211a9e9e429eULL, 0x1ec589d4caca0fcaULL, 0x75995a582d2db42dULL, 0x9179632ebfbfc6bfULL, 0x381b0e3f07071c07ULL, 0x012347acadad8eadULL, 0xea2fb4b05a5a755aULL, 0x6cb51bef83833683ULL, 0x85ff66b63333cc33ULL, 0x3ff2c65c63639163ULL, 0x100a041202020802ULL, 0x39384993aaaa92aaULL, 0xafa8e2de7171d971ULL, 0x0ecf8dc6c8c807c8ULL, 0xc87d32d119196419ULL, 0x7270923b49493949ULL, 0x869aaf5fd9d943d9ULL, 0xc31df931f2f2eff2ULL, 0x4b48dba8e3e3abe3ULL, 0xe22ab6b95b5b715bULL, 0x34920dbc88881a88ULL, 0xa4c8293e9a9a529aULL, 0x2dbe4c0b26269826ULL, 0x8dfa64bf3232c832ULL, 0xe94a7d59b0b0fab0ULL, 0x1b6acff2e9e983e9ULL, 0x78331e770f0f3c0fULL, 0xe6a6b733d5d573d5ULL, 0x74ba1df480803a80ULL, 0x997c6127bebec2beULL, 0x26de87ebcdcd13cdULL, 0xbde468893434d034ULL, 0x7a75903248483d48ULL, 0xab24e354ffffdbffULL, 0xf78ff48d7a7af57aULL, 0xf4ea3d6490907a90ULL, 0xc23ebe9d5f5f615fULL, 0x1da0403d20208020ULL, 0x67d5d00f6868bd68ULL, 0xd07234ca1a1a681aULL, 0x192c41b7aeae82aeULL, 0xc95e757db4b4eab4ULL, 0x9a19a8ce54544d54ULL, 0xece53b7f93937693ULL, 0x0daa442f22228822ULL, 0x07e9c86364648d64ULL, 0xdb12ff2af1f1e3f1ULL, 0xbfa2e6cc7373d173ULL, 0x905a248212124812ULL, 0x3a5d807a40401d40ULL, 0x4028104808082008ULL, 0x56e89b95c3c32bc3ULL, 0x337bc5dfecec97ecULL, 0x9690ab4ddbdb4bdbULL, 0x611f5fc0a1a1bea1ULL, 0x1c8307918d8d0e8dULL, 0xf5c97ac83d3df43dULL, 0xccf1335b97976697ULL, 0x0000000000000000ULL, 0x36d483f9cfcf1bcfULL, 0x4587566e2b2bac2bULL, 0x97b3ece17676c576ULL, 0x64b019e682823282ULL, 0xfea9b128d6d67fd6ULL, 0xd87736c31b1b6c1bULL, 0xc15b7774b5b5eeb5ULL, 0x112943beafaf86afULL, 0x77dfd41d6a6ab56aULL, 0xba0da0ea50505d50ULL, 0x124c8a5745450945ULL, 0xcb18fb38f3f3ebf3ULL, 0x9df060ad3030c030ULL, 0x2b74c3c4efef9befULL, 0xe5c37eda3f3ffc3fULL, 0x921caac755554955ULL, 0x791059dba2a2b2a2ULL, 0x0365c9e9eaea8feaULL, 0x0fecca6a65658965ULL, 0xb9686903babad2baULL, 0x65935e4a2f2fbc2fULL, 0x4ee79d8ec0c027c0ULL, 0xbe81a160dede5fdeULL, 0xe06c38fc1c1c701cULL, 0xbb2ee746fdfdd3fdULL, 0x52649a1f4d4d294dULL, 0xe4e0397692927292ULL, 0x8fbceafa7575c975ULL, 0x301e0c3606061806ULL, 0x249809ae8a8a128aULL, 0xf940794bb2b2f2b2ULL, 0x6359d185e6e6bfe6ULL, 0x70361c7e0e0e380eULL, 0xf8633ee71f1f7c1fULL, 0x37f7c45562629562ULL, 0xeea3b53ad4d477d4ULL, 0x29324d81a8a89aa8ULL, 0xc4f4315296966296ULL, 0x9b3aef62f9f9c3f9ULL, 0x66f697a3c5c533c5ULL, 0x35b14a1025259425ULL, 0xf220b2ab59597959ULL, 0x54ae15d084842a84ULL, 0xb7a7e4c57272d572ULL, 0xd5dd72ec3939e439ULL, 0x5a6198164c4c2d4cULL, 0xca3bbc945e5e655eULL, 0xe785f09f7878fd78ULL, 0xddd870e53838e038ULL, 0x148605988c8c0a8cULL, 0xc6b2bf17d1d163d1ULL, 0x410b57e4a5a5aea5ULL, 0x434dd9a1e2e2afe2ULL, 0x2ff8c24e61619961ULL, 0xf1457b42b3b3f6b3ULL, 0x15a5423421218421ULL, 0x94d625089c9c4a9cULL, 0xf0663cee1e1e781eULL, 0x2252866143431143ULL, 0x76fc93b1c7c73bc7ULL, 0xb32be54ffcfcd7fcULL, 0x2014082404041004ULL, 0xb208a2e351515951ULL, 0xbcc72f2599995e99ULL, 0x4fc4da226d6da96dULL, 0x68391a650d0d340dULL, 0x8335e979fafacffaULL, 0xb684a369dfdf5bdfULL, 0xd79bfca97e7ee57eULL, 0x3db4481924249024ULL, 0xc5d776fe3b3bec3bULL, 0x313d4b9aabab96abULL, 0x3ed181f0cece1fceULL, 
0x8855229911114411ULL, 0x0c8903838f8f068fULL, 0x4a6b9c044e4e254eULL, 0xd1517366b7b7e6b7ULL, 0x0b60cbe0ebeb8bebULL, 0xfdcc78c13c3cf03cULL, 0x7cbf1ffd81813e81ULL, 0xd4fe354094946a94ULL, 0xeb0cf31cf7f7fbf7ULL, 0xa1676f18b9b9deb9ULL, 0x985f268b13134c13ULL, 0x7d9c58512c2cb02cULL, 0xd6b8bb05d3d36bd3ULL, 0x6b5cd38ce7e7bbe7ULL, 0x57cbdc396e6ea56eULL, 0x6ef395aac4c437c4ULL, 0x180f061b03030c03ULL, 0x8a13acdc56564556ULL, 0x1a49885e44440d44ULL, 0xdf9efea07f7fe17fULL, 0x21374f88a9a99ea9ULL, 0x4d8254672a2aa82aULL, 0xb16d6b0abbbbd6bbULL, 0x46e29f87c1c123c1ULL, 0xa202a6f153535153ULL, 0xae8ba572dcdc57dcULL, 0x582716530b0b2c0bULL, 0x9cd327019d9d4e9dULL, 0x47c1d82b6c6cad6cULL, 0x95f562a43131c431ULL, 0x87b9e8f37474cd74ULL, 0xe309f115f6f6fff6ULL, 0x0a438c4c46460546ULL, 0x092645a5acac8aacULL, 0x3c970fb589891e89ULL, 0xa04428b414145014ULL, 0x5b42dfbae1e1a3e1ULL, 0xb04e2ca616165816ULL, 0xcdd274f73a3ae83aULL, 0x6fd0d2066969b969ULL, 0x482d124109092409ULL, 0xa7ade0d77070dd70ULL, 0xd954716fb6b6e2b6ULL, 0xceb7bd1ed0d067d0ULL, 0x3b7ec7d6eded93edULL, 0x2edb85e2cccc17ccULL, 0x2a57846842421542ULL, 0xb4c22d2c98985a98ULL, 0x490e55eda4a4aaa4ULL, 0x5d8850752828a028ULL, 0xda31b8865c5c6d5cULL, 0x933fed6bf8f8c7f8ULL, 0x44a411c286862286ULL, }; static const u64 C5[256] = { 0x18c07830d8181860ULL, 0x2305af462623238cULL, 0xc67ef991b8c6c63fULL, 0xe8136fcdfbe8e887ULL, 0x874ca113cb878726ULL, 0xb8a9626d11b8b8daULL, 0x0108050209010104ULL, 0x4f426e9e0d4f4f21ULL, 0x36adee6c9b3636d8ULL, 0xa6590451ffa6a6a2ULL, 0xd2debdb90cd2d26fULL, 0xf5fb06f70ef5f5f3ULL, 0x79ef80f2967979f9ULL, 0x6f5fcede306f6fa1ULL, 0x91fcef3f6d91917eULL, 0x52aa07a4f8525255ULL, 0x6027fdc04760609dULL, 0xbc89766535bcbccaULL, 0x9baccd2b379b9b56ULL, 0x8e048c018a8e8e02ULL, 0xa371155bd2a3a3b6ULL, 0x0c603c186c0c0c30ULL, 0x7bff8af6847b7bf1ULL, 0x35b5e16a803535d4ULL, 0x1de8693af51d1d74ULL, 0xe05347ddb3e0e0a7ULL, 0xd7f6acb321d7d77bULL, 0xc25eed999cc2c22fULL, 0x2e6d965c432e2eb8ULL, 0x4b627a96294b4b31ULL, 0xfea321e15dfefedfULL, 0x578216aed5575741ULL, 0x15a8412abd151554ULL, 0x779fb6eee87777c1ULL, 0x37a5eb6e923737dcULL, 0xe57b56d79ee5e5b3ULL, 0x9f8cd923139f9f46ULL, 0xf0d317fd23f0f0e7ULL, 0x4a6a7f94204a4a35ULL, 0xda9e95a944dada4fULL, 0x58fa25b0a258587dULL, 0xc906ca8fcfc9c903ULL, 0x29558d527c2929a4ULL, 0x0a5022145a0a0a28ULL, 0xb1e14f7f50b1b1feULL, 0xa0691a5dc9a0a0baULL, 0x6b7fdad6146b6bb1ULL, 0x855cab17d985852eULL, 0xbd8173673cbdbdceULL, 0x5dd234ba8f5d5d69ULL, 0x1080502090101040ULL, 0xf4f303f507f4f4f7ULL, 0xcb16c08bddcbcb0bULL, 0x3eedc67cd33e3ef8ULL, 0x0528110a2d050514ULL, 0x671fe6ce78676781ULL, 0xe47353d597e4e4b7ULL, 0x2725bb4e0227279cULL, 0x4132588273414119ULL, 0x8b2c9d0ba78b8b16ULL, 0xa7510153f6a7a7a6ULL, 0x7dcf94fab27d7de9ULL, 0x95dcfb374995956eULL, 0xd88e9fad56d8d847ULL, 0xfb8b30eb70fbfbcbULL, 0xee2371c1cdeeee9fULL, 0x7cc791f8bb7c7cedULL, 0x6617e3cc71666685ULL, 0xdda68ea77bdddd53ULL, 0x17b84b2eaf17175cULL, 0x4702468e45474701ULL, 0x9e84dc211a9e9e42ULL, 0xca1ec589d4caca0fULL, 0x2d75995a582d2db4ULL, 0xbf9179632ebfbfc6ULL, 0x07381b0e3f07071cULL, 0xad012347acadad8eULL, 0x5aea2fb4b05a5a75ULL, 0x836cb51bef838336ULL, 0x3385ff66b63333ccULL, 0x633ff2c65c636391ULL, 0x02100a0412020208ULL, 0xaa39384993aaaa92ULL, 0x71afa8e2de7171d9ULL, 0xc80ecf8dc6c8c807ULL, 0x19c87d32d1191964ULL, 0x497270923b494939ULL, 0xd9869aaf5fd9d943ULL, 0xf2c31df931f2f2efULL, 0xe34b48dba8e3e3abULL, 0x5be22ab6b95b5b71ULL, 0x8834920dbc88881aULL, 0x9aa4c8293e9a9a52ULL, 0x262dbe4c0b262698ULL, 0x328dfa64bf3232c8ULL, 0xb0e94a7d59b0b0faULL, 0xe91b6acff2e9e983ULL, 0x0f78331e770f0f3cULL, 0xd5e6a6b733d5d573ULL, 0x8074ba1df480803aULL, 
0xbe997c6127bebec2ULL, 0xcd26de87ebcdcd13ULL, 0x34bde468893434d0ULL, 0x487a75903248483dULL, 0xffab24e354ffffdbULL, 0x7af78ff48d7a7af5ULL, 0x90f4ea3d6490907aULL, 0x5fc23ebe9d5f5f61ULL, 0x201da0403d202080ULL, 0x6867d5d00f6868bdULL, 0x1ad07234ca1a1a68ULL, 0xae192c41b7aeae82ULL, 0xb4c95e757db4b4eaULL, 0x549a19a8ce54544dULL, 0x93ece53b7f939376ULL, 0x220daa442f222288ULL, 0x6407e9c86364648dULL, 0xf1db12ff2af1f1e3ULL, 0x73bfa2e6cc7373d1ULL, 0x12905a2482121248ULL, 0x403a5d807a40401dULL, 0x0840281048080820ULL, 0xc356e89b95c3c32bULL, 0xec337bc5dfecec97ULL, 0xdb9690ab4ddbdb4bULL, 0xa1611f5fc0a1a1beULL, 0x8d1c8307918d8d0eULL, 0x3df5c97ac83d3df4ULL, 0x97ccf1335b979766ULL, 0x0000000000000000ULL, 0xcf36d483f9cfcf1bULL, 0x2b4587566e2b2bacULL, 0x7697b3ece17676c5ULL, 0x8264b019e6828232ULL, 0xd6fea9b128d6d67fULL, 0x1bd87736c31b1b6cULL, 0xb5c15b7774b5b5eeULL, 0xaf112943beafaf86ULL, 0x6a77dfd41d6a6ab5ULL, 0x50ba0da0ea50505dULL, 0x45124c8a57454509ULL, 0xf3cb18fb38f3f3ebULL, 0x309df060ad3030c0ULL, 0xef2b74c3c4efef9bULL, 0x3fe5c37eda3f3ffcULL, 0x55921caac7555549ULL, 0xa2791059dba2a2b2ULL, 0xea0365c9e9eaea8fULL, 0x650fecca6a656589ULL, 0xbab9686903babad2ULL, 0x2f65935e4a2f2fbcULL, 0xc04ee79d8ec0c027ULL, 0xdebe81a160dede5fULL, 0x1ce06c38fc1c1c70ULL, 0xfdbb2ee746fdfdd3ULL, 0x4d52649a1f4d4d29ULL, 0x92e4e03976929272ULL, 0x758fbceafa7575c9ULL, 0x06301e0c36060618ULL, 0x8a249809ae8a8a12ULL, 0xb2f940794bb2b2f2ULL, 0xe66359d185e6e6bfULL, 0x0e70361c7e0e0e38ULL, 0x1ff8633ee71f1f7cULL, 0x6237f7c455626295ULL, 0xd4eea3b53ad4d477ULL, 0xa829324d81a8a89aULL, 0x96c4f43152969662ULL, 0xf99b3aef62f9f9c3ULL, 0xc566f697a3c5c533ULL, 0x2535b14a10252594ULL, 0x59f220b2ab595979ULL, 0x8454ae15d084842aULL, 0x72b7a7e4c57272d5ULL, 0x39d5dd72ec3939e4ULL, 0x4c5a6198164c4c2dULL, 0x5eca3bbc945e5e65ULL, 0x78e785f09f7878fdULL, 0x38ddd870e53838e0ULL, 0x8c148605988c8c0aULL, 0xd1c6b2bf17d1d163ULL, 0xa5410b57e4a5a5aeULL, 0xe2434dd9a1e2e2afULL, 0x612ff8c24e616199ULL, 0xb3f1457b42b3b3f6ULL, 0x2115a54234212184ULL, 0x9c94d625089c9c4aULL, 0x1ef0663cee1e1e78ULL, 0x4322528661434311ULL, 0xc776fc93b1c7c73bULL, 0xfcb32be54ffcfcd7ULL, 0x0420140824040410ULL, 0x51b208a2e3515159ULL, 0x99bcc72f2599995eULL, 0x6d4fc4da226d6da9ULL, 0x0d68391a650d0d34ULL, 0xfa8335e979fafacfULL, 0xdfb684a369dfdf5bULL, 0x7ed79bfca97e7ee5ULL, 0x243db44819242490ULL, 0x3bc5d776fe3b3becULL, 0xab313d4b9aabab96ULL, 0xce3ed181f0cece1fULL, 0x1188552299111144ULL, 0x8f0c8903838f8f06ULL, 0x4e4a6b9c044e4e25ULL, 0xb7d1517366b7b7e6ULL, 0xeb0b60cbe0ebeb8bULL, 0x3cfdcc78c13c3cf0ULL, 0x817cbf1ffd81813eULL, 0x94d4fe354094946aULL, 0xf7eb0cf31cf7f7fbULL, 0xb9a1676f18b9b9deULL, 0x13985f268b13134cULL, 0x2c7d9c58512c2cb0ULL, 0xd3d6b8bb05d3d36bULL, 0xe76b5cd38ce7e7bbULL, 0x6e57cbdc396e6ea5ULL, 0xc46ef395aac4c437ULL, 0x03180f061b03030cULL, 0x568a13acdc565645ULL, 0x441a49885e44440dULL, 0x7fdf9efea07f7fe1ULL, 0xa921374f88a9a99eULL, 0x2a4d8254672a2aa8ULL, 0xbbb16d6b0abbbbd6ULL, 0xc146e29f87c1c123ULL, 0x53a202a6f1535351ULL, 0xdcae8ba572dcdc57ULL, 0x0b582716530b0b2cULL, 0x9d9cd327019d9d4eULL, 0x6c47c1d82b6c6cadULL, 0x3195f562a43131c4ULL, 0x7487b9e8f37474cdULL, 0xf6e309f115f6f6ffULL, 0x460a438c4c464605ULL, 0xac092645a5acac8aULL, 0x893c970fb589891eULL, 0x14a04428b4141450ULL, 0xe15b42dfbae1e1a3ULL, 0x16b04e2ca6161658ULL, 0x3acdd274f73a3ae8ULL, 0x696fd0d2066969b9ULL, 0x09482d1241090924ULL, 0x70a7ade0d77070ddULL, 0xb6d954716fb6b6e2ULL, 0xd0ceb7bd1ed0d067ULL, 0xed3b7ec7d6eded93ULL, 0xcc2edb85e2cccc17ULL, 0x422a578468424215ULL, 0x98b4c22d2c98985aULL, 0xa4490e55eda4a4aaULL, 0x285d8850752828a0ULL, 0x5cda31b8865c5c6dULL, 
0xf8933fed6bf8f8c7ULL, 0x8644a411c2868622ULL, }; static const u64 C6[256] = { 0x6018c07830d81818ULL, 0x8c2305af46262323ULL, 0x3fc67ef991b8c6c6ULL, 0x87e8136fcdfbe8e8ULL, 0x26874ca113cb8787ULL, 0xdab8a9626d11b8b8ULL, 0x0401080502090101ULL, 0x214f426e9e0d4f4fULL, 0xd836adee6c9b3636ULL, 0xa2a6590451ffa6a6ULL, 0x6fd2debdb90cd2d2ULL, 0xf3f5fb06f70ef5f5ULL, 0xf979ef80f2967979ULL, 0xa16f5fcede306f6fULL, 0x7e91fcef3f6d9191ULL, 0x5552aa07a4f85252ULL, 0x9d6027fdc0476060ULL, 0xcabc89766535bcbcULL, 0x569baccd2b379b9bULL, 0x028e048c018a8e8eULL, 0xb6a371155bd2a3a3ULL, 0x300c603c186c0c0cULL, 0xf17bff8af6847b7bULL, 0xd435b5e16a803535ULL, 0x741de8693af51d1dULL, 0xa7e05347ddb3e0e0ULL, 0x7bd7f6acb321d7d7ULL, 0x2fc25eed999cc2c2ULL, 0xb82e6d965c432e2eULL, 0x314b627a96294b4bULL, 0xdffea321e15dfefeULL, 0x41578216aed55757ULL, 0x5415a8412abd1515ULL, 0xc1779fb6eee87777ULL, 0xdc37a5eb6e923737ULL, 0xb3e57b56d79ee5e5ULL, 0x469f8cd923139f9fULL, 0xe7f0d317fd23f0f0ULL, 0x354a6a7f94204a4aULL, 0x4fda9e95a944dadaULL, 0x7d58fa25b0a25858ULL, 0x03c906ca8fcfc9c9ULL, 0xa429558d527c2929ULL, 0x280a5022145a0a0aULL, 0xfeb1e14f7f50b1b1ULL, 0xbaa0691a5dc9a0a0ULL, 0xb16b7fdad6146b6bULL, 0x2e855cab17d98585ULL, 0xcebd8173673cbdbdULL, 0x695dd234ba8f5d5dULL, 0x4010805020901010ULL, 0xf7f4f303f507f4f4ULL, 0x0bcb16c08bddcbcbULL, 0xf83eedc67cd33e3eULL, 0x140528110a2d0505ULL, 0x81671fe6ce786767ULL, 0xb7e47353d597e4e4ULL, 0x9c2725bb4e022727ULL, 0x1941325882734141ULL, 0x168b2c9d0ba78b8bULL, 0xa6a7510153f6a7a7ULL, 0xe97dcf94fab27d7dULL, 0x6e95dcfb37499595ULL, 0x47d88e9fad56d8d8ULL, 0xcbfb8b30eb70fbfbULL, 0x9fee2371c1cdeeeeULL, 0xed7cc791f8bb7c7cULL, 0x856617e3cc716666ULL, 0x53dda68ea77bddddULL, 0x5c17b84b2eaf1717ULL, 0x014702468e454747ULL, 0x429e84dc211a9e9eULL, 0x0fca1ec589d4cacaULL, 0xb42d75995a582d2dULL, 0xc6bf9179632ebfbfULL, 0x1c07381b0e3f0707ULL, 0x8ead012347acadadULL, 0x755aea2fb4b05a5aULL, 0x36836cb51bef8383ULL, 0xcc3385ff66b63333ULL, 0x91633ff2c65c6363ULL, 0x0802100a04120202ULL, 0x92aa39384993aaaaULL, 0xd971afa8e2de7171ULL, 0x07c80ecf8dc6c8c8ULL, 0x6419c87d32d11919ULL, 0x39497270923b4949ULL, 0x43d9869aaf5fd9d9ULL, 0xeff2c31df931f2f2ULL, 0xabe34b48dba8e3e3ULL, 0x715be22ab6b95b5bULL, 0x1a8834920dbc8888ULL, 0x529aa4c8293e9a9aULL, 0x98262dbe4c0b2626ULL, 0xc8328dfa64bf3232ULL, 0xfab0e94a7d59b0b0ULL, 0x83e91b6acff2e9e9ULL, 0x3c0f78331e770f0fULL, 0x73d5e6a6b733d5d5ULL, 0x3a8074ba1df48080ULL, 0xc2be997c6127bebeULL, 0x13cd26de87ebcdcdULL, 0xd034bde468893434ULL, 0x3d487a7590324848ULL, 0xdbffab24e354ffffULL, 0xf57af78ff48d7a7aULL, 0x7a90f4ea3d649090ULL, 0x615fc23ebe9d5f5fULL, 0x80201da0403d2020ULL, 0xbd6867d5d00f6868ULL, 0x681ad07234ca1a1aULL, 0x82ae192c41b7aeaeULL, 0xeab4c95e757db4b4ULL, 0x4d549a19a8ce5454ULL, 0x7693ece53b7f9393ULL, 0x88220daa442f2222ULL, 0x8d6407e9c8636464ULL, 0xe3f1db12ff2af1f1ULL, 0xd173bfa2e6cc7373ULL, 0x4812905a24821212ULL, 0x1d403a5d807a4040ULL, 0x2008402810480808ULL, 0x2bc356e89b95c3c3ULL, 0x97ec337bc5dfececULL, 0x4bdb9690ab4ddbdbULL, 0xbea1611f5fc0a1a1ULL, 0x0e8d1c8307918d8dULL, 0xf43df5c97ac83d3dULL, 0x6697ccf1335b9797ULL, 0x0000000000000000ULL, 0x1bcf36d483f9cfcfULL, 0xac2b4587566e2b2bULL, 0xc57697b3ece17676ULL, 0x328264b019e68282ULL, 0x7fd6fea9b128d6d6ULL, 0x6c1bd87736c31b1bULL, 0xeeb5c15b7774b5b5ULL, 0x86af112943beafafULL, 0xb56a77dfd41d6a6aULL, 0x5d50ba0da0ea5050ULL, 0x0945124c8a574545ULL, 0xebf3cb18fb38f3f3ULL, 0xc0309df060ad3030ULL, 0x9bef2b74c3c4efefULL, 0xfc3fe5c37eda3f3fULL, 0x4955921caac75555ULL, 0xb2a2791059dba2a2ULL, 0x8fea0365c9e9eaeaULL, 0x89650fecca6a6565ULL, 0xd2bab9686903babaULL, 0xbc2f65935e4a2f2fULL, 
0x27c04ee79d8ec0c0ULL, 0x5fdebe81a160dedeULL, 0x701ce06c38fc1c1cULL, 0xd3fdbb2ee746fdfdULL, 0x294d52649a1f4d4dULL, 0x7292e4e039769292ULL, 0xc9758fbceafa7575ULL, 0x1806301e0c360606ULL, 0x128a249809ae8a8aULL, 0xf2b2f940794bb2b2ULL, 0xbfe66359d185e6e6ULL, 0x380e70361c7e0e0eULL, 0x7c1ff8633ee71f1fULL, 0x956237f7c4556262ULL, 0x77d4eea3b53ad4d4ULL, 0x9aa829324d81a8a8ULL, 0x6296c4f431529696ULL, 0xc3f99b3aef62f9f9ULL, 0x33c566f697a3c5c5ULL, 0x942535b14a102525ULL, 0x7959f220b2ab5959ULL, 0x2a8454ae15d08484ULL, 0xd572b7a7e4c57272ULL, 0xe439d5dd72ec3939ULL, 0x2d4c5a6198164c4cULL, 0x655eca3bbc945e5eULL, 0xfd78e785f09f7878ULL, 0xe038ddd870e53838ULL, 0x0a8c148605988c8cULL, 0x63d1c6b2bf17d1d1ULL, 0xaea5410b57e4a5a5ULL, 0xafe2434dd9a1e2e2ULL, 0x99612ff8c24e6161ULL, 0xf6b3f1457b42b3b3ULL, 0x842115a542342121ULL, 0x4a9c94d625089c9cULL, 0x781ef0663cee1e1eULL, 0x1143225286614343ULL, 0x3bc776fc93b1c7c7ULL, 0xd7fcb32be54ffcfcULL, 0x1004201408240404ULL, 0x5951b208a2e35151ULL, 0x5e99bcc72f259999ULL, 0xa96d4fc4da226d6dULL, 0x340d68391a650d0dULL, 0xcffa8335e979fafaULL, 0x5bdfb684a369dfdfULL, 0xe57ed79bfca97e7eULL, 0x90243db448192424ULL, 0xec3bc5d776fe3b3bULL, 0x96ab313d4b9aababULL, 0x1fce3ed181f0ceceULL, 0x4411885522991111ULL, 0x068f0c8903838f8fULL, 0x254e4a6b9c044e4eULL, 0xe6b7d1517366b7b7ULL, 0x8beb0b60cbe0ebebULL, 0xf03cfdcc78c13c3cULL, 0x3e817cbf1ffd8181ULL, 0x6a94d4fe35409494ULL, 0xfbf7eb0cf31cf7f7ULL, 0xdeb9a1676f18b9b9ULL, 0x4c13985f268b1313ULL, 0xb02c7d9c58512c2cULL, 0x6bd3d6b8bb05d3d3ULL, 0xbbe76b5cd38ce7e7ULL, 0xa56e57cbdc396e6eULL, 0x37c46ef395aac4c4ULL, 0x0c03180f061b0303ULL, 0x45568a13acdc5656ULL, 0x0d441a49885e4444ULL, 0xe17fdf9efea07f7fULL, 0x9ea921374f88a9a9ULL, 0xa82a4d8254672a2aULL, 0xd6bbb16d6b0abbbbULL, 0x23c146e29f87c1c1ULL, 0x5153a202a6f15353ULL, 0x57dcae8ba572dcdcULL, 0x2c0b582716530b0bULL, 0x4e9d9cd327019d9dULL, 0xad6c47c1d82b6c6cULL, 0xc43195f562a43131ULL, 0xcd7487b9e8f37474ULL, 0xfff6e309f115f6f6ULL, 0x05460a438c4c4646ULL, 0x8aac092645a5acacULL, 0x1e893c970fb58989ULL, 0x5014a04428b41414ULL, 0xa3e15b42dfbae1e1ULL, 0x5816b04e2ca61616ULL, 0xe83acdd274f73a3aULL, 0xb9696fd0d2066969ULL, 0x2409482d12410909ULL, 0xdd70a7ade0d77070ULL, 0xe2b6d954716fb6b6ULL, 0x67d0ceb7bd1ed0d0ULL, 0x93ed3b7ec7d6ededULL, 0x17cc2edb85e2ccccULL, 0x15422a5784684242ULL, 0x5a98b4c22d2c9898ULL, 0xaaa4490e55eda4a4ULL, 0xa0285d8850752828ULL, 0x6d5cda31b8865c5cULL, 0xc7f8933fed6bf8f8ULL, 0x228644a411c28686ULL, }; static const u64 C7[256] = { 0x186018c07830d818ULL, 0x238c2305af462623ULL, 0xc63fc67ef991b8c6ULL, 0xe887e8136fcdfbe8ULL, 0x8726874ca113cb87ULL, 0xb8dab8a9626d11b8ULL, 0x0104010805020901ULL, 0x4f214f426e9e0d4fULL, 0x36d836adee6c9b36ULL, 0xa6a2a6590451ffa6ULL, 0xd26fd2debdb90cd2ULL, 0xf5f3f5fb06f70ef5ULL, 0x79f979ef80f29679ULL, 0x6fa16f5fcede306fULL, 0x917e91fcef3f6d91ULL, 0x525552aa07a4f852ULL, 0x609d6027fdc04760ULL, 0xbccabc89766535bcULL, 0x9b569baccd2b379bULL, 0x8e028e048c018a8eULL, 0xa3b6a371155bd2a3ULL, 0x0c300c603c186c0cULL, 0x7bf17bff8af6847bULL, 0x35d435b5e16a8035ULL, 0x1d741de8693af51dULL, 0xe0a7e05347ddb3e0ULL, 0xd77bd7f6acb321d7ULL, 0xc22fc25eed999cc2ULL, 0x2eb82e6d965c432eULL, 0x4b314b627a96294bULL, 0xfedffea321e15dfeULL, 0x5741578216aed557ULL, 0x155415a8412abd15ULL, 0x77c1779fb6eee877ULL, 0x37dc37a5eb6e9237ULL, 0xe5b3e57b56d79ee5ULL, 0x9f469f8cd923139fULL, 0xf0e7f0d317fd23f0ULL, 0x4a354a6a7f94204aULL, 0xda4fda9e95a944daULL, 0x587d58fa25b0a258ULL, 0xc903c906ca8fcfc9ULL, 0x29a429558d527c29ULL, 0x0a280a5022145a0aULL, 0xb1feb1e14f7f50b1ULL, 0xa0baa0691a5dc9a0ULL, 0x6bb16b7fdad6146bULL, 0x852e855cab17d985ULL, 
0xbdcebd8173673cbdULL, 0x5d695dd234ba8f5dULL, 0x1040108050209010ULL, 0xf4f7f4f303f507f4ULL, 0xcb0bcb16c08bddcbULL, 0x3ef83eedc67cd33eULL, 0x05140528110a2d05ULL, 0x6781671fe6ce7867ULL, 0xe4b7e47353d597e4ULL, 0x279c2725bb4e0227ULL, 0x4119413258827341ULL, 0x8b168b2c9d0ba78bULL, 0xa7a6a7510153f6a7ULL, 0x7de97dcf94fab27dULL, 0x956e95dcfb374995ULL, 0xd847d88e9fad56d8ULL, 0xfbcbfb8b30eb70fbULL, 0xee9fee2371c1cdeeULL, 0x7ced7cc791f8bb7cULL, 0x66856617e3cc7166ULL, 0xdd53dda68ea77bddULL, 0x175c17b84b2eaf17ULL, 0x47014702468e4547ULL, 0x9e429e84dc211a9eULL, 0xca0fca1ec589d4caULL, 0x2db42d75995a582dULL, 0xbfc6bf9179632ebfULL, 0x071c07381b0e3f07ULL, 0xad8ead012347acadULL, 0x5a755aea2fb4b05aULL, 0x8336836cb51bef83ULL, 0x33cc3385ff66b633ULL, 0x6391633ff2c65c63ULL, 0x020802100a041202ULL, 0xaa92aa39384993aaULL, 0x71d971afa8e2de71ULL, 0xc807c80ecf8dc6c8ULL, 0x196419c87d32d119ULL, 0x4939497270923b49ULL, 0xd943d9869aaf5fd9ULL, 0xf2eff2c31df931f2ULL, 0xe3abe34b48dba8e3ULL, 0x5b715be22ab6b95bULL, 0x881a8834920dbc88ULL, 0x9a529aa4c8293e9aULL, 0x2698262dbe4c0b26ULL, 0x32c8328dfa64bf32ULL, 0xb0fab0e94a7d59b0ULL, 0xe983e91b6acff2e9ULL, 0x0f3c0f78331e770fULL, 0xd573d5e6a6b733d5ULL, 0x803a8074ba1df480ULL, 0xbec2be997c6127beULL, 0xcd13cd26de87ebcdULL, 0x34d034bde4688934ULL, 0x483d487a75903248ULL, 0xffdbffab24e354ffULL, 0x7af57af78ff48d7aULL, 0x907a90f4ea3d6490ULL, 0x5f615fc23ebe9d5fULL, 0x2080201da0403d20ULL, 0x68bd6867d5d00f68ULL, 0x1a681ad07234ca1aULL, 0xae82ae192c41b7aeULL, 0xb4eab4c95e757db4ULL, 0x544d549a19a8ce54ULL, 0x937693ece53b7f93ULL, 0x2288220daa442f22ULL, 0x648d6407e9c86364ULL, 0xf1e3f1db12ff2af1ULL, 0x73d173bfa2e6cc73ULL, 0x124812905a248212ULL, 0x401d403a5d807a40ULL, 0x0820084028104808ULL, 0xc32bc356e89b95c3ULL, 0xec97ec337bc5dfecULL, 0xdb4bdb9690ab4ddbULL, 0xa1bea1611f5fc0a1ULL, 0x8d0e8d1c8307918dULL, 0x3df43df5c97ac83dULL, 0x976697ccf1335b97ULL, 0x0000000000000000ULL, 0xcf1bcf36d483f9cfULL, 0x2bac2b4587566e2bULL, 0x76c57697b3ece176ULL, 0x82328264b019e682ULL, 0xd67fd6fea9b128d6ULL, 0x1b6c1bd87736c31bULL, 0xb5eeb5c15b7774b5ULL, 0xaf86af112943beafULL, 0x6ab56a77dfd41d6aULL, 0x505d50ba0da0ea50ULL, 0x450945124c8a5745ULL, 0xf3ebf3cb18fb38f3ULL, 0x30c0309df060ad30ULL, 0xef9bef2b74c3c4efULL, 0x3ffc3fe5c37eda3fULL, 0x554955921caac755ULL, 0xa2b2a2791059dba2ULL, 0xea8fea0365c9e9eaULL, 0x6589650fecca6a65ULL, 0xbad2bab9686903baULL, 0x2fbc2f65935e4a2fULL, 0xc027c04ee79d8ec0ULL, 0xde5fdebe81a160deULL, 0x1c701ce06c38fc1cULL, 0xfdd3fdbb2ee746fdULL, 0x4d294d52649a1f4dULL, 0x927292e4e0397692ULL, 0x75c9758fbceafa75ULL, 0x061806301e0c3606ULL, 0x8a128a249809ae8aULL, 0xb2f2b2f940794bb2ULL, 0xe6bfe66359d185e6ULL, 0x0e380e70361c7e0eULL, 0x1f7c1ff8633ee71fULL, 0x62956237f7c45562ULL, 0xd477d4eea3b53ad4ULL, 0xa89aa829324d81a8ULL, 0x966296c4f4315296ULL, 0xf9c3f99b3aef62f9ULL, 0xc533c566f697a3c5ULL, 0x25942535b14a1025ULL, 0x597959f220b2ab59ULL, 0x842a8454ae15d084ULL, 0x72d572b7a7e4c572ULL, 0x39e439d5dd72ec39ULL, 0x4c2d4c5a6198164cULL, 0x5e655eca3bbc945eULL, 0x78fd78e785f09f78ULL, 0x38e038ddd870e538ULL, 0x8c0a8c148605988cULL, 0xd163d1c6b2bf17d1ULL, 0xa5aea5410b57e4a5ULL, 0xe2afe2434dd9a1e2ULL, 0x6199612ff8c24e61ULL, 0xb3f6b3f1457b42b3ULL, 0x21842115a5423421ULL, 0x9c4a9c94d625089cULL, 0x1e781ef0663cee1eULL, 0x4311432252866143ULL, 0xc73bc776fc93b1c7ULL, 0xfcd7fcb32be54ffcULL, 0x0410042014082404ULL, 0x515951b208a2e351ULL, 0x995e99bcc72f2599ULL, 0x6da96d4fc4da226dULL, 0x0d340d68391a650dULL, 0xfacffa8335e979faULL, 0xdf5bdfb684a369dfULL, 0x7ee57ed79bfca97eULL, 0x2490243db4481924ULL, 0x3bec3bc5d776fe3bULL, 0xab96ab313d4b9aabULL, 
0xce1fce3ed181f0ceULL, 0x1144118855229911ULL, 0x8f068f0c8903838fULL, 0x4e254e4a6b9c044eULL, 0xb7e6b7d1517366b7ULL, 0xeb8beb0b60cbe0ebULL, 0x3cf03cfdcc78c13cULL, 0x813e817cbf1ffd81ULL, 0x946a94d4fe354094ULL, 0xf7fbf7eb0cf31cf7ULL, 0xb9deb9a1676f18b9ULL, 0x134c13985f268b13ULL, 0x2cb02c7d9c58512cULL, 0xd36bd3d6b8bb05d3ULL, 0xe7bbe76b5cd38ce7ULL, 0x6ea56e57cbdc396eULL, 0xc437c46ef395aac4ULL, 0x030c03180f061b03ULL, 0x5645568a13acdc56ULL, 0x440d441a49885e44ULL, 0x7fe17fdf9efea07fULL, 0xa99ea921374f88a9ULL, 0x2aa82a4d8254672aULL, 0xbbd6bbb16d6b0abbULL, 0xc123c146e29f87c1ULL, 0x535153a202a6f153ULL, 0xdc57dcae8ba572dcULL, 0x0b2c0b582716530bULL, 0x9d4e9d9cd327019dULL, 0x6cad6c47c1d82b6cULL, 0x31c43195f562a431ULL, 0x74cd7487b9e8f374ULL, 0xf6fff6e309f115f6ULL, 0x4605460a438c4c46ULL, 0xac8aac092645a5acULL, 0x891e893c970fb589ULL, 0x145014a04428b414ULL, 0xe1a3e15b42dfbae1ULL, 0x165816b04e2ca616ULL, 0x3ae83acdd274f73aULL, 0x69b9696fd0d20669ULL, 0x092409482d124109ULL, 0x70dd70a7ade0d770ULL, 0xb6e2b6d954716fb6ULL, 0xd067d0ceb7bd1ed0ULL, 0xed93ed3b7ec7d6edULL, 0xcc17cc2edb85e2ccULL, 0x4215422a57846842ULL, 0x985a98b4c22d2c98ULL, 0xa4aaa4490e55eda4ULL, 0x28a0285d88507528ULL, 0x5c6d5cda31b8865cULL, 0xf8c7f8933fed6bf8ULL, 0x86228644a411c286ULL, }; static const u64 rc[WHIRLPOOL_ROUNDS] = { 0x1823c6e887b8014fULL, 0x36a6d2f5796f9152ULL, 0x60bc9b8ea30c7b35ULL, 0x1de0d7c22e4bfe57ULL, 0x157737e59ff04adaULL, 0x58c9290ab1a06b85ULL, 0xbd5d10f4cb3e0567ULL, 0xe427418ba77d95d8ULL, 0xfbee7c66dd17479eULL, 0xca2dbf07ad5a8333ULL, }; /* * The core Whirlpool transform. */ static __no_kmsan_checks void wp512_process_buffer(struct wp512_ctx *wctx) { int i, r; u64 K[8]; /* the round key */ u64 block[8]; /* mu(buffer) */ u64 state[8]; /* the cipher state */ u64 L[8]; const __be64 *buffer = (const __be64 *)wctx->buffer; for (i = 0; i < 8; i++) block[i] = be64_to_cpu(buffer[i]); state[0] = block[0] ^ (K[0] = wctx->hash[0]); state[1] = block[1] ^ (K[1] = wctx->hash[1]); state[2] = block[2] ^ (K[2] = wctx->hash[2]); state[3] = block[3] ^ (K[3] = wctx->hash[3]); state[4] = block[4] ^ (K[4] = wctx->hash[4]); state[5] = block[5] ^ (K[5] = wctx->hash[5]); state[6] = block[6] ^ (K[6] = wctx->hash[6]); state[7] = block[7] ^ (K[7] = wctx->hash[7]); for (r = 0; r < WHIRLPOOL_ROUNDS; r++) { L[0] = C0[(int)(K[0] >> 56) ] ^ C1[(int)(K[7] >> 48) & 0xff] ^ C2[(int)(K[6] >> 40) & 0xff] ^ C3[(int)(K[5] >> 32) & 0xff] ^ C4[(int)(K[4] >> 24) & 0xff] ^ C5[(int)(K[3] >> 16) & 0xff] ^ C6[(int)(K[2] >> 8) & 0xff] ^ C7[(int)(K[1] ) & 0xff] ^ rc[r]; L[1] = C0[(int)(K[1] >> 56) ] ^ C1[(int)(K[0] >> 48) & 0xff] ^ C2[(int)(K[7] >> 40) & 0xff] ^ C3[(int)(K[6] >> 32) & 0xff] ^ C4[(int)(K[5] >> 24) & 0xff] ^ C5[(int)(K[4] >> 16) & 0xff] ^ C6[(int)(K[3] >> 8) & 0xff] ^ C7[(int)(K[2] ) & 0xff]; L[2] = C0[(int)(K[2] >> 56) ] ^ C1[(int)(K[1] >> 48) & 0xff] ^ C2[(int)(K[0] >> 40) & 0xff] ^ C3[(int)(K[7] >> 32) & 0xff] ^ C4[(int)(K[6] >> 24) & 0xff] ^ C5[(int)(K[5] >> 16) & 0xff] ^ C6[(int)(K[4] >> 8) & 0xff] ^ C7[(int)(K[3] ) & 0xff]; L[3] = C0[(int)(K[3] >> 56) ] ^ C1[(int)(K[2] >> 48) & 0xff] ^ C2[(int)(K[1] >> 40) & 0xff] ^ C3[(int)(K[0] >> 32) & 0xff] ^ C4[(int)(K[7] >> 24) & 0xff] ^ C5[(int)(K[6] >> 16) & 0xff] ^ C6[(int)(K[5] >> 8) & 0xff] ^ C7[(int)(K[4] ) & 0xff]; L[4] = C0[(int)(K[4] >> 56) ] ^ C1[(int)(K[3] >> 48) & 0xff] ^ C2[(int)(K[2] >> 40) & 0xff] ^ C3[(int)(K[1] >> 32) & 0xff] ^ C4[(int)(K[0] >> 24) & 0xff] ^ C5[(int)(K[7] >> 16) & 0xff] ^ C6[(int)(K[6] >> 8) & 0xff] ^ C7[(int)(K[5] ) & 0xff]; L[5] = C0[(int)(K[5] >> 56) ] ^ C1[(int)(K[4] >> 48) & 0xff] ^ 
C2[(int)(K[3] >> 40) & 0xff] ^ C3[(int)(K[2] >> 32) & 0xff] ^ C4[(int)(K[1] >> 24) & 0xff] ^ C5[(int)(K[0] >> 16) & 0xff] ^ C6[(int)(K[7] >> 8) & 0xff] ^ C7[(int)(K[6] ) & 0xff]; L[6] = C0[(int)(K[6] >> 56) ] ^ C1[(int)(K[5] >> 48) & 0xff] ^ C2[(int)(K[4] >> 40) & 0xff] ^ C3[(int)(K[3] >> 32) & 0xff] ^ C4[(int)(K[2] >> 24) & 0xff] ^ C5[(int)(K[1] >> 16) & 0xff] ^ C6[(int)(K[0] >> 8) & 0xff] ^ C7[(int)(K[7] ) & 0xff]; L[7] = C0[(int)(K[7] >> 56) ] ^ C1[(int)(K[6] >> 48) & 0xff] ^ C2[(int)(K[5] >> 40) & 0xff] ^ C3[(int)(K[4] >> 32) & 0xff] ^ C4[(int)(K[3] >> 24) & 0xff] ^ C5[(int)(K[2] >> 16) & 0xff] ^ C6[(int)(K[1] >> 8) & 0xff] ^ C7[(int)(K[0] ) & 0xff]; K[0] = L[0]; K[1] = L[1]; K[2] = L[2]; K[3] = L[3]; K[4] = L[4]; K[5] = L[5]; K[6] = L[6]; K[7] = L[7]; L[0] = C0[(int)(state[0] >> 56) ] ^ C1[(int)(state[7] >> 48) & 0xff] ^ C2[(int)(state[6] >> 40) & 0xff] ^ C3[(int)(state[5] >> 32) & 0xff] ^ C4[(int)(state[4] >> 24) & 0xff] ^ C5[(int)(state[3] >> 16) & 0xff] ^ C6[(int)(state[2] >> 8) & 0xff] ^ C7[(int)(state[1] ) & 0xff] ^ K[0]; L[1] = C0[(int)(state[1] >> 56) ] ^ C1[(int)(state[0] >> 48) & 0xff] ^ C2[(int)(state[7] >> 40) & 0xff] ^ C3[(int)(state[6] >> 32) & 0xff] ^ C4[(int)(state[5] >> 24) & 0xff] ^ C5[(int)(state[4] >> 16) & 0xff] ^ C6[(int)(state[3] >> 8) & 0xff] ^ C7[(int)(state[2] ) & 0xff] ^ K[1]; L[2] = C0[(int)(state[2] >> 56) ] ^ C1[(int)(state[1] >> 48) & 0xff] ^ C2[(int)(state[0] >> 40) & 0xff] ^ C3[(int)(state[7] >> 32) & 0xff] ^ C4[(int)(state[6] >> 24) & 0xff] ^ C5[(int)(state[5] >> 16) & 0xff] ^ C6[(int)(state[4] >> 8) & 0xff] ^ C7[(int)(state[3] ) & 0xff] ^ K[2]; L[3] = C0[(int)(state[3] >> 56) ] ^ C1[(int)(state[2] >> 48) & 0xff] ^ C2[(int)(state[1] >> 40) & 0xff] ^ C3[(int)(state[0] >> 32) & 0xff] ^ C4[(int)(state[7] >> 24) & 0xff] ^ C5[(int)(state[6] >> 16) & 0xff] ^ C6[(int)(state[5] >> 8) & 0xff] ^ C7[(int)(state[4] ) & 0xff] ^ K[3]; L[4] = C0[(int)(state[4] >> 56) ] ^ C1[(int)(state[3] >> 48) & 0xff] ^ C2[(int)(state[2] >> 40) & 0xff] ^ C3[(int)(state[1] >> 32) & 0xff] ^ C4[(int)(state[0] >> 24) & 0xff] ^ C5[(int)(state[7] >> 16) & 0xff] ^ C6[(int)(state[6] >> 8) & 0xff] ^ C7[(int)(state[5] ) & 0xff] ^ K[4]; L[5] = C0[(int)(state[5] >> 56) ] ^ C1[(int)(state[4] >> 48) & 0xff] ^ C2[(int)(state[3] >> 40) & 0xff] ^ C3[(int)(state[2] >> 32) & 0xff] ^ C4[(int)(state[1] >> 24) & 0xff] ^ C5[(int)(state[0] >> 16) & 0xff] ^ C6[(int)(state[7] >> 8) & 0xff] ^ C7[(int)(state[6] ) & 0xff] ^ K[5]; L[6] = C0[(int)(state[6] >> 56) ] ^ C1[(int)(state[5] >> 48) & 0xff] ^ C2[(int)(state[4] >> 40) & 0xff] ^ C3[(int)(state[3] >> 32) & 0xff] ^ C4[(int)(state[2] >> 24) & 0xff] ^ C5[(int)(state[1] >> 16) & 0xff] ^ C6[(int)(state[0] >> 8) & 0xff] ^ C7[(int)(state[7] ) & 0xff] ^ K[6]; L[7] = C0[(int)(state[7] >> 56) ] ^ C1[(int)(state[6] >> 48) & 0xff] ^ C2[(int)(state[5] >> 40) & 0xff] ^ C3[(int)(state[4] >> 32) & 0xff] ^ C4[(int)(state[3] >> 24) & 0xff] ^ C5[(int)(state[2] >> 16) & 0xff] ^ C6[(int)(state[1] >> 8) & 0xff] ^ C7[(int)(state[0] ) & 0xff] ^ K[7]; state[0] = L[0]; state[1] = L[1]; state[2] = L[2]; state[3] = L[3]; state[4] = L[4]; state[5] = L[5]; state[6] = L[6]; state[7] = L[7]; } /* * apply the Miyaguchi-Preneel compression function: */ wctx->hash[0] ^= state[0] ^ block[0]; wctx->hash[1] ^= state[1] ^ block[1]; wctx->hash[2] ^= state[2] ^ block[2]; wctx->hash[3] ^= state[3] ^ block[3]; wctx->hash[4] ^= state[4] ^ block[4]; wctx->hash[5] ^= state[5] ^ block[5]; wctx->hash[6] ^= state[6] ^ block[6]; wctx->hash[7] ^= state[7] ^ block[7]; } static int wp512_init(struct 
shash_desc *desc) { struct wp512_ctx *wctx = shash_desc_ctx(desc); int i; memset(wctx->bitLength, 0, 32); wctx->bufferBits = wctx->bufferPos = 0; wctx->buffer[0] = 0; for (i = 0; i < 8; i++) { wctx->hash[i] = 0L; } return 0; } static int wp512_update(struct shash_desc *desc, const u8 *source, unsigned int len) { struct wp512_ctx *wctx = shash_desc_ctx(desc); int sourcePos = 0; unsigned int bits_len = len * 8; /* convert to number of bits */ /* The shash API only ever hands us whole bytes, so bits_len is always * a multiple of 8 and sourceGap/bufferRem stay zero; the sub-byte * shifting below is inherited from the bit-oriented reference code. */ int sourceGap = (8 - ((int)bits_len & 7)) & 7; int bufferRem = wctx->bufferBits & 7; int i; u32 b, carry; u8 *buffer = wctx->buffer; u8 *bitLength = wctx->bitLength; int bufferBits = wctx->bufferBits; int bufferPos = wctx->bufferPos; u64 value = bits_len; /* Add bits_len to the 256-bit message-length counter. */ for (i = 31, carry = 0; i >= 0 && (carry != 0 || value != 0ULL); i--) { carry += bitLength[i] + ((u32)value & 0xff); bitLength[i] = (u8)carry; carry >>= 8; value >>= 8; } while (bits_len > 8) { b = ((source[sourcePos] << sourceGap) & 0xff) | ((source[sourcePos + 1] & 0xff) >> (8 - sourceGap)); buffer[bufferPos++] |= (u8)(b >> bufferRem); bufferBits += 8 - bufferRem; if (bufferBits == WP512_BLOCK_SIZE * 8) { wp512_process_buffer(wctx); bufferBits = bufferPos = 0; } buffer[bufferPos] = b << (8 - bufferRem); bufferBits += bufferRem; bits_len -= 8; sourcePos++; } if (bits_len > 0) { b = (source[sourcePos] << sourceGap) & 0xff; buffer[bufferPos] |= b >> bufferRem; } else { b = 0; } if (bufferRem + bits_len < 8) { bufferBits += bits_len; } else { bufferPos++; bufferBits += 8 - bufferRem; bits_len -= 8 - bufferRem; if (bufferBits == WP512_BLOCK_SIZE * 8) { wp512_process_buffer(wctx); bufferBits = bufferPos = 0; } buffer[bufferPos] = b << (8 - bufferRem); bufferBits += (int)bits_len; } wctx->bufferBits = bufferBits; wctx->bufferPos = bufferPos; return 0; } static int wp512_final(struct shash_desc *desc, u8 *out) { struct wp512_ctx *wctx = shash_desc_ctx(desc); int i; u8 *buffer = wctx->buffer; u8 *bitLength = wctx->bitLength; int bufferBits = wctx->bufferBits; int bufferPos = wctx->bufferPos; __be64 *digest = (__be64 *)out; /* Append the 0x80 terminator bit, zero-pad, append the 256-bit length * counter, then emit the big-endian digest. */ buffer[bufferPos] |= 0x80U >> (bufferBits & 7); bufferPos++; if (bufferPos > WP512_BLOCK_SIZE - WP512_LENGTHBYTES) { if (bufferPos < WP512_BLOCK_SIZE) memset(&buffer[bufferPos], 0, WP512_BLOCK_SIZE - bufferPos); wp512_process_buffer(wctx); bufferPos = 0; } if (bufferPos < WP512_BLOCK_SIZE - WP512_LENGTHBYTES) memset(&buffer[bufferPos], 0, (WP512_BLOCK_SIZE - WP512_LENGTHBYTES) - bufferPos); bufferPos = WP512_BLOCK_SIZE - WP512_LENGTHBYTES; memcpy(&buffer[WP512_BLOCK_SIZE - WP512_LENGTHBYTES], bitLength, WP512_LENGTHBYTES); wp512_process_buffer(wctx); for (i = 0; i < WP512_DIGEST_SIZE/8; i++) digest[i] = cpu_to_be64(wctx->hash[i]); wctx->bufferBits = bufferBits; wctx->bufferPos = bufferPos; return 0; } static int wp384_final(struct shash_desc *desc, u8 *out) { u8 D[64]; wp512_final(desc, D); memcpy(out, D, WP384_DIGEST_SIZE); memzero_explicit(D, WP512_DIGEST_SIZE); return 0; } static int wp256_final(struct shash_desc *desc, u8 *out) { u8 D[64]; wp512_final(desc, D); memcpy(out, D, WP256_DIGEST_SIZE); memzero_explicit(D, WP512_DIGEST_SIZE); return 0; } static struct shash_alg wp_algs[3] = { { .digestsize = WP512_DIGEST_SIZE, .init = wp512_init, .update = wp512_update, .final = wp512_final, .descsize = sizeof(struct wp512_ctx), .base = { .cra_name = "wp512", .cra_driver_name = "wp512-generic", .cra_blocksize = WP512_BLOCK_SIZE, .cra_module = THIS_MODULE, } }, { .digestsize = WP384_DIGEST_SIZE, .init = wp512_init, .update = wp512_update, .final = wp384_final, .descsize = sizeof(struct wp512_ctx), 
.base = { .cra_name = "wp384", .cra_driver_name = "wp384-generic", .cra_blocksize = WP512_BLOCK_SIZE, .cra_module = THIS_MODULE, } }, { .digestsize = WP256_DIGEST_SIZE, .init = wp512_init, .update = wp512_update, .final = wp256_final, .descsize = sizeof(struct wp512_ctx), .base = { .cra_name = "wp256", .cra_driver_name = "wp256-generic", .cra_blocksize = WP512_BLOCK_SIZE, .cra_module = THIS_MODULE, } } }; static int __init wp512_mod_init(void) { return crypto_register_shashes(wp_algs, ARRAY_SIZE(wp_algs)); } static void __exit wp512_mod_fini(void) { crypto_unregister_shashes(wp_algs, ARRAY_SIZE(wp_algs)); } MODULE_ALIAS_CRYPTO("wp512"); MODULE_ALIAS_CRYPTO("wp384"); MODULE_ALIAS_CRYPTO("wp256"); module_init(wp512_mod_init); module_exit(wp512_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Whirlpool Message Digest Algorithm");
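/*
 * Illustrative usage sketch, not part of wp512.c: once the shash_algs
 * above are registered, kernel code can compute a one-shot Whirlpool
 * digest through the generic crypto API. The helper name below is
 * hypothetical; the crypto_* calls are the standard <crypto/hash.h>
 * interface. (Aside: the lookup tables are circulant; each Ct is the
 * previous table rotated right by 8 bits, e.g. C4[0] == ror64(C3[0], 8),
 * reflecting the circulant structure of Whirlpool's diffusion matrix.)
 */
#include <crypto/hash.h>

static int example_wp512_digest(const u8 *data, unsigned int len,
				u8 out[WP512_DIGEST_SIZE])
{
	struct crypto_shash *tfm;
	int err;

	tfm = crypto_alloc_shash("wp512", 0, 0);	/* cra_name above */
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	{
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		/* init + update + final in a single call */
		err = crypto_shash_digest(desc, data, len, out);
		shash_desc_zero(desc);
	}

	crypto_free_shash(tfm);
	return err;
}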
/* * linux/fs/hfs/string.c * * Copyright (C) 1995-1997 Paul H. Hargrove * (C) 2003 Ardis Technologies <roman@ardistech.com> * This file may be distributed under the terms of the GNU General Public License. * * This file contains the string comparison function for the * Macintosh character set. * * The code in this file is derived from code which is copyright * 1986, 1989, 1990 by Abacus Research and Development, Inc. (ARDI) * It is used here by the permission of ARDI's president Cliff Matthews. */ #include "hfs_fs.h" #include <linux/dcache.h> /*================ File-local variables ================*/ /* * unsigned char caseorder[] * * Defines the lexical ordering of characters on the Macintosh * * Composition of the 'casefold' and 'order' tables from ARDI's code * with the entry for 0x20 changed to match that for 0xCA to remove the * special case for those two characters. */ static unsigned char caseorder[256] = { 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 0x20,0x22,0x23,0x28,0x29,0x2A,0x2B,0x2C,0x2F,0x30,0x31,0x32,0x33,0x34,0x35,0x36, 0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,0x40,0x41,0x42,0x43,0x44,0x45,0x46, 0x47,0x48,0x57,0x59,0x5D,0x5F,0x66,0x68,0x6A,0x6C,0x72,0x74,0x76,0x78,0x7A,0x7E, 0x8C,0x8E,0x90,0x92,0x95,0x97,0x9E,0xA0,0xA2,0xA4,0xA7,0xA9,0xAA,0xAB,0xAC,0xAD, 0x4E,0x48,0x57,0x59,0x5D,0x5F,0x66,0x68,0x6A,0x6C,0x72,0x74,0x76,0x78,0x7A,0x7E, 0x8C,0x8E,0x90,0x92,0x95,0x97,0x9E,0xA0,0xA2,0xA4,0xA7,0xAF,0xB0,0xB1,0xB2,0xB3, 0x4A,0x4C,0x5A,0x60,0x7B,0x7F,0x98,0x4F,0x49,0x51,0x4A,0x4B,0x4C,0x5A,0x60,0x63, 0x64,0x65,0x6E,0x6F,0x70,0x71,0x7B,0x84,0x85,0x86,0x7F,0x80,0x9A,0x9B,0x9C,0x98, 0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0x94,0xBB,0xBC,0xBD,0xBE,0xBF,0xC0,0x4D,0x81, 0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xCB,0x55,0x8A,0xCC,0x4D,0x81, 0xCD,0xCE,0xCF,0xD0,0xD1,0xD2,0xD3,0x26,0x27,0xD4,0x20,0x49,0x4B,0x80,0x82,0x82, 0xD5,0xD6,0x24,0x25,0x2D,0x2E,0xD7,0xD8,0xA6,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF }; /*================ Global functions ================*/ /* * Hash a string to an integer in a case-independent way */ int hfs_hash_dentry(const struct dentry *dentry, struct qstr *this) { const unsigned char *name = this->name; unsigned int hash, len = this->len; if (len > HFS_NAMELEN) len = HFS_NAMELEN; hash = init_name_hash(dentry); for (; len; len--) hash = partial_name_hash(caseorder[*name++], hash); this->hash = end_name_hash(hash); return 0; } /* * Compare two strings in the HFS filename character ordering * Returns positive, negative, or zero, not just 0 or (+/-)1 * * Equivalent to ARDI's call: * ROMlib_RelString(s1+1, s2+1, true, false, (s1[0]<<16) | s2[0]) */ int hfs_strcmp(const unsigned char *s1, unsigned int len1, const unsigned char *s2, unsigned int len2) { int len, tmp; len = (len1 > len2) ? 
len2 : len1; while (len--) { tmp = (int)caseorder[*(s1++)] - (int)caseorder[*(s2++)]; if (tmp) return tmp; } return len1 - len2; } /* * Test for equality of two strings in the HFS filename character ordering. * Returns 1 on failure and 0 on success. */ int hfs_compare_dentry(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { const unsigned char *n1, *n2; if (len >= HFS_NAMELEN) { if (name->len < HFS_NAMELEN) return 1; len = HFS_NAMELEN; } else if (len != name->len) return 1; n1 = str; n2 = name->name; while (len--) { if (caseorder[*n1++] != caseorder[*n2++]) return 1; } return 0; }
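/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * file): both routines above fold case through caseorder[], so names
 * differing only in case, e.g. "README" and "ReadMe", hash and compare
 * as equal. This mirrors hfs_compare_dentry()'s inner loop for
 * NUL-terminated strings.
 */
static int example_hfs_name_eq(const unsigned char *a,
			       const unsigned char *b)
{
	while (*a && *b)
		if (caseorder[*a++] != caseorder[*b++])
			return 0;	/* differ under Macintosh case-folding */
	return *a == *b;		/* equal only if both ended together */
}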
// SPDX-License-Identifier: GPL-2.0-or-later /* * Apple "Magic" Wireless Mouse driver * * Copyright (c) 2010 Michael Poole <mdpoole@troilus.org> * Copyright (c) 2010 Chase Douglas <chase.douglas@canonical.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/device.h> #include <linux/hid.h> #include <linux/input/mt.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/workqueue.h> #include "hid-ids.h" static bool emulate_3button = true; module_param(emulate_3button, bool, 0644); MODULE_PARM_DESC(emulate_3button, "Emulate a middle button"); static int middle_button_start = -350; static int middle_button_stop = +350; static bool emulate_scroll_wheel = true; module_param(emulate_scroll_wheel, bool, 0644); MODULE_PARM_DESC(emulate_scroll_wheel, "Emulate a scroll wheel"); static unsigned int scroll_speed = 32; static int param_set_scroll_speed(const char *val, const struct kernel_param *kp) { unsigned long speed; if (!val || kstrtoul(val, 0, &speed) || speed > 63) return -EINVAL; scroll_speed = speed; return 0; } module_param_call(scroll_speed, param_set_scroll_speed, param_get_uint, &scroll_speed, 0644); MODULE_PARM_DESC(scroll_speed, "Scroll speed, value from 0 (slow) to 63 (fast)"); static bool scroll_acceleration = false; module_param(scroll_acceleration, bool, 0644); MODULE_PARM_DESC(scroll_acceleration, "Accelerate sequential scroll events"); static bool report_undeciphered; module_param(report_undeciphered, bool, 0644); MODULE_PARM_DESC(report_undeciphered, "Report undeciphered multi-touch state field using a MSC_RAW event"); #define TRACKPAD2_2021_BT_VERSION 0x110 #define TRACKPAD_2024_BT_VERSION 0x314 #define TRACKPAD_REPORT_ID 0x28 #define TRACKPAD2_USB_REPORT_ID 0x02 #define TRACKPAD2_BT_REPORT_ID 0x31 #define MOUSE_REPORT_ID 0x29 #define MOUSE2_REPORT_ID 0x12 #define DOUBLE_REPORT_ID 0xf7 #define USB_BATTERY_TIMEOUT_MS 60000 /* These definitions are not precise, but they're close enough. (Bits * 0x03 seem to indicate the aspect ratio of the touch, bits 0x70 seem * to be some kind of bit mask -- 0x20 may be a near-field reading, * 0x40 is actual contact, and 0x10 may be a start/stop or change * indication.) */ #define TOUCH_STATE_MASK 0xf0 #define TOUCH_STATE_NONE 0x00 #define TOUCH_STATE_START 0x30 #define TOUCH_STATE_DRAG 0x40 /* Number of high-resolution events for each low-resolution detent. */ #define SCROLL_HR_STEPS 10 #define SCROLL_HR_MULT (120 / SCROLL_HR_STEPS) #define SCROLL_HR_THRESHOLD 90 /* units */ #define SCROLL_ACCEL_DEFAULT 7 
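/*
 * Worked example of the defaults (illustrative): REL_WHEEL_HI_RES
 * counts 120 units per wheel detent, so each of the SCROLL_HR_STEPS
 * (10) hi-res events is worth SCROLL_HR_MULT (120 / 10 == 12) units.
 * With scroll_speed == 32 and scroll_accel == SCROLL_ACCEL_DEFAULT,
 * the drag handling below emits one hi-res event per
 * ((64 - 32) * 7) / 10 == 22 device units of finger travel, so a full
 * detent corresponds to roughly 220 units of motion.
 */
/* Touch surface information. Dimension is in hundredths of a mm, min and max * are in units. 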
*/ #define MOUSE_DIMENSION_X (float)9056 #define MOUSE_MIN_X -1100 #define MOUSE_MAX_X 1258 #define MOUSE_RES_X ((MOUSE_MAX_X - MOUSE_MIN_X) / (MOUSE_DIMENSION_X / 100)) #define MOUSE_DIMENSION_Y (float)5152 #define MOUSE_MIN_Y -1589 #define MOUSE_MAX_Y 2047 #define MOUSE_RES_Y ((MOUSE_MAX_Y - MOUSE_MIN_Y) / (MOUSE_DIMENSION_Y / 100)) #define TRACKPAD_DIMENSION_X (float)13000 #define TRACKPAD_MIN_X -2909 #define TRACKPAD_MAX_X 3167 #define TRACKPAD_RES_X \ ((TRACKPAD_MAX_X - TRACKPAD_MIN_X) / (TRACKPAD_DIMENSION_X / 100)) #define TRACKPAD_DIMENSION_Y (float)11000 #define TRACKPAD_MIN_Y -2456 #define TRACKPAD_MAX_Y 2565 #define TRACKPAD_RES_Y \ ((TRACKPAD_MAX_Y - TRACKPAD_MIN_Y) / (TRACKPAD_DIMENSION_Y / 100)) #define TRACKPAD2_DIMENSION_X (float)16000 #define TRACKPAD2_MIN_X -3678 #define TRACKPAD2_MAX_X 3934 #define TRACKPAD2_RES_X \ ((TRACKPAD2_MAX_X - TRACKPAD2_MIN_X) / (TRACKPAD2_DIMENSION_X / 100)) #define TRACKPAD2_DIMENSION_Y (float)11490 #define TRACKPAD2_MIN_Y -2478 #define TRACKPAD2_MAX_Y 2587 #define TRACKPAD2_RES_Y \ ((TRACKPAD2_MAX_Y - TRACKPAD2_MIN_Y) / (TRACKPAD2_DIMENSION_Y / 100)) /** * struct magicmouse_sc - Tracks Magic Mouse-specific data. * @input: Input device through which we report events. * @quirks: Currently unused. * @ntouches: Number of touches in most recent touch report. * @scroll_accel: Number of consecutive scroll motions. * @scroll_jiffies: Time of last scroll motion. * @touches: Most recent data for a touch, indexed by tracking ID. * @tracking_ids: Mapping of current touch input data to @touches. * @hdev: Pointer to the underlying HID device. * @work: Workqueue to handle initialization retry for quirky devices. * @battery_timer: Timer for obtaining battery level information. */ struct magicmouse_sc { struct input_dev *input; unsigned long quirks; int ntouches; int scroll_accel; unsigned long scroll_jiffies; struct { short x; short y; short scroll_x; short scroll_y; short scroll_x_hr; short scroll_y_hr; u8 size; bool scroll_x_active; bool scroll_y_active; } touches[16]; int tracking_ids[16]; struct hid_device *hdev; struct delayed_work work; struct timer_list battery_timer; }; static int magicmouse_firm_touch(struct magicmouse_sc *msc) { int touch = -1; int ii; /* If there is only one "firm" touch, set touch to its * tracking ID. */ for (ii = 0; ii < msc->ntouches; ii++) { int idx = msc->tracking_ids[ii]; if (msc->touches[idx].size < 8) { /* Ignore this touch. */ } else if (touch >= 0) { touch = -1; break; } else { touch = idx; } } return touch; } static void magicmouse_emit_buttons(struct magicmouse_sc *msc, int state) { int last_state = test_bit(BTN_LEFT, msc->input->key) << 0 | test_bit(BTN_RIGHT, msc->input->key) << 1 | test_bit(BTN_MIDDLE, msc->input->key) << 2; if (emulate_3button) { int id; /* If some button was pressed before, keep it held * down. Otherwise, if there's exactly one firm * touch, use that to override the mouse's guess. */ if (state == 0) { /* The button was released. 
*/ } else if (last_state != 0) { state = last_state; } else if ((id = magicmouse_firm_touch(msc)) >= 0) { int x = msc->touches[id].x; if (x < middle_button_start) state = 1; else if (x > middle_button_stop) state = 2; else state = 4; } /* else: we keep the mouse's guess */ input_report_key(msc->input, BTN_MIDDLE, state & 4); } input_report_key(msc->input, BTN_LEFT, state & 1); input_report_key(msc->input, BTN_RIGHT, state & 2); if (state != last_state) msc->scroll_accel = SCROLL_ACCEL_DEFAULT; } static void magicmouse_emit_touch(struct magicmouse_sc *msc, int raw_id, u8 *tdata) { struct input_dev *input = msc->input; int id, x, y, size, orientation, touch_major, touch_minor, state, down; int pressure = 0; if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE || input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2 || input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2_USBC) { id = (tdata[6] << 2 | tdata[5] >> 6) & 0xf; x = (tdata[1] << 28 | tdata[0] << 20) >> 20; y = -((tdata[2] << 24 | tdata[1] << 16) >> 20); size = tdata[5] & 0x3f; orientation = (tdata[6] >> 2) - 32; touch_major = tdata[3]; touch_minor = tdata[4]; state = tdata[7] & TOUCH_STATE_MASK; down = state != TOUCH_STATE_NONE; } else if (input->id.product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2 || input->id.product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2_USBC) { id = tdata[8] & 0xf; x = (tdata[1] << 27 | tdata[0] << 19) >> 19; y = -((tdata[3] << 30 | tdata[2] << 22 | tdata[1] << 14) >> 19); size = tdata[6]; orientation = (tdata[8] >> 5) - 4; touch_major = tdata[4]; touch_minor = tdata[5]; pressure = tdata[7]; state = tdata[3] & 0xC0; down = state == 0x80; } else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */ id = (tdata[7] << 2 | tdata[6] >> 6) & 0xf; x = (tdata[1] << 27 | tdata[0] << 19) >> 19; y = -((tdata[3] << 30 | tdata[2] << 22 | tdata[1] << 14) >> 19); size = tdata[6] & 0x3f; orientation = (tdata[7] >> 2) - 32; touch_major = tdata[4]; touch_minor = tdata[5]; state = tdata[8] & TOUCH_STATE_MASK; down = state != TOUCH_STATE_NONE; } /* Store tracking ID and other fields. */ msc->tracking_ids[raw_id] = id; msc->touches[id].x = x; msc->touches[id].y = y; msc->touches[id].size = size; /* If requested, emulate a scroll wheel by detecting small * vertical touch motions. */ if (emulate_scroll_wheel && input->id.product != USB_DEVICE_ID_APPLE_MAGICTRACKPAD2 && input->id.product != USB_DEVICE_ID_APPLE_MAGICTRACKPAD2_USBC) { unsigned long now = jiffies; int step_x = msc->touches[id].scroll_x - x; int step_y = msc->touches[id].scroll_y - y; int step_hr = max_t(int, ((64 - (int)scroll_speed) * msc->scroll_accel) / SCROLL_HR_STEPS, 1); int step_x_hr = msc->touches[id].scroll_x_hr - x; int step_y_hr = msc->touches[id].scroll_y_hr - y; /* Calculate and apply the scroll motion. */ switch (state) { case TOUCH_STATE_START: msc->touches[id].scroll_x = x; msc->touches[id].scroll_y = y; msc->touches[id].scroll_x_hr = x; msc->touches[id].scroll_y_hr = y; msc->touches[id].scroll_x_active = false; msc->touches[id].scroll_y_active = false; /* Reset acceleration after half a second. 
*/ if (scroll_acceleration && time_before(now, msc->scroll_jiffies + HZ / 2)) msc->scroll_accel = max_t(int, msc->scroll_accel - 1, 1); else msc->scroll_accel = SCROLL_ACCEL_DEFAULT; break; case TOUCH_STATE_DRAG: step_x /= (64 - (int)scroll_speed) * msc->scroll_accel; if (step_x != 0) { msc->touches[id].scroll_x -= step_x * (64 - scroll_speed) * msc->scroll_accel; msc->scroll_jiffies = now; input_report_rel(input, REL_HWHEEL, -step_x); } step_y /= (64 - (int)scroll_speed) * msc->scroll_accel; if (step_y != 0) { msc->touches[id].scroll_y -= step_y * (64 - scroll_speed) * msc->scroll_accel; msc->scroll_jiffies = now; input_report_rel(input, REL_WHEEL, step_y); } if (!msc->touches[id].scroll_x_active && abs(step_x_hr) > SCROLL_HR_THRESHOLD) { msc->touches[id].scroll_x_active = true; msc->touches[id].scroll_x_hr = x; step_x_hr = 0; } step_x_hr /= step_hr; if (step_x_hr != 0 && msc->touches[id].scroll_x_active) { msc->touches[id].scroll_x_hr -= step_x_hr * step_hr; input_report_rel(input, REL_HWHEEL_HI_RES, -step_x_hr * SCROLL_HR_MULT); } if (!msc->touches[id].scroll_y_active && abs(step_y_hr) > SCROLL_HR_THRESHOLD) { msc->touches[id].scroll_y_active = true; msc->touches[id].scroll_y_hr = y; step_y_hr = 0; } step_y_hr /= step_hr; if (step_y_hr != 0 && msc->touches[id].scroll_y_active) { msc->touches[id].scroll_y_hr -= step_y_hr * step_hr; input_report_rel(input, REL_WHEEL_HI_RES, step_y_hr * SCROLL_HR_MULT); } break; } } if (down) msc->ntouches++; input_mt_slot(input, id); input_mt_report_slot_state(input, MT_TOOL_FINGER, down); /* Generate the input events for this touch. */ if (down) { input_report_abs(input, ABS_MT_TOUCH_MAJOR, touch_major << 2); input_report_abs(input, ABS_MT_TOUCH_MINOR, touch_minor << 2); input_report_abs(input, ABS_MT_ORIENTATION, -orientation); input_report_abs(input, ABS_MT_POSITION_X, x); input_report_abs(input, ABS_MT_POSITION_Y, y); if (input->id.product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2 || input->id.product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2_USBC) input_report_abs(input, ABS_MT_PRESSURE, pressure); if (report_undeciphered) { if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE || input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2 || input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2_USBC) input_event(input, EV_MSC, MSC_RAW, tdata[7]); else if (input->id.product != USB_DEVICE_ID_APPLE_MAGICTRACKPAD2 && input->id.product != USB_DEVICE_ID_APPLE_MAGICTRACKPAD2_USBC) input_event(input, EV_MSC, MSC_RAW, tdata[8]); } } } static int magicmouse_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct magicmouse_sc *msc = hid_get_drvdata(hdev); struct input_dev *input = msc->input; int x = 0, y = 0, ii, clicks = 0, npoints; switch (data[0]) { case TRACKPAD_REPORT_ID: case TRACKPAD2_BT_REPORT_ID: /* Expect four bytes of prefix, and N*9 bytes of touch data. */ if (size < 4 || ((size - 4) % 9) != 0) return 0; npoints = (size - 4) / 9; if (npoints > 15) { hid_warn(hdev, "invalid size value (%d) for TRACKPAD_REPORT_ID\n", size); return 0; } msc->ntouches = 0; for (ii = 0; ii < npoints; ii++) magicmouse_emit_touch(msc, ii, data + ii * 9 + 4); clicks = data[1]; /* The following bits provide a device specific timestamp. They * are unused here. * * ts = data[1] >> 6 | data[2] << 2 | data[3] << 10; */ break; case TRACKPAD2_USB_REPORT_ID: /* Expect twelve bytes of prefix and N*9 bytes of touch data. 
*/ if (size < 12 || ((size - 12) % 9) != 0) return 0; npoints = (size - 12) / 9; if (npoints > 15) { hid_warn(hdev, "invalid size value (%d) for TRACKPAD2_USB_REPORT_ID\n", size); return 0; } msc->ntouches = 0; for (ii = 0; ii < npoints; ii++) magicmouse_emit_touch(msc, ii, data + ii * 9 + 12); clicks = data[1]; break; case MOUSE_REPORT_ID: /* Expect six bytes of prefix, and N*8 bytes of touch data. */ if (size < 6 || ((size - 6) % 8) != 0) return 0; npoints = (size - 6) / 8; if (npoints > 15) { hid_warn(hdev, "invalid size value (%d) for MOUSE_REPORT_ID\n", size); return 0; } msc->ntouches = 0; for (ii = 0; ii < npoints; ii++) magicmouse_emit_touch(msc, ii, data + ii * 8 + 6); /* When emulating three-button mode, it is important * to have the current touch information before * generating a click event. */ x = (int)(((data[3] & 0x0c) << 28) | (data[1] << 22)) >> 22; y = (int)(((data[3] & 0x30) << 26) | (data[2] << 22)) >> 22; clicks = data[3]; /* The following bits provide a device specific timestamp. They * are unused here. * * ts = data[3] >> 6 | data[4] << 2 | data[5] << 10; */ break; case MOUSE2_REPORT_ID: /* Size is either 8 or (14 + 8 * N) */ if (size != 8 && (size < 14 || (size - 14) % 8 != 0)) return 0; npoints = (size - 14) / 8; if (npoints > 15) { hid_warn(hdev, "invalid size value (%d) for MOUSE2_REPORT_ID\n", size); return 0; } msc->ntouches = 0; for (ii = 0; ii < npoints; ii++) magicmouse_emit_touch(msc, ii, data + ii * 8 + 14); /* When emulating three-button mode, it is important * to have the current touch information before * generating a click event. */ x = (int)((data[3] << 24) | (data[2] << 16)) >> 16; y = (int)((data[5] << 24) | (data[4] << 16)) >> 16; clicks = data[1]; /* The following bits provide a device specific timestamp. They * are unused here. * * ts = data[11] >> 6 | data[12] << 2 | data[13] << 10; */ break; case DOUBLE_REPORT_ID: /* Sometimes the trackpad sends two touch reports in one * packet. */ magicmouse_raw_event(hdev, report, data + 2, data[1]); magicmouse_raw_event(hdev, report, data + 2 + data[1], size - 2 - data[1]); return 0; default: return 0; } if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE || input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2 || input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2_USBC) { magicmouse_emit_buttons(msc, clicks & 3); input_report_rel(input, REL_X, x); input_report_rel(input, REL_Y, y); } else if (input->id.product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2 || input->id.product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2_USBC) { input_mt_sync_frame(input); input_report_key(input, BTN_MOUSE, clicks & 1); } else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */ input_report_key(input, BTN_MOUSE, clicks & 1); input_mt_report_pointer_emulation(input, true); } input_sync(input); return 1; } static int magicmouse_event(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, __s32 value) { struct magicmouse_sc *msc = hid_get_drvdata(hdev); if ((msc->input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2 || msc->input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2_USBC) && field->report->id == MOUSE2_REPORT_ID) { /* * magic_mouse_raw_event has done all the work. Skip hidinput. * * Specifically, hidinput may modify BTN_LEFT and BTN_RIGHT, * breaking emulate_3button. 
*/ return 1; } return 0; } static int magicmouse_setup_input(struct input_dev *input, struct hid_device *hdev) { int error; int mt_flags = 0; __set_bit(EV_KEY, input->evbit); if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE || input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2 || input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2_USBC) { __set_bit(BTN_LEFT, input->keybit); __set_bit(BTN_RIGHT, input->keybit); if (emulate_3button) __set_bit(BTN_MIDDLE, input->keybit); __set_bit(EV_REL, input->evbit); __set_bit(REL_X, input->relbit); __set_bit(REL_Y, input->relbit); if (emulate_scroll_wheel) { __set_bit(REL_WHEEL, input->relbit); __set_bit(REL_HWHEEL, input->relbit); __set_bit(REL_WHEEL_HI_RES, input->relbit); __set_bit(REL_HWHEEL_HI_RES, input->relbit); } } else if (input->id.product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2 || input->id.product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2_USBC) { /* If the trackpad has been connected to a Mac, the name is * automatically personalized, e.g., "José Expósito's Trackpad". * When connected through Bluetooth, the personalized name is * reported, however, when connected through USB the generic * name is reported. * Set the device name to ensure the same driver settings get * loaded, whether connected through bluetooth or USB. */ if (hdev->vendor == BT_VENDOR_ID_APPLE) { if (input->id.version == TRACKPAD2_2021_BT_VERSION) input->name = "Apple Inc. Magic Trackpad 2021"; else if (input->id.version == TRACKPAD_2024_BT_VERSION) { input->name = "Apple Inc. Magic Trackpad USB-C"; } else { input->name = "Apple Inc. Magic Trackpad"; } } else { /* USB_VENDOR_ID_APPLE */ input->name = hdev->name; } __clear_bit(EV_MSC, input->evbit); __clear_bit(BTN_0, input->keybit); __clear_bit(BTN_RIGHT, input->keybit); __clear_bit(BTN_MIDDLE, input->keybit); __set_bit(BTN_MOUSE, input->keybit); __set_bit(INPUT_PROP_BUTTONPAD, input->propbit); __set_bit(BTN_TOOL_FINGER, input->keybit); mt_flags = INPUT_MT_POINTER | INPUT_MT_DROP_UNUSED | INPUT_MT_TRACK; } else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */ /* input->keybit is initialized with incorrect button info * for Magic Trackpad. There really is only one physical * button (BTN_LEFT == BTN_MOUSE). Make sure we don't * advertise buttons that don't exist... */ __clear_bit(BTN_RIGHT, input->keybit); __clear_bit(BTN_MIDDLE, input->keybit); __set_bit(BTN_MOUSE, input->keybit); __set_bit(BTN_TOOL_FINGER, input->keybit); __set_bit(BTN_TOOL_DOUBLETAP, input->keybit); __set_bit(BTN_TOOL_TRIPLETAP, input->keybit); __set_bit(BTN_TOOL_QUADTAP, input->keybit); __set_bit(BTN_TOOL_QUINTTAP, input->keybit); __set_bit(BTN_TOUCH, input->keybit); __set_bit(INPUT_PROP_POINTER, input->propbit); __set_bit(INPUT_PROP_BUTTONPAD, input->propbit); } __set_bit(EV_ABS, input->evbit); error = input_mt_init_slots(input, 16, mt_flags); if (error) return error; input_set_abs_params(input, ABS_MT_TOUCH_MAJOR, 0, 255 << 2, 4, 0); input_set_abs_params(input, ABS_MT_TOUCH_MINOR, 0, 255 << 2, 4, 0); /* Note: Touch Y position from the device is inverted relative * to how pointer motion is reported (and relative to how USB * HID recommends the coordinates work). This driver keeps * the origin at the same position, and just uses the additive * inverse of the reported Y. 
*/ if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE || input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2 || input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2_USBC) { input_set_abs_params(input, ABS_MT_ORIENTATION, -31, 32, 1, 0); input_set_abs_params(input, ABS_MT_POSITION_X, MOUSE_MIN_X, MOUSE_MAX_X, 4, 0); input_set_abs_params(input, ABS_MT_POSITION_Y, MOUSE_MIN_Y, MOUSE_MAX_Y, 4, 0); input_abs_set_res(input, ABS_MT_POSITION_X, MOUSE_RES_X); input_abs_set_res(input, ABS_MT_POSITION_Y, MOUSE_RES_Y); } else if (input->id.product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2 || input->id.product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2_USBC) { input_set_abs_params(input, ABS_MT_PRESSURE, 0, 253, 0, 0); input_set_abs_params(input, ABS_PRESSURE, 0, 253, 0, 0); input_set_abs_params(input, ABS_MT_ORIENTATION, -3, 4, 0, 0); input_set_abs_params(input, ABS_X, TRACKPAD2_MIN_X, TRACKPAD2_MAX_X, 0, 0); input_set_abs_params(input, ABS_Y, TRACKPAD2_MIN_Y, TRACKPAD2_MAX_Y, 0, 0); input_set_abs_params(input, ABS_MT_POSITION_X, TRACKPAD2_MIN_X, TRACKPAD2_MAX_X, 0, 0); input_set_abs_params(input, ABS_MT_POSITION_Y, TRACKPAD2_MIN_Y, TRACKPAD2_MAX_Y, 0, 0); input_abs_set_res(input, ABS_X, TRACKPAD2_RES_X); input_abs_set_res(input, ABS_Y, TRACKPAD2_RES_Y); input_abs_set_res(input, ABS_MT_POSITION_X, TRACKPAD2_RES_X); input_abs_set_res(input, ABS_MT_POSITION_Y, TRACKPAD2_RES_Y); } else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */ input_set_abs_params(input, ABS_MT_ORIENTATION, -31, 32, 1, 0); input_set_abs_params(input, ABS_X, TRACKPAD_MIN_X, TRACKPAD_MAX_X, 4, 0); input_set_abs_params(input, ABS_Y, TRACKPAD_MIN_Y, TRACKPAD_MAX_Y, 4, 0); input_set_abs_params(input, ABS_MT_POSITION_X, TRACKPAD_MIN_X, TRACKPAD_MAX_X, 4, 0); input_set_abs_params(input, ABS_MT_POSITION_Y, TRACKPAD_MIN_Y, TRACKPAD_MAX_Y, 4, 0); input_abs_set_res(input, ABS_X, TRACKPAD_RES_X); input_abs_set_res(input, ABS_Y, TRACKPAD_RES_Y); input_abs_set_res(input, ABS_MT_POSITION_X, TRACKPAD_RES_X); input_abs_set_res(input, ABS_MT_POSITION_Y, TRACKPAD_RES_Y); } input_set_events_per_packet(input, 60); if (report_undeciphered && input->id.product != USB_DEVICE_ID_APPLE_MAGICTRACKPAD2 && input->id.product != USB_DEVICE_ID_APPLE_MAGICTRACKPAD2_USBC) { __set_bit(EV_MSC, input->evbit); __set_bit(MSC_RAW, input->mscbit); } /* * hid-input may mark device as using autorepeat, but neither * the trackpad, nor the mouse actually want it. 
*/ __clear_bit(EV_REP, input->evbit); return 0; } static int magicmouse_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { struct magicmouse_sc *msc = hid_get_drvdata(hdev); if (!msc->input) msc->input = hi->input; /* Magic Trackpad does not give relative data after switching to MT */ if ((hi->input->id.product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD || hi->input->id.product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2 || hi->input->id.product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2_USBC) && field->flags & HID_MAIN_ITEM_RELATIVE) return -1; return 0; } static int magicmouse_input_configured(struct hid_device *hdev, struct hid_input *hi) { struct magicmouse_sc *msc = hid_get_drvdata(hdev); int ret; ret = magicmouse_setup_input(msc->input, hdev); if (ret) { hid_err(hdev, "magicmouse setup input failed (%d)\n", ret); /* clean msc->input to notify probe() of the failure */ msc->input = NULL; return ret; } return 0; } static int magicmouse_enable_multitouch(struct hid_device *hdev) { const u8 *feature; const u8 feature_mt[] = { 0xD7, 0x01 }; const u8 feature_mt_mouse2[] = { 0xF1, 0x02, 0x01 }; const u8 feature_mt_trackpad2_usb[] = { 0x02, 0x01 }; const u8 feature_mt_trackpad2_bt[] = { 0xF1, 0x02, 0x01 }; u8 *buf; int ret; int feature_size; switch (hdev->product) { case USB_DEVICE_ID_APPLE_MAGICTRACKPAD2: case USB_DEVICE_ID_APPLE_MAGICTRACKPAD2_USBC: switch (hdev->vendor) { case BT_VENDOR_ID_APPLE: feature_size = sizeof(feature_mt_trackpad2_bt); feature = feature_mt_trackpad2_bt; break; default: /* USB_VENDOR_ID_APPLE */ feature_size = sizeof(feature_mt_trackpad2_usb); feature = feature_mt_trackpad2_usb; } break; case USB_DEVICE_ID_APPLE_MAGICMOUSE2: case USB_DEVICE_ID_APPLE_MAGICMOUSE2_USBC: feature_size = sizeof(feature_mt_mouse2); feature = feature_mt_mouse2; break; default: feature_size = sizeof(feature_mt); feature = feature_mt; } buf = kmemdup(feature, feature_size, GFP_KERNEL); if (!buf) return -ENOMEM; ret = hid_hw_raw_request(hdev, buf[0], buf, feature_size, HID_FEATURE_REPORT, HID_REQ_SET_REPORT); kfree(buf); return ret; } static void magicmouse_enable_mt_work(struct work_struct *work) { struct magicmouse_sc *msc = container_of(work, struct magicmouse_sc, work.work); int ret; ret = magicmouse_enable_multitouch(msc->hdev); if (ret < 0) hid_err(msc->hdev, "unable to request touch data (%d)\n", ret); } static int magicmouse_fetch_battery(struct hid_device *hdev) { #ifdef CONFIG_HID_BATTERY_STRENGTH struct hid_report_enum *report_enum; struct hid_report *report; if (!hdev->battery || hdev->vendor != USB_VENDOR_ID_APPLE || (hdev->product != USB_DEVICE_ID_APPLE_MAGICMOUSE2 && hdev->product != USB_DEVICE_ID_APPLE_MAGICMOUSE2_USBC && hdev->product != USB_DEVICE_ID_APPLE_MAGICTRACKPAD2 && hdev->product != USB_DEVICE_ID_APPLE_MAGICTRACKPAD2_USBC)) return -1; report_enum = &hdev->report_enum[hdev->battery_report_type]; report = report_enum->report_id_hash[hdev->battery_report_id]; if (!report || report->maxfield < 1) return -1; if (hdev->battery_capacity == hdev->battery_max) return -1; hid_hw_request(hdev, report, HID_REQ_GET_REPORT); return 0; #else return -1; #endif } static void magicmouse_battery_timer_tick(struct timer_list *t) { struct magicmouse_sc *msc = timer_container_of(msc, t, battery_timer); struct hid_device *hdev = msc->hdev; if (magicmouse_fetch_battery(hdev) == 0) { mod_timer(&msc->battery_timer, jiffies + msecs_to_jiffies(USB_BATTERY_TIMEOUT_MS)); } } static int magicmouse_probe(struct hid_device 
*hdev, const struct hid_device_id *id)
{
	struct magicmouse_sc *msc;
	struct hid_report *report;
	int ret;

	msc = devm_kzalloc(&hdev->dev, sizeof(*msc), GFP_KERNEL);
	if (msc == NULL) {
		hid_err(hdev, "can't alloc magicmouse descriptor\n");
		return -ENOMEM;
	}

	msc->scroll_accel = SCROLL_ACCEL_DEFAULT;
	msc->hdev = hdev;
	INIT_DEFERRABLE_WORK(&msc->work, magicmouse_enable_mt_work);

	msc->quirks = id->driver_data;
	hid_set_drvdata(hdev, msc);

	ret = hid_parse(hdev);
	if (ret) {
		hid_err(hdev, "magicmouse hid parse failed\n");
		return ret;
	}

	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
	if (ret) {
		hid_err(hdev, "magicmouse hw start failed\n");
		return ret;
	}

	timer_setup(&msc->battery_timer, magicmouse_battery_timer_tick, 0);
	mod_timer(&msc->battery_timer,
		  jiffies + msecs_to_jiffies(USB_BATTERY_TIMEOUT_MS));
	magicmouse_fetch_battery(hdev);

	if (id->vendor == USB_VENDOR_ID_APPLE &&
	    (id->product == USB_DEVICE_ID_APPLE_MAGICMOUSE2 ||
	     id->product == USB_DEVICE_ID_APPLE_MAGICMOUSE2_USBC ||
	     ((id->product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2 ||
	       id->product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2_USBC) &&
	      hdev->type != HID_TYPE_USBMOUSE)))
		return 0;

	if (!msc->input) {
		hid_err(hdev, "magicmouse input not registered\n");
		ret = -ENOMEM;
		goto err_stop_hw;
	}

	switch (id->product) {
	case USB_DEVICE_ID_APPLE_MAGICMOUSE:
		report = hid_register_report(hdev, HID_INPUT_REPORT,
			MOUSE_REPORT_ID, 0);
		break;
	case USB_DEVICE_ID_APPLE_MAGICMOUSE2:
	case USB_DEVICE_ID_APPLE_MAGICMOUSE2_USBC:
		report = hid_register_report(hdev, HID_INPUT_REPORT,
			MOUSE2_REPORT_ID, 0);
		break;
	case USB_DEVICE_ID_APPLE_MAGICTRACKPAD2:
	case USB_DEVICE_ID_APPLE_MAGICTRACKPAD2_USBC:
		switch (id->vendor) {
		case BT_VENDOR_ID_APPLE:
			report = hid_register_report(hdev, HID_INPUT_REPORT,
				TRACKPAD2_BT_REPORT_ID, 0);
			break;
		default: /* USB_VENDOR_ID_APPLE */
			report = hid_register_report(hdev, HID_INPUT_REPORT,
				TRACKPAD2_USB_REPORT_ID, 0);
		}
		break;
	default: /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */
		report = hid_register_report(hdev, HID_INPUT_REPORT,
			TRACKPAD_REPORT_ID, 0);
		report = hid_register_report(hdev, HID_INPUT_REPORT,
			DOUBLE_REPORT_ID, 0);
	}

	if (!report) {
		hid_err(hdev, "unable to register touch report\n");
		ret = -ENOMEM;
		goto err_stop_hw;
	}
	report->size = 6;

	/*
	 * Some devices respond with 'invalid report id' when the feature
	 * report switching them into multitouch mode is sent.
	 *
	 * This results in -EIO from the _raw low-level transport callback,
	 * but there seems to be no other way of switching the mode.
	 * Thus the super-ugly hacky success check below.
	 */
	ret = magicmouse_enable_multitouch(hdev);
	if (ret != -EIO && ret < 0) {
		hid_err(hdev, "unable to request touch data (%d)\n", ret);
		goto err_stop_hw;
	}
	if (ret == -EIO && (id->product == USB_DEVICE_ID_APPLE_MAGICMOUSE2 ||
			    id->product == USB_DEVICE_ID_APPLE_MAGICMOUSE2_USBC)) {
		schedule_delayed_work(&msc->work, msecs_to_jiffies(500));
	}

	return 0;
err_stop_hw:
	timer_delete_sync(&msc->battery_timer);
	hid_hw_stop(hdev);
	return ret;
}

static void magicmouse_remove(struct hid_device *hdev)
{
	struct magicmouse_sc *msc = hid_get_drvdata(hdev);

	if (msc) {
		cancel_delayed_work_sync(&msc->work);
		timer_delete_sync(&msc->battery_timer);
	}

	hid_hw_stop(hdev);
}

static const __u8 *magicmouse_report_fixup(struct hid_device *hdev, __u8 *rdesc,
					   unsigned int *rsize)
{
	/*
	 * Change the usage from:
	 *   0x06, 0x00, 0xff, // Usage Page (Vendor Defined Page 1)  0
	 *   0x09, 0x0b,       // Usage (Vendor Usage 0x0b)           3
	 * To:
	 *   0x05, 0x01,       // Usage Page (Generic Desktop)        0
	 *   0x09, 0x02,       // Usage (Mouse)                       2
	 */
	if (hdev->vendor == USB_VENDOR_ID_APPLE &&
	    (hdev->product == USB_DEVICE_ID_APPLE_MAGICMOUSE2 ||
	     hdev->product == USB_DEVICE_ID_APPLE_MAGICMOUSE2_USBC ||
	     hdev->product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2 ||
	     hdev->product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2_USBC) &&
	    *rsize == 83 && rdesc[46] == 0x84 && rdesc[58] == 0x85) {
		hid_info(hdev,
			 "fixing up magicmouse battery report descriptor\n");
		*rsize = *rsize - 1;
		rdesc = kmemdup(rdesc + 1, *rsize, GFP_KERNEL);
		if (!rdesc)
			return NULL;

		rdesc[0] = 0x05;
		rdesc[1] = 0x01;
		rdesc[2] = 0x09;
		rdesc[3] = 0x02;
	}
	return rdesc;
}

static const struct hid_device_id magic_mice[] = {
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE,
		USB_DEVICE_ID_APPLE_MAGICMOUSE), .driver_data = 0 },
	{ HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE,
		USB_DEVICE_ID_APPLE_MAGICMOUSE2), .driver_data = 0 },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE,
		USB_DEVICE_ID_APPLE_MAGICMOUSE2), .driver_data = 0 },
	{ HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE,
		USB_DEVICE_ID_APPLE_MAGICMOUSE2_USBC), .driver_data = 0 },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE,
		USB_DEVICE_ID_APPLE_MAGICMOUSE2_USBC), .driver_data = 0 },
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE,
		USB_DEVICE_ID_APPLE_MAGICTRACKPAD), .driver_data = 0 },
	{ HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE,
		USB_DEVICE_ID_APPLE_MAGICTRACKPAD2), .driver_data = 0 },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE,
		USB_DEVICE_ID_APPLE_MAGICTRACKPAD2), .driver_data = 0 },
	{ HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE,
		USB_DEVICE_ID_APPLE_MAGICTRACKPAD2_USBC), .driver_data = 0 },
	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE,
		USB_DEVICE_ID_APPLE_MAGICTRACKPAD2_USBC), .driver_data = 0 },
	{ }
};
MODULE_DEVICE_TABLE(hid, magic_mice);

static struct hid_driver magicmouse_driver = {
	.name = "magicmouse",
	.id_table = magic_mice,
	.probe = magicmouse_probe,
	.remove = magicmouse_remove,
	.report_fixup = magicmouse_report_fixup,
	.raw_event = magicmouse_raw_event,
	.event = magicmouse_event,
	.input_mapping = magicmouse_input_mapping,
	.input_configured = magicmouse_input_configured,
};
module_hid_driver(magicmouse_driver);

MODULE_DESCRIPTION("Apple \"Magic\" Wireless Mouse driver");
MODULE_LICENSE("GPL");
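A note on the delta decoding above: for MOUSE_REPORT_ID, magicmouse_raw_event() reassembles signed X/Y deltas whose bits straddle byte boundaries by packing them into the top of a 32-bit value and arithmetic-shifting back down, which sign-extends for free. Below is a minimal standalone sketch of the same idiom; the report bytes are hypothetical, unsigned intermediates keep the shifts well-defined, and the final arithmetic right shift assumes the usual two's-complement behavior, as the driver effectively does.

#include <stdio.h>

int main(void)
{
	/* Hypothetical data[0..3] of a MOUSE_REPORT_ID packet; only
	 * data[1..3] carry the X/Y deltas. */
	unsigned char data[4] = { 0x10, 0xfe, 0x01, 0x0c };

	/* The 10-bit fields land in bits 22..31, so >> 22 drags the
	 * sign bit down, mirroring the expressions in the driver. */
	int x = (int)((((unsigned)data[3] & 0x0c) << 28) |
		      ((unsigned)data[1] << 22)) >> 22;
	int y = (int)((((unsigned)data[3] & 0x30) << 26) |
		      ((unsigned)data[2] << 22)) >> 22;

	printf("x=%d y=%d\n", x, y);	/* prints x=-2 y=1 */
	return 0;
}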
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Landlock - Credential hooks
 *
 * Copyright © 2019-2020 Mickaël Salaün <mic@digikod.net>
 * Copyright © 2019-2020 ANSSI
 * Copyright © 2021-2025 Microsoft Corporation
 */

#ifndef _SECURITY_LANDLOCK_CRED_H
#define _SECURITY_LANDLOCK_CRED_H

#include <linux/container_of.h>
#include <linux/cred.h>
#include <linux/init.h>
#include <linux/rcupdate.h>

#include "access.h"
#include "limits.h"
#include "ruleset.h"
#include "setup.h"

/**
 * struct landlock_cred_security - Credential security blob
 *
 * This structure is packed to minimize the size of struct
 * landlock_file_security.  However, it is always aligned in the LSM cred blob,
 * see lsm_set_blob_size().
 */
struct landlock_cred_security {
	/**
	 * @domain: Immutable ruleset enforced on a task.
	 */
	struct landlock_ruleset *domain;

#ifdef CONFIG_AUDIT
	/**
	 * @domain_exec: Bitmask identifying the domain layers that were enforced by
	 * the current task's executed file (i.e. no new execve(2) since
	 * landlock_restrict_self(2)).
	 */
	u16 domain_exec;
	/**
	 * @log_subdomains_off: Set if the domain descendants' log_status should be
	 * set to %LANDLOCK_LOG_DISABLED.  This is not a landlock_hierarchy
	 * configuration because it applies to future descendant domains and it does
	 * not require a current domain.
	 */
	u8 log_subdomains_off : 1;
#endif /* CONFIG_AUDIT */
} __packed;

#ifdef CONFIG_AUDIT
/* Makes sure all layer executions can be stored. */
static_assert(BITS_PER_TYPE(typeof_member(struct landlock_cred_security,
					  domain_exec)) >=
	      LANDLOCK_MAX_NUM_LAYERS);
#endif /* CONFIG_AUDIT */

static inline struct landlock_cred_security *
landlock_cred(const struct cred *cred)
{
	return cred->security + landlock_blob_sizes.lbs_cred;
}

static inline struct landlock_ruleset *landlock_get_current_domain(void)
{
	return landlock_cred(current_cred())->domain;
}

/*
 * The call needs to come from an RCU read-side critical section.
 */
static inline const struct landlock_ruleset *
landlock_get_task_domain(const struct task_struct *const task)
{
	return landlock_cred(__task_cred(task))->domain;
}

static inline bool landlocked(const struct task_struct *const task)
{
	bool has_dom;

	if (task == current)
		return !!landlock_get_current_domain();

	rcu_read_lock();
	has_dom = !!landlock_get_task_domain(task);
	rcu_read_unlock();
	return has_dom;
}

/**
 * landlock_get_applicable_subject - Return the subject's Landlock credential
 *                                   if its enforced domain applies to (i.e.
 *                                   handles) at least one of the access rights
 *                                   specified in @masks
 *
 * @cred: credential
 * @masks: access masks
 * @handle_layer: returned youngest layer handling a subset of @masks.  Not set
 *                if the function returns NULL.
 *
 * Returns: landlock_cred(@cred) if any of the access rights specified in
 * @masks are handled, or NULL otherwise.
 */
static inline const struct landlock_cred_security *
landlock_get_applicable_subject(const struct cred *const cred,
				const struct access_masks masks,
				size_t *const handle_layer)
{
	const union access_masks_all masks_all = {
		.masks = masks,
	};
	const struct landlock_ruleset *domain;
	ssize_t layer_level;

	if (!cred)
		return NULL;

	domain = landlock_cred(cred)->domain;
	if (!domain)
		return NULL;

	for (layer_level = domain->num_layers - 1; layer_level >= 0;
	     layer_level--) {
		union access_masks_all layer = {
			.masks = domain->access_masks[layer_level],
		};

		if (layer.all & masks_all.all) {
			if (handle_layer)
				*handle_layer = layer_level;

			return landlock_cred(cred);
		}
	}

	return NULL;
}

__init void landlock_add_cred_hooks(void);

#endif /* _SECURITY_LANDLOCK_CRED_H */
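The layer scan in landlock_get_applicable_subject() walks the domain's layers from youngest (num_layers - 1) to oldest and stops at the first layer whose handled-access mask intersects the requested masks, reporting that layer through @handle_layer. Below is a simplified userspace sketch of the same search, with plain bitmasks standing in for union access_masks_all and hypothetical per-layer values.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical handled-access masks, index 0 = oldest layer. */
	uint32_t layer_masks[3] = { 0x0001, 0x0006, 0x0008 };
	uint32_t requested = 0x0004;	/* one requested access right */

	/* Scan youngest to oldest, as the kernel helper does. */
	for (int layer = 2; layer >= 0; layer--) {
		if (layer_masks[layer] & requested) {
			printf("handled by layer %d\n", layer); /* layer 1 */
			return 0;
		}
	}
	printf("no layer handles the request\n");
	return 0;
}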
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Crypto library utility functions
 *
 * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
 */

#include <linux/unaligned.h>
#include <crypto/utils.h>
#include <linux/module.h>

/*
 * XOR @len bytes from @src1 and @src2 together, writing the result to @dst
 * (which may alias one of the sources).  Don't call this directly; call
 * crypto_xor() or crypto_xor_cpy() instead.
 */
void __crypto_xor(u8 *dst, const u8 *src1, const u8 *src2, unsigned int len)
{
	int relalign = 0;

	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
		int size = sizeof(unsigned long);
		int d = (((unsigned long)dst ^ (unsigned long)src1) |
			 ((unsigned long)dst ^ (unsigned long)src2)) &
			(size - 1);

		relalign = d ? 1 << __ffs(d) : size;

		/*
		 * If we care about alignment, process as many bytes as
		 * needed to advance dst and src to values whose alignments
		 * equal their relative alignment. This will allow us to
		 * process the remainder of the input using optimal strides.
		 */
		while (((unsigned long)dst & (relalign - 1)) && len > 0) {
			*dst++ = *src1++ ^ *src2++;
			len--;
		}
	}

	while (IS_ENABLED(CONFIG_64BIT) && len >= 8 && !(relalign & 7)) {
		if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
			u64 l = get_unaligned((u64 *)src1) ^
				get_unaligned((u64 *)src2);
			put_unaligned(l, (u64 *)dst);
		} else {
			*(u64 *)dst = *(u64 *)src1 ^ *(u64 *)src2;
		}
		dst += 8;
		src1 += 8;
		src2 += 8;
		len -= 8;
	}

	while (len >= 4 && !(relalign & 3)) {
		if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
			u32 l = get_unaligned((u32 *)src1) ^
				get_unaligned((u32 *)src2);
			put_unaligned(l, (u32 *)dst);
		} else {
			*(u32 *)dst = *(u32 *)src1 ^ *(u32 *)src2;
		}
		dst += 4;
		src1 += 4;
		src2 += 4;
		len -= 4;
	}

	while (len >= 2 && !(relalign & 1)) {
		if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
			u16 l = get_unaligned((u16 *)src1) ^
				get_unaligned((u16 *)src2);
			put_unaligned(l, (u16 *)dst);
		} else {
			*(u16 *)dst = *(u16 *)src1 ^ *(u16 *)src2;
		}
		dst += 2;
		src1 += 2;
		src2 += 2;
		len -= 2;
	}

	while (len--)
		*dst++ = *src1++ ^ *src2++;
}
EXPORT_SYMBOL_GPL(__crypto_xor);

MODULE_DESCRIPTION("Crypto library utility functions");
MODULE_LICENSE("GPL");
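The relalign value computed in __crypto_xor() above is the widest power-of-two access size at which dst, src1 and src2 remain mutually aligned: the lowest bit position at which any of the pointers differ bounds the usable stride. Below is a standalone sketch of that computation, substituting the compiler builtin __builtin_ctzl() for the kernel's __ffs() and using hypothetical addresses on a 64-bit target.

#include <stdio.h>

static unsigned long relalign(unsigned long dst, unsigned long src1,
			      unsigned long src2)
{
	unsigned long size = sizeof(unsigned long);	/* 8 on 64-bit */
	unsigned long d = ((dst ^ src1) | (dst ^ src2)) & (size - 1);

	/* Lowest differing bit limits the stride; no difference = full word. */
	return d ? 1UL << __builtin_ctzl(d) : size;
}

int main(void)
{
	/* All offsets equal mod 8: full word strides are usable. */
	printf("%lu\n", relalign(0x1000, 0x2000, 0x3000));	/* 8 */
	/* src1 is 4 bytes off: only 4-byte strides stay aligned. */
	printf("%lu\n", relalign(0x1000, 0x2004, 0x3000));	/* 4 */
	/* A 1-byte skew forces the byte-at-a-time tail loop. */
	printf("%lu\n", relalign(0x1001, 0x2000, 0x3000));	/* 1 */
	return 0;
}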
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_BIO_INTEGRITY_H
#define _LINUX_BIO_INTEGRITY_H

#include <linux/bio.h>

enum bip_flags {
	BIP_BLOCK_INTEGRITY	= 1 << 0, /* block layer owns integrity data */
	BIP_MAPPED_INTEGRITY	= 1 << 1, /* ref tag has been remapped */
	BIP_DISK_NOCHECK	= 1 << 2, /* disable disk integrity checking */
	BIP_IP_CHECKSUM		= 1 << 3, /* IP checksum */
	BIP_COPY_USER		= 1 << 4, /* Kernel bounce buffer in use */
	BIP_CHECK_GUARD		= 1 << 5, /* guard check */
	BIP_CHECK_REFTAG	= 1 << 6, /* reftag check */
	BIP_CHECK_APPTAG	= 1 << 7, /* apptag check */
};

struct bio_integrity_payload {
	struct bvec_iter	bip_iter;
	unsigned short		bip_vcnt;	/* # of integrity bio_vecs */
	unsigned short		bip_max_vcnt;	/* integrity bio_vec slots */
	unsigned short		bip_flags;	/* control flags */
	u16			app_tag;	/* application tag value */

	struct bio_vec		*bip_vec;
};

#define BIP_CLONE_FLAGS (BIP_MAPPED_INTEGRITY | BIP_IP_CHECKSUM | \
			 BIP_CHECK_GUARD | BIP_CHECK_REFTAG | BIP_CHECK_APPTAG)

#ifdef CONFIG_BLK_DEV_INTEGRITY

#define bip_for_each_vec(bvl, bip, iter)				\
	for_each_bvec(bvl, (bip)->bip_vec, iter, (bip)->bip_iter)

#define bio_for_each_integrity_vec(_bvl, _bio, _iter)			\
	for_each_bio(_bio)						\
		bip_for_each_vec(_bvl, _bio->bi_integrity, _iter)

static inline struct bio_integrity_payload *bio_integrity(struct bio *bio)
{
	if (bio->bi_opf & REQ_INTEGRITY)
		return bio->bi_integrity;
	return NULL;
}

static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag)
{
	struct bio_integrity_payload *bip = bio_integrity(bio);

	if (bip)
		return bip->bip_flags & flag;
	return false;
}

static inline sector_t bip_get_seed(struct bio_integrity_payload *bip)
{
	return bip->bip_iter.bi_sector;
}

static inline void bip_set_seed(struct bio_integrity_payload *bip,
				sector_t seed)
{
	bip->bip_iter.bi_sector = seed;
}

void bio_integrity_init(struct bio *bio, struct bio_integrity_payload *bip,
			struct bio_vec *bvecs, unsigned int nr_vecs);
struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, gfp_t gfp,
						  unsigned int nr);
int bio_integrity_add_page(struct bio *bio, struct page *page,
			   unsigned int len, unsigned int offset);
int bio_integrity_map_user(struct bio *bio, struct iov_iter *iter);
int bio_integrity_map_iter(struct bio *bio, struct uio_meta *meta);
void bio_integrity_unmap_user(struct bio *bio);
bool bio_integrity_prep(struct bio *bio);
void bio_integrity_advance(struct bio *bio, unsigned int bytes_done);
void bio_integrity_trim(struct bio *bio);
int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask);

#else /* CONFIG_BLK_DEV_INTEGRITY */

static inline struct bio_integrity_payload *bio_integrity(struct bio *bio)
{
	return NULL;
}

static inline int bio_integrity_map_user(struct bio *bio,
					 struct iov_iter *iter)
{
	return -EINVAL;
}

static inline int bio_integrity_map_iter(struct bio *bio,
					 struct uio_meta *meta)
{
	return -EINVAL;
}

static inline void bio_integrity_unmap_user(struct bio *bio)
{
}

static inline bool bio_integrity_prep(struct bio *bio)
{
	return true;
}

static inline int bio_integrity_clone(struct bio *bio,
				      struct bio *bio_src, gfp_t gfp_mask)
{
	return 0;
}

static inline void bio_integrity_advance(struct bio *bio,
					 unsigned int bytes_done)
{
}

static inline void bio_integrity_trim(struct bio *bio)
{
}

static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag)
{
	return false;
}

static inline struct bio_integrity_payload *
bio_integrity_alloc(struct bio *bio, gfp_t gfp, unsigned int nr)
{
	return ERR_PTR(-EINVAL);
}

static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
					 unsigned int len, unsigned int offset)
{
	return 0;
}

#endif /* CONFIG_BLK_DEV_INTEGRITY */

#endif /* _LINUX_BIO_INTEGRITY_H */
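BIP_CLONE_FLAGS above selects which bip_flags a clone may inherit; as the name suggests, ownership and bounce-buffer state (BIP_BLOCK_INTEGRITY, BIP_COPY_USER) and BIP_DISK_NOCHECK are masked out rather than propagated. Below is a small standalone demonstration of that masking, with the same bit assignments as the enum above and hypothetical source-bio flags.

#include <stdio.h>

/* Same bit assignments as enum bip_flags above. */
#define BIP_BLOCK_INTEGRITY	(1 << 0)
#define BIP_MAPPED_INTEGRITY	(1 << 1)
#define BIP_DISK_NOCHECK	(1 << 2)
#define BIP_IP_CHECKSUM		(1 << 3)
#define BIP_COPY_USER		(1 << 4)
#define BIP_CHECK_GUARD		(1 << 5)
#define BIP_CHECK_REFTAG	(1 << 6)
#define BIP_CHECK_APPTAG	(1 << 7)

#define BIP_CLONE_FLAGS (BIP_MAPPED_INTEGRITY | BIP_IP_CHECKSUM | \
			 BIP_CHECK_GUARD | BIP_CHECK_REFTAG | BIP_CHECK_APPTAG)

int main(void)
{
	/* Hypothetical source-bio flags: owned by the block layer, bounced
	 * through a kernel buffer, with guard and reftag checks enabled. */
	unsigned short src = BIP_BLOCK_INTEGRITY | BIP_COPY_USER |
			     BIP_CHECK_GUARD | BIP_CHECK_REFTAG;
	unsigned short cloned = src & BIP_CLONE_FLAGS;

	/* Only the check bits survive the clone mask. */
	printf("src=0x%02x cloned=0x%02x\n", src, cloned); /* 0x71 -> 0x60 */
	return 0;
}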
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * SPCA501 chip based cameras initialization data
 *
 * V4L2 by Jean-Francois Moine <http://moinejf.free.fr>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#define MODULE_NAME "spca501"

#include "gspca.h"

MODULE_AUTHOR("Michel Xhaard <mxhaard@users.sourceforge.net>");
MODULE_DESCRIPTION("GSPCA/SPCA501 USB Camera Driver");
MODULE_LICENSE("GPL");

/* specific webcam descriptor */
struct sd {
	struct gspca_dev gspca_dev;	/* !! must be the first item */

	unsigned short contrast;
	__u8 brightness;
	__u8 colors;
	__u8 blue_balance;
	__u8 red_balance;

	char subtype;
#define Arowana300KCMOSCamera 0
#define IntelCreateAndShare 1
#define KodakDVC325 2
#define MystFromOriUnknownCamera 3
#define SmileIntlCamera 4
#define ThreeComHomeConnectLite 5
#define ViewQuestM318B 6
};

static const struct v4l2_pix_format vga_mode[] = {
	{160, 120, V4L2_PIX_FMT_SPCA501, V4L2_FIELD_NONE,
		.bytesperline = 160,
		.sizeimage = 160 * 120 * 3 / 2,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = 2},
	{320, 240, V4L2_PIX_FMT_SPCA501, V4L2_FIELD_NONE,
		.bytesperline = 320,
		.sizeimage = 320 * 240 * 3 / 2,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = 1},
	{640, 480, V4L2_PIX_FMT_SPCA501, V4L2_FIELD_NONE,
		.bytesperline = 640,
		.sizeimage = 640 * 480 * 3 / 2,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = 0},
};

#define SPCA50X_REG_USB 0x2	/* spca505 501 */

/*
 * Data to initialize a SPCA501.
 * From a capture file provided by Bill Roehl
 * With SPCA501 chip description
 */

#define CCDSP_SET		/* set CCDSP parameters */
#define TG_SET			/* set time generator parameters */
#undef DSPWIN_SET		/* set DSP windows parameters */
#undef ALTER_GAMA		/* Set alternate set of YUV transform coeffs. */

#define SPCA501_SNAPBIT 0x80
#define SPCA501_SNAPCTRL 0x10

/* Frame packet header offsets for the spca501 */
#define SPCA501_OFFSET_GPIO 1
#define SPCA501_OFFSET_TYPE 2
#define SPCA501_OFFSET_TURN3A 3
#define SPCA501_OFFSET_FRAMSEQ 4
#define SPCA501_OFFSET_COMPRESS 5
#define SPCA501_OFFSET_QUANT 6
#define SPCA501_OFFSET_QUANT2 7
#define SPCA501_OFFSET_DATA 8

#define SPCA501_PROP_COMP_ENABLE(d) ((d) & 1)
#define SPCA501_PROP_SNAP(d) ((d) & 0x40)
#define SPCA501_PROP_SNAP_CTRL(d) ((d) & 0x10)
#define SPCA501_PROP_COMP_THRESH(d) (((d) & 0x0e) >> 1)
#define SPCA501_PROP_COMP_QUANT(d) (((d) & 0x70) >> 4)

/* SPCA501 CCDSP control */
#define SPCA501_REG_CCDSP 0x01
/* SPCA501 control/status registers */
#define SPCA501_REG_CTLRL 0x02

/* registers for color correction and YUV transformation */
#define SPCA501_A11 0x08
#define SPCA501_A12 0x09
#define SPCA501_A13 0x0A
#define SPCA501_A21 0x0B
#define SPCA501_A22 0x0C
#define SPCA501_A23 0x0D
#define SPCA501_A31 0x0E
#define SPCA501_A32 0x0F
#define SPCA501_A33 0x10

/* Data for video camera initialization before capturing */
static const __u16 spca501_open_data[][3] = {
	/* bmRequest,value,index */
	{0x2, 0x50, 0x00},	/* C/S enable soft reset */
	{0x2, 0x40, 0x00},	/* C/S disable soft reset */
	{0x2, 0x02, 0x05},	/* C/S general purpose I/O data */
	{0x2, 0x03, 0x05},	/* C/S general purpose I/O data */
#ifdef CCDSP_SET
	{0x1, 0x38, 0x01},	/* CCDSP options */
	{0x1, 0x05, 0x02},	/* CCDSP Optical black level for user settings */
	{0x1, 0xC0, 0x03},	/* CCDSP Optical black settings */
	{0x1, 0x67, 0x07},
	{0x1, 0x63, 0x3f},	/* CCDSP CCD gamma enable */
	{0x1, 0x03, 0x56},	/* Add gamma correction */

	{0x1, 0xFF, 0x15},	/* CCDSP High luminance for white balance */
	{0x1, 0x01, 0x16},	/* CCDSP Low luminance for white balance */

/* Color correction and RGB-to-YUV transformation coefficients changing */
#ifdef ALTER_GAMA
	{0x0, 0x00, 0x08},	/* A11 */
	{0x0, 0x00, 0x09},	/* A12 */
	{0x0, 0x90, 0x0A},	/* A13 */
	{0x0, 0x12, 0x0B},	/* A21 */
	{0x0, 0x00, 0x0C},	/* A22 */
	{0x0, 0x00, 0x0D},	/* A23 */
	{0x0, 0x00, 0x0E},	/* A31 */
	{0x0, 0x02, 0x0F},	/* A32 */
	{0x0, 0x00, 0x10},	/* A33 */
#else
	{0x1, 0x2a, 0x08},	/* A11 0x31 */
	{0x1, 0xf8, 0x09},	/* A12 f8 */
	{0x1, 0xf8, 0x0A},	/* A13 f8 */
	{0x1, 0xf8, 0x0B},	/* A21 f8 */
	{0x1, 0x14, 0x0C},	/* A22 0x14 */
	{0x1, 0xf8, 0x0D},	/* A23 f8 */
	{0x1, 0xf8, 0x0E},	/* A31 f8 */
	{0x1, 0xf8, 0x0F},	/* A32 f8 */
	{0x1, 0x20, 0x10},	/* A33 0x20 */
#endif
	{0x1, 0x00, 0x11},	/* R offset */
	{0x1, 0x00, 0x12},	/* G offset */
	{0x1, 0x00, 0x13},	/* B offset */
	{0x1, 0x00, 0x14},	/* GB offset */
#endif
#ifdef TG_SET
	/* Time generator manipulations */
	{0x0, 0xfc, 0x0},	/* Set up high bits of shutter speed */
	{0x0, 0x01, 0x1},	/* Set up low bits of shutter speed */

	{0x0, 0xe4, 0x04},	/* DCLK*2 clock phase adjustment */
	{0x0, 0x08, 0x05},	/* ADCK phase adjustment, inv. ext.
VB */ {0x0, 0x03, 0x06}, /* FR phase adjustment */ {0x0, 0x01, 0x07}, /* FCDS phase adjustment */ {0x0, 0x39, 0x08}, /* FS phase adjustment */ {0x0, 0x88, 0x0a}, /* FH1 phase and delay adjustment */ {0x0, 0x03, 0x0f}, /* pixel identification */ {0x0, 0x00, 0x11}, /* clock source selection (default) */ /*VERY strange manipulations with * select DMCLP or OBPX to be ADCLP output (0x0C) * OPB always toggle or not (0x0D) but they allow * us to set up brightness */ {0x0, 0x01, 0x0c}, {0x0, 0xe0, 0x0d}, /* Done */ #endif #ifdef DSPWIN_SET {0x1, 0xa0, 0x01}, /* Setting image processing parameters */ {0x1, 0x1c, 0x17}, /* Changing Windows positions X1 */ {0x1, 0xe2, 0x19}, /* X2 */ {0x1, 0x1c, 0x1b}, /* X3 */ {0x1, 0xe2, 0x1d}, /* X4 */ {0x1, 0x5f, 0x1f}, /* X5 */ {0x1, 0x32, 0x20}, /* Y5 */ {0x1, 0x01, 0x10}, /* Changing A33 */ #endif {0x2, 0x204a, 0x07},/* Setting video compression & resolution 160x120 */ {0x2, 0x94, 0x06}, /* Setting video no compression */ {} }; /* The SPCAxxx docs from Sunplus document these values in tables, one table per register number. In the data below, dmRequest is the register number, index is the Addr, and value is a combination of Bit values. Bit Value (hex) 0 01 1 02 2 04 3 08 4 10 5 20 6 40 7 80 */ /* Data for chip initialization (set default values) */ static const __u16 spca501_init_data[][3] = { /* Set all the values to powerup defaults */ /* bmRequest,value,index */ {0x0, 0xAA, 0x00}, {0x0, 0x02, 0x01}, {0x0, 0x01, 0x02}, {0x0, 0x02, 0x03}, {0x0, 0xCE, 0x04}, {0x0, 0x00, 0x05}, {0x0, 0x00, 0x06}, {0x0, 0x00, 0x07}, {0x0, 0x00, 0x08}, {0x0, 0x00, 0x09}, {0x0, 0x90, 0x0A}, {0x0, 0x12, 0x0B}, {0x0, 0x00, 0x0C}, {0x0, 0x00, 0x0D}, {0x0, 0x00, 0x0E}, {0x0, 0x02, 0x0F}, {0x0, 0x00, 0x10}, {0x0, 0x00, 0x11}, {0x0, 0x00, 0x12}, {0x0, 0x00, 0x13}, {0x0, 0x00, 0x14}, {0x0, 0x00, 0x15}, {0x0, 0x00, 0x16}, {0x0, 0x00, 0x17}, {0x0, 0x00, 0x18}, {0x0, 0x00, 0x19}, {0x0, 0x00, 0x1A}, {0x0, 0x00, 0x1B}, {0x0, 0x00, 0x1C}, {0x0, 0x00, 0x1D}, {0x0, 0x00, 0x1E}, {0x0, 0x00, 0x1F}, {0x0, 0x00, 0x20}, {0x0, 0x00, 0x21}, {0x0, 0x00, 0x22}, {0x0, 0x00, 0x23}, {0x0, 0x00, 0x24}, {0x0, 0x00, 0x25}, {0x0, 0x00, 0x26}, {0x0, 0x00, 0x27}, {0x0, 0x00, 0x28}, {0x0, 0x00, 0x29}, {0x0, 0x00, 0x2A}, {0x0, 0x00, 0x2B}, {0x0, 0x00, 0x2C}, {0x0, 0x00, 0x2D}, {0x0, 0x00, 0x2E}, {0x0, 0x00, 0x2F}, {0x0, 0x00, 0x30}, {0x0, 0x00, 0x31}, {0x0, 0x00, 0x32}, {0x0, 0x00, 0x33}, {0x0, 0x00, 0x34}, {0x0, 0x00, 0x35}, {0x0, 0x00, 0x36}, {0x0, 0x00, 0x37}, {0x0, 0x00, 0x38}, {0x0, 0x00, 0x39}, {0x0, 0x00, 0x3A}, {0x0, 0x00, 0x3B}, {0x0, 0x00, 0x3C}, {0x0, 0x00, 0x3D}, {0x0, 0x00, 0x3E}, {0x0, 0x00, 0x3F}, {0x0, 0x00, 0x40}, {0x0, 0x00, 0x41}, {0x0, 0x00, 0x42}, {0x0, 0x00, 0x43}, {0x0, 0x00, 0x44}, {0x0, 0x00, 0x45}, {0x0, 0x00, 0x46}, {0x0, 0x00, 0x47}, {0x0, 0x00, 0x48}, {0x0, 0x00, 0x49}, {0x0, 0x00, 0x4A}, {0x0, 0x00, 0x4B}, {0x0, 0x00, 0x4C}, {0x0, 0x00, 0x4D}, {0x0, 0x00, 0x4E}, {0x0, 0x00, 0x4F}, {0x0, 0x00, 0x50}, {0x0, 0x00, 0x51}, {0x0, 0x00, 0x52}, {0x0, 0x00, 0x53}, {0x0, 0x00, 0x54}, {0x0, 0x00, 0x55}, {0x0, 0x00, 0x56}, {0x0, 0x00, 0x57}, {0x0, 0x00, 0x58}, {0x0, 0x00, 0x59}, {0x0, 0x00, 0x5A}, {0x0, 0x00, 0x5B}, {0x0, 0x00, 0x5C}, {0x0, 0x00, 0x5D}, {0x0, 0x00, 0x5E}, {0x0, 0x00, 0x5F}, {0x0, 0x00, 0x60}, {0x0, 0x00, 0x61}, {0x0, 0x00, 0x62}, {0x0, 0x00, 0x63}, {0x0, 0x00, 0x64}, {0x0, 0x00, 0x65}, {0x0, 0x00, 0x66}, {0x0, 0x00, 0x67}, {0x0, 0x00, 0x68}, {0x0, 0x00, 0x69}, {0x0, 0x00, 0x6A}, {0x0, 0x00, 0x6B}, {0x0, 0x00, 0x6C}, {0x0, 0x00, 0x6D}, {0x0, 0x00, 0x6E}, {0x0, 0x00, 0x6F}, {0x0, 
0x00, 0x70}, {0x0, 0x00, 0x71}, {0x0, 0x00, 0x72}, {0x0, 0x00, 0x73}, {0x0, 0x00, 0x74}, {0x0, 0x00, 0x75}, {0x0, 0x00, 0x76}, {0x0, 0x00, 0x77}, {0x0, 0x00, 0x78}, {0x0, 0x00, 0x79}, {0x0, 0x00, 0x7A}, {0x0, 0x00, 0x7B}, {0x0, 0x00, 0x7C}, {0x0, 0x00, 0x7D}, {0x0, 0x00, 0x7E}, {0x0, 0x00, 0x7F}, {0x0, 0x00, 0x80}, {0x0, 0x00, 0x81}, {0x0, 0x00, 0x82}, {0x0, 0x00, 0x83}, {0x0, 0x00, 0x84}, {0x0, 0x00, 0x85}, {0x0, 0x00, 0x86}, {0x0, 0x00, 0x87}, {0x0, 0x00, 0x88}, {0x0, 0x00, 0x89}, {0x0, 0x00, 0x8A}, {0x0, 0x00, 0x8B}, {0x0, 0x00, 0x8C}, {0x0, 0x00, 0x8D}, {0x0, 0x00, 0x8E}, {0x0, 0x00, 0x8F}, {0x0, 0x00, 0x90}, {0x0, 0x00, 0x91}, {0x0, 0x00, 0x92}, {0x0, 0x00, 0x93}, {0x0, 0x00, 0x94}, {0x0, 0x00, 0x95}, {0x0, 0x00, 0x96}, {0x0, 0x00, 0x97}, {0x0, 0x00, 0x98}, {0x0, 0x00, 0x99}, {0x0, 0x00, 0x9A}, {0x0, 0x00, 0x9B}, {0x0, 0x00, 0x9C}, {0x0, 0x00, 0x9D}, {0x0, 0x00, 0x9E}, {0x0, 0x00, 0x9F}, {0x0, 0x00, 0xA0}, {0x0, 0x00, 0xA1}, {0x0, 0x00, 0xA2}, {0x0, 0x00, 0xA3}, {0x0, 0x00, 0xA4}, {0x0, 0x00, 0xA5}, {0x0, 0x00, 0xA6}, {0x0, 0x00, 0xA7}, {0x0, 0x00, 0xA8}, {0x0, 0x00, 0xA9}, {0x0, 0x00, 0xAA}, {0x0, 0x00, 0xAB}, {0x0, 0x00, 0xAC}, {0x0, 0x00, 0xAD}, {0x0, 0x00, 0xAE}, {0x0, 0x00, 0xAF}, {0x0, 0x00, 0xB0}, {0x0, 0x00, 0xB1}, {0x0, 0x00, 0xB2}, {0x0, 0x00, 0xB3}, {0x0, 0x00, 0xB4}, {0x0, 0x00, 0xB5}, {0x0, 0x00, 0xB6}, {0x0, 0x00, 0xB7}, {0x0, 0x00, 0xB8}, {0x0, 0x00, 0xB9}, {0x0, 0x00, 0xBA}, {0x0, 0x00, 0xBB}, {0x0, 0x00, 0xBC}, {0x0, 0x00, 0xBD}, {0x0, 0x00, 0xBE}, {0x0, 0x00, 0xBF}, {0x0, 0x00, 0xC0}, {0x0, 0x00, 0xC1}, {0x0, 0x00, 0xC2}, {0x0, 0x00, 0xC3}, {0x0, 0x00, 0xC4}, {0x0, 0x00, 0xC5}, {0x0, 0x00, 0xC6}, {0x0, 0x00, 0xC7}, {0x0, 0x00, 0xC8}, {0x0, 0x00, 0xC9}, {0x0, 0x00, 0xCA}, {0x0, 0x00, 0xCB}, {0x0, 0x00, 0xCC}, {0x1, 0xF4, 0x00}, {0x1, 0x38, 0x01}, {0x1, 0x40, 0x02}, {0x1, 0x0A, 0x03}, {0x1, 0x40, 0x04}, {0x1, 0x40, 0x05}, {0x1, 0x40, 0x06}, {0x1, 0x67, 0x07}, {0x1, 0x31, 0x08}, {0x1, 0x00, 0x09}, {0x1, 0x00, 0x0A}, {0x1, 0x00, 0x0B}, {0x1, 0x14, 0x0C}, {0x1, 0x00, 0x0D}, {0x1, 0x00, 0x0E}, {0x1, 0x00, 0x0F}, {0x1, 0x1E, 0x10}, {0x1, 0x00, 0x11}, {0x1, 0x00, 0x12}, {0x1, 0x00, 0x13}, {0x1, 0x00, 0x14}, {0x1, 0xFF, 0x15}, {0x1, 0x01, 0x16}, {0x1, 0x32, 0x17}, {0x1, 0x23, 0x18}, {0x1, 0xCE, 0x19}, {0x1, 0x23, 0x1A}, {0x1, 0x32, 0x1B}, {0x1, 0x8D, 0x1C}, {0x1, 0xCE, 0x1D}, {0x1, 0x8D, 0x1E}, {0x1, 0x00, 0x1F}, {0x1, 0x00, 0x20}, {0x1, 0xFF, 0x3E}, {0x1, 0x02, 0x3F}, {0x1, 0x00, 0x40}, {0x1, 0x00, 0x41}, {0x1, 0x00, 0x42}, {0x1, 0x00, 0x43}, {0x1, 0x00, 0x44}, {0x1, 0x00, 0x45}, {0x1, 0x00, 0x46}, {0x1, 0x00, 0x47}, {0x1, 0x00, 0x48}, {0x1, 0x00, 0x49}, {0x1, 0x00, 0x4A}, {0x1, 0x00, 0x4B}, {0x1, 0x00, 0x4C}, {0x1, 0x00, 0x4D}, {0x1, 0x00, 0x4E}, {0x1, 0x00, 0x4F}, {0x1, 0x00, 0x50}, {0x1, 0x00, 0x51}, {0x1, 0x00, 0x52}, {0x1, 0x00, 0x53}, {0x1, 0x00, 0x54}, {0x1, 0x00, 0x55}, {0x1, 0x00, 0x56}, {0x1, 0x00, 0x57}, {0x1, 0x00, 0x58}, {0x1, 0x00, 0x59}, {0x1, 0x00, 0x5A}, {0x2, 0x03, 0x00}, {0x2, 0x00, 0x01}, {0x2, 0x00, 0x05}, {0x2, 0x00, 0x06}, {0x2, 0x00, 0x07}, {0x2, 0x00, 0x10}, {0x2, 0x00, 0x11}, /* Strange - looks like the 501 driver doesn't do anything * at insert time except read the EEPROM */ {} }; /* Data for video camera init before capture. * Capture and decoding by Colin Peart. * This is for the 3com HomeConnect Lite which is spca501a based. 
 */
static const __u16 spca501_3com_open_data[][3] = {
	/* bmRequest,value,index */
	{0x2, 0x0050, 0x0000},	/* C/S Enable TG soft reset, timing mode=010 */
	{0x2, 0x0043, 0x0000},	/* C/S Disable TG soft reset, timing mode=010 */
	{0x2, 0x0002, 0x0005},	/* C/S GPIO */
	{0x2, 0x0003, 0x0005},	/* C/S GPIO */
#ifdef CCDSP_SET
	{0x1, 0x0020, 0x0001},	/* CCDSP Options */
	{0x1, 0x0020, 0x0002},	/* CCDSP Black Level */
	{0x1, 0x006e, 0x0007},	/* CCDSP Gamma options */
	{0x1, 0x0090, 0x0015},	/* CCDSP Luminance Low */
	{0x1, 0x00ff, 0x0016},	/* CCDSP Luminance High */
	{0x1, 0x0003, 0x003F},	/* CCDSP Gamma correction toggle */
#ifdef ALTER_GAMA
	{0x1, 0x0010, 0x0008},	/* CCDSP YUV A11 */
	{0x1, 0x0000, 0x0009},	/* CCDSP YUV A12 */
	{0x1, 0x0000, 0x000a},	/* CCDSP YUV A13 */
	{0x1, 0x0000, 0x000b},	/* CCDSP YUV A21 */
	{0x1, 0x0010, 0x000c},	/* CCDSP YUV A22 */
	{0x1, 0x0000, 0x000d},	/* CCDSP YUV A23 */
	{0x1, 0x0000, 0x000e},	/* CCDSP YUV A31 */
	{0x1, 0x0000, 0x000f},	/* CCDSP YUV A32 */
	{0x1, 0x0010, 0x0010},	/* CCDSP YUV A33 */
	{0x1, 0x0000, 0x0011},	/* CCDSP R Offset */
	{0x1, 0x0000, 0x0012},	/* CCDSP G Offset */
	{0x1, 0x0001, 0x0013},	/* CCDSP B Offset */
	{0x1, 0x0001, 0x0014},	/* CCDSP BG Offset */
	{0x1, 0x003f, 0x00C1},	/* CCDSP Gamma Correction Enable */
#endif
#endif
#ifdef TG_SET
	{0x0, 0x00fc, 0x0000},	/* TG Shutter Speed High Bits */
	{0x0, 0x0000, 0x0001},	/* TG Shutter Speed Low Bits */
	{0x0, 0x00e4, 0x0004},	/* TG DCLK*2 Adjust */
	{0x0, 0x0008, 0x0005},	/* TG ADCK Adjust */
	{0x0, 0x0003, 0x0006},	/* TG FR Phase Adjust */
	{0x0, 0x0001, 0x0007},	/* TG FCDS Phase Adjust */
	{0x0, 0x0039, 0x0008},	/* TG FS Phase Adjust */
	{0x0, 0x0088, 0x000a},	/* TG MH1 */
	{0x0, 0x0003, 0x000f},	/* TG Pixel ID */

	/* Like below, unexplained toggling */
	{0x0, 0x0080, 0x000c},
	{0x0, 0x0000, 0x000d},
	{0x0, 0x0080, 0x000c},
	{0x0, 0x0004, 0x000d},
	{0x0, 0x0000, 0x000c},
	{0x0, 0x0000, 0x000d},
	{0x0, 0x0040, 0x000c},
	{0x0, 0x0017, 0x000d},
	{0x0, 0x00c0, 0x000c},
	{0x0, 0x0000, 0x000d},
	{0x0, 0x0080, 0x000c},
	{0x0, 0x0006, 0x000d},
	{0x0, 0x0080, 0x000c},
	{0x0, 0x0004, 0x000d},
	{0x0, 0x0002, 0x0003},
#endif
#ifdef DSPWIN_SET
	{0x1, 0x001c, 0x0017},	/* CCDSP W1 Start X */
	{0x1, 0x00e2, 0x0019},	/* CCDSP W2 Start X */
	{0x1, 0x001c, 0x001b},	/* CCDSP W3 Start X */
	{0x1, 0x00e2, 0x001d},	/* CCDSP W4 Start X */
	{0x1, 0x00aa, 0x001f},	/* CCDSP W5 Start X */
	{0x1, 0x0070, 0x0020},	/* CCDSP W5 Start Y */
#endif
	{0x0, 0x0001, 0x0010},	/* TG Start Clock */

/*	{0x2, 0x006a, 0x0001},	 * C/S Enable ISOSYNCH Packet Engine */
	{0x2, 0x0068, 0x0001},	/* C/S Disable ISOSYNCH Packet Engine */
	{0x2, 0x0000, 0x0005},
	{0x2, 0x0043, 0x0000},	/* C/S Set Timing Mode, Disable TG soft reset */
	{0x2, 0x0043, 0x0000},	/* C/S Set Timing Mode, Disable TG soft reset */
	{0x2, 0x0002, 0x0005},	/* C/S GPIO */
	{0x2, 0x0003, 0x0005},	/* C/S GPIO */

	{0x2, 0x006a, 0x0001},	/* C/S Enable ISOSYNCH Packet Engine */
	{}
};

/*
 * Data used to initialize a SPCA501C with HV7131B sensor.
 * From a capture file taken with USBSnoop v 1.5
 * I have a "SPCA501C pc camera chipset" manual by Sunplus, but some
 * of the value meanings are obscure or simply "reserved".
 * To do list:
 * 1) Understand what every value means
 * 2) Understand why some values seem to appear more than once
 * 3) Write a small comment for each line of the following arrays.
*/ static const __u16 spca501c_arowana_open_data[][3] = { /* bmRequest,value,index */ {0x02, 0x0007, 0x0005}, {0x02, 0xa048, 0x0000}, {0x05, 0x0022, 0x0004}, {0x01, 0x0006, 0x0011}, {0x01, 0x00ff, 0x0012}, {0x01, 0x0014, 0x0013}, {0x01, 0x0000, 0x0014}, {0x01, 0x0042, 0x0051}, {0x01, 0x0040, 0x0052}, {0x01, 0x0051, 0x0053}, {0x01, 0x0040, 0x0054}, {0x01, 0x0000, 0x0055}, {0x00, 0x0025, 0x0000}, {0x00, 0x0026, 0x0000}, {0x00, 0x0001, 0x0000}, {0x00, 0x0027, 0x0000}, {0x00, 0x008a, 0x0000}, {} }; static const __u16 spca501c_arowana_init_data[][3] = { /* bmRequest,value,index */ {0x02, 0x0007, 0x0005}, {0x02, 0xa048, 0x0000}, {0x05, 0x0022, 0x0004}, {0x01, 0x0006, 0x0011}, {0x01, 0x00ff, 0x0012}, {0x01, 0x0014, 0x0013}, {0x01, 0x0000, 0x0014}, {0x01, 0x0042, 0x0051}, {0x01, 0x0040, 0x0052}, {0x01, 0x0051, 0x0053}, {0x01, 0x0040, 0x0054}, {0x01, 0x0000, 0x0055}, {0x00, 0x0025, 0x0000}, {0x00, 0x0026, 0x0000}, {0x00, 0x0001, 0x0000}, {0x00, 0x0027, 0x0000}, {0x00, 0x008a, 0x0000}, {0x02, 0x0000, 0x0005}, {0x02, 0x0007, 0x0005}, {0x02, 0x2000, 0x0000}, {0x05, 0x0022, 0x0004}, {0x05, 0x0015, 0x0001}, {0x05, 0x00ea, 0x0000}, {0x05, 0x0021, 0x0001}, {0x05, 0x00d2, 0x0000}, {0x05, 0x0023, 0x0001}, {0x05, 0x0003, 0x0000}, {0x05, 0x0030, 0x0001}, {0x05, 0x002b, 0x0000}, {0x05, 0x0031, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0032, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0033, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0034, 0x0001}, {0x05, 0x0002, 0x0000}, {0x05, 0x0050, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0051, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0052, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0054, 0x0001}, {0x05, 0x0001, 0x0000}, {0x00, 0x0000, 0x0001}, {0x00, 0x0000, 0x0002}, {0x00, 0x000c, 0x0003}, {0x00, 0x0000, 0x0004}, {0x00, 0x0090, 0x0005}, {0x00, 0x0000, 0x0006}, {0x00, 0x0040, 0x0007}, {0x00, 0x00c0, 0x0008}, {0x00, 0x004a, 0x0009}, {0x00, 0x0000, 0x000a}, {0x00, 0x0000, 0x000b}, {0x00, 0x0001, 0x000c}, {0x00, 0x0001, 0x000d}, {0x00, 0x0000, 0x000e}, {0x00, 0x0002, 0x000f}, {0x00, 0x0001, 0x0010}, {0x00, 0x0000, 0x0011}, {0x00, 0x0000, 0x0012}, {0x00, 0x0002, 0x0020}, {0x00, 0x0080, 0x0021}, {0x00, 0x0001, 0x0022}, {0x00, 0x00e0, 0x0023}, {0x00, 0x0000, 0x0024}, {0x00, 0x00d5, 0x0025}, {0x00, 0x0000, 0x0026}, {0x00, 0x000b, 0x0027}, {0x00, 0x0000, 0x0046}, {0x00, 0x0000, 0x0047}, {0x00, 0x0000, 0x0048}, {0x00, 0x0000, 0x0049}, {0x00, 0x0008, 0x004a}, {0xff, 0x0000, 0x00d0}, {0xff, 0x00d8, 0x00d1}, {0xff, 0x0000, 0x00d4}, {0xff, 0x0000, 0x00d5}, {0x01, 0x00a6, 0x0000}, {0x01, 0x0028, 0x0001}, {0x01, 0x0000, 0x0002}, {0x01, 0x000a, 0x0003}, {0x01, 0x0040, 0x0004}, {0x01, 0x0066, 0x0007}, {0x01, 0x0011, 0x0008}, {0x01, 0x0032, 0x0009}, {0x01, 0x00fd, 0x000a}, {0x01, 0x0038, 0x000b}, {0x01, 0x00d1, 0x000c}, {0x01, 0x00f7, 0x000d}, {0x01, 0x00ed, 0x000e}, {0x01, 0x00d8, 0x000f}, {0x01, 0x0038, 0x0010}, {0x01, 0x00ff, 0x0015}, {0x01, 0x0001, 0x0016}, {0x01, 0x0032, 0x0017}, {0x01, 0x0023, 0x0018}, {0x01, 0x00ce, 0x0019}, {0x01, 0x0023, 0x001a}, {0x01, 0x0032, 0x001b}, {0x01, 0x008d, 0x001c}, {0x01, 0x00ce, 0x001d}, {0x01, 0x008d, 0x001e}, {0x01, 0x0000, 0x001f}, {0x01, 0x0000, 0x0020}, {0x01, 0x00ff, 0x003e}, {0x01, 0x0003, 0x003f}, {0x01, 0x0000, 0x0040}, {0x01, 0x0035, 0x0041}, {0x01, 0x0053, 0x0042}, {0x01, 0x0069, 0x0043}, {0x01, 0x007c, 0x0044}, {0x01, 0x008c, 0x0045}, {0x01, 0x009a, 0x0046}, {0x01, 0x00a8, 0x0047}, {0x01, 0x00b4, 0x0048}, {0x01, 0x00bf, 0x0049}, {0x01, 0x00ca, 0x004a}, {0x01, 0x00d4, 0x004b}, {0x01, 0x00dd, 0x004c}, {0x01, 0x00e7, 0x004d}, {0x01, 0x00ef, 
0x004e}, {0x01, 0x00f8, 0x004f}, {0x01, 0x00ff, 0x0050}, {0x01, 0x0001, 0x0056}, {0x01, 0x0060, 0x0057}, {0x01, 0x0040, 0x0058}, {0x01, 0x0011, 0x0059}, {0x01, 0x0001, 0x005a}, {0x02, 0x0007, 0x0005}, {0x02, 0xa048, 0x0000}, {0x02, 0x0007, 0x0005}, {0x02, 0x0015, 0x0006}, {0x02, 0x100a, 0x0007}, {0x02, 0xa048, 0x0000}, {0x02, 0xc002, 0x0001}, {0x02, 0x000f, 0x0005}, {0x02, 0xa048, 0x0000}, {0x05, 0x0022, 0x0004}, {0x05, 0x0025, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0026, 0x0001}, {0x05, 0x0001, 0x0000}, {0x05, 0x0027, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0001, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0021, 0x0001}, {0x05, 0x00d2, 0x0000}, {0x05, 0x0020, 0x0001}, {0x05, 0x0000, 0x0000}, {0x00, 0x0090, 0x0005}, {0x01, 0x00a6, 0x0000}, {0x02, 0x0007, 0x0005}, {0x02, 0x2000, 0x0000}, {0x05, 0x0022, 0x0004}, {0x05, 0x0015, 0x0001}, {0x05, 0x00ea, 0x0000}, {0x05, 0x0021, 0x0001}, {0x05, 0x00d2, 0x0000}, {0x05, 0x0023, 0x0001}, {0x05, 0x0003, 0x0000}, {0x05, 0x0030, 0x0001}, {0x05, 0x002b, 0x0000}, {0x05, 0x0031, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0032, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0033, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0034, 0x0001}, {0x05, 0x0002, 0x0000}, {0x05, 0x0050, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0051, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0052, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0054, 0x0001}, {0x05, 0x0001, 0x0000}, {0x00, 0x0000, 0x0001}, {0x00, 0x0000, 0x0002}, {0x00, 0x000c, 0x0003}, {0x00, 0x0000, 0x0004}, {0x00, 0x0090, 0x0005}, {0x00, 0x0000, 0x0006}, {0x00, 0x0040, 0x0007}, {0x00, 0x00c0, 0x0008}, {0x00, 0x004a, 0x0009}, {0x00, 0x0000, 0x000a}, {0x00, 0x0000, 0x000b}, {0x00, 0x0001, 0x000c}, {0x00, 0x0001, 0x000d}, {0x00, 0x0000, 0x000e}, {0x00, 0x0002, 0x000f}, {0x00, 0x0001, 0x0010}, {0x00, 0x0000, 0x0011}, {0x00, 0x0000, 0x0012}, {0x00, 0x0002, 0x0020}, {0x00, 0x0080, 0x0021}, {0x00, 0x0001, 0x0022}, {0x00, 0x00e0, 0x0023}, {0x00, 0x0000, 0x0024}, {0x00, 0x00d5, 0x0025}, {0x00, 0x0000, 0x0026}, {0x00, 0x000b, 0x0027}, {0x00, 0x0000, 0x0046}, {0x00, 0x0000, 0x0047}, {0x00, 0x0000, 0x0048}, {0x00, 0x0000, 0x0049}, {0x00, 0x0008, 0x004a}, {0xff, 0x0000, 0x00d0}, {0xff, 0x00d8, 0x00d1}, {0xff, 0x0000, 0x00d4}, {0xff, 0x0000, 0x00d5}, {0x01, 0x00a6, 0x0000}, {0x01, 0x0028, 0x0001}, {0x01, 0x0000, 0x0002}, {0x01, 0x000a, 0x0003}, {0x01, 0x0040, 0x0004}, {0x01, 0x0066, 0x0007}, {0x01, 0x0011, 0x0008}, {0x01, 0x0032, 0x0009}, {0x01, 0x00fd, 0x000a}, {0x01, 0x0038, 0x000b}, {0x01, 0x00d1, 0x000c}, {0x01, 0x00f7, 0x000d}, {0x01, 0x00ed, 0x000e}, {0x01, 0x00d8, 0x000f}, {0x01, 0x0038, 0x0010}, {0x01, 0x00ff, 0x0015}, {0x01, 0x0001, 0x0016}, {0x01, 0x0032, 0x0017}, {0x01, 0x0023, 0x0018}, {0x01, 0x00ce, 0x0019}, {0x01, 0x0023, 0x001a}, {0x01, 0x0032, 0x001b}, {0x01, 0x008d, 0x001c}, {0x01, 0x00ce, 0x001d}, {0x01, 0x008d, 0x001e}, {0x01, 0x0000, 0x001f}, {0x01, 0x0000, 0x0020}, {0x01, 0x00ff, 0x003e}, {0x01, 0x0003, 0x003f}, {0x01, 0x0000, 0x0040}, {0x01, 0x0035, 0x0041}, {0x01, 0x0053, 0x0042}, {0x01, 0x0069, 0x0043}, {0x01, 0x007c, 0x0044}, {0x01, 0x008c, 0x0045}, {0x01, 0x009a, 0x0046}, {0x01, 0x00a8, 0x0047}, {0x01, 0x00b4, 0x0048}, {0x01, 0x00bf, 0x0049}, {0x01, 0x00ca, 0x004a}, {0x01, 0x00d4, 0x004b}, {0x01, 0x00dd, 0x004c}, {0x01, 0x00e7, 0x004d}, {0x01, 0x00ef, 0x004e}, {0x01, 0x00f8, 0x004f}, {0x01, 0x00ff, 0x0050}, {0x01, 0x0001, 0x0056}, {0x01, 0x0060, 0x0057}, {0x01, 0x0040, 0x0058}, {0x01, 0x0011, 0x0059}, {0x01, 0x0001, 0x005a}, {0x02, 0x0007, 0x0005}, {0x02, 0xa048, 0x0000}, {0x02, 0x0007, 0x0005}, {0x02, 0x0015, 
0x0006}, {0x02, 0x100a, 0x0007}, {0x02, 0xa048, 0x0000}, {0x02, 0xc002, 0x0001}, {0x02, 0x000f, 0x0005}, {0x02, 0xa048, 0x0000}, {0x05, 0x0022, 0x0004}, {0x05, 0x0025, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0026, 0x0001}, {0x05, 0x0001, 0x0000}, {0x05, 0x0027, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0001, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0021, 0x0001}, {0x05, 0x00d2, 0x0000}, {0x05, 0x0020, 0x0001}, {0x05, 0x0000, 0x0000}, {0x00, 0x0090, 0x0005}, {0x01, 0x00a6, 0x0000}, {0x01, 0x0003, 0x003f}, {0x01, 0x0001, 0x0056}, {0x01, 0x0011, 0x0008}, {0x01, 0x0032, 0x0009}, {0x01, 0xfffd, 0x000a}, {0x01, 0x0023, 0x000b}, {0x01, 0xffea, 0x000c}, {0x01, 0xfff4, 0x000d}, {0x01, 0xfffc, 0x000e}, {0x01, 0xffe3, 0x000f}, {0x01, 0x001f, 0x0010}, {0x01, 0x00a8, 0x0001}, {0x01, 0x0067, 0x0007}, {0x01, 0x0032, 0x0017}, {0x01, 0x0023, 0x0018}, {0x01, 0x00ce, 0x0019}, {0x01, 0x0023, 0x001a}, {0x01, 0x0032, 0x001b}, {0x01, 0x008d, 0x001c}, {0x01, 0x00ce, 0x001d}, {0x01, 0x008d, 0x001e}, {0x01, 0x00c8, 0x0015}, {0x01, 0x0032, 0x0016}, {0x01, 0x0000, 0x0011}, {0x01, 0x0000, 0x0012}, {0x01, 0x0000, 0x0013}, {0x01, 0x000a, 0x0003}, {0x02, 0xc002, 0x0001}, {0x02, 0x0007, 0x0005}, {0x02, 0xc000, 0x0001}, {0x02, 0x0000, 0x0005}, {0x02, 0x0007, 0x0005}, {0x02, 0x2000, 0x0000}, {0x05, 0x0022, 0x0004}, {0x05, 0x0015, 0x0001}, {0x05, 0x00ea, 0x0000}, {0x05, 0x0021, 0x0001}, {0x05, 0x00d2, 0x0000}, {0x05, 0x0023, 0x0001}, {0x05, 0x0003, 0x0000}, {0x05, 0x0030, 0x0001}, {0x05, 0x002b, 0x0000}, {0x05, 0x0031, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0032, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0033, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0034, 0x0001}, {0x05, 0x0002, 0x0000}, {0x05, 0x0050, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0051, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0052, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0054, 0x0001}, {0x05, 0x0001, 0x0000}, {0x00, 0x0000, 0x0001}, {0x00, 0x0000, 0x0002}, {0x00, 0x000c, 0x0003}, {0x00, 0x0000, 0x0004}, {0x00, 0x0090, 0x0005}, {0x00, 0x0000, 0x0006}, {0x00, 0x0040, 0x0007}, {0x00, 0x00c0, 0x0008}, {0x00, 0x004a, 0x0009}, {0x00, 0x0000, 0x000a}, {0x00, 0x0000, 0x000b}, {0x00, 0x0001, 0x000c}, {0x00, 0x0001, 0x000d}, {0x00, 0x0000, 0x000e}, {0x00, 0x0002, 0x000f}, {0x00, 0x0001, 0x0010}, {0x00, 0x0000, 0x0011}, {0x00, 0x0000, 0x0012}, {0x00, 0x0002, 0x0020}, {0x00, 0x0080, 0x0021}, {0x00, 0x0001, 0x0022}, {0x00, 0x00e0, 0x0023}, {0x00, 0x0000, 0x0024}, {0x00, 0x00d5, 0x0025}, {0x00, 0x0000, 0x0026}, {0x00, 0x000b, 0x0027}, {0x00, 0x0000, 0x0046}, {0x00, 0x0000, 0x0047}, {0x00, 0x0000, 0x0048}, {0x00, 0x0000, 0x0049}, {0x00, 0x0008, 0x004a}, {0xff, 0x0000, 0x00d0}, {0xff, 0x00d8, 0x00d1}, {0xff, 0x0000, 0x00d4}, {0xff, 0x0000, 0x00d5}, {0x01, 0x00a6, 0x0000}, {0x01, 0x0028, 0x0001}, {0x01, 0x0000, 0x0002}, {0x01, 0x000a, 0x0003}, {0x01, 0x0040, 0x0004}, {0x01, 0x0066, 0x0007}, {0x01, 0x0011, 0x0008}, {0x01, 0x0032, 0x0009}, {0x01, 0x00fd, 0x000a}, {0x01, 0x0038, 0x000b}, {0x01, 0x00d1, 0x000c}, {0x01, 0x00f7, 0x000d}, {0x01, 0x00ed, 0x000e}, {0x01, 0x00d8, 0x000f}, {0x01, 0x0038, 0x0010}, {0x01, 0x00ff, 0x0015}, {0x01, 0x0001, 0x0016}, {0x01, 0x0032, 0x0017}, {0x01, 0x0023, 0x0018}, {0x01, 0x00ce, 0x0019}, {0x01, 0x0023, 0x001a}, {0x01, 0x0032, 0x001b}, {0x01, 0x008d, 0x001c}, {0x01, 0x00ce, 0x001d}, {0x01, 0x008d, 0x001e}, {0x01, 0x0000, 0x001f}, {0x01, 0x0000, 0x0020}, {0x01, 0x00ff, 0x003e}, {0x01, 0x0003, 0x003f}, {0x01, 0x0000, 0x0040}, {0x01, 0x0035, 0x0041}, {0x01, 0x0053, 0x0042}, {0x01, 0x0069, 0x0043}, {0x01, 0x007c, 0x0044}, {0x01, 0x008c, 
0x0045}, {0x01, 0x009a, 0x0046}, {0x01, 0x00a8, 0x0047}, {0x01, 0x00b4, 0x0048}, {0x01, 0x00bf, 0x0049}, {0x01, 0x00ca, 0x004a}, {0x01, 0x00d4, 0x004b}, {0x01, 0x00dd, 0x004c}, {0x01, 0x00e7, 0x004d}, {0x01, 0x00ef, 0x004e}, {0x01, 0x00f8, 0x004f}, {0x01, 0x00ff, 0x0050}, {0x01, 0x0001, 0x0056}, {0x01, 0x0060, 0x0057}, {0x01, 0x0040, 0x0058}, {0x01, 0x0011, 0x0059}, {0x01, 0x0001, 0x005a}, {0x02, 0x0007, 0x0005}, {0x02, 0xa048, 0x0000}, {0x02, 0x0007, 0x0005}, {0x02, 0x0015, 0x0006}, {0x02, 0x100a, 0x0007}, {0x02, 0xa048, 0x0000}, {0x02, 0xc002, 0x0001}, {0x02, 0x000f, 0x0005}, {0x02, 0xa048, 0x0000}, {0x05, 0x0022, 0x0004}, {0x05, 0x0025, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0026, 0x0001}, {0x05, 0x0001, 0x0000}, {0x05, 0x0027, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0001, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0021, 0x0001}, {0x05, 0x00d2, 0x0000}, {0x05, 0x0020, 0x0001}, {0x05, 0x0000, 0x0000}, {0x00, 0x0090, 0x0005}, {0x01, 0x00a6, 0x0000}, {0x02, 0x0007, 0x0005}, {0x02, 0x2000, 0x0000}, {0x05, 0x0022, 0x0004}, {0x05, 0x0015, 0x0001}, {0x05, 0x00ea, 0x0000}, {0x05, 0x0021, 0x0001}, {0x05, 0x00d2, 0x0000}, {0x05, 0x0023, 0x0001}, {0x05, 0x0003, 0x0000}, {0x05, 0x0030, 0x0001}, {0x05, 0x002b, 0x0000}, {0x05, 0x0031, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0032, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0033, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0034, 0x0001}, {0x05, 0x0002, 0x0000}, {0x05, 0x0050, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0051, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0052, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0054, 0x0001}, {0x05, 0x0001, 0x0000}, {0x00, 0x0000, 0x0001}, {0x00, 0x0000, 0x0002}, {0x00, 0x000c, 0x0003}, {0x00, 0x0000, 0x0004}, {0x00, 0x0090, 0x0005}, {0x00, 0x0000, 0x0006}, {0x00, 0x0040, 0x0007}, {0x00, 0x00c0, 0x0008}, {0x00, 0x004a, 0x0009}, {0x00, 0x0000, 0x000a}, {0x00, 0x0000, 0x000b}, {0x00, 0x0001, 0x000c}, {0x00, 0x0001, 0x000d}, {0x00, 0x0000, 0x000e}, {0x00, 0x0002, 0x000f}, {0x00, 0x0001, 0x0010}, {0x00, 0x0000, 0x0011}, {0x00, 0x0000, 0x0012}, {0x00, 0x0002, 0x0020}, {0x00, 0x0080, 0x0021}, {0x00, 0x0001, 0x0022}, {0x00, 0x00e0, 0x0023}, {0x00, 0x0000, 0x0024}, {0x00, 0x00d5, 0x0025}, {0x00, 0x0000, 0x0026}, {0x00, 0x000b, 0x0027}, {0x00, 0x0000, 0x0046}, {0x00, 0x0000, 0x0047}, {0x00, 0x0000, 0x0048}, {0x00, 0x0000, 0x0049}, {0x00, 0x0008, 0x004a}, {0xff, 0x0000, 0x00d0}, {0xff, 0x00d8, 0x00d1}, {0xff, 0x0000, 0x00d4}, {0xff, 0x0000, 0x00d5}, {0x01, 0x00a6, 0x0000}, {0x01, 0x0028, 0x0001}, {0x01, 0x0000, 0x0002}, {0x01, 0x000a, 0x0003}, {0x01, 0x0040, 0x0004}, {0x01, 0x0066, 0x0007}, {0x01, 0x0011, 0x0008}, {0x01, 0x0032, 0x0009}, {0x01, 0x00fd, 0x000a}, {0x01, 0x0038, 0x000b}, {0x01, 0x00d1, 0x000c}, {0x01, 0x00f7, 0x000d}, {0x01, 0x00ed, 0x000e}, {0x01, 0x00d8, 0x000f}, {0x01, 0x0038, 0x0010}, {0x01, 0x00ff, 0x0015}, {0x01, 0x0001, 0x0016}, {0x01, 0x0032, 0x0017}, {0x01, 0x0023, 0x0018}, {0x01, 0x00ce, 0x0019}, {0x01, 0x0023, 0x001a}, {0x01, 0x0032, 0x001b}, {0x01, 0x008d, 0x001c}, {0x01, 0x00ce, 0x001d}, {0x01, 0x008d, 0x001e}, {0x01, 0x0000, 0x001f}, {0x01, 0x0000, 0x0020}, {0x01, 0x00ff, 0x003e}, {0x01, 0x0003, 0x003f}, {0x01, 0x0000, 0x0040}, {0x01, 0x0035, 0x0041}, {0x01, 0x0053, 0x0042}, {0x01, 0x0069, 0x0043}, {0x01, 0x007c, 0x0044}, {0x01, 0x008c, 0x0045}, {0x01, 0x009a, 0x0046}, {0x01, 0x00a8, 0x0047}, {0x01, 0x00b4, 0x0048}, {0x01, 0x00bf, 0x0049}, {0x01, 0x00ca, 0x004a}, {0x01, 0x00d4, 0x004b}, {0x01, 0x00dd, 0x004c}, {0x01, 0x00e7, 0x004d}, {0x01, 0x00ef, 0x004e}, {0x01, 0x00f8, 0x004f}, {0x01, 0x00ff, 
0x0050}, {0x01, 0x0001, 0x0056}, {0x01, 0x0060, 0x0057}, {0x01, 0x0040, 0x0058}, {0x01, 0x0011, 0x0059}, {0x01, 0x0001, 0x005a}, {0x02, 0x0007, 0x0005}, {0x02, 0xa048, 0x0000}, {0x02, 0x0007, 0x0005}, {0x02, 0x0015, 0x0006}, {0x02, 0x100a, 0x0007}, {0x02, 0xa048, 0x0000}, {0x02, 0xc002, 0x0001}, {0x02, 0x000f, 0x0005}, {0x02, 0xa048, 0x0000}, {0x05, 0x0022, 0x0004}, {0x05, 0x0025, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0026, 0x0001}, {0x05, 0x0001, 0x0000}, {0x05, 0x0027, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0001, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0021, 0x0001}, {0x05, 0x00d2, 0x0000}, {0x05, 0x0020, 0x0001}, {0x05, 0x0000, 0x0000}, {0x00, 0x0090, 0x0005}, {0x01, 0x00a6, 0x0000}, {0x05, 0x0026, 0x0001}, {0x05, 0x0001, 0x0000}, {0x05, 0x0027, 0x0001}, {0x05, 0x000f, 0x0000}, {0x01, 0x0003, 0x003f}, {0x01, 0x0001, 0x0056}, {0x01, 0x0011, 0x0008}, {0x01, 0x0032, 0x0009}, {0x01, 0xfffd, 0x000a}, {0x01, 0x0023, 0x000b}, {0x01, 0xffea, 0x000c}, {0x01, 0xfff4, 0x000d}, {0x01, 0xfffc, 0x000e}, {0x01, 0xffe3, 0x000f}, {0x01, 0x001f, 0x0010}, {0x01, 0x00a8, 0x0001}, {0x01, 0x0067, 0x0007}, {0x01, 0x0042, 0x0051}, {0x01, 0x0051, 0x0053}, {0x01, 0x000a, 0x0003}, {0x02, 0xc002, 0x0001}, {0x02, 0x0007, 0x0005}, {0x02, 0xc000, 0x0001}, {0x02, 0x0000, 0x0005}, {0x02, 0x0007, 0x0005}, {0x02, 0x2000, 0x0000}, {0x05, 0x0022, 0x0004}, {0x05, 0x0015, 0x0001}, {0x05, 0x00ea, 0x0000}, {0x05, 0x0021, 0x0001}, {0x05, 0x00d2, 0x0000}, {0x05, 0x0023, 0x0001}, {0x05, 0x0003, 0x0000}, {0x05, 0x0030, 0x0001}, {0x05, 0x002b, 0x0000}, {0x05, 0x0031, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0032, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0033, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0034, 0x0001}, {0x05, 0x0002, 0x0000}, {0x05, 0x0050, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0051, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0052, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0054, 0x0001}, {0x05, 0x0001, 0x0000}, {0x00, 0x0000, 0x0001}, {0x00, 0x0000, 0x0002}, {0x00, 0x000c, 0x0003}, {0x00, 0x0000, 0x0004}, {0x00, 0x0090, 0x0005}, {0x00, 0x0000, 0x0006}, {0x00, 0x0040, 0x0007}, {0x00, 0x00c0, 0x0008}, {0x00, 0x004a, 0x0009}, {0x00, 0x0000, 0x000a}, {0x00, 0x0000, 0x000b}, {0x00, 0x0001, 0x000c}, {0x00, 0x0001, 0x000d}, {0x00, 0x0000, 0x000e}, {0x00, 0x0002, 0x000f}, {0x00, 0x0001, 0x0010}, {0x00, 0x0000, 0x0011}, {0x00, 0x0000, 0x0012}, {0x00, 0x0002, 0x0020}, {0x00, 0x0080, 0x0021}, {0x00, 0x0001, 0x0022}, {0x00, 0x00e0, 0x0023}, {0x00, 0x0000, 0x0024}, {0x00, 0x00d5, 0x0025}, {0x00, 0x0000, 0x0026}, {0x00, 0x000b, 0x0027}, {0x00, 0x0000, 0x0046}, {0x00, 0x0000, 0x0047}, {0x00, 0x0000, 0x0048}, {0x00, 0x0000, 0x0049}, {0x00, 0x0008, 0x004a}, {0xff, 0x0000, 0x00d0}, {0xff, 0x00d8, 0x00d1}, {0xff, 0x0000, 0x00d4}, {0xff, 0x0000, 0x00d5}, {0x01, 0x00a6, 0x0000}, {0x01, 0x0028, 0x0001}, {0x01, 0x0000, 0x0002}, {0x01, 0x000a, 0x0003}, {0x01, 0x0040, 0x0004}, {0x01, 0x0066, 0x0007}, {0x01, 0x0011, 0x0008}, {0x01, 0x0032, 0x0009}, {0x01, 0x00fd, 0x000a}, {0x01, 0x0038, 0x000b}, {0x01, 0x00d1, 0x000c}, {0x01, 0x00f7, 0x000d}, {0x01, 0x00ed, 0x000e}, {0x01, 0x00d8, 0x000f}, {0x01, 0x0038, 0x0010}, {0x01, 0x00ff, 0x0015}, {0x01, 0x0001, 0x0016}, {0x01, 0x0032, 0x0017}, {0x01, 0x0023, 0x0018}, {0x01, 0x00ce, 0x0019}, {0x01, 0x0023, 0x001a}, {0x01, 0x0032, 0x001b}, {0x01, 0x008d, 0x001c}, {0x01, 0x00ce, 0x001d}, {0x01, 0x008d, 0x001e}, {0x01, 0x0000, 0x001f}, {0x01, 0x0000, 0x0020}, {0x01, 0x00ff, 0x003e}, {0x01, 0x0003, 0x003f}, {0x01, 0x0000, 0x0040}, {0x01, 0x0035, 0x0041}, {0x01, 0x0053, 0x0042}, {0x01, 0x0069, 
0x0043}, {0x01, 0x007c, 0x0044}, {0x01, 0x008c, 0x0045}, {0x01, 0x009a, 0x0046}, {0x01, 0x00a8, 0x0047}, {0x01, 0x00b4, 0x0048}, {0x01, 0x00bf, 0x0049}, {0x01, 0x00ca, 0x004a}, {0x01, 0x00d4, 0x004b}, {0x01, 0x00dd, 0x004c}, {0x01, 0x00e7, 0x004d}, {0x01, 0x00ef, 0x004e}, {0x01, 0x00f8, 0x004f}, {0x01, 0x00ff, 0x0050}, {0x01, 0x0001, 0x0056}, {0x01, 0x0060, 0x0057}, {0x01, 0x0040, 0x0058}, {0x01, 0x0011, 0x0059}, {0x01, 0x0001, 0x005a}, {0x02, 0x0007, 0x0005}, {0x02, 0xa048, 0x0000}, {0x02, 0x0007, 0x0005}, {0x02, 0x0015, 0x0006}, {0x02, 0x100a, 0x0007}, {0x02, 0xa048, 0x0000}, {0x02, 0xc002, 0x0001}, {0x02, 0x000f, 0x0005}, {0x02, 0xa048, 0x0000}, {0x05, 0x0022, 0x0004}, {0x05, 0x0025, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0026, 0x0001}, {0x05, 0x0001, 0x0000}, {0x05, 0x0027, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0001, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0021, 0x0001}, {0x05, 0x00d2, 0x0000}, {0x05, 0x0020, 0x0001}, {0x05, 0x0000, 0x0000}, {0x00, 0x0090, 0x0005}, {0x01, 0x00a6, 0x0000}, {0x02, 0x0007, 0x0005}, {0x02, 0x2000, 0x0000}, {0x05, 0x0022, 0x0004}, {0x05, 0x0015, 0x0001}, {0x05, 0x00ea, 0x0000}, {0x05, 0x0021, 0x0001}, {0x05, 0x00d2, 0x0000}, {0x05, 0x0023, 0x0001}, {0x05, 0x0003, 0x0000}, {0x05, 0x0030, 0x0001}, {0x05, 0x002b, 0x0000}, {0x05, 0x0031, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0032, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0033, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0034, 0x0001}, {0x05, 0x0002, 0x0000}, {0x05, 0x0050, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0051, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0052, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0054, 0x0001}, {0x05, 0x0001, 0x0000}, {0x00, 0x0000, 0x0001}, {0x00, 0x0000, 0x0002}, {0x00, 0x000c, 0x0003}, {0x00, 0x0000, 0x0004}, {0x00, 0x0090, 0x0005}, {0x00, 0x0000, 0x0006}, {0x00, 0x0040, 0x0007}, {0x00, 0x00c0, 0x0008}, {0x00, 0x004a, 0x0009}, {0x00, 0x0000, 0x000a}, {0x00, 0x0000, 0x000b}, {0x00, 0x0001, 0x000c}, {0x00, 0x0001, 0x000d}, {0x00, 0x0000, 0x000e}, {0x00, 0x0002, 0x000f}, {0x00, 0x0001, 0x0010}, {0x00, 0x0000, 0x0011}, {0x00, 0x0000, 0x0012}, {0x00, 0x0002, 0x0020}, {0x00, 0x0080, 0x0021}, {0x00, 0x0001, 0x0022}, {0x00, 0x00e0, 0x0023}, {0x00, 0x0000, 0x0024}, {0x00, 0x00d5, 0x0025}, {0x00, 0x0000, 0x0026}, {0x00, 0x000b, 0x0027}, {0x00, 0x0000, 0x0046}, {0x00, 0x0000, 0x0047}, {0x00, 0x0000, 0x0048}, {0x00, 0x0000, 0x0049}, {0x00, 0x0008, 0x004a}, {0xff, 0x0000, 0x00d0}, {0xff, 0x00d8, 0x00d1}, {0xff, 0x0000, 0x00d4}, {0xff, 0x0000, 0x00d5}, {0x01, 0x00a6, 0x0000}, {0x01, 0x0028, 0x0001}, {0x01, 0x0000, 0x0002}, {0x01, 0x000a, 0x0003}, {0x01, 0x0040, 0x0004}, {0x01, 0x0066, 0x0007}, {0x01, 0x0011, 0x0008}, {0x01, 0x0032, 0x0009}, {0x01, 0x00fd, 0x000a}, {0x01, 0x0038, 0x000b}, {0x01, 0x00d1, 0x000c}, {0x01, 0x00f7, 0x000d}, {0x01, 0x00ed, 0x000e}, {0x01, 0x00d8, 0x000f}, {0x01, 0x0038, 0x0010}, {0x01, 0x00ff, 0x0015}, {0x01, 0x0001, 0x0016}, {0x01, 0x0032, 0x0017}, {0x01, 0x0023, 0x0018}, {0x01, 0x00ce, 0x0019}, {0x01, 0x0023, 0x001a}, {0x01, 0x0032, 0x001b}, {0x01, 0x008d, 0x001c}, {0x01, 0x00ce, 0x001d}, {0x01, 0x008d, 0x001e}, {0x01, 0x0000, 0x001f}, {0x01, 0x0000, 0x0020}, {0x01, 0x00ff, 0x003e}, {0x01, 0x0003, 0x003f}, {0x01, 0x0000, 0x0040}, {0x01, 0x0035, 0x0041}, {0x01, 0x0053, 0x0042}, {0x01, 0x0069, 0x0043}, {0x01, 0x007c, 0x0044}, {0x01, 0x008c, 0x0045}, {0x01, 0x009a, 0x0046}, {0x01, 0x00a8, 0x0047}, {0x01, 0x00b4, 0x0048}, {0x01, 0x00bf, 0x0049}, {0x01, 0x00ca, 0x004a}, {0x01, 0x00d4, 0x004b}, {0x01, 0x00dd, 0x004c}, {0x01, 0x00e7, 0x004d}, {0x01, 0x00ef, 
0x004e}, {0x01, 0x00f8, 0x004f}, {0x01, 0x00ff, 0x0050}, {0x01, 0x0001, 0x0056}, {0x01, 0x0060, 0x0057}, {0x01, 0x0040, 0x0058}, {0x01, 0x0011, 0x0059}, {0x01, 0x0001, 0x005a}, {0x02, 0x0007, 0x0005}, {0x02, 0xa048, 0x0000}, {0x02, 0x0007, 0x0005}, {0x02, 0x0015, 0x0006}, {0x02, 0x100a, 0x0007}, {0x02, 0xa048, 0x0000}, {0x02, 0xc002, 0x0001}, {0x02, 0x000f, 0x0005}, {0x02, 0xa048, 0x0000}, {0x05, 0x0022, 0x0004}, {0x05, 0x0025, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0026, 0x0001}, {0x05, 0x0001, 0x0000}, {0x05, 0x0027, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0001, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0021, 0x0001}, {0x05, 0x00d2, 0x0000}, {0x05, 0x0020, 0x0001}, {0x05, 0x0000, 0x0000}, {0x00, 0x0090, 0x0005}, {0x01, 0x00a6, 0x0000}, {0x05, 0x0026, 0x0001}, {0x05, 0x0001, 0x0000}, {0x05, 0x0027, 0x0001}, {0x05, 0x001e, 0x0000}, {0x01, 0x0003, 0x003f}, {0x01, 0x0001, 0x0056}, {0x01, 0x0011, 0x0008}, {0x01, 0x0032, 0x0009}, {0x01, 0xfffd, 0x000a}, {0x01, 0x0023, 0x000b}, {0x01, 0xffea, 0x000c}, {0x01, 0xfff4, 0x000d}, {0x01, 0xfffc, 0x000e}, {0x01, 0xffe3, 0x000f}, {0x01, 0x001f, 0x0010}, {0x01, 0x00a8, 0x0001}, {0x01, 0x0067, 0x0007}, {0x01, 0x0042, 0x0051}, {0x01, 0x0051, 0x0053}, {0x01, 0x000a, 0x0003}, {0x02, 0xc002, 0x0001}, {0x02, 0x0007, 0x0005}, {0x01, 0x0042, 0x0051}, {0x01, 0x0051, 0x0053}, {0x05, 0x0026, 0x0001}, {0x05, 0x0001, 0x0000}, {0x05, 0x0027, 0x0001}, {0x05, 0x002d, 0x0000}, {0x01, 0x0003, 0x003f}, {0x01, 0x0001, 0x0056}, {0x02, 0xc000, 0x0001}, {0x02, 0x0000, 0x0005}, {} }; /* Unknown camera from Ori Usbid 0x0000:0x0000 */ /* Based on snoops from Ori Cohen */ static const __u16 spca501c_mysterious_open_data[][3] = { {0x02, 0x000f, 0x0005}, {0x02, 0xa048, 0x0000}, {0x05, 0x0022, 0x0004}, /* DSP Registers */ {0x01, 0x0016, 0x0011}, /* RGB offset */ {0x01, 0x0000, 0x0012}, {0x01, 0x0006, 0x0013}, {0x01, 0x0078, 0x0051}, {0x01, 0x0040, 0x0052}, {0x01, 0x0046, 0x0053}, {0x01, 0x0040, 0x0054}, {0x00, 0x0025, 0x0000}, /* {0x00, 0x0000, 0x0000 }, */ /* Part 2 */ /* TG Registers */ {0x00, 0x0026, 0x0000}, {0x00, 0x0001, 0x0000}, {0x00, 0x0027, 0x0000}, {0x00, 0x008a, 0x0000}, {0x02, 0x0007, 0x0005}, {0x02, 0x2000, 0x0000}, {0x05, 0x0022, 0x0004}, {0x05, 0x0015, 0x0001}, {0x05, 0x00ea, 0x0000}, {0x05, 0x0021, 0x0001}, {0x05, 0x00d2, 0x0000}, {0x05, 0x0023, 0x0001}, {0x05, 0x0003, 0x0000}, {0x05, 0x0030, 0x0001}, {0x05, 0x002b, 0x0000}, {0x05, 0x0031, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0032, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0033, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0034, 0x0001}, {0x05, 0x0002, 0x0000}, {0x05, 0x0050, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0051, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0052, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0054, 0x0001}, {0x05, 0x0001, 0x0000}, {} }; /* Based on snoops from Ori Cohen */ static const __u16 spca501c_mysterious_init_data[][3] = { /* Part 3 */ /* TG registers */ /* {0x00, 0x0000, 0x0000}, */ {0x00, 0x0000, 0x0001}, {0x00, 0x0000, 0x0002}, {0x00, 0x0006, 0x0003}, {0x00, 0x0000, 0x0004}, {0x00, 0x0090, 0x0005}, {0x00, 0x0000, 0x0006}, {0x00, 0x0040, 0x0007}, {0x00, 0x00c0, 0x0008}, {0x00, 0x004a, 0x0009}, {0x00, 0x0000, 0x000a}, {0x00, 0x0000, 0x000b}, {0x00, 0x0001, 0x000c}, {0x00, 0x0001, 0x000d}, {0x00, 0x0000, 0x000e}, {0x00, 0x0002, 0x000f}, {0x00, 0x0001, 0x0010}, {0x00, 0x0000, 0x0011}, {0x00, 0x0001, 0x0012}, {0x00, 0x0002, 0x0020}, {0x00, 0x0080, 0x0021}, /* 640 */ {0x00, 0x0001, 0x0022}, {0x00, 0x00e0, 0x0023}, /* 480 */ {0x00, 0x0000, 0x0024}, /* Offset H high */ {0x00, 0x00d3,
0x0025}, /* low */ {0x00, 0x0000, 0x0026}, /* Offset V */ {0x00, 0x000d, 0x0027}, /* low */ {0x00, 0x0000, 0x0046}, {0x00, 0x0000, 0x0047}, {0x00, 0x0000, 0x0048}, {0x00, 0x0000, 0x0049}, {0x00, 0x0008, 0x004a}, /* DSP Registers */ {0x01, 0x00a6, 0x0000}, {0x01, 0x0028, 0x0001}, {0x01, 0x0000, 0x0002}, {0x01, 0x000a, 0x0003}, /* Level Calc bit7 ->1 Auto */ {0x01, 0x0040, 0x0004}, {0x01, 0x0066, 0x0007}, {0x01, 0x000f, 0x0008}, /* A11 Color correction coeff */ {0x01, 0x002d, 0x0009}, /* A12 */ {0x01, 0x0005, 0x000a}, /* A13 */ {0x01, 0x0023, 0x000b}, /* A21 */ {0x01, 0x00e0, 0x000c}, /* A22 */ {0x01, 0x00fd, 0x000d}, /* A23 */ {0x01, 0x00f4, 0x000e}, /* A31 */ {0x01, 0x00e4, 0x000f}, /* A32 */ {0x01, 0x0028, 0x0010}, /* A33 */ {0x01, 0x00ff, 0x0015}, /* Reserved */ {0x01, 0x0001, 0x0016}, /* Reserved */ {0x01, 0x0032, 0x0017}, /* Win1 Start begin */ {0x01, 0x0023, 0x0018}, {0x01, 0x00ce, 0x0019}, {0x01, 0x0023, 0x001a}, {0x01, 0x0032, 0x001b}, {0x01, 0x008d, 0x001c}, {0x01, 0x00ce, 0x001d}, {0x01, 0x008d, 0x001e}, {0x01, 0x0000, 0x001f}, {0x01, 0x0000, 0x0020}, /* Win1 Start end */ {0x01, 0x00ff, 0x003e}, /* Reserved begin */ {0x01, 0x0002, 0x003f}, {0x01, 0x0000, 0x0040}, {0x01, 0x0035, 0x0041}, {0x01, 0x0053, 0x0042}, {0x01, 0x0069, 0x0043}, {0x01, 0x007c, 0x0044}, {0x01, 0x008c, 0x0045}, {0x01, 0x009a, 0x0046}, {0x01, 0x00a8, 0x0047}, {0x01, 0x00b4, 0x0048}, {0x01, 0x00bf, 0x0049}, {0x01, 0x00ca, 0x004a}, {0x01, 0x00d4, 0x004b}, {0x01, 0x00dd, 0x004c}, {0x01, 0x00e7, 0x004d}, {0x01, 0x00ef, 0x004e}, {0x01, 0x00f8, 0x004f}, {0x01, 0x00ff, 0x0050}, {0x01, 0x0003, 0x0056}, /* Reserved end */ {0x01, 0x0060, 0x0057}, /* Edge Gain */ {0x01, 0x0040, 0x0058}, {0x01, 0x0011, 0x0059}, /* Edge Bandwidth */ {0x01, 0x0001, 0x005a}, {0x02, 0x0007, 0x0005}, {0x02, 0xa048, 0x0000}, {0x02, 0x0007, 0x0005}, {0x02, 0x0015, 0x0006}, {0x02, 0x200a, 0x0007}, {0x02, 0xa048, 0x0000}, {0x02, 0xc000, 0x0001}, {0x02, 0x000f, 0x0005}, {0x02, 0xa048, 0x0000}, {0x05, 0x0022, 0x0004}, {0x05, 0x0025, 0x0001}, {0x05, 0x0000, 0x0000}, /* Part 4 */ {0x05, 0x0026, 0x0001}, {0x05, 0x0001, 0x0000}, {0x05, 0x0027, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0001, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0021, 0x0001}, {0x05, 0x00d2, 0x0000}, {0x05, 0x0020, 0x0001}, {0x05, 0x0000, 0x0000}, {0x00, 0x0090, 0x0005}, {0x01, 0x00a6, 0x0000}, {0x02, 0x0000, 0x0005}, {0x05, 0x0026, 0x0001}, {0x05, 0x0001, 0x0000}, {0x05, 0x0027, 0x0001}, {0x05, 0x004e, 0x0000}, /* Part 5 */ {0x01, 0x0003, 0x003f}, {0x01, 0x0001, 0x0056}, {0x01, 0x000f, 0x0008}, {0x01, 0x002d, 0x0009}, {0x01, 0x0005, 0x000a}, {0x01, 0x0023, 0x000b}, {0x01, 0xffe0, 0x000c}, {0x01, 0xfffd, 0x000d}, {0x01, 0xfff4, 0x000e}, {0x01, 0xffe4, 0x000f}, {0x01, 0x0028, 0x0010}, {0x01, 0x00a8, 0x0001}, {0x01, 0x0066, 0x0007}, {0x01, 0x0032, 0x0017}, {0x01, 0x0023, 0x0018}, {0x01, 0x00ce, 0x0019}, {0x01, 0x0023, 0x001a}, {0x01, 0x0032, 0x001b}, {0x01, 0x008d, 0x001c}, {0x01, 0x00ce, 0x001d}, {0x01, 0x008d, 0x001e}, {0x01, 0x00c8, 0x0015}, /* c8 Luma high byte */ {0x01, 0x0032, 0x0016}, /* 32 */ {0x01, 0x0016, 0x0011}, /* R 00 */ {0x01, 0x0016, 0x0012}, /* G 00 */ {0x01, 0x0016, 0x0013}, /* B 00 */ {0x01, 0x000a, 0x0003}, {0x02, 0xc002, 0x0001}, {0x02, 0x0007, 0x0005}, {} }; static int reg_write(struct gspca_dev *gspca_dev, __u16 req, __u16 index, __u16 value) { int ret; struct usb_device *dev = gspca_dev->dev; ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), req, USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, NULL, 0, 500); gspca_dbg(gspca_dev, D_USBO, "reg write: 0x%02x 
0x%02x 0x%02x\n", req, index, value); if (ret < 0) pr_err("reg write: error %d\n", ret); return ret; } static int write_vector(struct gspca_dev *gspca_dev, const __u16 data[][3]) { int ret, i = 0; while (data[i][0] != 0 || data[i][1] != 0 || data[i][2] != 0) { ret = reg_write(gspca_dev, data[i][0], data[i][2], data[i][1]); if (ret < 0) { gspca_err(gspca_dev, "Reg write failed for 0x%02x,0x%02x,0x%02x\n", data[i][0], data[i][1], data[i][2]); return ret; } i++; } return 0; } static void setbrightness(struct gspca_dev *gspca_dev, s32 val) { reg_write(gspca_dev, SPCA501_REG_CCDSP, 0x12, val); } static void setcontrast(struct gspca_dev *gspca_dev, s32 val) { reg_write(gspca_dev, 0x00, 0x00, (val >> 8) & 0xff); reg_write(gspca_dev, 0x00, 0x01, val & 0xff); } static void setcolors(struct gspca_dev *gspca_dev, s32 val) { reg_write(gspca_dev, SPCA501_REG_CCDSP, 0x0c, val); } static void setblue_balance(struct gspca_dev *gspca_dev, s32 val) { reg_write(gspca_dev, SPCA501_REG_CCDSP, 0x11, val); } static void setred_balance(struct gspca_dev *gspca_dev, s32 val) { reg_write(gspca_dev, SPCA501_REG_CCDSP, 0x13, val); } /* this function is called at probe time */ static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct sd *sd = (struct sd *) gspca_dev; struct cam *cam; cam = &gspca_dev->cam; cam->cam_mode = vga_mode; cam->nmodes = ARRAY_SIZE(vga_mode); sd->subtype = id->driver_info; return 0; } /* this function is called at probe and resume time */ static int sd_init(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; switch (sd->subtype) { case Arowana300KCMOSCamera: case SmileIntlCamera: /* Arowana 300k CMOS Camera data */ if (write_vector(gspca_dev, spca501c_arowana_init_data)) goto error; break; case MystFromOriUnknownCamera: /* Unknown Ori CMOS Camera data */ if (write_vector(gspca_dev, spca501c_mysterious_open_data)) goto error; break; default: /* generic spca501 init data */ if (write_vector(gspca_dev, spca501_init_data)) goto error; break; } gspca_dbg(gspca_dev, D_STREAM, "Initializing SPCA501 finished\n"); return 0; error: return -EINVAL; } static int sd_start(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int mode; switch (sd->subtype) { case ThreeComHomeConnectLite: /* Special handling for 3com data */ write_vector(gspca_dev, spca501_3com_open_data); break; case Arowana300KCMOSCamera: case SmileIntlCamera: /* Arowana 300k CMOS Camera data */ write_vector(gspca_dev, spca501c_arowana_open_data); break; case MystFromOriUnknownCamera: /* Unknown CMOS Camera data */ write_vector(gspca_dev, spca501c_mysterious_init_data); break; default: /* Generic 501 open data */ write_vector(gspca_dev, spca501_open_data); } /* memorize the wanted pixel format */ mode = gspca_dev->cam.cam_mode[(int) gspca_dev->curr_mode].priv; /* Enable ISO packet machine CTRL reg=2, * index=1 bitmask=0x2 (bit ordinal 1) */ reg_write(gspca_dev, SPCA50X_REG_USB, 0x6, 0x94); switch (mode) { case 0: /* 640x480 */ reg_write(gspca_dev, SPCA50X_REG_USB, 0x07, 0x004a); break; case 1: /* 320x240 */ reg_write(gspca_dev, SPCA50X_REG_USB, 0x07, 0x104a); break; default: /* case 2: * 160x120 */ reg_write(gspca_dev, SPCA50X_REG_USB, 0x07, 0x204a); break; } reg_write(gspca_dev, SPCA501_REG_CTLRL, 0x01, 0x02); return 0; } static void sd_stopN(struct gspca_dev *gspca_dev) { /* Disable ISO packet * machine CTRL reg=2, index=1 bitmask=0x0 (bit ordinal 1) */ reg_write(gspca_dev, SPCA501_REG_CTLRL, 0x01, 0x00); } /* called on streamoff with alt 0 and on disconnect */ static 
void sd_stop0(struct gspca_dev *gspca_dev) { if (!gspca_dev->present) return; reg_write(gspca_dev, SPCA501_REG_CTLRL, 0x05, 0x00); } static void sd_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, /* isoc packet */ int len) /* iso packet length */ { switch (data[0]) { case 0: /* start of frame */ gspca_frame_add(gspca_dev, LAST_PACKET, NULL, 0); data += SPCA501_OFFSET_DATA; len -= SPCA501_OFFSET_DATA; gspca_frame_add(gspca_dev, FIRST_PACKET, data, len); return; case 0xff: /* drop */ /* gspca_dev->last_packet_type = DISCARD_PACKET; */ return; } data++; len--; gspca_frame_add(gspca_dev, INTER_PACKET, data, len); } static int sd_s_ctrl(struct v4l2_ctrl *ctrl) { struct gspca_dev *gspca_dev = container_of(ctrl->handler, struct gspca_dev, ctrl_handler); gspca_dev->usb_err = 0; if (!gspca_dev->streaming) return 0; switch (ctrl->id) { case V4L2_CID_BRIGHTNESS: setbrightness(gspca_dev, ctrl->val); break; case V4L2_CID_CONTRAST: setcontrast(gspca_dev, ctrl->val); break; case V4L2_CID_SATURATION: setcolors(gspca_dev, ctrl->val); break; case V4L2_CID_BLUE_BALANCE: setblue_balance(gspca_dev, ctrl->val); break; case V4L2_CID_RED_BALANCE: setred_balance(gspca_dev, ctrl->val); break; } return gspca_dev->usb_err; } static const struct v4l2_ctrl_ops sd_ctrl_ops = { .s_ctrl = sd_s_ctrl, }; static int sd_init_controls(struct gspca_dev *gspca_dev) { struct v4l2_ctrl_handler *hdl = &gspca_dev->ctrl_handler; gspca_dev->vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 5); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_BRIGHTNESS, 0, 127, 1, 0); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_CONTRAST, 0, 64725, 1, 64725); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_SATURATION, 0, 63, 1, 20); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_BLUE_BALANCE, 0, 127, 1, 0); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_RED_BALANCE, 0, 127, 1, 0); if (hdl->error) { pr_err("Could not initialize controls\n"); return hdl->error; } return 0; } /* sub-driver description */ static const struct sd_desc sd_desc = { .name = MODULE_NAME, .config = sd_config, .init = sd_init, .init_controls = sd_init_controls, .start = sd_start, .stopN = sd_stopN, .stop0 = sd_stop0, .pkt_scan = sd_pkt_scan, }; /* -- module initialisation -- */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x040a, 0x0002), .driver_info = KodakDVC325}, {USB_DEVICE(0x0497, 0xc001), .driver_info = SmileIntlCamera}, {USB_DEVICE(0x0506, 0x00df), .driver_info = ThreeComHomeConnectLite}, {USB_DEVICE(0x0733, 0x0401), .driver_info = IntelCreateAndShare}, {USB_DEVICE(0x0733, 0x0402), .driver_info = ViewQuestM318B}, {USB_DEVICE(0x1776, 0x501c), .driver_info = Arowana300KCMOSCamera}, {USB_DEVICE(0x0000, 0x0000), .driver_info = MystFromOriUnknownCamera}, {} }; MODULE_DEVICE_TABLE(usb, device_table); /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver);
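/*
 * Illustrative sketch, not part of the driver above: how a hypothetical new
 * subtype would plug into the same table-driven bring-up. Each table entry is
 * a {request, value, index} triplet; write_vector() walks the table in order
 * and hands each one to reg_write() (note it passes data[i][2] as the index
 * and data[i][1] as the value), stopping at the all-zero terminator. The
 * table name and the subtype label here are invented for the example.
 */
static const __u16 spca501c_hypothetical_open_data[][3] = {
	{0x02, 0x0007, 0x0005},	/* request 0x02: write value 0x0007 to index 0x0005 */
	{0x02, 0xa048, 0x0000},	/* request 0x02: write value 0xa048 to index 0x0000 */
	{}			/* all-zero entry terminates the write_vector() walk */
};

/*
 * It would then be sent from sd_init(), mirroring the existing subtype cases:
 *
 *	case HypotheticalCamera:
 *		if (write_vector(gspca_dev, spca501c_hypothetical_open_data))
 *			goto error;
 *		break;
 */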
// SPDX-License-Identifier: GPL-2.0 /* * fs/sysfs/file.c - sysfs regular (text) file implementation * * Copyright (c) 2001-3 Patrick Mochel * 
Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007 Tejun Heo <teheo@suse.de> * * Please see Documentation/filesystems/sysfs.rst for more information. */ #include <linux/module.h> #include <linux/kobject.h> #include <linux/slab.h> #include <linux/list.h> #include <linux/mutex.h> #include <linux/seq_file.h> #include <linux/mm.h> #include "sysfs.h" static struct kobject *sysfs_file_kobj(struct kernfs_node *kn) { guard(rcu)(); return rcu_dereference(kn->__parent)->priv; } /* * Determine ktype->sysfs_ops for the given kernfs_node. This function * must be called while holding an active reference. */ static const struct sysfs_ops *sysfs_file_ops(struct kernfs_node *kn) { struct kobject *kobj = sysfs_file_kobj(kn); if (kn->flags & KERNFS_LOCKDEP) lockdep_assert_held(kn); return kobj->ktype ? kobj->ktype->sysfs_ops : NULL; } /* * Reads on sysfs are handled through seq_file, which takes care of hairy * details like buffering and seeking. The following function pipes * sysfs_ops->show() result through seq_file. */ static int sysfs_kf_seq_show(struct seq_file *sf, void *v) { struct kernfs_open_file *of = sf->private; struct kobject *kobj = sysfs_file_kobj(of->kn); const struct sysfs_ops *ops = sysfs_file_ops(of->kn); ssize_t count; char *buf; if (WARN_ON_ONCE(!ops->show)) return -EINVAL; /* acquire buffer and ensure that it's >= PAGE_SIZE and clear */ count = seq_get_buf(sf, &buf); if (count < PAGE_SIZE) { seq_commit(sf, -1); return 0; } memset(buf, 0, PAGE_SIZE); count = ops->show(kobj, of->kn->priv, buf); if (count < 0) return count; /* * The code works fine with PAGE_SIZE return but it's likely to * indicate truncated result or overflow in normal use cases. */ if (count >= (ssize_t)PAGE_SIZE) { printk("fill_read_buffer: %pS returned bad count\n", ops->show); /* Try to struggle along */ count = PAGE_SIZE - 1; } seq_commit(sf, count); return 0; } static ssize_t sysfs_kf_bin_read(struct kernfs_open_file *of, char *buf, size_t count, loff_t pos) { struct bin_attribute *battr = of->kn->priv; struct kobject *kobj = sysfs_file_kobj(of->kn); loff_t size = file_inode(of->file)->i_size; if (!count) return 0; if (size) { if (pos >= size) return 0; if (pos + count > size) count = size - pos; } if (!battr->read && !battr->read_new) return -EIO; if (battr->read_new) return battr->read_new(of->file, kobj, battr, buf, pos, count); return battr->read(of->file, kobj, battr, buf, pos, count); } /* kernfs read callback for regular sysfs files with pre-alloc */ static ssize_t sysfs_kf_read(struct kernfs_open_file *of, char *buf, size_t count, loff_t pos) { const struct sysfs_ops *ops = sysfs_file_ops(of->kn); struct kobject *kobj = sysfs_file_kobj(of->kn); ssize_t len; /* * If buf != of->prealloc_buf, we don't know how * large it is, so cannot safely pass it to ->show */ if (WARN_ON_ONCE(buf != of->prealloc_buf)) return 0; len = ops->show(kobj, of->kn->priv, buf); if (len < 0) return len; if (pos) { if (len <= pos) return 0; len -= pos; memmove(buf, buf + pos, len); } return min_t(ssize_t, count, len); } /* kernfs write callback for regular sysfs files */ static ssize_t sysfs_kf_write(struct kernfs_open_file *of, char *buf, size_t count, loff_t pos) { const struct sysfs_ops *ops = sysfs_file_ops(of->kn); struct kobject *kobj = sysfs_file_kobj(of->kn); if (!count) return 0; return ops->store(kobj, of->kn->priv, buf, count); } /* kernfs write callback for bin sysfs files */ static ssize_t sysfs_kf_bin_write(struct kernfs_open_file *of, char *buf, size_t count, loff_t pos) { struct bin_attribute *battr = 
of->kn->priv; struct kobject *kobj = sysfs_file_kobj(of->kn); loff_t size = file_inode(of->file)->i_size; if (size) { if (size <= pos) return -EFBIG; count = min_t(ssize_t, count, size - pos); } if (!count) return 0; if (!battr->write && !battr->write_new) return -EIO; if (battr->write_new) return battr->write_new(of->file, kobj, battr, buf, pos, count); return battr->write(of->file, kobj, battr, buf, pos, count); } static int sysfs_kf_bin_mmap(struct kernfs_open_file *of, struct vm_area_struct *vma) { struct bin_attribute *battr = of->kn->priv; struct kobject *kobj = sysfs_file_kobj(of->kn); return battr->mmap(of->file, kobj, battr, vma); } static loff_t sysfs_kf_bin_llseek(struct kernfs_open_file *of, loff_t offset, int whence) { struct bin_attribute *battr = of->kn->priv; struct kobject *kobj = sysfs_file_kobj(of->kn); if (battr->llseek) return battr->llseek(of->file, kobj, battr, offset, whence); else return generic_file_llseek(of->file, offset, whence); } static int sysfs_kf_bin_open(struct kernfs_open_file *of) { struct bin_attribute *battr = of->kn->priv; if (battr->f_mapping) of->file->f_mapping = battr->f_mapping(); return 0; } void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr) { struct kernfs_node *kn = kobj->sd, *tmp; if (kn && dir) kn = kernfs_find_and_get(kn, dir); else kernfs_get(kn); if (kn && attr) { tmp = kernfs_find_and_get(kn, attr); kernfs_put(kn); kn = tmp; } if (kn) { kernfs_notify(kn); kernfs_put(kn); } } EXPORT_SYMBOL_GPL(sysfs_notify); static const struct kernfs_ops sysfs_file_kfops_empty = { }; static const struct kernfs_ops sysfs_file_kfops_ro = { .seq_show = sysfs_kf_seq_show, }; static const struct kernfs_ops sysfs_file_kfops_wo = { .write = sysfs_kf_write, }; static const struct kernfs_ops sysfs_file_kfops_rw = { .seq_show = sysfs_kf_seq_show, .write = sysfs_kf_write, }; static const struct kernfs_ops sysfs_prealloc_kfops_ro = { .read = sysfs_kf_read, .prealloc = true, }; static const struct kernfs_ops sysfs_prealloc_kfops_wo = { .write = sysfs_kf_write, .prealloc = true, }; static const struct kernfs_ops sysfs_prealloc_kfops_rw = { .read = sysfs_kf_read, .write = sysfs_kf_write, .prealloc = true, }; static const struct kernfs_ops sysfs_bin_kfops_ro = { .read = sysfs_kf_bin_read, }; static const struct kernfs_ops sysfs_bin_kfops_wo = { .write = sysfs_kf_bin_write, }; static const struct kernfs_ops sysfs_bin_kfops_rw = { .read = sysfs_kf_bin_read, .write = sysfs_kf_bin_write, }; static const struct kernfs_ops sysfs_bin_kfops_mmap = { .read = sysfs_kf_bin_read, .write = sysfs_kf_bin_write, .mmap = sysfs_kf_bin_mmap, .open = sysfs_kf_bin_open, .llseek = sysfs_kf_bin_llseek, }; int sysfs_add_file_mode_ns(struct kernfs_node *parent, const struct attribute *attr, umode_t mode, kuid_t uid, kgid_t gid, const void *ns) { struct kobject *kobj = parent->priv; const struct sysfs_ops *sysfs_ops = kobj->ktype->sysfs_ops; struct lock_class_key *key = NULL; const struct kernfs_ops *ops = NULL; struct kernfs_node *kn; /* every kobject with an attribute needs a ktype assigned */ if (WARN(!sysfs_ops, KERN_ERR "missing sysfs attribute operations for kobject: %s\n", kobject_name(kobj))) return -EINVAL; if (mode & SYSFS_PREALLOC) { if (sysfs_ops->show && sysfs_ops->store) ops = &sysfs_prealloc_kfops_rw; else if (sysfs_ops->show) ops = &sysfs_prealloc_kfops_ro; else if (sysfs_ops->store) ops = &sysfs_prealloc_kfops_wo; } else { if (sysfs_ops->show && sysfs_ops->store) ops = &sysfs_file_kfops_rw; else if (sysfs_ops->show) ops = &sysfs_file_kfops_ro; else if 
(sysfs_ops->store) ops = &sysfs_file_kfops_wo; } if (!ops) ops = &sysfs_file_kfops_empty; #ifdef CONFIG_DEBUG_LOCK_ALLOC if (!attr->ignore_lockdep) key = attr->key ?: (struct lock_class_key *)&attr->skey; #endif kn = __kernfs_create_file(parent, attr->name, mode & 0777, uid, gid, PAGE_SIZE, ops, (void *)attr, ns, key); if (IS_ERR(kn)) { if (PTR_ERR(kn) == -EEXIST) sysfs_warn_dup(parent, attr->name); return PTR_ERR(kn); } return 0; } int sysfs_add_bin_file_mode_ns(struct kernfs_node *parent, const struct bin_attribute *battr, umode_t mode, size_t size, kuid_t uid, kgid_t gid, const void *ns) { const struct attribute *attr = &battr->attr; struct lock_class_key *key = NULL; const struct kernfs_ops *ops; struct kernfs_node *kn; if (battr->read && battr->read_new) return -EINVAL; if (battr->write && battr->write_new) return -EINVAL; if (battr->mmap) ops = &sysfs_bin_kfops_mmap; else if ((battr->read || battr->read_new) && (battr->write || battr->write_new)) ops = &sysfs_bin_kfops_rw; else if (battr->read || battr->read_new) ops = &sysfs_bin_kfops_ro; else if (battr->write || battr->write_new) ops = &sysfs_bin_kfops_wo; else ops = &sysfs_file_kfops_empty; #ifdef CONFIG_DEBUG_LOCK_ALLOC if (!attr->ignore_lockdep) key = attr->key ?: (struct lock_class_key *)&attr->skey; #endif kn = __kernfs_create_file(parent, attr->name, mode & 0777, uid, gid, size, ops, (void *)attr, ns, key); if (IS_ERR(kn)) { if (PTR_ERR(kn) == -EEXIST) sysfs_warn_dup(parent, attr->name); return PTR_ERR(kn); } return 0; } /** * sysfs_create_file_ns - create an attribute file for an object with custom ns * @kobj: object we're creating for * @attr: attribute descriptor * @ns: namespace the new file should belong to */ int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns) { kuid_t uid; kgid_t gid; if (WARN_ON(!kobj || !kobj->sd || !attr)) return -EINVAL; kobject_get_ownership(kobj, &uid, &gid); return sysfs_add_file_mode_ns(kobj->sd, attr, attr->mode, uid, gid, ns); } EXPORT_SYMBOL_GPL(sysfs_create_file_ns); int sysfs_create_files(struct kobject *kobj, const struct attribute * const *ptr) { int err = 0; int i; for (i = 0; ptr[i] && !err; i++) err = sysfs_create_file(kobj, ptr[i]); if (err) while (--i >= 0) sysfs_remove_file(kobj, ptr[i]); return err; } EXPORT_SYMBOL_GPL(sysfs_create_files); /** * sysfs_add_file_to_group - add an attribute file to a pre-existing group. * @kobj: object we're acting for. * @attr: attribute descriptor. * @group: group name. */ int sysfs_add_file_to_group(struct kobject *kobj, const struct attribute *attr, const char *group) { struct kernfs_node *parent; kuid_t uid; kgid_t gid; int error; if (group) { parent = kernfs_find_and_get(kobj->sd, group); } else { parent = kobj->sd; kernfs_get(parent); } if (!parent) return -ENOENT; kobject_get_ownership(kobj, &uid, &gid); error = sysfs_add_file_mode_ns(parent, attr, attr->mode, uid, gid, NULL); kernfs_put(parent); return error; } EXPORT_SYMBOL_GPL(sysfs_add_file_to_group); /** * sysfs_chmod_file - update the modified mode value on an object attribute. * @kobj: object we're acting for. * @attr: attribute descriptor. * @mode: file permissions. 
* */ int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, umode_t mode) { struct kernfs_node *kn; struct iattr newattrs; int rc; kn = kernfs_find_and_get(kobj->sd, attr->name); if (!kn) return -ENOENT; newattrs.ia_mode = (mode & S_IALLUGO) | (kn->mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE; rc = kernfs_setattr(kn, &newattrs); kernfs_put(kn); return rc; } EXPORT_SYMBOL_GPL(sysfs_chmod_file); /** * sysfs_break_active_protection - break "active" protection * @kobj: The kernel object @attr is associated with. * @attr: The attribute to break the "active" protection for. * * With sysfs, just like kernfs, deletion of an attribute is postponed until * all active .show() and .store() callbacks have finished unless this function * is called. Hence this function is useful in methods that implement self * deletion. */ struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj, const struct attribute *attr) { struct kernfs_node *kn; kobject_get(kobj); kn = kernfs_find_and_get(kobj->sd, attr->name); if (kn) kernfs_break_active_protection(kn); else kobject_put(kobj); return kn; } EXPORT_SYMBOL_GPL(sysfs_break_active_protection); /** * sysfs_unbreak_active_protection - restore "active" protection * @kn: Pointer returned by sysfs_break_active_protection(). * * Undo the effects of sysfs_break_active_protection(). Since this function * calls kernfs_put() on the kernfs node that corresponds to the 'attr' * argument passed to sysfs_break_active_protection() that attribute may have * been removed between the sysfs_break_active_protection() and * sysfs_unbreak_active_protection() calls, it is not safe to access @kn after * this function has returned. */ void sysfs_unbreak_active_protection(struct kernfs_node *kn) { struct kobject *kobj = sysfs_file_kobj(kn); kernfs_unbreak_active_protection(kn); kernfs_put(kn); kobject_put(kobj); } EXPORT_SYMBOL_GPL(sysfs_unbreak_active_protection); /** * sysfs_remove_file_ns - remove an object attribute with a custom ns tag * @kobj: object we're acting for * @attr: attribute descriptor * @ns: namespace tag of the file to remove * * Hash the attribute name and namespace tag and kill the victim. */ void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns) { struct kernfs_node *parent = kobj->sd; kernfs_remove_by_name_ns(parent, attr->name, ns); } EXPORT_SYMBOL_GPL(sysfs_remove_file_ns); /** * sysfs_remove_file_self - remove an object attribute from its own method * @kobj: object we're acting for * @attr: attribute descriptor * * See kernfs_remove_self() for details. */ bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr) { struct kernfs_node *parent = kobj->sd; struct kernfs_node *kn; bool ret; kn = kernfs_find_and_get(parent, attr->name); if (WARN_ON_ONCE(!kn)) return false; ret = kernfs_remove_self(kn); kernfs_put(kn); return ret; } EXPORT_SYMBOL_GPL(sysfs_remove_file_self); void sysfs_remove_files(struct kobject *kobj, const struct attribute * const *ptr) { int i; for (i = 0; ptr[i]; i++) sysfs_remove_file(kobj, ptr[i]); } EXPORT_SYMBOL_GPL(sysfs_remove_files); /** * sysfs_remove_file_from_group - remove an attribute file from a group. * @kobj: object we're acting for. * @attr: attribute descriptor. * @group: group name. 
*/ void sysfs_remove_file_from_group(struct kobject *kobj, const struct attribute *attr, const char *group) { struct kernfs_node *parent; if (group) { parent = kernfs_find_and_get(kobj->sd, group); } else { parent = kobj->sd; kernfs_get(parent); } if (parent) { kernfs_remove_by_name(parent, attr->name); kernfs_put(parent); } } EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group); /** * sysfs_create_bin_file - create binary file for object. * @kobj: object. * @attr: attribute descriptor. */ int sysfs_create_bin_file(struct kobject *kobj, const struct bin_attribute *attr) { kuid_t uid; kgid_t gid; if (WARN_ON(!kobj || !kobj->sd || !attr)) return -EINVAL; kobject_get_ownership(kobj, &uid, &gid); return sysfs_add_bin_file_mode_ns(kobj->sd, attr, attr->attr.mode, attr->size, uid, gid, NULL); } EXPORT_SYMBOL_GPL(sysfs_create_bin_file); /** * sysfs_remove_bin_file - remove binary file for object. * @kobj: object. * @attr: attribute descriptor. */ void sysfs_remove_bin_file(struct kobject *kobj, const struct bin_attribute *attr) { kernfs_remove_by_name(kobj->sd, attr->attr.name); } EXPORT_SYMBOL_GPL(sysfs_remove_bin_file); static int internal_change_owner(struct kernfs_node *kn, kuid_t kuid, kgid_t kgid) { struct iattr newattrs = { .ia_valid = ATTR_UID | ATTR_GID, .ia_uid = kuid, .ia_gid = kgid, }; return kernfs_setattr(kn, &newattrs); } /** * sysfs_link_change_owner - change owner of a sysfs file. * @kobj: object of the kernfs_node the symlink is located in. * @targ: object of the kernfs_node the symlink points to. * @name: name of the link. * @kuid: new owner's kuid * @kgid: new owner's kgid * * This function looks up the sysfs symlink entry @name under @kobj and changes * the ownership to @kuid/@kgid. The symlink is looked up in the namespace of * @targ. * * Returns 0 on success or error code on failure. */ int sysfs_link_change_owner(struct kobject *kobj, struct kobject *targ, const char *name, kuid_t kuid, kgid_t kgid) { struct kernfs_node *kn = NULL; int error; if (!name || !kobj->state_in_sysfs || !targ->state_in_sysfs) return -EINVAL; error = -ENOENT; kn = kernfs_find_and_get_ns(kobj->sd, name, targ->sd->ns); if (!kn) goto out; error = -EINVAL; if (kernfs_type(kn) != KERNFS_LINK) goto out; if (kn->symlink.target_kn->priv != targ) goto out; error = internal_change_owner(kn, kuid, kgid); out: kernfs_put(kn); return error; } /** * sysfs_file_change_owner - change owner of a sysfs file. * @kobj: object. * @name: name of the file to change. * @kuid: new owner's kuid * @kgid: new owner's kgid * * This function looks up the sysfs entry @name under @kobj and changes the * ownership to @kuid/@kgid. * * Returns 0 on success or error code on failure. */ int sysfs_file_change_owner(struct kobject *kobj, const char *name, kuid_t kuid, kgid_t kgid) { struct kernfs_node *kn; int error; if (!name) return -EINVAL; if (!kobj->state_in_sysfs) return -EINVAL; kn = kernfs_find_and_get(kobj->sd, name); if (!kn) return -ENOENT; error = internal_change_owner(kn, kuid, kgid); kernfs_put(kn); return error; } EXPORT_SYMBOL_GPL(sysfs_file_change_owner); /** * sysfs_change_owner - change owner of the given object. * @kobj: object. * @kuid: new owner's kuid * @kgid: new owner's kgid * * Change the owner of the default directory, files, groups, and attributes of * @kobj to @kuid/@kgid. Note that sysfs_change_owner mirrors how the sysfs * entries for a kobject are added by driver core. 
In summary, * sysfs_change_owner() takes care of the default directory entry for @kobj and * the default attributes associated with the ktype of @kobj. * Additional properties not added by driver core have to be changed by the * driver or subsystem which created them. This is similar to how * driver/subsystem specific entries are removed. * * Returns 0 on success or error code on failure. */ int sysfs_change_owner(struct kobject *kobj, kuid_t kuid, kgid_t kgid) { int error; const struct kobj_type *ktype; if (!kobj->state_in_sysfs) return -EINVAL; /* Change the owner of the kobject itself. */ error = internal_change_owner(kobj->sd, kuid, kgid); if (error) return error; ktype = get_ktype(kobj); if (ktype) { /* * Change owner of the default groups associated with the * ktype of @kobj. */ error = sysfs_groups_change_owner(kobj, ktype->default_groups, kuid, kgid); if (error) return error; } return 0; } EXPORT_SYMBOL_GPL(sysfs_change_owner); /** * sysfs_emit - scnprintf equivalent, aware of PAGE_SIZE buffer. * @buf: start of PAGE_SIZE buffer. * @fmt: format * @...: optional arguments to @fmt * * Returns number of characters written to @buf. */ int sysfs_emit(char *buf, const char *fmt, ...) { va_list args; int len; if (WARN(!buf || offset_in_page(buf), "invalid sysfs_emit: buf:%p\n", buf)) return 0; va_start(args, fmt); len = vscnprintf(buf, PAGE_SIZE, fmt, args); va_end(args); return len; } EXPORT_SYMBOL_GPL(sysfs_emit); /** * sysfs_emit_at - scnprintf equivalent, aware of PAGE_SIZE buffer. * @buf: start of PAGE_SIZE buffer. * @at: offset in @buf to start write in bytes * @at must be >= 0 && < PAGE_SIZE * @fmt: format * @...: optional arguments to @fmt * * Returns number of characters written starting at &@buf[@at]. */ int sysfs_emit_at(char *buf, int at, const char *fmt, ...) { va_list args; int len; if (WARN(!buf || offset_in_page(buf) || at < 0 || at >= PAGE_SIZE, "invalid sysfs_emit_at: buf:%p at:%d\n", buf, at)) return 0; va_start(args, fmt); len = vscnprintf(buf + at, PAGE_SIZE - at, fmt, args); va_end(args); return len; } EXPORT_SYMBOL_GPL(sysfs_emit_at); /** * sysfs_bin_attr_simple_read - read callback to simply copy from memory. * @file: attribute file which is being read. * @kobj: object to which the attribute belongs. * @attr: attribute descriptor. * @buf: destination buffer. * @off: offset in bytes from which to read. * @count: maximum number of bytes to read. * * Simple ->read() callback for bin_attributes backed by a buffer in memory. * The @private and @size members in struct bin_attribute must be set to the * buffer's location and size before the bin_attribute is created in sysfs. * * Bounds check for @off and @count is done in sysfs_kf_bin_read(). * Negative value check for @off is done in vfs_setpos() and default_llseek(). * * Returns number of bytes written to @buf. */ ssize_t sysfs_bin_attr_simple_read(struct file *file, struct kobject *kobj, const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { memcpy(buf, attr->private + off, count); return count; } EXPORT_SYMBOL_GPL(sysfs_bin_attr_simple_read);
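/*
 * Illustrative sketch, not part of fs/sysfs/file.c: a typical driver-side
 * attribute pair built on the helpers above. The ->show() callback receives
 * the start of the PAGE_SIZE buffer that sysfs_kf_seq_show()/sysfs_kf_read()
 * supply, which is exactly what sysfs_emit() expects; ->store() returns the
 * number of bytes consumed. "demo" and demo_value are invented names, and
 * the snippet assumes <linux/device.h>, <linux/sysfs.h> and <linux/kstrtox.h>.
 */
static int demo_value;

static ssize_t demo_show(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	/* sysfs_emit() warns and returns 0 if buf is not a page-aligned start */
	return sysfs_emit(buf, "%d\n", demo_value);
}

static ssize_t demo_store(struct device *dev, struct device_attribute *attr,
			  const char *buf, size_t count)
{
	int ret = kstrtoint(buf, 0, &demo_value);

	return ret ? ret : count;	/* on success, report all bytes consumed */
}
static DEVICE_ATTR_RW(demo);		/* expands to dev_attr_demo, mode 0644 */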
/* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM maple_tree #if !defined(_TRACE_MM_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_MM_H #include <linux/tracepoint.h> struct ma_state; TRACE_EVENT(ma_op, TP_PROTO(const char *fn, struct ma_state *mas), TP_ARGS(fn, mas), TP_STRUCT__entry( __field(const char *, fn) __field(unsigned long, min) __field(unsigned long, max) __field(unsigned long, index) __field(unsigned long, last) __field(void *, node) ), TP_fast_assign( __entry->fn = fn; __entry->min = mas->min; __entry->max = mas->max; __entry->index = mas->index; __entry->last = mas->last; __entry->node = mas->node; ), TP_printk("%s\tNode: %p (%lu %lu) range: %lu-%lu", __entry->fn, (void *) __entry->node, (unsigned long) __entry->min, (unsigned long) __entry->max, (unsigned long) __entry->index, (unsigned long) __entry->last ) ) TRACE_EVENT(ma_read, TP_PROTO(const char *fn, struct ma_state *mas), TP_ARGS(fn, mas), TP_STRUCT__entry( __field(const char *, fn) __field(unsigned long, min) __field(unsigned long, max) __field(unsigned long, index) __field(unsigned long, last) __field(void *, node) ), TP_fast_assign( __entry->fn = fn; __entry->min = mas->min; __entry->max = mas->max; __entry->index = mas->index; __entry->last = mas->last; __entry->node = mas->node; ), TP_printk("%s\tNode: %p (%lu %lu) range: %lu-%lu", __entry->fn, (void *) __entry->node, (unsigned long) __entry->min, (unsigned long) __entry->max, (unsigned long) __entry->index, (unsigned long) __entry->last ) ) TRACE_EVENT(ma_write, TP_PROTO(const char *fn, struct ma_state *mas, unsigned long piv, void *val), TP_ARGS(fn, mas, piv, val), TP_STRUCT__entry( __field(const char *, fn) __field(unsigned long, min) __field(unsigned long, max) __field(unsigned long, index) __field(unsigned long, last) __field(unsigned long, piv) __field(void *, val) __field(void *, node) ), TP_fast_assign( __entry->fn = fn; __entry->min = mas->min; __entry->max = mas->max; __entry->index = mas->index; __entry->last = mas->last; __entry->piv = piv; __entry->val = val; __entry->node = mas->node; ), TP_printk("%s\tNode %p (%lu %lu) range:%lu-%lu piv (%lu) val %p", __entry->fn, (void *) __entry->node, (unsigned long) __entry->min, (unsigned long) __entry->max, (unsigned long) __entry->index, (unsigned long) __entry->last, (unsigned long) __entry->piv, (void *) __entry->val ) ) #endif /* _TRACE_MM_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
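/*
 * Illustrative sketch, not part of the header above: how these events are
 * emitted. Exactly one .c file (for the maple tree that is lib/maple_tree.c)
 * defines CREATE_TRACE_POINTS before including the header, so that
 * <trace/define_trace.h> expands the trace_ma_*() calls into real functions;
 * callers then pass __func__ plus the current ma_state. example_write() is
 * an invented name for illustration.
 */
#define CREATE_TRACE_POINTS
#include <trace/events/maple_tree.h>

static void example_write(struct ma_state *mas, unsigned long piv, void *val)
{
	trace_ma_op(__func__, mas);		 /* records fn, node and range */
	/* ... modify the tree ... */
	trace_ma_write(__func__, mas, piv, val); /* also records pivot and value */
}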
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_VMSTAT_H #define _LINUX_VMSTAT_H #include <linux/types.h> #include <linux/percpu.h> #include <linux/mmzone.h> #include <linux/vm_event_item.h> #include <linux/atomic.h> #include <linux/static_key.h> #include <linux/mmdebug.h> #ifdef CONFIG_NUMA DECLARE_STATIC_KEY_TRUE(vm_numa_stat_key); #endif struct reclaim_stat { unsigned nr_dirty; unsigned nr_unqueued_dirty; unsigned nr_congested; unsigned nr_writeback; unsigned nr_immediate; unsigned nr_pageout; unsigned nr_activate[ANON_AND_FILE]; unsigned nr_ref_keep; unsigned nr_unmap_fail; unsigned nr_lazyfree_fail; unsigned nr_demoted; }; /* Stat data for system wide items */ enum vm_stat_item { NR_DIRTY_THRESHOLD, NR_DIRTY_BG_THRESHOLD, NR_MEMMAP_PAGES, /* page metadata allocated through buddy allocator */ NR_MEMMAP_BOOT_PAGES, /* page metadata allocated through boot allocator */ NR_VM_STAT_ITEMS, }; #ifdef CONFIG_VM_EVENT_COUNTERS /* * Light weight per cpu counter implementation. * * Counters should only be incremented and no critical kernel component * should rely on the counter values. * * Counters are handled completely inline. 
On many platforms the code * generated will simply be the increment of a global address. */ struct vm_event_state { unsigned long event[NR_VM_EVENT_ITEMS]; }; DECLARE_PER_CPU(struct vm_event_state, vm_event_states); /* * vm counters are allowed to be racy. Use raw_cpu_ops to avoid the * local_irq_disable overhead. */ static inline void __count_vm_event(enum vm_event_item item) { raw_cpu_inc(vm_event_states.event[item]); } static inline void count_vm_event(enum vm_event_item item) { this_cpu_inc(vm_event_states.event[item]); } static inline void __count_vm_events(enum vm_event_item item, long delta) { raw_cpu_add(vm_event_states.event[item], delta); } static inline void count_vm_events(enum vm_event_item item, long delta) { this_cpu_add(vm_event_states.event[item], delta); } extern void all_vm_events(unsigned long *); extern void vm_events_fold_cpu(int cpu); #else /* Disable counters */ static inline void count_vm_event(enum vm_event_item item) { } static inline void count_vm_events(enum vm_event_item item, long delta) { } static inline void __count_vm_event(enum vm_event_item item) { } static inline void __count_vm_events(enum vm_event_item item, long delta) { } static inline void all_vm_events(unsigned long *ret) { } static inline void vm_events_fold_cpu(int cpu) { } #endif /* CONFIG_VM_EVENT_COUNTERS */ #ifdef CONFIG_NUMA_BALANCING #define count_vm_numa_event(x) count_vm_event(x) #define count_vm_numa_events(x, y) count_vm_events(x, y) #else #define count_vm_numa_event(x) do {} while (0) #define count_vm_numa_events(x, y) do { (void)(y); } while (0) #endif /* CONFIG_NUMA_BALANCING */ #ifdef CONFIG_DEBUG_TLBFLUSH #define count_vm_tlb_event(x) count_vm_event(x) #define count_vm_tlb_events(x, y) count_vm_events(x, y) #else #define count_vm_tlb_event(x) do {} while (0) #define count_vm_tlb_events(x, y) do { (void)(y); } while (0) #endif #ifdef CONFIG_PER_VMA_LOCK_STATS #define count_vm_vma_lock_event(x) count_vm_event(x) #else #define count_vm_vma_lock_event(x) do {} while (0) #endif #define __count_zid_vm_events(item, zid, delta) \ __count_vm_events(item##_NORMAL - ZONE_NORMAL + zid, delta) /* * Zone and node-based page accounting with per cpu differentials. 
*/ extern atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS]; extern atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS]; extern atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS]; #ifdef CONFIG_NUMA static inline void zone_numa_event_add(long x, struct zone *zone, enum numa_stat_item item) { atomic_long_add(x, &zone->vm_numa_event[item]); atomic_long_add(x, &vm_numa_event[item]); } static inline unsigned long zone_numa_event_state(struct zone *zone, enum numa_stat_item item) { return atomic_long_read(&zone->vm_numa_event[item]); } static inline unsigned long global_numa_event_state(enum numa_stat_item item) { return atomic_long_read(&vm_numa_event[item]); } #endif /* CONFIG_NUMA */ static inline void zone_page_state_add(long x, struct zone *zone, enum zone_stat_item item) { atomic_long_add(x, &zone->vm_stat[item]); atomic_long_add(x, &vm_zone_stat[item]); } static inline void node_page_state_add(long x, struct pglist_data *pgdat, enum node_stat_item item) { atomic_long_add(x, &pgdat->vm_stat[item]); atomic_long_add(x, &vm_node_stat[item]); } static inline unsigned long global_zone_page_state(enum zone_stat_item item) { long x = atomic_long_read(&vm_zone_stat[item]); #ifdef CONFIG_SMP if (x < 0) x = 0; #endif return x; } static inline unsigned long global_node_page_state_pages(enum node_stat_item item) { long x = atomic_long_read(&vm_node_stat[item]); #ifdef CONFIG_SMP if (x < 0) x = 0; #endif return x; } static inline unsigned long global_node_page_state(enum node_stat_item item) { VM_WARN_ON_ONCE(vmstat_item_in_bytes(item)); return global_node_page_state_pages(item); } static inline unsigned long zone_page_state(struct zone *zone, enum zone_stat_item item) { long x = atomic_long_read(&zone->vm_stat[item]); #ifdef CONFIG_SMP if (x < 0) x = 0; #endif return x; } /* * More accurate version that also considers the currently pending * deltas. For that we need to loop over all cpus to find the current * deltas. There is no synchronization so the result cannot be * exactly accurate either. */ static inline unsigned long zone_page_state_snapshot(struct zone *zone, enum zone_stat_item item) { long x = atomic_long_read(&zone->vm_stat[item]); #ifdef CONFIG_SMP int cpu; for_each_online_cpu(cpu) x += per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_stat_diff[item]; if (x < 0) x = 0; #endif return x; } #ifdef CONFIG_NUMA /* See __count_vm_event comment on why raw_cpu_inc is used. 
*/ static inline void __count_numa_event(struct zone *zone, enum numa_stat_item item) { struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats; raw_cpu_inc(pzstats->vm_numa_event[item]); } static inline void __count_numa_events(struct zone *zone, enum numa_stat_item item, long delta) { struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats; raw_cpu_add(pzstats->vm_numa_event[item], delta); } extern unsigned long sum_zone_node_page_state(int node, enum zone_stat_item item); extern unsigned long sum_zone_numa_event_state(int node, enum numa_stat_item item); extern unsigned long node_page_state(struct pglist_data *pgdat, enum node_stat_item item); extern unsigned long node_page_state_pages(struct pglist_data *pgdat, enum node_stat_item item); extern void fold_vm_numa_events(void); #else #define sum_zone_node_page_state(node, item) global_zone_page_state(item) #define node_page_state(node, item) global_node_page_state(item) #define node_page_state_pages(node, item) global_node_page_state_pages(item) static inline void fold_vm_numa_events(void) { } #endif /* CONFIG_NUMA */ #ifdef CONFIG_SMP void __mod_zone_page_state(struct zone *, enum zone_stat_item item, long); void __inc_zone_page_state(struct page *, enum zone_stat_item); void __dec_zone_page_state(struct page *, enum zone_stat_item); void __mod_node_page_state(struct pglist_data *, enum node_stat_item item, long); void __inc_node_page_state(struct page *, enum node_stat_item); void __dec_node_page_state(struct page *, enum node_stat_item); void mod_zone_page_state(struct zone *, enum zone_stat_item, long); void inc_zone_page_state(struct page *, enum zone_stat_item); void dec_zone_page_state(struct page *, enum zone_stat_item); void mod_node_page_state(struct pglist_data *, enum node_stat_item, long); void inc_node_page_state(struct page *, enum node_stat_item); void dec_node_page_state(struct page *, enum node_stat_item); extern void inc_node_state(struct pglist_data *, enum node_stat_item); extern void __inc_zone_state(struct zone *, enum zone_stat_item); extern void __inc_node_state(struct pglist_data *, enum node_stat_item); extern void dec_zone_state(struct zone *, enum zone_stat_item); extern void __dec_zone_state(struct zone *, enum zone_stat_item); extern void __dec_node_state(struct pglist_data *, enum node_stat_item); void quiet_vmstat(void); void cpu_vm_stats_fold(int cpu); void refresh_zone_stat_thresholds(void); void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *); int calculate_pressure_threshold(struct zone *zone); int calculate_normal_threshold(struct zone *zone); void set_pgdat_percpu_threshold(pg_data_t *pgdat, int (*calculate_pressure)(struct zone *)); #else /* CONFIG_SMP */ /* * We do not maintain differentials in a single processor configuration. * The functions directly modify the zone and global counters. */ static inline void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, long delta) { zone_page_state_add(delta, zone, item); } static inline void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, int delta) { if (vmstat_item_in_bytes(item)) { /* * Only cgroups use subpage accounting right now; at * the global level, these items still change in * multiples of whole pages. Store them as pages * internally to keep the per-cpu counters compact. 
*/ VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1)); delta >>= PAGE_SHIFT; } node_page_state_add(delta, pgdat, item); } static inline void __inc_zone_state(struct zone *zone, enum zone_stat_item item) { atomic_long_inc(&zone->vm_stat[item]); atomic_long_inc(&vm_zone_stat[item]); } static inline void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) { atomic_long_inc(&pgdat->vm_stat[item]); atomic_long_inc(&vm_node_stat[item]); } static inline void __dec_zone_state(struct zone *zone, enum zone_stat_item item) { atomic_long_dec(&zone->vm_stat[item]); atomic_long_dec(&vm_zone_stat[item]); } static inline void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) { atomic_long_dec(&pgdat->vm_stat[item]); atomic_long_dec(&vm_node_stat[item]); } static inline void __inc_zone_page_state(struct page *page, enum zone_stat_item item) { __inc_zone_state(page_zone(page), item); } static inline void __inc_node_page_state(struct page *page, enum node_stat_item item) { __inc_node_state(page_pgdat(page), item); } static inline void __dec_zone_page_state(struct page *page, enum zone_stat_item item) { __dec_zone_state(page_zone(page), item); } static inline void __dec_node_page_state(struct page *page, enum node_stat_item item) { __dec_node_state(page_pgdat(page), item); } /* * We only use atomic operations to update counters. So there is no need to * disable interrupts. */ #define inc_zone_page_state __inc_zone_page_state #define dec_zone_page_state __dec_zone_page_state #define mod_zone_page_state __mod_zone_page_state #define inc_node_page_state __inc_node_page_state #define dec_node_page_state __dec_node_page_state #define mod_node_page_state __mod_node_page_state #define inc_zone_state __inc_zone_state #define inc_node_state __inc_node_state #define dec_zone_state __dec_zone_state #define set_pgdat_percpu_threshold(pgdat, callback) { } static inline void refresh_zone_stat_thresholds(void) { } static inline void cpu_vm_stats_fold(int cpu) { } static inline void quiet_vmstat(void) { } static inline void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats) { } #endif /* CONFIG_SMP */ static inline void __zone_stat_mod_folio(struct folio *folio, enum zone_stat_item item, long nr) { __mod_zone_page_state(folio_zone(folio), item, nr); } static inline void __zone_stat_add_folio(struct folio *folio, enum zone_stat_item item) { __mod_zone_page_state(folio_zone(folio), item, folio_nr_pages(folio)); } static inline void __zone_stat_sub_folio(struct folio *folio, enum zone_stat_item item) { __mod_zone_page_state(folio_zone(folio), item, -folio_nr_pages(folio)); } static inline void zone_stat_mod_folio(struct folio *folio, enum zone_stat_item item, long nr) { mod_zone_page_state(folio_zone(folio), item, nr); } static inline void zone_stat_add_folio(struct folio *folio, enum zone_stat_item item) { mod_zone_page_state(folio_zone(folio), item, folio_nr_pages(folio)); } static inline void zone_stat_sub_folio(struct folio *folio, enum zone_stat_item item) { mod_zone_page_state(folio_zone(folio), item, -folio_nr_pages(folio)); } static inline void __node_stat_mod_folio(struct folio *folio, enum node_stat_item item, long nr) { __mod_node_page_state(folio_pgdat(folio), item, nr); } static inline void __node_stat_add_folio(struct folio *folio, enum node_stat_item item) { __mod_node_page_state(folio_pgdat(folio), item, folio_nr_pages(folio)); } static inline void __node_stat_sub_folio(struct folio *folio, enum node_stat_item item) { __mod_node_page_state(folio_pgdat(folio), item, 
-folio_nr_pages(folio)); } static inline void node_stat_mod_folio(struct folio *folio, enum node_stat_item item, long nr) { mod_node_page_state(folio_pgdat(folio), item, nr); } static inline void node_stat_add_folio(struct folio *folio, enum node_stat_item item) { mod_node_page_state(folio_pgdat(folio), item, folio_nr_pages(folio)); } static inline void node_stat_sub_folio(struct folio *folio, enum node_stat_item item) { mod_node_page_state(folio_pgdat(folio), item, -folio_nr_pages(folio)); } extern const char * const vmstat_text[]; static inline const char *zone_stat_name(enum zone_stat_item item) { return vmstat_text[item]; } #ifdef CONFIG_NUMA static inline const char *numa_stat_name(enum numa_stat_item item) { return vmstat_text[NR_VM_ZONE_STAT_ITEMS + item]; } #endif /* CONFIG_NUMA */ static inline const char *node_stat_name(enum node_stat_item item) { return vmstat_text[NR_VM_ZONE_STAT_ITEMS + NR_VM_NUMA_EVENT_ITEMS + item]; } static inline const char *lru_list_name(enum lru_list lru) { return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_" } #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) static inline const char *vm_event_name(enum vm_event_item item) { return vmstat_text[NR_VM_ZONE_STAT_ITEMS + NR_VM_NUMA_EVENT_ITEMS + NR_VM_NODE_STAT_ITEMS + NR_VM_STAT_ITEMS + item]; } #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ #ifdef CONFIG_MEMCG void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val); static inline void mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val) { unsigned long flags; local_irq_save(flags); __mod_lruvec_state(lruvec, idx, val); local_irq_restore(flags); } void __lruvec_stat_mod_folio(struct folio *folio, enum node_stat_item idx, int val); static inline void lruvec_stat_mod_folio(struct folio *folio, enum node_stat_item idx, int val) { unsigned long flags; local_irq_save(flags); __lruvec_stat_mod_folio(folio, idx, val); local_irq_restore(flags); } static inline void mod_lruvec_page_state(struct page *page, enum node_stat_item idx, int val) { lruvec_stat_mod_folio(page_folio(page), idx, val); } #else static inline void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val) { __mod_node_page_state(lruvec_pgdat(lruvec), idx, val); } static inline void mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val) { mod_node_page_state(lruvec_pgdat(lruvec), idx, val); } static inline void __lruvec_stat_mod_folio(struct folio *folio, enum node_stat_item idx, int val) { __mod_node_page_state(folio_pgdat(folio), idx, val); } static inline void lruvec_stat_mod_folio(struct folio *folio, enum node_stat_item idx, int val) { mod_node_page_state(folio_pgdat(folio), idx, val); } static inline void mod_lruvec_page_state(struct page *page, enum node_stat_item idx, int val) { mod_node_page_state(page_pgdat(page), idx, val); } #endif /* CONFIG_MEMCG */ static inline void __lruvec_stat_add_folio(struct folio *folio, enum node_stat_item idx) { __lruvec_stat_mod_folio(folio, idx, folio_nr_pages(folio)); } static inline void __lruvec_stat_sub_folio(struct folio *folio, enum node_stat_item idx) { __lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio)); } static inline void lruvec_stat_add_folio(struct folio *folio, enum node_stat_item idx) { lruvec_stat_mod_folio(folio, idx, folio_nr_pages(folio)); } static inline void lruvec_stat_sub_folio(struct folio *folio, enum node_stat_item idx) { lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio)); } void memmap_boot_pages_add(long 
delta); void memmap_pages_add(long delta); #endif /* _LINUX_VMSTAT_H */
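/*
 * Illustrative only: a minimal sketch (not part of vmstat.h above) of how
 * kernel code typically drives this API. count_vm_event() is the IRQ-safe
 * per-cpu event bump; zone_page_state() is the cheap, possibly stale read
 * that the header clamps to >= 0 on SMP. sketch_observe() is hypothetical.
 */
#include <linux/vmstat.h>
#include <linux/mmzone.h>

static void sketch_observe(struct zone *zone)
{
	unsigned long free_pages;

	count_vm_event(PGFAULT);	/* racy by design: per-cpu, no locking */

	/* Cheap read of the global differential-backed counter. */
	free_pages = zone_page_state(zone, NR_FREE_PAGES);
	if (!free_pages)
		pr_debug("zone %s looks exhausted\n", zone->name);
}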
// SPDX-License-Identifier: GPL-2.0 /* * Fast batching percpu counters. */ #include <linux/percpu_counter.h> #include <linux/mutex.h> #include <linux/init.h> #include <linux/cpu.h> #include <linux/module.h> #include <linux/debugobjects.h> #ifdef CONFIG_HOTPLUG_CPU static LIST_HEAD(percpu_counters); static DEFINE_SPINLOCK(percpu_counters_lock); #endif #ifdef CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER static const struct debug_obj_descr percpu_counter_debug_descr; static bool percpu_counter_fixup_free(void *addr, enum debug_obj_state state) { struct percpu_counter *fbc = addr; switch (state) { case ODEBUG_STATE_ACTIVE: percpu_counter_destroy(fbc); debug_object_free(fbc, &percpu_counter_debug_descr); return true; default: return false; } } static const struct debug_obj_descr percpu_counter_debug_descr = { .name = "percpu_counter", .fixup_free = percpu_counter_fixup_free, }; static inline void debug_percpu_counter_activate(struct percpu_counter *fbc) { debug_object_init(fbc, &percpu_counter_debug_descr); debug_object_activate(fbc, &percpu_counter_debug_descr); } static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc) { debug_object_deactivate(fbc, &percpu_counter_debug_descr); debug_object_free(fbc, &percpu_counter_debug_descr); } #else /* CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER */ static inline void debug_percpu_counter_activate(struct percpu_counter *fbc) { } static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc) { } #endif /* CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER */ void percpu_counter_set(struct percpu_counter *fbc, s64 amount) { int cpu; unsigned long flags; raw_spin_lock_irqsave(&fbc->lock, flags); for_each_possible_cpu(cpu) { s32 *pcount = per_cpu_ptr(fbc->counters, cpu); *pcount = 0; } fbc->count = amount; raw_spin_unlock_irqrestore(&fbc->lock, flags); } EXPORT_SYMBOL(percpu_counter_set); /* * Add to a counter while respecting batch size.
* * There are 2 implementations, both dealing with the following problem: * * The decision between slow path and fast path, and the actual update, must * be atomic. Otherwise a call in process context could check the current * values and decide that the fast path can be used. If an interrupt then * occurs before the this_cpu_add(), and the interrupt updates * this_cpu(*fbc->counters), the this_cpu_add() executed after the interrupt * completes can produce values larger than "batch", or even overflow. */ #ifdef CONFIG_HAVE_CMPXCHG_LOCAL /* * Safety against interrupts is achieved in 2 ways: * 1. the fast path uses local cmpxchg (note: no lock prefix) * 2. the slow path operates with interrupts disabled */ void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) { s64 count; unsigned long flags; count = this_cpu_read(*fbc->counters); do { if (unlikely(abs(count + amount) >= batch)) { raw_spin_lock_irqsave(&fbc->lock, flags); /* * Note: by now we might have migrated to another CPU * or the value might have changed. */ count = __this_cpu_read(*fbc->counters); fbc->count += count + amount; __this_cpu_sub(*fbc->counters, count); raw_spin_unlock_irqrestore(&fbc->lock, flags); return; } } while (!this_cpu_try_cmpxchg(*fbc->counters, &count, count + amount)); } #else /* * local_irq_save() is used to make the function irq safe: * - The slow path would be ok as protected by an irq-safe spinlock. * - this_cpu_add would be ok as it is irq-safe by definition. */ void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) { s64 count; unsigned long flags; local_irq_save(flags); count = __this_cpu_read(*fbc->counters) + amount; if (abs(count) >= batch) { raw_spin_lock(&fbc->lock); fbc->count += count; __this_cpu_sub(*fbc->counters, count - amount); raw_spin_unlock(&fbc->lock); } else { this_cpu_add(*fbc->counters, amount); } local_irq_restore(flags); } #endif EXPORT_SYMBOL(percpu_counter_add_batch); /* * For a percpu_counter with a big batch, the deviation of its count can be * large, and there is a requirement to reduce that deviation, e.g. when the * counter's batch is decreased at runtime to get better accuracy, which can * be achieved by running this sync function on each CPU. */ void percpu_counter_sync(struct percpu_counter *fbc) { unsigned long flags; s64 count; raw_spin_lock_irqsave(&fbc->lock, flags); count = __this_cpu_read(*fbc->counters); fbc->count += count; __this_cpu_sub(*fbc->counters, count); raw_spin_unlock_irqrestore(&fbc->lock, flags); } EXPORT_SYMBOL(percpu_counter_sync); /* * Add up all the per-cpu counts, return the result. This is a more accurate * but much slower version of percpu_counter_read_positive(). * * We use the cpu mask of (cpu_online_mask | cpu_dying_mask) to capture sums * from CPUs that are in the process of being taken offline. Dying CPUs have * been removed from the online mask, but may not have had the hotplug dead * notifier called to fold the percpu count back into the global counter sum. * By including dying CPUs in the iteration mask, we avoid this race condition * so __percpu_counter_sum() just does the right thing when CPUs are being taken * offline.
*/ s64 __percpu_counter_sum(struct percpu_counter *fbc) { s64 ret; int cpu; unsigned long flags; raw_spin_lock_irqsave(&fbc->lock, flags); ret = fbc->count; for_each_cpu_or(cpu, cpu_online_mask, cpu_dying_mask) { s32 *pcount = per_cpu_ptr(fbc->counters, cpu); ret += *pcount; } raw_spin_unlock_irqrestore(&fbc->lock, flags); return ret; } EXPORT_SYMBOL(__percpu_counter_sum); int __percpu_counter_init_many(struct percpu_counter *fbc, s64 amount, gfp_t gfp, u32 nr_counters, struct lock_class_key *key) { unsigned long flags __maybe_unused; size_t counter_size; s32 __percpu *counters; u32 i; counter_size = ALIGN(sizeof(*counters), __alignof__(*counters)); counters = __alloc_percpu_gfp(nr_counters * counter_size, __alignof__(*counters), gfp); if (!counters) { fbc[0].counters = NULL; return -ENOMEM; } for (i = 0; i < nr_counters; i++) { raw_spin_lock_init(&fbc[i].lock); lockdep_set_class(&fbc[i].lock, key); #ifdef CONFIG_HOTPLUG_CPU INIT_LIST_HEAD(&fbc[i].list); #endif fbc[i].count = amount; fbc[i].counters = (void __percpu *)counters + i * counter_size; debug_percpu_counter_activate(&fbc[i]); } #ifdef CONFIG_HOTPLUG_CPU spin_lock_irqsave(&percpu_counters_lock, flags); for (i = 0; i < nr_counters; i++) list_add(&fbc[i].list, &percpu_counters); spin_unlock_irqrestore(&percpu_counters_lock, flags); #endif return 0; } EXPORT_SYMBOL(__percpu_counter_init_many); void percpu_counter_destroy_many(struct percpu_counter *fbc, u32 nr_counters) { unsigned long flags __maybe_unused; u32 i; if (WARN_ON_ONCE(!fbc)) return; if (!fbc[0].counters) return; for (i = 0; i < nr_counters; i++) debug_percpu_counter_deactivate(&fbc[i]); #ifdef CONFIG_HOTPLUG_CPU spin_lock_irqsave(&percpu_counters_lock, flags); for (i = 0; i < nr_counters; i++) list_del(&fbc[i].list); spin_unlock_irqrestore(&percpu_counters_lock, flags); #endif free_percpu(fbc[0].counters); for (i = 0; i < nr_counters; i++) fbc[i].counters = NULL; } EXPORT_SYMBOL(percpu_counter_destroy_many); int percpu_counter_batch __read_mostly = 32; EXPORT_SYMBOL(percpu_counter_batch); static int compute_batch_value(unsigned int cpu) { int nr = num_online_cpus(); percpu_counter_batch = max(32, nr*2); return 0; } static int percpu_counter_cpu_dead(unsigned int cpu) { #ifdef CONFIG_HOTPLUG_CPU struct percpu_counter *fbc; compute_batch_value(cpu); spin_lock_irq(&percpu_counters_lock); list_for_each_entry(fbc, &percpu_counters, list) { s32 *pcount; raw_spin_lock(&fbc->lock); pcount = per_cpu_ptr(fbc->counters, cpu); fbc->count += *pcount; *pcount = 0; raw_spin_unlock(&fbc->lock); } spin_unlock_irq(&percpu_counters_lock); #endif return 0; } /* * Compare counter against given value. * Return 1 if greater, 0 if equal and -1 if less */ int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch) { s64 count; count = percpu_counter_read(fbc); /* Check to see if rough count will be sufficient for comparison */ if (abs(count - rhs) > (batch * num_online_cpus())) { if (count > rhs) return 1; else return -1; } /* Need to use precise count */ count = percpu_counter_sum(fbc); if (count > rhs) return 1; else if (count < rhs) return -1; else return 0; } EXPORT_SYMBOL(__percpu_counter_compare); /* * Compare counter, and add amount if total is: less than or equal to limit if * amount is positive, or greater than or equal to limit if amount is negative. * Return true if amount is added, or false if total would be beyond the limit. * * Negative limit is allowed, but unusual. 
* When negative amounts (subs) are given to percpu_counter_limited_add(), * the limit would most naturally be 0, but other limits are also allowed. * * Overflow beyond S64_MAX is not allowed: counter, limit and amount * are all assumed to be sane (far from S64_MIN and S64_MAX). */ bool __percpu_counter_limited_add(struct percpu_counter *fbc, s64 limit, s64 amount, s32 batch) { s64 count; s64 unknown; unsigned long flags; bool good = false; if (amount == 0) return true; local_irq_save(flags); unknown = batch * num_online_cpus(); count = __this_cpu_read(*fbc->counters); /* Skip taking the lock when safe */ if (abs(count + amount) <= batch && ((amount > 0 && fbc->count + unknown <= limit) || (amount < 0 && fbc->count - unknown >= limit))) { this_cpu_add(*fbc->counters, amount); local_irq_restore(flags); return true; } raw_spin_lock(&fbc->lock); count = fbc->count + amount; /* Skip percpu_counter_sum() when safe */ if (amount > 0) { if (count - unknown > limit) goto out; if (count + unknown <= limit) good = true; } else { if (count + unknown < limit) goto out; if (count - unknown >= limit) good = true; } if (!good) { s32 *pcount; int cpu; for_each_cpu_or(cpu, cpu_online_mask, cpu_dying_mask) { pcount = per_cpu_ptr(fbc->counters, cpu); count += *pcount; } if (amount > 0) { if (count > limit) goto out; } else { if (count < limit) goto out; } good = true; } count = __this_cpu_read(*fbc->counters); fbc->count += count + amount; __this_cpu_sub(*fbc->counters, count); out: raw_spin_unlock(&fbc->lock); local_irq_restore(flags); return good; } static int __init percpu_counter_startup(void) { int ret; ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "lib/percpu_cnt:online", compute_batch_value, NULL); WARN_ON(ret < 0); ret = cpuhp_setup_state_nocalls(CPUHP_PERCPU_CNT_DEAD, "lib/percpu_cnt:dead", NULL, percpu_counter_cpu_dead); WARN_ON(ret < 0); return 0; } module_init(percpu_counter_startup);
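/*
 * Illustrative only: a minimal sketch (not part of the file above) of
 * typical percpu_counter usage from a hypothetical module. The calls below
 * (percpu_counter_init/add/read_positive/sum/destroy) are the public API
 * declared in <linux/percpu_counter.h>; sketch_nr_items is made up.
 */
#include <linux/percpu_counter.h>
#include <linux/printk.h>

static struct percpu_counter sketch_nr_items;

static int sketch_setup(void)
{
	/* Allocates the per-cpu s32 deltas; may fail under memory pressure. */
	return percpu_counter_init(&sketch_nr_items, 0, GFP_KERNEL);
}

static void sketch_use(void)
{
	percpu_counter_add(&sketch_nr_items, 1);	/* fast, batched */

	/* Cheap, approximate: may lag by up to batch * nr_cpus. */
	pr_info("approx: %lld\n", percpu_counter_read_positive(&sketch_nr_items));

	/* Slow, precise: folds all per-cpu deltas under the lock. */
	pr_info("exact: %lld\n", percpu_counter_sum(&sketch_nr_items));
}

static void sketch_teardown(void)
{
	percpu_counter_destroy(&sketch_nr_items);
}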
/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2009 IBM Corporation * Author: Mimi Zohar <zohar@us.ibm.com> */ #ifndef _LINUX_INTEGRITY_H #define _LINUX_INTEGRITY_H #include <linux/fs.h> #include <linux/iversion.h> enum integrity_status { INTEGRITY_PASS = 0, INTEGRITY_PASS_IMMUTABLE, INTEGRITY_FAIL, INTEGRITY_FAIL_IMMUTABLE, INTEGRITY_NOLABEL, INTEGRITY_NOXATTRS, INTEGRITY_UNKNOWN, }; #ifdef CONFIG_INTEGRITY extern void __init integrity_load_keys(void); #else static inline void integrity_load_keys(void) { } #endif /* CONFIG_INTEGRITY */ /* An inode's attributes for detection of changes */ struct integrity_inode_attributes { u64 version; /* track inode changes */ unsigned long ino; dev_t dev; }; /* * On stacked filesystems the i_version alone is not enough to detect file data * or metadata changes. Additional metadata is required. */ static inline void integrity_inode_attrs_store(struct integrity_inode_attributes *attrs, u64 i_version, const struct inode *inode) { attrs->version = i_version; attrs->dev = inode->i_sb->s_dev; attrs->ino = inode->i_ino; } /* * On stacked filesystems detect whether the inode or its content has changed. */ static inline bool integrity_inode_attrs_changed(const struct integrity_inode_attributes *attrs, const struct inode *inode) { return (inode->i_sb->s_dev != attrs->dev || inode->i_ino != attrs->ino || !inode_eq_iversion(inode, attrs->version)); } #endif /* _LINUX_INTEGRITY_H */
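/*
 * Illustrative only: a hedged sketch of how a caller might combine the two
 * helpers above to decide whether a cached measurement is stale. The name
 * sketch_remeasure_needed and the caller-owned cached_attrs are hypothetical;
 * inode_query_iversion() is the real accessor from <linux/iversion.h>.
 */
#include <linux/integrity.h>
#include <linux/iversion.h>

static bool sketch_remeasure_needed(struct integrity_inode_attributes *cached_attrs,
				    struct inode *inode)
{
	if (integrity_inode_attrs_changed(cached_attrs, inode)) {
		/* Stale: re-measure, then cache the current identity/version. */
		integrity_inode_attrs_store(cached_attrs,
					    inode_query_iversion(inode), inode);
		return true;
	}
	return false;
}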
#ifndef IO_URING_MEMMAP_H #define IO_URING_MEMMAP_H #define IORING_MAP_OFF_PARAM_REGION 0x20000000ULL #define IORING_MAP_OFF_ZCRX_REGION 0x30000000ULL #define IORING_OFF_ZCRX_SHIFT 16 struct page **io_pin_pages(unsigned long uaddr, unsigned long len, int *npages); #ifndef CONFIG_MMU unsigned int io_uring_nommu_mmap_capabilities(struct file *file); #endif unsigned long io_uring_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); int io_uring_mmap(struct file *file, struct vm_area_struct *vma); void io_free_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr); int io_create_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr, struct io_uring_region_desc *reg, unsigned long mmap_offset); int io_create_region_mmap_safe(struct io_ring_ctx *ctx, struct io_mapped_region *mr, struct io_uring_region_desc *reg, unsigned long mmap_offset); static inline void *io_region_get_ptr(struct io_mapped_region *mr) { return mr->ptr; } static inline bool io_region_is_set(struct io_mapped_region *mr) { return !!mr->nr_pages; } #endif
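/*
 * Illustrative only: a hedged sketch of the calling pattern the header above
 * suggests -- create a region, check that it is populated, use its kernel
 * mapping, and free it on teardown. Error paths and the ctx/mr/reg setup are
 * elided; sketch_region_demo is hypothetical and not part of io_uring.
 */
static int sketch_region_demo(struct io_ring_ctx *ctx,
			      struct io_mapped_region *mr,
			      struct io_uring_region_desc *reg)
{
	int ret;

	/* "mmap_safe" variant publishes the region safely vs. concurrent mmap. */
	ret = io_create_region_mmap_safe(ctx, mr, reg,
					 IORING_MAP_OFF_PARAM_REGION);
	if (ret)
		return ret;

	if (io_region_is_set(mr))
		memset(io_region_get_ptr(mr), 0, PAGE_SIZE); /* touch mapping */

	io_free_region(ctx, mr);
	return 0;
}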
/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright 2006, Johannes Berg <johannes@sipsolutions.net> */ #include <linux/list.h> #include <linux/spinlock.h> #include <linux/leds.h> #include "ieee80211_i.h" #define MAC80211_BLINK_DELAY 50 /* ms */ static inline void ieee80211_led_rx(struct ieee80211_local *local) { #ifdef CONFIG_MAC80211_LEDS if (!atomic_read(&local->rx_led_active)) return; led_trigger_blink_oneshot(&local->rx_led, MAC80211_BLINK_DELAY, MAC80211_BLINK_DELAY, 0); #endif } static inline void ieee80211_led_tx(struct ieee80211_local *local) { #ifdef CONFIG_MAC80211_LEDS if (!atomic_read(&local->tx_led_active)) return; led_trigger_blink_oneshot(&local->tx_led, MAC80211_BLINK_DELAY, MAC80211_BLINK_DELAY, 0); #endif } #ifdef CONFIG_MAC80211_LEDS void ieee80211_led_assoc(struct ieee80211_local *local, bool associated); void ieee80211_led_radio(struct ieee80211_local *local, bool enabled); void ieee80211_alloc_led_names(struct ieee80211_local *local); void ieee80211_free_led_names(struct ieee80211_local *local); void ieee80211_led_init(struct ieee80211_local *local); void ieee80211_led_exit(struct ieee80211_local *local); void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local, unsigned int types_on, unsigned int types_off); #else static inline void ieee80211_led_assoc(struct ieee80211_local *local, bool associated) { } static inline void ieee80211_led_radio(struct ieee80211_local *local, bool enabled) { } static inline void ieee80211_alloc_led_names(struct ieee80211_local *local) { } static inline void ieee80211_free_led_names(struct ieee80211_local *local) { } static inline void ieee80211_led_init(struct ieee80211_local *local) { } static inline void ieee80211_led_exit(struct ieee80211_local *local) { } static inline void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local, unsigned int types_on, unsigned int types_off) { } #endif static inline void ieee80211_tpt_led_trig_tx(struct ieee80211_local *local, int bytes) { #ifdef CONFIG_MAC80211_LEDS if (atomic_read(&local->tpt_led_active)) local->tpt_led_trigger->tx_bytes += bytes; #endif } static inline void ieee80211_tpt_led_trig_rx(struct ieee80211_local *local, int bytes) { #ifdef CONFIG_MAC80211_LEDS if (atomic_read(&local->tpt_led_active)) local->tpt_led_trigger->rx_bytes += bytes; #endif }
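/*
 * Illustrative only: a minimal sketch of how mac80211's RX path could drive
 * the helpers above. sketch_rx_path is hypothetical; the two calls are the
 * real inlines from this header, and both compile to nothing when
 * CONFIG_MAC80211_LEDS is not set, so hot paths pay no cost.
 */
#include <linux/skbuff.h>
#include "ieee80211_i.h"

static void sketch_rx_path(struct ieee80211_local *local, struct sk_buff *skb)
{
	ieee80211_led_rx(local);                    /* oneshot activity blink */
	ieee80211_tpt_led_trig_rx(local, skb->len); /* throughput accounting */
}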
// SPDX-License-Identifier: GPL-2.0-only /* * linux/net/netfilter/xt_IDLETIMER.c * * Netfilter module to trigger a timer when a packet matches. * After the timer expires, a kevent will be sent.
* * Copyright (C) 2004, 2010 Nokia Corporation * Written by Timo Teras <ext-timo.teras@nokia.com> * * Converted to x_tables and reworked for upstream inclusion * by Luciano Coelho <luciano.coelho@nokia.com> * * Contact: Luciano Coelho <luciano.coelho@nokia.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/timer.h> #include <linux/alarmtimer.h> #include <linux/list.h> #include <linux/mutex.h> #include <linux/netfilter.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_IDLETIMER.h> #include <linux/kdev_t.h> #include <linux/kobject.h> #include <linux/workqueue.h> #include <linux/sysfs.h> struct idletimer_tg { struct list_head entry; struct alarm alarm; struct timer_list timer; struct work_struct work; struct kobject *kobj; struct device_attribute attr; unsigned int refcnt; u8 timer_type; }; static LIST_HEAD(idletimer_tg_list); static DEFINE_MUTEX(list_mutex); static struct kobject *idletimer_tg_kobj; static struct idletimer_tg *__idletimer_tg_find_by_label(const char *label) { struct idletimer_tg *entry; list_for_each_entry(entry, &idletimer_tg_list, entry) { if (!strcmp(label, entry->attr.attr.name)) return entry; } return NULL; } static ssize_t idletimer_tg_show(struct device *dev, struct device_attribute *attr, char *buf) { struct idletimer_tg *timer; unsigned long expires = 0; struct timespec64 ktimespec = {}; long time_diff = 0; mutex_lock(&list_mutex); timer = __idletimer_tg_find_by_label(attr->attr.name); if (timer) { if (timer->timer_type & XT_IDLETIMER_ALARM) { ktime_t expires_alarm = alarm_expires_remaining(&timer->alarm); ktimespec = ktime_to_timespec64(expires_alarm); time_diff = ktimespec.tv_sec; } else { expires = timer->timer.expires; time_diff = jiffies_to_msecs(expires - jiffies) / 1000; } } mutex_unlock(&list_mutex); if (time_after(expires, jiffies) || ktimespec.tv_sec > 0) return sysfs_emit(buf, "%ld\n", time_diff); return sysfs_emit(buf, "0\n"); } static void idletimer_tg_work(struct work_struct *work) { struct idletimer_tg *timer = container_of(work, struct idletimer_tg, work); sysfs_notify(idletimer_tg_kobj, NULL, timer->attr.attr.name); } static void idletimer_tg_expired(struct timer_list *t) { struct idletimer_tg *timer = timer_container_of(timer, t, timer); pr_debug("timer %s expired\n", timer->attr.attr.name); schedule_work(&timer->work); } static void idletimer_tg_alarmproc(struct alarm *alarm, ktime_t now) { struct idletimer_tg *timer = alarm->data; pr_debug("alarm %s expired\n", timer->attr.attr.name); schedule_work(&timer->work); } static int idletimer_check_sysfs_name(const char *name, unsigned int size) { int ret; ret = xt_check_proc_name(name, size); if (ret < 0) return ret; if (!strcmp(name, "power") || !strcmp(name, "subsystem") || !strcmp(name, "uevent")) return -EINVAL; return 0; } static int idletimer_tg_create(struct idletimer_tg_info *info) { int ret; info->timer = kzalloc(sizeof(*info->timer), GFP_KERNEL); if (!info->timer) { ret = -ENOMEM; goto out; } ret = idletimer_check_sysfs_name(info->label, sizeof(info->label)); if (ret < 0) goto out_free_timer; sysfs_attr_init(&info->timer->attr.attr); info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL); if (!info->timer->attr.attr.name) { ret = -ENOMEM; goto out_free_timer; } info->timer->attr.attr.mode = 0444; info->timer->attr.show = idletimer_tg_show; ret = sysfs_create_file(idletimer_tg_kobj, &info->timer->attr.attr); if (ret < 0) { pr_debug("couldn't add file to sysfs"); goto out_free_attr; } list_add(&info->timer->entry, 
&idletimer_tg_list); timer_setup(&info->timer->timer, idletimer_tg_expired, 0); info->timer->refcnt = 1; INIT_WORK(&info->timer->work, idletimer_tg_work); mod_timer(&info->timer->timer, secs_to_jiffies(info->timeout) + jiffies); return 0; out_free_attr: kfree(info->timer->attr.attr.name); out_free_timer: kfree(info->timer); out: return ret; } static int idletimer_tg_create_v1(struct idletimer_tg_info_v1 *info) { int ret; info->timer = kmalloc(sizeof(*info->timer), GFP_KERNEL); if (!info->timer) { ret = -ENOMEM; goto out; } ret = idletimer_check_sysfs_name(info->label, sizeof(info->label)); if (ret < 0) goto out_free_timer; sysfs_attr_init(&info->timer->attr.attr); info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL); if (!info->timer->attr.attr.name) { ret = -ENOMEM; goto out_free_timer; } info->timer->attr.attr.mode = 0444; info->timer->attr.show = idletimer_tg_show; ret = sysfs_create_file(idletimer_tg_kobj, &info->timer->attr.attr); if (ret < 0) { pr_debug("couldn't add file to sysfs"); goto out_free_attr; } /* notify userspace */ kobject_uevent(idletimer_tg_kobj,KOBJ_ADD); list_add(&info->timer->entry, &idletimer_tg_list); pr_debug("timer type value is %u", info->timer_type); info->timer->timer_type = info->timer_type; info->timer->refcnt = 1; INIT_WORK(&info->timer->work, idletimer_tg_work); if (info->timer->timer_type & XT_IDLETIMER_ALARM) { ktime_t tout; alarm_init(&info->timer->alarm, ALARM_BOOTTIME, idletimer_tg_alarmproc); info->timer->alarm.data = info->timer; tout = ktime_set(info->timeout, 0); alarm_start_relative(&info->timer->alarm, tout); } else { timer_setup(&info->timer->timer, idletimer_tg_expired, 0); mod_timer(&info->timer->timer, secs_to_jiffies(info->timeout) + jiffies); } return 0; out_free_attr: kfree(info->timer->attr.attr.name); out_free_timer: kfree(info->timer); out: return ret; } /* * The actual xt_tables plugin. */ static unsigned int idletimer_tg_target(struct sk_buff *skb, const struct xt_action_param *par) { const struct idletimer_tg_info *info = par->targinfo; pr_debug("resetting timer %s, timeout period %u\n", info->label, info->timeout); mod_timer(&info->timer->timer, secs_to_jiffies(info->timeout) + jiffies); return XT_CONTINUE; } /* * The actual xt_tables plugin. 
*/ static unsigned int idletimer_tg_target_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct idletimer_tg_info_v1 *info = par->targinfo; pr_debug("resetting timer %s, timeout period %u\n", info->label, info->timeout); if (info->timer->timer_type & XT_IDLETIMER_ALARM) { ktime_t tout = ktime_set(info->timeout, 0); alarm_start_relative(&info->timer->alarm, tout); } else { mod_timer(&info->timer->timer, secs_to_jiffies(info->timeout) + jiffies); } return XT_CONTINUE; } static int idletimer_tg_helper(struct idletimer_tg_info *info) { if (info->timeout == 0) { pr_debug("timeout value is zero\n"); return -EINVAL; } if (info->timeout >= INT_MAX / 1000) { pr_debug("timeout value is too big\n"); return -EINVAL; } if (info->label[0] == '\0' || strnlen(info->label, MAX_IDLETIMER_LABEL_SIZE) == MAX_IDLETIMER_LABEL_SIZE) { pr_debug("label is empty or not nul-terminated\n"); return -EINVAL; } return 0; } static int idletimer_tg_checkentry(const struct xt_tgchk_param *par) { struct idletimer_tg_info *info = par->targinfo; int ret; pr_debug("checkentry targinfo%s\n", info->label); ret = idletimer_tg_helper(info); if(ret < 0) { pr_debug("checkentry helper return invalid\n"); return -EINVAL; } mutex_lock(&list_mutex); info->timer = __idletimer_tg_find_by_label(info->label); if (info->timer) { info->timer->refcnt++; mod_timer(&info->timer->timer, secs_to_jiffies(info->timeout) + jiffies); pr_debug("increased refcnt of timer %s to %u\n", info->label, info->timer->refcnt); } else { ret = idletimer_tg_create(info); if (ret < 0) { pr_debug("failed to create timer\n"); mutex_unlock(&list_mutex); return ret; } } mutex_unlock(&list_mutex); return 0; } static int idletimer_tg_checkentry_v1(const struct xt_tgchk_param *par) { struct idletimer_tg_info_v1 *info = par->targinfo; int ret; pr_debug("checkentry targinfo%s\n", info->label); if (info->send_nl_msg) return -EOPNOTSUPP; ret = idletimer_tg_helper((struct idletimer_tg_info *)info); if(ret < 0) { pr_debug("checkentry helper return invalid\n"); return -EINVAL; } if (info->timer_type > XT_IDLETIMER_ALARM) { pr_debug("invalid value for timer type\n"); return -EINVAL; } mutex_lock(&list_mutex); info->timer = __idletimer_tg_find_by_label(info->label); if (info->timer) { if (info->timer->timer_type != info->timer_type) { pr_debug("Adding/Replacing rule with same label and different timer type is not allowed\n"); mutex_unlock(&list_mutex); return -EINVAL; } info->timer->refcnt++; if (info->timer_type & XT_IDLETIMER_ALARM) { /* calculate remaining expiry time */ ktime_t tout = alarm_expires_remaining(&info->timer->alarm); struct timespec64 ktimespec = ktime_to_timespec64(tout); if (ktimespec.tv_sec > 0) { pr_debug("time_expiry_remaining %lld\n", ktimespec.tv_sec); alarm_start_relative(&info->timer->alarm, tout); } } else { mod_timer(&info->timer->timer, secs_to_jiffies(info->timeout) + jiffies); } pr_debug("increased refcnt of timer %s to %u\n", info->label, info->timer->refcnt); } else { ret = idletimer_tg_create_v1(info); if (ret < 0) { pr_debug("failed to create timer\n"); mutex_unlock(&list_mutex); return ret; } } mutex_unlock(&list_mutex); return 0; } static void idletimer_tg_destroy(const struct xt_tgdtor_param *par) { const struct idletimer_tg_info *info = par->targinfo; pr_debug("destroy targinfo %s\n", info->label); mutex_lock(&list_mutex); if (--info->timer->refcnt > 0) { pr_debug("decreased refcnt of timer %s to %u\n", info->label, info->timer->refcnt); mutex_unlock(&list_mutex); return; } pr_debug("deleting timer %s\n", info->label); 
list_del(&info->timer->entry); mutex_unlock(&list_mutex); timer_shutdown_sync(&info->timer->timer); cancel_work_sync(&info->timer->work); sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr); kfree(info->timer->attr.attr.name); kfree(info->timer); } static void idletimer_tg_destroy_v1(const struct xt_tgdtor_param *par) { const struct idletimer_tg_info_v1 *info = par->targinfo; pr_debug("destroy targinfo %s\n", info->label); mutex_lock(&list_mutex); if (--info->timer->refcnt > 0) { pr_debug("decreased refcnt of timer %s to %u\n", info->label, info->timer->refcnt); mutex_unlock(&list_mutex); return; } pr_debug("deleting timer %s\n", info->label); list_del(&info->timer->entry); mutex_unlock(&list_mutex); if (info->timer->timer_type & XT_IDLETIMER_ALARM) { alarm_cancel(&info->timer->alarm); } else { timer_shutdown_sync(&info->timer->timer); } cancel_work_sync(&info->timer->work); sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr); kfree(info->timer->attr.attr.name); kfree(info->timer); } static struct xt_target idletimer_tg[] __read_mostly = { { .name = "IDLETIMER", .family = NFPROTO_IPV4, .target = idletimer_tg_target, .targetsize = sizeof(struct idletimer_tg_info), .usersize = offsetof(struct idletimer_tg_info, timer), .checkentry = idletimer_tg_checkentry, .destroy = idletimer_tg_destroy, .me = THIS_MODULE, }, { .name = "IDLETIMER", .family = NFPROTO_IPV4, .revision = 1, .target = idletimer_tg_target_v1, .targetsize = sizeof(struct idletimer_tg_info_v1), .usersize = offsetof(struct idletimer_tg_info_v1, timer), .checkentry = idletimer_tg_checkentry_v1, .destroy = idletimer_tg_destroy_v1, .me = THIS_MODULE, }, #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "IDLETIMER", .family = NFPROTO_IPV6, .target = idletimer_tg_target, .targetsize = sizeof(struct idletimer_tg_info), .usersize = offsetof(struct idletimer_tg_info, timer), .checkentry = idletimer_tg_checkentry, .destroy = idletimer_tg_destroy, .me = THIS_MODULE, }, { .name = "IDLETIMER", .family = NFPROTO_IPV6, .revision = 1, .target = idletimer_tg_target_v1, .targetsize = sizeof(struct idletimer_tg_info_v1), .usersize = offsetof(struct idletimer_tg_info_v1, timer), .checkentry = idletimer_tg_checkentry_v1, .destroy = idletimer_tg_destroy_v1, .me = THIS_MODULE, }, #endif }; static struct class *idletimer_tg_class; static struct device *idletimer_tg_device; static int __init idletimer_tg_init(void) { int err; idletimer_tg_class = class_create("xt_idletimer"); err = PTR_ERR(idletimer_tg_class); if (IS_ERR(idletimer_tg_class)) { pr_debug("couldn't register device class\n"); goto out; } idletimer_tg_device = device_create(idletimer_tg_class, NULL, MKDEV(0, 0), NULL, "timers"); err = PTR_ERR(idletimer_tg_device); if (IS_ERR(idletimer_tg_device)) { pr_debug("couldn't register system device\n"); goto out_class; } idletimer_tg_kobj = &idletimer_tg_device->kobj; err = xt_register_targets(idletimer_tg, ARRAY_SIZE(idletimer_tg)); if (err < 0) { pr_debug("couldn't register xt target\n"); goto out_dev; } return 0; out_dev: device_destroy(idletimer_tg_class, MKDEV(0, 0)); out_class: class_destroy(idletimer_tg_class); out: return err; } static void __exit idletimer_tg_exit(void) { xt_unregister_targets(idletimer_tg, ARRAY_SIZE(idletimer_tg)); device_destroy(idletimer_tg_class, MKDEV(0, 0)); class_destroy(idletimer_tg_class); } module_init(idletimer_tg_init); module_exit(idletimer_tg_exit); MODULE_AUTHOR("Timo Teras <ext-timo.teras@nokia.com>"); MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>"); MODULE_DESCRIPTION("Xtables: 
idle time monitor"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("ipt_IDLETIMER"); MODULE_ALIAS("ip6t_IDLETIMER");
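/*
 * Illustrative only: a hedged userspace sketch of how the sysfs attribute
 * created by this module is consumed. Per the init code above, the device
 * appears as /sys/class/xt_idletimer/timers and each rule label becomes a
 * read-only file reporting seconds until expiry ("0" once the timer has
 * fired and the sysfs_notify() uevent path has run). The label "wan_idle"
 * is hypothetical.
 */
#include <stdio.h>

int main(void)
{
	char buf[32];
	FILE *f = fopen("/sys/class/xt_idletimer/timers/wan_idle", "r");

	if (!f)
		return 1;
	if (fgets(buf, sizeof(buf), f))
		printf("seconds remaining: %s", buf);
	fclose(f);
	return 0;
}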
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2016 Facebook * Copyright (C) 2013-2014 Jens Axboe */ #include <linux/sched.h> #include <linux/random.h> #include <linux/sbitmap.h> #include <linux/seq_file.h> static int init_alloc_hint(struct sbitmap *sb, gfp_t flags) { unsigned depth = sb->depth; sb->alloc_hint =
alloc_percpu_gfp(unsigned int, flags); if (!sb->alloc_hint) return -ENOMEM; if (depth && !sb->round_robin) { int i; for_each_possible_cpu(i) *per_cpu_ptr(sb->alloc_hint, i) = get_random_u32_below(depth); } return 0; } static inline unsigned update_alloc_hint_before_get(struct sbitmap *sb, unsigned int depth) { unsigned hint; hint = this_cpu_read(*sb->alloc_hint); if (unlikely(hint >= depth)) { hint = depth ? get_random_u32_below(depth) : 0; this_cpu_write(*sb->alloc_hint, hint); } return hint; } static inline void update_alloc_hint_after_get(struct sbitmap *sb, unsigned int depth, unsigned int hint, unsigned int nr) { if (nr == -1) { /* If the map is full, a hint won't do us much good. */ this_cpu_write(*sb->alloc_hint, 0); } else if (nr == hint || unlikely(sb->round_robin)) { /* Only update the hint if we used it. */ hint = nr + 1; if (hint >= depth - 1) hint = 0; this_cpu_write(*sb->alloc_hint, hint); } } /* * See if we have deferred clears that we can batch move */ static inline bool sbitmap_deferred_clear(struct sbitmap_word *map, unsigned int depth, unsigned int alloc_hint, bool wrap) { unsigned long mask, word_mask; guard(raw_spinlock_irqsave)(&map->swap_lock); if (!map->cleared) { if (depth == 0) return false; word_mask = (~0UL) >> (BITS_PER_LONG - depth); /* * The current behavior is to always retry after moving * ->cleared to word, and we change it to retry in case * of any free bits. To avoid an infinite loop, we need * to take wrap & alloc_hint into account, otherwise a * soft lockup may occur. */ if (!wrap && alloc_hint) word_mask &= ~((1UL << alloc_hint) - 1); return (READ_ONCE(map->word) & word_mask) != word_mask; } /* * First get a stable cleared mask, setting the old mask to 0. */ mask = xchg(&map->cleared, 0); /* * Now clear the masked bits in our free word */ atomic_long_andnot(mask, (atomic_long_t *)&map->word); BUILD_BUG_ON(sizeof(atomic_long_t) != sizeof(map->word)); return true; } int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, gfp_t flags, int node, bool round_robin, bool alloc_hint) { unsigned int bits_per_word; int i; if (shift < 0) shift = sbitmap_calculate_shift(depth); bits_per_word = 1U << shift; if (bits_per_word > BITS_PER_LONG) return -EINVAL; sb->shift = shift; sb->depth = depth; sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); sb->round_robin = round_robin; if (depth == 0) { sb->map = NULL; return 0; } if (alloc_hint) { if (init_alloc_hint(sb, flags)) return -ENOMEM; } else { sb->alloc_hint = NULL; } sb->map = kvzalloc_node(sb->map_nr * sizeof(*sb->map), flags, node); if (!sb->map) { free_percpu(sb->alloc_hint); return -ENOMEM; } for (i = 0; i < sb->map_nr; i++) raw_spin_lock_init(&sb->map[i].swap_lock); return 0; } EXPORT_SYMBOL_GPL(sbitmap_init_node); void sbitmap_resize(struct sbitmap *sb, unsigned int depth) { unsigned int bits_per_word = 1U << sb->shift; unsigned int i; for (i = 0; i < sb->map_nr; i++) sbitmap_deferred_clear(&sb->map[i], 0, 0, 0); sb->depth = depth; sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); } EXPORT_SYMBOL_GPL(sbitmap_resize); static int __sbitmap_get_word(unsigned long *word, unsigned long depth, unsigned int hint, bool wrap) { int nr; /* don't wrap if starting from 0 */ wrap = wrap && hint; while (1) { nr = find_next_zero_bit(word, depth, hint); if (unlikely(nr >= depth)) { /* * We started with an offset, and we didn't reset the * offset to 0 in a failure case, so start from 0 to * exhaust the map. 
*/ if (hint && wrap) { hint = 0; continue; } return -1; } if (!test_and_set_bit_lock(nr, word)) break; hint = nr + 1; if (hint >= depth - 1) hint = 0; } return nr; } static int sbitmap_find_bit_in_word(struct sbitmap_word *map, unsigned int depth, unsigned int alloc_hint, bool wrap) { int nr; do { nr = __sbitmap_get_word(&map->word, depth, alloc_hint, wrap); if (nr != -1) break; if (!sbitmap_deferred_clear(map, depth, alloc_hint, wrap)) break; } while (1); return nr; } static int sbitmap_find_bit(struct sbitmap *sb, unsigned int depth, unsigned int index, unsigned int alloc_hint, bool wrap) { unsigned int i; int nr = -1; for (i = 0; i < sb->map_nr; i++) { nr = sbitmap_find_bit_in_word(&sb->map[index], min_t(unsigned int, __map_depth(sb, index), depth), alloc_hint, wrap); if (nr != -1) { nr += index << sb->shift; break; } /* Jump to next index. */ alloc_hint = 0; if (++index >= sb->map_nr) index = 0; } return nr; } static int __sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint) { unsigned int index; index = SB_NR_TO_INDEX(sb, alloc_hint); /* * Unless we're doing round robin tag allocation, just use the * alloc_hint to find the right word index. No point in looping * twice in find_next_zero_bit() for that case. */ if (sb->round_robin) alloc_hint = SB_NR_TO_BIT(sb, alloc_hint); else alloc_hint = 0; return sbitmap_find_bit(sb, UINT_MAX, index, alloc_hint, !sb->round_robin); } int sbitmap_get(struct sbitmap *sb) { int nr; unsigned int hint, depth; if (WARN_ON_ONCE(unlikely(!sb->alloc_hint))) return -1; depth = READ_ONCE(sb->depth); hint = update_alloc_hint_before_get(sb, depth); nr = __sbitmap_get(sb, hint); update_alloc_hint_after_get(sb, depth, hint, nr); return nr; } EXPORT_SYMBOL_GPL(sbitmap_get); static int __sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint, unsigned long shallow_depth) { unsigned int index; index = SB_NR_TO_INDEX(sb, alloc_hint); alloc_hint = SB_NR_TO_BIT(sb, alloc_hint); return sbitmap_find_bit(sb, shallow_depth, index, alloc_hint, true); } int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth) { int nr; unsigned int hint, depth; if (WARN_ON_ONCE(unlikely(!sb->alloc_hint))) return -1; depth = READ_ONCE(sb->depth); hint = update_alloc_hint_before_get(sb, depth); nr = __sbitmap_get_shallow(sb, hint, shallow_depth); update_alloc_hint_after_get(sb, depth, hint, nr); return nr; } EXPORT_SYMBOL_GPL(sbitmap_get_shallow); bool sbitmap_any_bit_set(const struct sbitmap *sb) { unsigned int i; for (i = 0; i < sb->map_nr; i++) { if (sb->map[i].word & ~sb->map[i].cleared) return true; } return false; } EXPORT_SYMBOL_GPL(sbitmap_any_bit_set); static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set) { unsigned int i, weight = 0; for (i = 0; i < sb->map_nr; i++) { const struct sbitmap_word *word = &sb->map[i]; unsigned int word_depth = __map_depth(sb, i); if (set) weight += bitmap_weight(&word->word, word_depth); else weight += bitmap_weight(&word->cleared, word_depth); } return weight; } static unsigned int sbitmap_cleared(const struct sbitmap *sb) { return __sbitmap_weight(sb, false); } unsigned int sbitmap_weight(const struct sbitmap *sb) { return __sbitmap_weight(sb, true) - sbitmap_cleared(sb); } EXPORT_SYMBOL_GPL(sbitmap_weight); void sbitmap_show(struct sbitmap *sb, struct seq_file *m) { seq_printf(m, "depth=%u\n", sb->depth); seq_printf(m, "busy=%u\n", sbitmap_weight(sb)); seq_printf(m, "cleared=%u\n", sbitmap_cleared(sb)); seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift); seq_printf(m, "map_nr=%u\n", sb->map_nr); } 
EXPORT_SYMBOL_GPL(sbitmap_show); static inline void emit_byte(struct seq_file *m, unsigned int offset, u8 byte) { if ((offset & 0xf) == 0) { if (offset != 0) seq_putc(m, '\n'); seq_printf(m, "%08x:", offset); } if ((offset & 0x1) == 0) seq_putc(m, ' '); seq_printf(m, "%02x", byte); } void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m) { u8 byte = 0; unsigned int byte_bits = 0; unsigned int offset = 0; int i; for (i = 0; i < sb->map_nr; i++) { unsigned long word = READ_ONCE(sb->map[i].word); unsigned long cleared = READ_ONCE(sb->map[i].cleared); unsigned int word_bits = __map_depth(sb, i); word &= ~cleared; while (word_bits > 0) { unsigned int bits = min(8 - byte_bits, word_bits); byte |= (word & (BIT(bits) - 1)) << byte_bits; byte_bits += bits; if (byte_bits == 8) { emit_byte(m, offset, byte); byte = 0; byte_bits = 0; offset++; } word >>= bits; word_bits -= bits; } } if (byte_bits) { emit_byte(m, offset, byte); offset++; } if (offset) seq_putc(m, '\n'); } EXPORT_SYMBOL_GPL(sbitmap_bitmap_show); static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq, unsigned int depth) { unsigned int wake_batch; unsigned int shallow_depth; /* * Each full word of the bitmap has bits_per_word bits, and there might * be a partial word. There are depth / bits_per_word full words and * depth % bits_per_word bits left over. In bitwise arithmetic: * * bits_per_word = 1 << shift * depth / bits_per_word = depth >> shift * depth % bits_per_word = depth & ((1 << shift) - 1) * * Each word can be limited to sbq->min_shallow_depth bits. */ shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth); depth = ((depth >> sbq->sb.shift) * shallow_depth + min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth)); wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1, SBQ_WAKE_BATCH); return wake_batch; } int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, int shift, bool round_robin, gfp_t flags, int node) { int ret; int i; ret = sbitmap_init_node(&sbq->sb, depth, shift, flags, node, round_robin, true); if (ret) return ret; sbq->min_shallow_depth = UINT_MAX; sbq->wake_batch = sbq_calc_wake_batch(sbq, depth); atomic_set(&sbq->wake_index, 0); atomic_set(&sbq->ws_active, 0); atomic_set(&sbq->completion_cnt, 0); atomic_set(&sbq->wakeup_cnt, 0); sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node); if (!sbq->ws) { sbitmap_free(&sbq->sb); return -ENOMEM; } for (i = 0; i < SBQ_WAIT_QUEUES; i++) init_waitqueue_head(&sbq->ws[i].wait); return 0; } EXPORT_SYMBOL_GPL(sbitmap_queue_init_node); static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, unsigned int depth) { unsigned int wake_batch; wake_batch = sbq_calc_wake_batch(sbq, depth); if (sbq->wake_batch != wake_batch) WRITE_ONCE(sbq->wake_batch, wake_batch); } void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq, unsigned int users) { unsigned int wake_batch; unsigned int depth = (sbq->sb.depth + users - 1) / users; wake_batch = clamp_val(depth / SBQ_WAIT_QUEUES, 1, SBQ_WAKE_BATCH); WRITE_ONCE(sbq->wake_batch, wake_batch); } EXPORT_SYMBOL_GPL(sbitmap_queue_recalculate_wake_batch); void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) { sbitmap_queue_update_wake_batch(sbq, depth); sbitmap_resize(&sbq->sb, depth); } EXPORT_SYMBOL_GPL(sbitmap_queue_resize); int __sbitmap_queue_get(struct sbitmap_queue *sbq) { return sbitmap_get(&sbq->sb); } EXPORT_SYMBOL_GPL(__sbitmap_queue_get); unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, 
unsigned int *offset) { struct sbitmap *sb = &sbq->sb; unsigned int hint, depth; unsigned long index, nr; int i; if (unlikely(sb->round_robin)) return 0; depth = READ_ONCE(sb->depth); hint = update_alloc_hint_before_get(sb, depth); index = SB_NR_TO_INDEX(sb, hint); for (i = 0; i < sb->map_nr; i++) { struct sbitmap_word *map = &sb->map[index]; unsigned long get_mask; unsigned int map_depth = __map_depth(sb, index); unsigned long val; sbitmap_deferred_clear(map, 0, 0, 0); val = READ_ONCE(map->word); if (val == (1UL << (map_depth - 1)) - 1) goto next; nr = find_first_zero_bit(&val, map_depth); if (nr + nr_tags <= map_depth) { atomic_long_t *ptr = (atomic_long_t *) &map->word; get_mask = ((1UL << nr_tags) - 1) << nr; while (!atomic_long_try_cmpxchg(ptr, &val, get_mask | val)) ; get_mask = (get_mask & ~val) >> nr; if (get_mask) { *offset = nr + (index << sb->shift); update_alloc_hint_after_get(sb, depth, hint, *offset + nr_tags - 1); return get_mask; } } next: /* Jump to next index. */ if (++index >= sb->map_nr) index = 0; } return 0; } int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, unsigned int shallow_depth) { WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth); return sbitmap_get_shallow(&sbq->sb, shallow_depth); } EXPORT_SYMBOL_GPL(sbitmap_queue_get_shallow); void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq, unsigned int min_shallow_depth) { sbq->min_shallow_depth = min_shallow_depth; sbitmap_queue_update_wake_batch(sbq, sbq->sb.depth); } EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth); static void __sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr) { int i, wake_index, woken; if (!atomic_read(&sbq->ws_active)) return; wake_index = atomic_read(&sbq->wake_index); for (i = 0; i < SBQ_WAIT_QUEUES; i++) { struct sbq_wait_state *ws = &sbq->ws[wake_index]; /* * Advance the index before checking the current queue. * It improves fairness, by ensuring the queue doesn't * need to be fully emptied before trying to wake up * from the next one. 
	 */
		wake_index = sbq_index_inc(wake_index);

		if (waitqueue_active(&ws->wait)) {
			woken = wake_up_nr(&ws->wait, nr);
			if (woken == nr)
				break;
			nr -= woken;
		}
	}

	if (wake_index != atomic_read(&sbq->wake_index))
		atomic_set(&sbq->wake_index, wake_index);
}

void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr)
{
	unsigned int wake_batch = READ_ONCE(sbq->wake_batch);
	unsigned int wakeups;

	if (!atomic_read(&sbq->ws_active))
		return;

	atomic_add(nr, &sbq->completion_cnt);
	wakeups = atomic_read(&sbq->wakeup_cnt);

	do {
		if (atomic_read(&sbq->completion_cnt) - wakeups < wake_batch)
			return;
	} while (!atomic_try_cmpxchg(&sbq->wakeup_cnt,
				     &wakeups, wakeups + wake_batch));

	__sbitmap_queue_wake_up(sbq, wake_batch);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);

static inline void sbitmap_update_cpu_hint(struct sbitmap *sb, int cpu,
					   int tag)
{
	if (likely(!sb->round_robin && tag < sb->depth))
		data_race(*per_cpu_ptr(sb->alloc_hint, cpu) = tag);
}

void sbitmap_queue_clear_batch(struct sbitmap_queue *sbq, int offset,
			       int *tags, int nr_tags)
{
	struct sbitmap *sb = &sbq->sb;
	unsigned long *addr = NULL;
	unsigned long mask = 0;
	int i;

	smp_mb__before_atomic();
	for (i = 0; i < nr_tags; i++) {
		const int tag = tags[i] - offset;
		unsigned long *this_addr;

		/* since we're clearing a batch, skip the deferred map */
		this_addr = &sb->map[SB_NR_TO_INDEX(sb, tag)].word;
		if (!addr) {
			addr = this_addr;
		} else if (addr != this_addr) {
			atomic_long_andnot(mask, (atomic_long_t *) addr);
			mask = 0;
			addr = this_addr;
		}
		mask |= (1UL << SB_NR_TO_BIT(sb, tag));
	}

	if (mask)
		atomic_long_andnot(mask, (atomic_long_t *) addr);

	smp_mb__after_atomic();
	sbitmap_queue_wake_up(sbq, nr_tags);
	sbitmap_update_cpu_hint(&sbq->sb, raw_smp_processor_id(),
				tags[nr_tags - 1] - offset);
}

void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
			 unsigned int cpu)
{
	/*
	 * Once the cleared bit is set, the bit may be reallocated. This
	 * store orders READ/WRITE on the associated instance (such as a
	 * blk_mq request) against re-allocation of the bit; it pairs with
	 * the memory barrier implied in __sbitmap_get_word.
	 *
	 * One invariant is that the cleared bit must be zero while the bit
	 * is in use.
	 */
	smp_mb__before_atomic();
	sbitmap_deferred_clear_bit(&sbq->sb, nr);

	/*
	 * Pairs with the memory barrier in set_current_state() to ensure the
	 * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker
	 * and test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the
	 * waiter. See the comment on waitqueue_active().
	 */
	smp_mb__after_atomic();
	sbitmap_queue_wake_up(sbq, 1);
	sbitmap_update_cpu_hint(&sbq->sb, cpu, nr);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_clear);

void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
{
	int i, wake_index;

	/*
	 * Pairs with the memory barrier in set_current_state() like in
	 * sbitmap_queue_wake_up().
	 */
	smp_mb();
	wake_index = atomic_read(&sbq->wake_index);
	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
		struct sbq_wait_state *ws = &sbq->ws[wake_index];

		if (waitqueue_active(&ws->wait))
			wake_up(&ws->wait);

		wake_index = sbq_index_inc(wake_index);
	}
}
EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all);

void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
{
	bool first;
	int i;

	sbitmap_show(&sbq->sb, m);

	seq_puts(m, "alloc_hint={");
	first = true;
	for_each_possible_cpu(i) {
		if (!first)
			seq_puts(m, ", ");
		first = false;
		seq_printf(m, "%u", *per_cpu_ptr(sbq->sb.alloc_hint, i));
	}
	seq_puts(m, "}\n");

	seq_printf(m, "wake_batch=%u\n", sbq->wake_batch);
	seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index));
	seq_printf(m, "ws_active=%d\n", atomic_read(&sbq->ws_active));

	seq_puts(m, "ws={\n");
	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
		struct sbq_wait_state *ws = &sbq->ws[i];

		seq_printf(m, "\t{.wait=%s},\n",
			   waitqueue_active(&ws->wait) ? "active" : "inactive");
	}
	seq_puts(m, "}\n");

	seq_printf(m, "round_robin=%d\n", sbq->sb.round_robin);
	seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_show);

void sbitmap_add_wait_queue(struct sbitmap_queue *sbq,
			    struct sbq_wait_state *ws,
			    struct sbq_wait *sbq_wait)
{
	if (!sbq_wait->sbq) {
		sbq_wait->sbq = sbq;
		atomic_inc(&sbq->ws_active);
		add_wait_queue(&ws->wait, &sbq_wait->wait);
	}
}
EXPORT_SYMBOL_GPL(sbitmap_add_wait_queue);

void sbitmap_del_wait_queue(struct sbq_wait *sbq_wait)
{
	list_del_init(&sbq_wait->wait.entry);
	if (sbq_wait->sbq) {
		atomic_dec(&sbq_wait->sbq->ws_active);
		sbq_wait->sbq = NULL;
	}
}
EXPORT_SYMBOL_GPL(sbitmap_del_wait_queue);

void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq,
			     struct sbq_wait_state *ws,
			     struct sbq_wait *sbq_wait, int state)
{
	if (!sbq_wait->sbq) {
		atomic_inc(&sbq->ws_active);
		sbq_wait->sbq = sbq;
	}
	prepare_to_wait_exclusive(&ws->wait, &sbq_wait->wait, state);
}
EXPORT_SYMBOL_GPL(sbitmap_prepare_to_wait);

void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws,
			 struct sbq_wait *sbq_wait)
{
	finish_wait(&ws->wait, &sbq_wait->wait);
	if (sbq_wait->sbq) {
		atomic_dec(&sbq->ws_active);
		sbq_wait->sbq = NULL;
	}
}
EXPORT_SYMBOL_GPL(sbitmap_finish_wait);
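/*
 * Minimal usage sketch (illustrative only, not part of this file): a
 * driver-style tag allocator built on the sbitmap_queue API above. The
 * names my_tags, my_get_tag() and my_put_tag() are hypothetical, and a
 * negative shift is assumed to select the default bits-per-word.
 *
 *	static struct sbitmap_queue my_tags;
 *
 *	// 128 tags, default shift, no round-robin, per-CPU alloc hints
 *	if (sbitmap_queue_init_node(&my_tags, 128, -1, false,
 *				    GFP_KERNEL, NUMA_NO_NODE))
 *		return -ENOMEM;
 *
 *	static int my_get_tag(void)
 *	{
 *		// returns -1 if no bit is free; callers typically park on
 *		// one of the sbq wait queues before retrying
 *		return __sbitmap_queue_get(&my_tags);
 *	}
 *
 *	static void my_put_tag(int tag)
 *	{
 *		// releases the bit and performs batched waiter wakeups
 *		sbitmap_queue_clear(&my_tags, tag, raw_smp_processor_id());
 *	}
 */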
// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "../kernel/futex/futex.h"
#include "io_uring.h"
#include "alloc_cache.h"
#include "futex.h"

struct io_futex {
	struct file	*file;
	void __user	*uaddr;
	unsigned long	futex_val;
	unsigned long	futex_mask;
	unsigned long	futexv_owned;
	u32		futex_flags;
	unsigned int	futex_nr;
	bool		futexv_unqueued;
};

struct io_futex_data {
	struct futex_q	q;
	struct io_kiocb	*req;
};

#define IO_FUTEX_ALLOC_CACHE_MAX	32

bool io_futex_cache_init(struct io_ring_ctx *ctx)
{
	return io_alloc_cache_init(&ctx->futex_cache, IO_FUTEX_ALLOC_CACHE_MAX,
				   sizeof(struct io_futex_data), 0);
}

void io_futex_cache_free(struct io_ring_ctx *ctx)
{
	io_alloc_cache_free(&ctx->futex_cache, kfree);
}

static void __io_futex_complete(struct io_kiocb *req, io_tw_token_t tw)
{
	req->async_data = NULL;
	hlist_del_init(&req->hash_node);
	io_req_task_complete(req, tw);
}

static void io_futex_complete(struct io_kiocb *req, io_tw_token_t tw)
{
	struct io_ring_ctx *ctx = req->ctx;

	io_tw_lock(ctx, tw);
	io_cache_free(&ctx->futex_cache, req->async_data);
	__io_futex_complete(req, tw);
}

static void io_futexv_complete(struct io_kiocb *req, io_tw_token_t tw)
{
	struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
	struct futex_vector *futexv = req->async_data;

	io_tw_lock(req->ctx, tw);

	if (!iof->futexv_unqueued) {
		int res;

		res = futex_unqueue_multiple(futexv, iof->futex_nr);
		if (res != -1)
			io_req_set_res(req, res, 0);
	}

	kfree(req->async_data);
	req->flags &= ~REQ_F_ASYNC_DATA;
	__io_futex_complete(req, tw);
}

static bool io_futexv_claim(struct io_futex *iof)
{
	if (test_bit(0, &iof->futexv_owned) ||
	    test_and_set_bit_lock(0, &iof->futexv_owned))
		return false;
	return true;
}

static bool __io_futex_cancel(struct io_kiocb *req)
{
	/* futex wake already done or in progress */
	if (req->opcode == IORING_OP_FUTEX_WAIT) {
		struct io_futex_data *ifd = req->async_data;

		if (!futex_unqueue(&ifd->q))
			return false;
		req->io_task_work.func = io_futex_complete;
	} else {
		struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);

		if (!io_futexv_claim(iof))
			return false;
		req->io_task_work.func = io_futexv_complete;
	}
hlist_del_init(&req->hash_node); io_req_set_res(req, -ECANCELED, 0); io_req_task_work_add(req); return true; } int io_futex_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd, unsigned int issue_flags) { return io_cancel_remove(ctx, cd, issue_flags, &ctx->futex_list, __io_futex_cancel); } bool io_futex_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx, bool cancel_all) { return io_cancel_remove_all(ctx, tctx, &ctx->futex_list, cancel_all, __io_futex_cancel); } int io_futex_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex); u32 flags; if (unlikely(sqe->len || sqe->futex_flags || sqe->buf_index || sqe->file_index)) return -EINVAL; iof->uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr)); iof->futex_val = READ_ONCE(sqe->addr2); iof->futex_mask = READ_ONCE(sqe->addr3); flags = READ_ONCE(sqe->fd); if (flags & ~FUTEX2_VALID_MASK) return -EINVAL; iof->futex_flags = futex2_to_flags(flags); if (!futex_flags_valid(iof->futex_flags)) return -EINVAL; if (!futex_validate_input(iof->futex_flags, iof->futex_val) || !futex_validate_input(iof->futex_flags, iof->futex_mask)) return -EINVAL; /* Mark as inflight, so file exit cancelation will find it */ io_req_track_inflight(req); return 0; } static void io_futex_wakev_fn(struct wake_q_head *wake_q, struct futex_q *q) { struct io_kiocb *req = q->wake_data; struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex); if (!io_futexv_claim(iof)) return; if (unlikely(!__futex_wake_mark(q))) return; io_req_set_res(req, 0, 0); req->io_task_work.func = io_futexv_complete; io_req_task_work_add(req); } int io_futexv_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex); struct futex_vector *futexv; int ret; /* No flags or mask supported for waitv */ if (unlikely(sqe->fd || sqe->buf_index || sqe->file_index || sqe->addr2 || sqe->futex_flags || sqe->addr3)) return -EINVAL; iof->uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr)); iof->futex_nr = READ_ONCE(sqe->len); if (!iof->futex_nr || iof->futex_nr > FUTEX_WAITV_MAX) return -EINVAL; futexv = kcalloc(iof->futex_nr, sizeof(*futexv), GFP_KERNEL); if (!futexv) return -ENOMEM; ret = futex_parse_waitv(futexv, iof->uaddr, iof->futex_nr, io_futex_wakev_fn, req); if (ret) { kfree(futexv); return ret; } /* Mark as inflight, so file exit cancelation will find it */ io_req_track_inflight(req); iof->futexv_owned = 0; iof->futexv_unqueued = 0; req->flags |= REQ_F_ASYNC_DATA; req->async_data = futexv; return 0; } static void io_futex_wake_fn(struct wake_q_head *wake_q, struct futex_q *q) { struct io_futex_data *ifd = container_of(q, struct io_futex_data, q); struct io_kiocb *req = ifd->req; if (unlikely(!__futex_wake_mark(q))) return; io_req_set_res(req, 0, 0); req->io_task_work.func = io_futex_complete; io_req_task_work_add(req); } int io_futexv_wait(struct io_kiocb *req, unsigned int issue_flags) { struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex); struct futex_vector *futexv = req->async_data; struct io_ring_ctx *ctx = req->ctx; int ret, woken = -1; io_ring_submit_lock(ctx, issue_flags); ret = futex_wait_multiple_setup(futexv, iof->futex_nr, &woken); /* * Error case, ret is < 0. Mark the request as failed. 
	 */
	if (unlikely(ret < 0)) {
		io_ring_submit_unlock(ctx, issue_flags);
		req_set_fail(req);
		io_req_set_res(req, ret, 0);
		kfree(futexv);
		req->async_data = NULL;
		req->flags &= ~REQ_F_ASYNC_DATA;
		return IOU_COMPLETE;
	}

	/*
	 * 0 return means that we successfully setup the waiters, and that
	 * nobody triggered a wakeup while we were doing so. If the wakeup
	 * happened post setup, the task_work will be run post this issue and
	 * under the submission lock. 1 means we got woken while setting up,
	 * let that side do the completion. Note that
	 * futex_wait_multiple_setup() will have unqueued all the futexes in
	 * this case. Mark us as having done that already, since this is
	 * different from normal wakeup.
	 */
	if (!ret) {
		/*
		 * If futex_wait_multiple_setup() returns 0 for a
		 * successful setup, then the task state will not be
		 * runnable. This is fine for the sync syscall, as
		 * it'll be blocking unless we already got one of the
		 * futexes woken, but it obviously won't work for an
		 * async invocation. Mark us runnable again.
		 */
		__set_current_state(TASK_RUNNING);
		hlist_add_head(&req->hash_node, &ctx->futex_list);
	} else {
		iof->futexv_unqueued = 1;
		if (woken != -1)
			io_req_set_res(req, woken, 0);
	}

	io_ring_submit_unlock(ctx, issue_flags);
	return IOU_ISSUE_SKIP_COMPLETE;
}

int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_futex_data *ifd = NULL;
	int ret;

	if (!iof->futex_mask) {
		ret = -EINVAL;
		goto done;
	}

	io_ring_submit_lock(ctx, issue_flags);
	ifd = io_cache_alloc(&ctx->futex_cache, GFP_NOWAIT);
	if (!ifd) {
		ret = -ENOMEM;
		goto done_unlock;
	}

	req->async_data = ifd;
	ifd->q = futex_q_init;
	ifd->q.bitset = iof->futex_mask;
	ifd->q.wake = io_futex_wake_fn;
	ifd->req = req;

	ret = futex_wait_setup(iof->uaddr, iof->futex_val, iof->futex_flags,
			       &ifd->q, NULL, NULL);
	if (!ret) {
		hlist_add_head(&req->hash_node, &ctx->futex_list);
		io_ring_submit_unlock(ctx, issue_flags);
		return IOU_ISSUE_SKIP_COMPLETE;
	}

done_unlock:
	io_ring_submit_unlock(ctx, issue_flags);
done:
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	kfree(ifd);
	return IOU_COMPLETE;
}

int io_futex_wake(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
	int ret;

	/*
	 * Strict flags - ensure that waking 0 futexes yields a 0 result.
	 * See commit 43adf8449510 ("futex: FLAGS_STRICT") for details.
	 */
	ret = futex_wake(iof->uaddr, FLAGS_STRICT | iof->futex_flags,
			 iof->futex_val, iof->futex_mask);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}
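/*
 * Illustrative only: how a raw SQE for IORING_OP_FUTEX_WAIT maps onto the
 * fields read by io_futex_prep() above. An application would normally use
 * a liburing prep helper instead; the sqe, futex_word and expected_val
 * names here are hypothetical and the futex2 flag value is an assumption
 * (FUTEX2_SIZE_U32 is the only size currently accepted).
 *
 *	sqe->opcode = IORING_OP_FUTEX_WAIT;
 *	sqe->addr   = (unsigned long)&futex_word;	// iof->uaddr
 *	sqe->addr2  = expected_val;			// iof->futex_val
 *	sqe->addr3  = FUTEX_BITSET_MATCH_ANY;		// iof->futex_mask
 *	sqe->fd     = FUTEX2_SIZE_U32;			// futex2 flags
 *	// len, futex_flags, buf_index and file_index must be zero,
 *	// or the prep handler returns -EINVAL
 */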
/*
 * Copyright (c) 2006-2009 Red Hat Inc.
 * Copyright (c) 2006-2008 Intel Corporation
 * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
 *
 * DRM framebuffer helper functions
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that copyright
 * notice and this permission notice appear in supporting documentation, and
 * that the name of the copyright holders not be used in advertising or
 * publicity pertaining to distribution of the software without specific,
 * written prior permission. The copyright holders make no representations
 * about the suitability of this software for any purpose. It is provided "as
 * is" without express or implied warranty.
 *
 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
 * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 * OF THIS SOFTWARE.
* * Authors: * Dave Airlie <airlied@linux.ie> * Jesse Barnes <jesse.barnes@intel.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/console.h> #include <linux/pci.h> #include <linux/sysrq.h> #include <linux/vga_switcheroo.h> #include <drm/drm_atomic.h> #include <drm/drm_drv.h> #include <drm/drm_fb_helper.h> #include <drm/drm_fourcc.h> #include <drm/drm_framebuffer.h> #include <drm/drm_modeset_helper_vtables.h> #include <drm/drm_print.h> #include <drm/drm_vblank.h> #include "drm_internal.h" #include "drm_crtc_internal.h" static bool drm_fbdev_emulation = true; module_param_named(fbdev_emulation, drm_fbdev_emulation, bool, 0600); MODULE_PARM_DESC(fbdev_emulation, "Enable legacy fbdev emulation [default=true]"); static int drm_fbdev_overalloc = CONFIG_DRM_FBDEV_OVERALLOC; module_param(drm_fbdev_overalloc, int, 0444); MODULE_PARM_DESC(drm_fbdev_overalloc, "Overallocation of the fbdev buffer (%) [default=" __MODULE_STRING(CONFIG_DRM_FBDEV_OVERALLOC) "]"); /* * In order to keep user-space compatibility, we want in certain use-cases * to keep leaking the fbdev physical address to the user-space program * handling the fbdev buffer. * * This is a bad habit, essentially kept to support closed-source OpenGL * drivers that should really be moved into open-source upstream projects * instead of using legacy physical addresses in user space to communicate * with other out-of-tree kernel modules. * * This module_param *should* be removed as soon as possible and be * considered as a broken and legacy behaviour from a modern fbdev device. */ static bool drm_leak_fbdev_smem; #if IS_ENABLED(CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM) module_param_unsafe(drm_leak_fbdev_smem, bool, 0600); MODULE_PARM_DESC(drm_leak_fbdev_smem, "Allow unsafe leaking fbdev physical smem address [default=false]"); #endif static LIST_HEAD(kernel_fb_helper_list); static DEFINE_MUTEX(kernel_fb_helper_lock); /** * DOC: fbdev helpers * * The fb helper functions are useful to provide an fbdev on top of a drm kernel * mode setting driver. They can be used mostly independently from the crtc * helper functions used by many drivers to implement the kernel mode setting * interfaces. Drivers that use one of the shared memory managers, TTM, SHMEM, * DMA, should instead use the corresponding fbdev emulation. * * For suspend/resume consider using drm_mode_config_helper_suspend() and * drm_mode_config_helper_resume() which takes care of fbdev as well. * * All other functions exported by the fb helper library can be used to * implement the fbdev driver interface by the driver. * * It is possible, though perhaps somewhat tricky, to implement race-free * hotplug detection using the fbdev helpers. The drm_fb_helper_prepare() * helper must be called first to initialize the minimum required to make * hotplug detection work. Drivers also need to make sure to properly set up * the &drm_mode_config.funcs member. After calling drm_kms_helper_poll_init() * it is safe to enable interrupts and start processing hotplug events. At the * same time, drivers should initialize all modeset objects such as CRTCs, * encoders and connectors. To finish up the fbdev helper initialization, the * drm_fb_helper_init() function is called. To probe for all attached displays * and set up an initial configuration using the detected hardware, drivers * should call drm_fb_helper_initial_config(). 
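 *
 * A minimal ordering sketch of that init sequence (illustrative only;
 * my_fb_helper_funcs stands in for the driver's &struct drm_fb_helper_funcs,
 * and error handling is elided):
 *
 *	drm_fb_helper_prepare(dev, helper, 32, &my_fb_helper_funcs);
 *	// set up &drm_mode_config.funcs, then create CRTCs,
 *	// encoders and connectors
 *	drm_kms_helper_poll_init(dev);
 *	// interrupts and hotplug processing may now be enabled
 *	ret = drm_fb_helper_init(dev, helper);
 *	if (!ret)
 *		ret = drm_fb_helper_initial_config(helper);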
 *
 * If &drm_framebuffer_funcs.dirty is set, the
 * drm_fb_helper_{cfb,sys}_{write,fillrect,copyarea,imageblit} functions will
 * accumulate changes and schedule &drm_fb_helper.damage_work to run right
 * away. This worker then calls the dirty() function ensuring that it will
 * always run in process context since the fb_*() function could be running in
 * atomic context. If drm_fb_helper_deferred_io() is used as the deferred_io
 * callback it will also schedule damage_work with the damage collected from
 * the mmap page writes.
 */

static void drm_fb_helper_restore_lut_atomic(struct drm_crtc *crtc)
{
	uint16_t *r_base, *g_base, *b_base;

	if (crtc->funcs->gamma_set == NULL)
		return;

	r_base = crtc->gamma_store;
	g_base = r_base + crtc->gamma_size;
	b_base = g_base + crtc->gamma_size;

	crtc->funcs->gamma_set(crtc, r_base, g_base, b_base,
			       crtc->gamma_size, NULL);
}

/**
 * drm_fb_helper_debug_enter - implementation for &fb_ops.fb_debug_enter
 * @info: fbdev registered by the helper
 */
int drm_fb_helper_debug_enter(struct fb_info *info)
{
	struct drm_fb_helper *helper = info->par;
	const struct drm_crtc_helper_funcs *funcs;
	struct drm_mode_set *mode_set;

	list_for_each_entry(helper, &kernel_fb_helper_list, kernel_fb_list) {
		mutex_lock(&helper->client.modeset_mutex);
		drm_client_for_each_modeset(mode_set, &helper->client) {
			if (!mode_set->crtc->enabled)
				continue;

			funcs = mode_set->crtc->helper_private;
			if (funcs->mode_set_base_atomic == NULL)
				continue;

			if (drm_drv_uses_atomic_modeset(mode_set->crtc->dev))
				continue;

			funcs->mode_set_base_atomic(mode_set->crtc,
						    mode_set->fb,
						    mode_set->x,
						    mode_set->y,
						    ENTER_ATOMIC_MODE_SET);
		}
		mutex_unlock(&helper->client.modeset_mutex);
	}

	return 0;
}
EXPORT_SYMBOL(drm_fb_helper_debug_enter);

/**
 * drm_fb_helper_debug_leave - implementation for &fb_ops.fb_debug_leave
 * @info: fbdev registered by the helper
 */
int drm_fb_helper_debug_leave(struct fb_info *info)
{
	struct drm_fb_helper *helper = info->par;
	struct drm_client_dev *client = &helper->client;
	struct drm_device *dev = helper->dev;
	struct drm_crtc *crtc;
	const struct drm_crtc_helper_funcs *funcs;
	struct drm_mode_set *mode_set;
	struct drm_framebuffer *fb;

	mutex_lock(&client->modeset_mutex);
	drm_client_for_each_modeset(mode_set, client) {
		crtc = mode_set->crtc;
		if (drm_drv_uses_atomic_modeset(crtc->dev))
			continue;

		funcs = crtc->helper_private;
		fb = crtc->primary->fb;

		if (!crtc->enabled)
			continue;

		if (!fb) {
			drm_err(dev, "no fb to restore?\n");
			continue;
		}

		if (funcs->mode_set_base_atomic == NULL)
			continue;

		drm_fb_helper_restore_lut_atomic(mode_set->crtc);
		funcs->mode_set_base_atomic(mode_set->crtc, fb, crtc->x,
					    crtc->y, LEAVE_ATOMIC_MODE_SET);
	}
	mutex_unlock(&client->modeset_mutex);

	return 0;
}
EXPORT_SYMBOL(drm_fb_helper_debug_leave);

static int
__drm_fb_helper_restore_fbdev_mode_unlocked(struct drm_fb_helper *fb_helper,
					    bool force)
{
	bool do_delayed;
	int ret;

	if (!drm_fbdev_emulation || !fb_helper)
		return -ENODEV;

	if (READ_ONCE(fb_helper->deferred_setup))
		return 0;

	mutex_lock(&fb_helper->lock);
	if (force) {
		/*
		 * Yes this is the _locked version which expects the master lock
		 * to be held. But for forced restores we're intentionally
		 * racing here, see drm_fb_helper_set_par().
*/ ret = drm_client_modeset_commit_locked(&fb_helper->client); } else { ret = drm_client_modeset_commit(&fb_helper->client); } do_delayed = fb_helper->delayed_hotplug; if (do_delayed) fb_helper->delayed_hotplug = false; mutex_unlock(&fb_helper->lock); if (do_delayed) drm_fb_helper_hotplug_event(fb_helper); if (fb_helper->funcs->fb_restore) fb_helper->funcs->fb_restore(fb_helper); return ret; } /** * drm_fb_helper_restore_fbdev_mode_unlocked - restore fbdev configuration * @fb_helper: driver-allocated fbdev helper, can be NULL * * This helper should be called from fbdev emulation's &drm_client_funcs.restore * callback. It ensures that the user isn't greeted with a black screen when the * userspace compositor releases the display device. * * Returns: * 0 on success, or a negative errno code otherwise. */ int drm_fb_helper_restore_fbdev_mode_unlocked(struct drm_fb_helper *fb_helper) { return __drm_fb_helper_restore_fbdev_mode_unlocked(fb_helper, false); } EXPORT_SYMBOL(drm_fb_helper_restore_fbdev_mode_unlocked); #ifdef CONFIG_MAGIC_SYSRQ /* emergency restore, don't bother with error reporting */ static void drm_fb_helper_restore_work_fn(struct work_struct *ignored) { struct drm_fb_helper *helper; mutex_lock(&kernel_fb_helper_lock); list_for_each_entry(helper, &kernel_fb_helper_list, kernel_fb_list) { struct drm_device *dev = helper->dev; if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) continue; mutex_lock(&helper->lock); drm_client_modeset_commit_locked(&helper->client); mutex_unlock(&helper->lock); } mutex_unlock(&kernel_fb_helper_lock); } static DECLARE_WORK(drm_fb_helper_restore_work, drm_fb_helper_restore_work_fn); static void drm_fb_helper_sysrq(u8 dummy1) { schedule_work(&drm_fb_helper_restore_work); } static const struct sysrq_key_op sysrq_drm_fb_helper_restore_op = { .handler = drm_fb_helper_sysrq, .help_msg = "force-fb(v)", .action_msg = "Restore framebuffer console", }; #else static const struct sysrq_key_op sysrq_drm_fb_helper_restore_op = { }; #endif static void drm_fb_helper_dpms(struct fb_info *info, int dpms_mode) { struct drm_fb_helper *fb_helper = info->par; mutex_lock(&fb_helper->lock); drm_client_modeset_dpms(&fb_helper->client, dpms_mode); mutex_unlock(&fb_helper->lock); } /** * drm_fb_helper_blank - implementation for &fb_ops.fb_blank * @blank: desired blanking state * @info: fbdev registered by the helper */ int drm_fb_helper_blank(int blank, struct fb_info *info) { if (oops_in_progress) return -EBUSY; switch (blank) { /* Display: On; HSync: On, VSync: On */ case FB_BLANK_UNBLANK: drm_fb_helper_dpms(info, DRM_MODE_DPMS_ON); break; /* Display: Off; HSync: On, VSync: On */ case FB_BLANK_NORMAL: drm_fb_helper_dpms(info, DRM_MODE_DPMS_STANDBY); break; /* Display: Off; HSync: Off, VSync: On */ case FB_BLANK_HSYNC_SUSPEND: drm_fb_helper_dpms(info, DRM_MODE_DPMS_STANDBY); break; /* Display: Off; HSync: On, VSync: Off */ case FB_BLANK_VSYNC_SUSPEND: drm_fb_helper_dpms(info, DRM_MODE_DPMS_SUSPEND); break; /* Display: Off; HSync: Off, VSync: Off */ case FB_BLANK_POWERDOWN: drm_fb_helper_dpms(info, DRM_MODE_DPMS_OFF); break; } return 0; } EXPORT_SYMBOL(drm_fb_helper_blank); static void drm_fb_helper_resume_worker(struct work_struct *work) { struct drm_fb_helper *helper = container_of(work, struct drm_fb_helper, resume_work); console_lock(); fb_set_suspend(helper->info, 0); console_unlock(); } static void drm_fb_helper_fb_dirty(struct drm_fb_helper *helper) { struct drm_device *dev = helper->dev; struct drm_clip_rect *clip = &helper->damage_clip; struct drm_clip_rect clip_copy; 
	unsigned long flags;
	int ret;

	if (drm_WARN_ON_ONCE(dev, !helper->funcs->fb_dirty))
		return;

	spin_lock_irqsave(&helper->damage_lock, flags);
	clip_copy = *clip;
	clip->x1 = clip->y1 = ~0;
	clip->x2 = clip->y2 = 0;
	spin_unlock_irqrestore(&helper->damage_lock, flags);

	ret = helper->funcs->fb_dirty(helper, &clip_copy);
	if (ret)
		goto err;

	return;

err:
	/*
	 * Restore damage clip rectangle on errors. The next run
	 * of the damage worker will perform the update.
	 */
	spin_lock_irqsave(&helper->damage_lock, flags);
	clip->x1 = min_t(u32, clip->x1, clip_copy.x1);
	clip->y1 = min_t(u32, clip->y1, clip_copy.y1);
	clip->x2 = max_t(u32, clip->x2, clip_copy.x2);
	clip->y2 = max_t(u32, clip->y2, clip_copy.y2);
	spin_unlock_irqrestore(&helper->damage_lock, flags);
}

static void drm_fb_helper_damage_work(struct work_struct *work)
{
	struct drm_fb_helper *helper = container_of(work, struct drm_fb_helper,
						    damage_work);

	drm_fb_helper_fb_dirty(helper);
}

/**
 * drm_fb_helper_prepare - setup a drm_fb_helper structure
 * @dev: DRM device
 * @helper: driver-allocated fbdev helper structure to set up
 * @preferred_bpp: Preferred bits per pixel for the device.
 * @funcs: pointer to structure of functions associated with this helper
 *
 * Sets up the bare minimum to make the framebuffer helper usable. This is
 * useful to implement race-free initialization of the polling helpers.
 */
void drm_fb_helper_prepare(struct drm_device *dev, struct drm_fb_helper *helper,
			   unsigned int preferred_bpp,
			   const struct drm_fb_helper_funcs *funcs)
{
	/*
	 * Pick a preferred bpp of 32 if no value has been given. This
	 * will select XRGB8888 for the framebuffer formats. All drivers
	 * have to support XRGB8888 for backwards compatibility with legacy
	 * userspace, so it's the safe choice here.
	 *
	 * TODO: Replace struct drm_mode_config.preferred_depth and this
	 *       bpp value with a preferred format that is given as struct
	 *       drm_format_info. Then derive all other values from the
	 *       format.
	 */
	if (!preferred_bpp)
		preferred_bpp = 32;

	INIT_LIST_HEAD(&helper->kernel_fb_list);
	spin_lock_init(&helper->damage_lock);
	INIT_WORK(&helper->resume_work, drm_fb_helper_resume_worker);
	INIT_WORK(&helper->damage_work, drm_fb_helper_damage_work);
	helper->damage_clip.x1 = helper->damage_clip.y1 = ~0;
	mutex_init(&helper->lock);
	helper->funcs = funcs;
	helper->dev = dev;
	helper->preferred_bpp = preferred_bpp;
}
EXPORT_SYMBOL(drm_fb_helper_prepare);

/**
 * drm_fb_helper_unprepare - clean up a drm_fb_helper structure
 * @fb_helper: driver-allocated fbdev helper structure to set up
 *
 * Cleans up the framebuffer helper. Inverse of drm_fb_helper_prepare().
 */
void drm_fb_helper_unprepare(struct drm_fb_helper *fb_helper)
{
	mutex_destroy(&fb_helper->lock);
}
EXPORT_SYMBOL(drm_fb_helper_unprepare);

/**
 * drm_fb_helper_init - initialize a &struct drm_fb_helper
 * @dev: drm device
 * @fb_helper: driver-allocated fbdev helper structure to initialize
 *
 * This allocates the structures for the fbdev helper with the given limits.
 * Note that this won't yet touch the hardware (through the driver interfaces)
 * nor register the fbdev. This is only done in drm_fb_helper_initial_config()
 * to allow drivers more control over the exact init sequence.
 *
 * Drivers must call drm_fb_helper_prepare() before calling this function.
 *
 * RETURNS:
 * Zero if everything went ok, nonzero otherwise.
 */
int drm_fb_helper_init(struct drm_device *dev,
		       struct drm_fb_helper *fb_helper)
{
	int ret;

	/*
	 * If this is not the generic fbdev client, initialize a drm_client
	 * without callbacks so we can use the modesets.
	 */
	if (!fb_helper->client.funcs) {
		ret = drm_client_init(dev, &fb_helper->client, "drm_fb_helper", NULL);
		if (ret)
			return ret;
	}

	dev->fb_helper = fb_helper;

	return 0;
}
EXPORT_SYMBOL(drm_fb_helper_init);

/**
 * drm_fb_helper_alloc_info - allocate fb_info and some of its members
 * @fb_helper: driver-allocated fbdev helper
 *
 * A helper to alloc fb_info and the member cmap. Called by the driver
 * within the struct &drm_driver.fbdev_probe callback function. Drivers do
 * not need to release the allocated fb_info structure themselves, this is
 * automatically done when calling drm_fb_helper_fini().
 *
 * RETURNS:
 * fb_info pointer if things went okay, pointer containing error code
 * otherwise
 */
struct fb_info *drm_fb_helper_alloc_info(struct drm_fb_helper *fb_helper)
{
	struct device *dev = fb_helper->dev->dev;
	struct fb_info *info;
	int ret;

	info = framebuffer_alloc(0, dev);
	if (!info)
		return ERR_PTR(-ENOMEM);

	if (!drm_leak_fbdev_smem)
		info->flags |= FBINFO_HIDE_SMEM_START;

	ret = fb_alloc_cmap(&info->cmap, 256, 0);
	if (ret)
		goto err_release;

	fb_helper->info = info;
	info->skip_vt_switch = true;

	info->skip_panic = drm_panic_is_enabled(fb_helper->dev);

	return info;

err_release:
	framebuffer_release(info);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL(drm_fb_helper_alloc_info);

/**
 * drm_fb_helper_release_info - release fb_info and its members
 * @fb_helper: driver-allocated fbdev helper
 *
 * A helper to release fb_info and the member cmap. Drivers do not
 * need to release the allocated fb_info structure themselves, this is
 * automatically done when calling drm_fb_helper_fini().
 */
void drm_fb_helper_release_info(struct drm_fb_helper *fb_helper)
{
	struct fb_info *info = fb_helper->info;

	if (!info)
		return;

	fb_helper->info = NULL;

	if (info->cmap.len)
		fb_dealloc_cmap(&info->cmap);
	framebuffer_release(info);
}
EXPORT_SYMBOL(drm_fb_helper_release_info);

/**
 * drm_fb_helper_unregister_info - unregister fb_info framebuffer device
 * @fb_helper: driver-allocated fbdev helper, must not be NULL
 *
 * A wrapper around unregister_framebuffer, to release the fb_info
 * framebuffer device. This must be called before releasing all resources for
 * @fb_helper by calling drm_fb_helper_fini().
 */
void drm_fb_helper_unregister_info(struct drm_fb_helper *fb_helper)
{
	struct fb_info *info = fb_helper->info;
	struct device *dev = info->device;

	if (dev_is_pci(dev))
		vga_switcheroo_client_fb_set(to_pci_dev(dev), NULL);
	unregister_framebuffer(fb_helper->info);
}
EXPORT_SYMBOL(drm_fb_helper_unregister_info);

/**
 * drm_fb_helper_fini - finalize a &struct drm_fb_helper
 * @fb_helper: driver-allocated fbdev helper, can be NULL
 *
 * This cleans up all remaining resources associated with @fb_helper.
*/ void drm_fb_helper_fini(struct drm_fb_helper *fb_helper) { if (!fb_helper) return; fb_helper->dev->fb_helper = NULL; if (!drm_fbdev_emulation) return; cancel_work_sync(&fb_helper->resume_work); cancel_work_sync(&fb_helper->damage_work); drm_fb_helper_release_info(fb_helper); mutex_lock(&kernel_fb_helper_lock); if (!list_empty(&fb_helper->kernel_fb_list)) { list_del(&fb_helper->kernel_fb_list); if (list_empty(&kernel_fb_helper_list)) unregister_sysrq_key('v', &sysrq_drm_fb_helper_restore_op); } mutex_unlock(&kernel_fb_helper_lock); if (!fb_helper->client.funcs) drm_client_release(&fb_helper->client); } EXPORT_SYMBOL(drm_fb_helper_fini); static void drm_fb_helper_add_damage_clip(struct drm_fb_helper *helper, u32 x, u32 y, u32 width, u32 height) { struct drm_clip_rect *clip = &helper->damage_clip; unsigned long flags; spin_lock_irqsave(&helper->damage_lock, flags); clip->x1 = min_t(u32, clip->x1, x); clip->y1 = min_t(u32, clip->y1, y); clip->x2 = max_t(u32, clip->x2, x + width); clip->y2 = max_t(u32, clip->y2, y + height); spin_unlock_irqrestore(&helper->damage_lock, flags); } static void drm_fb_helper_damage(struct drm_fb_helper *helper, u32 x, u32 y, u32 width, u32 height) { /* * This function may be invoked by panic() to flush the frame * buffer, where all CPUs except the panic CPU are stopped. * During the following schedule_work(), the panic CPU needs * the worker_pool lock, which might be held by a stopped CPU, * causing schedule_work() and panic() to block. Return early on * oops_in_progress to prevent this blocking. */ if (oops_in_progress) return; drm_fb_helper_add_damage_clip(helper, x, y, width, height); schedule_work(&helper->damage_work); } /* * Convert memory region into area of scanlines and pixels per * scanline. The parameters off and len must not reach beyond * the end of the framebuffer. */ static void drm_fb_helper_memory_range_to_clip(struct fb_info *info, off_t off, size_t len, struct drm_rect *clip) { u32 line_length = info->fix.line_length; u32 fb_height = info->var.yres; off_t end = off + len; u32 x1 = 0; u32 y1 = off / line_length; u32 x2 = info->var.xres; u32 y2 = DIV_ROUND_UP(end, line_length); /* Don't allow any of them beyond the bottom bound of display area */ if (y1 > fb_height) y1 = fb_height; if (y2 > fb_height) y2 = fb_height; if ((y2 - y1) == 1) { /* * We've only written to a single scanline. Try to reduce * the number of horizontal pixels that need an update. */ off_t bit_off = (off % line_length) * 8; off_t bit_end = (end % line_length) * 8; x1 = bit_off / info->var.bits_per_pixel; x2 = DIV_ROUND_UP(bit_end, info->var.bits_per_pixel); } drm_rect_init(clip, x1, y1, x2 - x1, y2 - y1); } /* Don't use in new code. */ void drm_fb_helper_damage_range(struct fb_info *info, off_t off, size_t len) { struct drm_fb_helper *fb_helper = info->par; struct drm_rect damage_area; drm_fb_helper_memory_range_to_clip(info, off, len, &damage_area); drm_fb_helper_damage(fb_helper, damage_area.x1, damage_area.y1, drm_rect_width(&damage_area), drm_rect_height(&damage_area)); } EXPORT_SYMBOL(drm_fb_helper_damage_range); /* Don't use in new code. 
 */
void drm_fb_helper_damage_area(struct fb_info *info, u32 x, u32 y, u32 width, u32 height)
{
	struct drm_fb_helper *fb_helper = info->par;

	drm_fb_helper_damage(fb_helper, x, y, width, height);
}
EXPORT_SYMBOL(drm_fb_helper_damage_area);

#ifdef CONFIG_FB_DEFERRED_IO
/**
 * drm_fb_helper_deferred_io() - fbdev deferred_io callback function
 * @info: fb_info struct pointer
 * @pagereflist: list of mmap framebuffer pages that have to be flushed
 *
 * This function is used as the &fb_deferred_io.deferred_io
 * callback function for flushing the fbdev mmap writes.
 */
void drm_fb_helper_deferred_io(struct fb_info *info, struct list_head *pagereflist)
{
	struct drm_fb_helper *helper = info->par;
	unsigned long start, end, min_off, max_off, total_size;
	struct fb_deferred_io_pageref *pageref;
	struct drm_rect damage_area;

	min_off = ULONG_MAX;
	max_off = 0;
	list_for_each_entry(pageref, pagereflist, list) {
		start = pageref->offset;
		end = start + PAGE_SIZE;
		min_off = min(min_off, start);
		max_off = max(max_off, end);
	}

	/*
	 * As we can only track pages, we might reach beyond the end
	 * of the screen and account for non-existing scanlines. Hence,
	 * keep the covered memory area within the screen buffer.
	 */
	if (info->screen_size)
		total_size = info->screen_size;
	else
		total_size = info->fix.smem_len;
	max_off = min(max_off, total_size);

	if (min_off < max_off) {
		drm_fb_helper_memory_range_to_clip(info, min_off, max_off - min_off,
						   &damage_area);
		drm_fb_helper_damage(helper, damage_area.x1, damage_area.y1,
				     drm_rect_width(&damage_area),
				     drm_rect_height(&damage_area));
	}
}
EXPORT_SYMBOL(drm_fb_helper_deferred_io);
#endif

/**
 * drm_fb_helper_set_suspend - wrapper around fb_set_suspend
 * @fb_helper: driver-allocated fbdev helper, can be NULL
 * @suspend: whether to suspend or resume
 *
 * A wrapper around fb_set_suspend implemented by fbdev core.
 * Use drm_fb_helper_set_suspend_unlocked() if you don't need to take
 * the lock yourself
 */
void drm_fb_helper_set_suspend(struct drm_fb_helper *fb_helper, bool suspend)
{
	if (!fb_helper || !fb_helper->info)
		return;

	if (fb_helper->funcs->fb_set_suspend)
		fb_helper->funcs->fb_set_suspend(fb_helper, suspend);
	else
		fb_set_suspend(fb_helper->info, suspend);
}
EXPORT_SYMBOL(drm_fb_helper_set_suspend);

/**
 * drm_fb_helper_set_suspend_unlocked - wrapper around fb_set_suspend that also
 *                                      takes the console lock
 * @fb_helper: driver-allocated fbdev helper, can be NULL
 * @suspend: whether to suspend or resume
 *
 * A wrapper around fb_set_suspend() that takes the console lock. If the lock
 * isn't available on resume, a worker is tasked with waiting for the lock
 * to become available. The console lock can be pretty contended on resume
 * due to all the printk activity.
 *
 * This function can be called multiple times with the same state since
 * &fb_info.state is checked to see if fbdev is running or not before locking.
 *
 * Use drm_fb_helper_set_suspend() if you need to take the lock yourself.
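 *
 * Illustrative driver-side sketch (the fb_helper pointer is hypothetical;
 * many drivers instead rely on drm_mode_config_helper_suspend()/_resume(),
 * mentioned in the DOC section above, which handle fbdev as well):
 *
 *	// system-suspend path
 *	drm_fb_helper_set_suspend_unlocked(fb_helper, true);
 *	// ...power the hardware down and later re-initialize it...
 *	// resume path; defers to a worker if the console lock is contended
 *	drm_fb_helper_set_suspend_unlocked(fb_helper, false);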
*/ void drm_fb_helper_set_suspend_unlocked(struct drm_fb_helper *fb_helper, bool suspend) { if (!fb_helper || !fb_helper->info) return; /* make sure there's no pending/ongoing resume */ flush_work(&fb_helper->resume_work); if (suspend) { if (fb_helper->info->state != FBINFO_STATE_RUNNING) return; console_lock(); } else { if (fb_helper->info->state == FBINFO_STATE_RUNNING) return; if (!console_trylock()) { schedule_work(&fb_helper->resume_work); return; } } drm_fb_helper_set_suspend(fb_helper, suspend); console_unlock(); } EXPORT_SYMBOL(drm_fb_helper_set_suspend_unlocked); static int setcmap_pseudo_palette(struct fb_cmap *cmap, struct fb_info *info) { u32 *palette = (u32 *)info->pseudo_palette; int i; if (cmap->start + cmap->len > 16) return -EINVAL; for (i = 0; i < cmap->len; ++i) { u16 red = cmap->red[i]; u16 green = cmap->green[i]; u16 blue = cmap->blue[i]; u32 value; red >>= 16 - info->var.red.length; green >>= 16 - info->var.green.length; blue >>= 16 - info->var.blue.length; value = (red << info->var.red.offset) | (green << info->var.green.offset) | (blue << info->var.blue.offset); if (info->var.transp.length > 0) { u32 mask = (1 << info->var.transp.length) - 1; mask <<= info->var.transp.offset; value |= mask; } palette[cmap->start + i] = value; } return 0; } static int setcmap_legacy(struct fb_cmap *cmap, struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; struct drm_mode_set *modeset; struct drm_crtc *crtc; u16 *r, *g, *b; int ret = 0; drm_modeset_lock_all(fb_helper->dev); drm_client_for_each_modeset(modeset, &fb_helper->client) { crtc = modeset->crtc; if (!crtc->funcs->gamma_set || !crtc->gamma_size) { ret = -EINVAL; goto out; } if (cmap->start + cmap->len > crtc->gamma_size) { ret = -EINVAL; goto out; } r = crtc->gamma_store; g = r + crtc->gamma_size; b = g + crtc->gamma_size; memcpy(r + cmap->start, cmap->red, cmap->len * sizeof(*r)); memcpy(g + cmap->start, cmap->green, cmap->len * sizeof(*g)); memcpy(b + cmap->start, cmap->blue, cmap->len * sizeof(*b)); ret = crtc->funcs->gamma_set(crtc, r, g, b, crtc->gamma_size, NULL); if (ret) goto out; } out: drm_modeset_unlock_all(fb_helper->dev); return ret; } static struct drm_property_blob *setcmap_new_gamma_lut(struct drm_crtc *crtc, struct fb_cmap *cmap) { struct drm_device *dev = crtc->dev; struct drm_property_blob *gamma_lut; struct drm_color_lut *lut; int size = crtc->gamma_size; int i; if (!size || cmap->start + cmap->len > size) return ERR_PTR(-EINVAL); gamma_lut = drm_property_create_blob(dev, sizeof(*lut) * size, NULL); if (IS_ERR(gamma_lut)) return gamma_lut; lut = gamma_lut->data; if (cmap->start || cmap->len != size) { u16 *r = crtc->gamma_store; u16 *g = r + crtc->gamma_size; u16 *b = g + crtc->gamma_size; for (i = 0; i < cmap->start; i++) { lut[i].red = r[i]; lut[i].green = g[i]; lut[i].blue = b[i]; } for (i = cmap->start + cmap->len; i < size; i++) { lut[i].red = r[i]; lut[i].green = g[i]; lut[i].blue = b[i]; } } for (i = 0; i < cmap->len; i++) { lut[cmap->start + i].red = cmap->red[i]; lut[cmap->start + i].green = cmap->green[i]; lut[cmap->start + i].blue = cmap->blue[i]; } return gamma_lut; } static int setcmap_atomic(struct fb_cmap *cmap, struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; struct drm_device *dev = fb_helper->dev; struct drm_property_blob *gamma_lut = NULL; struct drm_modeset_acquire_ctx ctx; struct drm_crtc_state *crtc_state; struct drm_atomic_state *state; struct drm_mode_set *modeset; struct drm_crtc *crtc; u16 *r, *g, *b; bool replaced; int ret = 0; 
drm_modeset_acquire_init(&ctx, 0); state = drm_atomic_state_alloc(dev); if (!state) { ret = -ENOMEM; goto out_ctx; } state->acquire_ctx = &ctx; retry: drm_client_for_each_modeset(modeset, &fb_helper->client) { crtc = modeset->crtc; if (!gamma_lut) gamma_lut = setcmap_new_gamma_lut(crtc, cmap); if (IS_ERR(gamma_lut)) { ret = PTR_ERR(gamma_lut); gamma_lut = NULL; goto out_state; } crtc_state = drm_atomic_get_crtc_state(state, crtc); if (IS_ERR(crtc_state)) { ret = PTR_ERR(crtc_state); goto out_state; } /* * FIXME: This always uses gamma_lut. Some HW have only * degamma_lut, in which case we should reset gamma_lut and set * degamma_lut. See drm_crtc_legacy_gamma_set(). */ replaced = drm_property_replace_blob(&crtc_state->degamma_lut, NULL); replaced |= drm_property_replace_blob(&crtc_state->ctm, NULL); replaced |= drm_property_replace_blob(&crtc_state->gamma_lut, gamma_lut); crtc_state->color_mgmt_changed |= replaced; } ret = drm_atomic_commit(state); if (ret) goto out_state; drm_client_for_each_modeset(modeset, &fb_helper->client) { crtc = modeset->crtc; r = crtc->gamma_store; g = r + crtc->gamma_size; b = g + crtc->gamma_size; memcpy(r + cmap->start, cmap->red, cmap->len * sizeof(*r)); memcpy(g + cmap->start, cmap->green, cmap->len * sizeof(*g)); memcpy(b + cmap->start, cmap->blue, cmap->len * sizeof(*b)); } out_state: if (ret == -EDEADLK) goto backoff; drm_property_blob_put(gamma_lut); drm_atomic_state_put(state); out_ctx: drm_modeset_drop_locks(&ctx); drm_modeset_acquire_fini(&ctx); return ret; backoff: drm_atomic_state_clear(state); drm_modeset_backoff(&ctx); goto retry; } /** * drm_fb_helper_setcmap - implementation for &fb_ops.fb_setcmap * @cmap: cmap to set * @info: fbdev registered by the helper */ int drm_fb_helper_setcmap(struct fb_cmap *cmap, struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; struct drm_device *dev = fb_helper->dev; int ret; if (oops_in_progress) return -EBUSY; mutex_lock(&fb_helper->lock); if (!drm_master_internal_acquire(dev)) { ret = -EBUSY; goto unlock; } mutex_lock(&fb_helper->client.modeset_mutex); if (info->fix.visual == FB_VISUAL_TRUECOLOR) ret = setcmap_pseudo_palette(cmap, info); else if (drm_drv_uses_atomic_modeset(fb_helper->dev)) ret = setcmap_atomic(cmap, info); else ret = setcmap_legacy(cmap, info); mutex_unlock(&fb_helper->client.modeset_mutex); drm_master_internal_release(dev); unlock: mutex_unlock(&fb_helper->lock); return ret; } EXPORT_SYMBOL(drm_fb_helper_setcmap); /** * drm_fb_helper_ioctl - legacy ioctl implementation * @info: fbdev registered by the helper * @cmd: ioctl command * @arg: ioctl argument * * A helper to implement the standard fbdev ioctl. Only * FBIO_WAITFORVSYNC is implemented for now. */ int drm_fb_helper_ioctl(struct fb_info *info, unsigned int cmd, unsigned long arg) { struct drm_fb_helper *fb_helper = info->par; struct drm_device *dev = fb_helper->dev; struct drm_crtc *crtc; int ret = 0; mutex_lock(&fb_helper->lock); if (!drm_master_internal_acquire(dev)) { ret = -EBUSY; goto unlock; } switch (cmd) { case FBIO_WAITFORVSYNC: /* * Only consider the first CRTC. * * This ioctl is supposed to take the CRTC number as * an argument, but in fbdev times, what that number * was supposed to be was quite unclear, different * drivers were passing that argument differently * (some by reference, some by value), and most of the * userspace applications were just hardcoding 0 as an * argument. * * The first CRTC should be the integrated panel on * most drivers, so this is the best choice we can * make. 
If we're not smart enough here, one should
		 * just consider switching the userspace to KMS.
		 */
		crtc = fb_helper->client.modesets[0].crtc;

		/*
		 * Only wait for a vblank event if the CRTC is
		 * enabled, otherwise just don't do anything,
		 * not even report an error.
		 */
		ret = drm_crtc_vblank_get(crtc);
		if (!ret) {
			drm_crtc_wait_one_vblank(crtc);
			drm_crtc_vblank_put(crtc);
		}

		ret = 0;
		break;
	default:
		ret = -ENOTTY;
	}

	drm_master_internal_release(dev);
unlock:
	mutex_unlock(&fb_helper->lock);
	return ret;
}
EXPORT_SYMBOL(drm_fb_helper_ioctl);

static bool drm_fb_pixel_format_equal(const struct fb_var_screeninfo *var_1,
				      const struct fb_var_screeninfo *var_2)
{
	return var_1->bits_per_pixel == var_2->bits_per_pixel &&
	       var_1->grayscale == var_2->grayscale &&
	       var_1->red.offset == var_2->red.offset &&
	       var_1->red.length == var_2->red.length &&
	       var_1->red.msb_right == var_2->red.msb_right &&
	       var_1->green.offset == var_2->green.offset &&
	       var_1->green.length == var_2->green.length &&
	       var_1->green.msb_right == var_2->green.msb_right &&
	       var_1->blue.offset == var_2->blue.offset &&
	       var_1->blue.length == var_2->blue.length &&
	       var_1->blue.msb_right == var_2->blue.msb_right &&
	       var_1->transp.offset == var_2->transp.offset &&
	       var_1->transp.length == var_2->transp.length &&
	       var_1->transp.msb_right == var_2->transp.msb_right;
}

static void drm_fb_helper_fill_pixel_fmt(struct fb_var_screeninfo *var,
					 const struct drm_format_info *format)
{
	u8 depth = format->depth;

	if (format->is_color_indexed) {
		var->red.offset = 0;
		var->green.offset = 0;
		var->blue.offset = 0;
		var->red.length = depth;
		var->green.length = depth;
		var->blue.length = depth;
		var->transp.offset = 0;
		var->transp.length = 0;
		return;
	}

	switch (depth) {
	case 15:
		var->red.offset = 10;
		var->green.offset = 5;
		var->blue.offset = 0;
		var->red.length = 5;
		var->green.length = 5;
		var->blue.length = 5;
		var->transp.offset = 15;
		var->transp.length = 1;
		break;
	case 16:
		var->red.offset = 11;
		var->green.offset = 5;
		var->blue.offset = 0;
		var->red.length = 5;
		var->green.length = 6;
		var->blue.length = 5;
		var->transp.offset = 0;
		break;
	case 24:
		var->red.offset = 16;
		var->green.offset = 8;
		var->blue.offset = 0;
		var->red.length = 8;
		var->green.length = 8;
		var->blue.length = 8;
		var->transp.offset = 0;
		var->transp.length = 0;
		break;
	case 32:
		var->red.offset = 16;
		var->green.offset = 8;
		var->blue.offset = 0;
		var->red.length = 8;
		var->green.length = 8;
		var->blue.length = 8;
		var->transp.offset = 24;
		var->transp.length = 8;
		break;
	default:
		break;
	}
}

static void __fill_var(struct fb_var_screeninfo *var, struct fb_info *info,
		       struct drm_framebuffer *fb)
{
	int i;

	var->xres_virtual = fb->width;
	var->yres_virtual = fb->height;
	var->accel_flags = 0;
	var->bits_per_pixel = drm_format_info_bpp(fb->format, 0);

	var->height = info->var.height;
	var->width = info->var.width;

	var->left_margin = var->right_margin = 0;
	var->upper_margin = var->lower_margin = 0;
	var->hsync_len = var->vsync_len = 0;
	var->sync = var->vmode = 0;
	var->rotate = 0;
	var->colorspace = 0;
	for (i = 0; i < 4; i++)
		var->reserved[i] = 0;
}

/**
 * drm_fb_helper_check_var - implementation for &fb_ops.fb_check_var
 * @var: screeninfo to check
 * @info: fbdev registered by the helper
 */
int drm_fb_helper_check_var(struct fb_var_screeninfo *var,
			    struct fb_info *info)
{
	struct drm_fb_helper *fb_helper = info->par;
	struct drm_framebuffer *fb = fb_helper->fb;
	const struct drm_format_info *format = fb->format;
	struct drm_device *dev = fb_helper->dev;
	unsigned int bpp;

	if (in_dbg_master())
		return -EINVAL;

	if (var->pixclock != 0) {
		drm_dbg_kms(dev, "fbdev
emulation doesn't support changing the pixel clock, value of pixclock is ignored\n"); var->pixclock = 0; } switch (format->format) { case DRM_FORMAT_C1: case DRM_FORMAT_C2: case DRM_FORMAT_C4: /* supported format with sub-byte pixels */ break; default: if ((drm_format_info_block_width(format, 0) > 1) || (drm_format_info_block_height(format, 0) > 1)) return -EINVAL; break; } /* * Changes to struct fb_var_screeninfo are currently not pushed back * to KMS, hence fail if different settings are requested. */ bpp = drm_format_info_bpp(format, 0); if (var->bits_per_pixel > bpp || var->xres > fb->width || var->yres > fb->height || var->xres_virtual > fb->width || var->yres_virtual > fb->height) { drm_dbg_kms(dev, "fb requested width/height/bpp can't fit in current fb " "request %dx%d-%d (virtual %dx%d) > %dx%d-%d\n", var->xres, var->yres, var->bits_per_pixel, var->xres_virtual, var->yres_virtual, fb->width, fb->height, bpp); return -EINVAL; } __fill_var(var, info, fb); /* * fb_pan_display() validates this, but fb_set_par() doesn't and just * falls over. Note that __fill_var above adjusts y/res_virtual. */ if (var->yoffset > var->yres_virtual - var->yres || var->xoffset > var->xres_virtual - var->xres) return -EINVAL; /* We neither support grayscale nor FOURCC (also stored in here). */ if (var->grayscale > 0) return -EINVAL; if (var->nonstd) return -EINVAL; /* * Workaround for SDL 1.2, which is known to be setting all pixel format * fields to zero in some cases. We treat this situation as a * kind of "use some reasonable autodetected values". */ if (!var->red.offset && !var->green.offset && !var->blue.offset && !var->transp.offset && !var->red.length && !var->green.length && !var->blue.length && !var->transp.length && !var->red.msb_right && !var->green.msb_right && !var->blue.msb_right && !var->transp.msb_right) { drm_fb_helper_fill_pixel_fmt(var, format); } /* * drm fbdev emulation doesn't support changing the pixel format at all, * so reject all pixel format changing requests. */ if (!drm_fb_pixel_format_equal(var, &info->var)) { drm_dbg_kms(dev, "fbdev emulation doesn't support changing the pixel format\n"); return -EINVAL; } return 0; } EXPORT_SYMBOL(drm_fb_helper_check_var); /** * drm_fb_helper_set_par - implementation for &fb_ops.fb_set_par * @info: fbdev registered by the helper * * This will let fbcon do the mode init and is called at initialization time by * the fbdev core when registering the driver, and later on through the hotplug * callback. */ int drm_fb_helper_set_par(struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; struct fb_var_screeninfo *var = &info->var; bool force; if (oops_in_progress) return -EBUSY; /* * Normally we want to make sure that a kms master takes precedence over * fbdev, to avoid fbdev flickering and occasionally stealing the * display status. But Xorg first sets the vt back to text mode using * the KDSET IOCTL with KD_TEXT, and only after that drops the master * status when exiting. * * In the past this was caught by drm_fb_helper_lastclose(), but on * modern systems where logind always keeps a drm fd open to orchestrate * the vt switching, this doesn't work. * * To not break the userspace ABI we have this special case here, which * is only used for the above case. Everything else uses the normal * commit function, which ensures that we never steal the display from * an active drm master.
*/ force = var->activate & FB_ACTIVATE_KD_TEXT; __drm_fb_helper_restore_fbdev_mode_unlocked(fb_helper, force); return 0; } EXPORT_SYMBOL(drm_fb_helper_set_par); static void pan_set(struct drm_fb_helper *fb_helper, int dx, int dy) { struct drm_mode_set *mode_set; mutex_lock(&fb_helper->client.modeset_mutex); drm_client_for_each_modeset(mode_set, &fb_helper->client) { mode_set->x += dx; mode_set->y += dy; } mutex_unlock(&fb_helper->client.modeset_mutex); } static int pan_display_atomic(struct fb_var_screeninfo *var, struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; int ret, dx, dy; dx = var->xoffset - info->var.xoffset; dy = var->yoffset - info->var.yoffset; pan_set(fb_helper, dx, dy); ret = drm_client_modeset_commit_locked(&fb_helper->client); if (!ret) { info->var.xoffset = var->xoffset; info->var.yoffset = var->yoffset; } else pan_set(fb_helper, -dx, -dy); return ret; } static int pan_display_legacy(struct fb_var_screeninfo *var, struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; struct drm_client_dev *client = &fb_helper->client; struct drm_mode_set *modeset; int ret = 0; mutex_lock(&client->modeset_mutex); drm_modeset_lock_all(fb_helper->dev); drm_client_for_each_modeset(modeset, client) { modeset->x = var->xoffset; modeset->y = var->yoffset; if (modeset->num_connectors) { ret = drm_mode_set_config_internal(modeset); if (!ret) { info->var.xoffset = var->xoffset; info->var.yoffset = var->yoffset; } } } drm_modeset_unlock_all(fb_helper->dev); mutex_unlock(&client->modeset_mutex); return ret; } /** * drm_fb_helper_pan_display - implementation for &fb_ops.fb_pan_display * @var: updated screen information * @info: fbdev registered by the helper */ int drm_fb_helper_pan_display(struct fb_var_screeninfo *var, struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; struct drm_device *dev = fb_helper->dev; int ret; if (oops_in_progress) return -EBUSY; mutex_lock(&fb_helper->lock); if (!drm_master_internal_acquire(dev)) { ret = -EBUSY; goto unlock; } if (drm_drv_uses_atomic_modeset(dev)) ret = pan_display_atomic(var, info); else ret = pan_display_legacy(var, info); drm_master_internal_release(dev); unlock: mutex_unlock(&fb_helper->lock); return ret; } EXPORT_SYMBOL(drm_fb_helper_pan_display); static uint32_t drm_fb_helper_find_format(struct drm_fb_helper *fb_helper, const uint32_t *formats, size_t format_count, unsigned int color_mode) { struct drm_device *dev = fb_helper->dev; uint32_t format; size_t i; format = drm_driver_color_mode_format(dev, color_mode); if (!format) { drm_info(dev, "unsupported color mode of %d\n", color_mode); return DRM_FORMAT_INVALID; } for (i = 0; i < format_count; ++i) { if (formats[i] == format) return format; } drm_warn(dev, "format %p4cc not supported\n", &format); return DRM_FORMAT_INVALID; } static int __drm_fb_helper_find_sizes(struct drm_fb_helper *fb_helper, struct drm_fb_helper_surface_size *sizes) { struct drm_client_dev *client = &fb_helper->client; struct drm_device *dev = fb_helper->dev; int crtc_count = 0; struct drm_connector_list_iter conn_iter; struct drm_connector *connector; struct drm_mode_set *mode_set; uint32_t surface_format = DRM_FORMAT_INVALID; const struct drm_format_info *info; memset(sizes, 0, sizeof(*sizes)); sizes->fb_width = (u32)-1; sizes->fb_height = (u32)-1; drm_client_for_each_modeset(mode_set, client) { struct drm_crtc *crtc = mode_set->crtc; struct drm_plane *plane = crtc->primary; drm_dbg_kms(dev, "test CRTC %u primary plane\n", drm_crtc_index(crtc)); 
drm_connector_list_iter_begin(fb_helper->dev, &conn_iter); drm_client_for_each_connector_iter(connector, &conn_iter) { struct drm_cmdline_mode *cmdline_mode = &connector->cmdline_mode; if (!cmdline_mode->bpp_specified) continue; surface_format = drm_fb_helper_find_format(fb_helper, plane->format_types, plane->format_count, cmdline_mode->bpp); if (surface_format != DRM_FORMAT_INVALID) break; /* found supported format */ } drm_connector_list_iter_end(&conn_iter); if (surface_format != DRM_FORMAT_INVALID) break; /* found supported format */ /* try preferred color mode */ surface_format = drm_fb_helper_find_format(fb_helper, plane->format_types, plane->format_count, fb_helper->preferred_bpp); if (surface_format != DRM_FORMAT_INVALID) break; /* found supported format */ } if (surface_format == DRM_FORMAT_INVALID) { /* * If none of the given color modes works, fall back * to XRGB8888. Drivers are expected to provide this * format for compatibility with legacy applications. */ drm_warn(dev, "No compatible format found\n"); surface_format = drm_driver_legacy_fb_format(dev, 32, 24); } info = drm_format_info(surface_format); sizes->surface_bpp = drm_format_info_bpp(info, 0); sizes->surface_depth = info->depth; /* first up get a count of crtcs now in use and new min/maxes width/heights */ crtc_count = 0; drm_client_for_each_modeset(mode_set, client) { struct drm_display_mode *desired_mode; int x, y, j; /* in case of tile group, are we the last tile vert or horiz? * If no tile group you are always the last one both vertically * and horizontally */ bool lastv = true, lasth = true; desired_mode = mode_set->mode; if (!desired_mode) continue; crtc_count++; x = mode_set->x; y = mode_set->y; sizes->surface_width = max_t(u32, desired_mode->hdisplay + x, sizes->surface_width); sizes->surface_height = max_t(u32, desired_mode->vdisplay + y, sizes->surface_height); for (j = 0; j < mode_set->num_connectors; j++) { struct drm_connector *connector = mode_set->connectors[j]; if (connector->has_tile && desired_mode->hdisplay == connector->tile_h_size && desired_mode->vdisplay == connector->tile_v_size) { lasth = (connector->tile_h_loc == (connector->num_h_tile - 1)); lastv = (connector->tile_v_loc == (connector->num_v_tile - 1)); /* cloning to multiple tiles is just crazy-talk, so: */ break; } } if (lasth) sizes->fb_width = min_t(u32, desired_mode->hdisplay + x, sizes->fb_width); if (lastv) sizes->fb_height = min_t(u32, desired_mode->vdisplay + y, sizes->fb_height); } if (crtc_count == 0 || sizes->fb_width == -1 || sizes->fb_height == -1) { drm_info(dev, "Cannot find any crtc or sizes\n"); return -EAGAIN; } return 0; } static int drm_fb_helper_find_sizes(struct drm_fb_helper *fb_helper, struct drm_fb_helper_surface_size *sizes) { struct drm_client_dev *client = &fb_helper->client; struct drm_device *dev = fb_helper->dev; struct drm_mode_config *config = &dev->mode_config; int ret; mutex_lock(&client->modeset_mutex); ret = __drm_fb_helper_find_sizes(fb_helper, sizes); mutex_unlock(&client->modeset_mutex); if (ret) return ret; /* Handle our overallocation */ sizes->surface_height *= drm_fbdev_overalloc; sizes->surface_height /= 100; if (sizes->surface_height > config->max_height) { drm_dbg_kms(dev, "Fbdev over-allocation too large; clamping height to %d\n", config->max_height); sizes->surface_height = config->max_height; } return 0; } /* * Allocates the backing storage and sets up the fbdev info structure through * the ->fbdev_probe callback. 
*/ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper) { struct drm_client_dev *client = &fb_helper->client; struct drm_device *dev = fb_helper->dev; struct drm_fb_helper_surface_size sizes; struct fb_info *info; int ret; if (drm_WARN_ON(dev, !dev->driver->fbdev_probe)) return -EINVAL; ret = drm_fb_helper_find_sizes(fb_helper, &sizes); if (ret) { /* First time: disable all crtc's.. */ if (!fb_helper->deferred_setup) drm_client_modeset_commit(client); return ret; } /* push down into drivers */ ret = dev->driver->fbdev_probe(fb_helper, &sizes); if (ret < 0) return ret; strcpy(fb_helper->fb->comm, "[fbcon]"); info = fb_helper->info; /* Set the fb info for vgaswitcheroo clients. Does nothing otherwise. */ if (dev_is_pci(info->device)) vga_switcheroo_client_fb_set(to_pci_dev(info->device), info); return 0; } static void drm_fb_helper_fill_fix(struct fb_info *info, uint32_t pitch, bool is_color_indexed) { info->fix.type = FB_TYPE_PACKED_PIXELS; info->fix.visual = is_color_indexed ? FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_TRUECOLOR; info->fix.mmio_start = 0; info->fix.mmio_len = 0; info->fix.type_aux = 0; info->fix.xpanstep = 1; /* doing it in hw */ info->fix.ypanstep = 1; /* doing it in hw */ info->fix.ywrapstep = 0; info->fix.accel = FB_ACCEL_NONE; info->fix.line_length = pitch; } static void drm_fb_helper_fill_var(struct fb_info *info, struct drm_fb_helper *fb_helper, uint32_t fb_width, uint32_t fb_height) { struct drm_framebuffer *fb = fb_helper->fb; const struct drm_format_info *format = fb->format; switch (format->format) { case DRM_FORMAT_C1: case DRM_FORMAT_C2: case DRM_FORMAT_C4: /* supported format with sub-byte pixels */ break; default: WARN_ON((drm_format_info_block_width(format, 0) > 1) || (drm_format_info_block_height(format, 0) > 1)); break; } info->pseudo_palette = fb_helper->pseudo_palette; info->var.xoffset = 0; info->var.yoffset = 0; __fill_var(&info->var, info, fb); info->var.activate = FB_ACTIVATE_NOW; drm_fb_helper_fill_pixel_fmt(&info->var, format); info->var.xres = fb_width; info->var.yres = fb_height; } /** * drm_fb_helper_fill_info - initializes fbdev information * @info: fbdev instance to set up * @fb_helper: fb helper instance to use as template * @sizes: describes fbdev size and scanout surface size * * Sets up the variable and fixed fbdev metainformation from the given fb helper * instance and the drm framebuffer allocated in &drm_fb_helper.fb. * * Drivers should call this (or their equivalent setup code) from their * &drm_driver.fbdev_probe callback after having allocated the fbdev * backing storage framebuffer. */ void drm_fb_helper_fill_info(struct fb_info *info, struct drm_fb_helper *fb_helper, struct drm_fb_helper_surface_size *sizes) { struct drm_framebuffer *fb = fb_helper->fb; drm_fb_helper_fill_fix(info, fb->pitches[0], fb->format->is_color_indexed); drm_fb_helper_fill_var(info, fb_helper, sizes->fb_width, sizes->fb_height); info->par = fb_helper; /* * The DRM drivers fbdev emulation device name can be confusing if the * driver name also has a "drm" suffix on it, leading to names such as * "simpledrmdrmfb" in /proc/fb. Unfortunately, it's an uAPI and can't * be changed due to user-space tools (e.g. pm-utils) matching against it. */ snprintf(info->fix.id, sizeof(info->fix.id), "%sdrmfb", fb_helper->dev->driver->name); } EXPORT_SYMBOL(drm_fb_helper_fill_info); /* * This is a continuation of drm_setup_crtcs() that sets up anything related * to the framebuffer.
During initialization, drm_setup_crtcs() is called before * the framebuffer has been allocated (fb_helper->fb and fb_helper->info). * So, any setup that touches those fields needs to be done here instead of in * drm_setup_crtcs(). */ static void drm_setup_crtcs_fb(struct drm_fb_helper *fb_helper) { struct drm_client_dev *client = &fb_helper->client; struct drm_connector_list_iter conn_iter; struct fb_info *info = fb_helper->info; unsigned int rotation, sw_rotations = 0; struct drm_connector *connector; struct drm_mode_set *modeset; mutex_lock(&client->modeset_mutex); drm_client_for_each_modeset(modeset, client) { if (!modeset->num_connectors) continue; modeset->fb = fb_helper->fb; if (drm_client_rotation(modeset, &rotation)) /* Rotating in hardware, fbcon should not rotate */ sw_rotations |= DRM_MODE_ROTATE_0; else sw_rotations |= rotation; } mutex_unlock(&client->modeset_mutex); drm_connector_list_iter_begin(fb_helper->dev, &conn_iter); drm_client_for_each_connector_iter(connector, &conn_iter) { /* use first connected connector for the physical dimensions */ if (connector->status == connector_status_connected) { info->var.width = connector->display_info.width_mm; info->var.height = connector->display_info.height_mm; break; } } drm_connector_list_iter_end(&conn_iter); switch (sw_rotations) { case DRM_MODE_ROTATE_0: info->fbcon_rotate_hint = FB_ROTATE_UR; break; case DRM_MODE_ROTATE_90: info->fbcon_rotate_hint = FB_ROTATE_CCW; break; case DRM_MODE_ROTATE_180: info->fbcon_rotate_hint = FB_ROTATE_UD; break; case DRM_MODE_ROTATE_270: info->fbcon_rotate_hint = FB_ROTATE_CW; break; default: /* * Multiple bits are set / multiple rotations requested. * fbcon cannot handle separate rotation settings per * output, so fall back to unrotated. */ info->fbcon_rotate_hint = FB_ROTATE_UR; } } /* Note: Drops fb_helper->lock before returning. */ static int __drm_fb_helper_initial_config_and_unlock(struct drm_fb_helper *fb_helper) { struct drm_device *dev = fb_helper->dev; struct fb_info *info; unsigned int width, height; int ret; width = dev->mode_config.max_width; height = dev->mode_config.max_height; drm_client_modeset_probe(&fb_helper->client, width, height); ret = drm_fb_helper_single_fb_probe(fb_helper); if (ret < 0) { if (ret == -EAGAIN) { fb_helper->deferred_setup = true; ret = 0; } mutex_unlock(&fb_helper->lock); return ret; } drm_setup_crtcs_fb(fb_helper); fb_helper->deferred_setup = false; info = fb_helper->info; info->var.pixclock = 0; /* Need to drop locks to avoid recursive deadlock in * register_framebuffer. This is ok because the only thing left to do is * register the fbdev emulation instance in kernel_fb_helper_list. */ mutex_unlock(&fb_helper->lock); ret = register_framebuffer(info); if (ret < 0) return ret; drm_info(dev, "fb%d: %s frame buffer device\n", info->node, info->fix.id); mutex_lock(&kernel_fb_helper_lock); if (list_empty(&kernel_fb_helper_list)) register_sysrq_key('v', &sysrq_drm_fb_helper_restore_op); list_add(&fb_helper->kernel_fb_list, &kernel_fb_helper_list); mutex_unlock(&kernel_fb_helper_lock); return 0; } /** * drm_fb_helper_initial_config - setup a sane initial connector configuration * @fb_helper: fb_helper device struct * * Scans the CRTCs and connectors and tries to put together an initial setup. * At the moment, this is a cloned configuration across all heads with * a new framebuffer object as the backing store. * * Note that this also registers the fbdev and so allows userspace to call into * the driver through the fbdev interfaces.
* * This function will call down into the &drm_driver.fbdev_probe callback * to let the driver allocate and initialize the fbdev info structure and the * drm framebuffer used to back the fbdev. drm_fb_helper_fill_info() is provided * as a helper to set up simple default values for the fbdev info structure. * * HANG DEBUGGING: * * When you have fbcon support built-in or already loaded, this function will do * a full modeset to set up the fbdev console. Due to locking misdesign in the * VT/fbdev subsystem, that entire modeset sequence has to be done while holding * console_lock. Until console_unlock is called, no dmesg lines will be sent out * to consoles, not even the serial console. This means when your driver crashes, * you will see absolutely nothing else but a system stuck in this function, * with no further output. Any kind of printk() you place within your own driver * or in the drm core modeset code will also never show up. * * Standard debug practice is to run the fbcon setup without taking the * console_lock as a hack, to be able to see backtraces and crashes on the * serial line. This can be done by setting the fb.lockless_register_fb=1 kernel * cmdline option. * * The other option is to just disable fbdev emulation since the first modeset * from userspace will very likely crash in the same way, and is even easier to * debug. This can be done by setting the drm_kms_helper.fbdev_emulation=0 * kernel cmdline option. * * RETURNS: * Zero if everything went ok, nonzero otherwise. */ int drm_fb_helper_initial_config(struct drm_fb_helper *fb_helper) { int ret; if (!drm_fbdev_emulation) return 0; mutex_lock(&fb_helper->lock); ret = __drm_fb_helper_initial_config_and_unlock(fb_helper); return ret; } EXPORT_SYMBOL(drm_fb_helper_initial_config); /** * drm_fb_helper_hotplug_event - respond to a hotplug notification by * probing all the outputs attached to the fb * @fb_helper: driver-allocated fbdev helper, can be NULL * * Scan the connectors attached to the fb_helper and try to put together a * setup after notification of a change in output configuration. * * Called at runtime, takes the mode config locks to be able to check/change the * modeset configuration. Must be run from process context (which usually means * either the output polling work or a work item launched from the driver's * hotplug interrupt). * * Note that drivers may call this even before calling * drm_fb_helper_initial_config but only after drm_fb_helper_init. This allows * for a race-free fbcon setup and will make sure that the fbdev emulation will * not miss any hotplug events. * * RETURNS: * 0 on success and a non-zero error code otherwise. */ int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper) { int err = 0; if (!drm_fbdev_emulation || !fb_helper) return 0; mutex_lock(&fb_helper->lock); if (fb_helper->deferred_setup) { err = __drm_fb_helper_initial_config_and_unlock(fb_helper); return err; } if (!fb_helper->fb || !drm_master_internal_acquire(fb_helper->dev)) { fb_helper->delayed_hotplug = true; mutex_unlock(&fb_helper->lock); return err; } drm_master_internal_release(fb_helper->dev); drm_dbg_kms(fb_helper->dev, "\n"); drm_client_modeset_probe(&fb_helper->client, fb_helper->fb->width, fb_helper->fb->height); drm_setup_crtcs_fb(fb_helper); mutex_unlock(&fb_helper->lock); drm_fb_helper_set_par(fb_helper->info); return 0; } EXPORT_SYMBOL(drm_fb_helper_hotplug_event); /** * drm_fb_helper_lastclose - DRM driver lastclose helper for fbdev emulation * @dev: DRM device * * This function is obsolete.
Call drm_fb_helper_restore_fbdev_mode_unlocked() * instead. */ void drm_fb_helper_lastclose(struct drm_device *dev) { drm_fb_helper_restore_fbdev_mode_unlocked(dev->fb_helper); } EXPORT_SYMBOL(drm_fb_helper_lastclose);
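/*
 * Illustrative userspace sketch (not from the kernel tree): exercising the
 * FBIO_WAITFORVSYNC path implemented in drm_fb_helper_ioctl() above. The
 * ioctl nominally takes a CRTC index, but as the comment there explains, the
 * helper only ever considers the first CRTC, so applications conventionally
 * pass 0. "/dev/fb0" is an assumed device node; FBIO_WAITFORVSYNC is the
 * uapi definition from <linux/fb.h>.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fb.h>

int main(void)
{
	uint32_t crtc = 0;	/* only CRTC 0 is honoured, see above */
	int fd = open("/dev/fb0", O_RDWR);

	if (fd < 0) {
		perror("open /dev/fb0");
		return 1;
	}
	/* blocks for one vblank; fails with ENOTTY if unsupported */
	if (ioctl(fd, FBIO_WAITFORVSYNC, &crtc) < 0)
		perror("FBIO_WAITFORVSYNC");
	close(fd);
	return 0;
}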
// SPDX-License-Identifier: GPL-2.0 #include <linux/netdevice.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <net/wext.h> #include <net/hotdata.h> #include "dev.h" static void *dev_seq_from_index(struct seq_file *seq, loff_t *pos) { unsigned long ifindex = *pos; struct net_device *dev; for_each_netdev_dump(seq_file_net(seq), dev, ifindex) { *pos = dev->ifindex; return dev; } return NULL; } static void *dev_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { rcu_read_lock(); if (!*pos) return SEQ_START_TOKEN; return dev_seq_from_index(seq, pos); } static void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; return dev_seq_from_index(seq, pos); } static void dev_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { rcu_read_unlock(); } static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) { struct rtnl_link_stats64 temp; const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n", dev->name, stats->rx_bytes, stats->rx_packets, stats->rx_errors, stats->rx_dropped + stats->rx_missed_errors, stats->rx_fifo_errors, stats->rx_length_errors + stats->rx_over_errors + stats->rx_crc_errors + stats->rx_frame_errors, stats->rx_compressed, stats->multicast, stats->tx_bytes, stats->tx_packets, stats->tx_errors, stats->tx_dropped, stats->tx_fifo_errors, stats->collisions, stats->tx_carrier_errors + stats->tx_aborted_errors + stats->tx_window_errors + stats->tx_heartbeat_errors, stats->tx_compressed); } /* * Called from the PROCfs module.
This now uses the new arbitrary sized * /proc/net interface to create /proc/net/dev */ static int dev_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) seq_puts(seq, "Inter-| Receive " " | Transmit\n" " face |bytes packets errs drop fifo frame " "compressed multicast|bytes packets errs " "drop fifo colls carrier compressed\n"); else dev_seq_printf_stats(seq, v); return 0; } static u32 softnet_input_pkt_queue_len(struct softnet_data *sd) { return skb_queue_len_lockless(&sd->input_pkt_queue); } static u32 softnet_process_queue_len(struct softnet_data *sd) { return skb_queue_len_lockless(&sd->process_queue); } static struct softnet_data *softnet_get_online(loff_t *pos) { struct softnet_data *sd = NULL; while (*pos < nr_cpu_ids) if (cpu_online(*pos)) { sd = &per_cpu(softnet_data, *pos); break; } else ++*pos; return sd; } static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) { return softnet_get_online(pos); } static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; return softnet_get_online(pos); } static void softnet_seq_stop(struct seq_file *seq, void *v) { } static int softnet_seq_show(struct seq_file *seq, void *v) { struct softnet_data *sd = v; u32 input_qlen = softnet_input_pkt_queue_len(sd); u32 process_qlen = softnet_process_queue_len(sd); unsigned int flow_limit_count = 0; #ifdef CONFIG_NET_FLOW_LIMIT struct sd_flow_limit *fl; rcu_read_lock(); fl = rcu_dereference(sd->flow_limit); /* Pairs with WRITE_ONCE() in skb_flow_limit() */ if (fl) flow_limit_count = READ_ONCE(fl->count); rcu_read_unlock(); #endif /* The index is the CPU id owning this sd. Since offline CPUs are not * displayed, it would otherwise not be trivial for user-space to map * the data to a specific CPU. */ seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x " "%08x %08x\n", READ_ONCE(sd->processed), atomic_read(&sd->dropped), READ_ONCE(sd->time_squeeze), 0, 0, 0, 0, 0, /* was fastroute */ 0, /* was cpu_collision */ READ_ONCE(sd->received_rps), flow_limit_count, input_qlen + process_qlen, (int)seq->index, input_qlen, process_qlen); return 0; } static const struct seq_operations dev_seq_ops = { .start = dev_seq_start, .next = dev_seq_next, .stop = dev_seq_stop, .show = dev_seq_show, }; static const struct seq_operations softnet_seq_ops = { .start = softnet_seq_start, .next = softnet_seq_next, .stop = softnet_seq_stop, .show = softnet_seq_show, }; static void *ptype_get_idx(struct seq_file *seq, loff_t pos) { struct list_head *ptype_list = NULL; struct packet_type *pt = NULL; struct net_device *dev; loff_t i = 0; int t; for_each_netdev_rcu(seq_file_net(seq), dev) { ptype_list = &dev->ptype_all; list_for_each_entry_rcu(pt, ptype_list, list) { if (i == pos) return pt; ++i; } } list_for_each_entry_rcu(pt, &seq_file_net(seq)->ptype_all, list) { if (i == pos) return pt; ++i; } list_for_each_entry_rcu(pt, &seq_file_net(seq)->ptype_specific, list) { if (i == pos) return pt; ++i; } for (t = 0; t < PTYPE_HASH_SIZE; t++) { list_for_each_entry_rcu(pt, &ptype_base[t], list) { if (i == pos) return pt; ++i; } } return NULL; } static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { rcu_read_lock(); return *pos ?
ptype_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct net *net = seq_file_net(seq); struct net_device *dev; struct packet_type *pt; struct list_head *nxt; int hash; ++*pos; if (v == SEQ_START_TOKEN) return ptype_get_idx(seq, 0); pt = v; nxt = pt->list.next; if (pt->dev) { if (nxt != &pt->dev->ptype_all) goto found; dev = pt->dev; for_each_netdev_continue_rcu(seq_file_net(seq), dev) { if (!list_empty(&dev->ptype_all)) { nxt = dev->ptype_all.next; goto found; } } nxt = net->ptype_all.next; goto net_ptype_all; } if (pt->af_packet_net) { net_ptype_all: if (nxt != &net->ptype_all && nxt != &net->ptype_specific) goto found; if (nxt == &net->ptype_all) { /* continue with ->ptype_specific if it's not empty */ nxt = net->ptype_specific.next; if (nxt != &net->ptype_specific) goto found; } hash = 0; nxt = ptype_base[0].next; } else hash = ntohs(pt->type) & PTYPE_HASH_MASK; while (nxt == &ptype_base[hash]) { if (++hash >= PTYPE_HASH_SIZE) return NULL; nxt = ptype_base[hash].next; } found: return list_entry(nxt, struct packet_type, list); } static void ptype_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { rcu_read_unlock(); } static int ptype_seq_show(struct seq_file *seq, void *v) { struct packet_type *pt = v; if (v == SEQ_START_TOKEN) seq_puts(seq, "Type Device Function\n"); else if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) && (!pt->dev || net_eq(dev_net(pt->dev), seq_file_net(seq)))) { if (pt->type == htons(ETH_P_ALL)) seq_puts(seq, "ALL "); else seq_printf(seq, "%04x", ntohs(pt->type)); seq_printf(seq, " %-8s %ps\n", pt->dev ? pt->dev->name : "", pt->func); } return 0; } static const struct seq_operations ptype_seq_ops = { .start = ptype_seq_start, .next = ptype_seq_next, .stop = ptype_seq_stop, .show = ptype_seq_show, }; static int __net_init dev_proc_net_init(struct net *net) { int rc = -ENOMEM; if (!proc_create_net("dev", 0444, net->proc_net, &dev_seq_ops, sizeof(struct seq_net_private))) goto out; if (!proc_create_seq("softnet_stat", 0444, net->proc_net, &softnet_seq_ops)) goto out_dev; if (!proc_create_net("ptype", 0444, net->proc_net, &ptype_seq_ops, sizeof(struct seq_net_private))) goto out_softnet; if (wext_proc_init(net)) goto out_ptype; rc = 0; out: return rc; out_ptype: remove_proc_entry("ptype", net->proc_net); out_softnet: remove_proc_entry("softnet_stat", net->proc_net); out_dev: remove_proc_entry("dev", net->proc_net); goto out; } static void __net_exit dev_proc_net_exit(struct net *net) { wext_proc_exit(net); remove_proc_entry("ptype", net->proc_net); remove_proc_entry("softnet_stat", net->proc_net); remove_proc_entry("dev", net->proc_net); } static struct pernet_operations __net_initdata dev_proc_ops = { .init = dev_proc_net_init, .exit = dev_proc_net_exit, }; static int dev_mc_seq_show(struct seq_file *seq, void *v) { struct netdev_hw_addr *ha; struct net_device *dev = v; if (v == SEQ_START_TOKEN) return 0; netif_addr_lock_bh(dev); netdev_for_each_mc_addr(ha, dev) { seq_printf(seq, "%-4d %-15s %-5d %-5d %*phN\n", dev->ifindex, dev->name, ha->refcount, ha->global_use, (int)dev->addr_len, ha->addr); } netif_addr_unlock_bh(dev); return 0; } static const struct seq_operations dev_mc_seq_ops = { .start = dev_seq_start, .next = dev_seq_next, .stop = dev_seq_stop, .show = dev_mc_seq_show, }; static int __net_init dev_mc_net_init(struct net *net) { if (!proc_create_net("dev_mcast", 0, net->proc_net, &dev_mc_seq_ops, sizeof(struct seq_net_private))) return -ENOMEM; return 0; } 
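/*
 * Illustrative only (all values assumed): with the format string used in
 * dev_mc_seq_show() above, a /proc/net/dev_mcast entry for the all-systems
 * group 01:00:5e:00:00:01 on eth0 (ifindex 2, refcount 1, no global use)
 * would read:
 *
 *   2    eth0            1     0     01005e000001
 */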
static void __net_exit dev_mc_net_exit(struct net *net) { remove_proc_entry("dev_mcast", net->proc_net); } static struct pernet_operations __net_initdata dev_mc_net_ops = { .init = dev_mc_net_init, .exit = dev_mc_net_exit, }; int __init dev_proc_init(void) { int ret = register_pernet_subsys(&dev_proc_ops); if (!ret) return register_pernet_subsys(&dev_mc_net_ops); return ret; }
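/*
 * Illustrative userspace sketch (not from the kernel tree): parsing the
 * /proc/net/dev table emitted by dev_seq_show() above. The first two lines
 * are headers; every following line is "<name>: <16 counters>" in the order
 * laid down by dev_seq_printf_stats(), so rx_bytes is the 1st and tx_bytes
 * the 9th counter after the colon.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long rx_bytes, tx_bytes, skip[7];
	char name[32], line[512];
	FILE *f = fopen("/proc/net/dev", "r");
	int n = 0;

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f)) {
		if (++n <= 2)	/* skip the two header lines */
			continue;
		if (sscanf(line, " %31[^:]: %llu %llu %llu %llu %llu %llu %llu %llu %llu",
			   name, &rx_bytes, &skip[0], &skip[1], &skip[2],
			   &skip[3], &skip[4], &skip[5], &skip[6],
			   &tx_bytes) == 10)
			printf("%s rx=%llu tx=%llu\n", name, rx_bytes, tx_bytes);
	}
	fclose(f);
	return 0;
}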
/* SPDX-License-Identifier: GPL-2.0-only */ /* A pointer that can point to either kernel or userspace memory. */ #ifndef _LINUX_BPFPTR_H #define _LINUX_BPFPTR_H #include <linux/mm.h> #include <linux/sockptr.h> typedef sockptr_t bpfptr_t; static inline bool bpfptr_is_kernel(bpfptr_t bpfptr) { return bpfptr.is_kernel; } static inline bpfptr_t KERNEL_BPFPTR(void *p) { return (bpfptr_t) { .kernel = p, .is_kernel = true }; } static inline bpfptr_t USER_BPFPTR(void __user *p) { return (bpfptr_t) { .user = p }; } static inline bpfptr_t make_bpfptr(u64 addr, bool is_kernel) { if (is_kernel) return KERNEL_BPFPTR((void*) (uintptr_t) addr); else return USER_BPFPTR(u64_to_user_ptr(addr)); } static inline bool bpfptr_is_null(bpfptr_t bpfptr) { if (bpfptr_is_kernel(bpfptr)) return !bpfptr.kernel; return !bpfptr.user; } static inline void bpfptr_add(bpfptr_t *bpfptr, size_t val) { if (bpfptr_is_kernel(*bpfptr)) bpfptr->kernel += val; else bpfptr->user += val; } static inline int copy_from_bpfptr_offset(void *dst, bpfptr_t src, size_t offset, size_t size) { if (!bpfptr_is_kernel(src)) return copy_from_user(dst, src.user + offset, size); return copy_from_kernel_nofault(dst, src.kernel + offset, size); } static inline int copy_from_bpfptr(void *dst, bpfptr_t src, size_t size) { return copy_from_bpfptr_offset(dst, src, 0, size); } static inline int copy_to_bpfptr_offset(bpfptr_t dst, size_t offset, const void *src, size_t size) { return copy_to_sockptr_offset((sockptr_t) dst, offset, src, size); } static inline void *kvmemdup_bpfptr_noprof(bpfptr_t src, size_t len) { void *p = kvmalloc_noprof(len, GFP_USER | __GFP_NOWARN); if (!p) return ERR_PTR(-ENOMEM); if (copy_from_bpfptr(p, src, len)) { kvfree(p); return ERR_PTR(-EFAULT); } return p; } #define kvmemdup_bpfptr(...) alloc_hooks(kvmemdup_bpfptr_noprof(__VA_ARGS__)) static inline long strncpy_from_bpfptr(char *dst, bpfptr_t src, size_t count) { if (bpfptr_is_kernel(src)) return strncpy_from_kernel_nofault(dst, src.kernel, count); return strncpy_from_user(dst, src.user, count); } #endif /* _LINUX_BPFPTR_H */
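/*
 * Illustrative sketch (not part of bpfptr.h): how a syscall-style command
 * handler can stay agnostic about whether its attribute block lives in
 * kernel or user memory, mirroring the way the bpf(2) syscall wraps its
 * attr pointer with make_bpfptr(). "my_attr" and "my_cmd_handler" are
 * hypothetical names used only for this example.
 */
#include <linux/bpfptr.h>
#include <linux/errno.h>

struct my_attr {
	u64 name_ptr;	/* user or kernel string pointer, encoded as u64 */
	u32 flags;
};

static int my_cmd_handler(bpfptr_t uattr)
{
	struct my_attr attr;
	char name[16];

	/* one copy path regardless of where the attribute block lives */
	if (copy_from_bpfptr(&attr, uattr, sizeof(attr)))
		return -EFAULT;

	/* nested pointers inherit the kernel/user distinction of uattr */
	if (strncpy_from_bpfptr(name,
				make_bpfptr(attr.name_ptr,
					    bpfptr_is_kernel(uattr)),
				sizeof(name)) < 0)
		return -EFAULT;

	return 0;
}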
/* SPDX-License-Identifier: GPL-2.0 * * IO cost model based controller. * * Copyright (C) 2019 Tejun Heo <tj@kernel.org> * Copyright (C) 2019 Andy Newell <newella@fb.com> * Copyright (C) 2019 Facebook * * One challenge of controlling IO resources is the lack of a trivially * observable cost metric. This is distinguished from CPU and memory where * wallclock time and the number of bytes can serve as accurate enough * approximations. * * Bandwidth and iops are the most commonly used metrics for IO devices but * depending on the type and specifics of the device, different IO patterns * easily lead to multiple orders of magnitude variations rendering them * useless for the purpose of IO capacity distribution. While on-device * time, with a lot of crutches, could serve as a useful approximation for * non-queued rotational devices, this is no longer viable with modern * devices, even the rotational ones. * * While there is no cost metric we can trivially observe, it isn't a * complete mystery.
For example, on a rotational device, seek cost * dominates while a contiguous transfer contributes a smaller amount * proportional to the size. If we can characterize at least the relative * costs of these different types of IOs, it should be possible to * implement a reasonable work-conserving proportional IO resource * distribution. * * 1. IO Cost Model * * IO cost model estimates the cost of an IO given its basic parameters and * history (e.g. the end sector of the last IO). The cost is measured in * device time. If a given IO is estimated to cost 10ms, the device should * be able to process ~100 of those IOs in a second. * * Currently, there's only one builtin cost model - linear. Each IO is * classified as sequential or random and given a base cost accordingly. * On top of that, a size cost proportional to the length of the IO is * added. While simple, this model captures the operational * characteristics of a wide variety of devices well enough. Default * parameters for several different classes of devices are provided and the * parameters can be configured from userspace via * /sys/fs/cgroup/io.cost.model. * * If needed, tools/cgroup/iocost_coef_gen.py can be used to generate * device-specific coefficients. * * 2. Control Strategy * * The device virtual time (vtime) is used as the primary control metric. * The control strategy is composed of the following three parts. * * 2-1. Vtime Distribution * * When a cgroup becomes active in terms of IOs, its hierarchical share is * calculated. Please consider the following hierarchy where the numbers * inside parentheses denote the configured weights. * * root * / \ * A (w:100) B (w:300) * / \ * A0 (w:100) A1 (w:100) * * If B is idle and only A0 and A1 are actively issuing IOs, as the two are * of equal weight, each gets 50% share. If then B starts issuing IOs, B * gets 300/(100+300) or 75% share, and A0 and A1 equally split the rest, * 12.5% each. The distribution mechanism only cares about these flattened * shares. They're called hweights (hierarchical weights) and always add * up to 1 (WEIGHT_ONE). * * A given cgroup's vtime runs slower in inverse proportion to its hweight. * For example, with 12.5% weight, A0's time runs 8 times slower (100/12.5) * against the device vtime - an IO which takes 10ms on the underlying * device is considered to take 80ms on A0. * * This constitutes the basis of IO capacity distribution. Each cgroup's * vtime is running at a rate determined by its hweight. A cgroup tracks * the vtime consumed by past IOs and can issue a new IO if doing so * wouldn't outrun the current device vtime. Otherwise, the IO is * suspended until the vtime has progressed enough to cover it. * * 2-2. Vrate Adjustment * * It's unrealistic to expect the cost model to be perfect. There are too * many devices and even on the same device the overall performance * fluctuates depending on numerous factors such as IO mixture and device * internal garbage collection. The controller needs to adapt dynamically. * * This is achieved by adjusting the overall IO rate according to how busy * the device is. If the device becomes overloaded, we're sending down too * many IOs and should generally slow down. If there are waiting issuers * but the device isn't saturated, we're issuing too few and should * generally speed up. * * To slow down, we lower the vrate - the rate at which the device vtime * passes compared to the wall clock.
For example, if the vtime is running * at the vrate of 75%, all cgroups added up would only be able to issue * 750ms worth of IOs per second, and vice-versa for speeding up. * * Device busyness is determined using two criteria - rq wait and * completion latencies. * * When a device gets saturated, the on-device and then the request queues * fill up and a bio which is ready to be issued has to wait for a request * to become available. When this delay becomes noticeable, it's a clear * indication that the device is saturated and we lower the vrate. This * saturation signal is fairly conservative as it only triggers when both * hardware and software queues are filled up, and is used as the default * busy signal. * * As devices can have deep queues and be unfair in how the queued commands * are executed, solely depending on rq wait may not result in satisfactory * control quality. For a better control quality, completion latency QoS * parameters can be configured so that the device is considered saturated * if N'th percentile completion latency rises above the set point. * * The completion latency requirements are a function of both the * underlying device characteristics and the desired IO latency quality of * service. There is an inherent trade-off - the tighter the latency QoS, * the higher the bandwidth lossage. Latency QoS is disabled by default * and can be set through /sys/fs/cgroup/io.cost.qos. * * 2-3. Work Conservation * * Imagine two cgroups A and B with equal weights. A is issuing a small IO * periodically while B is sending out enough parallel IOs to saturate the * device on its own. Let's say A's usage amounts to 100ms worth of IO * cost per second, i.e., 10% of the device capacity. The naive * distribution of half and half would lead to 60% utilization of the * device, a significant reduction in the total amount of work done * compared to free-for-all competition. This is too high a cost to pay * for IO control. * * To conserve the total amount of work done, we keep track of how much * each active cgroup is actually using and yield part of its weight if * there are other cgroups which can make use of it. In the above case, * A's weight will be lowered so that it hovers above the actual usage and * B would be able to use the rest. * * As we don't want to penalize a cgroup for donating its weight, the * surplus weight adjustment factors in a margin and has an immediate * snapback mechanism in case the cgroup needs more IO vtime for itself. * * Note that adjusting down surplus weights has the same effects as * accelerating vtime for other cgroups and work conservation can also be * implemented by adjusting vrate dynamically. However, squaring who can * donate and should take back how much requires hweight propagations * anyway, making it easier to implement and understand as a separate * mechanism. * * 3. Monitoring * * Instead of debugfs or other clumsy monitoring mechanisms, this * controller uses a drgn based monitoring script - * tools/cgroup/iocost_monitor.py. For details on drgn, please see * https://github.com/osandov/drgn. The output looks like the following.
* * sdb RUN per=300ms cur_per=234.218:v203.695 busy= +1 vrate= 62.12% * active weight hweight% inflt% dbt delay usages% * test/a * 50/ 50 33.33/ 33.33 27.65 2 0*041 033:033:033 * test/b * 100/ 100 66.67/ 66.67 17.56 0 0*000 066:079:077 * * - per : Timer period * - cur_per : Internal wall and device vtime clock * - vrate : Device virtual time rate against wall clock * - weight : Surplus-adjusted and configured weights * - hweight : Surplus-adjusted and configured hierarchical weights * - inflt : The percentage of in-flight IO cost at the end of last period * - delay : Deferred issuer delay induction level and duration * - usages : Usage history */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/timer.h> #include <linux/time64.h> #include <linux/parser.h> #include <linux/sched/signal.h> #include <asm/local.h> #include <asm/local64.h> #include "blk-rq-qos.h" #include "blk-stat.h" #include "blk-wbt.h" #include "blk-cgroup.h" #ifdef CONFIG_TRACEPOINTS /* copied from TRACE_CGROUP_PATH, see cgroup-internal.h */ #define TRACE_IOCG_PATH_LEN 1024 static DEFINE_SPINLOCK(trace_iocg_path_lock); static char trace_iocg_path[TRACE_IOCG_PATH_LEN]; #define TRACE_IOCG_PATH(type, iocg, ...) \ do { \ unsigned long flags; \ if (trace_iocost_##type##_enabled()) { \ spin_lock_irqsave(&trace_iocg_path_lock, flags); \ cgroup_path(iocg_to_blkg(iocg)->blkcg->css.cgroup, \ trace_iocg_path, TRACE_IOCG_PATH_LEN); \ trace_iocost_##type(iocg, trace_iocg_path, \ ##__VA_ARGS__); \ spin_unlock_irqrestore(&trace_iocg_path_lock, flags); \ } \ } while (0) #else /* CONFIG_TRACEPOINTS */ #define TRACE_IOCG_PATH(type, iocg, ...) do { } while (0) #endif /* CONFIG_TRACEPOINTS */ enum { MILLION = 1000000, /* timer period is calculated from latency requirements, bound it */ MIN_PERIOD = USEC_PER_MSEC, MAX_PERIOD = USEC_PER_SEC, /* * iocg->vtime is targeted at 50% behind the device vtime, which * serves as its IO credit buffer. Surplus weight adjustment is * immediately canceled if the vtime margin runs below 10%. */ MARGIN_MIN_PCT = 10, MARGIN_LOW_PCT = 20, MARGIN_TARGET_PCT = 50, INUSE_ADJ_STEP_PCT = 25, /* Have some play in timer operations */ TIMER_SLACK_PCT = 1, /* 1/64k is granular enough and can easily be handled w/ u32 */ WEIGHT_ONE = 1 << 16, }; enum { /* * As vtime is used to calculate the cost of each IO, it needs to * be fairly high precision. For example, it should be able to * represent the cost of a single page worth of discard with * sufficient accuracy. At the same time, it should be able to * represent reasonably long enough durations to be useful and * convenient during operation. * * 1s worth of vtime is 2^37. This gives us both sub-nanosecond * granularity and days of wrap-around time even at extreme vrates. */ VTIME_PER_SEC_SHIFT = 37, VTIME_PER_SEC = 1LLU << VTIME_PER_SEC_SHIFT, VTIME_PER_USEC = VTIME_PER_SEC / USEC_PER_SEC, VTIME_PER_NSEC = VTIME_PER_SEC / NSEC_PER_SEC, /* bound vrate adjustments within two orders of magnitude */ VRATE_MIN_PPM = 10000, /* 1% */ VRATE_MAX_PPM = 100000000, /* 10000% */ VRATE_MIN = VTIME_PER_USEC * VRATE_MIN_PPM / MILLION, VRATE_CLAMP_ADJ_PCT = 4, /* switch iff the conditions are met for longer than this */ AUTOP_CYCLE_NSEC = 10LLU * NSEC_PER_SEC, }; enum { /* if IOs end up waiting for requests, issue less */ RQ_WAIT_BUSY_PCT = 5, /* unbusy hysteresis */ UNBUSY_THR_PCT = 75, /* * The effect of delay is indirect and non-linear and a huge amount of * future debt can accumulate abruptly while unthrottled.
Linearly scale * up delay as debt goes up and then let it decay exponentially. * This gives us quick ramp ups while delay is accumulating and long * tails which can help reduce the frequency of debt explosions on * unthrottle. The parameters are experimentally determined. * * The delay mechanism provides adequate protection and behavior in many * cases. However, this is far from ideal and falls short on both * fronts. The debtors are often throttled too harshly, costing a * significant amount of fairness and possibly total work, while the * protection against their impacts on the system can be choppy and * unreliable. * * The shortcoming primarily stems from the fact that, unlike for page * cache, the kernel doesn't have a well-defined back-pressure propagation * mechanism and policies for anonymous memory. Fully addressing this * issue will likely require substantial improvements in the area. */ MIN_DELAY_THR_PCT = 500, MAX_DELAY_THR_PCT = 25000, MIN_DELAY = 250, MAX_DELAY = 250 * USEC_PER_MSEC, /* halve debts if avg usage over 100ms is under 50% */ DFGV_USAGE_PCT = 50, DFGV_PERIOD = 100 * USEC_PER_MSEC, /* don't let cmds which take a very long time pin lagging for too long */ MAX_LAGGING_PERIODS = 10, /* * Count IO size in 4k pages. The 12bit shift helps keep the * size-proportional components of the cost calculation within a * similar number of digits to the per-IO cost components. */ IOC_PAGE_SHIFT = 12, IOC_PAGE_SIZE = 1 << IOC_PAGE_SHIFT, IOC_SECT_TO_PAGE_SHIFT = IOC_PAGE_SHIFT - SECTOR_SHIFT, /* if further apart than 16M, consider randio for the linear model */ LCOEF_RANDIO_PAGES = 4096, }; enum ioc_running { IOC_IDLE, IOC_RUNNING, IOC_STOP, }; /* io.cost.qos controls including per-dev enable of the whole controller */ enum { QOS_ENABLE, QOS_CTRL, NR_QOS_CTRL_PARAMS, }; /* io.cost.qos params */ enum { QOS_RPPM, QOS_RLAT, QOS_WPPM, QOS_WLAT, QOS_MIN, QOS_MAX, NR_QOS_PARAMS, }; /* io.cost.model controls */ enum { COST_CTRL, COST_MODEL, NR_COST_CTRL_PARAMS, }; /* builtin linear cost model coefficients */ enum { I_LCOEF_RBPS, I_LCOEF_RSEQIOPS, I_LCOEF_RRANDIOPS, I_LCOEF_WBPS, I_LCOEF_WSEQIOPS, I_LCOEF_WRANDIOPS, NR_I_LCOEFS, }; enum { LCOEF_RPAGE, LCOEF_RSEQIO, LCOEF_RRANDIO, LCOEF_WPAGE, LCOEF_WSEQIO, LCOEF_WRANDIO, NR_LCOEFS, }; enum { AUTOP_INVALID, AUTOP_HDD, AUTOP_SSD_QD1, AUTOP_SSD_DFL, AUTOP_SSD_FAST, }; struct ioc_params { u32 qos[NR_QOS_PARAMS]; u64 i_lcoefs[NR_I_LCOEFS]; u64 lcoefs[NR_LCOEFS]; u32 too_fast_vrate_pct; u32 too_slow_vrate_pct; }; struct ioc_margins { s64 min; s64 low; s64 target; }; struct ioc_missed { local_t nr_met; local_t nr_missed; u32 last_met; u32 last_missed; }; struct ioc_pcpu_stat { struct ioc_missed missed[2]; local64_t rq_wait_ns; u64 last_rq_wait_ns; }; /* per device */ struct ioc { struct rq_qos rqos; bool enabled; struct ioc_params params; struct ioc_margins margins; u32 period_us; u32 timer_slack_ns; u64 vrate_min; u64 vrate_max; spinlock_t lock; struct timer_list timer; struct list_head active_iocgs; /* active cgroups */ struct ioc_pcpu_stat __percpu *pcpu_stat; enum ioc_running running; atomic64_t vtime_rate; u64 vtime_base_rate; s64 vtime_err; seqcount_spinlock_t period_seqcount; u64 period_at; /* wallclock starttime */ u64 period_at_vtime; /* vtime starttime */ atomic64_t cur_period; /* inc'd each period */ int busy_level; /* saturation history */ bool weights_updated; atomic_t hweight_gen; /* for lazy hweights */ /* debt forgiveness */ u64 dfgv_period_at; u64 dfgv_period_rem; u64 dfgv_usage_us_sum; u64 autop_too_fast_at; u64 autop_too_slow_at; int autop_idx;
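/* set when io.cost.qos / io.cost.model were explicitly configured */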
bool user_qos_params:1; bool user_cost_model:1; }; struct iocg_pcpu_stat { local64_t abs_vusage; }; struct iocg_stat { u64 usage_us; u64 wait_us; u64 indebt_us; u64 indelay_us; }; /* per device-cgroup pair */ struct ioc_gq { struct blkg_policy_data pd; struct ioc *ioc; /* * An iocg can get its weight from two sources - an explicit * per-device-cgroup configuration or the default weight of the * cgroup. `cfg_weight` is the explicit per-device-cgroup * configuration. `weight` is the effective weight considering both * sources. * * When an idle cgroup becomes active its `active` goes from 0 to * `weight`. `inuse` is the surplus adjusted active weight. * `active` and `inuse` are used to calculate `hweight_active` and * `hweight_inuse`. * * `last_inuse` remembers `inuse` while an iocg is idle to persist * surplus adjustments. * * `inuse` may be adjusted dynamically during a period. `saved_*` are used * to determine and track adjustments. */ u32 cfg_weight; u32 weight; u32 active; u32 inuse; u32 last_inuse; s64 saved_margin; sector_t cursor; /* to detect randio */ /* * `vtime` is this iocg's vtime cursor which progresses as IOs are * issued. If lagging behind device vtime, the delta represents * the currently available IO budget. If running ahead, the * overage. * * `done_vtime` is the same but progressed on completion rather * than issue. The delta behind `vtime` represents the cost of * currently in-flight IOs. */ atomic64_t vtime; atomic64_t done_vtime; u64 abs_vdebt; /* current delay in effect and when it started */ u64 delay; u64 delay_at; /* * The period this iocg was last active in. Used for deactivation * and invalidating `vtime`. */ atomic64_t active_period; struct list_head active_list; /* see __propagate_weights() and current_hweight() for details */ u64 child_active_sum; u64 child_inuse_sum; u64 child_adjusted_sum; int hweight_gen; u32 hweight_active; u32 hweight_inuse; u32 hweight_donating; u32 hweight_after_donation; struct list_head walk_list; struct list_head surplus_list; struct wait_queue_head waitq; struct hrtimer waitq_timer; /* timestamp at the latest activation */ u64 activated_at; /* statistics */ struct iocg_pcpu_stat __percpu *pcpu_stat; struct iocg_stat stat; struct iocg_stat last_stat; u64 last_stat_abs_vusage; u64 usage_delta_us; u64 wait_since; u64 indebt_since; u64 indelay_since; /* this iocg's depth in the hierarchy and ancestors including self */ int level; struct ioc_gq *ancestors[]; }; /* per cgroup */ struct ioc_cgrp { struct blkcg_policy_data cpd; unsigned int dfl_weight; }; struct ioc_now { u64 now_ns; u64 now; u64 vnow; }; struct iocg_wait { struct wait_queue_entry wait; struct bio *bio; u64 abs_cost; bool committed; }; struct iocg_wake_ctx { struct ioc_gq *iocg; u32 hw_inuse; s64 vbudget; }; static const struct ioc_params autop[] = { [AUTOP_HDD] = { .qos = { [QOS_RLAT] = 250000, /* 250ms */ [QOS_WLAT] = 250000, [QOS_MIN] = VRATE_MIN_PPM, [QOS_MAX] = VRATE_MAX_PPM, }, .i_lcoefs = { [I_LCOEF_RBPS] = 174019176, [I_LCOEF_RSEQIOPS] = 41708, [I_LCOEF_RRANDIOPS] = 370, [I_LCOEF_WBPS] = 178075866, [I_LCOEF_WSEQIOPS] = 42705, [I_LCOEF_WRANDIOPS] = 378, }, }, [AUTOP_SSD_QD1] = { .qos = { [QOS_RLAT] = 25000, /* 25ms */ [QOS_WLAT] = 25000, [QOS_MIN] = VRATE_MIN_PPM, [QOS_MAX] = VRATE_MAX_PPM, }, .i_lcoefs = { [I_LCOEF_RBPS] = 245855193, [I_LCOEF_RSEQIOPS] = 61575, [I_LCOEF_RRANDIOPS] = 6946, [I_LCOEF_WBPS] = 141365009, [I_LCOEF_WSEQIOPS] = 33716, [I_LCOEF_WRANDIOPS] = 26796, }, }, [AUTOP_SSD_DFL] = { .qos = { [QOS_RLAT] = 25000, /* 25ms */ [QOS_WLAT] = 25000, [QOS_MIN] =
VRATE_MIN_PPM, [QOS_MAX] = VRATE_MAX_PPM, }, .i_lcoefs = { [I_LCOEF_RBPS] = 488636629, [I_LCOEF_RSEQIOPS] = 8932, [I_LCOEF_RRANDIOPS] = 8518, [I_LCOEF_WBPS] = 427891549, [I_LCOEF_WSEQIOPS] = 28755, [I_LCOEF_WRANDIOPS] = 21940, }, .too_fast_vrate_pct = 500, }, [AUTOP_SSD_FAST] = { .qos = { [QOS_RLAT] = 5000, /* 5ms */ [QOS_WLAT] = 5000, [QOS_MIN] = VRATE_MIN_PPM, [QOS_MAX] = VRATE_MAX_PPM, }, .i_lcoefs = { [I_LCOEF_RBPS] = 3102524156LLU, [I_LCOEF_RSEQIOPS] = 724816, [I_LCOEF_RRANDIOPS] = 778122, [I_LCOEF_WBPS] = 1742780862LLU, [I_LCOEF_WSEQIOPS] = 425702, [I_LCOEF_WRANDIOPS] = 443193, }, .too_slow_vrate_pct = 10, }, }; /* * vrate adjust percentages indexed by ioc->busy_level. We adjust up on * vtime credit shortage and down on device saturation. */ static const u32 vrate_adj_pct[] = { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 16 }; static struct blkcg_policy blkcg_policy_iocost; /* accessors and helpers */ static struct ioc *rqos_to_ioc(struct rq_qos *rqos) { return container_of(rqos, struct ioc, rqos); } static struct ioc *q_to_ioc(struct request_queue *q) { return rqos_to_ioc(rq_qos_id(q, RQ_QOS_COST)); } static const char __maybe_unused *ioc_name(struct ioc *ioc) { struct gendisk *disk = ioc->rqos.disk; if (!disk) return "<unknown>"; return disk->disk_name; } static struct ioc_gq *pd_to_iocg(struct blkg_policy_data *pd) { return pd ? container_of(pd, struct ioc_gq, pd) : NULL; } static struct ioc_gq *blkg_to_iocg(struct blkcg_gq *blkg) { return pd_to_iocg(blkg_to_pd(blkg, &blkcg_policy_iocost)); } static struct blkcg_gq *iocg_to_blkg(struct ioc_gq *iocg) { return pd_to_blkg(&iocg->pd); } static struct ioc_cgrp *blkcg_to_iocc(struct blkcg *blkcg) { return container_of(blkcg_to_cpd(blkcg, &blkcg_policy_iocost), struct ioc_cgrp, cpd); } /* * Scale @abs_cost to the inverse of @hw_inuse. The lower the hierarchical * weight, the more expensive each IO. Must round up. */ static u64 abs_cost_to_cost(u64 abs_cost, u32 hw_inuse) { return DIV64_U64_ROUND_UP(abs_cost * WEIGHT_ONE, hw_inuse); } /* * The inverse of abs_cost_to_cost(). Must round up. 
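* For example, at an hw_inuse of WEIGHT_ONE / 2 (a 50% hierarchical
* share), abs_cost_to_cost() doubles an absolute cost while this helper
* maps the doubled vtime cost back.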
*/ static u64 cost_to_abs_cost(u64 cost, u32 hw_inuse) { return DIV64_U64_ROUND_UP(cost * hw_inuse, WEIGHT_ONE); } static void iocg_commit_bio(struct ioc_gq *iocg, struct bio *bio, u64 abs_cost, u64 cost) { struct iocg_pcpu_stat *gcs; bio->bi_iocost_cost = cost; atomic64_add(cost, &iocg->vtime); gcs = get_cpu_ptr(iocg->pcpu_stat); local64_add(abs_cost, &gcs->abs_vusage); put_cpu_ptr(gcs); } static void iocg_lock(struct ioc_gq *iocg, bool lock_ioc, unsigned long *flags) { if (lock_ioc) { spin_lock_irqsave(&iocg->ioc->lock, *flags); spin_lock(&iocg->waitq.lock); } else { spin_lock_irqsave(&iocg->waitq.lock, *flags); } } static void iocg_unlock(struct ioc_gq *iocg, bool unlock_ioc, unsigned long *flags) { if (unlock_ioc) { spin_unlock(&iocg->waitq.lock); spin_unlock_irqrestore(&iocg->ioc->lock, *flags); } else { spin_unlock_irqrestore(&iocg->waitq.lock, *flags); } } #define CREATE_TRACE_POINTS #include <trace/events/iocost.h> static void ioc_refresh_margins(struct ioc *ioc) { struct ioc_margins *margins = &ioc->margins; u32 period_us = ioc->period_us; u64 vrate = ioc->vtime_base_rate; margins->min = (period_us * MARGIN_MIN_PCT / 100) * vrate; margins->low = (period_us * MARGIN_LOW_PCT / 100) * vrate; margins->target = (period_us * MARGIN_TARGET_PCT / 100) * vrate; } /* latency QoS params changed, update period_us and all the dependent params */ static void ioc_refresh_period_us(struct ioc *ioc) { u32 ppm, lat, multi, period_us; lockdep_assert_held(&ioc->lock); /* pick the higher latency target */ if (ioc->params.qos[QOS_RLAT] >= ioc->params.qos[QOS_WLAT]) { ppm = ioc->params.qos[QOS_RPPM]; lat = ioc->params.qos[QOS_RLAT]; } else { ppm = ioc->params.qos[QOS_WPPM]; lat = ioc->params.qos[QOS_WLAT]; } /* * We want the period to be long enough to contain a healthy number * of IOs while short enough for granular control. Define it as a * multiple of the latency target. Ideally, the multiplier should * be scaled according to the percentile so that it would nominally * contain a certain number of requests. Let's be simpler and * scale it linearly so that it's 2x >= pct(90) and 10x at pct(50). */ if (ppm) multi = max_t(u32, (MILLION - ppm) / 50000, 2); else multi = 2; period_us = multi * lat; period_us = clamp_t(u32, period_us, MIN_PERIOD, MAX_PERIOD); /* calculate dependent params */ ioc->period_us = period_us; ioc->timer_slack_ns = div64_u64( (u64)period_us * NSEC_PER_USEC * TIMER_SLACK_PCT, 100); ioc_refresh_margins(ioc); } /* * ioc->rqos.disk isn't initialized when this function is called from * the init path. */ static int ioc_autop_idx(struct ioc *ioc, struct gendisk *disk) { int idx = ioc->autop_idx; const struct ioc_params *p = &autop[idx]; u32 vrate_pct; u64 now_ns; /* rotational?
*/ if (!blk_queue_nonrot(disk->queue)) return AUTOP_HDD; /* handle SATA SSDs w/ broken NCQ */ if (blk_queue_depth(disk->queue) == 1) return AUTOP_SSD_QD1; /* use one of the normal ssd sets */ if (idx < AUTOP_SSD_DFL) return AUTOP_SSD_DFL; /* if user is overriding anything, maintain what was there */ if (ioc->user_qos_params || ioc->user_cost_model) return idx; /* step up/down based on the vrate */ vrate_pct = div64_u64(ioc->vtime_base_rate * 100, VTIME_PER_USEC); now_ns = blk_time_get_ns(); if (p->too_fast_vrate_pct && p->too_fast_vrate_pct <= vrate_pct) { if (!ioc->autop_too_fast_at) ioc->autop_too_fast_at = now_ns; if (now_ns - ioc->autop_too_fast_at >= AUTOP_CYCLE_NSEC) return idx + 1; } else { ioc->autop_too_fast_at = 0; } if (p->too_slow_vrate_pct && p->too_slow_vrate_pct >= vrate_pct) { if (!ioc->autop_too_slow_at) ioc->autop_too_slow_at = now_ns; if (now_ns - ioc->autop_too_slow_at >= AUTOP_CYCLE_NSEC) return idx - 1; } else { ioc->autop_too_slow_at = 0; } return idx; } /* * Take the following as input * * @bps maximum sequential throughput * @seqiops maximum sequential 4k iops * @randiops maximum random 4k iops * * and calculate the linear model cost coefficients. * * *@page per-page cost 1s / (@bps / 4096) * *@seqio base cost of a seq IO max((1s / @seqiops) - *@page, 0) * *@randio base cost of a rand IO max((1s / @randiops) - *@page, 0) */ static void calc_lcoefs(u64 bps, u64 seqiops, u64 randiops, u64 *page, u64 *seqio, u64 *randio) { u64 v; *page = *seqio = *randio = 0; if (bps) { u64 bps_pages = DIV_ROUND_UP_ULL(bps, IOC_PAGE_SIZE); if (bps_pages) *page = DIV64_U64_ROUND_UP(VTIME_PER_SEC, bps_pages); else *page = 1; } if (seqiops) { v = DIV64_U64_ROUND_UP(VTIME_PER_SEC, seqiops); if (v > *page) *seqio = v - *page; } if (randiops) { v = DIV64_U64_ROUND_UP(VTIME_PER_SEC, randiops); if (v > *page) *randio = v - *page; } } static void ioc_refresh_lcoefs(struct ioc *ioc) { u64 *u = ioc->params.i_lcoefs; u64 *c = ioc->params.lcoefs; calc_lcoefs(u[I_LCOEF_RBPS], u[I_LCOEF_RSEQIOPS], u[I_LCOEF_RRANDIOPS], &c[LCOEF_RPAGE], &c[LCOEF_RSEQIO], &c[LCOEF_RRANDIO]); calc_lcoefs(u[I_LCOEF_WBPS], u[I_LCOEF_WSEQIOPS], u[I_LCOEF_WRANDIOPS], &c[LCOEF_WPAGE], &c[LCOEF_WSEQIO], &c[LCOEF_WRANDIO]); } /* * struct gendisk is required as an argument because ioc->rqos.disk * is not properly initialized when called from the init path. */ static bool ioc_refresh_params_disk(struct ioc *ioc, bool force, struct gendisk *disk) { const struct ioc_params *p; int idx; lockdep_assert_held(&ioc->lock); idx = ioc_autop_idx(ioc, disk); p = &autop[idx]; if (idx == ioc->autop_idx && !force) return false; if (idx != ioc->autop_idx) { atomic64_set(&ioc->vtime_rate, VTIME_PER_USEC); ioc->vtime_base_rate = VTIME_PER_USEC; } ioc->autop_idx = idx; ioc->autop_too_fast_at = 0; ioc->autop_too_slow_at = 0; if (!ioc->user_qos_params) memcpy(ioc->params.qos, p->qos, sizeof(p->qos)); if (!ioc->user_cost_model) memcpy(ioc->params.i_lcoefs, p->i_lcoefs, sizeof(p->i_lcoefs)); ioc_refresh_period_us(ioc); ioc_refresh_lcoefs(ioc); ioc->vrate_min = DIV64_U64_ROUND_UP((u64)ioc->params.qos[QOS_MIN] * VTIME_PER_USEC, MILLION); ioc->vrate_max = DIV64_U64_ROUND_UP((u64)ioc->params.qos[QOS_MAX] * VTIME_PER_USEC, MILLION); return true; } static bool ioc_refresh_params(struct ioc *ioc, bool force) { return ioc_refresh_params_disk(ioc, force, ioc->rqos.disk); } /* * When an iocg accumulates too much vtime or gets deactivated, we throw away * some vtime, which lowers the overall device utilization.
As the exact amount * which is being thrown away is known, we can compensate by accelerating the * vrate accordingly so that the extra vtime generated in the current period * matches what got lost. */ static void ioc_refresh_vrate(struct ioc *ioc, struct ioc_now *now) { s64 pleft = ioc->period_at + ioc->period_us - now->now; s64 vperiod = ioc->period_us * ioc->vtime_base_rate; s64 vcomp, vcomp_min, vcomp_max; lockdep_assert_held(&ioc->lock); /* we need some time left in this period */ if (pleft <= 0) goto done; /* * Calculate how much vrate should be adjusted to offset the error. * Limit the amount of adjustment and deduct the adjusted amount from * the error. */ vcomp = -div64_s64(ioc->vtime_err, pleft); vcomp_min = -(ioc->vtime_base_rate >> 1); vcomp_max = ioc->vtime_base_rate; vcomp = clamp(vcomp, vcomp_min, vcomp_max); ioc->vtime_err += vcomp * pleft; atomic64_set(&ioc->vtime_rate, ioc->vtime_base_rate + vcomp); done: /* bound how much error can accumulate */ ioc->vtime_err = clamp(ioc->vtime_err, -vperiod, vperiod); } static void ioc_adjust_base_vrate(struct ioc *ioc, u32 rq_wait_pct, int nr_lagging, int nr_shortages, int prev_busy_level, u32 *missed_ppm) { u64 vrate = ioc->vtime_base_rate; u64 vrate_min = ioc->vrate_min, vrate_max = ioc->vrate_max; if (!ioc->busy_level || (ioc->busy_level < 0 && nr_lagging)) { if (ioc->busy_level != prev_busy_level || nr_lagging) trace_iocost_ioc_vrate_adj(ioc, vrate, missed_ppm, rq_wait_pct, nr_lagging, nr_shortages); return; } /* * If vrate is out of bounds, apply clamp gradually as the * bounds can change abruptly. Otherwise, apply busy_level * based adjustment. */ if (vrate < vrate_min) { vrate = div64_u64(vrate * (100 + VRATE_CLAMP_ADJ_PCT), 100); vrate = min(vrate, vrate_min); } else if (vrate > vrate_max) { vrate = div64_u64(vrate * (100 - VRATE_CLAMP_ADJ_PCT), 100); vrate = max(vrate, vrate_max); } else { int idx = min_t(int, abs(ioc->busy_level), ARRAY_SIZE(vrate_adj_pct) - 1); u32 adj_pct = vrate_adj_pct[idx]; if (ioc->busy_level > 0) adj_pct = 100 - adj_pct; else adj_pct = 100 + adj_pct; vrate = clamp(DIV64_U64_ROUND_UP(vrate * adj_pct, 100), vrate_min, vrate_max); } trace_iocost_ioc_vrate_adj(ioc, vrate, missed_ppm, rq_wait_pct, nr_lagging, nr_shortages); ioc->vtime_base_rate = vrate; ioc_refresh_margins(ioc); } /* take a snapshot of the current [v]time and vrate */ static void ioc_now(struct ioc *ioc, struct ioc_now *now) { unsigned seq; u64 vrate; now->now_ns = blk_time_get_ns(); now->now = ktime_to_us(now->now_ns); vrate = atomic64_read(&ioc->vtime_rate); /* * The current vtime is * * vtime at period start + (wallclock time since the start) * vrate * * As a consistent snapshot of `period_at_vtime` and `period_at` is * needed, they're seqcount protected. */ do { seq = read_seqcount_begin(&ioc->period_seqcount); now->vnow = ioc->period_at_vtime + (now->now - ioc->period_at) * vrate; } while (read_seqcount_retry(&ioc->period_seqcount, seq)); } static void ioc_start_period(struct ioc *ioc, struct ioc_now *now) { WARN_ON_ONCE(ioc->running != IOC_RUNNING); write_seqcount_begin(&ioc->period_seqcount); ioc->period_at = now->now; ioc->period_at_vtime = now->vnow; write_seqcount_end(&ioc->period_seqcount); ioc->timer.expires = jiffies + usecs_to_jiffies(ioc->period_us); add_timer(&ioc->timer); } /* * Update @iocg's `active` and `inuse` to @active and @inuse, update level * weight sums and propagate upwards accordingly. If @save, the current margin * is saved to be used as reference for later inuse in-period adjustments. 
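*
* (Illustrative walk-through using the hierarchy example from the
* top-of-file comment: if leaf A0 drops its inuse from 100 to 50 while
* A1 stays at 100, A's child_inuse_sum goes from 200 to 150 and A's own
* inuse is recomputed as 100 * 150 / 200 = 75, which is then applied to
* the root level sums in the same manner.)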
*/ static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse, bool save, struct ioc_now *now) { struct ioc *ioc = iocg->ioc; int lvl; lockdep_assert_held(&ioc->lock); /* * For an active leaf node, its inuse shouldn't be zero or exceed * @active. An active internal node's inuse is solely determined by the * inuse to active ratio of its children regardless of @inuse. */ if (list_empty(&iocg->active_list) && iocg->child_active_sum) { inuse = DIV64_U64_ROUND_UP(active * iocg->child_inuse_sum, iocg->child_active_sum); } else { /* * It may be tempting to turn this into a clamp expression with * a lower limit of 1 but active may be 0, which cannot be used * as an upper limit in that situation. This expression allows * active to clamp inuse unless it is 0, in which case inuse * becomes 1. */ inuse = min(inuse, active) ?: 1; } iocg->last_inuse = iocg->inuse; if (save) iocg->saved_margin = now->vnow - atomic64_read(&iocg->vtime); if (active == iocg->active && inuse == iocg->inuse) return; for (lvl = iocg->level - 1; lvl >= 0; lvl--) { struct ioc_gq *parent = iocg->ancestors[lvl]; struct ioc_gq *child = iocg->ancestors[lvl + 1]; u32 parent_active = 0, parent_inuse = 0; /* update the level sums */ parent->child_active_sum += (s32)(active - child->active); parent->child_inuse_sum += (s32)(inuse - child->inuse); /* apply the updates */ child->active = active; child->inuse = inuse; /* * The delta between inuse and active sums indicates how * much weight is being given away. Parent's inuse * and active should reflect the ratio. */ if (parent->child_active_sum) { parent_active = parent->weight; parent_inuse = DIV64_U64_ROUND_UP( parent_active * parent->child_inuse_sum, parent->child_active_sum); } /* do we need to keep walking up? */ if (parent_active == parent->active && parent_inuse == parent->inuse) break; active = parent_active; inuse = parent_inuse; } ioc->weights_updated = true; } static void commit_weights(struct ioc *ioc) { lockdep_assert_held(&ioc->lock); if (ioc->weights_updated) { /* paired with rmb in current_hweight(), see there */ smp_wmb(); atomic_inc(&ioc->hweight_gen); ioc->weights_updated = false; } } static void propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse, bool save, struct ioc_now *now) { __propagate_weights(iocg, active, inuse, save, now); commit_weights(iocg->ioc); } static void current_hweight(struct ioc_gq *iocg, u32 *hw_activep, u32 *hw_inusep) { struct ioc *ioc = iocg->ioc; int lvl; u32 hwa, hwi; int ioc_gen; /* hot path - if uptodate, use cached */ ioc_gen = atomic_read(&ioc->hweight_gen); if (ioc_gen == iocg->hweight_gen) goto out; /* * Paired with wmb in commit_weights(). If we saw the updated * hweight_gen, all the weight updates from __propagate_weights() are * visible too. * * We can race with weight updates during calculation and get it * wrong. However, hweight_gen would have changed and a future * reader will recalculate and we're guaranteed to discard the * wrong result soon.
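*
* (Numerically, with the hierarchy example from the top-of-file comment
* and everyone active, the loop below computes A0's hweight_active as
* WEIGHT_ONE * 100/(100+300) * 100/(100+100), i.e. the 12.5% share
* quoted in that example.)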
*/ smp_rmb(); hwa = hwi = WEIGHT_ONE; for (lvl = 0; lvl <= iocg->level - 1; lvl++) { struct ioc_gq *parent = iocg->ancestors[lvl]; struct ioc_gq *child = iocg->ancestors[lvl + 1]; u64 active_sum = READ_ONCE(parent->child_active_sum); u64 inuse_sum = READ_ONCE(parent->child_inuse_sum); u32 active = READ_ONCE(child->active); u32 inuse = READ_ONCE(child->inuse); /* we can race with deactivations and either may read as zero */ if (!active_sum || !inuse_sum) continue; active_sum = max_t(u64, active, active_sum); hwa = div64_u64((u64)hwa * active, active_sum); inuse_sum = max_t(u64, inuse, inuse_sum); hwi = div64_u64((u64)hwi * inuse, inuse_sum); } iocg->hweight_active = max_t(u32, hwa, 1); iocg->hweight_inuse = max_t(u32, hwi, 1); iocg->hweight_gen = ioc_gen; out: if (hw_activep) *hw_activep = iocg->hweight_active; if (hw_inusep) *hw_inusep = iocg->hweight_inuse; } /* * Calculate the hweight_inuse @iocg would get with max @inuse assuming all the * other weights stay unchanged. */ static u32 current_hweight_max(struct ioc_gq *iocg) { u32 hwm = WEIGHT_ONE; u32 inuse = iocg->active; u64 child_inuse_sum; int lvl; lockdep_assert_held(&iocg->ioc->lock); for (lvl = iocg->level - 1; lvl >= 0; lvl--) { struct ioc_gq *parent = iocg->ancestors[lvl]; struct ioc_gq *child = iocg->ancestors[lvl + 1]; child_inuse_sum = parent->child_inuse_sum + inuse - child->inuse; hwm = div64_u64((u64)hwm * inuse, child_inuse_sum); inuse = DIV64_U64_ROUND_UP(parent->active * child_inuse_sum, parent->child_active_sum); } return max_t(u32, hwm, 1); } static void weight_updated(struct ioc_gq *iocg, struct ioc_now *now) { struct ioc *ioc = iocg->ioc; struct blkcg_gq *blkg = iocg_to_blkg(iocg); struct ioc_cgrp *iocc = blkcg_to_iocc(blkg->blkcg); u32 weight; lockdep_assert_held(&ioc->lock); weight = iocg->cfg_weight ?: iocc->dfl_weight; if (weight != iocg->weight && iocg->active) propagate_weights(iocg, weight, iocg->inuse, true, now); iocg->weight = weight; } static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now) { struct ioc *ioc = iocg->ioc; u64 __maybe_unused last_period, cur_period; u64 vtime, vtarget; int i; /* * If we seem to be already active, just update the stamp to tell the * timer that we're still active. We don't mind occasional races. */ if (!list_empty(&iocg->active_list)) { ioc_now(ioc, now); cur_period = atomic64_read(&ioc->cur_period); if (atomic64_read(&iocg->active_period) != cur_period) atomic64_set(&iocg->active_period, cur_period); return true; } /* racy check on internal node IOs, treat as root level IOs */ if (iocg->child_active_sum) return false; spin_lock_irq(&ioc->lock); ioc_now(ioc, now); /* update period */ cur_period = atomic64_read(&ioc->cur_period); last_period = atomic64_read(&iocg->active_period); atomic64_set(&iocg->active_period, cur_period); /* already activated or breaking leaf-only constraint? */ if (!list_empty(&iocg->active_list)) goto succeed_unlock; for (i = iocg->level - 1; i > 0; i--) if (!list_empty(&iocg->ancestors[i]->active_list)) goto fail_unlock; if (iocg->child_active_sum) goto fail_unlock; /* * Always start with the target budget. On deactivation, we throw away * anything above it. */ vtarget = now->vnow - ioc->margins.target; vtime = atomic64_read(&iocg->vtime); atomic64_add(vtarget - vtime, &iocg->vtime); atomic64_add(vtarget - vtime, &iocg->done_vtime); vtime = vtarget; /* * Activate, propagate weight and start period timer if not * running. Reset hweight_gen to avoid accidental match from * wrapping.
*/ iocg->hweight_gen = atomic_read(&ioc->hweight_gen) - 1; list_add(&iocg->active_list, &ioc->active_iocgs); propagate_weights(iocg, iocg->weight, iocg->last_inuse ?: iocg->weight, true, now); TRACE_IOCG_PATH(iocg_activate, iocg, now, last_period, cur_period, vtime); iocg->activated_at = now->now; if (ioc->running == IOC_IDLE) { ioc->running = IOC_RUNNING; ioc->dfgv_period_at = now->now; ioc->dfgv_period_rem = 0; ioc_start_period(ioc, now); } succeed_unlock: spin_unlock_irq(&ioc->lock); return true; fail_unlock: spin_unlock_irq(&ioc->lock); return false; } static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now) { struct ioc *ioc = iocg->ioc; struct blkcg_gq *blkg = iocg_to_blkg(iocg); u64 tdelta, delay, new_delay, shift; s64 vover, vover_pct; u32 hwa; lockdep_assert_held(&iocg->waitq.lock); /* * If the delay is set by another CPU, we may be in the past. No need to * change anything if so. This avoids decay calculation underflow. */ if (time_before64(now->now, iocg->delay_at)) return false; /* calculate the current delay in effect - 1/2 every second */ tdelta = now->now - iocg->delay_at; shift = div64_u64(tdelta, USEC_PER_SEC); if (iocg->delay && shift < BITS_PER_LONG) delay = iocg->delay >> shift; else delay = 0; /* calculate the new delay from the debt amount */ current_hweight(iocg, &hwa, NULL); vover = atomic64_read(&iocg->vtime) + abs_cost_to_cost(iocg->abs_vdebt, hwa) - now->vnow; vover_pct = div64_s64(100 * vover, ioc->period_us * ioc->vtime_base_rate); if (vover_pct <= MIN_DELAY_THR_PCT) new_delay = 0; else if (vover_pct >= MAX_DELAY_THR_PCT) new_delay = MAX_DELAY; else new_delay = MIN_DELAY + div_u64((MAX_DELAY - MIN_DELAY) * (vover_pct - MIN_DELAY_THR_PCT), MAX_DELAY_THR_PCT - MIN_DELAY_THR_PCT); /* pick the higher one and apply */ if (new_delay > delay) { iocg->delay = new_delay; iocg->delay_at = now->now; delay = new_delay; } if (delay >= MIN_DELAY) { if (!iocg->indelay_since) iocg->indelay_since = now->now; blkcg_set_delay(blkg, delay * NSEC_PER_USEC); return true; } else { if (iocg->indelay_since) { iocg->stat.indelay_us += now->now - iocg->indelay_since; iocg->indelay_since = 0; } iocg->delay = 0; blkcg_clear_delay(blkg); return false; } } static void iocg_incur_debt(struct ioc_gq *iocg, u64 abs_cost, struct ioc_now *now) { struct iocg_pcpu_stat *gcs; lockdep_assert_held(&iocg->ioc->lock); lockdep_assert_held(&iocg->waitq.lock); WARN_ON_ONCE(list_empty(&iocg->active_list)); /* * Once in debt, debt handling owns inuse. @iocg stays at the minimum * inuse donating all of its share to others until its debt is paid off. */ if (!iocg->abs_vdebt && abs_cost) { iocg->indebt_since = now->now; propagate_weights(iocg, iocg->active, 0, false, now); } iocg->abs_vdebt += abs_cost; gcs = get_cpu_ptr(iocg->pcpu_stat); local64_add(abs_cost, &gcs->abs_vusage); put_cpu_ptr(gcs); } static void iocg_pay_debt(struct ioc_gq *iocg, u64 abs_vpay, struct ioc_now *now) { lockdep_assert_held(&iocg->ioc->lock); lockdep_assert_held(&iocg->waitq.lock); /* * Make sure that nobody messed with @iocg. Check iocg->pd.online * to avoid a warning when removing a blkcg or disk.
*/ WARN_ON_ONCE(list_empty(&iocg->active_list) && iocg->pd.online); WARN_ON_ONCE(iocg->inuse > 1); iocg->abs_vdebt -= min(abs_vpay, iocg->abs_vdebt); /* if debt is paid in full, restore inuse */ if (!iocg->abs_vdebt) { iocg->stat.indebt_us += now->now - iocg->indebt_since; iocg->indebt_since = 0; propagate_weights(iocg, iocg->active, iocg->last_inuse, false, now); } } static int iocg_wake_fn(struct wait_queue_entry *wq_entry, unsigned mode, int flags, void *key) { struct iocg_wait *wait = container_of(wq_entry, struct iocg_wait, wait); struct iocg_wake_ctx *ctx = key; u64 cost = abs_cost_to_cost(wait->abs_cost, ctx->hw_inuse); ctx->vbudget -= cost; if (ctx->vbudget < 0) return -1; iocg_commit_bio(ctx->iocg, wait->bio, wait->abs_cost, cost); wait->committed = true; /* * autoremove_wake_function() removes the wait entry only when it * actually changed the task state. We want the wait always removed. * Remove explicitly and use default_wake_function(). Note that the * order of operations is important as finish_wait() tests whether * @wq_entry is removed without grabbing the lock. */ default_wake_function(wq_entry, mode, flags, key); list_del_init_careful(&wq_entry->entry); return 0; } /* * Calculate the accumulated budget, pay debt if @pay_debt and wake up waiters * accordingly. When @pay_debt is %true, the caller must be holding ioc->lock in * addition to iocg->waitq.lock. */ static void iocg_kick_waitq(struct ioc_gq *iocg, bool pay_debt, struct ioc_now *now) { struct ioc *ioc = iocg->ioc; struct iocg_wake_ctx ctx = { .iocg = iocg }; u64 vshortage, expires, oexpires; s64 vbudget; u32 hwa; lockdep_assert_held(&iocg->waitq.lock); current_hweight(iocg, &hwa, NULL); vbudget = now->vnow - atomic64_read(&iocg->vtime); /* pay off debt */ if (pay_debt && iocg->abs_vdebt && vbudget > 0) { u64 abs_vbudget = cost_to_abs_cost(vbudget, hwa); u64 abs_vpay = min_t(u64, abs_vbudget, iocg->abs_vdebt); u64 vpay = abs_cost_to_cost(abs_vpay, hwa); lockdep_assert_held(&ioc->lock); atomic64_add(vpay, &iocg->vtime); atomic64_add(vpay, &iocg->done_vtime); iocg_pay_debt(iocg, abs_vpay, now); vbudget -= vpay; } if (iocg->abs_vdebt || iocg->delay) iocg_kick_delay(iocg, now); /* * Debt can still be outstanding if we haven't paid all yet or the * caller raced and called without @pay_debt. Shouldn't wake up waiters * under debt. Make sure @vbudget reflects the outstanding amount and is * not positive. */ if (iocg->abs_vdebt) { s64 vdebt = abs_cost_to_cost(iocg->abs_vdebt, hwa); vbudget = min_t(s64, 0, vbudget - vdebt); } /* * Wake up the ones which are due and see how much vtime we'll need for * the next one. As paying off debt restores hw_inuse, it must be read * after the above debt payment. 
*/ ctx.vbudget = vbudget; current_hweight(iocg, NULL, &ctx.hw_inuse); __wake_up_locked_key(&iocg->waitq, TASK_NORMAL, &ctx); if (!waitqueue_active(&iocg->waitq)) { if (iocg->wait_since) { iocg->stat.wait_us += now->now - iocg->wait_since; iocg->wait_since = 0; } return; } if (!iocg->wait_since) iocg->wait_since = now->now; if (WARN_ON_ONCE(ctx.vbudget >= 0)) return; /* determine next wakeup, add a timer margin to guarantee chunking */ vshortage = -ctx.vbudget; expires = now->now_ns + DIV64_U64_ROUND_UP(vshortage, ioc->vtime_base_rate) * NSEC_PER_USEC; expires += ioc->timer_slack_ns; /* if already active and close enough, don't bother */ oexpires = ktime_to_ns(hrtimer_get_softexpires(&iocg->waitq_timer)); if (hrtimer_is_queued(&iocg->waitq_timer) && abs(oexpires - expires) <= ioc->timer_slack_ns) return; hrtimer_start_range_ns(&iocg->waitq_timer, ns_to_ktime(expires), ioc->timer_slack_ns, HRTIMER_MODE_ABS); } static enum hrtimer_restart iocg_waitq_timer_fn(struct hrtimer *timer) { struct ioc_gq *iocg = container_of(timer, struct ioc_gq, waitq_timer); bool pay_debt = READ_ONCE(iocg->abs_vdebt); struct ioc_now now; unsigned long flags; ioc_now(iocg->ioc, &now); iocg_lock(iocg, pay_debt, &flags); iocg_kick_waitq(iocg, pay_debt, &now); iocg_unlock(iocg, pay_debt, &flags); return HRTIMER_NORESTART; } static void ioc_lat_stat(struct ioc *ioc, u32 *missed_ppm_ar, u32 *rq_wait_pct_p) { u32 nr_met[2] = { }; u32 nr_missed[2] = { }; u64 rq_wait_ns = 0; int cpu, rw; for_each_online_cpu(cpu) { struct ioc_pcpu_stat *stat = per_cpu_ptr(ioc->pcpu_stat, cpu); u64 this_rq_wait_ns; for (rw = READ; rw <= WRITE; rw++) { u32 this_met = local_read(&stat->missed[rw].nr_met); u32 this_missed = local_read(&stat->missed[rw].nr_missed); nr_met[rw] += this_met - stat->missed[rw].last_met; nr_missed[rw] += this_missed - stat->missed[rw].last_missed; stat->missed[rw].last_met = this_met; stat->missed[rw].last_missed = this_missed; } this_rq_wait_ns = local64_read(&stat->rq_wait_ns); rq_wait_ns += this_rq_wait_ns - stat->last_rq_wait_ns; stat->last_rq_wait_ns = this_rq_wait_ns; } for (rw = READ; rw <= WRITE; rw++) { if (nr_met[rw] + nr_missed[rw]) missed_ppm_ar[rw] = DIV64_U64_ROUND_UP((u64)nr_missed[rw] * MILLION, nr_met[rw] + nr_missed[rw]); else missed_ppm_ar[rw] = 0; } *rq_wait_pct_p = div64_u64(rq_wait_ns * 100, ioc->period_us * NSEC_PER_USEC); } /* was iocg idle this period? */ static bool iocg_is_idle(struct ioc_gq *iocg) { struct ioc *ioc = iocg->ioc; /* did something get issued this period? */ if (atomic64_read(&iocg->active_period) == atomic64_read(&ioc->cur_period)) return false; /* is something in flight? */ if (atomic64_read(&iocg->done_vtime) != atomic64_read(&iocg->vtime)) return false; return true; } /* * Call this function on each target leaf @iocg to build a pre-order traversal * list of all the ancestors in @inner_walk. The inner nodes are linked through * ->walk_list and the caller is responsible for dissolving the list after use.
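*
* (E.g., with the hierarchy example from the top-of-file comment,
* calling this on leaf A0 links root and then A onto @inner_walk; a
* later call on leaf A1 adds nothing as all of its inner ancestors are
* already on the list.)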
*/ static void iocg_build_inner_walk(struct ioc_gq *iocg, struct list_head *inner_walk) { int lvl; WARN_ON_ONCE(!list_empty(&iocg->walk_list)); /* find the first ancestor which hasn't been visited yet */ for (lvl = iocg->level - 1; lvl >= 0; lvl--) { if (!list_empty(&iocg->ancestors[lvl]->walk_list)) break; } /* walk down and visit the inner nodes to get pre-order traversal */ while (++lvl <= iocg->level - 1) { struct ioc_gq *inner = iocg->ancestors[lvl]; /* record traversal order */ list_add_tail(&inner->walk_list, inner_walk); } } /* propagate the deltas to the parent */ static void iocg_flush_stat_upward(struct ioc_gq *iocg) { if (iocg->level > 0) { struct iocg_stat *parent_stat = &iocg->ancestors[iocg->level - 1]->stat; parent_stat->usage_us += iocg->stat.usage_us - iocg->last_stat.usage_us; parent_stat->wait_us += iocg->stat.wait_us - iocg->last_stat.wait_us; parent_stat->indebt_us += iocg->stat.indebt_us - iocg->last_stat.indebt_us; parent_stat->indelay_us += iocg->stat.indelay_us - iocg->last_stat.indelay_us; } iocg->last_stat = iocg->stat; } /* collect per-cpu counters and propagate the deltas to the parent */ static void iocg_flush_stat_leaf(struct ioc_gq *iocg, struct ioc_now *now) { struct ioc *ioc = iocg->ioc; u64 abs_vusage = 0; u64 vusage_delta; int cpu; lockdep_assert_held(&iocg->ioc->lock); /* collect per-cpu counters */ for_each_possible_cpu(cpu) { abs_vusage += local64_read( per_cpu_ptr(&iocg->pcpu_stat->abs_vusage, cpu)); } vusage_delta = abs_vusage - iocg->last_stat_abs_vusage; iocg->last_stat_abs_vusage = abs_vusage; iocg->usage_delta_us = div64_u64(vusage_delta, ioc->vtime_base_rate); iocg->stat.usage_us += iocg->usage_delta_us; iocg_flush_stat_upward(iocg); } /* get stat counters ready for reading on all active iocgs */ static void iocg_flush_stat(struct list_head *target_iocgs, struct ioc_now *now) { LIST_HEAD(inner_walk); struct ioc_gq *iocg, *tiocg; /* flush leaves and build inner node walk list */ list_for_each_entry(iocg, target_iocgs, active_list) { iocg_flush_stat_leaf(iocg, now); iocg_build_inner_walk(iocg, &inner_walk); } /* keep flushing upwards by walking the inner list backwards */ list_for_each_entry_safe_reverse(iocg, tiocg, &inner_walk, walk_list) { iocg_flush_stat_upward(iocg); list_del_init(&iocg->walk_list); } } /* * Determine what @iocg's hweight_inuse should be after donating unused * capacity. @hwm is the upper bound and used to signal no donation. This * function also throws away @iocg's excess budget. */ static u32 hweight_after_donation(struct ioc_gq *iocg, u32 old_hwi, u32 hwm, u32 usage, struct ioc_now *now) { struct ioc *ioc = iocg->ioc; u64 vtime = atomic64_read(&iocg->vtime); s64 excess, delta, target, new_hwi; /* debt handling owns inuse for debtors */ if (iocg->abs_vdebt) return 1; /* see whether minimum margin requirement is met */ if (waitqueue_active(&iocg->waitq) || time_after64(vtime, now->vnow - ioc->margins.min)) return hwm; /* throw away excess above target */ excess = now->vnow - vtime - ioc->margins.target; if (excess > 0) { atomic64_add(excess, &iocg->vtime); atomic64_add(excess, &iocg->done_vtime); vtime += excess; ioc->vtime_err -= div64_u64(excess * old_hwi, WEIGHT_ONE); } /* * Let's say delta is the distance between the iocg's and the device's * vtimes as a fraction of the period duration. Assuming that the iocg * will consume the usage determined above, we want to determine new_hwi * so that delta equals MARGIN_TARGET at the end of the next period.
* * We need to execute usage worth of IOs while spending the sum of the * new budget (1 - MARGIN_TARGET) and the leftover from the last period * (delta): * * usage = (1 - MARGIN_TARGET + delta) * new_hwi * * Therefore, the new_hwi is: * * new_hwi = usage / (1 - MARGIN_TARGET + delta) */ delta = div64_s64(WEIGHT_ONE * (now->vnow - vtime), now->vnow - ioc->period_at_vtime); target = WEIGHT_ONE * MARGIN_TARGET_PCT / 100; new_hwi = div64_s64(WEIGHT_ONE * usage, WEIGHT_ONE - target + delta); return clamp_t(s64, new_hwi, 1, hwm); } /* * For work-conservation, an iocg which isn't using all of its share should * donate the leftover to other iocgs. There are two ways to achieve this - 1. * bumping up vrate accordingly 2. lowering the donating iocg's inuse weight. * * #1 is mathematically simpler but has the drawback of requiring synchronous * global hweight_inuse updates when idle iocg's get activated or inuse weights * change due to donation snapbacks as it has the possibility of grossly * overshooting what's allowed by the model and vrate. * * #2 is inherently safe with local operations. The donating iocg can easily * snap back to higher weights when needed without worrying about impacts on * other nodes as the impacts will be inherently correct. This also makes idle * iocg activations safe. The only effect activations have is decreasing * hweight_inuse of others, the right solution to which is for those iocgs to * snap back to higher weights. * * So, we go with #2. The challenge is calculating how each donating iocg's * inuse should be adjusted to achieve the target donation amounts. This is done * using Andy's method described in the following pdf. * * https://drive.google.com/file/d/1PsJwxPFtjUnwOY1QJ5AeICCcsL7BM3bo * * Given the weights and target after-donation hweight_inuse values, Andy's * method determines what the proportional distribution should look like at each * sibling level to maintain the relative relationship between all non-donating * pairs. To roughly summarize, it divides the tree into donating and * non-donating parts, calculates the global donation rate which is used to * determine the target hweight_inuse for each node, and then derives per-level * proportions. * * The following pdf shows that the global distribution calculated this way can * be achieved by scaling inuse weights of donating leaves and propagating the * adjustments upwards proportionally. * * https://drive.google.com/file/d/1vONz1-fzVO7oY5DXXsLjSxEtYYQbOvsE * * Combining the above two, we can determine how each leaf iocg's inuse should * be adjusted to achieve the target donation. * * https://drive.google.com/file/d/1WcrltBOSPN0qXVdBgnKm4mdp9FhuEFQN * * The inline comments use symbols from the last pdf. * * b is the sum of the absolute budgets in the subtree. 1 for the root node. * f is the sum of the absolute budgets of non-donating nodes in the subtree. * t is the sum of the absolute budgets of donating nodes in the subtree. * w is the weight of the node. w = w_f + w_t * w_f is the non-donating portion of w. w_f = w * f / b * w_t is the donating portion of w. w_t = w * t / b * s is the sum of all sibling weights. s = Sum(w) for siblings * s_f and s_t are the non-donating and donating portions of s. * * Subscript p denotes the parent's counterpart and ' the adjusted value - e.g. * w_pt is the donating portion of the parent's weight and w'_pt the same value * after adjustments. Subscript r denotes the root node's values.
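*
* (Worked example with made-up numbers: if the donating nodes hold 40% of
* the root budget before donation (t_r = 0.4) and 25% after (t_r' = 0.25),
* the global donation rate gamma computed below is
* (1 - t_r') / (1 - t_r) = 0.75 / 0.6 = 1.25, i.e. every non-donating
* budget gets scaled up by 25%.)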
*/ static void transfer_surpluses(struct list_head *surpluses, struct ioc_now *now) { LIST_HEAD(over_hwa); LIST_HEAD(inner_walk); struct ioc_gq *iocg, *tiocg, *root_iocg; u32 after_sum, over_sum, over_target, gamma; /* * It's pretty unlikely but possible for the total sum of * hweight_after_donation's to be higher than WEIGHT_ONE, which will * confuse the following calculations. If such a condition is detected, * scale down everyone over their full share equally to keep the sum below * WEIGHT_ONE. */ after_sum = 0; over_sum = 0; list_for_each_entry(iocg, surpluses, surplus_list) { u32 hwa; current_hweight(iocg, &hwa, NULL); after_sum += iocg->hweight_after_donation; if (iocg->hweight_after_donation > hwa) { over_sum += iocg->hweight_after_donation; list_add(&iocg->walk_list, &over_hwa); } } if (after_sum >= WEIGHT_ONE) { /* * The delta should be deducted from the over_sum; calculate the * target over_sum value. */ u32 over_delta = after_sum - (WEIGHT_ONE - 1); WARN_ON_ONCE(over_sum <= over_delta); over_target = over_sum - over_delta; } else { over_target = 0; } list_for_each_entry_safe(iocg, tiocg, &over_hwa, walk_list) { if (over_target) iocg->hweight_after_donation = div_u64((u64)iocg->hweight_after_donation * over_target, over_sum); list_del_init(&iocg->walk_list); } /* * Build pre-order inner node walk list and prepare for donation * adjustment calculations. */ list_for_each_entry(iocg, surpluses, surplus_list) { iocg_build_inner_walk(iocg, &inner_walk); } root_iocg = list_first_entry(&inner_walk, struct ioc_gq, walk_list); WARN_ON_ONCE(root_iocg->level > 0); list_for_each_entry(iocg, &inner_walk, walk_list) { iocg->child_adjusted_sum = 0; iocg->hweight_donating = 0; iocg->hweight_after_donation = 0; } /* * Propagate the donating budget (b_t) and after donation budget (b'_t) * up the hierarchy. */ list_for_each_entry(iocg, surpluses, surplus_list) { struct ioc_gq *parent = iocg->ancestors[iocg->level - 1]; parent->hweight_donating += iocg->hweight_donating; parent->hweight_after_donation += iocg->hweight_after_donation; } list_for_each_entry_reverse(iocg, &inner_walk, walk_list) { if (iocg->level > 0) { struct ioc_gq *parent = iocg->ancestors[iocg->level - 1]; parent->hweight_donating += iocg->hweight_donating; parent->hweight_after_donation += iocg->hweight_after_donation; } } /* * Calculate inner hwa's (b) and make sure the donation values are * within the accepted ranges as we're doing low res calculations with * roundups. */ list_for_each_entry(iocg, &inner_walk, walk_list) { if (iocg->level) { struct ioc_gq *parent = iocg->ancestors[iocg->level - 1]; iocg->hweight_active = DIV64_U64_ROUND_UP( (u64)parent->hweight_active * iocg->active, parent->child_active_sum); } iocg->hweight_donating = min(iocg->hweight_donating, iocg->hweight_active); iocg->hweight_after_donation = min(iocg->hweight_after_donation, iocg->hweight_donating - 1); if (WARN_ON_ONCE(iocg->hweight_active <= 1 || iocg->hweight_donating <= 1 || iocg->hweight_after_donation == 0)) { pr_warn("iocg: invalid donation weights in "); pr_cont_cgroup_path(iocg_to_blkg(iocg)->blkcg->css.cgroup); pr_cont(": active=%u donating=%u after=%u\n", iocg->hweight_active, iocg->hweight_donating, iocg->hweight_after_donation); } } /* * Calculate the global donation rate (gamma) - the rate to adjust * non-donating budgets by. * * No need to use 64bit multiplication here as the first operand is * guaranteed to be smaller than WEIGHT_ONE (1<<16).
* * We know that there are beneficiary nodes and the sum of the donating * hweights can't be whole; however, due to the round-ups during hweight * calculations, root_iocg->hweight_donating might still end up equal to * or greater than whole. Limit the range when calculating the divider. * * gamma = (1 - t_r') / (1 - t_r) */ gamma = DIV_ROUND_UP( (WEIGHT_ONE - root_iocg->hweight_after_donation) * WEIGHT_ONE, WEIGHT_ONE - min_t(u32, root_iocg->hweight_donating, WEIGHT_ONE - 1)); /* * Calculate adjusted hwi, child_adjusted_sum and inuse for the inner * nodes. */ list_for_each_entry(iocg, &inner_walk, walk_list) { struct ioc_gq *parent; u32 inuse, wpt, wptp; u64 st, sf; if (iocg->level == 0) { /* adjusted weight sum for 1st level: s' = s * b_pf / b'_pf */ iocg->child_adjusted_sum = DIV64_U64_ROUND_UP( iocg->child_active_sum * (WEIGHT_ONE - iocg->hweight_donating), WEIGHT_ONE - iocg->hweight_after_donation); continue; } parent = iocg->ancestors[iocg->level - 1]; /* b' = gamma * b_f + b_t' */ iocg->hweight_inuse = DIV64_U64_ROUND_UP( (u64)gamma * (iocg->hweight_active - iocg->hweight_donating), WEIGHT_ONE) + iocg->hweight_after_donation; /* w' = s' * b' / b'_p */ inuse = DIV64_U64_ROUND_UP( (u64)parent->child_adjusted_sum * iocg->hweight_inuse, parent->hweight_inuse); /* adjusted weight sum for children: s' = s_f + s_t * w'_pt / w_pt */ st = DIV64_U64_ROUND_UP( iocg->child_active_sum * iocg->hweight_donating, iocg->hweight_active); sf = iocg->child_active_sum - st; wpt = DIV64_U64_ROUND_UP( (u64)iocg->active * iocg->hweight_donating, iocg->hweight_active); wptp = DIV64_U64_ROUND_UP( (u64)inuse * iocg->hweight_after_donation, iocg->hweight_inuse); iocg->child_adjusted_sum = sf + DIV64_U64_ROUND_UP(st * wptp, wpt); } /* * All inner nodes now have ->hweight_inuse and ->child_adjusted_sum and * we can finally determine leaf adjustments. */ list_for_each_entry(iocg, surpluses, surplus_list) { struct ioc_gq *parent = iocg->ancestors[iocg->level - 1]; u32 inuse; /* * In-debt iocgs participated in the donation calculation with * the minimum target hweight_inuse. Configuring inuse * accordingly would work fine but debt handling expects * @iocg->inuse to stay at the minimum and we don't want to * interfere. */ if (iocg->abs_vdebt) { WARN_ON_ONCE(iocg->inuse > 1); continue; } /* w' = s' * b' / b'_p, note that b' == b'_t for donating leaves */ inuse = DIV64_U64_ROUND_UP( parent->child_adjusted_sum * iocg->hweight_after_donation, parent->hweight_inuse); TRACE_IOCG_PATH(inuse_transfer, iocg, now, iocg->inuse, inuse, iocg->hweight_inuse, iocg->hweight_after_donation); __propagate_weights(iocg, iocg->active, inuse, true, now); } /* walk list should be dissolved after use */ list_for_each_entry_safe(iocg, tiocg, &inner_walk, walk_list) list_del_init(&iocg->walk_list); } /* * A low weight iocg can amass a large amount of debt, for example, when * anonymous memory gets reclaimed aggressively. If the system has a lot of * memory paired with a slow IO device, the debt can span multiple seconds or * more. If there are no other subsequent IO issuers, the in-debt iocg may end * up blocked paying its debt while the IO device is idle. * * The following protects against such cases. If the device has been * sufficiently idle for a while, the debts are halved and delays are * recalculated.
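*
* (Sketch of the effect: once the average usage over a DFGV_PERIOD falls
* under DFGV_USAGE_PCT, each elapsed forgiveness cycle shifts abs_vdebt
* and delay right by one bit - i.e. halves them - clamping at a minimum
* of 1 rather than dropping straight to zero.)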
*/ static void ioc_forgive_debts(struct ioc *ioc, u64 usage_us_sum, int nr_debtors, struct ioc_now *now) { struct ioc_gq *iocg; u64 dur, usage_pct, nr_cycles, nr_cycles_shift; /* if no debtor, reset the cycle */ if (!nr_debtors) { ioc->dfgv_period_at = now->now; ioc->dfgv_period_rem = 0; ioc->dfgv_usage_us_sum = 0; return; } /* * Debtors can pass through a lot of writes choking the device and we * don't want to be forgiving debts while the device is struggling from * write bursts. If we're missing latency targets, consider the device * fully utilized. */ if (ioc->busy_level > 0) usage_us_sum = max_t(u64, usage_us_sum, ioc->period_us); ioc->dfgv_usage_us_sum += usage_us_sum; if (time_before64(now->now, ioc->dfgv_period_at + DFGV_PERIOD)) return; /* * At least DFGV_PERIOD has passed since the last period. Calculate the * average usage and reset the period counters. */ dur = now->now - ioc->dfgv_period_at; usage_pct = div64_u64(100 * ioc->dfgv_usage_us_sum, dur); ioc->dfgv_period_at = now->now; ioc->dfgv_usage_us_sum = 0; /* if was too busy, reset everything */ if (usage_pct > DFGV_USAGE_PCT) { ioc->dfgv_period_rem = 0; return; } /* * Usage is lower than threshold. Let's forgive some debts. Debt * forgiveness runs off of the usual ioc timer but its period usually * doesn't match ioc's. Compensate the difference by performing the * reduction as many times as would fit in the duration since the last * run and carrying over the left-over duration in @ioc->dfgv_period_rem * - if the ioc period is 75% of DFGV_PERIOD, one out of three consecutive * reductions is doubled. */ nr_cycles = dur + ioc->dfgv_period_rem; ioc->dfgv_period_rem = do_div(nr_cycles, DFGV_PERIOD); list_for_each_entry(iocg, &ioc->active_iocgs, active_list) { u64 __maybe_unused old_debt, __maybe_unused old_delay; if (!iocg->abs_vdebt && !iocg->delay) continue; spin_lock(&iocg->waitq.lock); old_debt = iocg->abs_vdebt; old_delay = iocg->delay; nr_cycles_shift = min_t(u64, nr_cycles, BITS_PER_LONG - 1); if (iocg->abs_vdebt) iocg->abs_vdebt = iocg->abs_vdebt >> nr_cycles_shift ?: 1; if (iocg->delay) iocg->delay = iocg->delay >> nr_cycles_shift ?: 1; iocg_kick_waitq(iocg, true, now); TRACE_IOCG_PATH(iocg_forgive_debt, iocg, now, usage_pct, old_debt, iocg->abs_vdebt, old_delay, iocg->delay); spin_unlock(&iocg->waitq.lock); } } /* * Check the active iocgs' state to avoid oversleeping and deactivate * idle iocgs. * * Since waiters determine the sleep durations based on the vrate * they saw at the time of sleep, if vrate has increased, some * waiters could be sleeping for too long. Wake up tardy waiters * which should have woken up in the last period and expire idle * iocgs.
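*
* (Roughly, a waiter that computed its wakeup while vrate was at 50%
* will oversleep by about 2x if vrate has since climbed back to 100%;
* the kick below re-runs iocg_kick_waitq() so such waiters are woken
* early.)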
*/ static int ioc_check_iocgs(struct ioc *ioc, struct ioc_now *now) { int nr_debtors = 0; struct ioc_gq *iocg, *tiocg; list_for_each_entry_safe(iocg, tiocg, &ioc->active_iocgs, active_list) { if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt && !iocg->delay && !iocg_is_idle(iocg)) continue; spin_lock(&iocg->waitq.lock); /* flush wait and indebt stat deltas */ if (iocg->wait_since) { iocg->stat.wait_us += now->now - iocg->wait_since; iocg->wait_since = now->now; } if (iocg->indebt_since) { iocg->stat.indebt_us += now->now - iocg->indebt_since; iocg->indebt_since = now->now; } if (iocg->indelay_since) { iocg->stat.indelay_us += now->now - iocg->indelay_since; iocg->indelay_since = now->now; } if (waitqueue_active(&iocg->waitq) || iocg->abs_vdebt || iocg->delay) { /* might be oversleeping vtime / hweight changes, kick */ iocg_kick_waitq(iocg, true, now); if (iocg->abs_vdebt || iocg->delay) nr_debtors++; } else if (iocg_is_idle(iocg)) { /* no waiter and idle, deactivate */ u64 vtime = atomic64_read(&iocg->vtime); s64 excess; /* * @iocg has been inactive for a full duration and will * have a high budget. Account anything above target as * error and throw away. On reactivation, it'll start * with the target budget. */ excess = now->vnow - vtime - ioc->margins.target; if (excess > 0) { u32 old_hwi; current_hweight(iocg, NULL, &old_hwi); ioc->vtime_err -= div64_u64(excess * old_hwi, WEIGHT_ONE); } TRACE_IOCG_PATH(iocg_idle, iocg, now, atomic64_read(&iocg->active_period), atomic64_read(&ioc->cur_period), vtime); __propagate_weights(iocg, 0, 0, false, now); list_del_init(&iocg->active_list); } spin_unlock(&iocg->waitq.lock); } commit_weights(ioc); return nr_debtors; } static void ioc_timer_fn(struct timer_list *timer) { struct ioc *ioc = container_of(timer, struct ioc, timer); struct ioc_gq *iocg, *tiocg; struct ioc_now now; LIST_HEAD(surpluses); int nr_debtors, nr_shortages = 0, nr_lagging = 0; u64 usage_us_sum = 0; u32 ppm_rthr; u32 ppm_wthr; u32 missed_ppm[2], rq_wait_pct; u64 period_vtime; int prev_busy_level; /* how were the latencies during the period? */ ioc_lat_stat(ioc, missed_ppm, &rq_wait_pct); /* take care of active iocgs */ spin_lock_irq(&ioc->lock); ppm_rthr = MILLION - ioc->params.qos[QOS_RPPM]; ppm_wthr = MILLION - ioc->params.qos[QOS_WPPM]; ioc_now(ioc, &now); period_vtime = now.vnow - ioc->period_at_vtime; if (WARN_ON_ONCE(!period_vtime)) { spin_unlock_irq(&ioc->lock); return; } nr_debtors = ioc_check_iocgs(ioc, &now); /* * Wait and indebt stat are flushed above and the donation calculation * below needs updated usage stat. Let's bring stat up-to-date. */ iocg_flush_stat(&ioc->active_iocgs, &now); /* calc usage and see whether some weights need to be moved around */ list_for_each_entry(iocg, &ioc->active_iocgs, active_list) { u64 vdone, vtime, usage_us; u32 hw_active, hw_inuse; /* * Collect unused and wind vtime closer to vnow to prevent * iocgs from accumulating a large amount of budget. */ vdone = atomic64_read(&iocg->done_vtime); vtime = atomic64_read(&iocg->vtime); current_hweight(iocg, &hw_active, &hw_inuse); /* * Latency QoS detection doesn't account for IOs which are * in-flight for longer than a period. Detect them by * comparing vdone against period start. If lagging behind * IOs from past periods, don't increase vrate. 
*/ if ((ppm_rthr != MILLION || ppm_wthr != MILLION) && !atomic_read(&iocg_to_blkg(iocg)->use_delay) && time_after64(vtime, vdone) && time_after64(vtime, now.vnow - MAX_LAGGING_PERIODS * period_vtime) && time_before64(vdone, now.vnow - period_vtime)) nr_lagging++; /* * Determine absolute usage factoring in in-flight IOs to avoid * high-latency completions appearing as idle. */ usage_us = iocg->usage_delta_us; usage_us_sum += usage_us; /* see whether there's surplus vtime */ WARN_ON_ONCE(!list_empty(&iocg->surplus_list)); if (hw_inuse < hw_active || (!waitqueue_active(&iocg->waitq) && time_before64(vtime, now.vnow - ioc->margins.low))) { u32 hwa, old_hwi, hwm, new_hwi, usage; u64 usage_dur; if (vdone != vtime) { u64 inflight_us = DIV64_U64_ROUND_UP( cost_to_abs_cost(vtime - vdone, hw_inuse), ioc->vtime_base_rate); usage_us = max(usage_us, inflight_us); } /* convert to hweight based usage ratio */ if (time_after64(iocg->activated_at, ioc->period_at)) usage_dur = max_t(u64, now.now - iocg->activated_at, 1); else usage_dur = max_t(u64, now.now - ioc->period_at, 1); usage = clamp_t(u32, DIV64_U64_ROUND_UP(usage_us * WEIGHT_ONE, usage_dur), 1, WEIGHT_ONE); /* * Already donating or accumulated enough to start. * Determine the donation amount. */ current_hweight(iocg, &hwa, &old_hwi); hwm = current_hweight_max(iocg); new_hwi = hweight_after_donation(iocg, old_hwi, hwm, usage, &now); /* * Donation calculation assumes hweight_after_donation * to be positive, a condition that a donor w/ hwa < 2 * can't meet. Don't bother with donation if hwa is * below 2. It's not gonna make a meaningful difference * anyway. */ if (new_hwi < hwm && hwa >= 2) { iocg->hweight_donating = hwa; iocg->hweight_after_donation = new_hwi; list_add(&iocg->surplus_list, &surpluses); } else if (!iocg->abs_vdebt) { /* * @iocg doesn't have enough to donate. Reset * its inuse to active. * * Don't reset debtors as their inuse's are * owned by debt handling. This shouldn't affect * donation calculation in any meaningful way * as @iocg doesn't have a meaningful amount of * share anyway. */ TRACE_IOCG_PATH(inuse_shortage, iocg, &now, iocg->inuse, iocg->active, iocg->hweight_inuse, new_hwi); __propagate_weights(iocg, iocg->active, iocg->active, true, &now); nr_shortages++; } } else { /* genuinely short on vtime */ nr_shortages++; } } if (!list_empty(&surpluses) && nr_shortages) transfer_surpluses(&surpluses, &now); commit_weights(ioc); /* surplus list should be dissolved after use */ list_for_each_entry_safe(iocg, tiocg, &surpluses, surplus_list) list_del_init(&iocg->surplus_list); /* * If q is getting clogged or we're missing too much, we're issuing * too much IO and should lower vtime rate. If we're not missing * and experiencing shortages but not surpluses, we're too stingy * and should increase vtime rate. */ prev_busy_level = ioc->busy_level; if (rq_wait_pct > RQ_WAIT_BUSY_PCT || missed_ppm[READ] > ppm_rthr || missed_ppm[WRITE] > ppm_wthr) { /* clearly missing QoS targets, slow down vrate */ ioc->busy_level = max(ioc->busy_level, 0); ioc->busy_level++; } else if (rq_wait_pct <= RQ_WAIT_BUSY_PCT * UNBUSY_THR_PCT / 100 && missed_ppm[READ] <= ppm_rthr * UNBUSY_THR_PCT / 100 && missed_ppm[WRITE] <= ppm_wthr * UNBUSY_THR_PCT / 100) { /* QoS targets are being met with >25% margin */ if (nr_shortages) { /* * We're throttling while the device has spare * capacity. If vrate was being slowed down, stop.
*/ ioc->busy_level = min(ioc->busy_level, 0); /* * If there are IOs spanning multiple periods, wait * them out before pushing the device harder. */ if (!nr_lagging) ioc->busy_level--; } else { /* * Nobody is being throttled and the users aren't * issuing enough IOs to saturate the device. We * simply don't know how close the device is to * saturation. Coast. */ ioc->busy_level = 0; } } else { /* inside the hysteresis margin, we're good */ ioc->busy_level = 0; } ioc->busy_level = clamp(ioc->busy_level, -1000, 1000); ioc_adjust_base_vrate(ioc, rq_wait_pct, nr_lagging, nr_shortages, prev_busy_level, missed_ppm); ioc_refresh_params(ioc, false); ioc_forgive_debts(ioc, usage_us_sum, nr_debtors, &now); /* * This period is done. Move onto the next one. If nothing's * going on with the device, stop the timer. */ atomic64_inc(&ioc->cur_period); if (ioc->running != IOC_STOP) { if (!list_empty(&ioc->active_iocgs)) { ioc_start_period(ioc, &now); } else { ioc->busy_level = 0; ioc->vtime_err = 0; ioc->running = IOC_IDLE; } ioc_refresh_vrate(ioc, &now); } spin_unlock_irq(&ioc->lock); } static u64 adjust_inuse_and_calc_cost(struct ioc_gq *iocg, u64 vtime, u64 abs_cost, struct ioc_now *now) { struct ioc *ioc = iocg->ioc; struct ioc_margins *margins = &ioc->margins; u32 __maybe_unused old_inuse = iocg->inuse, __maybe_unused old_hwi; u32 hwi, adj_step; s64 margin; u64 cost, new_inuse; unsigned long flags; current_hweight(iocg, NULL, &hwi); old_hwi = hwi; cost = abs_cost_to_cost(abs_cost, hwi); margin = now->vnow - vtime - cost; /* debt handling owns inuse for debtors */ if (iocg->abs_vdebt) return cost; /* * We only increase inuse during period and do so if the margin has * deteriorated since the previous adjustment. */ if (margin >= iocg->saved_margin || margin >= margins->low || iocg->inuse == iocg->active) return cost; spin_lock_irqsave(&ioc->lock, flags); /* we own inuse only when @iocg is in the normal active state */ if (iocg->abs_vdebt || list_empty(&iocg->active_list)) { spin_unlock_irqrestore(&ioc->lock, flags); return cost; } /* * Bump up inuse till @abs_cost fits in the existing budget. * adj_step must be determined after acquiring ioc->lock - we might * have raced and lost to another thread for activation and could * be reading 0 iocg->active before ioc->lock which will lead to * infinite loop.
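 *
 * Illustrative numbers (a sketch assuming INUSE_ADJ_STEP_PCT is 25):
 * with active == 1000 and inuse == 100, adj_step works out to 250, so
 * the loop below walks inuse 100 -> 350 -> 600 -> ... until the cost
 * fits within the budget or inuse reaches active.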
*/ new_inuse = iocg->inuse; adj_step = DIV_ROUND_UP(iocg->active * INUSE_ADJ_STEP_PCT, 100); do { new_inuse = new_inuse + adj_step; propagate_weights(iocg, iocg->active, new_inuse, true, now); current_hweight(iocg, NULL, &hwi); cost = abs_cost_to_cost(abs_cost, hwi); } while (time_after64(vtime + cost, now->vnow) && iocg->inuse != iocg->active); spin_unlock_irqrestore(&ioc->lock, flags); TRACE_IOCG_PATH(inuse_adjust, iocg, now, old_inuse, iocg->inuse, old_hwi, hwi); return cost; } static void calc_vtime_cost_builtin(struct bio *bio, struct ioc_gq *iocg, bool is_merge, u64 *costp) { struct ioc *ioc = iocg->ioc; u64 coef_seqio, coef_randio, coef_page; u64 pages = max_t(u64, bio_sectors(bio) >> IOC_SECT_TO_PAGE_SHIFT, 1); u64 seek_pages = 0; u64 cost = 0; /* Can't calculate cost for empty bio */ if (!bio->bi_iter.bi_size) goto out; switch (bio_op(bio)) { case REQ_OP_READ: coef_seqio = ioc->params.lcoefs[LCOEF_RSEQIO]; coef_randio = ioc->params.lcoefs[LCOEF_RRANDIO]; coef_page = ioc->params.lcoefs[LCOEF_RPAGE]; break; case REQ_OP_WRITE: coef_seqio = ioc->params.lcoefs[LCOEF_WSEQIO]; coef_randio = ioc->params.lcoefs[LCOEF_WRANDIO]; coef_page = ioc->params.lcoefs[LCOEF_WPAGE]; break; default: goto out; } if (iocg->cursor) { seek_pages = abs(bio->bi_iter.bi_sector - iocg->cursor); seek_pages >>= IOC_SECT_TO_PAGE_SHIFT; } if (!is_merge) { if (seek_pages > LCOEF_RANDIO_PAGES) { cost += coef_randio; } else { cost += coef_seqio; } } cost += pages * coef_page; out: *costp = cost; } static u64 calc_vtime_cost(struct bio *bio, struct ioc_gq *iocg, bool is_merge) { u64 cost; calc_vtime_cost_builtin(bio, iocg, is_merge, &cost); return cost; } static void calc_size_vtime_cost_builtin(struct request *rq, struct ioc *ioc, u64 *costp) { unsigned int pages = blk_rq_stats_sectors(rq) >> IOC_SECT_TO_PAGE_SHIFT; switch (req_op(rq)) { case REQ_OP_READ: *costp = pages * ioc->params.lcoefs[LCOEF_RPAGE]; break; case REQ_OP_WRITE: *costp = pages * ioc->params.lcoefs[LCOEF_WPAGE]; break; default: *costp = 0; } } static u64 calc_size_vtime_cost(struct request *rq, struct ioc *ioc) { u64 cost; calc_size_vtime_cost_builtin(rq, ioc, &cost); return cost; } static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio) { struct blkcg_gq *blkg = bio->bi_blkg; struct ioc *ioc = rqos_to_ioc(rqos); struct ioc_gq *iocg = blkg_to_iocg(blkg); struct ioc_now now; struct iocg_wait wait; u64 abs_cost, cost, vtime; bool use_debt, ioc_locked; unsigned long flags; /* bypass IOs if disabled, still initializing, or for root cgroup */ if (!ioc->enabled || !iocg || !iocg->level) return; /* calculate the absolute vtime cost */ abs_cost = calc_vtime_cost(bio, iocg, false); if (!abs_cost) return; if (!iocg_activate(iocg, &now)) return; iocg->cursor = bio_end_sector(bio); vtime = atomic64_read(&iocg->vtime); cost = adjust_inuse_and_calc_cost(iocg, vtime, abs_cost, &now); /* * If no one's waiting and within budget, issue right away. The * tests are racy but the races aren't systemic - we only miss once * in a while which is fine. */ if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt && time_before_eq64(vtime + cost, now.vnow)) { iocg_commit_bio(iocg, bio, abs_cost, cost); return; } /* * We're over budget. This can be handled in two ways. IOs which may * cause priority inversions are punted to @ioc->aux_iocg and charged as * debt. Otherwise, the issuer is blocked on @iocg->waitq. Debt handling * requires @ioc->lock, waitq handling @iocg->waitq.lock. Determine * whether debt handling is needed and acquire locks accordingly. 
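 *
 * For example, a REQ_META or REQ_SWAP bio is treated as issued by the
 * root blkg (see bio_issue_as_root_blkg() below) and takes the debt
 * path rather than sleeping on the waitq; so does an issuer with a
 * fatal signal pending.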
*/ use_debt = bio_issue_as_root_blkg(bio) || fatal_signal_pending(current); ioc_locked = use_debt || READ_ONCE(iocg->abs_vdebt); retry_lock: iocg_lock(iocg, ioc_locked, &flags); /* * @iocg must stay activated for debt and waitq handling. Deactivation * is synchronized against both ioc->lock and waitq.lock and we won't * get deactivated as long as we're waiting or has debt, so we're good * if we're activated here. In the unlikely cases that we aren't, just * issue the IO. */ if (unlikely(list_empty(&iocg->active_list))) { iocg_unlock(iocg, ioc_locked, &flags); iocg_commit_bio(iocg, bio, abs_cost, cost); return; } /* * We're over budget. If @bio has to be issued regardless, remember * the abs_cost instead of advancing vtime. iocg_kick_waitq() will pay * off the debt before waking more IOs. * * This way, the debt is continuously paid off each period with the * actual budget available to the cgroup. If we just wound vtime, we * would incorrectly use the current hw_inuse for the entire amount * which, for example, can lead to the cgroup staying blocked for a * long time even with substantially raised hw_inuse. * * An iocg with vdebt should stay online so that the timer can keep * deducting its vdebt and [de]activate use_delay mechanism * accordingly. We don't want to race against the timer trying to * clear them and leave @iocg inactive w/ dangling use_delay heavily * penalizing the cgroup and its descendants. */ if (use_debt) { iocg_incur_debt(iocg, abs_cost, &now); if (iocg_kick_delay(iocg, &now)) blkcg_schedule_throttle(rqos->disk, (bio->bi_opf & REQ_SWAP) == REQ_SWAP); iocg_unlock(iocg, ioc_locked, &flags); return; } /* guarantee that iocgs w/ waiters have maximum inuse */ if (!iocg->abs_vdebt && iocg->inuse != iocg->active) { if (!ioc_locked) { iocg_unlock(iocg, false, &flags); ioc_locked = true; goto retry_lock; } propagate_weights(iocg, iocg->active, iocg->active, true, &now); } /* * Append self to the waitq and schedule the wakeup timer if we're * the first waiter. The timer duration is calculated based on the * current vrate. vtime and hweight changes can make it too short * or too long. Each wait entry records the absolute cost it's * waiting for to allow re-evaluation using a custom wait entry. * * If too short, the timer simply reschedules itself. If too long, * the period timer will notice and trigger wakeups. * * All waiters are on iocg->waitq and the wait states are * synchronized using waitq.lock. 
*/ init_wait_func(&wait.wait, iocg_wake_fn); wait.bio = bio; wait.abs_cost = abs_cost; wait.committed = false; /* will be set true by waker */ __add_wait_queue_entry_tail(&iocg->waitq, &wait.wait); iocg_kick_waitq(iocg, ioc_locked, &now); iocg_unlock(iocg, ioc_locked, &flags); while (true) { set_current_state(TASK_UNINTERRUPTIBLE); if (wait.committed) break; io_schedule(); } /* waker already committed us, proceed */ finish_wait(&iocg->waitq, &wait.wait); } static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq, struct bio *bio) { struct ioc_gq *iocg = blkg_to_iocg(bio->bi_blkg); struct ioc *ioc = rqos_to_ioc(rqos); sector_t bio_end = bio_end_sector(bio); struct ioc_now now; u64 vtime, abs_cost, cost; unsigned long flags; /* bypass if disabled, still initializing, or for root cgroup */ if (!ioc->enabled || !iocg || !iocg->level) return; abs_cost = calc_vtime_cost(bio, iocg, true); if (!abs_cost) return; ioc_now(ioc, &now); vtime = atomic64_read(&iocg->vtime); cost = adjust_inuse_and_calc_cost(iocg, vtime, abs_cost, &now); /* update cursor if backmerging into the request at the cursor */ if (blk_rq_pos(rq) < bio_end && blk_rq_pos(rq) + blk_rq_sectors(rq) == iocg->cursor) iocg->cursor = bio_end; /* * Charge if there's enough vtime budget and the existing request has * cost assigned. */ if (rq->bio && rq->bio->bi_iocost_cost && time_before_eq64(atomic64_read(&iocg->vtime) + cost, now.vnow)) { iocg_commit_bio(iocg, bio, abs_cost, cost); return; } /* * Otherwise, account it as debt if @iocg is online, which it should * be for the vast majority of cases. See debt handling in * ioc_rqos_throttle() for details. */ spin_lock_irqsave(&ioc->lock, flags); spin_lock(&iocg->waitq.lock); if (likely(!list_empty(&iocg->active_list))) { iocg_incur_debt(iocg, abs_cost, &now); if (iocg_kick_delay(iocg, &now)) blkcg_schedule_throttle(rqos->disk, (bio->bi_opf & REQ_SWAP) == REQ_SWAP); } else { iocg_commit_bio(iocg, bio, abs_cost, cost); } spin_unlock(&iocg->waitq.lock); spin_unlock_irqrestore(&ioc->lock, flags); } static void ioc_rqos_done_bio(struct rq_qos *rqos, struct bio *bio) { struct ioc_gq *iocg = blkg_to_iocg(bio->bi_blkg); if (iocg && bio->bi_iocost_cost) atomic64_add(bio->bi_iocost_cost, &iocg->done_vtime); } static void ioc_rqos_done(struct rq_qos *rqos, struct request *rq) { struct ioc *ioc = rqos_to_ioc(rqos); struct ioc_pcpu_stat *ccs; u64 on_q_ns, rq_wait_ns, size_nsec; int pidx, rw; if (!ioc->enabled || !rq->alloc_time_ns || !rq->start_time_ns) return; switch (req_op(rq)) { case REQ_OP_READ: pidx = QOS_RLAT; rw = READ; break; case REQ_OP_WRITE: pidx = QOS_WLAT; rw = WRITE; break; default: return; } on_q_ns = blk_time_get_ns() - rq->alloc_time_ns; rq_wait_ns = rq->start_time_ns - rq->alloc_time_ns; size_nsec = div64_u64(calc_size_vtime_cost(rq, ioc), VTIME_PER_NSEC); ccs = get_cpu_ptr(ioc->pcpu_stat); if (on_q_ns <= size_nsec || on_q_ns - size_nsec <= ioc->params.qos[pidx] * NSEC_PER_USEC) local_inc(&ccs->missed[rw].nr_met); else local_inc(&ccs->missed[rw].nr_missed); local64_add(rq_wait_ns, &ccs->rq_wait_ns); put_cpu_ptr(ccs); } static void ioc_rqos_queue_depth_changed(struct rq_qos *rqos) { struct ioc *ioc = rqos_to_ioc(rqos); spin_lock_irq(&ioc->lock); ioc_refresh_params(ioc, false); spin_unlock_irq(&ioc->lock); } static void ioc_rqos_exit(struct rq_qos *rqos) { struct ioc *ioc = rqos_to_ioc(rqos); blkcg_deactivate_policy(rqos->disk, &blkcg_policy_iocost); spin_lock_irq(&ioc->lock); ioc->running = IOC_STOP; spin_unlock_irq(&ioc->lock); timer_shutdown_sync(&ioc->timer); 
free_percpu(ioc->pcpu_stat); kfree(ioc); } static const struct rq_qos_ops ioc_rqos_ops = { .throttle = ioc_rqos_throttle, .merge = ioc_rqos_merge, .done_bio = ioc_rqos_done_bio, .done = ioc_rqos_done, .queue_depth_changed = ioc_rqos_queue_depth_changed, .exit = ioc_rqos_exit, }; static int blk_iocost_init(struct gendisk *disk) { struct ioc *ioc; int i, cpu, ret; ioc = kzalloc(sizeof(*ioc), GFP_KERNEL); if (!ioc) return -ENOMEM; ioc->pcpu_stat = alloc_percpu(struct ioc_pcpu_stat); if (!ioc->pcpu_stat) { kfree(ioc); return -ENOMEM; } for_each_possible_cpu(cpu) { struct ioc_pcpu_stat *ccs = per_cpu_ptr(ioc->pcpu_stat, cpu); for (i = 0; i < ARRAY_SIZE(ccs->missed); i++) { local_set(&ccs->missed[i].nr_met, 0); local_set(&ccs->missed[i].nr_missed, 0); } local64_set(&ccs->rq_wait_ns, 0); } spin_lock_init(&ioc->lock); timer_setup(&ioc->timer, ioc_timer_fn, 0); INIT_LIST_HEAD(&ioc->active_iocgs); ioc->running = IOC_IDLE; ioc->vtime_base_rate = VTIME_PER_USEC; atomic64_set(&ioc->vtime_rate, VTIME_PER_USEC); seqcount_spinlock_init(&ioc->period_seqcount, &ioc->lock); ioc->period_at = ktime_to_us(blk_time_get()); atomic64_set(&ioc->cur_period, 0); atomic_set(&ioc->hweight_gen, 0); spin_lock_irq(&ioc->lock); ioc->autop_idx = AUTOP_INVALID; ioc_refresh_params_disk(ioc, true, disk); spin_unlock_irq(&ioc->lock); /* * rqos must be added before activation to allow ioc_pd_init() to * lookup the ioc from q. This means that the rqos methods may get * called before policy activation completion, can't assume that the * target bio has an iocg associated and need to test for NULL iocg. */ ret = rq_qos_add(&ioc->rqos, disk, RQ_QOS_COST, &ioc_rqos_ops); if (ret) goto err_free_ioc; ret = blkcg_activate_policy(disk, &blkcg_policy_iocost); if (ret) goto err_del_qos; return 0; err_del_qos: rq_qos_del(&ioc->rqos); err_free_ioc: free_percpu(ioc->pcpu_stat); kfree(ioc); return ret; } static struct blkcg_policy_data *ioc_cpd_alloc(gfp_t gfp) { struct ioc_cgrp *iocc; iocc = kzalloc(sizeof(struct ioc_cgrp), gfp); if (!iocc) return NULL; iocc->dfl_weight = CGROUP_WEIGHT_DFL * WEIGHT_ONE; return &iocc->cpd; } static void ioc_cpd_free(struct blkcg_policy_data *cpd) { kfree(container_of(cpd, struct ioc_cgrp, cpd)); } static struct blkg_policy_data *ioc_pd_alloc(struct gendisk *disk, struct blkcg *blkcg, gfp_t gfp) { int levels = blkcg->css.cgroup->level + 1; struct ioc_gq *iocg; iocg = kzalloc_node(struct_size(iocg, ancestors, levels), gfp, disk->node_id); if (!iocg) return NULL; iocg->pcpu_stat = alloc_percpu_gfp(struct iocg_pcpu_stat, gfp); if (!iocg->pcpu_stat) { kfree(iocg); return NULL; } return &iocg->pd; } static void ioc_pd_init(struct blkg_policy_data *pd) { struct ioc_gq *iocg = pd_to_iocg(pd); struct blkcg_gq *blkg = pd_to_blkg(&iocg->pd); struct ioc *ioc = q_to_ioc(blkg->q); struct ioc_now now; struct blkcg_gq *tblkg; unsigned long flags; ioc_now(ioc, &now); iocg->ioc = ioc; atomic64_set(&iocg->vtime, now.vnow); atomic64_set(&iocg->done_vtime, now.vnow); atomic64_set(&iocg->active_period, atomic64_read(&ioc->cur_period)); INIT_LIST_HEAD(&iocg->active_list); INIT_LIST_HEAD(&iocg->walk_list); INIT_LIST_HEAD(&iocg->surplus_list); iocg->hweight_active = WEIGHT_ONE; iocg->hweight_inuse = WEIGHT_ONE; init_waitqueue_head(&iocg->waitq); hrtimer_setup(&iocg->waitq_timer, iocg_waitq_timer_fn, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); iocg->level = blkg->blkcg->css.cgroup->level; for (tblkg = blkg; tblkg; tblkg = tblkg->parent) { struct ioc_gq *tiocg = blkg_to_iocg(tblkg); iocg->ancestors[tiocg->level] = tiocg; } 
spin_lock_irqsave(&ioc->lock, flags); weight_updated(iocg, &now); spin_unlock_irqrestore(&ioc->lock, flags); } static void ioc_pd_free(struct blkg_policy_data *pd) { struct ioc_gq *iocg = pd_to_iocg(pd); struct ioc *ioc = iocg->ioc; unsigned long flags; if (ioc) { spin_lock_irqsave(&ioc->lock, flags); if (!list_empty(&iocg->active_list)) { struct ioc_now now; ioc_now(ioc, &now); propagate_weights(iocg, 0, 0, false, &now); list_del_init(&iocg->active_list); } WARN_ON_ONCE(!list_empty(&iocg->walk_list)); WARN_ON_ONCE(!list_empty(&iocg->surplus_list)); spin_unlock_irqrestore(&ioc->lock, flags); hrtimer_cancel(&iocg->waitq_timer); } free_percpu(iocg->pcpu_stat); kfree(iocg); } static void ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s) { struct ioc_gq *iocg = pd_to_iocg(pd); struct ioc *ioc = iocg->ioc; if (!ioc->enabled) return; if (iocg->level == 0) { unsigned vp10k = DIV64_U64_ROUND_CLOSEST( ioc->vtime_base_rate * 10000, VTIME_PER_USEC); seq_printf(s, " cost.vrate=%u.%02u", vp10k / 100, vp10k % 100); } seq_printf(s, " cost.usage=%llu", iocg->last_stat.usage_us); if (blkcg_debug_stats) seq_printf(s, " cost.wait=%llu cost.indebt=%llu cost.indelay=%llu", iocg->last_stat.wait_us, iocg->last_stat.indebt_us, iocg->last_stat.indelay_us); } static u64 ioc_weight_prfill(struct seq_file *sf, struct blkg_policy_data *pd, int off) { const char *dname = blkg_dev_name(pd->blkg); struct ioc_gq *iocg = pd_to_iocg(pd); if (dname && iocg->cfg_weight) seq_printf(sf, "%s %u\n", dname, iocg->cfg_weight / WEIGHT_ONE); return 0; } static int ioc_weight_show(struct seq_file *sf, void *v) { struct blkcg *blkcg = css_to_blkcg(seq_css(sf)); struct ioc_cgrp *iocc = blkcg_to_iocc(blkcg); seq_printf(sf, "default %u\n", iocc->dfl_weight / WEIGHT_ONE); blkcg_print_blkgs(sf, blkcg, ioc_weight_prfill, &blkcg_policy_iocost, seq_cft(sf)->private, false); return 0; } static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct blkcg *blkcg = css_to_blkcg(of_css(of)); struct ioc_cgrp *iocc = blkcg_to_iocc(blkcg); struct blkg_conf_ctx ctx; struct ioc_now now; struct ioc_gq *iocg; u32 v; int ret; if (!strchr(buf, ':')) { struct blkcg_gq *blkg; if (!sscanf(buf, "default %u", &v) && !sscanf(buf, "%u", &v)) return -EINVAL; if (v < CGROUP_WEIGHT_MIN || v > CGROUP_WEIGHT_MAX) return -EINVAL; spin_lock_irq(&blkcg->lock); iocc->dfl_weight = v * WEIGHT_ONE; hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { struct ioc_gq *iocg = blkg_to_iocg(blkg); if (iocg) { spin_lock(&iocg->ioc->lock); ioc_now(iocg->ioc, &now); weight_updated(iocg, &now); spin_unlock(&iocg->ioc->lock); } } spin_unlock_irq(&blkcg->lock); return nbytes; } blkg_conf_init(&ctx, buf); ret = blkg_conf_prep(blkcg, &blkcg_policy_iocost, &ctx); if (ret) goto err; iocg = blkg_to_iocg(ctx.blkg); if (!strncmp(ctx.body, "default", 7)) { v = 0; } else { if (!sscanf(ctx.body, "%u", &v)) goto einval; if (v < CGROUP_WEIGHT_MIN || v > CGROUP_WEIGHT_MAX) goto einval; } spin_lock(&iocg->ioc->lock); iocg->cfg_weight = v * WEIGHT_ONE; ioc_now(iocg->ioc, &now); weight_updated(iocg, &now); spin_unlock(&iocg->ioc->lock); blkg_conf_exit(&ctx); return nbytes; einval: ret = -EINVAL; err: blkg_conf_exit(&ctx); return ret; } static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd, int off) { const char *dname = blkg_dev_name(pd->blkg); struct ioc *ioc = pd_to_iocg(pd)->ioc; if (!dname) return 0; spin_lock(&ioc->lock); seq_printf(sf, "%s enable=%d ctrl=%s rpct=%u.%02u rlat=%u wpct=%u.%02u wlat=%u min=%u.%02u 
max=%u.%02u\n", dname, ioc->enabled, ioc->user_qos_params ? "user" : "auto", ioc->params.qos[QOS_RPPM] / 10000, ioc->params.qos[QOS_RPPM] % 10000 / 100, ioc->params.qos[QOS_RLAT], ioc->params.qos[QOS_WPPM] / 10000, ioc->params.qos[QOS_WPPM] % 10000 / 100, ioc->params.qos[QOS_WLAT], ioc->params.qos[QOS_MIN] / 10000, ioc->params.qos[QOS_MIN] % 10000 / 100, ioc->params.qos[QOS_MAX] / 10000, ioc->params.qos[QOS_MAX] % 10000 / 100); spin_unlock(&ioc->lock); return 0; } static int ioc_qos_show(struct seq_file *sf, void *v) { struct blkcg *blkcg = css_to_blkcg(seq_css(sf)); blkcg_print_blkgs(sf, blkcg, ioc_qos_prfill, &blkcg_policy_iocost, seq_cft(sf)->private, false); return 0; } static const match_table_t qos_ctrl_tokens = { { QOS_ENABLE, "enable=%u" }, { QOS_CTRL, "ctrl=%s" }, { NR_QOS_CTRL_PARAMS, NULL }, }; static const match_table_t qos_tokens = { { QOS_RPPM, "rpct=%s" }, { QOS_RLAT, "rlat=%u" }, { QOS_WPPM, "wpct=%s" }, { QOS_WLAT, "wlat=%u" }, { QOS_MIN, "min=%s" }, { QOS_MAX, "max=%s" }, { NR_QOS_PARAMS, NULL }, }; static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, size_t nbytes, loff_t off) { struct blkg_conf_ctx ctx; struct gendisk *disk; struct ioc *ioc; u32 qos[NR_QOS_PARAMS]; bool enable, user; char *body, *p; unsigned long memflags; int ret; blkg_conf_init(&ctx, input); memflags = blkg_conf_open_bdev_frozen(&ctx); if (IS_ERR_VALUE(memflags)) { ret = memflags; goto err; } body = ctx.body; disk = ctx.bdev->bd_disk; if (!queue_is_mq(disk->queue)) { ret = -EOPNOTSUPP; goto err; } ioc = q_to_ioc(disk->queue); if (!ioc) { ret = blk_iocost_init(disk); if (ret) goto err; ioc = q_to_ioc(disk->queue); } blk_mq_quiesce_queue(disk->queue); spin_lock_irq(&ioc->lock); memcpy(qos, ioc->params.qos, sizeof(qos)); enable = ioc->enabled; user = ioc->user_qos_params; while ((p = strsep(&body, " \t\n"))) { substring_t args[MAX_OPT_ARGS]; char buf[32]; int tok; s64 v; if (!*p) continue; switch (match_token(p, qos_ctrl_tokens, args)) { case QOS_ENABLE: if (match_u64(&args[0], &v)) goto einval; enable = v; continue; case QOS_CTRL: match_strlcpy(buf, &args[0], sizeof(buf)); if (!strcmp(buf, "auto")) user = false; else if (!strcmp(buf, "user")) user = true; else goto einval; continue; } tok = match_token(p, qos_tokens, args); switch (tok) { case QOS_RPPM: case QOS_WPPM: if (match_strlcpy(buf, &args[0], sizeof(buf)) >= sizeof(buf)) goto einval; if (cgroup_parse_float(buf, 2, &v)) goto einval; if (v < 0 || v > 10000) goto einval; qos[tok] = v * 100; break; case QOS_RLAT: case QOS_WLAT: if (match_u64(&args[0], &v)) goto einval; qos[tok] = v; break; case QOS_MIN: case QOS_MAX: if (match_strlcpy(buf, &args[0], sizeof(buf)) >= sizeof(buf)) goto einval; if (cgroup_parse_float(buf, 2, &v)) goto einval; if (v < 0) goto einval; qos[tok] = clamp_t(s64, v * 100, VRATE_MIN_PPM, VRATE_MAX_PPM); break; default: goto einval; } user = true; } if (qos[QOS_MIN] > qos[QOS_MAX]) goto einval; if (enable && !ioc->enabled) { blk_stat_enable_accounting(disk->queue); blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue); ioc->enabled = true; } else if (!enable && ioc->enabled) { blk_stat_disable_accounting(disk->queue); blk_queue_flag_clear(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue); ioc->enabled = false; } if (user) { memcpy(ioc->params.qos, qos, sizeof(qos)); ioc->user_qos_params = true; } else { ioc->user_qos_params = false; } ioc_refresh_params(ioc, true); spin_unlock_irq(&ioc->lock); if (enable) wbt_disable_default(disk); else wbt_enable_default(disk); blk_mq_unquiesce_queue(disk->queue); 
blkg_conf_exit_frozen(&ctx, memflags); return nbytes; einval: spin_unlock_irq(&ioc->lock); blk_mq_unquiesce_queue(disk->queue); ret = -EINVAL; err: blkg_conf_exit_frozen(&ctx, memflags); return ret; } static u64 ioc_cost_model_prfill(struct seq_file *sf, struct blkg_policy_data *pd, int off) { const char *dname = blkg_dev_name(pd->blkg); struct ioc *ioc = pd_to_iocg(pd)->ioc; u64 *u = ioc->params.i_lcoefs; if (!dname) return 0; spin_lock(&ioc->lock); seq_printf(sf, "%s ctrl=%s model=linear " "rbps=%llu rseqiops=%llu rrandiops=%llu " "wbps=%llu wseqiops=%llu wrandiops=%llu\n", dname, ioc->user_cost_model ? "user" : "auto", u[I_LCOEF_RBPS], u[I_LCOEF_RSEQIOPS], u[I_LCOEF_RRANDIOPS], u[I_LCOEF_WBPS], u[I_LCOEF_WSEQIOPS], u[I_LCOEF_WRANDIOPS]); spin_unlock(&ioc->lock); return 0; } static int ioc_cost_model_show(struct seq_file *sf, void *v) { struct blkcg *blkcg = css_to_blkcg(seq_css(sf)); blkcg_print_blkgs(sf, blkcg, ioc_cost_model_prfill, &blkcg_policy_iocost, seq_cft(sf)->private, false); return 0; } static const match_table_t cost_ctrl_tokens = { { COST_CTRL, "ctrl=%s" }, { COST_MODEL, "model=%s" }, { NR_COST_CTRL_PARAMS, NULL }, }; static const match_table_t i_lcoef_tokens = { { I_LCOEF_RBPS, "rbps=%u" }, { I_LCOEF_RSEQIOPS, "rseqiops=%u" }, { I_LCOEF_RRANDIOPS, "rrandiops=%u" }, { I_LCOEF_WBPS, "wbps=%u" }, { I_LCOEF_WSEQIOPS, "wseqiops=%u" }, { I_LCOEF_WRANDIOPS, "wrandiops=%u" }, { NR_I_LCOEFS, NULL }, }; static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input, size_t nbytes, loff_t off) { struct blkg_conf_ctx ctx; struct request_queue *q; unsigned int memflags; struct ioc *ioc; u64 u[NR_I_LCOEFS]; bool user; char *body, *p; int ret; blkg_conf_init(&ctx, input); ret = blkg_conf_open_bdev(&ctx); if (ret) goto err; body = ctx.body; q = bdev_get_queue(ctx.bdev); if (!queue_is_mq(q)) { ret = -EOPNOTSUPP; goto err; } ioc = q_to_ioc(q); if (!ioc) { ret = blk_iocost_init(ctx.bdev->bd_disk); if (ret) goto err; ioc = q_to_ioc(q); } memflags = blk_mq_freeze_queue(q); blk_mq_quiesce_queue(q); spin_lock_irq(&ioc->lock); memcpy(u, ioc->params.i_lcoefs, sizeof(u)); user = ioc->user_cost_model; while ((p = strsep(&body, " \t\n"))) { substring_t args[MAX_OPT_ARGS]; char buf[32]; int tok; u64 v; if (!*p) continue; switch (match_token(p, cost_ctrl_tokens, args)) { case COST_CTRL: match_strlcpy(buf, &args[0], sizeof(buf)); if (!strcmp(buf, "auto")) user = false; else if (!strcmp(buf, "user")) user = true; else goto einval; continue; case COST_MODEL: match_strlcpy(buf, &args[0], sizeof(buf)); if (strcmp(buf, "linear")) goto einval; continue; } tok = match_token(p, i_lcoef_tokens, args); if (tok == NR_I_LCOEFS) goto einval; if (match_u64(&args[0], &v)) goto einval; u[tok] = v; user = true; } if (user) { memcpy(ioc->params.i_lcoefs, u, sizeof(u)); ioc->user_cost_model = true; } else { ioc->user_cost_model = false; } ioc_refresh_params(ioc, true); spin_unlock_irq(&ioc->lock); blk_mq_unquiesce_queue(q); blk_mq_unfreeze_queue(q, memflags); blkg_conf_exit(&ctx); return nbytes; einval: spin_unlock_irq(&ioc->lock); blk_mq_unquiesce_queue(q); blk_mq_unfreeze_queue(q, memflags); ret = -EINVAL; err: blkg_conf_exit(&ctx); return ret; } static struct cftype ioc_files[] = { { .name = "weight", .flags = CFTYPE_NOT_ON_ROOT, .seq_show = ioc_weight_show, .write = ioc_weight_write, }, { .name = "cost.qos", .flags = CFTYPE_ONLY_ON_ROOT, .seq_show = ioc_qos_show, .write = ioc_qos_write, }, { .name = "cost.model", .flags = CFTYPE_ONLY_ON_ROOT, .seq_show = ioc_cost_model_show, .write = 
ioc_cost_model_write, }, {} }; static struct blkcg_policy blkcg_policy_iocost = { .dfl_cftypes = ioc_files, .cpd_alloc_fn = ioc_cpd_alloc, .cpd_free_fn = ioc_cpd_free, .pd_alloc_fn = ioc_pd_alloc, .pd_init_fn = ioc_pd_init, .pd_free_fn = ioc_pd_free, .pd_stat_fn = ioc_pd_stat, }; static int __init ioc_init(void) { return blkcg_policy_register(&blkcg_policy_iocost); } static void __exit ioc_exit(void) { blkcg_policy_unregister(&blkcg_policy_iocost); } module_init(ioc_init); module_exit(ioc_exit);
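/*
 * Illustrative sketch (not part of the sources above): how the linear
 * cost model used by calc_vtime_cost_builtin() composes a per-IO cost.
 * The coefficients below are made-up demonstration values; the real
 * ones live in ioc->params.lcoefs and differ for reads and writes.
 */
#include <stdint.h>

#define DEMO_RANDIO_PAGES 4096 /* stands in for LCOEF_RANDIO_PAGES */

static uint64_t demo_linear_cost(uint64_t pages, uint64_t seek_pages, int is_merge)
{
	const uint64_t coef_seqio = 10, coef_randio = 4000, coef_page = 50;
	uint64_t cost = 0;

	/* merges skip the per-IO component and only pay per page */
	if (!is_merge)
		cost += (seek_pages > DEMO_RANDIO_PAGES) ? coef_randio : coef_seqio;

	return cost + pages * coef_page;
}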
// SPDX-License-Identifier: GPL-2.0-only #include <linux/bcd.h> #include <linux/delay.h> #include <linux/export.h> #include <linux/mc146818rtc.h> #ifdef CONFIG_ACPI #include <linux/acpi.h> #endif #define UIP_RECHECK_DELAY 100 /* usec */ #define UIP_RECHECK_DELAY_MS (USEC_PER_MSEC / UIP_RECHECK_DELAY) #define UIP_RECHECK_LOOPS_MS(x) (x / UIP_RECHECK_DELAY_MS) /* * Execute a function while the UIP (Update-in-progress) bit of the RTC is * unset. The timeout is configurable by the caller in ms. * * Warning: callback may be executed more than once. */ bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), int timeout, void *param) { int i; unsigned long flags; unsigned char seconds; for (i = 0; UIP_RECHECK_LOOPS_MS(i) < timeout; i++) { spin_lock_irqsave(&rtc_lock, flags); /* * Check whether there is an update in progress during which the * readout is unspecified. The maximum update time is ~2ms. Poll * for completion. * * Store the second value before checking UIP so a long lasting * NMI which happens to hit after the UIP check cannot make * an update cycle invisible. */ seconds = CMOS_READ(RTC_SECONDS); if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) { spin_unlock_irqrestore(&rtc_lock, flags); udelay(UIP_RECHECK_DELAY); continue; } /* Revalidate the above readout */ if (seconds != CMOS_READ(RTC_SECONDS)) { spin_unlock_irqrestore(&rtc_lock, flags); continue; } if (callback) callback(seconds, param); /* * Check for the UIP bit again. If it is set now then * the above values may contain garbage. */ if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) { spin_unlock_irqrestore(&rtc_lock, flags); udelay(UIP_RECHECK_DELAY); continue; } /* * An NMI might have interrupted the above sequence so check * whether the seconds value has changed which indicates that * the NMI took longer than the UIP bit was set. Unlikely, but * possible and there is also virt... */ if (seconds != CMOS_READ(RTC_SECONDS)) { spin_unlock_irqrestore(&rtc_lock, flags); continue; } spin_unlock_irqrestore(&rtc_lock, flags); if (UIP_RECHECK_LOOPS_MS(i) >= 100) pr_warn("Reading current time from RTC took around %li ms\n", UIP_RECHECK_LOOPS_MS(i)); return true; } return false; } EXPORT_SYMBOL_GPL(mc146818_avoid_UIP); /* * If the UIP (Update-in-progress) bit of the RTC is set for more than * 10ms, the RTC is apparently broken or not present.
*/ bool mc146818_does_rtc_work(void) { return mc146818_avoid_UIP(NULL, 1000, NULL); } EXPORT_SYMBOL_GPL(mc146818_does_rtc_work); struct mc146818_get_time_callback_param { struct rtc_time *time; unsigned char ctrl; #ifdef CONFIG_ACPI unsigned char century; #endif #ifdef CONFIG_MACH_DECSTATION unsigned int real_year; #endif }; static void mc146818_get_time_callback(unsigned char seconds, void *param_in) { struct mc146818_get_time_callback_param *p = param_in; /* * Only the values that we read from the RTC are set. We leave * tm_wday, tm_yday and tm_isdst untouched. Even though the * RTC has RTC_DAY_OF_WEEK, we ignore it, as it is only updated * by the RTC when initially set to a non-zero value. */ p->time->tm_sec = seconds; p->time->tm_min = CMOS_READ(RTC_MINUTES); p->time->tm_hour = CMOS_READ(RTC_HOURS); p->time->tm_mday = CMOS_READ(RTC_DAY_OF_MONTH); p->time->tm_mon = CMOS_READ(RTC_MONTH); p->time->tm_year = CMOS_READ(RTC_YEAR); #ifdef CONFIG_MACH_DECSTATION p->real_year = CMOS_READ(RTC_DEC_YEAR); #endif #ifdef CONFIG_ACPI if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID && acpi_gbl_FADT.century) { p->century = CMOS_READ(acpi_gbl_FADT.century); } else { p->century = 0; } #endif p->ctrl = CMOS_READ(RTC_CONTROL); } /** * mc146818_get_time - Get the current time from the RTC * @time: pointer to struct rtc_time to store the current time * @timeout: timeout value in ms * * This function reads the current time from the RTC and stores it in the * provided struct rtc_time. The timeout parameter specifies the maximum * time to wait for the RTC to become ready. * * Return: 0 on success, -ETIMEDOUT if the RTC did not become ready within * the specified timeout, or another error code if an error occurred. */ int mc146818_get_time(struct rtc_time *time, int timeout) { struct mc146818_get_time_callback_param p = { .time = time }; if (!mc146818_avoid_UIP(mc146818_get_time_callback, timeout, &p)) { memset(time, 0, sizeof(*time)); return -ETIMEDOUT; } if (!(p.ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { time->tm_sec = bcd2bin(time->tm_sec); time->tm_min = bcd2bin(time->tm_min); time->tm_hour = bcd2bin(time->tm_hour); time->tm_mday = bcd2bin(time->tm_mday); time->tm_mon = bcd2bin(time->tm_mon); time->tm_year = bcd2bin(time->tm_year); #ifdef CONFIG_ACPI p.century = bcd2bin(p.century); #endif } #ifdef CONFIG_MACH_DECSTATION time->tm_year += p.real_year - 72; #endif #ifdef CONFIG_ACPI if (p.century > 19) time->tm_year += (p.century - 19) * 100; #endif /* * Account for differences between how the RTC uses the values * and how they are defined in a struct rtc_time; */ if (time->tm_year <= 69) time->tm_year += 100; time->tm_mon--; return 0; } EXPORT_SYMBOL_GPL(mc146818_get_time); /* AMD systems don't allow access to AltCentury with DV1 */ static bool apply_amd_register_a_behavior(void) { #ifdef CONFIG_X86 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) return true; #endif return false; } /* Set the current date and time in the real time clock. 
*/ int mc146818_set_time(struct rtc_time *time) { unsigned long flags; unsigned char mon, day, hrs, min, sec; unsigned char save_control, save_freq_select; unsigned int yrs; #ifdef CONFIG_MACH_DECSTATION unsigned int real_yrs; #endif unsigned char century = 0; yrs = time->tm_year; mon = time->tm_mon + 1; /* tm_mon starts at zero */ day = time->tm_mday; hrs = time->tm_hour; min = time->tm_min; sec = time->tm_sec; if (yrs > 255) /* They are unsigned */ return -EINVAL; #ifdef CONFIG_MACH_DECSTATION real_yrs = yrs; yrs = 72; /* * We want to keep the year set to 73 until March * for non-leap years, so that Feb, 29th is handled * correctly. */ if (!is_leap_year(real_yrs + 1900) && mon < 3) { real_yrs--; yrs = 73; } #endif #ifdef CONFIG_ACPI if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID && acpi_gbl_FADT.century) { century = (yrs + 1900) / 100; yrs %= 100; } #endif /* These limits and adjustments are independent of * whether the chip is in binary mode or not. */ if (yrs > 169) return -EINVAL; if (yrs >= 100) yrs -= 100; spin_lock_irqsave(&rtc_lock, flags); save_control = CMOS_READ(RTC_CONTROL); spin_unlock_irqrestore(&rtc_lock, flags); if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { sec = bin2bcd(sec); min = bin2bcd(min); hrs = bin2bcd(hrs); day = bin2bcd(day); mon = bin2bcd(mon); yrs = bin2bcd(yrs); century = bin2bcd(century); } spin_lock_irqsave(&rtc_lock, flags); save_control = CMOS_READ(RTC_CONTROL); CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); save_freq_select = CMOS_READ(RTC_FREQ_SELECT); if (apply_amd_register_a_behavior()) CMOS_WRITE((save_freq_select & ~RTC_AMD_BANK_SELECT), RTC_FREQ_SELECT); else CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); #ifdef CONFIG_MACH_DECSTATION CMOS_WRITE(real_yrs, RTC_DEC_YEAR); #endif CMOS_WRITE(yrs, RTC_YEAR); CMOS_WRITE(mon, RTC_MONTH); CMOS_WRITE(day, RTC_DAY_OF_MONTH); CMOS_WRITE(hrs, RTC_HOURS); CMOS_WRITE(min, RTC_MINUTES); CMOS_WRITE(sec, RTC_SECONDS); #ifdef CONFIG_ACPI if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID && acpi_gbl_FADT.century) CMOS_WRITE(century, acpi_gbl_FADT.century); #endif CMOS_WRITE(save_control, RTC_CONTROL); CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); spin_unlock_irqrestore(&rtc_lock, flags); return 0; } EXPORT_SYMBOL_GPL(mc146818_set_time);
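/*
 * Illustrative sketch (not part of the file above): the BCD conversion
 * helpers from <linux/bcd.h> that the code above relies on. In BCD each
 * decimal digit occupies one nibble, so the binary value 59 is stored
 * as 0x59.
 */
#include <stdint.h>

static uint8_t demo_bcd2bin(uint8_t val)
{
	return (val & 0x0f) + (val >> 4) * 10; /* 0x59 -> 59 */
}

static uint8_t demo_bin2bcd(uint8_t val)
{
	return ((val / 10) << 4) | (val % 10); /* 59 -> 0x59 */
}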
// SPDX-License-Identifier: GPL-2.0-only /* * The NFC Controller Interface is the communication protocol between an * NFC Controller (NFCC) and a Device Host (DH). * This is the HCI over NCI implementation, as specified in the 10.2 * section of the NCI 1.1 specification. * * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved.
*/ #include <linux/skbuff.h> #include "../nfc.h" #include <net/nfc/nci.h> #include <net/nfc/nci_core.h> #include <linux/nfc.h> #include <linux/kcov.h> struct nci_data { u8 conn_id; u8 pipe; u8 cmd; const u8 *data; u32 data_len; } __packed; struct nci_hci_create_pipe_params { u8 src_gate; u8 dest_host; u8 dest_gate; } __packed; struct nci_hci_create_pipe_resp { u8 src_host; u8 src_gate; u8 dest_host; u8 dest_gate; u8 pipe; } __packed; struct nci_hci_delete_pipe_noti { u8 pipe; } __packed; struct nci_hci_all_pipe_cleared_noti { u8 host; } __packed; struct nci_hcp_message { u8 header; /* type -cmd,evt,rsp- + instruction */ u8 data[]; } __packed; struct nci_hcp_packet { u8 header; /* cbit+pipe */ struct nci_hcp_message message; } __packed; #define NCI_HCI_ANY_SET_PARAMETER 0x01 #define NCI_HCI_ANY_GET_PARAMETER 0x02 #define NCI_HCI_ANY_CLOSE_PIPE 0x04 #define NCI_HCI_ADM_CLEAR_ALL_PIPE 0x14 #define NCI_HFP_NO_CHAINING 0x80 #define NCI_NFCEE_ID_HCI 0x80 #define NCI_EVT_HOT_PLUG 0x03 #define NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY 0x01 #define NCI_HCI_ADM_CREATE_PIPE 0x10 #define NCI_HCI_ADM_DELETE_PIPE 0x11 /* HCP headers */ #define NCI_HCI_HCP_PACKET_HEADER_LEN 1 #define NCI_HCI_HCP_MESSAGE_HEADER_LEN 1 #define NCI_HCI_HCP_HEADER_LEN 2 /* HCP types */ #define NCI_HCI_HCP_COMMAND 0x00 #define NCI_HCI_HCP_EVENT 0x01 #define NCI_HCI_HCP_RESPONSE 0x02 #define NCI_HCI_ADM_NOTIFY_PIPE_CREATED 0x12 #define NCI_HCI_ADM_NOTIFY_PIPE_DELETED 0x13 #define NCI_HCI_ADM_NOTIFY_ALL_PIPE_CLEARED 0x15 #define NCI_HCI_FRAGMENT 0x7f #define NCI_HCP_HEADER(type, instr) ((((type) & 0x03) << 6) |\ ((instr) & 0x3f)) #define NCI_HCP_MSG_GET_TYPE(header) ((header & 0xc0) >> 6) #define NCI_HCP_MSG_GET_CMD(header) (header & 0x3f) #define NCI_HCP_MSG_GET_PIPE(header) (header & 0x7f) static int nci_hci_result_to_errno(u8 result) { switch (result) { case NCI_HCI_ANY_OK: return 0; case NCI_HCI_ANY_E_REG_PAR_UNKNOWN: return -EOPNOTSUPP; case NCI_HCI_ANY_E_TIMEOUT: return -ETIME; default: return -1; } } /* HCI core */ static void nci_hci_reset_pipes(struct nci_hci_dev *hdev) { int i; for (i = 0; i < NCI_HCI_MAX_PIPES; i++) { hdev->pipes[i].gate = NCI_HCI_INVALID_GATE; hdev->pipes[i].host = NCI_HCI_INVALID_HOST; } memset(hdev->gate2pipe, NCI_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); } static void nci_hci_reset_pipes_per_host(struct nci_dev *ndev, u8 host) { int i; for (i = 0; i < NCI_HCI_MAX_PIPES; i++) { if (ndev->hci_dev->pipes[i].host == host) { ndev->hci_dev->pipes[i].gate = NCI_HCI_INVALID_GATE; ndev->hci_dev->pipes[i].host = NCI_HCI_INVALID_HOST; } } } /* Fragment HCI data over NCI packet. * NFC Forum NCI 10.2.2 Data Exchange: * The payload of the Data Packets sent on the Logical Connection SHALL be * valid HCP packets, as defined within [ETSI_102622]. Each Data Packet SHALL * contain a single HCP packet. NCI Segmentation and Reassembly SHALL NOT be * applied to Data Messages in either direction. The HCI fragmentation mechanism * is used if required. 
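 *
 * Illustrative walk-through (payload size made up): with a
 * max_pkt_payload_len of 10, a 25-byte message goes out as three HCP
 * packets: the first carries the header byte, the message-type byte and
 * 8 data bytes; the second carries the header byte and 9 data bytes;
 * the last carries the remaining 8 data bytes with NCI_HFP_NO_CHAINING
 * set in its header byte.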
*/ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, const u8 data_type, const u8 *data, size_t data_len) { const struct nci_conn_info *conn_info; struct sk_buff *skb; int len, i, r; u8 cb = pipe; conn_info = ndev->hci_dev->conn_info; if (!conn_info) return -EPROTO; i = 0; skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len + NCI_DATA_HDR_SIZE, GFP_ATOMIC); if (!skb) return -ENOMEM; skb_reserve(skb, NCI_DATA_HDR_SIZE + 2); *(u8 *)skb_push(skb, 1) = data_type; do { /* If last packet add NCI_HFP_NO_CHAINING */ if (i + conn_info->max_pkt_payload_len - (skb->len + 1) >= data_len) { cb |= NCI_HFP_NO_CHAINING; len = data_len - i; } else { len = conn_info->max_pkt_payload_len - skb->len - 1; } *(u8 *)skb_push(skb, 1) = cb; if (len > 0) skb_put_data(skb, data + i, len); r = nci_send_data(ndev, conn_info->conn_id, skb); if (r < 0) return r; i += len; if (i < data_len) { skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len + NCI_DATA_HDR_SIZE, GFP_ATOMIC); if (!skb) return -ENOMEM; skb_reserve(skb, NCI_DATA_HDR_SIZE + 1); } } while (i < data_len); return i; } static void nci_hci_send_data_req(struct nci_dev *ndev, const void *opt) { const struct nci_data *data = opt; nci_hci_send_data(ndev, data->pipe, data->cmd, data->data, data->data_len); } int nci_hci_send_event(struct nci_dev *ndev, u8 gate, u8 event, const u8 *param, size_t param_len) { u8 pipe = ndev->hci_dev->gate2pipe[gate]; if (pipe == NCI_HCI_INVALID_PIPE) return -EADDRNOTAVAIL; return nci_hci_send_data(ndev, pipe, NCI_HCP_HEADER(NCI_HCI_HCP_EVENT, event), param, param_len); } EXPORT_SYMBOL(nci_hci_send_event); int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd, const u8 *param, size_t param_len, struct sk_buff **skb) { const struct nci_hcp_message *message; const struct nci_conn_info *conn_info; struct nci_data data; int r; u8 pipe = ndev->hci_dev->gate2pipe[gate]; if (pipe == NCI_HCI_INVALID_PIPE) return -EADDRNOTAVAIL; conn_info = ndev->hci_dev->conn_info; if (!conn_info) return -EPROTO; data.conn_id = conn_info->conn_id; data.pipe = pipe; data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, cmd); data.data = param; data.data_len = param_len; r = nci_request(ndev, nci_hci_send_data_req, &data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); if (r == NCI_STATUS_OK) { message = (struct nci_hcp_message *)conn_info->rx_skb->data; r = nci_hci_result_to_errno( NCI_HCP_MSG_GET_CMD(message->header)); skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); if (!r && skb) *skb = conn_info->rx_skb; } return r; } EXPORT_SYMBOL(nci_hci_send_cmd); int nci_hci_clear_all_pipes(struct nci_dev *ndev) { int r; r = nci_hci_send_cmd(ndev, NCI_HCI_ADMIN_GATE, NCI_HCI_ADM_CLEAR_ALL_PIPE, NULL, 0, NULL); if (r < 0) return r; nci_hci_reset_pipes(ndev->hci_dev); return r; } EXPORT_SYMBOL(nci_hci_clear_all_pipes); static void nci_hci_event_received(struct nci_dev *ndev, u8 pipe, u8 event, struct sk_buff *skb) { if (ndev->ops->hci_event_received) ndev->ops->hci_event_received(ndev, pipe, event, skb); } static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, u8 cmd, struct sk_buff *skb) { u8 gate = ndev->hci_dev->pipes[pipe].gate; u8 status = NCI_HCI_ANY_OK | ~NCI_HCI_FRAGMENT; u8 dest_gate, new_pipe; struct nci_hci_create_pipe_resp *create_info; struct nci_hci_delete_pipe_noti *delete_info; struct nci_hci_all_pipe_cleared_noti *cleared_info; pr_debug("from gate %x pipe %x cmd %x\n", gate, pipe, cmd); switch (cmd) { case NCI_HCI_ADM_NOTIFY_PIPE_CREATED: if (skb->len != 5) { status = NCI_HCI_ANY_E_NOK; goto exit; } create_info = (struct 
nci_hci_create_pipe_resp *)skb->data; dest_gate = create_info->dest_gate; new_pipe = create_info->pipe; if (new_pipe >= NCI_HCI_MAX_PIPES) { status = NCI_HCI_ANY_E_NOK; goto exit; } /* Save the new created pipe and bind with local gate, * the description for skb->data[3] is destination gate id * but since we received this cmd from host controller, we * are the destination and it is our local gate */ ndev->hci_dev->gate2pipe[dest_gate] = new_pipe; ndev->hci_dev->pipes[new_pipe].gate = dest_gate; ndev->hci_dev->pipes[new_pipe].host = create_info->src_host; break; case NCI_HCI_ANY_OPEN_PIPE: /* If the pipe is not created report an error */ if (gate == NCI_HCI_INVALID_GATE) { status = NCI_HCI_ANY_E_NOK; goto exit; } break; case NCI_HCI_ADM_NOTIFY_PIPE_DELETED: if (skb->len != 1) { status = NCI_HCI_ANY_E_NOK; goto exit; } delete_info = (struct nci_hci_delete_pipe_noti *)skb->data; if (delete_info->pipe >= NCI_HCI_MAX_PIPES) { status = NCI_HCI_ANY_E_NOK; goto exit; } ndev->hci_dev->pipes[delete_info->pipe].gate = NCI_HCI_INVALID_GATE; ndev->hci_dev->pipes[delete_info->pipe].host = NCI_HCI_INVALID_HOST; break; case NCI_HCI_ADM_NOTIFY_ALL_PIPE_CLEARED: if (skb->len != 1) { status = NCI_HCI_ANY_E_NOK; goto exit; } cleared_info = (struct nci_hci_all_pipe_cleared_noti *)skb->data; nci_hci_reset_pipes_per_host(ndev, cleared_info->host); break; default: pr_debug("Discarded unknown cmd %x to gate %x\n", cmd, gate); break; } if (ndev->ops->hci_cmd_received) ndev->ops->hci_cmd_received(ndev, pipe, cmd, skb); exit: nci_hci_send_data(ndev, pipe, status, NULL, 0); kfree_skb(skb); } static void nci_hci_resp_received(struct nci_dev *ndev, u8 pipe, struct sk_buff *skb) { struct nci_conn_info *conn_info; conn_info = ndev->hci_dev->conn_info; if (!conn_info) goto exit; conn_info->rx_skb = skb; exit: nci_req_complete(ndev, NCI_STATUS_OK); } /* Receive hcp message for pipe, with type and cmd. * skb contains optional message data only. 
*/ static void nci_hci_hcp_message_rx(struct nci_dev *ndev, u8 pipe, u8 type, u8 instruction, struct sk_buff *skb) { switch (type) { case NCI_HCI_HCP_RESPONSE: nci_hci_resp_received(ndev, pipe, skb); break; case NCI_HCI_HCP_COMMAND: nci_hci_cmd_received(ndev, pipe, instruction, skb); break; case NCI_HCI_HCP_EVENT: nci_hci_event_received(ndev, pipe, instruction, skb); break; default: pr_err("UNKNOWN MSG Type %d, instruction=%d\n", type, instruction); kfree_skb(skb); break; } nci_req_complete(ndev, NCI_STATUS_OK); } static void nci_hci_msg_rx_work(struct work_struct *work) { struct nci_hci_dev *hdev = container_of(work, struct nci_hci_dev, msg_rx_work); struct sk_buff *skb; const struct nci_hcp_message *message; u8 pipe, type, instruction; for (; (skb = skb_dequeue(&hdev->msg_rx_queue)); kcov_remote_stop()) { kcov_remote_start_common(skb_get_kcov_handle(skb)); pipe = NCI_HCP_MSG_GET_PIPE(skb->data[0]); skb_pull(skb, NCI_HCI_HCP_PACKET_HEADER_LEN); message = (struct nci_hcp_message *)skb->data; type = NCI_HCP_MSG_GET_TYPE(message->header); instruction = NCI_HCP_MSG_GET_CMD(message->header); skb_pull(skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); nci_hci_hcp_message_rx(hdev->ndev, pipe, type, instruction, skb); } } void nci_hci_data_received_cb(void *context, struct sk_buff *skb, int err) { struct nci_dev *ndev = (struct nci_dev *)context; struct nci_hcp_packet *packet; u8 pipe, type; struct sk_buff *hcp_skb; struct sk_buff *frag_skb; int msg_len; if (err) { nci_req_complete(ndev, err); return; } packet = (struct nci_hcp_packet *)skb->data; if ((packet->header & ~NCI_HCI_FRAGMENT) == 0) { skb_queue_tail(&ndev->hci_dev->rx_hcp_frags, skb); return; } /* it's the last fragment. Does it need re-aggregation? */ if (skb_queue_len(&ndev->hci_dev->rx_hcp_frags)) { pipe = NCI_HCP_MSG_GET_PIPE(packet->header); skb_queue_tail(&ndev->hci_dev->rx_hcp_frags, skb); msg_len = 0; skb_queue_walk(&ndev->hci_dev->rx_hcp_frags, frag_skb) { msg_len += (frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN); } hcp_skb = nfc_alloc_recv_skb(NCI_HCI_HCP_PACKET_HEADER_LEN + msg_len, GFP_KERNEL); if (!hcp_skb) { nci_req_complete(ndev, -ENOMEM); return; } skb_put_u8(hcp_skb, pipe); skb_queue_walk(&ndev->hci_dev->rx_hcp_frags, frag_skb) { msg_len = frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN; skb_put_data(hcp_skb, frag_skb->data + NCI_HCI_HCP_PACKET_HEADER_LEN, msg_len); } skb_queue_purge(&ndev->hci_dev->rx_hcp_frags); } else { packet->header &= NCI_HCI_FRAGMENT; hcp_skb = skb; } /* if this is a response, dispatch immediately to * unblock waiting cmd context. Otherwise, enqueue to dispatch * in separate context where handler can also execute command. 
*/ packet = (struct nci_hcp_packet *)hcp_skb->data; type = NCI_HCP_MSG_GET_TYPE(packet->message.header); if (type == NCI_HCI_HCP_RESPONSE) { pipe = NCI_HCP_MSG_GET_PIPE(packet->header); skb_pull(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN); nci_hci_hcp_message_rx(ndev, pipe, type, NCI_STATUS_OK, hcp_skb); } else { skb_queue_tail(&ndev->hci_dev->msg_rx_queue, hcp_skb); schedule_work(&ndev->hci_dev->msg_rx_work); } } int nci_hci_open_pipe(struct nci_dev *ndev, u8 pipe) { struct nci_data data; const struct nci_conn_info *conn_info; conn_info = ndev->hci_dev->conn_info; if (!conn_info) return -EPROTO; data.conn_id = conn_info->conn_id; data.pipe = pipe; data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, NCI_HCI_ANY_OPEN_PIPE); data.data = NULL; data.data_len = 0; return nci_request(ndev, nci_hci_send_data_req, &data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); } EXPORT_SYMBOL(nci_hci_open_pipe); static u8 nci_hci_create_pipe(struct nci_dev *ndev, u8 dest_host, u8 dest_gate, int *result) { u8 pipe; struct sk_buff *skb; struct nci_hci_create_pipe_params params; const struct nci_hci_create_pipe_resp *resp; pr_debug("gate=%d\n", dest_gate); params.src_gate = NCI_HCI_ADMIN_GATE; params.dest_host = dest_host; params.dest_gate = dest_gate; *result = nci_hci_send_cmd(ndev, NCI_HCI_ADMIN_GATE, NCI_HCI_ADM_CREATE_PIPE, (u8 *)&params, sizeof(params), &skb); if (*result < 0) return NCI_HCI_INVALID_PIPE; resp = (struct nci_hci_create_pipe_resp *)skb->data; pipe = resp->pipe; kfree_skb(skb); pr_debug("pipe created=%d\n", pipe); if (pipe >= NCI_HCI_MAX_PIPES) pipe = NCI_HCI_INVALID_PIPE; return pipe; } static int nci_hci_delete_pipe(struct nci_dev *ndev, u8 pipe) { return nci_hci_send_cmd(ndev, NCI_HCI_ADMIN_GATE, NCI_HCI_ADM_DELETE_PIPE, &pipe, 1, NULL); } int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx, const u8 *param, size_t param_len) { const struct nci_hcp_message *message; const struct nci_conn_info *conn_info; struct nci_data data; int r; u8 *tmp; u8 pipe = ndev->hci_dev->gate2pipe[gate]; pr_debug("idx=%d to gate %d\n", idx, gate); if (pipe == NCI_HCI_INVALID_PIPE) return -EADDRNOTAVAIL; conn_info = ndev->hci_dev->conn_info; if (!conn_info) return -EPROTO; tmp = kmalloc(1 + param_len, GFP_KERNEL); if (!tmp) return -ENOMEM; *tmp = idx; memcpy(tmp + 1, param, param_len); data.conn_id = conn_info->conn_id; data.pipe = pipe; data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, NCI_HCI_ANY_SET_PARAMETER); data.data = tmp; data.data_len = param_len + 1; r = nci_request(ndev, nci_hci_send_data_req, &data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); if (r == NCI_STATUS_OK) { message = (struct nci_hcp_message *)conn_info->rx_skb->data; r = nci_hci_result_to_errno( NCI_HCP_MSG_GET_CMD(message->header)); skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); } kfree(tmp); return r; } EXPORT_SYMBOL(nci_hci_set_param); int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx, struct sk_buff **skb) { const struct nci_hcp_message *message; const struct nci_conn_info *conn_info; struct nci_data data; int r; u8 pipe = ndev->hci_dev->gate2pipe[gate]; pr_debug("idx=%d to gate %d\n", idx, gate); if (pipe == NCI_HCI_INVALID_PIPE) return -EADDRNOTAVAIL; conn_info = ndev->hci_dev->conn_info; if (!conn_info) return -EPROTO; data.conn_id = conn_info->conn_id; data.pipe = pipe; data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, NCI_HCI_ANY_GET_PARAMETER); data.data = &idx; data.data_len = 1; r = nci_request(ndev, nci_hci_send_data_req, &data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); if (r == NCI_STATUS_OK) { message = (struct nci_hcp_message
*)conn_info->rx_skb->data; r = nci_hci_result_to_errno( NCI_HCP_MSG_GET_CMD(message->header)); skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); if (!r && skb) *skb = conn_info->rx_skb; } return r; } EXPORT_SYMBOL(nci_hci_get_param); int nci_hci_connect_gate(struct nci_dev *ndev, u8 dest_host, u8 dest_gate, u8 pipe) { bool pipe_created = false; int r; if (pipe == NCI_HCI_DO_NOT_OPEN_PIPE) return 0; if (ndev->hci_dev->gate2pipe[dest_gate] != NCI_HCI_INVALID_PIPE) return -EADDRINUSE; if (pipe != NCI_HCI_INVALID_PIPE) goto open_pipe; switch (dest_gate) { case NCI_HCI_LINK_MGMT_GATE: pipe = NCI_HCI_LINK_MGMT_PIPE; break; case NCI_HCI_ADMIN_GATE: pipe = NCI_HCI_ADMIN_PIPE; break; default: pipe = nci_hci_create_pipe(ndev, dest_host, dest_gate, &r); if (pipe == NCI_HCI_INVALID_PIPE) return r; pipe_created = true; break; } open_pipe: r = nci_hci_open_pipe(ndev, pipe); if (r < 0) { if (pipe_created) { if (nci_hci_delete_pipe(ndev, pipe) < 0) { /* TODO: Cannot clean by deleting pipe... * -> inconsistent state */ } } return r; } ndev->hci_dev->pipes[pipe].gate = dest_gate; ndev->hci_dev->pipes[pipe].host = dest_host; ndev->hci_dev->gate2pipe[dest_gate] = pipe; return 0; } EXPORT_SYMBOL(nci_hci_connect_gate); static int nci_hci_dev_connect_gates(struct nci_dev *ndev, u8 gate_count, const struct nci_hci_gate *gates) { int r; while (gate_count--) { r = nci_hci_connect_gate(ndev, gates->dest_host, gates->gate, gates->pipe); if (r < 0) return r; gates++; } return 0; } int nci_hci_dev_session_init(struct nci_dev *ndev) { struct nci_conn_info *conn_info; struct sk_buff *skb; int r; ndev->hci_dev->count_pipes = 0; ndev->hci_dev->expected_pipes = 0; conn_info = ndev->hci_dev->conn_info; if (!conn_info) return -EPROTO; conn_info->data_exchange_cb = nci_hci_data_received_cb; conn_info->data_exchange_cb_context = ndev; nci_hci_reset_pipes(ndev->hci_dev); if (ndev->hci_dev->init_data.gates[0].gate != NCI_HCI_ADMIN_GATE) return -EPROTO; r = nci_hci_connect_gate(ndev, ndev->hci_dev->init_data.gates[0].dest_host, ndev->hci_dev->init_data.gates[0].gate, ndev->hci_dev->init_data.gates[0].pipe); if (r < 0) return r; r = nci_hci_get_param(ndev, NCI_HCI_ADMIN_GATE, NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY, &skb); if (r < 0) return r; if (skb->len && skb->len == strlen(ndev->hci_dev->init_data.session_id) && !memcmp(ndev->hci_dev->init_data.session_id, skb->data, skb->len) && ndev->ops->hci_load_session) { /* Restore gate<->pipe table from some proprietary location. */ r = ndev->ops->hci_load_session(ndev); } else { r = nci_hci_clear_all_pipes(ndev); if (r < 0) goto exit; r = nci_hci_dev_connect_gates(ndev, ndev->hci_dev->init_data.gate_count, ndev->hci_dev->init_data.gates); if (r < 0) goto exit; r = nci_hci_set_param(ndev, NCI_HCI_ADMIN_GATE, NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY, ndev->hci_dev->init_data.session_id, strlen(ndev->hci_dev->init_data.session_id)); } exit: kfree_skb(skb); return r; } EXPORT_SYMBOL(nci_hci_dev_session_init); struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev) { struct nci_hci_dev *hdev; hdev = kzalloc(sizeof(*hdev), GFP_KERNEL); if (!hdev) return NULL; skb_queue_head_init(&hdev->rx_hcp_frags); INIT_WORK(&hdev->msg_rx_work, nci_hci_msg_rx_work); skb_queue_head_init(&hdev->msg_rx_queue); hdev->ndev = ndev; return hdev; } void nci_hci_deallocate(struct nci_dev *ndev) { kfree(ndev->hci_dev); }
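/*
 * Illustrative sketch (not part of the file above): how the HCP header
 * macros defined earlier pack and unpack a message header - the 2-bit
 * type sits in bits 7:6 and the 6-bit instruction in bits 5:0.
 */
#include <stdint.h>

#define DEMO_HCP_HEADER(type, instr) ((((type) & 0x03) << 6) | ((instr) & 0x3f))
#define DEMO_HCP_MSG_GET_TYPE(header) (((header) & 0xc0) >> 6)
#define DEMO_HCP_MSG_GET_CMD(header) ((header) & 0x3f)

static void demo_hcp_header(void)
{
	/* type 0x02 (response), instruction 0x02 (ANY_GET_PARAMETER) */
	uint8_t header = DEMO_HCP_HEADER(0x02, 0x02); /* 0x82 */
	uint8_t type = DEMO_HCP_MSG_GET_TYPE(header); /* 0x02 */
	uint8_t cmd = DEMO_HCP_MSG_GET_CMD(header); /* 0x02 */

	(void)type;
	(void)cmd;
}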
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2012 Red Hat * * based in parts on udlfb.c: * Copyright (C) 2009 Roberto De Ioris <roberto@unbit.it> * Copyright (C) 2009 Jaya Kumar <jayakumar.lkml@gmail.com> * Copyright (C) 2009 Bernie Thompson <bernie@plugable.com> */ #include <linux/bitfield.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_damage_helper.h> #include <drm/drm_drv.h> #include <drm/drm_edid.h> #include <drm/drm_fourcc.h> #include <drm/drm_gem_atomic_helper.h> #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_gem_shmem_helper.h> #include <drm/drm_modeset_helper_vtables.h> #include <drm/drm_probe_helper.h> #include <drm/drm_vblank.h> #include "udl_drv.h" #include "udl_edid.h" #include "udl_proto.h" /* * All DisplayLink bulk operations start with 0xaf (UDL_MSG_BULK), followed by * a specific command code. All operations are written to a command buffer, which * the driver sends to the device.
*/ static char *udl_set_register(char *buf, u8 reg, u8 val) { *buf++ = UDL_MSG_BULK; *buf++ = UDL_CMD_WRITEREG; *buf++ = reg; *buf++ = val; return buf; } static char *udl_vidreg_lock(char *buf) { return udl_set_register(buf, UDL_REG_VIDREG, UDL_VIDREG_LOCK); } static char *udl_vidreg_unlock(char *buf) { return udl_set_register(buf, UDL_REG_VIDREG, UDL_VIDREG_UNLOCK); } static char *udl_set_blank_mode(char *buf, u8 mode) { return udl_set_register(buf, UDL_REG_BLANKMODE, mode); } static char *udl_set_color_depth(char *buf, u8 selection) { return udl_set_register(buf, UDL_REG_COLORDEPTH, selection); } static char *udl_set_base16bpp(char *buf, u32 base) { /* the base pointer is 24 bits wide, 0x20 is hi byte. */ u8 reg20 = FIELD_GET(UDL_BASE_ADDR2_MASK, base); u8 reg21 = FIELD_GET(UDL_BASE_ADDR1_MASK, base); u8 reg22 = FIELD_GET(UDL_BASE_ADDR0_MASK, base); buf = udl_set_register(buf, UDL_REG_BASE16BPP_ADDR2, reg20); buf = udl_set_register(buf, UDL_REG_BASE16BPP_ADDR1, reg21); buf = udl_set_register(buf, UDL_REG_BASE16BPP_ADDR0, reg22); return buf; } /* * DisplayLink HW has separate 16bpp and 8bpp framebuffers. * In 24bpp modes, the low 3:2:3 bits of the RGB components go in the * 8bpp framebuffer. */ static char *udl_set_base8bpp(char *buf, u32 base) { /* the base pointer is 24 bits wide, 0x26 is hi byte. */ u8 reg26 = FIELD_GET(UDL_BASE_ADDR2_MASK, base); u8 reg27 = FIELD_GET(UDL_BASE_ADDR1_MASK, base); u8 reg28 = FIELD_GET(UDL_BASE_ADDR0_MASK, base); buf = udl_set_register(buf, UDL_REG_BASE8BPP_ADDR2, reg26); buf = udl_set_register(buf, UDL_REG_BASE8BPP_ADDR1, reg27); buf = udl_set_register(buf, UDL_REG_BASE8BPP_ADDR0, reg28); return buf; } static char *udl_set_register_16(char *wrptr, u8 reg, u16 value) { wrptr = udl_set_register(wrptr, reg, value >> 8); return udl_set_register(wrptr, reg+1, value); } /* * This is kind of weird because the controller takes some * register values in a different byte order than other registers. */ static char *udl_set_register_16be(char *wrptr, u8 reg, u16 value) { wrptr = udl_set_register(wrptr, reg, value); return udl_set_register(wrptr, reg+1, value >> 8); } /* * LFSR is a linear feedback shift register. The reason we have this is * because the display controller needs to minimize the clock depth of * various counters used in the display path. So this code reverses the * provided value into the lfsr16 value by counting backwards to get * the value that needs to be set in the hardware comparator to get the * same actual count. This makes sense once you read the above a couple of * times and think about it from a hardware perspective. */ static u16 udl_lfsr16(u16 actual_count) { u32 lv = 0xFFFF; /* This is the lfsr value that the hw starts with */ while (actual_count--) { lv = ((lv << 1) | (((lv >> 15) ^ (lv >> 4) ^ (lv >> 2) ^ (lv >> 1)) & 1)) & 0xFFFF; } return (u16) lv; } /* * This does LFSR conversion on the value that is to be written. * See LFSR explanation above for more detail. */ static char *udl_set_register_lfsr16(char *wrptr, u8 reg, u16 value) { return udl_set_register_16(wrptr, reg, udl_lfsr16(value)); } /* * Takes a DRM display mode and converts it into the DisplayLink * equivalent register commands.
*/ static char *udl_set_display_mode(char *buf, struct drm_display_mode *mode) { u16 reg01 = mode->crtc_htotal - mode->crtc_hsync_start; u16 reg03 = reg01 + mode->crtc_hdisplay; u16 reg05 = mode->crtc_vtotal - mode->crtc_vsync_start; u16 reg07 = reg05 + mode->crtc_vdisplay; u16 reg09 = mode->crtc_htotal - 1; u16 reg0b = 1; /* libdlo hardcodes hsync start to 1 */ u16 reg0d = mode->crtc_hsync_end - mode->crtc_hsync_start + 1; u16 reg0f = mode->hdisplay; u16 reg11 = mode->crtc_vtotal; u16 reg13 = 0; /* libdlo hardcodes vsync start to 0 */ u16 reg15 = mode->crtc_vsync_end - mode->crtc_vsync_start; u16 reg17 = mode->crtc_vdisplay; u16 reg1b = mode->clock / 5; buf = udl_set_register_lfsr16(buf, UDL_REG_XDISPLAYSTART, reg01); buf = udl_set_register_lfsr16(buf, UDL_REG_XDISPLAYEND, reg03); buf = udl_set_register_lfsr16(buf, UDL_REG_YDISPLAYSTART, reg05); buf = udl_set_register_lfsr16(buf, UDL_REG_YDISPLAYEND, reg07); buf = udl_set_register_lfsr16(buf, UDL_REG_XENDCOUNT, reg09); buf = udl_set_register_lfsr16(buf, UDL_REG_HSYNCSTART, reg0b); buf = udl_set_register_lfsr16(buf, UDL_REG_HSYNCEND, reg0d); buf = udl_set_register_16(buf, UDL_REG_HPIXELS, reg0f); buf = udl_set_register_lfsr16(buf, UDL_REG_YENDCOUNT, reg11); buf = udl_set_register_lfsr16(buf, UDL_REG_VSYNCSTART, reg13); buf = udl_set_register_lfsr16(buf, UDL_REG_VSYNCEND, reg15); buf = udl_set_register_16(buf, UDL_REG_VPIXELS, reg17); buf = udl_set_register_16be(buf, UDL_REG_PIXELCLOCK5KHZ, reg1b); return buf; } static char *udl_dummy_render(char *wrptr) { *wrptr++ = UDL_MSG_BULK; *wrptr++ = UDL_CMD_WRITECOPY16; *wrptr++ = 0x00; /* from addr */ *wrptr++ = 0x00; *wrptr++ = 0x00; *wrptr++ = 0x01; /* one pixel */ *wrptr++ = 0x00; /* to address */ *wrptr++ = 0x00; *wrptr++ = 0x00; return wrptr; } static long udl_log_cpp(unsigned int cpp) { if (WARN_ON(!is_power_of_2(cpp))) return -EINVAL; return __ffs(cpp); } static int udl_handle_damage(struct drm_framebuffer *fb, const struct iosys_map *map, const struct drm_rect *clip) { struct drm_device *dev = fb->dev; struct udl_device *udl = to_udl(dev); void *vaddr = map->vaddr; /* TODO: Use mapping abstraction properly */ int i, ret; char *cmd; struct urb *urb; int log_bpp; ret = udl_log_cpp(fb->format->cpp[0]); if (ret < 0) return ret; log_bpp = ret; urb = udl_get_urb(udl); if (!urb) return -ENOMEM; cmd = urb->transfer_buffer; for (i = clip->y1; i < clip->y2; i++) { const int line_offset = fb->pitches[0] * i; const int byte_offset = line_offset + (clip->x1 << log_bpp); const int dev_byte_offset = (fb->width * i + clip->x1) << log_bpp; const int byte_width = drm_rect_width(clip) << log_bpp; ret = udl_render_hline(udl, log_bpp, &urb, (char *)vaddr, &cmd, byte_offset, dev_byte_offset, byte_width); if (ret) return ret; } if (cmd > (char *)urb->transfer_buffer) { /* Send partial buffer remaining before exiting */ int len; if (cmd < (char *)urb->transfer_buffer + urb->transfer_buffer_length) *cmd++ = UDL_MSG_BULK; len = cmd - (char *)urb->transfer_buffer; ret = udl_submit_urb(udl, urb, len); } else { udl_urb_completion(urb); } return 0; } /* * Primary plane */ static const uint32_t udl_primary_plane_formats[] = { DRM_FORMAT_RGB565, DRM_FORMAT_XRGB8888, }; static const uint64_t udl_primary_plane_fmtmods[] = { DRM_FORMAT_MOD_LINEAR, DRM_FORMAT_MOD_INVALID }; static int udl_primary_plane_helper_atomic_check(struct drm_plane *plane, struct drm_atomic_state *state) { struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, plane); struct drm_crtc *new_crtc = new_plane_state->crtc; struct 
drm_crtc_state *new_crtc_state = NULL; if (new_crtc) new_crtc_state = drm_atomic_get_new_crtc_state(state, new_crtc); return drm_atomic_helper_check_plane_state(new_plane_state, new_crtc_state, DRM_PLANE_NO_SCALING, DRM_PLANE_NO_SCALING, false, false); } static void udl_primary_plane_helper_atomic_update(struct drm_plane *plane, struct drm_atomic_state *state) { struct drm_device *dev = plane->dev; struct drm_plane_state *plane_state = drm_atomic_get_new_plane_state(state, plane); struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(plane_state); struct drm_framebuffer *fb = plane_state->fb; struct drm_plane_state *old_plane_state = drm_atomic_get_old_plane_state(state, plane); struct drm_atomic_helper_damage_iter iter; struct drm_rect damage; int ret, idx; if (!fb) return; /* no framebuffer; plane is disabled */ ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE); if (ret) return; if (!drm_dev_enter(dev, &idx)) goto out_drm_gem_fb_end_cpu_access; drm_atomic_helper_damage_iter_init(&iter, old_plane_state, plane_state); drm_atomic_for_each_plane_damage(&iter, &damage) { udl_handle_damage(fb, &shadow_plane_state->data[0], &damage); } drm_dev_exit(idx); out_drm_gem_fb_end_cpu_access: drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); } static const struct drm_plane_helper_funcs udl_primary_plane_helper_funcs = { DRM_GEM_SHADOW_PLANE_HELPER_FUNCS, .atomic_check = udl_primary_plane_helper_atomic_check, .atomic_update = udl_primary_plane_helper_atomic_update, }; static const struct drm_plane_funcs udl_primary_plane_funcs = { .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, .destroy = drm_plane_cleanup, DRM_GEM_SHADOW_PLANE_FUNCS, }; /* * CRTC */ static void udl_crtc_helper_atomic_enable(struct drm_crtc *crtc, struct drm_atomic_state *state) { struct drm_device *dev = crtc->dev; struct udl_device *udl = to_udl(dev); struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, crtc); struct drm_display_mode *mode = &crtc_state->mode; struct urb *urb; char *buf; int idx; if (!drm_dev_enter(dev, &idx)) return; urb = udl_get_urb(udl); if (!urb) goto out; buf = (char *)urb->transfer_buffer; buf = udl_vidreg_lock(buf); buf = udl_set_color_depth(buf, UDL_COLORDEPTH_16BPP); /* set base for 16bpp segment to 0 */ buf = udl_set_base16bpp(buf, 0); /* set base for 8bpp segment to end of fb */ buf = udl_set_base8bpp(buf, 2 * mode->vdisplay * mode->hdisplay); buf = udl_set_display_mode(buf, mode); buf = udl_set_blank_mode(buf, UDL_BLANKMODE_ON); buf = udl_vidreg_unlock(buf); buf = udl_dummy_render(buf); udl_submit_urb(udl, urb, buf - (char *)urb->transfer_buffer); out: drm_dev_exit(idx); } static void udl_crtc_helper_atomic_disable(struct drm_crtc *crtc, struct drm_atomic_state *state) { struct drm_device *dev = crtc->dev; struct udl_device *udl = to_udl(dev); struct urb *urb; char *buf; int idx; if (!drm_dev_enter(dev, &idx)) return; urb = udl_get_urb(udl); if (!urb) goto out; buf = (char *)urb->transfer_buffer; buf = udl_vidreg_lock(buf); buf = udl_set_blank_mode(buf, UDL_BLANKMODE_POWERDOWN); buf = udl_vidreg_unlock(buf); buf = udl_dummy_render(buf); udl_submit_urb(udl, urb, buf - (char *)urb->transfer_buffer); out: drm_dev_exit(idx); } static const struct drm_crtc_helper_funcs udl_crtc_helper_funcs = { .atomic_check = drm_crtc_helper_atomic_check, .atomic_enable = udl_crtc_helper_atomic_enable, .atomic_disable = udl_crtc_helper_atomic_disable, }; static const struct drm_crtc_funcs udl_crtc_funcs = { .reset = 
drm_atomic_helper_crtc_reset, .destroy = drm_crtc_cleanup, .set_config = drm_atomic_helper_set_config, .page_flip = drm_atomic_helper_page_flip, .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state, .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, }; /* * Encoder */ static const struct drm_encoder_funcs udl_encoder_funcs = { .destroy = drm_encoder_cleanup, }; /* * Connector */ static int udl_connector_helper_get_modes(struct drm_connector *connector) { const struct drm_edid *drm_edid; int count; drm_edid = udl_edid_read(connector); drm_edid_connector_update(connector, drm_edid); count = drm_edid_connector_add_modes(connector); drm_edid_free(drm_edid); return count; } static int udl_connector_helper_detect_ctx(struct drm_connector *connector, struct drm_modeset_acquire_ctx *ctx, bool force) { struct udl_device *udl = to_udl(connector->dev); if (udl_probe_edid(udl)) return connector_status_connected; return connector_status_disconnected; } static const struct drm_connector_helper_funcs udl_connector_helper_funcs = { .get_modes = udl_connector_helper_get_modes, .detect_ctx = udl_connector_helper_detect_ctx, }; static const struct drm_connector_funcs udl_connector_funcs = { .reset = drm_atomic_helper_connector_reset, .fill_modes = drm_helper_probe_single_connector_modes, .destroy = drm_connector_cleanup, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, }; /* * Modesetting */ static enum drm_mode_status udl_mode_config_mode_valid(struct drm_device *dev, const struct drm_display_mode *mode) { struct udl_device *udl = to_udl(dev); if (udl->sku_pixel_limit) { if (mode->vdisplay * mode->hdisplay > udl->sku_pixel_limit) return MODE_MEM; } return MODE_OK; } static const struct drm_mode_config_funcs udl_mode_config_funcs = { .fb_create = drm_gem_fb_create_with_dirty, .mode_valid = udl_mode_config_mode_valid, .atomic_check = drm_atomic_helper_check, .atomic_commit = drm_atomic_helper_commit, }; int udl_modeset_init(struct udl_device *udl) { struct drm_device *dev = &udl->drm; struct drm_plane *primary_plane; struct drm_crtc *crtc; struct drm_encoder *encoder; struct drm_connector *connector; int ret; ret = drmm_mode_config_init(dev); if (ret) return ret; dev->mode_config.min_width = 640; dev->mode_config.min_height = 480; dev->mode_config.max_width = 2048; dev->mode_config.max_height = 2048; dev->mode_config.preferred_depth = 16; dev->mode_config.funcs = &udl_mode_config_funcs; primary_plane = &udl->primary_plane; ret = drm_universal_plane_init(dev, primary_plane, 0, &udl_primary_plane_funcs, udl_primary_plane_formats, ARRAY_SIZE(udl_primary_plane_formats), udl_primary_plane_fmtmods, DRM_PLANE_TYPE_PRIMARY, NULL); if (ret) return ret; drm_plane_helper_add(primary_plane, &udl_primary_plane_helper_funcs); drm_plane_enable_fb_damage_clips(primary_plane); crtc = &udl->crtc; ret = drm_crtc_init_with_planes(dev, crtc, primary_plane, NULL, &udl_crtc_funcs, NULL); if (ret) return ret; drm_crtc_helper_add(crtc, &udl_crtc_helper_funcs); encoder = &udl->encoder; ret = drm_encoder_init(dev, encoder, &udl_encoder_funcs, DRM_MODE_ENCODER_DAC, NULL); if (ret) return ret; encoder->possible_crtcs = drm_crtc_mask(crtc); connector = &udl->connector; ret = drm_connector_init(dev, connector, &udl_connector_funcs, DRM_MODE_CONNECTOR_VGA); if (ret) return ret; drm_connector_helper_add(connector, &udl_connector_helper_funcs); connector->polled = DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT; ret = 
drm_connector_attach_encoder(connector, encoder); if (ret) return ret; drm_mode_config_reset(dev); drmm_kms_helper_poll_init(dev); return 0; }
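The LFSR conversion above is the least obvious part of this file, and it can be checked outside the kernel. The following standalone userspace sketch (an illustration, not driver code) replays the same feedback walk as udl_lfsr16() to show what the hardware comparator would be loaded with for a real horizontal total.

/* Userspace re-implementation of the udl_lfsr16() walk, for inspection. */
#include <stdio.h>
#include <stdint.h>

static uint16_t lfsr16(uint16_t actual_count)
{
	uint32_t lv = 0xFFFF;	/* the value the hardware LFSR starts with */

	while (actual_count--)
		lv = ((lv << 1) |
		      (((lv >> 15) ^ (lv >> 4) ^ (lv >> 2) ^ (lv >> 1)) & 1)) &
		     0xFFFF;
	return (uint16_t)lv;
}

int main(void)
{
	/* 1344 is the crtc_htotal of the common 1024x768@60 VESA mode. */
	printf("lfsr16(1344) = 0x%04x\n", lfsr16(1344));
	return 0;
}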
// SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/export.h> #include <linux/uaccess.h> #include <linux/mm.h> #include <linux/bitops.h> #include <asm/word-at-a-time.h> /* * Do a strnlen, return length of string *with* final '\0'. * 'count' is the user-supplied count, while 'max' is the * address space maximum. * * Return 0 for exceptions (which includes hitting the address * space maximum), or 'count+1' if hitting the user-supplied * maximum count. * * NOTE! We can sometimes overshoot the user-supplied maximum * if it fits in an aligned 'long'. The caller needs to check * the return value against "> max". */ static __always_inline long do_strnlen_user(const char __user *src, unsigned long count, unsigned long max) { const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; unsigned long align, res = 0; unsigned long c; /* * Do everything aligned. But that means that we * need to also expand the maximum. */ align = (sizeof(unsigned long) - 1) & (unsigned long)src; src -= align; max += align; unsafe_get_user(c, (unsigned long __user *)src, efault); c |= aligned_byte_mask(align); for (;;) { unsigned long data; if (has_zero(c, &data, &constants)) { data = prep_zero_mask(c, data, &constants); data = create_zero_mask(data); return res + find_zero(data) + 1 - align; } res += sizeof(unsigned long); /* We already handled 'unsigned long' bytes. Did we do it all? */ if (unlikely(max <= sizeof(unsigned long))) break; max -= sizeof(unsigned long); unsafe_get_user(c, (unsigned long __user *)(src+res), efault); } res -= align; /* * Uhhuh. We hit 'max'. But was that the user-specified maximum * too? If so, return the marker for "too long". */ if (res >= count) return count+1; /* * Nope: we hit the address space limit, and we still had more * characters the caller would have wanted. That's 0. */ efault: return 0; } /** * strnlen_user: - Get the size of a user string INCLUDING final NUL. * @str: The string to measure. * @count: Maximum count (including NUL character) * * Context: User context only. This function may sleep if pagefaults are * enabled. * * Get the size of a NUL-terminated string in user space. * * Returns the size of the string INCLUDING the terminating NUL. * If the string is too long, returns a number larger than @count. User * has to check the return value against "> count". * On exception (or invalid count), returns 0. * * NOTE! You should basically never use this function. There is * almost never any valid case for using the length of a user space * string, since the string can be changed at any time by other * threads. Use "strncpy_from_user()" instead to get a stable copy * of the string.
*/ long strnlen_user(const char __user *str, long count) { unsigned long max_addr, src_addr; if (unlikely(count <= 0)) return 0; if (can_do_masked_user_access()) { long retval; str = masked_user_access_begin(str); retval = do_strnlen_user(str, count, count); user_read_access_end(); return retval; } max_addr = TASK_SIZE_MAX; src_addr = (unsigned long)untagged_addr(str); if (likely(src_addr < max_addr)) { unsigned long max = max_addr - src_addr; long retval; /* * Truncate 'max' to the user-specified limit, so that * we only have one limit we need to check in the loop */ if (max > count) max = count; if (user_read_access_begin(str, max)) { retval = do_strnlen_user(str, count, max); user_read_access_end(); return retval; } } return 0; } EXPORT_SYMBOL(strnlen_user);
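Because the kerneldoc above puts the burden of interpreting the result on the caller, a short usage sketch may help. This is an illustration under assumptions, not code from this file; the function name and the 64-byte limit are invented.

/* Hypothetical caller showing the documented strnlen_user() contract. */
static int example_check_user_name(const char __user *uname)
{
	long len = strnlen_user(uname, 64);

	if (len == 0)
		return -EFAULT;		/* fault while reading user memory */
	if (len > 64)
		return -ENAMETOOLONG;	/* no NUL within the 64-byte limit */

	/* Here 1 <= len <= 64, and len includes the terminating NUL. */
	return 0;
}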
// SPDX-License-Identifier: GPL-2.0-or-later /* * Driver for the MasterKit MA901 USB FM radio. This device plugs * into the USB port and an analog audio input or headphones, so this driver * only handles initialization, frequency setting, and volume. * * Copyright (c) 2012 Alexey Klimov <klimov.linux@gmail.com> */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/input.h> #include <linux/videodev2.h> #include <media/v4l2-device.h> #include <media/v4l2-ioctl.h> #include <media/v4l2-ctrls.h> #include <media/v4l2-event.h> #include <linux/usb.h> #include <linux/mutex.h> #define DRIVER_AUTHOR "Alexey Klimov <klimov.linux@gmail.com>" #define DRIVER_DESC "Masterkit MA901 USB FM radio driver" #define DRIVER_VERSION "0.0.1" MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); MODULE_VERSION(DRIVER_VERSION); #define USB_MA901_VENDOR 0x16c0 #define USB_MA901_PRODUCT 0x05df /* dev_warn macro with driver name */ #define MA901_DRIVER_NAME "radio-ma901" #define ma901radio_dev_warn(dev, fmt, arg...) \ dev_warn(dev, MA901_DRIVER_NAME " - " fmt, ##arg) #define ma901radio_dev_err(dev, fmt, arg...)
\ dev_err(dev, MA901_DRIVER_NAME " - " fmt, ##arg) /* USB_TIMEOUT should probably be configurable via a module parameter */ #define BUFFER_LENGTH 8 #define USB_TIMEOUT 500 #define FREQ_MIN 87.5 #define FREQ_MAX 108.0 #define FREQ_MUL 16000 #define MA901_VOLUME_MAX 16 #define MA901_VOLUME_MIN 0 /* Commands the device understands. * The list isn't complete and will be extended as new functions are implemented. */ #define MA901_RADIO_SET_FREQ 0x03 #define MA901_RADIO_SET_VOLUME 0x04 #define MA901_RADIO_SET_MONO_STEREO 0x05 /* Convenience defines for ma901_set_stereo() */ #define MA901_WANT_STEREO 0x50 #define MA901_WANT_MONO 0xd0 /* module parameter */ static int radio_nr = -1; module_param(radio_nr, int, 0); MODULE_PARM_DESC(radio_nr, "Radio file number"); /* Data for one (physical) device */ struct ma901radio_device { /* reference to USB and video device */ struct usb_device *usbdev; struct usb_interface *intf; struct video_device vdev; struct v4l2_device v4l2_dev; struct v4l2_ctrl_handler hdl; u8 *buffer; struct mutex lock; /* buffer locking */ int curfreq; u16 volume; int stereo; bool muted; }; static inline struct ma901radio_device *to_ma901radio_dev(struct v4l2_device *v4l2_dev) { return container_of(v4l2_dev, struct ma901radio_device, v4l2_dev); } /* set a frequency, freq is defined by v4l's TUNER_LOW, i.e. 1/16th kHz */ static int ma901radio_set_freq(struct ma901radio_device *radio, int freq) { unsigned int freq_send = 0x300 + (freq >> 5) / 25; int retval; radio->buffer[0] = 0x0a; radio->buffer[1] = MA901_RADIO_SET_FREQ; radio->buffer[2] = ((freq_send >> 8) & 0xff) + 0x80; radio->buffer[3] = freq_send & 0xff; radio->buffer[4] = 0x00; radio->buffer[5] = 0x00; radio->buffer[6] = 0x00; radio->buffer[7] = 0x00; retval = usb_control_msg(radio->usbdev, usb_sndctrlpipe(radio->usbdev, 0), 9, 0x21, 0x0300, 0, radio->buffer, BUFFER_LENGTH, USB_TIMEOUT); if (retval < 0) return retval; radio->curfreq = freq; return 0; } static int ma901radio_set_volume(struct ma901radio_device *radio, u16 vol_to_set) { int retval; radio->buffer[0] = 0x0a; radio->buffer[1] = MA901_RADIO_SET_VOLUME; radio->buffer[2] = 0xc2; radio->buffer[3] = vol_to_set + 0x20; radio->buffer[4] = 0x00; radio->buffer[5] = 0x00; radio->buffer[6] = 0x00; radio->buffer[7] = 0x00; retval = usb_control_msg(radio->usbdev, usb_sndctrlpipe(radio->usbdev, 0), 9, 0x21, 0x0300, 0, radio->buffer, BUFFER_LENGTH, USB_TIMEOUT); if (retval < 0) return retval; radio->volume = vol_to_set; return retval; } static int ma901_set_stereo(struct ma901radio_device *radio, u8 stereo) { int retval; radio->buffer[0] = 0x0a; radio->buffer[1] = MA901_RADIO_SET_MONO_STEREO; radio->buffer[2] = stereo; radio->buffer[3] = 0x00; radio->buffer[4] = 0x00; radio->buffer[5] = 0x00; radio->buffer[6] = 0x00; radio->buffer[7] = 0x00; retval = usb_control_msg(radio->usbdev, usb_sndctrlpipe(radio->usbdev, 0), 9, 0x21, 0x0300, 0, radio->buffer, BUFFER_LENGTH, USB_TIMEOUT); if (retval < 0) return retval; if (stereo == MA901_WANT_STEREO) radio->stereo = V4L2_TUNER_MODE_STEREO; else radio->stereo = V4L2_TUNER_MODE_MONO; return retval; } /* Handle unplugging the device. * We call video_unregister_device in any case. * The last function called in this procedure is * usb_ma901radio_release.
*/ static void usb_ma901radio_disconnect(struct usb_interface *intf) { struct ma901radio_device *radio = to_ma901radio_dev(usb_get_intfdata(intf)); mutex_lock(&radio->lock); video_unregister_device(&radio->vdev); usb_set_intfdata(intf, NULL); v4l2_device_disconnect(&radio->v4l2_dev); mutex_unlock(&radio->lock); v4l2_device_put(&radio->v4l2_dev); } /* vidioc_querycap - query device capabilities */ static int vidioc_querycap(struct file *file, void *priv, struct v4l2_capability *v) { struct ma901radio_device *radio = video_drvdata(file); strscpy(v->driver, "radio-ma901", sizeof(v->driver)); strscpy(v->card, "Masterkit MA901 USB FM Radio", sizeof(v->card)); usb_make_path(radio->usbdev, v->bus_info, sizeof(v->bus_info)); return 0; } /* vidioc_g_tuner - get tuner attributes */ static int vidioc_g_tuner(struct file *file, void *priv, struct v4l2_tuner *v) { struct ma901radio_device *radio = video_drvdata(file); if (v->index > 0) return -EINVAL; v->signal = 0; /* TODO: the same remarks as in _probe() apply here. * Once reading of stats is implemented we can call * ma901radio_get_stat(): * retval = ma901radio_get_stat(radio, &is_stereo, &v->signal); */ strscpy(v->name, "FM", sizeof(v->name)); v->type = V4L2_TUNER_RADIO; v->rangelow = FREQ_MIN * FREQ_MUL; v->rangehigh = FREQ_MAX * FREQ_MUL; v->capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_STEREO; /* v->rxsubchans = is_stereo ? V4L2_TUNER_SUB_STEREO : V4L2_TUNER_SUB_MONO; */ v->audmode = radio->stereo ? V4L2_TUNER_MODE_STEREO : V4L2_TUNER_MODE_MONO; return 0; } /* vidioc_s_tuner - set tuner attributes */ static int vidioc_s_tuner(struct file *file, void *priv, const struct v4l2_tuner *v) { struct ma901radio_device *radio = video_drvdata(file); if (v->index > 0) return -EINVAL; /* mono/stereo selector */ switch (v->audmode) { case V4L2_TUNER_MODE_MONO: return ma901_set_stereo(radio, MA901_WANT_MONO); default: return ma901_set_stereo(radio, MA901_WANT_STEREO); } } /* vidioc_s_frequency - set tuner radio frequency */ static int vidioc_s_frequency(struct file *file, void *priv, const struct v4l2_frequency *f) { struct ma901radio_device *radio = video_drvdata(file); if (f->tuner != 0) return -EINVAL; return ma901radio_set_freq(radio, clamp_t(unsigned, f->frequency, FREQ_MIN * FREQ_MUL, FREQ_MAX * FREQ_MUL)); } /* vidioc_g_frequency - get tuner radio frequency */ static int vidioc_g_frequency(struct file *file, void *priv, struct v4l2_frequency *f) { struct ma901radio_device *radio = video_drvdata(file); if (f->tuner != 0) return -EINVAL; f->frequency = radio->curfreq; return 0; } static int usb_ma901radio_s_ctrl(struct v4l2_ctrl *ctrl) { struct ma901radio_device *radio = container_of(ctrl->handler, struct ma901radio_device, hdl); switch (ctrl->id) { case V4L2_CID_AUDIO_VOLUME: /* set volume */ return ma901radio_set_volume(radio, (u16)ctrl->val); } return -EINVAL; } /* TODO: Do we really need to implement suspend and resume functions? * The radio has its own memory and will continue playing if power is present * on the USB port, and on resume it will start playing again based on the freq * and volume values in device memory.
*/ static int usb_ma901radio_suspend(struct usb_interface *intf, pm_message_t message) { return 0; } static int usb_ma901radio_resume(struct usb_interface *intf) { return 0; } static const struct v4l2_ctrl_ops usb_ma901radio_ctrl_ops = { .s_ctrl = usb_ma901radio_s_ctrl, }; /* File system interface */ static const struct v4l2_file_operations usb_ma901radio_fops = { .owner = THIS_MODULE, .open = v4l2_fh_open, .release = v4l2_fh_release, .poll = v4l2_ctrl_poll, .unlocked_ioctl = video_ioctl2, }; static const struct v4l2_ioctl_ops usb_ma901radio_ioctl_ops = { .vidioc_querycap = vidioc_querycap, .vidioc_g_tuner = vidioc_g_tuner, .vidioc_s_tuner = vidioc_s_tuner, .vidioc_g_frequency = vidioc_g_frequency, .vidioc_s_frequency = vidioc_s_frequency, .vidioc_log_status = v4l2_ctrl_log_status, .vidioc_subscribe_event = v4l2_ctrl_subscribe_event, .vidioc_unsubscribe_event = v4l2_event_unsubscribe, }; static void usb_ma901radio_release(struct v4l2_device *v4l2_dev) { struct ma901radio_device *radio = to_ma901radio_dev(v4l2_dev); v4l2_ctrl_handler_free(&radio->hdl); v4l2_device_unregister(&radio->v4l2_dev); kfree(radio->buffer); kfree(radio); } /* check if the device is present and register with v4l and usb if it is */ static int usb_ma901radio_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *dev = interface_to_usbdev(intf); struct ma901radio_device *radio; int retval = 0; /* The Masterkit MA901 USB radio has the same USB ID as many other * Atmel V-USB devices, so make additional checks to be sure * that this is our device. */ if (dev->product && dev->manufacturer && (strncmp(dev->product, "MA901", 5) != 0 || strncmp(dev->manufacturer, "www.masterkit.ru", 16) != 0)) return -ENODEV; radio = kzalloc(sizeof(struct ma901radio_device), GFP_KERNEL); if (!radio) { dev_err(&intf->dev, "kzalloc for ma901radio_device failed\n"); retval = -ENOMEM; goto err; } radio->buffer = kmalloc(BUFFER_LENGTH, GFP_KERNEL); if (!radio->buffer) { dev_err(&intf->dev, "kmalloc for radio->buffer failed\n"); retval = -ENOMEM; goto err_nobuf; } retval = v4l2_device_register(&intf->dev, &radio->v4l2_dev); if (retval < 0) { dev_err(&intf->dev, "couldn't register v4l2_device\n"); goto err_v4l2; } v4l2_ctrl_handler_init(&radio->hdl, 1); /* TODO: It looks like this radio doesn't have a mute/unmute control * and the Windows program just emulates it using the volume control. * Let's plan to do the same in this driver. * * v4l2_ctrl_new_std(&radio->hdl, &usb_ma901radio_ctrl_ops, * V4L2_CID_AUDIO_MUTE, 0, 1, 1, 1); */ v4l2_ctrl_new_std(&radio->hdl, &usb_ma901radio_ctrl_ops, V4L2_CID_AUDIO_VOLUME, MA901_VOLUME_MIN, MA901_VOLUME_MAX, 1, MA901_VOLUME_MAX); if (radio->hdl.error) { retval = radio->hdl.error; dev_err(&intf->dev, "couldn't register control\n"); goto err_ctrl; } mutex_init(&radio->lock); radio->v4l2_dev.ctrl_handler = &radio->hdl; radio->v4l2_dev.release = usb_ma901radio_release; strscpy(radio->vdev.name, radio->v4l2_dev.name, sizeof(radio->vdev.name)); radio->vdev.v4l2_dev = &radio->v4l2_dev; radio->vdev.fops = &usb_ma901radio_fops; radio->vdev.ioctl_ops = &usb_ma901radio_ioctl_ops; radio->vdev.release = video_device_release_empty; radio->vdev.lock = &radio->lock; radio->vdev.device_caps = V4L2_CAP_RADIO | V4L2_CAP_TUNER; radio->usbdev = interface_to_usbdev(intf); radio->intf = intf; usb_set_intfdata(intf, &radio->v4l2_dev); radio->curfreq = 95.21 * FREQ_MUL; video_set_drvdata(&radio->vdev, radio); /* TODO: we could get some statistics (freq, volume) from the device, * but that's not implemented yet.
After insertion into the USB port the radio * sets its frequency and starts playing without any initialization. * So we don't call usb_ma901radio_init/get_stat() here. * retval = usb_ma901radio_init(radio); */ retval = video_register_device(&radio->vdev, VFL_TYPE_RADIO, radio_nr); if (retval < 0) { dev_err(&intf->dev, "could not register video device\n"); goto err_vdev; } return 0; err_vdev: v4l2_ctrl_handler_free(&radio->hdl); err_ctrl: v4l2_device_unregister(&radio->v4l2_dev); err_v4l2: kfree(radio->buffer); err_nobuf: kfree(radio); err: return retval; } /* USB Device ID List */ static const struct usb_device_id usb_ma901radio_device_table[] = { { USB_DEVICE_AND_INTERFACE_INFO(USB_MA901_VENDOR, USB_MA901_PRODUCT, USB_CLASS_HID, 0, 0) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, usb_ma901radio_device_table); /* USB subsystem interface */ static struct usb_driver usb_ma901radio_driver = { .name = MA901_DRIVER_NAME, .probe = usb_ma901radio_probe, .disconnect = usb_ma901radio_disconnect, .suspend = usb_ma901radio_suspend, .resume = usb_ma901radio_resume, .reset_resume = usb_ma901radio_resume, .id_table = usb_ma901radio_device_table, }; module_usb_driver(usb_ma901radio_driver);
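To make the frequency encoding in ma901radio_set_freq() concrete: a V4L2 frequency arrives in 1/16000 MHz units (V4L2_TUNER_CAP_LOW), ">> 5" converts it to 2 kHz units, "/ 25" converts those to 50 kHz steps, and 0x300 is the device's fixed offset. The standalone sketch below (illustration only, not driver code) prints the two payload bytes for 100.0 MHz.

/* Userspace illustration of the MA901 frequency word computation. */
#include <stdio.h>

int main(void)
{
	unsigned int freq = 100 * 16000;	/* 100.0 MHz in 1/16 kHz units */
	unsigned int freq_send = 0x300 + (freq >> 5) / 25;

	/* Matches buffer[2] and buffer[3] above; prints 0x8a and 0xd0. */
	printf("byte2=0x%02x byte3=0x%02x\n",
	       ((freq_send >> 8) & 0xff) + 0x80, freq_send & 0xff);
	return 0;
}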
/* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM tcp #if !defined(_TRACE_TCP_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_TCP_H #include <linux/ipv6.h> #include <linux/tcp.h> #include <linux/tracepoint.h> #include
<net/ipv6.h> #include <net/tcp.h> #include <linux/sock_diag.h> #include <net/rstreason.h> /* * tcp event with arguments sk and skb * * Note: this class requires a valid sk pointer; while skb pointer could * be NULL. */ DECLARE_EVENT_CLASS(tcp_event_sk_skb, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb), TP_STRUCT__entry( __field(const void *, skbaddr) __field(const void *, skaddr) __field(int, state) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __array(__u8, saddr, 4) __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) __array(__u8, daddr_v6, 16) ), TP_fast_assign( const struct inet_sock *inet = inet_sk(sk); __be32 *p32; __entry->skbaddr = skb; __entry->skaddr = sk; __entry->state = sk->sk_state; __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); __entry->family = sk->sk_family; p32 = (__be32 *) __entry->saddr; *p32 = inet->inet_saddr; p32 = (__be32 *) __entry->daddr; *p32 = inet->inet_daddr; TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); ), TP_printk("skbaddr=%p skaddr=%p family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s", __entry->skbaddr, __entry->skaddr, show_family_name(__entry->family), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6, show_tcp_state_name(__entry->state)) ); DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb) ); #undef FN #define FN(reason) TRACE_DEFINE_ENUM(SK_RST_REASON_##reason); DEFINE_RST_REASON(FN, FN) #undef FN #undef FNe #define FN(reason) { SK_RST_REASON_##reason, #reason }, #define FNe(reason) { SK_RST_REASON_##reason, #reason } /* * skb of trace_tcp_send_reset is the skb that caused RST. In case of * active reset, skb should be NULL */ TRACE_EVENT(tcp_send_reset, TP_PROTO(const struct sock *sk, const struct sk_buff *skb__nullable, const enum sk_rst_reason reason), TP_ARGS(sk, skb__nullable, reason), TP_STRUCT__entry( __field(const void *, skbaddr) __field(const void *, skaddr) __field(int, state) __field(enum sk_rst_reason, reason) __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) ), TP_fast_assign( __entry->skbaddr = skb__nullable; __entry->skaddr = sk; /* Zero means unknown state. */ __entry->state = sk ? sk->sk_state : 0; memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); if (sk && sk_fullsock(sk)) { const struct inet_sock *inet = inet_sk(sk); TP_STORE_ADDR_PORTS(__entry, inet, sk); } else if (skb__nullable) { const struct tcphdr *th = (const struct tcphdr *)skb__nullable->data; /* * We should reverse the 4-tuple of skb, so later * it can print the right flow direction of rst. */ TP_STORE_ADDR_PORTS_SKB(skb__nullable, th, entry->daddr, entry->saddr); } __entry->reason = reason; ), TP_printk("skbaddr=%p skaddr=%p src=%pISpc dest=%pISpc state=%s reason=%s", __entry->skbaddr, __entry->skaddr, __entry->saddr, __entry->daddr, __entry->state ? show_tcp_state_name(__entry->state) : "UNKNOWN", __print_symbolic(__entry->reason, DEFINE_RST_REASON(FN, FNe))) ); #undef FN #undef FNe /* * tcp event with arguments sk * * Note: this class requires a valid sk pointer. 
*/ DECLARE_EVENT_CLASS(tcp_event_sk, TP_PROTO(struct sock *sk), TP_ARGS(sk), TP_STRUCT__entry( __field(const void *, skaddr) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __array(__u8, saddr, 4) __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) __array(__u8, daddr_v6, 16) __field(__u64, sock_cookie) ), TP_fast_assign( struct inet_sock *inet = inet_sk(sk); __be32 *p32; __entry->skaddr = sk; __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); __entry->family = sk->sk_family; p32 = (__be32 *) __entry->saddr; *p32 = inet->inet_saddr; p32 = (__be32 *) __entry->daddr; *p32 = inet->inet_daddr; TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); __entry->sock_cookie = sock_gen_cookie(sk); ), TP_printk("family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c sock_cookie=%llx", show_family_name(__entry->family), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6, __entry->sock_cookie) ); DEFINE_EVENT(tcp_event_sk, tcp_receive_reset, TP_PROTO(struct sock *sk), TP_ARGS(sk) ); DEFINE_EVENT(tcp_event_sk, tcp_destroy_sock, TP_PROTO(struct sock *sk), TP_ARGS(sk) ); DEFINE_EVENT(tcp_event_sk, tcp_rcv_space_adjust, TP_PROTO(struct sock *sk), TP_ARGS(sk) ); TRACE_EVENT(tcp_rcvbuf_grow, TP_PROTO(struct sock *sk, int time), TP_ARGS(sk, time), TP_STRUCT__entry( __field(int, time) __field(__u32, rtt_us) __field(__u32, copied) __field(__u32, inq) __field(__u32, space) __field(__u32, ooo_space) __field(__u32, rcvbuf) __field(__u8, scaling_ratio) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __array(__u8, saddr, 4) __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) __array(__u8, daddr_v6, 16) __field(const void *, skaddr) __field(__u64, sock_cookie) ), TP_fast_assign( struct inet_sock *inet = inet_sk(sk); struct tcp_sock *tp = tcp_sk(sk); __be32 *p32; __entry->time = time; __entry->rtt_us = tp->rcv_rtt_est.rtt_us >> 3; __entry->copied = tp->copied_seq - tp->rcvq_space.seq; __entry->inq = tp->rcv_nxt - tp->copied_seq; __entry->space = tp->rcvq_space.space; __entry->ooo_space = RB_EMPTY_ROOT(&tp->out_of_order_queue) ? 
0 : TCP_SKB_CB(tp->ooo_last_skb)->end_seq - tp->rcv_nxt; __entry->rcvbuf = sk->sk_rcvbuf; __entry->scaling_ratio = tp->scaling_ratio; __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); __entry->family = sk->sk_family; p32 = (__be32 *) __entry->saddr; *p32 = inet->inet_saddr; p32 = (__be32 *) __entry->daddr; *p32 = inet->inet_daddr; TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); __entry->skaddr = sk; __entry->sock_cookie = sock_gen_cookie(sk); ), TP_printk("time=%u rtt_us=%u copied=%u inq=%u space=%u ooo=%u scaling_ratio=%u rcvbuf=%u " "family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 " "saddrv6=%pI6c daddrv6=%pI6c skaddr=%p sock_cookie=%llx", __entry->time, __entry->rtt_us, __entry->copied, __entry->inq, __entry->space, __entry->ooo_space, __entry->scaling_ratio, __entry->rcvbuf, show_family_name(__entry->family), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6, __entry->skaddr, __entry->sock_cookie) ); TRACE_EVENT(tcp_retransmit_synack, TP_PROTO(const struct sock *sk, const struct request_sock *req), TP_ARGS(sk, req), TP_STRUCT__entry( __field(const void *, skaddr) __field(const void *, req) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __array(__u8, saddr, 4) __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) __array(__u8, daddr_v6, 16) ), TP_fast_assign( struct inet_request_sock *ireq = inet_rsk(req); __be32 *p32; __entry->skaddr = sk; __entry->req = req; __entry->sport = ireq->ir_num; __entry->dport = ntohs(ireq->ir_rmt_port); __entry->family = sk->sk_family; p32 = (__be32 *) __entry->saddr; *p32 = ireq->ir_loc_addr; p32 = (__be32 *) __entry->daddr; *p32 = ireq->ir_rmt_addr; TP_STORE_ADDRS(__entry, ireq->ir_loc_addr, ireq->ir_rmt_addr, ireq->ir_v6_loc_addr, ireq->ir_v6_rmt_addr); ), TP_printk("family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", show_family_name(__entry->family), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6) ); TRACE_EVENT(tcp_sendmsg_locked, TP_PROTO(const struct sock *sk, const struct msghdr *msg, const struct sk_buff *skb, int size_goal), TP_ARGS(sk, msg, skb, size_goal), TP_STRUCT__entry( __field(const void *, skb_addr) __field(int, skb_len) __field(int, msg_left) __field(int, size_goal) ), TP_fast_assign( __entry->skb_addr = skb; __entry->skb_len = skb ? 
skb->len : 0; __entry->msg_left = msg_data_left(msg); __entry->size_goal = size_goal; ), TP_printk("skb_addr %p skb_len %d msg_left %d size_goal %d", __entry->skb_addr, __entry->skb_len, __entry->msg_left, __entry->size_goal)); DECLARE_TRACE(tcp_cwnd_reduction, TP_PROTO(const struct sock *sk, int newly_acked_sacked, int newly_lost, int flag), TP_ARGS(sk, newly_acked_sacked, newly_lost, flag) ); #include <trace/events/net_probe_common.h> TRACE_EVENT(tcp_probe, TP_PROTO(struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb), TP_STRUCT__entry( /* sockaddr_in6 is always bigger than sockaddr_in */ __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __field(__u32, mark) __field(__u16, data_len) __field(__u32, snd_nxt) __field(__u32, snd_una) __field(__u32, snd_cwnd) __field(__u32, ssthresh) __field(__u32, snd_wnd) __field(__u32, srtt) __field(__u32, rcv_wnd) __field(__u64, sock_cookie) __field(const void *, skbaddr) __field(const void *, skaddr) ), TP_fast_assign( const struct tcphdr *th = (const struct tcphdr *)skb->data; const struct inet_sock *inet = inet_sk(sk); const struct tcp_sock *tp = tcp_sk(sk); memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); TP_STORE_ADDR_PORTS(__entry, inet, sk); /* For filtering use */ __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); __entry->mark = skb->mark; __entry->family = sk->sk_family; __entry->data_len = skb->len - __tcp_hdrlen(th); __entry->snd_nxt = tp->snd_nxt; __entry->snd_una = tp->snd_una; __entry->snd_cwnd = tcp_snd_cwnd(tp); __entry->snd_wnd = tp->snd_wnd; __entry->rcv_wnd = tp->rcv_wnd; __entry->ssthresh = tcp_current_ssthresh(sk); __entry->srtt = tp->srtt_us >> 3; __entry->sock_cookie = sock_gen_cookie(sk); __entry->skbaddr = skb; __entry->skaddr = sk; ), TP_printk("family=%s src=%pISpc dest=%pISpc mark=%#x data_len=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx skbaddr=%p skaddr=%p", show_family_name(__entry->family), __entry->saddr, __entry->daddr, __entry->mark, __entry->data_len, __entry->snd_nxt, __entry->snd_una, __entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd, __entry->srtt, __entry->rcv_wnd, __entry->sock_cookie, __entry->skbaddr, __entry->skaddr) ); /* * tcp event with only skb */ DECLARE_EVENT_CLASS(tcp_event_skb, TP_PROTO(const struct sk_buff *skb), TP_ARGS(skb), TP_STRUCT__entry( __field(const void *, skbaddr) __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) ), TP_fast_assign( const struct tcphdr *th = (const struct tcphdr *)skb->data; __entry->skbaddr = skb; memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); TP_STORE_ADDR_PORTS_SKB(skb, th, __entry->saddr, __entry->daddr); ), TP_printk("skbaddr=%p src=%pISpc dest=%pISpc", __entry->skbaddr, __entry->saddr, __entry->daddr) ); DEFINE_EVENT(tcp_event_skb, tcp_bad_csum, TP_PROTO(const struct sk_buff *skb), TP_ARGS(skb) ); TRACE_EVENT(tcp_cong_state_set, TP_PROTO(struct sock *sk, const u8 ca_state), TP_ARGS(sk, ca_state), TP_STRUCT__entry( __field(const void *, skaddr) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __array(__u8, saddr, 4) __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) __array(__u8, daddr_v6, 16) __field(__u8, cong_state) ), TP_fast_assign( struct inet_sock *inet = 
inet_sk(sk); __be32 *p32; __entry->skaddr = sk; __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); __entry->family = sk->sk_family; p32 = (__be32 *) __entry->saddr; *p32 = inet->inet_saddr; p32 = (__be32 *) __entry->daddr; *p32 = inet->inet_daddr; TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); __entry->cong_state = ca_state; ), TP_printk("family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c cong_state=%u", show_family_name(__entry->family), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6, __entry->cong_state) ); DECLARE_EVENT_CLASS(tcp_hash_event, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb), TP_STRUCT__entry( __field(__u64, net_cookie) __field(const void *, skbaddr) __field(const void *, skaddr) __field(int, state) /* sockaddr_in6 is always bigger than sockaddr_in */ __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) __field(int, l3index) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __field(bool, fin) __field(bool, syn) __field(bool, rst) __field(bool, psh) __field(bool, ack) ), TP_fast_assign( const struct tcphdr *th = (const struct tcphdr *)skb->data; __entry->net_cookie = sock_net(sk)->net_cookie; __entry->skbaddr = skb; __entry->skaddr = sk; __entry->state = sk->sk_state; memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); TP_STORE_ADDR_PORTS_SKB(skb, th, __entry->saddr, __entry->daddr); __entry->l3index = inet_sdif(skb) ? inet_iif(skb) : 0; /* For filtering use */ __entry->sport = ntohs(th->source); __entry->dport = ntohs(th->dest); __entry->family = sk->sk_family; __entry->fin = th->fin; __entry->syn = th->syn; __entry->rst = th->rst; __entry->psh = th->psh; __entry->ack = th->ack; ), TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc L3index=%d [%c%c%c%c%c]", __entry->net_cookie, show_tcp_state_name(__entry->state), show_family_name(__entry->family), __entry->saddr, __entry->daddr, __entry->l3index, __entry->fin ? 'F' : ' ', __entry->syn ? 'S' : ' ', __entry->rst ? 'R' : ' ', __entry->psh ? 'P' : ' ', __entry->ack ? '.' 
: ' ') ); DEFINE_EVENT(tcp_hash_event, tcp_hash_bad_header, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb) ); DEFINE_EVENT(tcp_hash_event, tcp_hash_md5_required, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb) ); DEFINE_EVENT(tcp_hash_event, tcp_hash_md5_unexpected, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb) ); DEFINE_EVENT(tcp_hash_event, tcp_hash_md5_mismatch, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb) ); DEFINE_EVENT(tcp_hash_event, tcp_hash_ao_required, TP_PROTO(const struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb) ); DECLARE_EVENT_CLASS(tcp_ao_event, TP_PROTO(const struct sock *sk, const struct sk_buff *skb, const __u8 keyid, const __u8 rnext, const __u8 maclen), TP_ARGS(sk, skb, keyid, rnext, maclen), TP_STRUCT__entry( __field(__u64, net_cookie) __field(const void *, skbaddr) __field(const void *, skaddr) __field(int, state) /* sockaddr_in6 is always bigger than sockaddr_in */ __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) __field(int, l3index) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __field(bool, fin) __field(bool, syn) __field(bool, rst) __field(bool, psh) __field(bool, ack) __field(__u8, keyid) __field(__u8, rnext) __field(__u8, maclen) ), TP_fast_assign( const struct tcphdr *th = (const struct tcphdr *)skb->data; __entry->net_cookie = sock_net(sk)->net_cookie; __entry->skbaddr = skb; __entry->skaddr = sk; __entry->state = sk->sk_state; memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); TP_STORE_ADDR_PORTS_SKB(skb, th, __entry->saddr, __entry->daddr); __entry->l3index = inet_sdif(skb) ? inet_iif(skb) : 0; /* For filtering use */ __entry->sport = ntohs(th->source); __entry->dport = ntohs(th->dest); __entry->family = sk->sk_family; __entry->fin = th->fin; __entry->syn = th->syn; __entry->rst = th->rst; __entry->psh = th->psh; __entry->ack = th->ack; __entry->keyid = keyid; __entry->rnext = rnext; __entry->maclen = maclen; ), TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc L3index=%d [%c%c%c%c%c] keyid=%u rnext=%u maclen=%u", __entry->net_cookie, show_tcp_state_name(__entry->state), show_family_name(__entry->family), __entry->saddr, __entry->daddr, __entry->l3index, __entry->fin ? 'F' : ' ', __entry->syn ? 'S' : ' ', __entry->rst ? 'R' : ' ', __entry->psh ? 'P' : ' ', __entry->ack ? '.' 
: ' ', __entry->keyid, __entry->rnext, __entry->maclen) ); DEFINE_EVENT(tcp_ao_event, tcp_ao_handshake_failure, TP_PROTO(const struct sock *sk, const struct sk_buff *skb, const __u8 keyid, const __u8 rnext, const __u8 maclen), TP_ARGS(sk, skb, keyid, rnext, maclen) ); DEFINE_EVENT(tcp_ao_event, tcp_ao_wrong_maclen, TP_PROTO(const struct sock *sk, const struct sk_buff *skb, const __u8 keyid, const __u8 rnext, const __u8 maclen), TP_ARGS(sk, skb, keyid, rnext, maclen) ); DEFINE_EVENT(tcp_ao_event, tcp_ao_mismatch, TP_PROTO(const struct sock *sk, const struct sk_buff *skb, const __u8 keyid, const __u8 rnext, const __u8 maclen), TP_ARGS(sk, skb, keyid, rnext, maclen) ); DEFINE_EVENT(tcp_ao_event, tcp_ao_key_not_found, TP_PROTO(const struct sock *sk, const struct sk_buff *skb, const __u8 keyid, const __u8 rnext, const __u8 maclen), TP_ARGS(sk, skb, keyid, rnext, maclen) ); DEFINE_EVENT(tcp_ao_event, tcp_ao_rnext_request, TP_PROTO(const struct sock *sk, const struct sk_buff *skb, const __u8 keyid, const __u8 rnext, const __u8 maclen), TP_ARGS(sk, skb, keyid, rnext, maclen) ); DECLARE_EVENT_CLASS(tcp_ao_event_sk, TP_PROTO(const struct sock *sk, const __u8 keyid, const __u8 rnext), TP_ARGS(sk, keyid, rnext), TP_STRUCT__entry( __field(__u64, net_cookie) __field(const void *, skaddr) __field(int, state) /* sockaddr_in6 is always bigger than sockaddr_in */ __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __field(__u8, keyid) __field(__u8, rnext) ), TP_fast_assign( const struct inet_sock *inet = inet_sk(sk); __entry->net_cookie = sock_net(sk)->net_cookie; __entry->skaddr = sk; __entry->state = sk->sk_state; memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); TP_STORE_ADDR_PORTS(__entry, inet, sk); /* For filtering use */ __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); __entry->family = sk->sk_family; __entry->keyid = keyid; __entry->rnext = rnext; ), TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc keyid=%u rnext=%u", __entry->net_cookie, show_tcp_state_name(__entry->state), show_family_name(__entry->family), __entry->saddr, __entry->daddr, __entry->keyid, __entry->rnext) ); DEFINE_EVENT(tcp_ao_event_sk, tcp_ao_synack_no_key, TP_PROTO(const struct sock *sk, const __u8 keyid, const __u8 rnext), TP_ARGS(sk, keyid, rnext) ); DECLARE_EVENT_CLASS(tcp_ao_event_sne, TP_PROTO(const struct sock *sk, __u32 new_sne), TP_ARGS(sk, new_sne), TP_STRUCT__entry( __field(__u64, net_cookie) __field(const void *, skaddr) __field(int, state) /* sockaddr_in6 is always bigger than sockaddr_in */ __array(__u8, saddr, sizeof(struct sockaddr_in6)) __array(__u8, daddr, sizeof(struct sockaddr_in6)) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __field(__u32, new_sne) ), TP_fast_assign( const struct inet_sock *inet = inet_sk(sk); __entry->net_cookie = sock_net(sk)->net_cookie; __entry->skaddr = sk; __entry->state = sk->sk_state; memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); TP_STORE_ADDR_PORTS(__entry, inet, sk); /* For filtering use */ __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); __entry->family = sk->sk_family; __entry->new_sne = new_sne; ), TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc sne=%u", __entry->net_cookie, show_tcp_state_name(__entry->state), 
show_family_name(__entry->family), __entry->saddr, __entry->daddr, __entry->new_sne) ); DEFINE_EVENT(tcp_ao_event_sne, tcp_ao_snd_sne_update, TP_PROTO(const struct sock *sk, __u32 new_sne), TP_ARGS(sk, new_sne) ); DEFINE_EVENT(tcp_ao_event_sne, tcp_ao_rcv_sne_update, TP_PROTO(const struct sock *sk, __u32 new_sne), TP_ARGS(sk, new_sne) ); #endif /* _TRACE_TCP_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
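/*
 * A minimal userspace sketch of consuming the TCP-AO/MD5 tracepoints
 * declared above. It assumes tracefs is mounted at /sys/kernel/tracing
 * (adjust the paths if your system mounts it elsewhere, e.g. under
 * /sys/kernel/debug/tracing) and that the kernel was built with these
 * events; the file layout is the usual tracefs events/<group>/<event>
 * scheme.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_str(const char *path, const char *s)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, s, strlen(s)) < 0) {
		close(fd);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd;

	/* Enable one of the events defined above, e.g. tcp_hash_md5_mismatch. */
	if (write_str("/sys/kernel/tracing/events/tcp/tcp_hash_md5_mismatch/enable", "1"))
		perror("enable event");

	/* trace_pipe blocks until events arrive; each line is one TP_printk. */
	fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
	if (fd < 0) {
		perror("open trace_pipe");
		return 1;
	}
	while ((n = read(fd, buf, sizeof(buf) - 1)) > 0) {
		buf[n] = '\0';
		fputs(buf, stdout);
	}
	close(fd);
	return 0;
}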
// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1991, 1992 Linus Torvalds * * Added support for a Unix98-style ptmx device. * -- C. Scott Ananian <cananian@alumni.princeton.edu>, 14-Jan-1998 * */ #include <linux/module.h> #include <linux/errno.h> #include <linux/interrupt.h> #include <linux/tty.h> #include <linux/tty_flip.h> #include <linux/fcntl.h> #include <linux/sched/signal.h> #include <linux/string.h> #include <linux/major.h> #include <linux/mm.h> #include <linux/init.h> #include <linux/device.h> #include <linux/uaccess.h> #include <linux/bitops.h> #include <linux/devpts_fs.h> #include <linux/slab.h> #include <linux/mutex.h> #include <linux/poll.h> #include <linux/mount.h> #include <linux/file.h> #include <linux/ioctl.h> #include <linux/compat.h> #include "tty.h" #undef TTY_DEBUG_HANGUP #ifdef TTY_DEBUG_HANGUP # define tty_debug_hangup(tty, f, args...) tty_debug(tty, f, ##args) #else # define tty_debug_hangup(tty, f, args...) do {} while (0) #endif #ifdef CONFIG_UNIX98_PTYS static struct tty_driver *ptm_driver; static struct tty_driver *pts_driver; static DEFINE_MUTEX(devpts_mutex); #endif static void pty_close(struct tty_struct *tty, struct file *filp) { if (tty->driver->subtype == PTY_TYPE_MASTER) WARN_ON(tty->count > 1); else { if (tty_io_error(tty)) return; if (tty->count > 2) return; } set_bit(TTY_IO_ERROR, &tty->flags); wake_up_interruptible(&tty->read_wait); wake_up_interruptible(&tty->write_wait); spin_lock_irq(&tty->ctrl.lock); tty->ctrl.packet = false; spin_unlock_irq(&tty->ctrl.lock); /* Review - krefs on tty_link ?? */ if (!tty->link) return; set_bit(TTY_OTHER_CLOSED, &tty->link->flags); wake_up_interruptible(&tty->link->read_wait); wake_up_interruptible(&tty->link->write_wait); if (tty->driver->subtype == PTY_TYPE_MASTER) { set_bit(TTY_OTHER_CLOSED, &tty->flags); #ifdef CONFIG_UNIX98_PTYS if (tty->driver == ptm_driver) { mutex_lock(&devpts_mutex); if (tty->link->driver_data) devpts_pty_kill(tty->link->driver_data); mutex_unlock(&devpts_mutex); } #endif tty_vhangup(tty->link); } } /* * The unthrottle routine is called by the line discipline to signal * that it can receive more characters. For PTY's, the TTY_THROTTLED * flag is always set, to force the line discipline to always call the * unthrottle routine when there are fewer than TTY_THRESHOLD_UNTHROTTLE * characters in the queue. This is necessary since each time this * happens, we need to wake up any sleeping processes that could be * (1) trying to send data to the pty, or (2) waiting in wait_until_sent() * for the pty buffer to be drained. */ static void pty_unthrottle(struct tty_struct *tty) { tty_wakeup(tty->link); set_bit(TTY_THROTTLED, &tty->flags); } /** * pty_write - write to a pty * @tty: the tty we write from * @buf: kernel buffer of data * @c: bytes to write * * Our "hardware" write method. Data is coming from the ldisc which * may be in a non-sleeping state. We simply throw this at the other * end of the link as if we were an IRQ handler receiving stuff for * the other side of the pty/tty pair.
*/ static ssize_t pty_write(struct tty_struct *tty, const u8 *buf, size_t c) { struct tty_struct *to = tty->link; if (tty->flow.stopped || !c) return 0; return tty_insert_flip_string_and_push_buffer(to->port, buf, c); } /** * pty_write_room - write space * @tty: tty we are writing from * * Report how many bytes the ldisc can send into the queue for * the other device. */ static unsigned int pty_write_room(struct tty_struct *tty) { if (tty->flow.stopped) return 0; return tty_buffer_space_avail(tty->link->port); } /* Set the lock flag on a pty */ static int pty_set_lock(struct tty_struct *tty, int __user *arg) { int val; if (get_user(val, arg)) return -EFAULT; if (val) set_bit(TTY_PTY_LOCK, &tty->flags); else clear_bit(TTY_PTY_LOCK, &tty->flags); return 0; } static int pty_get_lock(struct tty_struct *tty, int __user *arg) { int locked = test_bit(TTY_PTY_LOCK, &tty->flags); return put_user(locked, arg); } /* Set the packet mode on a pty */ static int pty_set_pktmode(struct tty_struct *tty, int __user *arg) { int pktmode; if (get_user(pktmode, arg)) return -EFAULT; spin_lock_irq(&tty->ctrl.lock); if (pktmode) { if (!tty->ctrl.packet) { tty->link->ctrl.pktstatus = 0; smp_mb(); tty->ctrl.packet = true; } } else tty->ctrl.packet = false; spin_unlock_irq(&tty->ctrl.lock); return 0; } /* Get the packet mode of a pty */ static int pty_get_pktmode(struct tty_struct *tty, int __user *arg) { int pktmode = tty->ctrl.packet; return put_user(pktmode, arg); } /* Send a signal to the slave */ static int pty_signal(struct tty_struct *tty, int sig) { struct pid *pgrp; if (sig != SIGINT && sig != SIGQUIT && sig != SIGTSTP) return -EINVAL; if (tty->link) { pgrp = tty_get_pgrp(tty->link); if (pgrp) kill_pgrp(pgrp, sig, 1); put_pid(pgrp); } return 0; } static void pty_flush_buffer(struct tty_struct *tty) { struct tty_struct *to = tty->link; if (!to) return; tty_buffer_flush(to, NULL); if (to->ctrl.packet) { spin_lock_irq(&tty->ctrl.lock); tty->ctrl.pktstatus |= TIOCPKT_FLUSHWRITE; wake_up_interruptible(&to->read_wait); spin_unlock_irq(&tty->ctrl.lock); } } static int pty_open(struct tty_struct *tty, struct file *filp) { if (!tty || !tty->link) return -ENODEV; if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) goto out; if (test_bit(TTY_PTY_LOCK, &tty->link->flags)) goto out; if (tty->driver->subtype == PTY_TYPE_SLAVE && tty->link->count != 1) goto out; clear_bit(TTY_IO_ERROR, &tty->flags); clear_bit(TTY_OTHER_CLOSED, &tty->link->flags); set_bit(TTY_THROTTLED, &tty->flags); return 0; out: set_bit(TTY_IO_ERROR, &tty->flags); return -EIO; } static void pty_set_termios(struct tty_struct *tty, const struct ktermios *old_termios) { /* See if packet mode changed state.
*/ if (tty->link && tty->link->ctrl.packet) { int extproc = (old_termios->c_lflag & EXTPROC) | L_EXTPROC(tty); int old_flow = ((old_termios->c_iflag & IXON) && (old_termios->c_cc[VSTOP] == '\023') && (old_termios->c_cc[VSTART] == '\021')); int new_flow = (I_IXON(tty) && STOP_CHAR(tty) == '\023' && START_CHAR(tty) == '\021'); if ((old_flow != new_flow) || extproc) { spin_lock_irq(&tty->ctrl.lock); if (old_flow != new_flow) { tty->ctrl.pktstatus &= ~(TIOCPKT_DOSTOP | TIOCPKT_NOSTOP); if (new_flow) tty->ctrl.pktstatus |= TIOCPKT_DOSTOP; else tty->ctrl.pktstatus |= TIOCPKT_NOSTOP; } if (extproc) tty->ctrl.pktstatus |= TIOCPKT_IOCTL; spin_unlock_irq(&tty->ctrl.lock); wake_up_interruptible(&tty->link->read_wait); } } tty->termios.c_cflag &= ~(CSIZE | PARENB); tty->termios.c_cflag |= (CS8 | CREAD); } /** * pty_resize - resize event * @tty: tty being resized * @ws: window size being set. * * Update the termios variables and send the necessary signals to * perform a terminal resize correctly */ static int pty_resize(struct tty_struct *tty, struct winsize *ws) { struct pid *pgrp, *rpgrp; struct tty_struct *pty = tty->link; /* For a PTY we need to lock the tty side */ mutex_lock(&tty->winsize_mutex); if (!memcmp(ws, &tty->winsize, sizeof(*ws))) goto done; /* Signal the foreground process group of both ptys */ pgrp = tty_get_pgrp(tty); rpgrp = tty_get_pgrp(pty); if (pgrp) kill_pgrp(pgrp, SIGWINCH, 1); if (rpgrp != pgrp && rpgrp) kill_pgrp(rpgrp, SIGWINCH, 1); put_pid(pgrp); put_pid(rpgrp); tty->winsize = *ws; pty->winsize = *ws; /* Never used so will go away soon */ done: mutex_unlock(&tty->winsize_mutex); return 0; } /** * pty_start - start() handler * pty_stop - stop() handler * @tty: tty being flow-controlled * * Propagates the TIOCPKT status to the master pty. * * NB: only the master pty can be in packet mode so only the slave * needs start()/stop() handlers */ static void pty_start(struct tty_struct *tty) { unsigned long flags; if (tty->link && tty->link->ctrl.packet) { spin_lock_irqsave(&tty->ctrl.lock, flags); tty->ctrl.pktstatus &= ~TIOCPKT_STOP; tty->ctrl.pktstatus |= TIOCPKT_START; spin_unlock_irqrestore(&tty->ctrl.lock, flags); wake_up_interruptible_poll(&tty->link->read_wait, EPOLLIN); } } static void pty_stop(struct tty_struct *tty) { unsigned long flags; if (tty->link && tty->link->ctrl.packet) { spin_lock_irqsave(&tty->ctrl.lock, flags); tty->ctrl.pktstatus &= ~TIOCPKT_START; tty->ctrl.pktstatus |= TIOCPKT_STOP; spin_unlock_irqrestore(&tty->ctrl.lock, flags); wake_up_interruptible_poll(&tty->link->read_wait, EPOLLIN); } } /** * pty_common_install - set up the pty pair * @driver: the pty driver * @tty: the tty being instantiated * @legacy: true if this is BSD style * * Perform the initial set up for the tty/pty pair. Called from the * tty layer when the port is first opened.
* * Locking: the caller must hold the tty_mutex */ static int pty_common_install(struct tty_driver *driver, struct tty_struct *tty, bool legacy) { struct tty_struct *o_tty; struct tty_port *ports[2]; int idx = tty->index; int retval = -ENOMEM; /* Opening the slave first has always returned -EIO */ if (driver->subtype != PTY_TYPE_MASTER) return -EIO; ports[0] = kmalloc(sizeof **ports, GFP_KERNEL); ports[1] = kmalloc(sizeof **ports, GFP_KERNEL); if (!ports[0] || !ports[1]) goto err; if (!try_module_get(driver->other->owner)) { /* This cannot in fact currently happen */ goto err; } o_tty = alloc_tty_struct(driver->other, idx); if (!o_tty) goto err_put_module; tty_set_lock_subclass(o_tty); lockdep_set_subclass(&o_tty->termios_rwsem, TTY_LOCK_SLAVE); if (legacy) { /* We always use new tty termios data so we can do this the easy way .. */ tty_init_termios(tty); tty_init_termios(o_tty); driver->other->ttys[idx] = o_tty; driver->ttys[idx] = tty; } else { memset(&tty->termios_locked, 0, sizeof(tty->termios_locked)); tty->termios = driver->init_termios; memset(&o_tty->termios_locked, 0, sizeof(tty->termios_locked)); o_tty->termios = driver->other->init_termios; } /* * Everything allocated ... set up the o_tty structure. */ tty_driver_kref_get(driver->other); /* Establish the links in both directions */ tty->link = o_tty; o_tty->link = tty; tty_port_init(ports[0]); tty_port_init(ports[1]); tty_buffer_set_limit(ports[0], 8192); tty_buffer_set_limit(ports[1], 8192); o_tty->port = ports[0]; tty->port = ports[1]; o_tty->port->itty = o_tty; tty_buffer_set_lock_subclass(o_tty->port); tty_driver_kref_get(driver); tty->count++; o_tty->count++; return 0; err_put_module: module_put(driver->other->owner); err: kfree(ports[0]); kfree(ports[1]); return retval; } static void pty_cleanup(struct tty_struct *tty) { tty_port_put(tty->port); } /* Traditional BSD devices */ #ifdef CONFIG_LEGACY_PTYS static int pty_install(struct tty_driver *driver, struct tty_struct *tty) { return pty_common_install(driver, tty, true); } static void pty_remove(struct tty_driver *driver, struct tty_struct *tty) { struct tty_struct *pair = tty->link; driver->ttys[tty->index] = NULL; if (pair) pair->driver->ttys[pair->index] = NULL; } static int pty_bsd_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { switch (cmd) { case TIOCSPTLCK: /* Set PT Lock (disallow slave open) */ return pty_set_lock(tty, (int __user *) arg); case TIOCGPTLCK: /* Get PT Lock status */ return pty_get_lock(tty, (int __user *)arg); case TIOCPKT: /* Set PT packet mode */ return pty_set_pktmode(tty, (int __user *)arg); case TIOCGPKT: /* Get PT packet mode */ return pty_get_pktmode(tty, (int __user *)arg); case TIOCSIG: /* Send signal to other side of pty */ return pty_signal(tty, (int) arg); case TIOCGPTN: /* TTY returns ENOTTY, but glibc expects EINVAL here */ return -EINVAL; } return -ENOIOCTLCMD; } #ifdef CONFIG_COMPAT static long pty_bsd_compat_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { /* * PTY ioctls don't require any special translation between 32-bit and * 64-bit userspace, they are already compatible. */ return pty_bsd_ioctl(tty, cmd, (unsigned long)compat_ptr(arg)); } #else #define pty_bsd_compat_ioctl NULL #endif static int legacy_count = CONFIG_LEGACY_PTY_COUNT; /* * not really modular, but the easiest way to keep compat with existing * bootargs behaviour is to continue using module_param here. */ module_param(legacy_count, int, 0); /* * The master side of a pty can do TIOCSPTLCK and thus * has pty_bsd_ioctl. 
*/ static const struct tty_operations master_pty_ops_bsd = { .install = pty_install, .open = pty_open, .close = pty_close, .write = pty_write, .write_room = pty_write_room, .flush_buffer = pty_flush_buffer, .unthrottle = pty_unthrottle, .ioctl = pty_bsd_ioctl, .compat_ioctl = pty_bsd_compat_ioctl, .cleanup = pty_cleanup, .resize = pty_resize, .remove = pty_remove }; static const struct tty_operations slave_pty_ops_bsd = { .install = pty_install, .open = pty_open, .close = pty_close, .write = pty_write, .write_room = pty_write_room, .flush_buffer = pty_flush_buffer, .unthrottle = pty_unthrottle, .set_termios = pty_set_termios, .cleanup = pty_cleanup, .resize = pty_resize, .start = pty_start, .stop = pty_stop, .remove = pty_remove }; static void __init legacy_pty_init(void) { struct tty_driver *pty_driver, *pty_slave_driver; if (legacy_count <= 0) return; pty_driver = tty_alloc_driver(legacy_count, TTY_DRIVER_RESET_TERMIOS | TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_ALLOC); if (IS_ERR(pty_driver)) panic("Couldn't allocate pty driver"); pty_slave_driver = tty_alloc_driver(legacy_count, TTY_DRIVER_RESET_TERMIOS | TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_ALLOC); if (IS_ERR(pty_slave_driver)) panic("Couldn't allocate pty slave driver"); pty_driver->driver_name = "pty_master"; pty_driver->name = "pty"; pty_driver->major = PTY_MASTER_MAJOR; pty_driver->minor_start = 0; pty_driver->type = TTY_DRIVER_TYPE_PTY; pty_driver->subtype = PTY_TYPE_MASTER; pty_driver->init_termios = tty_std_termios; pty_driver->init_termios.c_iflag = 0; pty_driver->init_termios.c_oflag = 0; pty_driver->init_termios.c_cflag = B38400 | CS8 | CREAD; pty_driver->init_termios.c_lflag = 0; pty_driver->init_termios.c_ispeed = 38400; pty_driver->init_termios.c_ospeed = 38400; pty_driver->other = pty_slave_driver; tty_set_operations(pty_driver, &master_pty_ops_bsd); pty_slave_driver->driver_name = "pty_slave"; pty_slave_driver->name = "ttyp"; pty_slave_driver->major = PTY_SLAVE_MAJOR; pty_slave_driver->minor_start = 0; pty_slave_driver->type = TTY_DRIVER_TYPE_PTY; pty_slave_driver->subtype = PTY_TYPE_SLAVE; pty_slave_driver->init_termios = tty_std_termios; pty_slave_driver->init_termios.c_cflag = B38400 | CS8 | CREAD; pty_slave_driver->init_termios.c_ispeed = 38400; pty_slave_driver->init_termios.c_ospeed = 38400; pty_slave_driver->other = pty_driver; tty_set_operations(pty_slave_driver, &slave_pty_ops_bsd); if (tty_register_driver(pty_driver)) panic("Couldn't register pty driver"); if (tty_register_driver(pty_slave_driver)) panic("Couldn't register pty slave driver"); } #else static inline void legacy_pty_init(void) { } #endif /* Unix98 devices */ #ifdef CONFIG_UNIX98_PTYS static struct cdev ptmx_cdev; /** * ptm_open_peer - open the peer of a pty * @master: the open struct file of the ptmx device node * @tty: the master of the pty being opened * @flags: the flags for open * * Provide a race free way for userspace to open the slave end of a pty * (where they have the master fd and cannot access or trust the mount * namespace /dev/pts was mounted inside). 
*/ int ptm_open_peer(struct file *master, struct tty_struct *tty, int flags) { int fd; struct file *filp; int retval = -EINVAL; struct path path; if (tty->driver != ptm_driver) return -EIO; fd = get_unused_fd_flags(flags); if (fd < 0) { retval = fd; goto err; } /* Compute the slave's path */ path.mnt = devpts_mntget(master, tty->driver_data); if (IS_ERR(path.mnt)) { retval = PTR_ERR(path.mnt); goto err_put; } path.dentry = tty->link->driver_data; filp = dentry_open(&path, flags, current_cred()); mntput(path.mnt); if (IS_ERR(filp)) { retval = PTR_ERR(filp); goto err_put; } fd_install(fd, filp); return fd; err_put: put_unused_fd(fd); err: return retval; } static int pty_unix98_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { switch (cmd) { case TIOCSPTLCK: /* Set PT Lock (disallow slave open) */ return pty_set_lock(tty, (int __user *)arg); case TIOCGPTLCK: /* Get PT Lock status */ return pty_get_lock(tty, (int __user *)arg); case TIOCPKT: /* Set PT packet mode */ return pty_set_pktmode(tty, (int __user *)arg); case TIOCGPKT: /* Get PT packet mode */ return pty_get_pktmode(tty, (int __user *)arg); case TIOCGPTN: /* Get PT Number */ return put_user(tty->index, (unsigned int __user *)arg); case TIOCSIG: /* Send signal to other side of pty */ return pty_signal(tty, (int) arg); } return -ENOIOCTLCMD; } #ifdef CONFIG_COMPAT static long pty_unix98_compat_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { /* * PTY ioctls don't require any special translation between 32-bit and * 64-bit userspace, they are already compatible. */ return pty_unix98_ioctl(tty, cmd, cmd == TIOCSIG ? arg : (unsigned long)compat_ptr(arg)); } #else #define pty_unix98_compat_ioctl NULL #endif /** * ptm_unix98_lookup - find a pty master * @driver: ptm driver * @file: unused * @idx: tty index * * Look up a pty master device. Called under the tty_mutex for now. * This provides our locking. */ static struct tty_struct *ptm_unix98_lookup(struct tty_driver *driver, struct file *file, int idx) { /* Master must be open via /dev/ptmx */ return ERR_PTR(-EIO); } /** * pts_unix98_lookup - find a pty slave * @driver: pts driver * @file: file pointer to tty * @idx: tty index * * Look up a pty slave device. Called under the tty_mutex for now. * This provides our locking for the tty pointer.
*/ static struct tty_struct *pts_unix98_lookup(struct tty_driver *driver, struct file *file, int idx) { struct tty_struct *tty; mutex_lock(&devpts_mutex); tty = devpts_get_priv(file->f_path.dentry); mutex_unlock(&devpts_mutex); /* Master must be open before slave */ if (!tty) return ERR_PTR(-EIO); return tty; } static int pty_unix98_install(struct tty_driver *driver, struct tty_struct *tty) { return pty_common_install(driver, tty, false); } /* this is called once with whichever end is closed last */ static void pty_unix98_remove(struct tty_driver *driver, struct tty_struct *tty) { struct pts_fs_info *fsi; if (tty->driver->subtype == PTY_TYPE_MASTER) fsi = tty->driver_data; else fsi = tty->link->driver_data; if (fsi) { devpts_kill_index(fsi, tty->index); devpts_release(fsi); } } static void pty_show_fdinfo(struct tty_struct *tty, struct seq_file *m) { seq_printf(m, "tty-index:\t%d\n", tty->index); } static const struct tty_operations ptm_unix98_ops = { .lookup = ptm_unix98_lookup, .install = pty_unix98_install, .remove = pty_unix98_remove, .open = pty_open, .close = pty_close, .write = pty_write, .write_room = pty_write_room, .flush_buffer = pty_flush_buffer, .unthrottle = pty_unthrottle, .ioctl = pty_unix98_ioctl, .compat_ioctl = pty_unix98_compat_ioctl, .resize = pty_resize, .cleanup = pty_cleanup, .show_fdinfo = pty_show_fdinfo, }; static const struct tty_operations pty_unix98_ops = { .lookup = pts_unix98_lookup, .install = pty_unix98_install, .remove = pty_unix98_remove, .open = pty_open, .close = pty_close, .write = pty_write, .write_room = pty_write_room, .flush_buffer = pty_flush_buffer, .unthrottle = pty_unthrottle, .set_termios = pty_set_termios, .start = pty_start, .stop = pty_stop, .cleanup = pty_cleanup, }; /** * ptmx_open - open a unix 98 pty master * @inode: inode of device file * @filp: file pointer to tty * * Allocate a unix98 pty master device from the ptmx driver. * * Locking: tty_mutex protects the init_dev work. tty->count should * protect the rest. * allocated_ptys_lock handles the list of free pty numbers */ static int ptmx_open(struct inode *inode, struct file *filp) { struct pts_fs_info *fsi; struct tty_struct *tty; struct dentry *dentry; int retval; int index; nonseekable_open(inode, filp); /* We refuse fsnotify events on ptmx, since it's a shared resource */ file_set_fsnotify_mode(filp, FMODE_NONOTIFY); retval = tty_alloc_file(filp); if (retval) return retval; fsi = devpts_acquire(filp); if (IS_ERR(fsi)) { retval = PTR_ERR(fsi); goto out_free_file; } /* find a device that is not in use. 
*/ mutex_lock(&devpts_mutex); index = devpts_new_index(fsi); mutex_unlock(&devpts_mutex); retval = index; if (index < 0) goto out_put_fsi; mutex_lock(&tty_mutex); tty = tty_init_dev(ptm_driver, index); /* The tty returned here is locked so we can safely drop the mutex */ mutex_unlock(&tty_mutex); retval = PTR_ERR(tty); if (IS_ERR(tty)) goto out; /* * From here on out, the tty is "live", and the index and * fsi will be killed/put by the tty_release() */ set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */ tty->driver_data = fsi; tty_add_file(tty, filp); dentry = devpts_pty_new(fsi, index, tty->link); if (IS_ERR(dentry)) { retval = PTR_ERR(dentry); goto err_release; } tty->link->driver_data = dentry; retval = ptm_driver->ops->open(tty, filp); if (retval) goto err_release; tty_debug_hangup(tty, "opening (count=%d)\n", tty->count); tty_unlock(tty); return 0; err_release: tty_unlock(tty); // This will also put-ref the fsi tty_release(inode, filp); return retval; out: devpts_kill_index(fsi, index); out_put_fsi: devpts_release(fsi); out_free_file: tty_free_file(filp); return retval; } static struct file_operations ptmx_fops __ro_after_init; static void __init unix98_pty_init(void) { ptm_driver = tty_alloc_driver(NR_UNIX98_PTY_MAX, TTY_DRIVER_RESET_TERMIOS | TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV | TTY_DRIVER_DEVPTS_MEM | TTY_DRIVER_DYNAMIC_ALLOC); if (IS_ERR(ptm_driver)) panic("Couldn't allocate Unix98 ptm driver"); pts_driver = tty_alloc_driver(NR_UNIX98_PTY_MAX, TTY_DRIVER_RESET_TERMIOS | TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV | TTY_DRIVER_DEVPTS_MEM | TTY_DRIVER_DYNAMIC_ALLOC); if (IS_ERR(pts_driver)) panic("Couldn't allocate Unix98 pts driver"); ptm_driver->driver_name = "pty_master"; ptm_driver->name = "ptm"; ptm_driver->major = UNIX98_PTY_MASTER_MAJOR; ptm_driver->minor_start = 0; ptm_driver->type = TTY_DRIVER_TYPE_PTY; ptm_driver->subtype = PTY_TYPE_MASTER; ptm_driver->init_termios = tty_std_termios; ptm_driver->init_termios.c_iflag = 0; ptm_driver->init_termios.c_oflag = 0; ptm_driver->init_termios.c_cflag = B38400 | CS8 | CREAD; ptm_driver->init_termios.c_lflag = 0; ptm_driver->init_termios.c_ispeed = 38400; ptm_driver->init_termios.c_ospeed = 38400; ptm_driver->other = pts_driver; tty_set_operations(ptm_driver, &ptm_unix98_ops); pts_driver->driver_name = "pty_slave"; pts_driver->name = "pts"; pts_driver->major = UNIX98_PTY_SLAVE_MAJOR; pts_driver->minor_start = 0; pts_driver->type = TTY_DRIVER_TYPE_PTY; pts_driver->subtype = PTY_TYPE_SLAVE; pts_driver->init_termios = tty_std_termios; pts_driver->init_termios.c_cflag = B38400 | CS8 | CREAD; pts_driver->init_termios.c_ispeed = 38400; pts_driver->init_termios.c_ospeed = 38400; pts_driver->other = ptm_driver; tty_set_operations(pts_driver, &pty_unix98_ops); if (tty_register_driver(ptm_driver)) panic("Couldn't register Unix98 ptm driver"); if (tty_register_driver(pts_driver)) panic("Couldn't register Unix98 pts driver"); /* Now create the /dev/ptmx special device */ tty_default_fops(&ptmx_fops); ptmx_fops.open = ptmx_open; cdev_init(&ptmx_cdev, &ptmx_fops); if (cdev_add(&ptmx_cdev, MKDEV(TTYAUX_MAJOR, 2), 1) || register_chrdev_region(MKDEV(TTYAUX_MAJOR, 2), 1, "/dev/ptmx") < 0) panic("Couldn't register /dev/ptmx driver"); device_create(&tty_class, NULL, MKDEV(TTYAUX_MAJOR, 2), NULL, "ptmx"); } #else static inline void unix98_pty_init(void) { } #endif static int __init pty_init(void) { legacy_pty_init(); unix98_pty_init(); return 0; } device_initcall(pty_init);
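/*
 * A small userspace sketch of the master/slave flow implemented above:
 * opening /dev/ptmx runs ptmx_open(), which creates the pair with the
 * slave locked (TTY_PTY_LOCK); TIOCSPTLCK clears the lock, TIOCGPTN
 * reports the index, and TIOCGPTPEER is serviced by ptm_open_peer()
 * to open the slave race-free from the master fd. TIOCGPTPEER assumes
 * a reasonably recent kernel; fall back to opening /dev/pts/<n> by
 * name where it is unavailable.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	int master, slave, unlock = 0;
	unsigned int idx;
	char buf[64];
	ssize_t n;

	master = open("/dev/ptmx", O_RDWR | O_NOCTTY);
	if (master < 0) {
		perror("open /dev/ptmx");
		return 1;
	}
	if (ioctl(master, TIOCSPTLCK, &unlock))	/* clear TTY_PTY_LOCK */
		perror("TIOCSPTLCK");
	if (ioctl(master, TIOCGPTN, &idx) == 0)	/* pty index, i.e. /dev/pts/<idx> */
		printf("slave is /dev/pts/%u\n", idx);

	/* Race-free slave open via ptm_open_peer(). */
	slave = ioctl(master, TIOCGPTPEER, O_RDWR | O_NOCTTY);
	if (slave < 0) {
		perror("TIOCGPTPEER");
		return 1;
	}

	/* Data written to the master reaches the slave through pty_write(). */
	if (write(master, "hello\n", 6) != 6)
		perror("write master");
	n = read(slave, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		printf("slave read: %s", buf);
	}
	close(slave);
	close(master);
	return 0;
}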
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2005 Voltaire Inc. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. */ #ifndef IB_ADDR_H #define IB_ADDR_H #include <linux/ethtool.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/if_arp.h> #include <linux/netdevice.h> #include <linux/inetdevice.h> #include <linux/socket.h> #include <linux/if_vlan.h> #include <net/ipv6.h> #include <net/if_inet6.h> #include <net/ip.h> #include <rdma/ib_verbs.h> #include <rdma/ib_pack.h> #include <net/net_namespace.h> /** * struct rdma_dev_addr - Contains resolved RDMA hardware addresses * @src_dev_addr: Source MAC address. * @dst_dev_addr: Destination MAC address. * @broadcast: Broadcast address of the device. * @dev_type: The interface hardware type of the device. * @bound_dev_if: An optional device interface index. * @transport: The transport type used. * @net: Network namespace containing the bound_dev_if net_dev. * @sgid_attr: GID attribute to use for identified SGID */ struct rdma_dev_addr { unsigned char src_dev_addr[MAX_ADDR_LEN]; unsigned char dst_dev_addr[MAX_ADDR_LEN]; unsigned char broadcast[MAX_ADDR_LEN]; unsigned short dev_type; int bound_dev_if; enum rdma_transport_type transport; struct net *net; const struct ib_gid_attr *sgid_attr; enum rdma_network_type network; int hoplimit; }; /** * rdma_translate_ip - Translate a local IP address to an RDMA hardware * address. * * The dev_addr->net field must be initialized. */ int rdma_translate_ip(const struct sockaddr *addr, struct rdma_dev_addr *dev_addr); /** * rdma_resolve_ip - Resolve source and destination IP addresses to * RDMA hardware addresses. * @src_addr: An optional source address to use in the resolution. If a * source address is not provided, a usable address will be returned via * the callback. * @dst_addr: The destination address to resolve. * @addr: A reference to a data location that will receive the resolved * addresses. The data location must remain valid until the callback has * been invoked. The net field of the addr struct must be valid. * @timeout_ms: Amount of time to wait for the address resolution to complete. * @callback: Call invoked once address resolution has completed, timed out, * or been canceled. A status of 0 indicates success. * @resolve_by_gid_attr: Resolve the ip based on the GID attribute from * rdma_dev_addr. * @context: User-specified context associated with the call.
*/ int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr, struct rdma_dev_addr *addr, unsigned long timeout_ms, void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context), bool resolve_by_gid_attr, void *context); void rdma_addr_cancel(struct rdma_dev_addr *addr); int rdma_addr_size(const struct sockaddr *addr); int rdma_addr_size_in6(struct sockaddr_in6 *addr); int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr); static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr) { return ((u16)dev_addr->broadcast[8] << 8) | (u16)dev_addr->broadcast[9]; } static inline void ib_addr_set_pkey(struct rdma_dev_addr *dev_addr, u16 pkey) { dev_addr->broadcast[8] = pkey >> 8; dev_addr->broadcast[9] = (unsigned char) pkey; } static inline void ib_addr_get_mgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { memcpy(gid, dev_addr->broadcast + 4, sizeof *gid); } static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr) { return dev_addr->dev_type == ARPHRD_INFINIBAND ? 4 : 0; } static inline u16 rdma_vlan_dev_vlan_id(const struct net_device *dev) { return is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 0xffff; } static inline int rdma_ip2gid(struct sockaddr *addr, union ib_gid *gid) { switch (addr->sa_family) { case AF_INET: ipv6_addr_set_v4mapped(((struct sockaddr_in *) addr)->sin_addr.s_addr, (struct in6_addr *)gid); break; case AF_INET6: *(struct in6_addr *)&gid->raw = ((struct sockaddr_in6 *)addr)->sin6_addr; break; default: return -EINVAL; } return 0; } /* Important - sockaddr should be a union of sockaddr_in and sockaddr_in6 */ static inline void rdma_gid2ip(struct sockaddr *out, const union ib_gid *gid) { if (ipv6_addr_v4mapped((struct in6_addr *)gid)) { struct sockaddr_in *out_in = (struct sockaddr_in *)out; memset(out_in, 0, sizeof(*out_in)); out_in->sin_family = AF_INET; memcpy(&out_in->sin_addr.s_addr, gid->raw + 12, 4); } else { struct sockaddr_in6 *out_in = (struct sockaddr_in6 *)out; memset(out_in, 0, sizeof(*out_in)); out_in->sin6_family = AF_INET6; memcpy(&out_in->sin6_addr.s6_addr, gid->raw, 16); } } /* * rdma_get/set_sgid/dgid() APIs are applicable to IB, and iWarp. * They are not applicable to RoCE. * RoCE GIDs are derived from the IP addresses. */ static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { memcpy(gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof(*gid)); } static inline void rdma_addr_set_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { memcpy(dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid); } static inline void rdma_addr_get_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { memcpy(gid, dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid); } static inline void rdma_addr_set_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { memcpy(dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid); } static inline enum ib_mtu iboe_get_mtu(int mtu) { /* * Reduce IB headers from effective IBoE MTU. 
*/ mtu = mtu - (IB_GRH_BYTES + IB_UDP_BYTES + IB_BTH_BYTES + IB_EXT_XRC_BYTES + IB_EXT_ATOMICETH_BYTES + IB_ICRC_BYTES); if (mtu >= ib_mtu_enum_to_int(IB_MTU_4096)) return IB_MTU_4096; else if (mtu >= ib_mtu_enum_to_int(IB_MTU_2048)) return IB_MTU_2048; else if (mtu >= ib_mtu_enum_to_int(IB_MTU_1024)) return IB_MTU_1024; else if (mtu >= ib_mtu_enum_to_int(IB_MTU_512)) return IB_MTU_512; else if (mtu >= ib_mtu_enum_to_int(IB_MTU_256)) return IB_MTU_256; else return 0; } static inline int rdma_link_local_addr(struct in6_addr *addr) { if (addr->s6_addr32[0] == htonl(0xfe800000) && addr->s6_addr32[1] == 0) return 1; return 0; } static inline void rdma_get_ll_mac(struct in6_addr *addr, u8 *mac) { memcpy(mac, &addr->s6_addr[8], 3); memcpy(mac + 3, &addr->s6_addr[13], 3); mac[0] ^= 2; } static inline int rdma_is_multicast_addr(struct in6_addr *addr) { __be32 ipv4_addr; if (addr->s6_addr[0] == 0xff) return 1; ipv4_addr = addr->s6_addr32[3]; return (ipv6_addr_v4mapped(addr) && ipv4_is_multicast(ipv4_addr)); } static inline void rdma_get_mcast_mac(struct in6_addr *addr, u8 *mac) { int i; mac[0] = 0x33; mac[1] = 0x33; for (i = 2; i < 6; ++i) mac[i] = addr->s6_addr[i + 10]; } static inline u16 rdma_get_vlan_id(union ib_gid *dgid) { u16 vid; vid = dgid->raw[11] << 8 | dgid->raw[12]; return vid < 0x1000 ? vid : 0xffff; } static inline struct net_device *rdma_vlan_dev_real_dev(const struct net_device *dev) { return is_vlan_dev(dev) ? vlan_dev_real_dev(dev) : NULL; } #endif /* IB_ADDR_H */
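/*
 * Userspace sketch of the byte layout behind rdma_ip2gid()/rdma_gid2ip()
 * above: a RoCE GID is just the 16-byte IPv6 address, and an IPv4
 * address is embedded as a v4-mapped address (::ffff:a.b.c.d). This
 * standalone program only mirrors that layout and touches no RDMA API;
 * the v4-mapped test below is simplified (a full check also requires
 * bytes 0-9 to be zero, as ipv6_addr_v4mapped() does).
 */
#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned char gid[16];	/* same size as union ib_gid.raw */
	struct in_addr v4;
	char out[INET6_ADDRSTRLEN];

	inet_pton(AF_INET, "192.0.2.1", &v4);

	/* ip2gid for AF_INET: build ::ffff:192.0.2.1 */
	memset(gid, 0, sizeof(gid));
	gid[10] = 0xff;
	gid[11] = 0xff;
	memcpy(gid + 12, &v4, 4);
	inet_ntop(AF_INET6, gid, out, sizeof(out));
	printf("GID for 192.0.2.1: %s\n", out);

	/* gid2ip: a v4-mapped GID yields the original IPv4 address back */
	if (gid[10] == 0xff && gid[11] == 0xff) {
		memcpy(&v4, gid + 12, 4);
		inet_ntop(AF_INET, &v4, out, sizeof(out));
		printf("IP from GID: %s\n", out);
	}
	return 0;
}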
// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_tbf.c Token Bucket Filter queue. * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs - * original idea by Martin Devera */ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> #include <net/gso.h> #include <net/netlink.h> #include <net/sch_generic.h> #include <net/pkt_cls.h> #include <net/pkt_sched.h> /* Simple Token Bucket Filter. ======================================= SOURCE. ------- None. Description. ------------ A data flow obeys TBF with rate R and depth B, if for any time interval t_i...t_f the number of transmitted bits does not exceed B + R*(t_f-t_i).
Packetized version of this definition: The sequence of packets of sizes s_i served at moments t_i obeys TBF, if for any i<=k: s_i+....+s_k <= B + R*(t_k - t_i) Algorithm. ---------- Let N(t_i) be B/R initially and N(t) grow continuously with time as: N(t+delta) = min{B/R, N(t) + delta} If the first packet in queue has length S, it may be transmitted only at the time t_* when S/R <= N(t_*), and in this case N(t) jumps: N(t_* + 0) = N(t_* - 0) - S/R. Actually, QoS requires two TBF to be applied to a data stream. One of them controls steady state burst size, another one with rate P (peak rate) and depth M (equal to link MTU) limits bursts at a smaller time scale. It is easy to see that P>R, and B>M. If P is infinity, this double TBF is equivalent to a single one. When TBF works in reshaping mode, latency is estimated as: lat = max ((L-B)/R, (L-M)/P) NOTES. ------ If TBF throttles, it starts a watchdog timer, which will wake it up when it is ready to transmit. Note that the minimal timer resolution is 1/HZ. If no new packets arrive during this period, or if the device is not awakened by EOI for some previous packet, TBF can stop its activity for 1/HZ. This means that with depth B, the maximal rate is R_crit = B*HZ F.e. for 10Mbit ethernet and HZ=100 the minimal allowed B is ~10Kbytes. Note that the peak rate TBF is much tougher: with MTU 1500 P_crit = 150Kbytes/sec. So, if you need greater peak rates, use alpha with HZ=1000 :-) With classful TBF, limit is just kept for backwards compatibility. It is passed to the default bfifo qdisc - if the inner qdisc is changed the limit is not effective anymore. */ struct tbf_sched_data { /* Parameters */ u32 limit; /* Maximal length of backlog: bytes */ u32 max_size; s64 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */ s64 mtu; struct psched_ratecfg rate; struct psched_ratecfg peak; /* Variables */ s64 tokens; /* Current number of B tokens */ s64 ptokens; /* Current number of P tokens */ s64 t_c; /* Time check-point */ struct Qdisc *qdisc; /* Inner qdisc, default - bfifo queue */ struct qdisc_watchdog watchdog; /* Watchdog timer */ }; /* Time to Length, convert time in ns to length in bytes * to determine how many bytes can be sent in given time.
*/ static u64 psched_ns_t2l(const struct psched_ratecfg *r, u64 time_in_ns) { /* The formula is : * len = (time_in_ns * r->rate_bytes_ps) / NSEC_PER_SEC */ u64 len = time_in_ns * r->rate_bytes_ps; do_div(len, NSEC_PER_SEC); if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) { do_div(len, 53); len = len * 48; } if (len > r->overhead) len -= r->overhead; else len = 0; return len; } static void tbf_offload_change(struct Qdisc *sch) { struct tbf_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); struct tc_tbf_qopt_offload qopt; if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) return; qopt.command = TC_TBF_REPLACE; qopt.handle = sch->handle; qopt.parent = sch->parent; qopt.replace_params.rate = q->rate; qopt.replace_params.max_size = q->max_size; qopt.replace_params.qstats = &sch->qstats; dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt); } static void tbf_offload_destroy(struct Qdisc *sch) { struct net_device *dev = qdisc_dev(sch); struct tc_tbf_qopt_offload qopt; if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) return; qopt.command = TC_TBF_DESTROY; qopt.handle = sch->handle; qopt.parent = sch->parent; dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt); } static int tbf_offload_dump(struct Qdisc *sch) { struct tc_tbf_qopt_offload qopt; qopt.command = TC_TBF_STATS; qopt.handle = sch->handle; qopt.parent = sch->parent; qopt.stats.bstats = &sch->bstats; qopt.stats.qstats = &sch->qstats; return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_TBF, &qopt); } static void tbf_offload_graft(struct Qdisc *sch, struct Qdisc *new, struct Qdisc *old, struct netlink_ext_ack *extack) { struct tc_tbf_qopt_offload graft_offload = { .handle = sch->handle, .parent = sch->parent, .child_handle = new->handle, .command = TC_TBF_GRAFT, }; qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old, TC_SETUP_QDISC_TBF, &graft_offload, extack); } /* GSO packet is too big, segment it so that tbf can transmit * each segment in time */ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct tbf_sched_data *q = qdisc_priv(sch); struct sk_buff *segs, *nskb; netdev_features_t features = netif_skb_features(skb); unsigned int len = 0, prev_len = qdisc_pkt_len(skb), seg_len; int ret, nb; segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); if (IS_ERR_OR_NULL(segs)) return qdisc_drop(skb, sch, to_free); nb = 0; skb_list_walk_safe(segs, segs, nskb) { skb_mark_not_on_list(segs); seg_len = segs->len; qdisc_skb_cb(segs)->pkt_len = seg_len; ret = qdisc_enqueue(segs, q->qdisc, to_free); if (ret != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) qdisc_qstats_drop(sch); } else { nb++; len += seg_len; } } sch->q.qlen += nb; sch->qstats.backlog += len; if (nb > 0) { qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len); consume_skb(skb); return NET_XMIT_SUCCESS; } kfree_skb(skb); return NET_XMIT_DROP; } static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct tbf_sched_data *q = qdisc_priv(sch); unsigned int len = qdisc_pkt_len(skb); int ret; if (qdisc_pkt_len(skb) > q->max_size) { if (skb_is_gso(skb) && skb_gso_validate_mac_len(skb, q->max_size)) return tbf_segment(skb, sch, to_free); return qdisc_drop(skb, sch, to_free); } ret = qdisc_enqueue(skb, q->qdisc, to_free); if (ret != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) qdisc_qstats_drop(sch); return ret; } sch->qstats.backlog += len; sch->q.qlen++; return NET_XMIT_SUCCESS; } static bool tbf_peak_present(const struct 
tbf_sched_data *q) { return q->peak.rate_bytes_ps; } static struct sk_buff *tbf_dequeue(struct Qdisc *sch) { struct tbf_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; skb = q->qdisc->ops->peek(q->qdisc); if (skb) { s64 now; s64 toks; s64 ptoks = 0; unsigned int len = qdisc_pkt_len(skb); now = ktime_get_ns(); toks = min_t(s64, now - q->t_c, q->buffer); if (tbf_peak_present(q)) { ptoks = toks + q->ptokens; if (ptoks > q->mtu) ptoks = q->mtu; ptoks -= (s64) psched_l2t_ns(&q->peak, len); } toks += q->tokens; if (toks > q->buffer) toks = q->buffer; toks -= (s64) psched_l2t_ns(&q->rate, len); if ((toks|ptoks) >= 0) { skb = qdisc_dequeue_peeked(q->qdisc); if (unlikely(!skb)) return NULL; q->t_c = now; q->tokens = toks; q->ptokens = ptoks; qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; qdisc_bstats_update(sch, skb); return skb; } qdisc_watchdog_schedule_ns(&q->watchdog, now + max_t(long, -toks, -ptoks)); /* Maybe we have a shorter packet in the queue, which can be sent now. It sounds cool, but, however, this is wrong in principle. We MUST NOT reorder packets under these circumstances. Really, if we split the flow into independent subflows, it would be a very good solution. This is the main idea of all FQ algorithms (cf. CSZ, HPFQ, HFSC) */ qdisc_qstats_overlimit(sch); } return NULL; } static void tbf_reset(struct Qdisc *sch) { struct tbf_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); q->t_c = ktime_get_ns(); q->tokens = q->buffer; q->ptokens = q->mtu; qdisc_watchdog_cancel(&q->watchdog); } static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = { [TCA_TBF_PARMS] = { .len = sizeof(struct tc_tbf_qopt) }, [TCA_TBF_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, [TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, [TCA_TBF_RATE64] = { .type = NLA_U64 }, [TCA_TBF_PRATE64] = { .type = NLA_U64 }, [TCA_TBF_BURST] = { .type = NLA_U32 }, [TCA_TBF_PBURST] = { .type = NLA_U32 }, }; static int tbf_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { int err; struct tbf_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_TBF_MAX + 1]; struct tc_tbf_qopt *qopt; struct Qdisc *child = NULL; struct Qdisc *old = NULL; struct psched_ratecfg rate; struct psched_ratecfg peak; u64 max_size; s64 buffer, mtu; u64 rate64 = 0, prate64 = 0; err = nla_parse_nested_deprecated(tb, TCA_TBF_MAX, opt, tbf_policy, NULL); if (err < 0) return err; err = -EINVAL; if (tb[TCA_TBF_PARMS] == NULL) goto done; qopt = nla_data(tb[TCA_TBF_PARMS]); if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE) qdisc_put_rtab(qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB], NULL)); if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE) qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB], NULL)); buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U); mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U); if (tb[TCA_TBF_RATE64]) rate64 = nla_get_u64(tb[TCA_TBF_RATE64]); psched_ratecfg_precompute(&rate, &qopt->rate, rate64); if (tb[TCA_TBF_BURST]) { max_size = nla_get_u32(tb[TCA_TBF_BURST]); buffer = psched_l2t_ns(&rate, max_size); } else { max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U); } if (qopt->peakrate.rate) { if (tb[TCA_TBF_PRATE64]) prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]); psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64); if (peak.rate_bytes_ps <= rate.rate_bytes_ps) { pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equals to rate %llu !\n", peak.rate_bytes_ps, rate.rate_bytes_ps); err = -EINVAL; goto done; } if (tb[TCA_TBF_PBURST]) { u32 
pburst = nla_get_u32(tb[TCA_TBF_PBURST]); max_size = min_t(u32, max_size, pburst); mtu = psched_l2t_ns(&peak, pburst); } else { max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu)); } } else { memset(&peak, 0, sizeof(peak)); } if (max_size < psched_mtu(qdisc_dev(sch))) pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u) !\n", max_size, qdisc_dev(sch)->name, psched_mtu(qdisc_dev(sch))); if (!max_size) { err = -EINVAL; goto done; } if (q->qdisc != &noop_qdisc) { err = fifo_set_limit(q->qdisc, qopt->limit); if (err) goto done; } else if (qopt->limit > 0) { child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit, extack); if (IS_ERR(child)) { err = PTR_ERR(child); goto done; } /* child is fifo, no need to check for noop_qdisc */ qdisc_hash_add(child, true); } sch_tree_lock(sch); if (child) { qdisc_purge_queue(q->qdisc); old = q->qdisc; q->qdisc = child; } q->limit = qopt->limit; if (tb[TCA_TBF_PBURST]) q->mtu = mtu; else q->mtu = PSCHED_TICKS2NS(qopt->mtu); q->max_size = max_size; if (tb[TCA_TBF_BURST]) q->buffer = buffer; else q->buffer = PSCHED_TICKS2NS(qopt->buffer); q->tokens = q->buffer; q->ptokens = q->mtu; memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg)); memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg)); sch_tree_unlock(sch); qdisc_put(old); err = 0; tbf_offload_change(sch); done: return err; } static int tbf_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct tbf_sched_data *q = qdisc_priv(sch); qdisc_watchdog_init(&q->watchdog, sch); q->qdisc = &noop_qdisc; if (!opt) return -EINVAL; q->t_c = ktime_get_ns(); return tbf_change(sch, opt, extack); } static void tbf_destroy(struct Qdisc *sch) { struct tbf_sched_data *q = qdisc_priv(sch); qdisc_watchdog_cancel(&q->watchdog); tbf_offload_destroy(sch); qdisc_put(q->qdisc); } static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) { struct tbf_sched_data *q = qdisc_priv(sch); struct nlattr *nest; struct tc_tbf_qopt opt; int err; err = tbf_offload_dump(sch); if (err) return err; nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; opt.limit = q->limit; psched_ratecfg_getrate(&opt.rate, &q->rate); if (tbf_peak_present(q)) psched_ratecfg_getrate(&opt.peakrate, &q->peak); else memset(&opt.peakrate, 0, sizeof(opt.peakrate)); opt.mtu = PSCHED_NS2TICKS(q->mtu); opt.buffer = PSCHED_NS2TICKS(q->buffer); if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt)) goto nla_put_failure; if (q->rate.rate_bytes_ps >= (1ULL << 32) && nla_put_u64_64bit(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps, TCA_TBF_PAD)) goto nla_put_failure; if (tbf_peak_present(q) && q->peak.rate_bytes_ps >= (1ULL << 32) && nla_put_u64_64bit(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps, TCA_TBF_PAD)) goto nla_put_failure; return nla_nest_end(skb, nest); nla_put_failure: nla_nest_cancel(skb, nest); return -1; } static int tbf_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm) { struct tbf_sched_data *q = qdisc_priv(sch); tcm->tcm_handle |= TC_H_MIN(1); tcm->tcm_info = q->qdisc->handle; return 0; } static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct Qdisc **old, struct netlink_ext_ack *extack) { struct tbf_sched_data *q = qdisc_priv(sch); if (new == NULL) new = &noop_qdisc; *old = qdisc_replace(sch, new, &q->qdisc); tbf_offload_graft(sch, new, *old, extack); return 0; } static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg) { struct tbf_sched_data *q = qdisc_priv(sch); return q->qdisc; } static 
unsigned long tbf_find(struct Qdisc *sch, u32 classid) { return 1; } static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker) { if (!walker->stop) { tc_qdisc_stats_dump(sch, 1, walker); } } static const struct Qdisc_class_ops tbf_class_ops = { .graft = tbf_graft, .leaf = tbf_leaf, .find = tbf_find, .walk = tbf_walk, .dump = tbf_dump_class, }; static struct Qdisc_ops tbf_qdisc_ops __read_mostly = { .next = NULL, .cl_ops = &tbf_class_ops, .id = "tbf", .priv_size = sizeof(struct tbf_sched_data), .enqueue = tbf_enqueue, .dequeue = tbf_dequeue, .peek = qdisc_peek_dequeued, .init = tbf_init, .reset = tbf_reset, .destroy = tbf_destroy, .change = tbf_change, .dump = tbf_dump, .owner = THIS_MODULE, }; MODULE_ALIAS_NET_SCH("tbf"); static int __init tbf_module_init(void) { return register_qdisc(&tbf_qdisc_ops); } static void __exit tbf_module_exit(void) { unregister_qdisc(&tbf_qdisc_ops); } module_init(tbf_module_init) module_exit(tbf_module_exit) MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Token Bucket Filter qdisc");
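/*
 * Standalone sketch of the token arithmetic used by tbf_dequeue()
 * above, with tokens kept in nanoseconds of transmission time just
 * like q->tokens (the peak-rate bucket is omitted). The rate and
 * burst below are illustrative values, not anything the qdisc
 * mandates; at 10 Mbit/s a 1500-byte packet costs 1.2 ms of tokens,
 * so with 100 us arrivals the 10 ms bucket drains and throttles.
 */
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

static uint64_t rate_bps = 1250000;	/* R: 10 Mbit/s in bytes per second */
static int64_t buffer_ns = 10000000;	/* depth B: 10 ms worth of tokens */
static int64_t tokens;			/* current tokens, ns */
static uint64_t t_c;			/* time of last check, ns */

/* length -> time, the inverse of psched_ns_t2l(): ns needed to send len bytes */
static int64_t l2t_ns(unsigned int len)
{
	return (int64_t)(len * NSEC_PER_SEC / rate_bps);
}

/* Returns 1 if a packet of @len bytes may be sent at @now, charging tokens. */
static int tbf_try_send(uint64_t now, unsigned int len)
{
	/* Accumulate idle time since t_c, capped at the bucket depth. */
	int64_t toks = (int64_t)(now - t_c);

	if (toks > buffer_ns)
		toks = buffer_ns;
	toks += tokens;
	if (toks > buffer_ns)
		toks = buffer_ns;
	toks -= l2t_ns(len);
	if (toks < 0)
		return 0;	/* would exceed B + R*(t_f - t_i): throttle */
	t_c = now;
	tokens = toks;
	return 1;
}

int main(void)
{
	uint64_t now = 0;
	int i;

	tokens = buffer_ns;	/* bucket starts full, as in tbf_reset() */
	for (i = 0; i < 12; i++, now += 100000)	/* 1500-byte packet every 100 us */
		printf("t=%lluus: %s\n", (unsigned long long)(now / 1000),
		       tbf_try_send(now, 1500) ? "sent" : "throttled");
	return 0;
}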
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 Patrick McHardy <kaber@trash.net> */ #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_SYNPROXY.h> #include <net/netfilter/nf_synproxy.h> static unsigned int synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_synproxy_info *info = par->targinfo; struct net *net = xt_net(par); struct synproxy_net *snet = synproxy_pernet(net); struct synproxy_options opts = {}; struct tcphdr *th, _th; if (nf_ip_checksum(skb, xt_hooknum(par), par->thoff, IPPROTO_TCP)) return NF_DROP; th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th); if (th == NULL) return NF_DROP; if (!synproxy_parse_options(skb, par->thoff, th, &opts)) return NF_DROP; if (th->syn && !(th->ack || th->fin || th->rst)) { /* Initial SYN from client */ this_cpu_inc(snet->stats->syn_received); if (th->ece && th->cwr) opts.options |= XT_SYNPROXY_OPT_ECN; opts.options &= info->options; opts.mss_encode = opts.mss_option; opts.mss_option = info->mss; if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy_init_timestamp_cookie(info, &opts); else opts.options &= ~(XT_SYNPROXY_OPT_WSCALE | XT_SYNPROXY_OPT_SACK_PERM | XT_SYNPROXY_OPT_ECN); synproxy_send_client_synack(net, skb, th, &opts); consume_skb(skb); return NF_STOLEN; } else if (th->ack && !(th->fin || th->rst || th->syn)) { /* ACK from client */ if (synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq))) { consume_skb(skb); return NF_STOLEN; } else { return NF_DROP; } } return XT_CONTINUE; } static int synproxy_tg4_check(const struct xt_tgchk_param *par) { struct synproxy_net *snet = synproxy_pernet(par->net); const struct ipt_entry *e = par->entryinfo; int err; if (e->ip.proto != IPPROTO_TCP || e->ip.invflags & XT_INV_PROTO) return -EINVAL; err = nf_ct_netns_get(par->net, par->family); if (err) return err; err = nf_synproxy_ipv4_init(snet, par->net); if (err) { nf_ct_netns_put(par->net, par->family); return err; } return err; } static void synproxy_tg4_destroy(const struct xt_tgdtor_param *par) { struct synproxy_net *snet = synproxy_pernet(par->net); nf_synproxy_ipv4_fini(snet, par->net); nf_ct_netns_put(par->net, par->family); } static struct xt_target synproxy_tg4_reg __read_mostly = { .name = "SYNPROXY", .family = NFPROTO_IPV4, .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD), .target = synproxy_tg4, .targetsize = sizeof(struct xt_synproxy_info), .checkentry = synproxy_tg4_check, .destroy = synproxy_tg4_destroy, .me = THIS_MODULE, }; static int __init synproxy_tg4_init(void) { return xt_register_target(&synproxy_tg4_reg); } static void __exit synproxy_tg4_exit(void) { xt_unregister_target(&synproxy_tg4_reg); } module_init(synproxy_tg4_init); module_exit(synproxy_tg4_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_DESCRIPTION("Intercept TCP connections and establish them using syncookies");
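/*
 * The target above keys off exact TCP flag combinations: a pure SYN is
 * answered with a cookie SYN/ACK (NF_STOLEN), a pure ACK is validated
 * against the cookie (NF_STOLEN or NF_DROP), and anything else falls
 * through (XT_CONTINUE). Below is a standalone sketch of just that
 * classification, useful for reasoning about which segments the proxy
 * intercepts. A typical ruleset (from common SYNPROXY examples, not
 * mandated by this file) looks like:
 *   iptables -A INPUT -p tcp --dport 80 -m state \
 *     --state INVALID,UNTRACKED -j SYNPROXY \
 *     --sack-perm --timestamp --wscale 7 --mss 1460
 */
#include <stdio.h>

enum verdict { COOKIE_SYNACK, COOKIE_CHECK, PASS_THROUGH };

struct flags { int syn, ack, fin, rst; };

static enum verdict classify(struct flags f)
{
	if (f.syn && !(f.ack || f.fin || f.rst))
		return COOKIE_SYNACK;	/* initial SYN from client */
	if (f.ack && !(f.fin || f.rst || f.syn))
		return COOKIE_CHECK;	/* client's handshake ACK */
	return PASS_THROUGH;
}

int main(void)
{
	static const char *names[] = { "SYN/ACK cookie", "cookie check", "pass through" };
	struct flags pure_syn = { .syn = 1 };
	struct flags pure_ack = { .ack = 1 };
	struct flags synack   = { .syn = 1, .ack = 1 };

	printf("SYN     -> %s\n", names[classify(pure_syn)]);
	printf("ACK     -> %s\n", names[classify(pure_ack)]);
	printf("SYN|ACK -> %s\n", names[classify(synack)]);
	return 0;
}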
// SPDX-License-Identifier: GPL-2.0 #include <linux/ceph/ceph_debug.h> #include <linux/module.h> #include <linux/err.h> #include <linux/highmem.h> #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/slab.h> #include <linux/uaccess.h> #ifdef CONFIG_BLOCK #include <linux/bio.h> #endif #include <linux/ceph/ceph_features.h> #include <linux/ceph/libceph.h> #include <linux/ceph/osd_client.h> #include <linux/ceph/messenger.h> #include <linux/ceph/decode.h> #include <linux/ceph/auth.h> #include <linux/ceph/pagelist.h> #include <linux/ceph/striper.h> #define OSD_OPREPLY_FRONT_LEN 512 static struct kmem_cache *ceph_osd_request_cache; static const struct ceph_connection_operations osd_con_ops; /* * Implement client access to distributed object storage cluster. * * All data objects are stored within a cluster/cloud of OSDs, or * "object storage devices." (Note that Ceph OSDs have _nothing_ to * do with the T10 OSD extensions to SCSI.) Ceph OSDs are simply * remote daemons serving up and coordinating consistent and safe * access to storage. * * Cluster membership and the mapping of data objects onto storage devices * are described by the osd map. * * We keep track of pending OSD requests (read, write), resubmit * requests to different OSDs when the cluster topology/data layout * change, or retry the affected requests when the communications * channel with an OSD is reset. */ static void link_request(struct ceph_osd *osd, struct ceph_osd_request *req); static void unlink_request(struct ceph_osd *osd, struct ceph_osd_request *req); static void link_linger(struct ceph_osd *osd, struct ceph_osd_linger_request *lreq); static void unlink_linger(struct ceph_osd *osd, struct ceph_osd_linger_request *lreq); static void clear_backoffs(struct ceph_osd *osd); #if 1 static inline bool rwsem_is_wrlocked(struct rw_semaphore *sem) { bool wrlocked = true; if (unlikely(down_read_trylock(sem))) { wrlocked = false; up_read(sem); } return wrlocked; } static inline void verify_osdc_locked(struct ceph_osd_client *osdc) { WARN_ON(!rwsem_is_locked(&osdc->lock)); } static inline void verify_osdc_wrlocked(struct ceph_osd_client *osdc) { WARN_ON(!rwsem_is_wrlocked(&osdc->lock)); } static inline void verify_osd_locked(struct ceph_osd *osd) { struct ceph_osd_client *osdc = osd->o_osdc; WARN_ON(!(mutex_is_locked(&osd->lock) && rwsem_is_locked(&osdc->lock)) && !rwsem_is_wrlocked(&osdc->lock)); } static inline void verify_lreq_locked(struct ceph_osd_linger_request *lreq) { WARN_ON(!mutex_is_locked(&lreq->lock)); } #else static inline void verify_osdc_locked(struct ceph_osd_client *osdc) { } static inline void verify_osdc_wrlocked(struct ceph_osd_client *osdc) { } static inline void verify_osd_locked(struct ceph_osd *osd) { } static inline void verify_lreq_locked(struct ceph_osd_linger_request *lreq) { } #endif /* * calculate the mapping of a file extent onto an object, and fill out the * request accordingly. shorten extent as necessary if it crosses an * object boundary. * * fill osd op in request message. */ static int calc_layout(struct ceph_file_layout *layout, u64 off, u64 *plen, u64 *objnum, u64 *objoff, u64 *objlen) { u64 orig_len = *plen; u32 xlen; /* object extent?
*/ ceph_calc_file_object_mapping(layout, off, orig_len, objnum, objoff, &xlen); *objlen = xlen; if (*objlen < orig_len) { *plen = *objlen; dout(" skipping last %llu, final file extent %llu~%llu\n", orig_len - *plen, off, *plen); } dout("calc_layout objnum=%llx %llu~%llu\n", *objnum, *objoff, *objlen); return 0; } static void ceph_osd_data_init(struct ceph_osd_data *osd_data) { memset(osd_data, 0, sizeof (*osd_data)); osd_data->type = CEPH_OSD_DATA_TYPE_NONE; } /* * Consumes @pages if @own_pages is true. */ static void ceph_osd_data_pages_init(struct ceph_osd_data *osd_data, struct page **pages, u64 length, u32 alignment, bool pages_from_pool, bool own_pages) { osd_data->type = CEPH_OSD_DATA_TYPE_PAGES; osd_data->pages = pages; osd_data->length = length; osd_data->alignment = alignment; osd_data->pages_from_pool = pages_from_pool; osd_data->own_pages = own_pages; } /* * Consumes a ref on @pagelist. */ static void ceph_osd_data_pagelist_init(struct ceph_osd_data *osd_data, struct ceph_pagelist *pagelist) { osd_data->type = CEPH_OSD_DATA_TYPE_PAGELIST; osd_data->pagelist = pagelist; } #ifdef CONFIG_BLOCK static void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data, struct ceph_bio_iter *bio_pos, u32 bio_length) { osd_data->type = CEPH_OSD_DATA_TYPE_BIO; osd_data->bio_pos = *bio_pos; osd_data->bio_length = bio_length; } #endif /* CONFIG_BLOCK */ static void ceph_osd_data_bvecs_init(struct ceph_osd_data *osd_data, struct ceph_bvec_iter *bvec_pos, u32 num_bvecs) { osd_data->type = CEPH_OSD_DATA_TYPE_BVECS; osd_data->bvec_pos = *bvec_pos; osd_data->num_bvecs = num_bvecs; } static void ceph_osd_iter_init(struct ceph_osd_data *osd_data, struct iov_iter *iter) { osd_data->type = CEPH_OSD_DATA_TYPE_ITER; osd_data->iter = *iter; } static struct ceph_osd_data * osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which) { BUG_ON(which >= osd_req->r_num_ops); return &osd_req->r_ops[which].raw_data_in; } struct ceph_osd_data * osd_req_op_extent_osd_data(struct ceph_osd_request *osd_req, unsigned int which) { return osd_req_op_data(osd_req, which, extent, osd_data); } EXPORT_SYMBOL(osd_req_op_extent_osd_data); void osd_req_op_raw_data_in_pages(struct ceph_osd_request *osd_req, unsigned int which, struct page **pages, u64 length, u32 alignment, bool pages_from_pool, bool own_pages) { struct ceph_osd_data *osd_data; osd_data = osd_req_op_raw_data_in(osd_req, which); ceph_osd_data_pages_init(osd_data, pages, length, alignment, pages_from_pool, own_pages); } EXPORT_SYMBOL(osd_req_op_raw_data_in_pages); void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *osd_req, unsigned int which, struct page **pages, u64 length, u32 alignment, bool pages_from_pool, bool own_pages) { struct ceph_osd_data *osd_data; osd_data = osd_req_op_data(osd_req, which, extent, osd_data); ceph_osd_data_pages_init(osd_data, pages, length, alignment, pages_from_pool, own_pages); } EXPORT_SYMBOL(osd_req_op_extent_osd_data_pages); #ifdef CONFIG_BLOCK void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req, unsigned int which, struct ceph_bio_iter *bio_pos, u32 bio_length) { struct ceph_osd_data *osd_data; osd_data = osd_req_op_data(osd_req, which, extent, osd_data); ceph_osd_data_bio_init(osd_data, bio_pos, bio_length); } EXPORT_SYMBOL(osd_req_op_extent_osd_data_bio); #endif /* CONFIG_BLOCK */ void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req, unsigned int which, struct bio_vec *bvecs, u32 num_bvecs, u32 bytes) { struct ceph_osd_data *osd_data; struct ceph_bvec_iter it = { 
.bvecs = bvecs, .iter = { .bi_size = bytes }, }; osd_data = osd_req_op_data(osd_req, which, extent, osd_data); ceph_osd_data_bvecs_init(osd_data, &it, num_bvecs); } EXPORT_SYMBOL(osd_req_op_extent_osd_data_bvecs); void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req, unsigned int which, struct ceph_bvec_iter *bvec_pos) { struct ceph_osd_data *osd_data; osd_data = osd_req_op_data(osd_req, which, extent, osd_data); ceph_osd_data_bvecs_init(osd_data, bvec_pos, 0); } EXPORT_SYMBOL(osd_req_op_extent_osd_data_bvec_pos); /** * osd_req_op_extent_osd_iter - Set up an operation with an iterator buffer * @osd_req: The request to set up * @which: Index of the operation in which to set the iter * @iter: The buffer iterator */ void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req, unsigned int which, struct iov_iter *iter) { struct ceph_osd_data *osd_data; osd_data = osd_req_op_data(osd_req, which, extent, osd_data); ceph_osd_iter_init(osd_data, iter); } EXPORT_SYMBOL(osd_req_op_extent_osd_iter); static void osd_req_op_cls_request_info_pagelist( struct ceph_osd_request *osd_req, unsigned int which, struct ceph_pagelist *pagelist) { struct ceph_osd_data *osd_data; osd_data = osd_req_op_data(osd_req, which, cls, request_info); ceph_osd_data_pagelist_init(osd_data, pagelist); } void osd_req_op_cls_request_data_pages(struct ceph_osd_request *osd_req, unsigned int which, struct page **pages, u64 length, u32 alignment, bool pages_from_pool, bool own_pages) { struct ceph_osd_data *osd_data; osd_data = osd_req_op_data(osd_req, which, cls, request_data); ceph_osd_data_pages_init(osd_data, pages, length, alignment, pages_from_pool, own_pages); osd_req->r_ops[which].cls.indata_len += length; osd_req->r_ops[which].indata_len += length; } EXPORT_SYMBOL(osd_req_op_cls_request_data_pages); void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req, unsigned int which, struct bio_vec *bvecs, u32 num_bvecs, u32 bytes) { struct ceph_osd_data *osd_data; struct ceph_bvec_iter it = { .bvecs = bvecs, .iter = { .bi_size = bytes }, }; osd_data = osd_req_op_data(osd_req, which, cls, request_data); ceph_osd_data_bvecs_init(osd_data, &it, num_bvecs); osd_req->r_ops[which].cls.indata_len += bytes; osd_req->r_ops[which].indata_len += bytes; } EXPORT_SYMBOL(osd_req_op_cls_request_data_bvecs); void osd_req_op_cls_response_data_pages(struct ceph_osd_request *osd_req, unsigned int which, struct page **pages, u64 length, u32 alignment, bool pages_from_pool, bool own_pages) { struct ceph_osd_data *osd_data; osd_data = osd_req_op_data(osd_req, which, cls, response_data); ceph_osd_data_pages_init(osd_data, pages, length, alignment, pages_from_pool, own_pages); } EXPORT_SYMBOL(osd_req_op_cls_response_data_pages); static u64 ceph_osd_data_length(struct ceph_osd_data *osd_data) { switch (osd_data->type) { case CEPH_OSD_DATA_TYPE_NONE: return 0; case CEPH_OSD_DATA_TYPE_PAGES: return osd_data->length; case CEPH_OSD_DATA_TYPE_PAGELIST: return (u64)osd_data->pagelist->length; #ifdef CONFIG_BLOCK case CEPH_OSD_DATA_TYPE_BIO: return (u64)osd_data->bio_length; #endif /* CONFIG_BLOCK */ case CEPH_OSD_DATA_TYPE_BVECS: return osd_data->bvec_pos.iter.bi_size; case CEPH_OSD_DATA_TYPE_ITER: return iov_iter_count(&osd_data->iter); default: WARN(true, "unrecognized data type %d\n", (int)osd_data->type); return 0; } } static void ceph_osd_data_release(struct ceph_osd_data *osd_data) { if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES && osd_data->own_pages) { int num_pages; num_pages = 
calc_pages_for((u64)osd_data->alignment, (u64)osd_data->length); ceph_release_page_vector(osd_data->pages, num_pages); } else if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGELIST) { ceph_pagelist_release(osd_data->pagelist); } ceph_osd_data_init(osd_data); } static void osd_req_op_data_release(struct ceph_osd_request *osd_req, unsigned int which) { struct ceph_osd_req_op *op; BUG_ON(which >= osd_req->r_num_ops); op = &osd_req->r_ops[which]; switch (op->op) { case CEPH_OSD_OP_READ: case CEPH_OSD_OP_SPARSE_READ: case CEPH_OSD_OP_WRITE: case CEPH_OSD_OP_WRITEFULL: kfree(op->extent.sparse_ext); ceph_osd_data_release(&op->extent.osd_data); break; case CEPH_OSD_OP_CALL: ceph_osd_data_release(&op->cls.request_info); ceph_osd_data_release(&op->cls.request_data); ceph_osd_data_release(&op->cls.response_data); break; case CEPH_OSD_OP_SETXATTR: case CEPH_OSD_OP_CMPXATTR: ceph_osd_data_release(&op->xattr.osd_data); break; case CEPH_OSD_OP_STAT: ceph_osd_data_release(&op->raw_data_in); break; case CEPH_OSD_OP_NOTIFY_ACK: ceph_osd_data_release(&op->notify_ack.request_data); break; case CEPH_OSD_OP_NOTIFY: ceph_osd_data_release(&op->notify.request_data); ceph_osd_data_release(&op->notify.response_data); break; case CEPH_OSD_OP_LIST_WATCHERS: ceph_osd_data_release(&op->list_watchers.response_data); break; case CEPH_OSD_OP_COPY_FROM2: ceph_osd_data_release(&op->copy_from.osd_data); break; default: break; } } /* * Assumes @t is zero-initialized. */ static void target_init(struct ceph_osd_request_target *t) { ceph_oid_init(&t->base_oid); ceph_oloc_init(&t->base_oloc); ceph_oid_init(&t->target_oid); ceph_oloc_init(&t->target_oloc); ceph_osds_init(&t->acting); ceph_osds_init(&t->up); t->size = -1; t->min_size = -1; t->osd = CEPH_HOMELESS_OSD; } static void target_copy(struct ceph_osd_request_target *dest, const struct ceph_osd_request_target *src) { ceph_oid_copy(&dest->base_oid, &src->base_oid); ceph_oloc_copy(&dest->base_oloc, &src->base_oloc); ceph_oid_copy(&dest->target_oid, &src->target_oid); ceph_oloc_copy(&dest->target_oloc, &src->target_oloc); dest->pgid = src->pgid; /* struct */ dest->spgid = src->spgid; /* struct */ dest->pg_num = src->pg_num; dest->pg_num_mask = src->pg_num_mask; ceph_osds_copy(&dest->acting, &src->acting); ceph_osds_copy(&dest->up, &src->up); dest->size = src->size; dest->min_size = src->min_size; dest->sort_bitwise = src->sort_bitwise; dest->recovery_deletes = src->recovery_deletes; dest->flags = src->flags; dest->used_replica = src->used_replica; dest->paused = src->paused; dest->epoch = src->epoch; dest->last_force_resend = src->last_force_resend; dest->osd = src->osd; } static void target_destroy(struct ceph_osd_request_target *t) { ceph_oid_destroy(&t->base_oid); ceph_oloc_destroy(&t->base_oloc); ceph_oid_destroy(&t->target_oid); ceph_oloc_destroy(&t->target_oloc); } /* * requests */ static void request_release_checks(struct ceph_osd_request *req) { WARN_ON(!RB_EMPTY_NODE(&req->r_node)); WARN_ON(!RB_EMPTY_NODE(&req->r_mc_node)); WARN_ON(!list_empty(&req->r_private_item)); WARN_ON(req->r_osd); } static void ceph_osdc_release_request(struct kref *kref) { struct ceph_osd_request *req = container_of(kref, struct ceph_osd_request, r_kref); unsigned int which; dout("%s %p (r_request %p r_reply %p)\n", __func__, req, req->r_request, req->r_reply); request_release_checks(req); if (req->r_request) ceph_msg_put(req->r_request); if (req->r_reply) ceph_msg_put(req->r_reply); for (which = 0; which < req->r_num_ops; which++) osd_req_op_data_release(req, which); target_destroy(&req->r_t); 
ceph_put_snap_context(req->r_snapc); if (req->r_mempool) mempool_free(req, req->r_osdc->req_mempool); else if (req->r_num_ops <= CEPH_OSD_SLAB_OPS) kmem_cache_free(ceph_osd_request_cache, req); else kfree(req); } void ceph_osdc_get_request(struct ceph_osd_request *req) { dout("%s %p (was %d)\n", __func__, req, kref_read(&req->r_kref)); kref_get(&req->r_kref); } EXPORT_SYMBOL(ceph_osdc_get_request); void ceph_osdc_put_request(struct ceph_osd_request *req) { if (req) { dout("%s %p (was %d)\n", __func__, req, kref_read(&req->r_kref)); kref_put(&req->r_kref, ceph_osdc_release_request); } } EXPORT_SYMBOL(ceph_osdc_put_request); static void request_init(struct ceph_osd_request *req) { /* req only, each op is zeroed in osd_req_op_init() */ memset(req, 0, sizeof(*req)); kref_init(&req->r_kref); init_completion(&req->r_completion); RB_CLEAR_NODE(&req->r_node); RB_CLEAR_NODE(&req->r_mc_node); INIT_LIST_HEAD(&req->r_private_item); target_init(&req->r_t); } struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, struct ceph_snap_context *snapc, unsigned int num_ops, bool use_mempool, gfp_t gfp_flags) { struct ceph_osd_request *req; if (use_mempool) { BUG_ON(num_ops > CEPH_OSD_SLAB_OPS); req = mempool_alloc(osdc->req_mempool, gfp_flags); } else if (num_ops <= CEPH_OSD_SLAB_OPS) { req = kmem_cache_alloc(ceph_osd_request_cache, gfp_flags); } else { BUG_ON(num_ops > CEPH_OSD_MAX_OPS); req = kmalloc(struct_size(req, r_ops, num_ops), gfp_flags); } if (unlikely(!req)) return NULL; request_init(req); req->r_osdc = osdc; req->r_mempool = use_mempool; req->r_num_ops = num_ops; req->r_snapid = CEPH_NOSNAP; req->r_snapc = ceph_get_snap_context(snapc); dout("%s req %p\n", __func__, req); return req; } EXPORT_SYMBOL(ceph_osdc_alloc_request); static int ceph_oloc_encoding_size(const struct ceph_object_locator *oloc) { return 8 + 4 + 4 + 4 + (oloc->pool_ns ? oloc->pool_ns->len : 0); } static int __ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp, int num_request_data_items, int num_reply_data_items) { struct ceph_osd_client *osdc = req->r_osdc; struct ceph_msg *msg; int msg_size; WARN_ON(req->r_request || req->r_reply); WARN_ON(ceph_oid_empty(&req->r_base_oid)); WARN_ON(ceph_oloc_empty(&req->r_base_oloc)); /* create request message */ msg_size = CEPH_ENCODING_START_BLK_LEN + CEPH_PGID_ENCODING_LEN + 1; /* spgid */ msg_size += 4 + 4 + 4; /* hash, osdmap_epoch, flags */ msg_size += CEPH_ENCODING_START_BLK_LEN + sizeof(struct ceph_osd_reqid); /* reqid */ msg_size += sizeof(struct ceph_blkin_trace_info); /* trace */ msg_size += 4 + sizeof(struct ceph_timespec); /* client_inc, mtime */ msg_size += CEPH_ENCODING_START_BLK_LEN + ceph_oloc_encoding_size(&req->r_base_oloc); /* oloc */ msg_size += 4 + req->r_base_oid.name_len; /* oid */ msg_size += 2 + req->r_num_ops * sizeof(struct ceph_osd_op); msg_size += 8; /* snapid */ msg_size += 8; /* snap_seq */ msg_size += 4 + 8 * (req->r_snapc ? 
req->r_snapc->num_snaps : 0); msg_size += 4 + 8; /* retry_attempt, features */ if (req->r_mempool) msg = ceph_msgpool_get(&osdc->msgpool_op, msg_size, num_request_data_items); else msg = ceph_msg_new2(CEPH_MSG_OSD_OP, msg_size, num_request_data_items, gfp, true); if (!msg) return -ENOMEM; memset(msg->front.iov_base, 0, msg->front.iov_len); req->r_request = msg; /* create reply message */ msg_size = OSD_OPREPLY_FRONT_LEN; msg_size += req->r_base_oid.name_len; msg_size += req->r_num_ops * sizeof(struct ceph_osd_op); if (req->r_mempool) msg = ceph_msgpool_get(&osdc->msgpool_op_reply, msg_size, num_reply_data_items); else msg = ceph_msg_new2(CEPH_MSG_OSD_OPREPLY, msg_size, num_reply_data_items, gfp, true); if (!msg) return -ENOMEM; req->r_reply = msg; return 0; } static bool osd_req_opcode_valid(u16 opcode) { switch (opcode) { #define GENERATE_CASE(op, opcode, str) case CEPH_OSD_OP_##op: return true; __CEPH_FORALL_OSD_OPS(GENERATE_CASE) #undef GENERATE_CASE default: return false; } } static void get_num_data_items(struct ceph_osd_request *req, int *num_request_data_items, int *num_reply_data_items) { struct ceph_osd_req_op *op; *num_request_data_items = 0; *num_reply_data_items = 0; for (op = req->r_ops; op != &req->r_ops[req->r_num_ops]; op++) { switch (op->op) { /* request */ case CEPH_OSD_OP_WRITE: case CEPH_OSD_OP_WRITEFULL: case CEPH_OSD_OP_SETXATTR: case CEPH_OSD_OP_CMPXATTR: case CEPH_OSD_OP_NOTIFY_ACK: case CEPH_OSD_OP_COPY_FROM2: *num_request_data_items += 1; break; /* reply */ case CEPH_OSD_OP_STAT: case CEPH_OSD_OP_READ: case CEPH_OSD_OP_SPARSE_READ: case CEPH_OSD_OP_LIST_WATCHERS: *num_reply_data_items += 1; break; /* both */ case CEPH_OSD_OP_NOTIFY: *num_request_data_items += 1; *num_reply_data_items += 1; break; case CEPH_OSD_OP_CALL: *num_request_data_items += 2; *num_reply_data_items += 1; break; default: WARN_ON(!osd_req_opcode_valid(op->op)); break; } } } /* * oid, oloc and OSD op opcode(s) must be filled in before this function * is called. */ int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp) { int num_request_data_items, num_reply_data_items; get_num_data_items(req, &num_request_data_items, &num_reply_data_items); return __ceph_osdc_alloc_messages(req, gfp, num_request_data_items, num_reply_data_items); } EXPORT_SYMBOL(ceph_osdc_alloc_messages); /* * This is an osd op init function for opcodes that have no data or * other information associated with them. It also serves as a * common init routine for all the other init functions, below. 
*/ struct ceph_osd_req_op * osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, u32 flags) { struct ceph_osd_req_op *op; BUG_ON(which >= osd_req->r_num_ops); BUG_ON(!osd_req_opcode_valid(opcode)); op = &osd_req->r_ops[which]; memset(op, 0, sizeof (*op)); op->op = opcode; op->flags = flags; return op; } EXPORT_SYMBOL(osd_req_op_init); void osd_req_op_extent_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, u64 offset, u64 length, u64 truncate_size, u32 truncate_seq) { struct ceph_osd_req_op *op = osd_req_op_init(osd_req, which, opcode, 0); size_t payload_len = 0; BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE && opcode != CEPH_OSD_OP_WRITEFULL && opcode != CEPH_OSD_OP_ZERO && opcode != CEPH_OSD_OP_TRUNCATE && opcode != CEPH_OSD_OP_SPARSE_READ); op->extent.offset = offset; op->extent.length = length; op->extent.truncate_size = truncate_size; op->extent.truncate_seq = truncate_seq; if (opcode == CEPH_OSD_OP_WRITE || opcode == CEPH_OSD_OP_WRITEFULL) payload_len += length; op->indata_len = payload_len; } EXPORT_SYMBOL(osd_req_op_extent_init); void osd_req_op_extent_update(struct ceph_osd_request *osd_req, unsigned int which, u64 length) { struct ceph_osd_req_op *op; u64 previous; BUG_ON(which >= osd_req->r_num_ops); op = &osd_req->r_ops[which]; previous = op->extent.length; if (length == previous) return; /* Nothing to do */ BUG_ON(length > previous); op->extent.length = length; if (op->op == CEPH_OSD_OP_WRITE || op->op == CEPH_OSD_OP_WRITEFULL) op->indata_len -= previous - length; } EXPORT_SYMBOL(osd_req_op_extent_update); void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req, unsigned int which, u64 offset_inc) { struct ceph_osd_req_op *op, *prev_op; BUG_ON(which + 1 >= osd_req->r_num_ops); prev_op = &osd_req->r_ops[which]; op = osd_req_op_init(osd_req, which + 1, prev_op->op, prev_op->flags); /* dup previous one */ op->indata_len = prev_op->indata_len; op->outdata_len = prev_op->outdata_len; op->extent = prev_op->extent; /* adjust offset */ op->extent.offset += offset_inc; op->extent.length -= offset_inc; if (op->op == CEPH_OSD_OP_WRITE || op->op == CEPH_OSD_OP_WRITEFULL) op->indata_len -= offset_inc; } EXPORT_SYMBOL(osd_req_op_extent_dup_last); int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, const char *class, const char *method) { struct ceph_osd_req_op *op; struct ceph_pagelist *pagelist; size_t payload_len = 0; size_t size; int ret; op = osd_req_op_init(osd_req, which, CEPH_OSD_OP_CALL, 0); pagelist = ceph_pagelist_alloc(GFP_NOFS); if (!pagelist) return -ENOMEM; op->cls.class_name = class; size = strlen(class); BUG_ON(size > (size_t) U8_MAX); op->cls.class_len = size; ret = ceph_pagelist_append(pagelist, class, size); if (ret) goto err_pagelist_free; payload_len += size; op->cls.method_name = method; size = strlen(method); BUG_ON(size > (size_t) U8_MAX); op->cls.method_len = size; ret = ceph_pagelist_append(pagelist, method, size); if (ret) goto err_pagelist_free; payload_len += size; osd_req_op_cls_request_info_pagelist(osd_req, which, pagelist); op->indata_len = payload_len; return 0; err_pagelist_free: ceph_pagelist_release(pagelist); return ret; } EXPORT_SYMBOL(osd_req_op_cls_init); int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, const char *name, const void *value, size_t size, u8 cmp_op, u8 cmp_mode) { struct ceph_osd_req_op *op = osd_req_op_init(osd_req, which, opcode, 0); struct ceph_pagelist *pagelist; size_t payload_len; int 
ret; BUG_ON(opcode != CEPH_OSD_OP_SETXATTR && opcode != CEPH_OSD_OP_CMPXATTR); pagelist = ceph_pagelist_alloc(GFP_NOFS); if (!pagelist) return -ENOMEM; payload_len = strlen(name); op->xattr.name_len = payload_len; ret = ceph_pagelist_append(pagelist, name, payload_len); if (ret) goto err_pagelist_free; op->xattr.value_len = size; ret = ceph_pagelist_append(pagelist, value, size); if (ret) goto err_pagelist_free; payload_len += size; op->xattr.cmp_op = cmp_op; op->xattr.cmp_mode = cmp_mode; ceph_osd_data_pagelist_init(&op->xattr.osd_data, pagelist); op->indata_len = payload_len; return 0; err_pagelist_free: ceph_pagelist_release(pagelist); return ret; } EXPORT_SYMBOL(osd_req_op_xattr_init); /* * @watch_opcode: CEPH_OSD_WATCH_OP_* */ static void osd_req_op_watch_init(struct ceph_osd_request *req, int which, u8 watch_opcode, u64 cookie, u32 gen) { struct ceph_osd_req_op *op; op = osd_req_op_init(req, which, CEPH_OSD_OP_WATCH, 0); op->watch.cookie = cookie; op->watch.op = watch_opcode; op->watch.gen = gen; } /* * prot_ver, timeout and notify payload (may be empty) should already be * encoded in @request_pl */ static void osd_req_op_notify_init(struct ceph_osd_request *req, int which, u64 cookie, struct ceph_pagelist *request_pl) { struct ceph_osd_req_op *op; op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0); op->notify.cookie = cookie; ceph_osd_data_pagelist_init(&op->notify.request_data, request_pl); op->indata_len = request_pl->length; } /* * @flags: CEPH_OSD_OP_ALLOC_HINT_FLAG_* */ void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req, unsigned int which, u64 expected_object_size, u64 expected_write_size, u32 flags) { struct ceph_osd_req_op *op; op = osd_req_op_init(osd_req, which, CEPH_OSD_OP_SETALLOCHINT, 0); op->alloc_hint.expected_object_size = expected_object_size; op->alloc_hint.expected_write_size = expected_write_size; op->alloc_hint.flags = flags; /* * CEPH_OSD_OP_SETALLOCHINT op is advisory and therefore deemed * not worth a feature bit. Set FAILOK per-op flag to make * sure older osds don't trip over an unsupported opcode. 
*/ op->flags |= CEPH_OSD_OP_FLAG_FAILOK; } EXPORT_SYMBOL(osd_req_op_alloc_hint_init); static void ceph_osdc_msg_data_add(struct ceph_msg *msg, struct ceph_osd_data *osd_data) { u64 length = ceph_osd_data_length(osd_data); if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) { BUG_ON(length > (u64) SIZE_MAX); if (length) ceph_msg_data_add_pages(msg, osd_data->pages, length, osd_data->alignment, false); } else if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGELIST) { BUG_ON(!length); ceph_msg_data_add_pagelist(msg, osd_data->pagelist); #ifdef CONFIG_BLOCK } else if (osd_data->type == CEPH_OSD_DATA_TYPE_BIO) { ceph_msg_data_add_bio(msg, &osd_data->bio_pos, length); #endif } else if (osd_data->type == CEPH_OSD_DATA_TYPE_BVECS) { ceph_msg_data_add_bvecs(msg, &osd_data->bvec_pos); } else if (osd_data->type == CEPH_OSD_DATA_TYPE_ITER) { ceph_msg_data_add_iter(msg, &osd_data->iter); } else { BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE); } } static u32 osd_req_encode_op(struct ceph_osd_op *dst, const struct ceph_osd_req_op *src) { switch (src->op) { case CEPH_OSD_OP_STAT: break; case CEPH_OSD_OP_READ: case CEPH_OSD_OP_SPARSE_READ: case CEPH_OSD_OP_WRITE: case CEPH_OSD_OP_WRITEFULL: case CEPH_OSD_OP_ZERO: case CEPH_OSD_OP_TRUNCATE: dst->extent.offset = cpu_to_le64(src->extent.offset); dst->extent.length = cpu_to_le64(src->extent.length); dst->extent.truncate_size = cpu_to_le64(src->extent.truncate_size); dst->extent.truncate_seq = cpu_to_le32(src->extent.truncate_seq); break; case CEPH_OSD_OP_CALL: dst->cls.class_len = src->cls.class_len; dst->cls.method_len = src->cls.method_len; dst->cls.indata_len = cpu_to_le32(src->cls.indata_len); break; case CEPH_OSD_OP_WATCH: dst->watch.cookie = cpu_to_le64(src->watch.cookie); dst->watch.ver = cpu_to_le64(0); dst->watch.op = src->watch.op; dst->watch.gen = cpu_to_le32(src->watch.gen); break; case CEPH_OSD_OP_NOTIFY_ACK: break; case CEPH_OSD_OP_NOTIFY: dst->notify.cookie = cpu_to_le64(src->notify.cookie); break; case CEPH_OSD_OP_LIST_WATCHERS: break; case CEPH_OSD_OP_SETALLOCHINT: dst->alloc_hint.expected_object_size = cpu_to_le64(src->alloc_hint.expected_object_size); dst->alloc_hint.expected_write_size = cpu_to_le64(src->alloc_hint.expected_write_size); dst->alloc_hint.flags = cpu_to_le32(src->alloc_hint.flags); break; case CEPH_OSD_OP_SETXATTR: case CEPH_OSD_OP_CMPXATTR: dst->xattr.name_len = cpu_to_le32(src->xattr.name_len); dst->xattr.value_len = cpu_to_le32(src->xattr.value_len); dst->xattr.cmp_op = src->xattr.cmp_op; dst->xattr.cmp_mode = src->xattr.cmp_mode; break; case CEPH_OSD_OP_CREATE: case CEPH_OSD_OP_DELETE: break; case CEPH_OSD_OP_COPY_FROM2: dst->copy_from.snapid = cpu_to_le64(src->copy_from.snapid); dst->copy_from.src_version = cpu_to_le64(src->copy_from.src_version); dst->copy_from.flags = src->copy_from.flags; dst->copy_from.src_fadvise_flags = cpu_to_le32(src->copy_from.src_fadvise_flags); break; case CEPH_OSD_OP_ASSERT_VER: dst->assert_ver.unused = cpu_to_le64(0); dst->assert_ver.ver = cpu_to_le64(src->assert_ver.ver); break; default: pr_err("unsupported osd opcode %s\n", ceph_osd_op_name(src->op)); WARN_ON(1); return 0; } dst->op = cpu_to_le16(src->op); dst->flags = cpu_to_le32(src->flags); dst->payload_len = cpu_to_le32(src->indata_len); return src->indata_len; } /* * build new request AND message, calculate layout, and adjust file * extent as needed. * * if the file was recently truncated, we include information about its * old and new size so that the object can be updated appropriately. 
(we * avoid synchronously deleting truncated objects because it's slow.) */ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, struct ceph_file_layout *layout, struct ceph_vino vino, u64 off, u64 *plen, unsigned int which, int num_ops, int opcode, int flags, struct ceph_snap_context *snapc, u32 truncate_seq, u64 truncate_size, bool use_mempool) { struct ceph_osd_request *req; u64 objnum = 0; u64 objoff = 0; u64 objlen = 0; int r; BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE && opcode != CEPH_OSD_OP_ZERO && opcode != CEPH_OSD_OP_TRUNCATE && opcode != CEPH_OSD_OP_CREATE && opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_SPARSE_READ); req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool, GFP_NOFS); if (!req) { r = -ENOMEM; goto fail; } /* calculate max write size */ r = calc_layout(layout, off, plen, &objnum, &objoff, &objlen); if (r) goto fail; if (opcode == CEPH_OSD_OP_CREATE || opcode == CEPH_OSD_OP_DELETE) { osd_req_op_init(req, which, opcode, 0); } else { u32 object_size = layout->object_size; u32 object_base = off - objoff; if (!(truncate_seq == 1 && truncate_size == -1ULL)) { if (truncate_size <= object_base) { truncate_size = 0; } else { truncate_size -= object_base; if (truncate_size > object_size) truncate_size = object_size; } } osd_req_op_extent_init(req, which, opcode, objoff, objlen, truncate_size, truncate_seq); } req->r_base_oloc.pool = layout->pool_id; req->r_base_oloc.pool_ns = ceph_try_get_string(layout->pool_ns); ceph_oid_printf(&req->r_base_oid, "%llx.%08llx", vino.ino, objnum); req->r_flags = flags | osdc->client->options->read_from_replica; req->r_snapid = vino.snap; if (flags & CEPH_OSD_FLAG_WRITE) req->r_data_offset = off; if (num_ops > 1) { int num_req_ops, num_rep_ops; /* * If this is a multi-op write request, assume that we'll need * request ops. If it's a multi-op read then assume we'll need * reply ops. Anything else and call it -EINVAL. */ if (flags & CEPH_OSD_FLAG_WRITE) { num_req_ops = num_ops; num_rep_ops = 0; } else if (flags & CEPH_OSD_FLAG_READ) { num_req_ops = 0; num_rep_ops = num_ops; } else { r = -EINVAL; goto fail; } r = __ceph_osdc_alloc_messages(req, GFP_NOFS, num_req_ops, num_rep_ops); } else { r = ceph_osdc_alloc_messages(req, GFP_NOFS); } if (r) goto fail; return req; fail: ceph_osdc_put_request(req); return ERR_PTR(r); } EXPORT_SYMBOL(ceph_osdc_new_request); int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt) { WARN_ON(op->op != CEPH_OSD_OP_SPARSE_READ); op->extent.sparse_ext_cnt = cnt; op->extent.sparse_ext = kmalloc_array(cnt, sizeof(*op->extent.sparse_ext), GFP_NOFS); if (!op->extent.sparse_ext) return -ENOMEM; return 0; } EXPORT_SYMBOL(__ceph_alloc_sparse_ext_map); /* * We keep osd requests in an rbtree, sorted by ->r_tid. */ DEFINE_RB_FUNCS(request, struct ceph_osd_request, r_tid, r_node) DEFINE_RB_FUNCS(request_mc, struct ceph_osd_request, r_tid, r_mc_node) /* * Call @fn on each OSD request as long as @fn returns 0. 
*/ static void for_each_request(struct ceph_osd_client *osdc, int (*fn)(struct ceph_osd_request *req, void *arg), void *arg) { struct rb_node *n, *p; for (n = rb_first(&osdc->osds); n; n = rb_next(n)) { struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node); for (p = rb_first(&osd->o_requests); p; ) { struct ceph_osd_request *req = rb_entry(p, struct ceph_osd_request, r_node); p = rb_next(p); if (fn(req, arg)) return; } } for (p = rb_first(&osdc->homeless_osd.o_requests); p; ) { struct ceph_osd_request *req = rb_entry(p, struct ceph_osd_request, r_node); p = rb_next(p); if (fn(req, arg)) return; } } static bool osd_homeless(struct ceph_osd *osd) { return osd->o_osd == CEPH_HOMELESS_OSD; } static bool osd_registered(struct ceph_osd *osd) { verify_osdc_locked(osd->o_osdc); return !RB_EMPTY_NODE(&osd->o_node); } /* * Assumes @osd is zero-initialized. */ static void osd_init(struct ceph_osd *osd) { refcount_set(&osd->o_ref, 1); RB_CLEAR_NODE(&osd->o_node); spin_lock_init(&osd->o_requests_lock); osd->o_requests = RB_ROOT; osd->o_linger_requests = RB_ROOT; osd->o_backoff_mappings = RB_ROOT; osd->o_backoffs_by_id = RB_ROOT; INIT_LIST_HEAD(&osd->o_osd_lru); INIT_LIST_HEAD(&osd->o_keepalive_item); osd->o_incarnation = 1; mutex_init(&osd->lock); } static void ceph_init_sparse_read(struct ceph_sparse_read *sr) { kfree(sr->sr_extent); memset(sr, '\0', sizeof(*sr)); sr->sr_state = CEPH_SPARSE_READ_HDR; } static void osd_cleanup(struct ceph_osd *osd) { WARN_ON(!RB_EMPTY_NODE(&osd->o_node)); WARN_ON(!RB_EMPTY_ROOT(&osd->o_requests)); WARN_ON(!RB_EMPTY_ROOT(&osd->o_linger_requests)); WARN_ON(!RB_EMPTY_ROOT(&osd->o_backoff_mappings)); WARN_ON(!RB_EMPTY_ROOT(&osd->o_backoffs_by_id)); WARN_ON(!list_empty(&osd->o_osd_lru)); WARN_ON(!list_empty(&osd->o_keepalive_item)); ceph_init_sparse_read(&osd->o_sparse_read); if (osd->o_auth.authorizer) { WARN_ON(osd_homeless(osd)); ceph_auth_destroy_authorizer(osd->o_auth.authorizer); } } /* * Track open sessions with osds. 
*/ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum) { struct ceph_osd *osd; WARN_ON(onum == CEPH_HOMELESS_OSD); osd = kzalloc(sizeof(*osd), GFP_NOIO | __GFP_NOFAIL); osd_init(osd); osd->o_osdc = osdc; osd->o_osd = onum; osd->o_sparse_op_idx = -1; ceph_init_sparse_read(&osd->o_sparse_read); ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr); return osd; } static struct ceph_osd *get_osd(struct ceph_osd *osd) { if (refcount_inc_not_zero(&osd->o_ref)) { dout("get_osd %p %d -> %d\n", osd, refcount_read(&osd->o_ref)-1, refcount_read(&osd->o_ref)); return osd; } else { dout("get_osd %p FAIL\n", osd); return NULL; } } static void put_osd(struct ceph_osd *osd) { dout("put_osd %p %d -> %d\n", osd, refcount_read(&osd->o_ref), refcount_read(&osd->o_ref) - 1); if (refcount_dec_and_test(&osd->o_ref)) { osd_cleanup(osd); kfree(osd); } } DEFINE_RB_FUNCS(osd, struct ceph_osd, o_osd, o_node) static void __move_osd_to_lru(struct ceph_osd *osd) { struct ceph_osd_client *osdc = osd->o_osdc; dout("%s osd %p osd%d\n", __func__, osd, osd->o_osd); BUG_ON(!list_empty(&osd->o_osd_lru)); spin_lock(&osdc->osd_lru_lock); list_add_tail(&osd->o_osd_lru, &osdc->osd_lru); spin_unlock(&osdc->osd_lru_lock); osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl; } static void maybe_move_osd_to_lru(struct ceph_osd *osd) { if (RB_EMPTY_ROOT(&osd->o_requests) && RB_EMPTY_ROOT(&osd->o_linger_requests)) __move_osd_to_lru(osd); } static void __remove_osd_from_lru(struct ceph_osd *osd) { struct ceph_osd_client *osdc = osd->o_osdc; dout("%s osd %p osd%d\n", __func__, osd, osd->o_osd); spin_lock(&osdc->osd_lru_lock); if (!list_empty(&osd->o_osd_lru)) list_del_init(&osd->o_osd_lru); spin_unlock(&osdc->osd_lru_lock); } /* * Close the connection and assign any leftover requests to the * homeless session. 
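 *
 * Caller must hold osdc->lock for write.  This drops the tree's ref on
 * the session, so @osd may be freed by the time this returns.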
 */
static void close_osd(struct ceph_osd *osd)
{
	struct ceph_osd_client *osdc = osd->o_osdc;
	struct rb_node *n;

	verify_osdc_wrlocked(osdc);
	dout("%s osd %p osd%d\n", __func__, osd, osd->o_osd);

	ceph_con_close(&osd->o_con);

	for (n = rb_first(&osd->o_requests); n; ) {
		struct ceph_osd_request *req =
		    rb_entry(n, struct ceph_osd_request, r_node);

		n = rb_next(n); /* unlink_request() */

		dout(" reassigning req %p tid %llu\n", req, req->r_tid);
		unlink_request(osd, req);
		link_request(&osdc->homeless_osd, req);
	}
	for (n = rb_first(&osd->o_linger_requests); n; ) {
		struct ceph_osd_linger_request *lreq =
		    rb_entry(n, struct ceph_osd_linger_request, node);

		n = rb_next(n); /* unlink_linger() */

		dout(" reassigning lreq %p linger_id %llu\n", lreq,
		     lreq->linger_id);
		unlink_linger(osd, lreq);
		link_linger(&osdc->homeless_osd, lreq);
	}
	clear_backoffs(osd);

	__remove_osd_from_lru(osd);
	erase_osd(&osdc->osds, osd);
	put_osd(osd);
}

/*
 * reset osd connect
 */
static int reopen_osd(struct ceph_osd *osd)
{
	struct ceph_entity_addr *peer_addr;

	dout("%s osd %p osd%d\n", __func__, osd, osd->o_osd);

	if (RB_EMPTY_ROOT(&osd->o_requests) &&
	    RB_EMPTY_ROOT(&osd->o_linger_requests)) {
		close_osd(osd);
		return -ENODEV;
	}

	peer_addr = &osd->o_osdc->osdmap->osd_addr[osd->o_osd];
	if (!memcmp(peer_addr, &osd->o_con.peer_addr, sizeof(*peer_addr)) &&
	    !ceph_con_opened(&osd->o_con)) {
		struct rb_node *n;

		dout("osd addr hasn't changed and connection never opened, "
		     "letting msgr retry\n");

		/* touch each r_stamp for handle_timeout()'s benefit */
		for (n = rb_first(&osd->o_requests); n; n = rb_next(n)) {
			struct ceph_osd_request *req =
			    rb_entry(n, struct ceph_osd_request, r_node);
			req->r_stamp = jiffies;
		}

		return -EAGAIN;
	}

	ceph_con_close(&osd->o_con);
	ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd,
		      peer_addr);
	osd->o_incarnation++;

	return 0;
}

static struct ceph_osd *lookup_create_osd(struct ceph_osd_client *osdc, int o,
					  bool wrlocked)
{
	struct ceph_osd *osd;

	if (wrlocked)
		verify_osdc_wrlocked(osdc);
	else
		verify_osdc_locked(osdc);

	if (o != CEPH_HOMELESS_OSD)
		osd = lookup_osd(&osdc->osds, o);
	else
		osd = &osdc->homeless_osd;
	if (!osd) {
		if (!wrlocked)
			return ERR_PTR(-EAGAIN);

		osd = create_osd(osdc, o);
		insert_osd(&osdc->osds, osd);
		ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd,
			      &osdc->osdmap->osd_addr[osd->o_osd]);
	}

	dout("%s osdc %p osd%d -> osd %p\n", __func__, osdc, o, osd);
	return osd;
}

/*
 * Create request <-> OSD session relation.
 *
 * @req has to be assigned a tid, @osd may be homeless.
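 *
 * Linking takes a session ref and pulls a real OSD off the idle LRU;
 * for the homeless session it bumps osdc->num_homeless instead.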
*/ static void link_request(struct ceph_osd *osd, struct ceph_osd_request *req) { verify_osd_locked(osd); WARN_ON(!req->r_tid || req->r_osd); dout("%s osd %p osd%d req %p tid %llu\n", __func__, osd, osd->o_osd, req, req->r_tid); if (!osd_homeless(osd)) __remove_osd_from_lru(osd); else atomic_inc(&osd->o_osdc->num_homeless); get_osd(osd); spin_lock(&osd->o_requests_lock); insert_request(&osd->o_requests, req); spin_unlock(&osd->o_requests_lock); req->r_osd = osd; } static void unlink_request(struct ceph_osd *osd, struct ceph_osd_request *req) { verify_osd_locked(osd); WARN_ON(req->r_osd != osd); dout("%s osd %p osd%d req %p tid %llu\n", __func__, osd, osd->o_osd, req, req->r_tid); req->r_osd = NULL; spin_lock(&osd->o_requests_lock); erase_request(&osd->o_requests, req); spin_unlock(&osd->o_requests_lock); put_osd(osd); if (!osd_homeless(osd)) maybe_move_osd_to_lru(osd); else atomic_dec(&osd->o_osdc->num_homeless); } static bool __pool_full(struct ceph_pg_pool_info *pi) { return pi->flags & CEPH_POOL_FLAG_FULL; } static bool have_pool_full(struct ceph_osd_client *osdc) { struct rb_node *n; for (n = rb_first(&osdc->osdmap->pg_pools); n; n = rb_next(n)) { struct ceph_pg_pool_info *pi = rb_entry(n, struct ceph_pg_pool_info, node); if (__pool_full(pi)) return true; } return false; } static bool pool_full(struct ceph_osd_client *osdc, s64 pool_id) { struct ceph_pg_pool_info *pi; pi = ceph_pg_pool_by_id(osdc->osdmap, pool_id); if (!pi) return false; return __pool_full(pi); } /* * Returns whether a request should be blocked from being sent * based on the current osdmap and osd_client settings. */ static bool target_should_be_paused(struct ceph_osd_client *osdc, const struct ceph_osd_request_target *t, struct ceph_pg_pool_info *pi) { bool pauserd = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD); bool pausewr = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR) || ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || __pool_full(pi); WARN_ON(pi->id != t->target_oloc.pool); return ((t->flags & CEPH_OSD_FLAG_READ) && pauserd) || ((t->flags & CEPH_OSD_FLAG_WRITE) && pausewr) || (osdc->osdmap->epoch < osdc->epoch_barrier); } static int pick_random_replica(const struct ceph_osds *acting) { int i = get_random_u32_below(acting->size); dout("%s picked osd%d, primary osd%d\n", __func__, acting->osds[i], acting->primary); return i; } /* * Picks the closest replica based on client's location given by * crush_location option. Prefers the primary if the locality is * the same. 
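 *
 * Lower locality values are closer.  A negative locality means the
 * client's CRUSH location didn't match anything, so any non-negative
 * locality is preferred over it.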
*/ static int pick_closest_replica(struct ceph_osd_client *osdc, const struct ceph_osds *acting) { struct ceph_options *opt = osdc->client->options; int best_i, best_locality; int i = 0, locality; do { locality = ceph_get_crush_locality(osdc->osdmap, acting->osds[i], &opt->crush_locs); if (i == 0 || (locality >= 0 && best_locality < 0) || (locality >= 0 && best_locality >= 0 && locality < best_locality)) { best_i = i; best_locality = locality; } } while (++i < acting->size); dout("%s picked osd%d with locality %d, primary osd%d\n", __func__, acting->osds[best_i], best_locality, acting->primary); return best_i; } enum calc_target_result { CALC_TARGET_NO_ACTION = 0, CALC_TARGET_NEED_RESEND, CALC_TARGET_POOL_DNE, }; static enum calc_target_result calc_target(struct ceph_osd_client *osdc, struct ceph_osd_request_target *t, bool any_change) { struct ceph_pg_pool_info *pi; struct ceph_pg pgid, last_pgid; struct ceph_osds up, acting; bool is_read = t->flags & CEPH_OSD_FLAG_READ; bool is_write = t->flags & CEPH_OSD_FLAG_WRITE; bool force_resend = false; bool unpaused = false; bool legacy_change = false; bool split = false; bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE); bool recovery_deletes = ceph_osdmap_flag(osdc, CEPH_OSDMAP_RECOVERY_DELETES); enum calc_target_result ct_res; t->epoch = osdc->osdmap->epoch; pi = ceph_pg_pool_by_id(osdc->osdmap, t->base_oloc.pool); if (!pi) { t->osd = CEPH_HOMELESS_OSD; ct_res = CALC_TARGET_POOL_DNE; goto out; } if (osdc->osdmap->epoch == pi->last_force_request_resend) { if (t->last_force_resend < pi->last_force_request_resend) { t->last_force_resend = pi->last_force_request_resend; force_resend = true; } else if (t->last_force_resend == 0) { force_resend = true; } } /* apply tiering */ ceph_oid_copy(&t->target_oid, &t->base_oid); ceph_oloc_copy(&t->target_oloc, &t->base_oloc); if ((t->flags & CEPH_OSD_FLAG_IGNORE_OVERLAY) == 0) { if (is_read && pi->read_tier >= 0) t->target_oloc.pool = pi->read_tier; if (is_write && pi->write_tier >= 0) t->target_oloc.pool = pi->write_tier; pi = ceph_pg_pool_by_id(osdc->osdmap, t->target_oloc.pool); if (!pi) { t->osd = CEPH_HOMELESS_OSD; ct_res = CALC_TARGET_POOL_DNE; goto out; } } __ceph_object_locator_to_pg(pi, &t->target_oid, &t->target_oloc, &pgid); last_pgid.pool = pgid.pool; last_pgid.seed = ceph_stable_mod(pgid.seed, t->pg_num, t->pg_num_mask); ceph_pg_to_up_acting_osds(osdc->osdmap, pi, &pgid, &up, &acting); if (any_change && ceph_is_new_interval(&t->acting, &acting, &t->up, &up, t->size, pi->size, t->min_size, pi->min_size, t->pg_num, pi->pg_num, t->sort_bitwise, sort_bitwise, t->recovery_deletes, recovery_deletes, &last_pgid)) force_resend = true; if (t->paused && !target_should_be_paused(osdc, t, pi)) { t->paused = false; unpaused = true; } legacy_change = ceph_pg_compare(&t->pgid, &pgid) || ceph_osds_changed(&t->acting, &acting, t->used_replica || any_change); if (t->pg_num) split = ceph_pg_is_split(&last_pgid, t->pg_num, pi->pg_num); if (legacy_change || force_resend || split) { t->pgid = pgid; /* struct */ ceph_pg_to_primary_shard(osdc->osdmap, pi, &pgid, &t->spgid); ceph_osds_copy(&t->acting, &acting); ceph_osds_copy(&t->up, &up); t->size = pi->size; t->min_size = pi->min_size; t->pg_num = pi->pg_num; t->pg_num_mask = pi->pg_num_mask; t->sort_bitwise = sort_bitwise; t->recovery_deletes = recovery_deletes; if ((t->flags & (CEPH_OSD_FLAG_BALANCE_READS | CEPH_OSD_FLAG_LOCALIZE_READS)) && !is_write && pi->type == CEPH_POOL_TYPE_REP && acting.size > 1) { int pos; WARN_ON(!is_read || acting.osds[0] 
!= acting.primary); if (t->flags & CEPH_OSD_FLAG_BALANCE_READS) { pos = pick_random_replica(&acting); } else { pos = pick_closest_replica(osdc, &acting); } t->osd = acting.osds[pos]; t->used_replica = pos > 0; } else { t->osd = acting.primary; t->used_replica = false; } } if (unpaused || legacy_change || force_resend || split) ct_res = CALC_TARGET_NEED_RESEND; else ct_res = CALC_TARGET_NO_ACTION; out: dout("%s t %p -> %d%d%d%d ct_res %d osd%d\n", __func__, t, unpaused, legacy_change, force_resend, split, ct_res, t->osd); return ct_res; } static struct ceph_spg_mapping *alloc_spg_mapping(void) { struct ceph_spg_mapping *spg; spg = kmalloc(sizeof(*spg), GFP_NOIO); if (!spg) return NULL; RB_CLEAR_NODE(&spg->node); spg->backoffs = RB_ROOT; return spg; } static void free_spg_mapping(struct ceph_spg_mapping *spg) { WARN_ON(!RB_EMPTY_NODE(&spg->node)); WARN_ON(!RB_EMPTY_ROOT(&spg->backoffs)); kfree(spg); } /* * rbtree of ceph_spg_mapping for handling map<spg_t, ...>, similar to * ceph_pg_mapping. Used to track OSD backoffs -- a backoff [range] is * defined only within a specific spgid; it does not pass anything to * children on split, or to another primary. */ DEFINE_RB_FUNCS2(spg_mapping, struct ceph_spg_mapping, spgid, ceph_spg_compare, RB_BYPTR, const struct ceph_spg *, node) static u64 hoid_get_bitwise_key(const struct ceph_hobject_id *hoid) { return hoid->is_max ? 0x100000000ull : hoid->hash_reverse_bits; } static void hoid_get_effective_key(const struct ceph_hobject_id *hoid, void **pkey, size_t *pkey_len) { if (hoid->key_len) { *pkey = hoid->key; *pkey_len = hoid->key_len; } else { *pkey = hoid->oid; *pkey_len = hoid->oid_len; } } static int compare_names(const void *name1, size_t name1_len, const void *name2, size_t name2_len) { int ret; ret = memcmp(name1, name2, min(name1_len, name2_len)); if (!ret) { if (name1_len < name2_len) ret = -1; else if (name1_len > name2_len) ret = 1; } return ret; } static int hoid_compare(const struct ceph_hobject_id *lhs, const struct ceph_hobject_id *rhs) { void *effective_key1, *effective_key2; size_t effective_key1_len, effective_key2_len; int ret; if (lhs->is_max < rhs->is_max) return -1; if (lhs->is_max > rhs->is_max) return 1; if (lhs->pool < rhs->pool) return -1; if (lhs->pool > rhs->pool) return 1; if (hoid_get_bitwise_key(lhs) < hoid_get_bitwise_key(rhs)) return -1; if (hoid_get_bitwise_key(lhs) > hoid_get_bitwise_key(rhs)) return 1; ret = compare_names(lhs->nspace, lhs->nspace_len, rhs->nspace, rhs->nspace_len); if (ret) return ret; hoid_get_effective_key(lhs, &effective_key1, &effective_key1_len); hoid_get_effective_key(rhs, &effective_key2, &effective_key2_len); ret = compare_names(effective_key1, effective_key1_len, effective_key2, effective_key2_len); if (ret) return ret; ret = compare_names(lhs->oid, lhs->oid_len, rhs->oid, rhs->oid_len); if (ret) return ret; if (lhs->snapid < rhs->snapid) return -1; if (lhs->snapid > rhs->snapid) return 1; return 0; } /* * For decoding ->begin and ->end of MOSDBackoff only -- no MIN/MAX * compat stuff here. * * Assumes @hoid is zero-initialized. 
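 *
 * Returns 0 on success or a negative errno; hobject_t encodings older
 * than struct_v 4 are rejected.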
*/ static int decode_hoid(void **p, void *end, struct ceph_hobject_id *hoid) { u8 struct_v; u32 struct_len; int ret; ret = ceph_start_decoding(p, end, 4, "hobject_t", &struct_v, &struct_len); if (ret) return ret; if (struct_v < 4) { pr_err("got struct_v %d < 4 of hobject_t\n", struct_v); goto e_inval; } hoid->key = ceph_extract_encoded_string(p, end, &hoid->key_len, GFP_NOIO); if (IS_ERR(hoid->key)) { ret = PTR_ERR(hoid->key); hoid->key = NULL; return ret; } hoid->oid = ceph_extract_encoded_string(p, end, &hoid->oid_len, GFP_NOIO); if (IS_ERR(hoid->oid)) { ret = PTR_ERR(hoid->oid); hoid->oid = NULL; return ret; } ceph_decode_64_safe(p, end, hoid->snapid, e_inval); ceph_decode_32_safe(p, end, hoid->hash, e_inval); ceph_decode_8_safe(p, end, hoid->is_max, e_inval); hoid->nspace = ceph_extract_encoded_string(p, end, &hoid->nspace_len, GFP_NOIO); if (IS_ERR(hoid->nspace)) { ret = PTR_ERR(hoid->nspace); hoid->nspace = NULL; return ret; } ceph_decode_64_safe(p, end, hoid->pool, e_inval); ceph_hoid_build_hash_cache(hoid); return 0; e_inval: return -EINVAL; } static int hoid_encoding_size(const struct ceph_hobject_id *hoid) { return 8 + 4 + 1 + 8 + /* snapid, hash, is_max, pool */ 4 + hoid->key_len + 4 + hoid->oid_len + 4 + hoid->nspace_len; } static void encode_hoid(void **p, void *end, const struct ceph_hobject_id *hoid) { ceph_start_encoding(p, 4, 3, hoid_encoding_size(hoid)); ceph_encode_string(p, end, hoid->key, hoid->key_len); ceph_encode_string(p, end, hoid->oid, hoid->oid_len); ceph_encode_64(p, hoid->snapid); ceph_encode_32(p, hoid->hash); ceph_encode_8(p, hoid->is_max); ceph_encode_string(p, end, hoid->nspace, hoid->nspace_len); ceph_encode_64(p, hoid->pool); } static void free_hoid(struct ceph_hobject_id *hoid) { if (hoid) { kfree(hoid->key); kfree(hoid->oid); kfree(hoid->nspace); kfree(hoid); } } static struct ceph_osd_backoff *alloc_backoff(void) { struct ceph_osd_backoff *backoff; backoff = kzalloc(sizeof(*backoff), GFP_NOIO); if (!backoff) return NULL; RB_CLEAR_NODE(&backoff->spg_node); RB_CLEAR_NODE(&backoff->id_node); return backoff; } static void free_backoff(struct ceph_osd_backoff *backoff) { WARN_ON(!RB_EMPTY_NODE(&backoff->spg_node)); WARN_ON(!RB_EMPTY_NODE(&backoff->id_node)); free_hoid(backoff->begin); free_hoid(backoff->end); kfree(backoff); } /* * Within a specific spgid, backoffs are managed by ->begin hoid. */ DEFINE_RB_INSDEL_FUNCS2(backoff, struct ceph_osd_backoff, begin, hoid_compare, RB_BYVAL, spg_node); static struct ceph_osd_backoff *lookup_containing_backoff(struct rb_root *root, const struct ceph_hobject_id *hoid) { struct rb_node *n = root->rb_node; while (n) { struct ceph_osd_backoff *cur = rb_entry(n, struct ceph_osd_backoff, spg_node); int cmp; cmp = hoid_compare(hoid, cur->begin); if (cmp < 0) { n = n->rb_left; } else if (cmp > 0) { if (hoid_compare(hoid, cur->end) < 0) return cur; n = n->rb_right; } else { return cur; } } return NULL; } /* * Each backoff has a unique id within its OSD session. 
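 *
 * Hence each backoff is indexed twice: by ->begin hoid within its spg
 * mapping (spg_node) and by id within the session (id_node).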
*/ DEFINE_RB_FUNCS(backoff_by_id, struct ceph_osd_backoff, id, id_node) static void clear_backoffs(struct ceph_osd *osd) { while (!RB_EMPTY_ROOT(&osd->o_backoff_mappings)) { struct ceph_spg_mapping *spg = rb_entry(rb_first(&osd->o_backoff_mappings), struct ceph_spg_mapping, node); while (!RB_EMPTY_ROOT(&spg->backoffs)) { struct ceph_osd_backoff *backoff = rb_entry(rb_first(&spg->backoffs), struct ceph_osd_backoff, spg_node); erase_backoff(&spg->backoffs, backoff); erase_backoff_by_id(&osd->o_backoffs_by_id, backoff); free_backoff(backoff); } erase_spg_mapping(&osd->o_backoff_mappings, spg); free_spg_mapping(spg); } } /* * Set up a temporary, non-owning view into @t. */ static void hoid_fill_from_target(struct ceph_hobject_id *hoid, const struct ceph_osd_request_target *t) { hoid->key = NULL; hoid->key_len = 0; hoid->oid = t->target_oid.name; hoid->oid_len = t->target_oid.name_len; hoid->snapid = CEPH_NOSNAP; hoid->hash = t->pgid.seed; hoid->is_max = false; if (t->target_oloc.pool_ns) { hoid->nspace = t->target_oloc.pool_ns->str; hoid->nspace_len = t->target_oloc.pool_ns->len; } else { hoid->nspace = NULL; hoid->nspace_len = 0; } hoid->pool = t->target_oloc.pool; ceph_hoid_build_hash_cache(hoid); } static bool should_plug_request(struct ceph_osd_request *req) { struct ceph_osd *osd = req->r_osd; struct ceph_spg_mapping *spg; struct ceph_osd_backoff *backoff; struct ceph_hobject_id hoid; spg = lookup_spg_mapping(&osd->o_backoff_mappings, &req->r_t.spgid); if (!spg) return false; hoid_fill_from_target(&hoid, &req->r_t); backoff = lookup_containing_backoff(&spg->backoffs, &hoid); if (!backoff) return false; dout("%s req %p tid %llu backoff osd%d spgid %llu.%xs%d id %llu\n", __func__, req, req->r_tid, osd->o_osd, backoff->spgid.pgid.pool, backoff->spgid.pgid.seed, backoff->spgid.shard, backoff->id); return true; } /* * Keep get_num_data_items() in sync with this function. 
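 *
 * Depending on the op, data items are attached to the request message
 * (writes, xattrs), to the reply message (reads, stat) or to both
 * (call, notify).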
*/ static void setup_request_data(struct ceph_osd_request *req) { struct ceph_msg *request_msg = req->r_request; struct ceph_msg *reply_msg = req->r_reply; struct ceph_osd_req_op *op; if (req->r_request->num_data_items || req->r_reply->num_data_items) return; WARN_ON(request_msg->data_length || reply_msg->data_length); for (op = req->r_ops; op != &req->r_ops[req->r_num_ops]; op++) { switch (op->op) { /* request */ case CEPH_OSD_OP_WRITE: case CEPH_OSD_OP_WRITEFULL: WARN_ON(op->indata_len != op->extent.length); ceph_osdc_msg_data_add(request_msg, &op->extent.osd_data); break; case CEPH_OSD_OP_SETXATTR: case CEPH_OSD_OP_CMPXATTR: WARN_ON(op->indata_len != op->xattr.name_len + op->xattr.value_len); ceph_osdc_msg_data_add(request_msg, &op->xattr.osd_data); break; case CEPH_OSD_OP_NOTIFY_ACK: ceph_osdc_msg_data_add(request_msg, &op->notify_ack.request_data); break; case CEPH_OSD_OP_COPY_FROM2: ceph_osdc_msg_data_add(request_msg, &op->copy_from.osd_data); break; /* reply */ case CEPH_OSD_OP_STAT: ceph_osdc_msg_data_add(reply_msg, &op->raw_data_in); break; case CEPH_OSD_OP_READ: case CEPH_OSD_OP_SPARSE_READ: ceph_osdc_msg_data_add(reply_msg, &op->extent.osd_data); break; case CEPH_OSD_OP_LIST_WATCHERS: ceph_osdc_msg_data_add(reply_msg, &op->list_watchers.response_data); break; /* both */ case CEPH_OSD_OP_CALL: WARN_ON(op->indata_len != op->cls.class_len + op->cls.method_len + op->cls.indata_len); ceph_osdc_msg_data_add(request_msg, &op->cls.request_info); /* optional, can be NONE */ ceph_osdc_msg_data_add(request_msg, &op->cls.request_data); /* optional, can be NONE */ ceph_osdc_msg_data_add(reply_msg, &op->cls.response_data); break; case CEPH_OSD_OP_NOTIFY: ceph_osdc_msg_data_add(request_msg, &op->notify.request_data); ceph_osdc_msg_data_add(reply_msg, &op->notify.response_data); break; } } } static void encode_pgid(void **p, const struct ceph_pg *pgid) { ceph_encode_8(p, 1); ceph_encode_64(p, pgid->pool); ceph_encode_32(p, pgid->seed); ceph_encode_32(p, -1); /* preferred */ } static void encode_spgid(void **p, const struct ceph_spg *spgid) { ceph_start_encoding(p, 1, 1, CEPH_PGID_ENCODING_LEN + 1); encode_pgid(p, &spgid->pgid); ceph_encode_8(p, spgid->shard); } static void encode_oloc(void **p, void *end, const struct ceph_object_locator *oloc) { ceph_start_encoding(p, 5, 4, ceph_oloc_encoding_size(oloc)); ceph_encode_64(p, oloc->pool); ceph_encode_32(p, -1); /* preferred */ ceph_encode_32(p, 0); /* key len */ if (oloc->pool_ns) ceph_encode_string(p, end, oloc->pool_ns->str, oloc->pool_ns->len); else ceph_encode_32(p, 0); } static void encode_request_partial(struct ceph_osd_request *req, struct ceph_msg *msg) { void *p = msg->front.iov_base; void *const end = p + msg->front_alloc_len; u32 data_len = 0; int i; if (req->r_flags & CEPH_OSD_FLAG_WRITE) { /* snapshots aren't writeable */ WARN_ON(req->r_snapid != CEPH_NOSNAP); } else { WARN_ON(req->r_mtime.tv_sec || req->r_mtime.tv_nsec || req->r_data_offset || req->r_snapc); } setup_request_data(req); encode_spgid(&p, &req->r_t.spgid); /* actual spg */ ceph_encode_32(&p, req->r_t.pgid.seed); /* raw hash */ ceph_encode_32(&p, req->r_osdc->osdmap->epoch); ceph_encode_32(&p, req->r_flags); /* reqid */ ceph_start_encoding(&p, 2, 2, sizeof(struct ceph_osd_reqid)); memset(p, 0, sizeof(struct ceph_osd_reqid)); p += sizeof(struct ceph_osd_reqid); /* trace */ memset(p, 0, sizeof(struct ceph_blkin_trace_info)); p += sizeof(struct ceph_blkin_trace_info); ceph_encode_32(&p, 0); /* client_inc, always 0 */ ceph_encode_timespec64(p, &req->r_mtime); p += 
sizeof(struct ceph_timespec); encode_oloc(&p, end, &req->r_t.target_oloc); ceph_encode_string(&p, end, req->r_t.target_oid.name, req->r_t.target_oid.name_len); /* ops, can imply data */ ceph_encode_16(&p, req->r_num_ops); for (i = 0; i < req->r_num_ops; i++) { data_len += osd_req_encode_op(p, &req->r_ops[i]); p += sizeof(struct ceph_osd_op); } ceph_encode_64(&p, req->r_snapid); /* snapid */ if (req->r_snapc) { ceph_encode_64(&p, req->r_snapc->seq); ceph_encode_32(&p, req->r_snapc->num_snaps); for (i = 0; i < req->r_snapc->num_snaps; i++) ceph_encode_64(&p, req->r_snapc->snaps[i]); } else { ceph_encode_64(&p, 0); /* snap_seq */ ceph_encode_32(&p, 0); /* snaps len */ } ceph_encode_32(&p, req->r_attempts); /* retry_attempt */ BUG_ON(p > end - 8); /* space for features */ msg->hdr.version = cpu_to_le16(8); /* MOSDOp v8 */ /* front_len is finalized in encode_request_finish() */ msg->front.iov_len = p - msg->front.iov_base; msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); msg->hdr.data_len = cpu_to_le32(data_len); /* * The header "data_off" is a hint to the receiver allowing it * to align received data into its buffers such that there's no * need to re-copy it before writing it to disk (direct I/O). */ msg->hdr.data_off = cpu_to_le16(req->r_data_offset); dout("%s req %p msg %p oid %s oid_len %d\n", __func__, req, msg, req->r_t.target_oid.name, req->r_t.target_oid.name_len); } static void encode_request_finish(struct ceph_msg *msg) { void *p = msg->front.iov_base; void *const partial_end = p + msg->front.iov_len; void *const end = p + msg->front_alloc_len; if (CEPH_HAVE_FEATURE(msg->con->peer_features, RESEND_ON_SPLIT)) { /* luminous OSD -- encode features and be done */ p = partial_end; ceph_encode_64(&p, msg->con->peer_features); } else { struct { char spgid[CEPH_ENCODING_START_BLK_LEN + CEPH_PGID_ENCODING_LEN + 1]; __le32 hash; __le32 epoch; __le32 flags; char reqid[CEPH_ENCODING_START_BLK_LEN + sizeof(struct ceph_osd_reqid)]; char trace[sizeof(struct ceph_blkin_trace_info)]; __le32 client_inc; struct ceph_timespec mtime; } __packed head; struct ceph_pg pgid; void *oloc, *oid, *tail; int oloc_len, oid_len, tail_len; int len; /* * Pre-luminous OSD -- reencode v8 into v4 using @head * as a temporary buffer. Encode the raw PG; the rest * is just a matter of moving oloc, oid and tail blobs * around. 
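 *
 * No re-encoding of oloc, oid or the ops tail is needed: they are
 * already valid in the v8 front and only have to be memmove()d into
 * their v4 positions.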
*/ memcpy(&head, p, sizeof(head)); p += sizeof(head); oloc = p; p += CEPH_ENCODING_START_BLK_LEN; pgid.pool = ceph_decode_64(&p); p += 4 + 4; /* preferred, key len */ len = ceph_decode_32(&p); p += len; /* nspace */ oloc_len = p - oloc; oid = p; len = ceph_decode_32(&p); p += len; oid_len = p - oid; tail = p; tail_len = partial_end - p; p = msg->front.iov_base; ceph_encode_copy(&p, &head.client_inc, sizeof(head.client_inc)); ceph_encode_copy(&p, &head.epoch, sizeof(head.epoch)); ceph_encode_copy(&p, &head.flags, sizeof(head.flags)); ceph_encode_copy(&p, &head.mtime, sizeof(head.mtime)); /* reassert_version */ memset(p, 0, sizeof(struct ceph_eversion)); p += sizeof(struct ceph_eversion); BUG_ON(p >= oloc); memmove(p, oloc, oloc_len); p += oloc_len; pgid.seed = le32_to_cpu(head.hash); encode_pgid(&p, &pgid); /* raw pg */ BUG_ON(p >= oid); memmove(p, oid, oid_len); p += oid_len; /* tail -- ops, snapid, snapc, retry_attempt */ BUG_ON(p >= tail); memmove(p, tail, tail_len); p += tail_len; msg->hdr.version = cpu_to_le16(4); /* MOSDOp v4 */ } BUG_ON(p > end); msg->front.iov_len = p - msg->front.iov_base; msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); dout("%s msg %p tid %llu %u+%u+%u v%d\n", __func__, msg, le64_to_cpu(msg->hdr.tid), le32_to_cpu(msg->hdr.front_len), le32_to_cpu(msg->hdr.middle_len), le32_to_cpu(msg->hdr.data_len), le16_to_cpu(msg->hdr.version)); } /* * @req has to be assigned a tid and registered. */ static void send_request(struct ceph_osd_request *req) { struct ceph_osd *osd = req->r_osd; verify_osd_locked(osd); WARN_ON(osd->o_osd != req->r_t.osd); /* backoff? */ if (should_plug_request(req)) return; /* * We may have a previously queued request message hanging * around. Cancel it to avoid corrupting the msgr. */ if (req->r_sent) ceph_msg_revoke(req->r_request); req->r_flags |= CEPH_OSD_FLAG_KNOWN_REDIR; if (req->r_attempts) req->r_flags |= CEPH_OSD_FLAG_RETRY; else WARN_ON(req->r_flags & CEPH_OSD_FLAG_RETRY); encode_request_partial(req, req->r_request); dout("%s req %p tid %llu to pgid %llu.%x spgid %llu.%xs%d osd%d e%u flags 0x%x attempt %d\n", __func__, req, req->r_tid, req->r_t.pgid.pool, req->r_t.pgid.seed, req->r_t.spgid.pgid.pool, req->r_t.spgid.pgid.seed, req->r_t.spgid.shard, osd->o_osd, req->r_t.epoch, req->r_flags, req->r_attempts); req->r_t.paused = false; req->r_stamp = jiffies; req->r_attempts++; req->r_sent = osd->o_incarnation; req->r_request->hdr.tid = cpu_to_le64(req->r_tid); ceph_con_send(&osd->o_con, ceph_msg_get(req->r_request)); } static void maybe_request_map(struct ceph_osd_client *osdc) { bool continuous = false; verify_osdc_locked(osdc); WARN_ON(!osdc->osdmap->epoch); if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD) || ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR)) { dout("%s osdc %p continuous\n", __func__, osdc); continuous = true; } else { dout("%s osdc %p onetime\n", __func__, osdc); } if (ceph_monc_want_map(&osdc->client->monc, CEPH_SUB_OSDMAP, osdc->osdmap->epoch + 1, continuous)) ceph_monc_renew_subs(&osdc->client->monc); } static void complete_request(struct ceph_osd_request *req, int err); static void send_map_check(struct ceph_osd_request *req); static void __submit_request(struct ceph_osd_request *req, bool wrlocked) { struct ceph_osd_client *osdc = req->r_osdc; struct ceph_osd *osd; enum calc_target_result ct_res; int err = 0; bool need_send = false; bool promoted = false; WARN_ON(req->r_tid); dout("%s req %p wrlocked %d\n", __func__, req, wrlocked); again: ct_res = calc_target(osdc, &req->r_t, 
false); if (ct_res == CALC_TARGET_POOL_DNE && !wrlocked) goto promote; osd = lookup_create_osd(osdc, req->r_t.osd, wrlocked); if (IS_ERR(osd)) { WARN_ON(PTR_ERR(osd) != -EAGAIN || wrlocked); goto promote; } if (osdc->abort_err) { dout("req %p abort_err %d\n", req, osdc->abort_err); err = osdc->abort_err; } else if (osdc->osdmap->epoch < osdc->epoch_barrier) { dout("req %p epoch %u barrier %u\n", req, osdc->osdmap->epoch, osdc->epoch_barrier); req->r_t.paused = true; maybe_request_map(osdc); } else if ((req->r_flags & CEPH_OSD_FLAG_WRITE) && ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR)) { dout("req %p pausewr\n", req); req->r_t.paused = true; maybe_request_map(osdc); } else if ((req->r_flags & CEPH_OSD_FLAG_READ) && ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD)) { dout("req %p pauserd\n", req); req->r_t.paused = true; maybe_request_map(osdc); } else if ((req->r_flags & CEPH_OSD_FLAG_WRITE) && !(req->r_flags & (CEPH_OSD_FLAG_FULL_TRY | CEPH_OSD_FLAG_FULL_FORCE)) && (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || pool_full(osdc, req->r_t.base_oloc.pool))) { dout("req %p full/pool_full\n", req); if (ceph_test_opt(osdc->client, ABORT_ON_FULL)) { err = -ENOSPC; } else { if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL)) pr_warn_ratelimited("cluster is full (osdmap FULL)\n"); else pr_warn_ratelimited("pool %lld is full or reached quota\n", req->r_t.base_oloc.pool); req->r_t.paused = true; maybe_request_map(osdc); } } else if (!osd_homeless(osd)) { need_send = true; } else { maybe_request_map(osdc); } mutex_lock(&osd->lock); /* * Assign the tid atomically with send_request() to protect * multiple writes to the same object from racing with each * other, resulting in out of order ops on the OSDs. */ req->r_tid = atomic64_inc_return(&osdc->last_tid); link_request(osd, req); if (need_send) send_request(req); else if (err) complete_request(req, err); mutex_unlock(&osd->lock); if (!err && ct_res == CALC_TARGET_POOL_DNE) send_map_check(req); if (promoted) downgrade_write(&osdc->lock); return; promote: up_read(&osdc->lock); down_write(&osdc->lock); wrlocked = true; promoted = true; goto again; } static void account_request(struct ceph_osd_request *req) { WARN_ON(req->r_flags & (CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK)); WARN_ON(!(req->r_flags & (CEPH_OSD_FLAG_READ | CEPH_OSD_FLAG_WRITE))); req->r_flags |= CEPH_OSD_FLAG_ONDISK; atomic_inc(&req->r_osdc->num_requests); req->r_start_stamp = jiffies; req->r_start_latency = ktime_get(); } static void submit_request(struct ceph_osd_request *req, bool wrlocked) { ceph_osdc_get_request(req); account_request(req); __submit_request(req, wrlocked); } static void finish_request(struct ceph_osd_request *req) { struct ceph_osd_client *osdc = req->r_osdc; WARN_ON(lookup_request_mc(&osdc->map_checks, req->r_tid)); dout("%s req %p tid %llu\n", __func__, req, req->r_tid); req->r_end_latency = ktime_get(); if (req->r_osd) { ceph_init_sparse_read(&req->r_osd->o_sparse_read); unlink_request(req->r_osd, req); } atomic_dec(&osdc->num_requests); /* * If an OSD has failed or returned and a request has been sent * twice, it's possible to get a reply and end up here while the * request message is queued for delivery. We will ignore the * reply, so not a big deal, but better to try and catch it. 
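 *
 * Revoking the outgoing request and any incoming reply below keeps the
 * messenger from referencing a request that is on its way out.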
*/ ceph_msg_revoke(req->r_request); ceph_msg_revoke_incoming(req->r_reply); } static void __complete_request(struct ceph_osd_request *req) { dout("%s req %p tid %llu cb %ps result %d\n", __func__, req, req->r_tid, req->r_callback, req->r_result); if (req->r_callback) req->r_callback(req); complete_all(&req->r_completion); ceph_osdc_put_request(req); } static void complete_request_workfn(struct work_struct *work) { struct ceph_osd_request *req = container_of(work, struct ceph_osd_request, r_complete_work); __complete_request(req); } /* * This is open-coded in handle_reply(). */ static void complete_request(struct ceph_osd_request *req, int err) { dout("%s req %p tid %llu err %d\n", __func__, req, req->r_tid, err); req->r_result = err; finish_request(req); INIT_WORK(&req->r_complete_work, complete_request_workfn); queue_work(req->r_osdc->completion_wq, &req->r_complete_work); } static void cancel_map_check(struct ceph_osd_request *req) { struct ceph_osd_client *osdc = req->r_osdc; struct ceph_osd_request *lookup_req; verify_osdc_wrlocked(osdc); lookup_req = lookup_request_mc(&osdc->map_checks, req->r_tid); if (!lookup_req) return; WARN_ON(lookup_req != req); erase_request_mc(&osdc->map_checks, req); ceph_osdc_put_request(req); } static void cancel_request(struct ceph_osd_request *req) { dout("%s req %p tid %llu\n", __func__, req, req->r_tid); cancel_map_check(req); finish_request(req); complete_all(&req->r_completion); ceph_osdc_put_request(req); } static void abort_request(struct ceph_osd_request *req, int err) { dout("%s req %p tid %llu err %d\n", __func__, req, req->r_tid, err); cancel_map_check(req); complete_request(req, err); } static int abort_fn(struct ceph_osd_request *req, void *arg) { int err = *(int *)arg; abort_request(req, err); return 0; /* continue iteration */ } /* * Abort all in-flight requests with @err and arrange for all future * requests to be failed immediately. */ void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err) { dout("%s osdc %p err %d\n", __func__, osdc, err); down_write(&osdc->lock); for_each_request(osdc, abort_fn, &err); osdc->abort_err = err; up_write(&osdc->lock); } EXPORT_SYMBOL(ceph_osdc_abort_requests); void ceph_osdc_clear_abort_err(struct ceph_osd_client *osdc) { down_write(&osdc->lock); osdc->abort_err = 0; up_write(&osdc->lock); } EXPORT_SYMBOL(ceph_osdc_clear_abort_err); static void update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb) { if (likely(eb > osdc->epoch_barrier)) { dout("updating epoch_barrier from %u to %u\n", osdc->epoch_barrier, eb); osdc->epoch_barrier = eb; /* Request map if we're not to the barrier yet */ if (eb > osdc->osdmap->epoch) maybe_request_map(osdc); } } void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb) { down_read(&osdc->lock); if (unlikely(eb > osdc->epoch_barrier)) { up_read(&osdc->lock); down_write(&osdc->lock); update_epoch_barrier(osdc, eb); up_write(&osdc->lock); } else { up_read(&osdc->lock); } } EXPORT_SYMBOL(ceph_osdc_update_epoch_barrier); /* * We can end up releasing caps as a result of abort_request(). * In that case, we probably want to ensure that the cap release message * has an updated epoch barrier in it, so set the epoch barrier prior to * aborting the first request. 
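 *
 * abort_on_full_fn() below does exactly that: the barrier is raised
 * once, before the first victim is aborted.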
*/ static int abort_on_full_fn(struct ceph_osd_request *req, void *arg) { struct ceph_osd_client *osdc = req->r_osdc; bool *victims = arg; if ((req->r_flags & CEPH_OSD_FLAG_WRITE) && (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || pool_full(osdc, req->r_t.base_oloc.pool))) { if (!*victims) { update_epoch_barrier(osdc, osdc->osdmap->epoch); *victims = true; } abort_request(req, -ENOSPC); } return 0; /* continue iteration */ } /* * Drop all pending requests that are stalled waiting on a full condition to * clear, and complete them with ENOSPC as the return code. Set the * osdc->epoch_barrier to the latest map epoch that we've seen if any were * cancelled. */ static void ceph_osdc_abort_on_full(struct ceph_osd_client *osdc) { bool victims = false; if (ceph_test_opt(osdc->client, ABORT_ON_FULL) && (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || have_pool_full(osdc))) for_each_request(osdc, abort_on_full_fn, &victims); } static void check_pool_dne(struct ceph_osd_request *req) { struct ceph_osd_client *osdc = req->r_osdc; struct ceph_osdmap *map = osdc->osdmap; verify_osdc_wrlocked(osdc); WARN_ON(!map->epoch); if (req->r_attempts) { /* * We sent a request earlier, which means that * previously the pool existed, and now it does not * (i.e., it was deleted). */ req->r_map_dne_bound = map->epoch; dout("%s req %p tid %llu pool disappeared\n", __func__, req, req->r_tid); } else { dout("%s req %p tid %llu map_dne_bound %u have %u\n", __func__, req, req->r_tid, req->r_map_dne_bound, map->epoch); } if (req->r_map_dne_bound) { if (map->epoch >= req->r_map_dne_bound) { /* we had a new enough map */ pr_info_ratelimited("tid %llu pool does not exist\n", req->r_tid); complete_request(req, -ENOENT); } } else { send_map_check(req); } } static void map_check_cb(struct ceph_mon_generic_request *greq) { struct ceph_osd_client *osdc = &greq->monc->client->osdc; struct ceph_osd_request *req; u64 tid = greq->private_data; WARN_ON(greq->result || !greq->u.newest); down_write(&osdc->lock); req = lookup_request_mc(&osdc->map_checks, tid); if (!req) { dout("%s tid %llu dne\n", __func__, tid); goto out_unlock; } dout("%s req %p tid %llu map_dne_bound %u newest %llu\n", __func__, req, req->r_tid, req->r_map_dne_bound, greq->u.newest); if (!req->r_map_dne_bound) req->r_map_dne_bound = greq->u.newest; erase_request_mc(&osdc->map_checks, req); check_pool_dne(req); ceph_osdc_put_request(req); out_unlock: up_write(&osdc->lock); } static void send_map_check(struct ceph_osd_request *req) { struct ceph_osd_client *osdc = req->r_osdc; struct ceph_osd_request *lookup_req; int ret; verify_osdc_wrlocked(osdc); lookup_req = lookup_request_mc(&osdc->map_checks, req->r_tid); if (lookup_req) { WARN_ON(lookup_req != req); return; } ceph_osdc_get_request(req); insert_request_mc(&osdc->map_checks, req); ret = ceph_monc_get_version_async(&osdc->client->monc, "osdmap", map_check_cb, req->r_tid); WARN_ON(ret); } /* * lingering requests, watch/notify v2 infrastructure */ static void linger_release(struct kref *kref) { struct ceph_osd_linger_request *lreq = container_of(kref, struct ceph_osd_linger_request, kref); dout("%s lreq %p reg_req %p ping_req %p\n", __func__, lreq, lreq->reg_req, lreq->ping_req); WARN_ON(!RB_EMPTY_NODE(&lreq->node)); WARN_ON(!RB_EMPTY_NODE(&lreq->osdc_node)); WARN_ON(!RB_EMPTY_NODE(&lreq->mc_node)); WARN_ON(!list_empty(&lreq->scan_item)); WARN_ON(!list_empty(&lreq->pending_lworks)); WARN_ON(lreq->osd); if (lreq->request_pl) ceph_pagelist_release(lreq->request_pl); if (lreq->notify_id_pages) 
ceph_release_page_vector(lreq->notify_id_pages, 1); ceph_osdc_put_request(lreq->reg_req); ceph_osdc_put_request(lreq->ping_req); target_destroy(&lreq->t); kfree(lreq); } static void linger_put(struct ceph_osd_linger_request *lreq) { if (lreq) kref_put(&lreq->kref, linger_release); } static struct ceph_osd_linger_request * linger_get(struct ceph_osd_linger_request *lreq) { kref_get(&lreq->kref); return lreq; } static struct ceph_osd_linger_request * linger_alloc(struct ceph_osd_client *osdc) { struct ceph_osd_linger_request *lreq; lreq = kzalloc(sizeof(*lreq), GFP_NOIO); if (!lreq) return NULL; kref_init(&lreq->kref); mutex_init(&lreq->lock); RB_CLEAR_NODE(&lreq->node); RB_CLEAR_NODE(&lreq->osdc_node); RB_CLEAR_NODE(&lreq->mc_node); INIT_LIST_HEAD(&lreq->scan_item); INIT_LIST_HEAD(&lreq->pending_lworks); init_completion(&lreq->reg_commit_wait); init_completion(&lreq->notify_finish_wait); lreq->osdc = osdc; target_init(&lreq->t); dout("%s lreq %p\n", __func__, lreq); return lreq; } DEFINE_RB_INSDEL_FUNCS(linger, struct ceph_osd_linger_request, linger_id, node) DEFINE_RB_FUNCS(linger_osdc, struct ceph_osd_linger_request, linger_id, osdc_node) DEFINE_RB_FUNCS(linger_mc, struct ceph_osd_linger_request, linger_id, mc_node) /* * Create linger request <-> OSD session relation. * * @lreq has to be registered, @osd may be homeless. */ static void link_linger(struct ceph_osd *osd, struct ceph_osd_linger_request *lreq) { verify_osd_locked(osd); WARN_ON(!lreq->linger_id || lreq->osd); dout("%s osd %p osd%d lreq %p linger_id %llu\n", __func__, osd, osd->o_osd, lreq, lreq->linger_id); if (!osd_homeless(osd)) __remove_osd_from_lru(osd); else atomic_inc(&osd->o_osdc->num_homeless); get_osd(osd); insert_linger(&osd->o_linger_requests, lreq); lreq->osd = osd; } static void unlink_linger(struct ceph_osd *osd, struct ceph_osd_linger_request *lreq) { verify_osd_locked(osd); WARN_ON(lreq->osd != osd); dout("%s osd %p osd%d lreq %p linger_id %llu\n", __func__, osd, osd->o_osd, lreq, lreq->linger_id); lreq->osd = NULL; erase_linger(&osd->o_linger_requests, lreq); put_osd(osd); if (!osd_homeless(osd)) maybe_move_osd_to_lru(osd); else atomic_dec(&osd->o_osdc->num_homeless); } static bool __linger_registered(struct ceph_osd_linger_request *lreq) { verify_osdc_locked(lreq->osdc); return !RB_EMPTY_NODE(&lreq->osdc_node); } static bool linger_registered(struct ceph_osd_linger_request *lreq) { struct ceph_osd_client *osdc = lreq->osdc; bool registered; down_read(&osdc->lock); registered = __linger_registered(lreq); up_read(&osdc->lock); return registered; } static void linger_register(struct ceph_osd_linger_request *lreq) { struct ceph_osd_client *osdc = lreq->osdc; verify_osdc_wrlocked(osdc); WARN_ON(lreq->linger_id); linger_get(lreq); lreq->linger_id = ++osdc->last_linger_id; insert_linger_osdc(&osdc->linger_requests, lreq); } static void linger_unregister(struct ceph_osd_linger_request *lreq) { struct ceph_osd_client *osdc = lreq->osdc; verify_osdc_wrlocked(osdc); erase_linger_osdc(&osdc->linger_requests, lreq); linger_put(lreq); } static void cancel_linger_request(struct ceph_osd_request *req) { struct ceph_osd_linger_request *lreq = req->r_priv; WARN_ON(!req->r_linger); cancel_request(req); linger_put(lreq); } struct linger_work { struct work_struct work; struct ceph_osd_linger_request *lreq; struct list_head pending_item; unsigned long queued_stamp; union { struct { u64 notify_id; u64 notifier_id; void *payload; /* points into @msg front */ size_t payload_len; struct ceph_msg *msg; /* for ceph_msg_put() */ } 
notify; struct { int err; } error; }; }; static struct linger_work *lwork_alloc(struct ceph_osd_linger_request *lreq, work_func_t workfn) { struct linger_work *lwork; lwork = kzalloc(sizeof(*lwork), GFP_NOIO); if (!lwork) return NULL; INIT_WORK(&lwork->work, workfn); INIT_LIST_HEAD(&lwork->pending_item); lwork->lreq = linger_get(lreq); return lwork; } static void lwork_free(struct linger_work *lwork) { struct ceph_osd_linger_request *lreq = lwork->lreq; mutex_lock(&lreq->lock); list_del(&lwork->pending_item); mutex_unlock(&lreq->lock); linger_put(lreq); kfree(lwork); } static void lwork_queue(struct linger_work *lwork) { struct ceph_osd_linger_request *lreq = lwork->lreq; struct ceph_osd_client *osdc = lreq->osdc; verify_lreq_locked(lreq); WARN_ON(!list_empty(&lwork->pending_item)); lwork->queued_stamp = jiffies; list_add_tail(&lwork->pending_item, &lreq->pending_lworks); queue_work(osdc->notify_wq, &lwork->work); } static void do_watch_notify(struct work_struct *w) { struct linger_work *lwork = container_of(w, struct linger_work, work); struct ceph_osd_linger_request *lreq = lwork->lreq; if (!linger_registered(lreq)) { dout("%s lreq %p not registered\n", __func__, lreq); goto out; } WARN_ON(!lreq->is_watch); dout("%s lreq %p notify_id %llu notifier_id %llu payload_len %zu\n", __func__, lreq, lwork->notify.notify_id, lwork->notify.notifier_id, lwork->notify.payload_len); lreq->wcb(lreq->data, lwork->notify.notify_id, lreq->linger_id, lwork->notify.notifier_id, lwork->notify.payload, lwork->notify.payload_len); out: ceph_msg_put(lwork->notify.msg); lwork_free(lwork); } static void do_watch_error(struct work_struct *w) { struct linger_work *lwork = container_of(w, struct linger_work, work); struct ceph_osd_linger_request *lreq = lwork->lreq; if (!linger_registered(lreq)) { dout("%s lreq %p not registered\n", __func__, lreq); goto out; } dout("%s lreq %p err %d\n", __func__, lreq, lwork->error.err); lreq->errcb(lreq->data, lreq->linger_id, lwork->error.err); out: lwork_free(lwork); } static void queue_watch_error(struct ceph_osd_linger_request *lreq) { struct linger_work *lwork; lwork = lwork_alloc(lreq, do_watch_error); if (!lwork) { pr_err("failed to allocate error-lwork\n"); return; } lwork->error.err = lreq->last_error; lwork_queue(lwork); } static void linger_reg_commit_complete(struct ceph_osd_linger_request *lreq, int result) { if (!completion_done(&lreq->reg_commit_wait)) { lreq->reg_commit_error = (result <= 0 ? 
result : 0); complete_all(&lreq->reg_commit_wait); } } static void linger_commit_cb(struct ceph_osd_request *req) { struct ceph_osd_linger_request *lreq = req->r_priv; mutex_lock(&lreq->lock); if (req != lreq->reg_req) { dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n", __func__, lreq, lreq->linger_id, req, lreq->reg_req); goto out; } dout("%s lreq %p linger_id %llu result %d\n", __func__, lreq, lreq->linger_id, req->r_result); linger_reg_commit_complete(lreq, req->r_result); lreq->committed = true; if (!lreq->is_watch) { struct ceph_osd_data *osd_data = osd_req_op_data(req, 0, notify, response_data); void *p = page_address(osd_data->pages[0]); WARN_ON(req->r_ops[0].op != CEPH_OSD_OP_NOTIFY || osd_data->type != CEPH_OSD_DATA_TYPE_PAGES); /* make note of the notify_id */ if (req->r_ops[0].outdata_len >= sizeof(u64)) { lreq->notify_id = ceph_decode_64(&p); dout("lreq %p notify_id %llu\n", lreq, lreq->notify_id); } else { dout("lreq %p no notify_id\n", lreq); } } out: mutex_unlock(&lreq->lock); linger_put(lreq); } static int normalize_watch_error(int err) { /* * Translate ENOENT -> ENOTCONN so that a delete->disconnection * notification and a failure to reconnect because we raced with * the delete appear the same to the user. */ if (err == -ENOENT) err = -ENOTCONN; return err; } static void linger_reconnect_cb(struct ceph_osd_request *req) { struct ceph_osd_linger_request *lreq = req->r_priv; mutex_lock(&lreq->lock); if (req != lreq->reg_req) { dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n", __func__, lreq, lreq->linger_id, req, lreq->reg_req); goto out; } dout("%s lreq %p linger_id %llu result %d last_error %d\n", __func__, lreq, lreq->linger_id, req->r_result, lreq->last_error); if (req->r_result < 0) { if (!lreq->last_error) { lreq->last_error = normalize_watch_error(req->r_result); queue_watch_error(lreq); } } out: mutex_unlock(&lreq->lock); linger_put(lreq); } static void send_linger(struct ceph_osd_linger_request *lreq) { struct ceph_osd_client *osdc = lreq->osdc; struct ceph_osd_request *req; int ret; verify_osdc_wrlocked(osdc); mutex_lock(&lreq->lock); dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id); if (lreq->reg_req) { if (lreq->reg_req->r_osd) cancel_linger_request(lreq->reg_req); ceph_osdc_put_request(lreq->reg_req); } req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO); BUG_ON(!req); target_copy(&req->r_t, &lreq->t); req->r_mtime = lreq->mtime; if (lreq->is_watch && lreq->committed) { osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_RECONNECT, lreq->linger_id, ++lreq->register_gen); dout("lreq %p reconnect register_gen %u\n", lreq, req->r_ops[0].watch.gen); req->r_callback = linger_reconnect_cb; } else { if (lreq->is_watch) { osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_WATCH, lreq->linger_id, 0); } else { lreq->notify_id = 0; refcount_inc(&lreq->request_pl->refcnt); osd_req_op_notify_init(req, 0, lreq->linger_id, lreq->request_pl); ceph_osd_data_pages_init( osd_req_op_data(req, 0, notify, response_data), lreq->notify_id_pages, PAGE_SIZE, 0, false, false); } dout("lreq %p register\n", lreq); req->r_callback = linger_commit_cb; } ret = ceph_osdc_alloc_messages(req, GFP_NOIO); BUG_ON(ret); req->r_priv = linger_get(lreq); req->r_linger = true; lreq->reg_req = req; mutex_unlock(&lreq->lock); submit_request(req, true); } static void linger_ping_cb(struct ceph_osd_request *req) { struct ceph_osd_linger_request *lreq = req->r_priv; mutex_lock(&lreq->lock); if (req != lreq->ping_req) { dout("%s lreq %p linger_id %llu unknown req (%p != 
%p)\n", __func__, lreq, lreq->linger_id, req, lreq->ping_req); goto out; } dout("%s lreq %p linger_id %llu result %d ping_sent %lu last_error %d\n", __func__, lreq, lreq->linger_id, req->r_result, lreq->ping_sent, lreq->last_error); if (lreq->register_gen == req->r_ops[0].watch.gen) { if (!req->r_result) { lreq->watch_valid_thru = lreq->ping_sent; } else if (!lreq->last_error) { lreq->last_error = normalize_watch_error(req->r_result); queue_watch_error(lreq); } } else { dout("lreq %p register_gen %u ignoring old pong %u\n", lreq, lreq->register_gen, req->r_ops[0].watch.gen); } out: mutex_unlock(&lreq->lock); linger_put(lreq); } static void send_linger_ping(struct ceph_osd_linger_request *lreq) { struct ceph_osd_client *osdc = lreq->osdc; struct ceph_osd_request *req; int ret; if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD)) { dout("%s PAUSERD\n", __func__); return; } lreq->ping_sent = jiffies; dout("%s lreq %p linger_id %llu ping_sent %lu register_gen %u\n", __func__, lreq, lreq->linger_id, lreq->ping_sent, lreq->register_gen); if (lreq->ping_req) { if (lreq->ping_req->r_osd) cancel_linger_request(lreq->ping_req); ceph_osdc_put_request(lreq->ping_req); } req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO); BUG_ON(!req); target_copy(&req->r_t, &lreq->t); osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_PING, lreq->linger_id, lreq->register_gen); req->r_callback = linger_ping_cb; ret = ceph_osdc_alloc_messages(req, GFP_NOIO); BUG_ON(ret); req->r_priv = linger_get(lreq); req->r_linger = true; lreq->ping_req = req; ceph_osdc_get_request(req); account_request(req); req->r_tid = atomic64_inc_return(&osdc->last_tid); link_request(lreq->osd, req); send_request(req); } static void linger_submit(struct ceph_osd_linger_request *lreq) { struct ceph_osd_client *osdc = lreq->osdc; struct ceph_osd *osd; down_write(&osdc->lock); linger_register(lreq); calc_target(osdc, &lreq->t, false); osd = lookup_create_osd(osdc, lreq->t.osd, true); link_linger(osd, lreq); send_linger(lreq); up_write(&osdc->lock); } static void cancel_linger_map_check(struct ceph_osd_linger_request *lreq) { struct ceph_osd_client *osdc = lreq->osdc; struct ceph_osd_linger_request *lookup_lreq; verify_osdc_wrlocked(osdc); lookup_lreq = lookup_linger_mc(&osdc->linger_map_checks, lreq->linger_id); if (!lookup_lreq) return; WARN_ON(lookup_lreq != lreq); erase_linger_mc(&osdc->linger_map_checks, lreq); linger_put(lreq); } /* * @lreq has to be both registered and linked. 
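 *
 * Called with osdc->lock held for write.  Dropping the registration
 * ref means @lreq may go away as soon as this returns.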
 */
static void __linger_cancel(struct ceph_osd_linger_request *lreq)
{
	if (lreq->ping_req && lreq->ping_req->r_osd)
		cancel_linger_request(lreq->ping_req);
	if (lreq->reg_req && lreq->reg_req->r_osd)
		cancel_linger_request(lreq->reg_req);
	cancel_linger_map_check(lreq);
	unlink_linger(lreq->osd, lreq);
	linger_unregister(lreq);
}

static void linger_cancel(struct ceph_osd_linger_request *lreq)
{
	struct ceph_osd_client *osdc = lreq->osdc;

	down_write(&osdc->lock);
	if (__linger_registered(lreq))
		__linger_cancel(lreq);
	up_write(&osdc->lock);
}

static void send_linger_map_check(struct ceph_osd_linger_request *lreq);

static void check_linger_pool_dne(struct ceph_osd_linger_request *lreq)
{
	struct ceph_osd_client *osdc = lreq->osdc;
	struct ceph_osdmap *map = osdc->osdmap;

	verify_osdc_wrlocked(osdc);
	WARN_ON(!map->epoch);

	if (lreq->register_gen) {
		lreq->map_dne_bound = map->epoch;
		dout("%s lreq %p linger_id %llu pool disappeared\n", __func__,
		     lreq, lreq->linger_id);
	} else {
		dout("%s lreq %p linger_id %llu map_dne_bound %u have %u\n",
		     __func__, lreq, lreq->linger_id, lreq->map_dne_bound,
		     map->epoch);
	}

	if (lreq->map_dne_bound) {
		if (map->epoch >= lreq->map_dne_bound) {
			/* we had a new enough map */
			pr_info("linger_id %llu pool does not exist\n",
				lreq->linger_id);
			linger_reg_commit_complete(lreq, -ENOENT);
			__linger_cancel(lreq);
		}
	} else {
		send_linger_map_check(lreq);
	}
}

static void linger_map_check_cb(struct ceph_mon_generic_request *greq)
{
	struct ceph_osd_client *osdc = &greq->monc->client->osdc;
	struct ceph_osd_linger_request *lreq;
	u64 linger_id = greq->private_data;

	WARN_ON(greq->result || !greq->u.newest);

	down_write(&osdc->lock);
	lreq = lookup_linger_mc(&osdc->linger_map_checks, linger_id);
	if (!lreq) {
		dout("%s linger_id %llu dne\n", __func__, linger_id);
		goto out_unlock;
	}

	dout("%s lreq %p linger_id %llu map_dne_bound %u newest %llu\n",
	     __func__, lreq, lreq->linger_id, lreq->map_dne_bound,
	     greq->u.newest);
	if (!lreq->map_dne_bound)
		lreq->map_dne_bound = greq->u.newest;
	erase_linger_mc(&osdc->linger_map_checks, lreq);
	check_linger_pool_dne(lreq);

	linger_put(lreq);
out_unlock:
	up_write(&osdc->lock);
}

static void send_linger_map_check(struct ceph_osd_linger_request *lreq)
{
	struct ceph_osd_client *osdc = lreq->osdc;
	struct ceph_osd_linger_request *lookup_lreq;
	int ret;

	verify_osdc_wrlocked(osdc);

	lookup_lreq = lookup_linger_mc(&osdc->linger_map_checks,
				       lreq->linger_id);
	if (lookup_lreq) {
		WARN_ON(lookup_lreq != lreq);
		return;
	}

	linger_get(lreq);
	insert_linger_mc(&osdc->linger_map_checks, lreq);
	ret = ceph_monc_get_version_async(&osdc->client->monc, "osdmap",
					  linger_map_check_cb, lreq->linger_id);
	WARN_ON(ret);
}

static int linger_reg_commit_wait(struct ceph_osd_linger_request *lreq)
{
	int ret;

	dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
	ret = wait_for_completion_killable(&lreq->reg_commit_wait);
	return ret ?: lreq->reg_commit_error;
}

static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq,
				     unsigned long timeout)
{
	long left;

	dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
	left = wait_for_completion_killable_timeout(&lreq->notify_finish_wait,
						ceph_timeout_jiffies(timeout));
	if (left <= 0)
		left = left ?: -ETIMEDOUT;
	else
		left = lreq->notify_finish_error; /* completed */

	return left;
}

/*
 * Timeout callback, called every N seconds.  When one or more OSD
 * requests have been active for more than N seconds, we send a
 * keepalive (tag + timestamp) to the OSD in question to ensure any
 * communications channel reset is detected.
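 *
 * Also fails requests that have been active longer than
 * opts->osd_request_timeout with -ETIMEDOUT, and reschedules itself
 * every osd_keepalive_timeout.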
*/ static void handle_timeout(struct work_struct *work) { struct ceph_osd_client *osdc = container_of(work, struct ceph_osd_client, timeout_work.work); struct ceph_options *opts = osdc->client->options; unsigned long cutoff = jiffies - opts->osd_keepalive_timeout; unsigned long expiry_cutoff = jiffies - opts->osd_request_timeout; LIST_HEAD(slow_osds); struct rb_node *n, *p; dout("%s osdc %p\n", __func__, osdc); down_write(&osdc->lock); /* * ping osds that are a bit slow. this ensures that if there * is a break in the TCP connection we will notice, and reopen * a connection with that osd (from the fault callback). */ for (n = rb_first(&osdc->osds); n; n = rb_next(n)) { struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node); bool found = false; for (p = rb_first(&osd->o_requests); p; ) { struct ceph_osd_request *req = rb_entry(p, struct ceph_osd_request, r_node); p = rb_next(p); /* abort_request() */ if (time_before(req->r_stamp, cutoff)) { dout(" req %p tid %llu on osd%d is laggy\n", req, req->r_tid, osd->o_osd); found = true; } if (opts->osd_request_timeout && time_before(req->r_start_stamp, expiry_cutoff)) { pr_err_ratelimited("tid %llu on osd%d timeout\n", req->r_tid, osd->o_osd); abort_request(req, -ETIMEDOUT); } } for (p = rb_first(&osd->o_linger_requests); p; p = rb_next(p)) { struct ceph_osd_linger_request *lreq = rb_entry(p, struct ceph_osd_linger_request, node); dout(" lreq %p linger_id %llu is served by osd%d\n", lreq, lreq->linger_id, osd->o_osd); found = true; mutex_lock(&lreq->lock); if (lreq->is_watch && lreq->committed && !lreq->last_error) send_linger_ping(lreq); mutex_unlock(&lreq->lock); } if (found) list_move_tail(&osd->o_keepalive_item, &slow_osds); } if (opts->osd_request_timeout) { for (p = rb_first(&osdc->homeless_osd.o_requests); p; ) { struct ceph_osd_request *req = rb_entry(p, struct ceph_osd_request, r_node); p = rb_next(p); /* abort_request() */ if (time_before(req->r_start_stamp, expiry_cutoff)) { pr_err_ratelimited("tid %llu on osd%d timeout\n", req->r_tid, osdc->homeless_osd.o_osd); abort_request(req, -ETIMEDOUT); } } } if (atomic_read(&osdc->num_homeless) || !list_empty(&slow_osds)) maybe_request_map(osdc); while (!list_empty(&slow_osds)) { struct ceph_osd *osd = list_first_entry(&slow_osds, struct ceph_osd, o_keepalive_item); list_del_init(&osd->o_keepalive_item); ceph_con_keepalive(&osd->o_con); } up_write(&osdc->lock); schedule_delayed_work(&osdc->timeout_work, osdc->client->options->osd_keepalive_timeout); } static void handle_osds_timeout(struct work_struct *work) { struct ceph_osd_client *osdc = container_of(work, struct ceph_osd_client, osds_timeout_work.work); unsigned long delay = osdc->client->options->osd_idle_ttl / 4; struct ceph_osd *osd, *nosd; dout("%s osdc %p\n", __func__, osdc); down_write(&osdc->lock); list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) { if (time_before(jiffies, osd->lru_ttl)) break; WARN_ON(!RB_EMPTY_ROOT(&osd->o_requests)); WARN_ON(!RB_EMPTY_ROOT(&osd->o_linger_requests)); close_osd(osd); } up_write(&osdc->lock); schedule_delayed_work(&osdc->osds_timeout_work, round_jiffies_relative(delay)); } static int ceph_oloc_decode(void **p, void *end, struct ceph_object_locator *oloc) { u8 struct_v, struct_cv; u32 len; void *struct_end; int ret = 0; ceph_decode_need(p, end, 1 + 1 + 4, e_inval); struct_v = ceph_decode_8(p); struct_cv = ceph_decode_8(p); if (struct_v < 3) { pr_warn("got v %d < 3 cv %d of ceph_object_locator\n", struct_v, struct_cv); goto e_inval; } if (struct_cv > 6) { pr_warn("got v %d cv %d > 6 of 
ceph_object_locator\n", struct_v, struct_cv); goto e_inval; } len = ceph_decode_32(p); ceph_decode_need(p, end, len, e_inval); struct_end = *p + len; oloc->pool = ceph_decode_64(p); *p += 4; /* skip preferred */ len = ceph_decode_32(p); if (len > 0) { pr_warn("ceph_object_locator::key is set\n"); goto e_inval; } if (struct_v >= 5) { bool changed = false; len = ceph_decode_32(p); if (len > 0) { ceph_decode_need(p, end, len, e_inval); if (!oloc->pool_ns || ceph_compare_string(oloc->pool_ns, *p, len)) changed = true; *p += len; } else { if (oloc->pool_ns) changed = true; } if (changed) { /* redirect changes namespace */ pr_warn("ceph_object_locator::nspace is changed\n"); goto e_inval; } } if (struct_v >= 6) { s64 hash = ceph_decode_64(p); if (hash != -1) { pr_warn("ceph_object_locator::hash is set\n"); goto e_inval; } } /* skip the rest */ *p = struct_end; out: return ret; e_inval: ret = -EINVAL; goto out; } static int ceph_redirect_decode(void **p, void *end, struct ceph_request_redirect *redir) { u8 struct_v, struct_cv; u32 len; void *struct_end; int ret; ceph_decode_need(p, end, 1 + 1 + 4, e_inval); struct_v = ceph_decode_8(p); struct_cv = ceph_decode_8(p); if (struct_cv > 1) { pr_warn("got v %d cv %d > 1 of ceph_request_redirect\n", struct_v, struct_cv); goto e_inval; } len = ceph_decode_32(p); ceph_decode_need(p, end, len, e_inval); struct_end = *p + len; ret = ceph_oloc_decode(p, end, &redir->oloc); if (ret) goto out; len = ceph_decode_32(p); if (len > 0) { pr_warn("ceph_request_redirect::object_name is set\n"); goto e_inval; } /* skip the rest */ *p = struct_end; out: return ret; e_inval: ret = -EINVAL; goto out; } struct MOSDOpReply { struct ceph_pg pgid; u64 flags; int result; u32 epoch; int num_ops; u32 outdata_len[CEPH_OSD_MAX_OPS]; s32 rval[CEPH_OSD_MAX_OPS]; int retry_attempt; struct ceph_eversion replay_version; u64 user_version; struct ceph_request_redirect redirect; }; static int decode_MOSDOpReply(const struct ceph_msg *msg, struct MOSDOpReply *m) { void *p = msg->front.iov_base; void *const end = p + msg->front.iov_len; u16 version = le16_to_cpu(msg->hdr.version); struct ceph_eversion bad_replay_version; u8 decode_redir; u32 len; int ret; int i; ceph_decode_32_safe(&p, end, len, e_inval); ceph_decode_need(&p, end, len, e_inval); p += len; /* skip oid */ ret = ceph_decode_pgid(&p, end, &m->pgid); if (ret) return ret; ceph_decode_64_safe(&p, end, m->flags, e_inval); ceph_decode_32_safe(&p, end, m->result, e_inval); ceph_decode_need(&p, end, sizeof(bad_replay_version), e_inval); memcpy(&bad_replay_version, p, sizeof(bad_replay_version)); p += sizeof(bad_replay_version); ceph_decode_32_safe(&p, end, m->epoch, e_inval); ceph_decode_32_safe(&p, end, m->num_ops, e_inval); if (m->num_ops > ARRAY_SIZE(m->outdata_len)) goto e_inval; ceph_decode_need(&p, end, m->num_ops * sizeof(struct ceph_osd_op), e_inval); for (i = 0; i < m->num_ops; i++) { struct ceph_osd_op *op = p; m->outdata_len[i] = le32_to_cpu(op->payload_len); p += sizeof(*op); } ceph_decode_32_safe(&p, end, m->retry_attempt, e_inval); for (i = 0; i < m->num_ops; i++) ceph_decode_32_safe(&p, end, m->rval[i], e_inval); if (version >= 5) { ceph_decode_need(&p, end, sizeof(m->replay_version), e_inval); memcpy(&m->replay_version, p, sizeof(m->replay_version)); p += sizeof(m->replay_version); ceph_decode_64_safe(&p, end, m->user_version, e_inval); } else { m->replay_version = bad_replay_version; /* struct */ m->user_version = le64_to_cpu(m->replay_version.version); } if (version >= 6) { if (version >= 7) ceph_decode_8_safe(&p, 
end, decode_redir, e_inval); else decode_redir = 1; } else { decode_redir = 0; } if (decode_redir) { ret = ceph_redirect_decode(&p, end, &m->redirect); if (ret) return ret; } else { ceph_oloc_init(&m->redirect.oloc); } return 0; e_inval: return -EINVAL; } /* * Handle MOSDOpReply. Set ->r_result and call the callback if it is * specified. */ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg) { struct ceph_osd_client *osdc = osd->o_osdc; struct ceph_osd_request *req; struct MOSDOpReply m; u64 tid = le64_to_cpu(msg->hdr.tid); u32 data_len = 0; int ret; int i; dout("%s msg %p tid %llu\n", __func__, msg, tid); down_read(&osdc->lock); if (!osd_registered(osd)) { dout("%s osd%d unknown\n", __func__, osd->o_osd); goto out_unlock_osdc; } WARN_ON(osd->o_osd != le64_to_cpu(msg->hdr.src.num)); mutex_lock(&osd->lock); req = lookup_request(&osd->o_requests, tid); if (!req) { dout("%s osd%d tid %llu unknown\n", __func__, osd->o_osd, tid); goto out_unlock_session; } m.redirect.oloc.pool_ns = req->r_t.target_oloc.pool_ns; ret = decode_MOSDOpReply(msg, &m); m.redirect.oloc.pool_ns = NULL; if (ret) { pr_err("failed to decode MOSDOpReply for tid %llu: %d\n", req->r_tid, ret); ceph_msg_dump(msg); goto fail_request; } dout("%s req %p tid %llu flags 0x%llx pgid %llu.%x epoch %u attempt %d v %u'%llu uv %llu\n", __func__, req, req->r_tid, m.flags, m.pgid.pool, m.pgid.seed, m.epoch, m.retry_attempt, le32_to_cpu(m.replay_version.epoch), le64_to_cpu(m.replay_version.version), m.user_version); if (m.retry_attempt >= 0) { if (m.retry_attempt != req->r_attempts - 1) { dout("req %p tid %llu retry_attempt %d != %d, ignoring\n", req, req->r_tid, m.retry_attempt, req->r_attempts - 1); goto out_unlock_session; } } else { WARN_ON(1); /* MOSDOpReply v4 is assumed */ } if (!ceph_oloc_empty(&m.redirect.oloc)) { dout("req %p tid %llu redirect pool %lld\n", req, req->r_tid, m.redirect.oloc.pool); unlink_request(osd, req); mutex_unlock(&osd->lock); /* * Not ceph_oloc_copy() - changing pool_ns is not * supported. */ req->r_t.target_oloc.pool = m.redirect.oloc.pool; req->r_flags |= CEPH_OSD_FLAG_REDIRECTED | CEPH_OSD_FLAG_IGNORE_OVERLAY | CEPH_OSD_FLAG_IGNORE_CACHE; req->r_tid = 0; __submit_request(req, false); goto out_unlock_osdc; } if (m.result == -EAGAIN) { dout("req %p tid %llu EAGAIN\n", req, req->r_tid); unlink_request(osd, req); mutex_unlock(&osd->lock); /* * The object is missing on the replica or not (yet) * readable. Clear pgid to force a resend to the primary * via legacy_change. */ req->r_t.pgid.pool = 0; req->r_t.pgid.seed = 0; WARN_ON(!req->r_t.used_replica); req->r_flags &= ~(CEPH_OSD_FLAG_BALANCE_READS | CEPH_OSD_FLAG_LOCALIZE_READS); req->r_tid = 0; __submit_request(req, false); goto out_unlock_osdc; } if (m.num_ops != req->r_num_ops) { pr_err("num_ops %d != %d for tid %llu\n", m.num_ops, req->r_num_ops, req->r_tid); goto fail_request; } for (i = 0; i < req->r_num_ops; i++) { dout(" req %p tid %llu op %d rval %d len %u\n", req, req->r_tid, i, m.rval[i], m.outdata_len[i]); req->r_ops[i].rval = m.rval[i]; req->r_ops[i].outdata_len = m.outdata_len[i]; data_len += m.outdata_len[i]; } if (data_len != le32_to_cpu(msg->hdr.data_len)) { pr_err("sum of lens %u != %u for tid %llu\n", data_len, le32_to_cpu(msg->hdr.data_len), req->r_tid); goto fail_request; } dout("%s req %p tid %llu result %d data_len %u\n", __func__, req, req->r_tid, m.result, data_len); /* * Since we only ever request ONDISK, we should only ever get * one (type of) reply back. 
*/ WARN_ON(!(m.flags & CEPH_OSD_FLAG_ONDISK)); req->r_version = m.user_version; req->r_result = m.result ?: data_len; finish_request(req); mutex_unlock(&osd->lock); up_read(&osdc->lock); __complete_request(req); return; fail_request: complete_request(req, -EIO); out_unlock_session: mutex_unlock(&osd->lock); out_unlock_osdc: up_read(&osdc->lock); } static void set_pool_was_full(struct ceph_osd_client *osdc) { struct rb_node *n; for (n = rb_first(&osdc->osdmap->pg_pools); n; n = rb_next(n)) { struct ceph_pg_pool_info *pi = rb_entry(n, struct ceph_pg_pool_info, node); pi->was_full = __pool_full(pi); } } static bool pool_cleared_full(struct ceph_osd_client *osdc, s64 pool_id) { struct ceph_pg_pool_info *pi; pi = ceph_pg_pool_by_id(osdc->osdmap, pool_id); if (!pi) return false; return pi->was_full && !__pool_full(pi); } static enum calc_target_result recalc_linger_target(struct ceph_osd_linger_request *lreq) { struct ceph_osd_client *osdc = lreq->osdc; enum calc_target_result ct_res; ct_res = calc_target(osdc, &lreq->t, true); if (ct_res == CALC_TARGET_NEED_RESEND) { struct ceph_osd *osd; osd = lookup_create_osd(osdc, lreq->t.osd, true); if (osd != lreq->osd) { unlink_linger(lreq->osd, lreq); link_linger(osd, lreq); } } return ct_res; } /* * Requeue requests whose mapping to an OSD has changed. */ static void scan_requests(struct ceph_osd *osd, bool force_resend, bool cleared_full, bool check_pool_cleared_full, struct rb_root *need_resend, struct list_head *need_resend_linger) { struct ceph_osd_client *osdc = osd->o_osdc; struct rb_node *n; bool force_resend_writes; for (n = rb_first(&osd->o_linger_requests); n; ) { struct ceph_osd_linger_request *lreq = rb_entry(n, struct ceph_osd_linger_request, node); enum calc_target_result ct_res; n = rb_next(n); /* recalc_linger_target() */ dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id); ct_res = recalc_linger_target(lreq); switch (ct_res) { case CALC_TARGET_NO_ACTION: force_resend_writes = cleared_full || (check_pool_cleared_full && pool_cleared_full(osdc, lreq->t.base_oloc.pool)); if (!force_resend && !force_resend_writes) break; fallthrough; case CALC_TARGET_NEED_RESEND: cancel_linger_map_check(lreq); /* * scan_requests() for the previous epoch(s) * may have already added it to the list, since * it's not unlinked here. 
*/ if (list_empty(&lreq->scan_item)) list_add_tail(&lreq->scan_item, need_resend_linger); break; case CALC_TARGET_POOL_DNE: list_del_init(&lreq->scan_item); check_linger_pool_dne(lreq); break; } } for (n = rb_first(&osd->o_requests); n; ) { struct ceph_osd_request *req = rb_entry(n, struct ceph_osd_request, r_node); enum calc_target_result ct_res; n = rb_next(n); /* unlink_request(), check_pool_dne() */ dout("%s req %p tid %llu\n", __func__, req, req->r_tid); ct_res = calc_target(osdc, &req->r_t, false); switch (ct_res) { case CALC_TARGET_NO_ACTION: force_resend_writes = cleared_full || (check_pool_cleared_full && pool_cleared_full(osdc, req->r_t.base_oloc.pool)); if (!force_resend && (!(req->r_flags & CEPH_OSD_FLAG_WRITE) || !force_resend_writes)) break; fallthrough; case CALC_TARGET_NEED_RESEND: cancel_map_check(req); unlink_request(osd, req); insert_request(need_resend, req); break; case CALC_TARGET_POOL_DNE: check_pool_dne(req); break; } } } static int handle_one_map(struct ceph_osd_client *osdc, void *p, void *end, bool incremental, struct rb_root *need_resend, struct list_head *need_resend_linger) { struct ceph_osdmap *newmap; struct rb_node *n; bool skipped_map = false; bool was_full; was_full = ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL); set_pool_was_full(osdc); if (incremental) newmap = osdmap_apply_incremental(&p, end, ceph_msgr2(osdc->client), osdc->osdmap); else newmap = ceph_osdmap_decode(&p, end, ceph_msgr2(osdc->client)); if (IS_ERR(newmap)) return PTR_ERR(newmap); if (newmap != osdc->osdmap) { /* * Preserve ->was_full before destroying the old map. * For pools that weren't in the old map, ->was_full * should be false. */ for (n = rb_first(&newmap->pg_pools); n; n = rb_next(n)) { struct ceph_pg_pool_info *pi = rb_entry(n, struct ceph_pg_pool_info, node); struct ceph_pg_pool_info *old_pi; old_pi = ceph_pg_pool_by_id(osdc->osdmap, pi->id); if (old_pi) pi->was_full = old_pi->was_full; else WARN_ON(pi->was_full); } if (osdc->osdmap->epoch && osdc->osdmap->epoch + 1 < newmap->epoch) { WARN_ON(incremental); skipped_map = true; } ceph_osdmap_destroy(osdc->osdmap); osdc->osdmap = newmap; } was_full &= !ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL); scan_requests(&osdc->homeless_osd, skipped_map, was_full, true, need_resend, need_resend_linger); for (n = rb_first(&osdc->osds); n; ) { struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node); n = rb_next(n); /* close_osd() */ scan_requests(osd, skipped_map, was_full, true, need_resend, need_resend_linger); if (!ceph_osd_is_up(osdc->osdmap, osd->o_osd) || memcmp(&osd->o_con.peer_addr, ceph_osd_addr(osdc->osdmap, osd->o_osd), sizeof(struct ceph_entity_addr))) close_osd(osd); } return 0; } static void kick_requests(struct ceph_osd_client *osdc, struct rb_root *need_resend, struct list_head *need_resend_linger) { struct ceph_osd_linger_request *lreq, *nlreq; enum calc_target_result ct_res; struct rb_node *n; /* make sure need_resend targets reflect latest map */ for (n = rb_first(need_resend); n; ) { struct ceph_osd_request *req = rb_entry(n, struct ceph_osd_request, r_node); n = rb_next(n); if (req->r_t.epoch < osdc->osdmap->epoch) { ct_res = calc_target(osdc, &req->r_t, false); if (ct_res == CALC_TARGET_POOL_DNE) { erase_request(need_resend, req); check_pool_dne(req); } } } for (n = rb_first(need_resend); n; ) { struct ceph_osd_request *req = rb_entry(n, struct ceph_osd_request, r_node); struct ceph_osd *osd; n = rb_next(n); erase_request(need_resend, req); /* before link_request() */ osd = lookup_create_osd(osdc, req->r_t.osd, true); 
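		/*
		 * lookup_create_osd() can hand back the homeless OSD when
		 * the map has no usable OSD for the target; the request is
		 * still linked below so it is resent once a better map
		 * arrives.
		 */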
link_request(osd, req); if (!req->r_linger) { if (!osd_homeless(osd) && !req->r_t.paused) send_request(req); } else { cancel_linger_request(req); } } list_for_each_entry_safe(lreq, nlreq, need_resend_linger, scan_item) { if (!osd_homeless(lreq->osd)) send_linger(lreq); list_del_init(&lreq->scan_item); } } /* * Process updated osd map. * * The message contains any number of incremental and full maps, normally * indicating some sort of topology change in the cluster. Kick requests * off to different OSDs as needed. */ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) { void *p = msg->front.iov_base; void *const end = p + msg->front.iov_len; u32 nr_maps, maplen; u32 epoch; struct ceph_fsid fsid; struct rb_root need_resend = RB_ROOT; LIST_HEAD(need_resend_linger); bool handled_incremental = false; bool was_pauserd, was_pausewr; bool pauserd, pausewr; int err; dout("%s have %u\n", __func__, osdc->osdmap->epoch); down_write(&osdc->lock); /* verify fsid */ ceph_decode_need(&p, end, sizeof(fsid), bad); ceph_decode_copy(&p, &fsid, sizeof(fsid)); if (ceph_check_fsid(osdc->client, &fsid) < 0) goto bad; was_pauserd = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD); was_pausewr = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR) || ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || have_pool_full(osdc); /* incremental maps */ ceph_decode_32_safe(&p, end, nr_maps, bad); dout(" %d inc maps\n", nr_maps); while (nr_maps > 0) { ceph_decode_need(&p, end, 2*sizeof(u32), bad); epoch = ceph_decode_32(&p); maplen = ceph_decode_32(&p); ceph_decode_need(&p, end, maplen, bad); if (osdc->osdmap->epoch && osdc->osdmap->epoch + 1 == epoch) { dout("applying incremental map %u len %d\n", epoch, maplen); err = handle_one_map(osdc, p, p + maplen, true, &need_resend, &need_resend_linger); if (err) goto bad; handled_incremental = true; } else { dout("ignoring incremental map %u len %d\n", epoch, maplen); } p += maplen; nr_maps--; } if (handled_incremental) goto done; /* full maps */ ceph_decode_32_safe(&p, end, nr_maps, bad); dout(" %d full maps\n", nr_maps); while (nr_maps) { ceph_decode_need(&p, end, 2*sizeof(u32), bad); epoch = ceph_decode_32(&p); maplen = ceph_decode_32(&p); ceph_decode_need(&p, end, maplen, bad); if (nr_maps > 1) { dout("skipping non-latest full map %u len %d\n", epoch, maplen); } else if (osdc->osdmap->epoch >= epoch) { dout("skipping full map %u len %d, " "older than our %u\n", epoch, maplen, osdc->osdmap->epoch); } else { dout("taking full map %u len %d\n", epoch, maplen); err = handle_one_map(osdc, p, p + maplen, false, &need_resend, &need_resend_linger); if (err) goto bad; } p += maplen; nr_maps--; } done: /* * subscribe to subsequent osdmap updates if full to ensure * we find out when we are no longer full and stop returning * ENOSPC. */ pauserd = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD); pausewr = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR) || ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || have_pool_full(osdc); if (was_pauserd || was_pausewr || pauserd || pausewr || osdc->osdmap->epoch < osdc->epoch_barrier) maybe_request_map(osdc); kick_requests(osdc, &need_resend, &need_resend_linger); ceph_osdc_abort_on_full(osdc); ceph_monc_got_map(&osdc->client->monc, CEPH_SUB_OSDMAP, osdc->osdmap->epoch); up_write(&osdc->lock); wake_up_all(&osdc->client->auth_wq); return; bad: pr_err("osdc handle_map corrupt msg\n"); ceph_msg_dump(msg); up_write(&osdc->lock); } /* * Resubmit requests pending on the given osd. 
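 * Plain requests are resent unless their target is paused; requests
 * belonging to lingers are cancelled instead, since send_linger()
 * reissues them.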
*/ static void kick_osd_requests(struct ceph_osd *osd) { struct rb_node *n; clear_backoffs(osd); for (n = rb_first(&osd->o_requests); n; ) { struct ceph_osd_request *req = rb_entry(n, struct ceph_osd_request, r_node); n = rb_next(n); /* cancel_linger_request() */ if (!req->r_linger) { if (!req->r_t.paused) send_request(req); } else { cancel_linger_request(req); } } for (n = rb_first(&osd->o_linger_requests); n; n = rb_next(n)) { struct ceph_osd_linger_request *lreq = rb_entry(n, struct ceph_osd_linger_request, node); send_linger(lreq); } } /* * If the osd connection drops, we need to resubmit all requests. */ static void osd_fault(struct ceph_connection *con) { struct ceph_osd *osd = con->private; struct ceph_osd_client *osdc = osd->o_osdc; dout("%s osd %p osd%d\n", __func__, osd, osd->o_osd); down_write(&osdc->lock); if (!osd_registered(osd)) { dout("%s osd%d unknown\n", __func__, osd->o_osd); goto out_unlock; } if (!reopen_osd(osd)) kick_osd_requests(osd); maybe_request_map(osdc); out_unlock: up_write(&osdc->lock); } struct MOSDBackoff { struct ceph_spg spgid; u32 map_epoch; u8 op; u64 id; struct ceph_hobject_id *begin; struct ceph_hobject_id *end; }; static int decode_MOSDBackoff(const struct ceph_msg *msg, struct MOSDBackoff *m) { void *p = msg->front.iov_base; void *const end = p + msg->front.iov_len; u8 struct_v; u32 struct_len; int ret; ret = ceph_start_decoding(&p, end, 1, "spg_t", &struct_v, &struct_len); if (ret) return ret; ret = ceph_decode_pgid(&p, end, &m->spgid.pgid); if (ret) return ret; ceph_decode_8_safe(&p, end, m->spgid.shard, e_inval); ceph_decode_32_safe(&p, end, m->map_epoch, e_inval); ceph_decode_8_safe(&p, end, m->op, e_inval); ceph_decode_64_safe(&p, end, m->id, e_inval); m->begin = kzalloc(sizeof(*m->begin), GFP_NOIO); if (!m->begin) return -ENOMEM; ret = decode_hoid(&p, end, m->begin); if (ret) { free_hoid(m->begin); return ret; } m->end = kzalloc(sizeof(*m->end), GFP_NOIO); if (!m->end) { free_hoid(m->begin); return -ENOMEM; } ret = decode_hoid(&p, end, m->end); if (ret) { free_hoid(m->begin); free_hoid(m->end); return ret; } return 0; e_inval: return -EINVAL; } static struct ceph_msg *create_backoff_message( const struct ceph_osd_backoff *backoff, u32 map_epoch) { struct ceph_msg *msg; void *p, *end; int msg_size; msg_size = CEPH_ENCODING_START_BLK_LEN + CEPH_PGID_ENCODING_LEN + 1; /* spgid */ msg_size += 4 + 1 + 8; /* map_epoch, op, id */ msg_size += CEPH_ENCODING_START_BLK_LEN + hoid_encoding_size(backoff->begin); msg_size += CEPH_ENCODING_START_BLK_LEN + hoid_encoding_size(backoff->end); msg = ceph_msg_new(CEPH_MSG_OSD_BACKOFF, msg_size, GFP_NOIO, true); if (!msg) return NULL; p = msg->front.iov_base; end = p + msg->front_alloc_len; encode_spgid(&p, &backoff->spgid); ceph_encode_32(&p, map_epoch); ceph_encode_8(&p, CEPH_OSD_BACKOFF_OP_ACK_BLOCK); ceph_encode_64(&p, backoff->id); encode_hoid(&p, end, backoff->begin); encode_hoid(&p, end, backoff->end); BUG_ON(p != end); msg->front.iov_len = p - msg->front.iov_base; msg->hdr.version = cpu_to_le16(1); /* MOSDBackoff v1 */ msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); return msg; } static void handle_backoff_block(struct ceph_osd *osd, struct MOSDBackoff *m) { struct ceph_spg_mapping *spg; struct ceph_osd_backoff *backoff; struct ceph_msg *msg; dout("%s osd%d spgid %llu.%xs%d id %llu\n", __func__, osd->o_osd, m->spgid.pgid.pool, m->spgid.pgid.seed, m->spgid.shard, m->id); spg = lookup_spg_mapping(&osd->o_backoff_mappings, &m->spgid); if (!spg) { spg = alloc_spg_mapping(); if (!spg) { pr_err("%s failed 
to allocate spg\n", __func__); return; } spg->spgid = m->spgid; /* struct */ insert_spg_mapping(&osd->o_backoff_mappings, spg); } backoff = alloc_backoff(); if (!backoff) { pr_err("%s failed to allocate backoff\n", __func__); return; } backoff->spgid = m->spgid; /* struct */ backoff->id = m->id; backoff->begin = m->begin; m->begin = NULL; /* backoff now owns this */ backoff->end = m->end; m->end = NULL; /* ditto */ insert_backoff(&spg->backoffs, backoff); insert_backoff_by_id(&osd->o_backoffs_by_id, backoff); /* * Ack with original backoff's epoch so that the OSD can * discard this if there was a PG split. */ msg = create_backoff_message(backoff, m->map_epoch); if (!msg) { pr_err("%s failed to allocate msg\n", __func__); return; } ceph_con_send(&osd->o_con, msg); } static bool target_contained_by(const struct ceph_osd_request_target *t, const struct ceph_hobject_id *begin, const struct ceph_hobject_id *end) { struct ceph_hobject_id hoid; int cmp; hoid_fill_from_target(&hoid, t); cmp = hoid_compare(&hoid, begin); return !cmp || (cmp > 0 && hoid_compare(&hoid, end) < 0); } static void handle_backoff_unblock(struct ceph_osd *osd, const struct MOSDBackoff *m) { struct ceph_spg_mapping *spg; struct ceph_osd_backoff *backoff; struct rb_node *n; dout("%s osd%d spgid %llu.%xs%d id %llu\n", __func__, osd->o_osd, m->spgid.pgid.pool, m->spgid.pgid.seed, m->spgid.shard, m->id); backoff = lookup_backoff_by_id(&osd->o_backoffs_by_id, m->id); if (!backoff) { pr_err("%s osd%d spgid %llu.%xs%d id %llu backoff dne\n", __func__, osd->o_osd, m->spgid.pgid.pool, m->spgid.pgid.seed, m->spgid.shard, m->id); return; } if (hoid_compare(backoff->begin, m->begin) && hoid_compare(backoff->end, m->end)) { pr_err("%s osd%d spgid %llu.%xs%d id %llu bad range?\n", __func__, osd->o_osd, m->spgid.pgid.pool, m->spgid.pgid.seed, m->spgid.shard, m->id); /* unblock it anyway... */ } spg = lookup_spg_mapping(&osd->o_backoff_mappings, &backoff->spgid); BUG_ON(!spg); erase_backoff(&spg->backoffs, backoff); erase_backoff_by_id(&osd->o_backoffs_by_id, backoff); free_backoff(backoff); if (RB_EMPTY_ROOT(&spg->backoffs)) { erase_spg_mapping(&osd->o_backoff_mappings, spg); free_spg_mapping(spg); } for (n = rb_first(&osd->o_requests); n; n = rb_next(n)) { struct ceph_osd_request *req = rb_entry(n, struct ceph_osd_request, r_node); if (!ceph_spg_compare(&req->r_t.spgid, &m->spgid)) { /* * Match against @m, not @backoff -- the PG may * have split on the OSD. */ if (target_contained_by(&req->r_t, m->begin, m->end)) { /* * If no other installed backoff applies, * resend. 
*/ send_request(req); } } } } static void handle_backoff(struct ceph_osd *osd, struct ceph_msg *msg) { struct ceph_osd_client *osdc = osd->o_osdc; struct MOSDBackoff m; int ret; down_read(&osdc->lock); if (!osd_registered(osd)) { dout("%s osd%d unknown\n", __func__, osd->o_osd); up_read(&osdc->lock); return; } WARN_ON(osd->o_osd != le64_to_cpu(msg->hdr.src.num)); mutex_lock(&osd->lock); ret = decode_MOSDBackoff(msg, &m); if (ret) { pr_err("failed to decode MOSDBackoff: %d\n", ret); ceph_msg_dump(msg); goto out_unlock; } switch (m.op) { case CEPH_OSD_BACKOFF_OP_BLOCK: handle_backoff_block(osd, &m); break; case CEPH_OSD_BACKOFF_OP_UNBLOCK: handle_backoff_unblock(osd, &m); break; default: pr_err("%s osd%d unknown op %d\n", __func__, osd->o_osd, m.op); } free_hoid(m.begin); free_hoid(m.end); out_unlock: mutex_unlock(&osd->lock); up_read(&osdc->lock); } /* * Process osd watch notifications */ static void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg) { void *p = msg->front.iov_base; void *const end = p + msg->front.iov_len; struct ceph_osd_linger_request *lreq; struct linger_work *lwork; u8 proto_ver, opcode; u64 cookie, notify_id; u64 notifier_id = 0; s32 return_code = 0; void *payload = NULL; u32 payload_len = 0; ceph_decode_8_safe(&p, end, proto_ver, bad); ceph_decode_8_safe(&p, end, opcode, bad); ceph_decode_64_safe(&p, end, cookie, bad); p += 8; /* skip ver */ ceph_decode_64_safe(&p, end, notify_id, bad); if (proto_ver >= 1) { ceph_decode_32_safe(&p, end, payload_len, bad); ceph_decode_need(&p, end, payload_len, bad); payload = p; p += payload_len; } if (le16_to_cpu(msg->hdr.version) >= 2) ceph_decode_32_safe(&p, end, return_code, bad); if (le16_to_cpu(msg->hdr.version) >= 3) ceph_decode_64_safe(&p, end, notifier_id, bad); down_read(&osdc->lock); lreq = lookup_linger_osdc(&osdc->linger_requests, cookie); if (!lreq) { dout("%s opcode %d cookie %llu dne\n", __func__, opcode, cookie); goto out_unlock_osdc; } mutex_lock(&lreq->lock); dout("%s opcode %d cookie %llu lreq %p is_watch %d\n", __func__, opcode, cookie, lreq, lreq->is_watch); if (opcode == CEPH_WATCH_EVENT_DISCONNECT) { if (!lreq->last_error) { lreq->last_error = -ENOTCONN; queue_watch_error(lreq); } } else if (!lreq->is_watch) { /* CEPH_WATCH_EVENT_NOTIFY_COMPLETE */ if (lreq->notify_id && lreq->notify_id != notify_id) { dout("lreq %p notify_id %llu != %llu, ignoring\n", lreq, lreq->notify_id, notify_id); } else if (!completion_done(&lreq->notify_finish_wait)) { struct ceph_msg_data *data = msg->num_data_items ? &msg->data[0] : NULL; if (data) { if (lreq->preply_pages) { WARN_ON(data->type != CEPH_MSG_DATA_PAGES); *lreq->preply_pages = data->pages; *lreq->preply_len = data->length; data->own_pages = false; } } lreq->notify_finish_error = return_code; complete_all(&lreq->notify_finish_wait); } } else { /* CEPH_WATCH_EVENT_NOTIFY */ lwork = lwork_alloc(lreq, do_watch_notify); if (!lwork) { pr_err("failed to allocate notify-lwork\n"); goto out_unlock_lreq; } lwork->notify.notify_id = notify_id; lwork->notify.notifier_id = notifier_id; lwork->notify.payload = payload; lwork->notify.payload_len = payload_len; lwork->notify.msg = ceph_msg_get(msg); lwork_queue(lwork); } out_unlock_lreq: mutex_unlock(&lreq->lock); out_unlock_osdc: up_read(&osdc->lock); return; bad: pr_err("osdc handle_watch_notify corrupt msg\n"); } /* * Register request, send initial attempt. 
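 * The caller keeps its reference and can later wait with
 * ceph_osdc_wait_request() or cancel with ceph_osdc_cancel_request().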
*/ void ceph_osdc_start_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req) { down_read(&osdc->lock); submit_request(req, false); up_read(&osdc->lock); } EXPORT_SYMBOL(ceph_osdc_start_request); /* * Unregister request. If @req was registered, it isn't completed: * r_result isn't set and __complete_request() isn't invoked. * * If @req wasn't registered, this call may have raced with * handle_reply(), in which case r_result would already be set and * __complete_request() would be getting invoked, possibly even * concurrently with this call. */ void ceph_osdc_cancel_request(struct ceph_osd_request *req) { struct ceph_osd_client *osdc = req->r_osdc; down_write(&osdc->lock); if (req->r_osd) cancel_request(req); up_write(&osdc->lock); } EXPORT_SYMBOL(ceph_osdc_cancel_request); /* * @timeout: in jiffies, 0 means "wait forever" */ static int wait_request_timeout(struct ceph_osd_request *req, unsigned long timeout) { long left; dout("%s req %p tid %llu\n", __func__, req, req->r_tid); left = wait_for_completion_killable_timeout(&req->r_completion, ceph_timeout_jiffies(timeout)); if (left <= 0) { left = left ?: -ETIMEDOUT; ceph_osdc_cancel_request(req); } else { left = req->r_result; /* completed */ } return left; } /* * wait for a request to complete */ int ceph_osdc_wait_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req) { return wait_request_timeout(req, 0); } EXPORT_SYMBOL(ceph_osdc_wait_request); /* * sync - wait for all in-flight requests to flush. avoid starvation. */ void ceph_osdc_sync(struct ceph_osd_client *osdc) { struct rb_node *n, *p; u64 last_tid = atomic64_read(&osdc->last_tid); again: down_read(&osdc->lock); for (n = rb_first(&osdc->osds); n; n = rb_next(n)) { struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node); mutex_lock(&osd->lock); for (p = rb_first(&osd->o_requests); p; p = rb_next(p)) { struct ceph_osd_request *req = rb_entry(p, struct ceph_osd_request, r_node); if (req->r_tid > last_tid) break; if (!(req->r_flags & CEPH_OSD_FLAG_WRITE)) continue; ceph_osdc_get_request(req); mutex_unlock(&osd->lock); up_read(&osdc->lock); dout("%s waiting on req %p tid %llu last_tid %llu\n", __func__, req, req->r_tid, last_tid); wait_for_completion(&req->r_completion); ceph_osdc_put_request(req); goto again; } mutex_unlock(&osd->lock); } up_read(&osdc->lock); dout("%s done last_tid %llu\n", __func__, last_tid); } EXPORT_SYMBOL(ceph_osdc_sync); /* * Returns a handle, caller owns a ref. */ struct ceph_osd_linger_request * ceph_osdc_watch(struct ceph_osd_client *osdc, struct ceph_object_id *oid, struct ceph_object_locator *oloc, rados_watchcb2_t wcb, rados_watcherrcb_t errcb, void *data) { struct ceph_osd_linger_request *lreq; int ret; lreq = linger_alloc(osdc); if (!lreq) return ERR_PTR(-ENOMEM); lreq->is_watch = true; lreq->wcb = wcb; lreq->errcb = errcb; lreq->data = data; lreq->watch_valid_thru = jiffies; ceph_oid_copy(&lreq->t.base_oid, oid); ceph_oloc_copy(&lreq->t.base_oloc, oloc); lreq->t.flags = CEPH_OSD_FLAG_WRITE; ktime_get_real_ts64(&lreq->mtime); linger_submit(lreq); ret = linger_reg_commit_wait(lreq); if (ret) { linger_cancel(lreq); goto err_put_lreq; } return lreq; err_put_lreq: linger_put(lreq); return ERR_PTR(ret); } EXPORT_SYMBOL(ceph_osdc_watch); /* * Releases a ref. * * Times out after mount_timeout to preserve rbd unmap behaviour * introduced in 2894e1d76974 ("rbd: timeout watch teardown on unmap * with mount_timeout"). 
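 * The teardown itself is an ordinary write request carrying
 * CEPH_OSD_WATCH_OP_UNWATCH; the linger is cancelled as soon as that
 * request has been started.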
*/ int ceph_osdc_unwatch(struct ceph_osd_client *osdc, struct ceph_osd_linger_request *lreq) { struct ceph_options *opts = osdc->client->options; struct ceph_osd_request *req; int ret; req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_NOIO); if (!req) return -ENOMEM; ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid); ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc); req->r_flags = CEPH_OSD_FLAG_WRITE; ktime_get_real_ts64(&req->r_mtime); osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_UNWATCH, lreq->linger_id, 0); ret = ceph_osdc_alloc_messages(req, GFP_NOIO); if (ret) goto out_put_req; ceph_osdc_start_request(osdc, req); linger_cancel(lreq); linger_put(lreq); ret = wait_request_timeout(req, opts->mount_timeout); out_put_req: ceph_osdc_put_request(req); return ret; } EXPORT_SYMBOL(ceph_osdc_unwatch); static int osd_req_op_notify_ack_init(struct ceph_osd_request *req, int which, u64 notify_id, u64 cookie, void *payload, u32 payload_len) { struct ceph_osd_req_op *op; struct ceph_pagelist *pl; int ret; op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY_ACK, 0); pl = ceph_pagelist_alloc(GFP_NOIO); if (!pl) return -ENOMEM; ret = ceph_pagelist_encode_64(pl, notify_id); ret |= ceph_pagelist_encode_64(pl, cookie); if (payload) { ret |= ceph_pagelist_encode_32(pl, payload_len); ret |= ceph_pagelist_append(pl, payload, payload_len); } else { ret |= ceph_pagelist_encode_32(pl, 0); } if (ret) { ceph_pagelist_release(pl); return -ENOMEM; } ceph_osd_data_pagelist_init(&op->notify_ack.request_data, pl); op->indata_len = pl->length; return 0; } int ceph_osdc_notify_ack(struct ceph_osd_client *osdc, struct ceph_object_id *oid, struct ceph_object_locator *oloc, u64 notify_id, u64 cookie, void *payload, u32 payload_len) { struct ceph_osd_request *req; int ret; req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_NOIO); if (!req) return -ENOMEM; ceph_oid_copy(&req->r_base_oid, oid); ceph_oloc_copy(&req->r_base_oloc, oloc); req->r_flags = CEPH_OSD_FLAG_READ; ret = osd_req_op_notify_ack_init(req, 0, notify_id, cookie, payload, payload_len); if (ret) goto out_put_req; ret = ceph_osdc_alloc_messages(req, GFP_NOIO); if (ret) goto out_put_req; ceph_osdc_start_request(osdc, req); ret = ceph_osdc_wait_request(osdc, req); out_put_req: ceph_osdc_put_request(req); return ret; } EXPORT_SYMBOL(ceph_osdc_notify_ack); /* * @timeout: in seconds * * @preply_{pages,len} are initialized both on success and error. 
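 * They are set up front, so releasing the reply pages is safe whether
 * or not the notify succeeded.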
* The caller is responsible for: * * ceph_release_page_vector(reply_pages, calc_pages_for(0, reply_len)) */ int ceph_osdc_notify(struct ceph_osd_client *osdc, struct ceph_object_id *oid, struct ceph_object_locator *oloc, void *payload, u32 payload_len, u32 timeout, struct page ***preply_pages, size_t *preply_len) { struct ceph_osd_linger_request *lreq; int ret; WARN_ON(!timeout); if (preply_pages) { *preply_pages = NULL; *preply_len = 0; } lreq = linger_alloc(osdc); if (!lreq) return -ENOMEM; lreq->request_pl = ceph_pagelist_alloc(GFP_NOIO); if (!lreq->request_pl) { ret = -ENOMEM; goto out_put_lreq; } ret = ceph_pagelist_encode_32(lreq->request_pl, 1); /* prot_ver */ ret |= ceph_pagelist_encode_32(lreq->request_pl, timeout); ret |= ceph_pagelist_encode_32(lreq->request_pl, payload_len); ret |= ceph_pagelist_append(lreq->request_pl, payload, payload_len); if (ret) { ret = -ENOMEM; goto out_put_lreq; } /* for notify_id */ lreq->notify_id_pages = ceph_alloc_page_vector(1, GFP_NOIO); if (IS_ERR(lreq->notify_id_pages)) { ret = PTR_ERR(lreq->notify_id_pages); lreq->notify_id_pages = NULL; goto out_put_lreq; } lreq->preply_pages = preply_pages; lreq->preply_len = preply_len; ceph_oid_copy(&lreq->t.base_oid, oid); ceph_oloc_copy(&lreq->t.base_oloc, oloc); lreq->t.flags = CEPH_OSD_FLAG_READ; linger_submit(lreq); ret = linger_reg_commit_wait(lreq); if (!ret) ret = linger_notify_finish_wait(lreq, msecs_to_jiffies(2 * timeout * MSEC_PER_SEC)); else dout("lreq %p failed to initiate notify %d\n", lreq, ret); linger_cancel(lreq); out_put_lreq: linger_put(lreq); return ret; } EXPORT_SYMBOL(ceph_osdc_notify); static int decode_watcher(void **p, void *end, struct ceph_watch_item *item) { u8 struct_v; u32 struct_len; int ret; ret = ceph_start_decoding(p, end, 2, "watch_item_t", &struct_v, &struct_len); if (ret) goto bad; ret = -EINVAL; ceph_decode_copy_safe(p, end, &item->name, sizeof(item->name), bad); ceph_decode_64_safe(p, end, item->cookie, bad); ceph_decode_skip_32(p, end, bad); /* skip timeout seconds */ if (struct_v >= 2) { ret = ceph_decode_entity_addr(p, end, &item->addr); if (ret) goto bad; } else { ret = 0; } dout("%s %s%llu cookie %llu addr %s\n", __func__, ENTITY_NAME(item->name), item->cookie, ceph_pr_addr(&item->addr)); bad: return ret; } static int decode_watchers(void **p, void *end, struct ceph_watch_item **watchers, u32 *num_watchers) { u8 struct_v; u32 struct_len; int i; int ret; ret = ceph_start_decoding(p, end, 1, "obj_list_watch_response_t", &struct_v, &struct_len); if (ret) return ret; *num_watchers = ceph_decode_32(p); *watchers = kcalloc(*num_watchers, sizeof(**watchers), GFP_NOIO); if (!*watchers) return -ENOMEM; for (i = 0; i < *num_watchers; i++) { ret = decode_watcher(p, end, *watchers + i); if (ret) { kfree(*watchers); return ret; } } return 0; } /* * On success, the caller is responsible for: * * kfree(watchers); */ int ceph_osdc_list_watchers(struct ceph_osd_client *osdc, struct ceph_object_id *oid, struct ceph_object_locator *oloc, struct ceph_watch_item **watchers, u32 *num_watchers) { struct ceph_osd_request *req; struct page **pages; int ret; req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_NOIO); if (!req) return -ENOMEM; ceph_oid_copy(&req->r_base_oid, oid); ceph_oloc_copy(&req->r_base_oloc, oloc); req->r_flags = CEPH_OSD_FLAG_READ; pages = ceph_alloc_page_vector(1, GFP_NOIO); if (IS_ERR(pages)) { ret = PTR_ERR(pages); goto out_put_req; } osd_req_op_init(req, 0, CEPH_OSD_OP_LIST_WATCHERS, 0); ceph_osd_data_pages_init(osd_req_op_data(req, 0, list_watchers, 
response_data), pages, PAGE_SIZE, 0, false, true); ret = ceph_osdc_alloc_messages(req, GFP_NOIO); if (ret) goto out_put_req; ceph_osdc_start_request(osdc, req); ret = ceph_osdc_wait_request(osdc, req); if (ret >= 0) { void *p = page_address(pages[0]); void *const end = p + req->r_ops[0].outdata_len; ret = decode_watchers(&p, end, watchers, num_watchers); } out_put_req: ceph_osdc_put_request(req); return ret; } EXPORT_SYMBOL(ceph_osdc_list_watchers); /* * Call all pending notify callbacks - for use after a watch is * unregistered, to make sure no more callbacks for it will be invoked */ void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc) { dout("%s osdc %p\n", __func__, osdc); flush_workqueue(osdc->notify_wq); } EXPORT_SYMBOL(ceph_osdc_flush_notifies); void ceph_osdc_maybe_request_map(struct ceph_osd_client *osdc) { down_read(&osdc->lock); maybe_request_map(osdc); up_read(&osdc->lock); } EXPORT_SYMBOL(ceph_osdc_maybe_request_map); /* * Execute an OSD class method on an object. * * @flags: CEPH_OSD_FLAG_* * @resp_len: in/out param for reply length */ int ceph_osdc_call(struct ceph_osd_client *osdc, struct ceph_object_id *oid, struct ceph_object_locator *oloc, const char *class, const char *method, unsigned int flags, struct page *req_page, size_t req_len, struct page **resp_pages, size_t *resp_len) { struct ceph_osd_request *req; int ret; if (req_len > PAGE_SIZE) return -E2BIG; req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_NOIO); if (!req) return -ENOMEM; ceph_oid_copy(&req->r_base_oid, oid); ceph_oloc_copy(&req->r_base_oloc, oloc); req->r_flags = flags; ret = osd_req_op_cls_init(req, 0, class, method); if (ret) goto out_put_req; if (req_page) osd_req_op_cls_request_data_pages(req, 0, &req_page, req_len, 0, false, false); if (resp_pages) osd_req_op_cls_response_data_pages(req, 0, resp_pages, *resp_len, 0, false, false); ret = ceph_osdc_alloc_messages(req, GFP_NOIO); if (ret) goto out_put_req; ceph_osdc_start_request(osdc, req); ret = ceph_osdc_wait_request(osdc, req); if (ret >= 0) { ret = req->r_ops[0].rval; if (resp_pages) *resp_len = req->r_ops[0].outdata_len; } out_put_req: ceph_osdc_put_request(req); return ret; } EXPORT_SYMBOL(ceph_osdc_call); /* * reset all osd connections */ void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc) { struct rb_node *n; down_write(&osdc->lock); for (n = rb_first(&osdc->osds); n; ) { struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node); n = rb_next(n); if (!reopen_osd(osd)) kick_osd_requests(osd); } up_write(&osdc->lock); } /* * init, shutdown */ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) { int err; dout("init\n"); osdc->client = client; init_rwsem(&osdc->lock); osdc->osds = RB_ROOT; INIT_LIST_HEAD(&osdc->osd_lru); spin_lock_init(&osdc->osd_lru_lock); osd_init(&osdc->homeless_osd); osdc->homeless_osd.o_osdc = osdc; osdc->homeless_osd.o_osd = CEPH_HOMELESS_OSD; osdc->last_linger_id = CEPH_LINGER_ID_START; osdc->linger_requests = RB_ROOT; osdc->map_checks = RB_ROOT; osdc->linger_map_checks = RB_ROOT; INIT_DELAYED_WORK(&osdc->timeout_work, handle_timeout); INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout); err = -ENOMEM; osdc->osdmap = ceph_osdmap_alloc(); if (!osdc->osdmap) goto out; osdc->req_mempool = mempool_create_slab_pool(10, ceph_osd_request_cache); if (!osdc->req_mempool) goto out_map; err = ceph_msgpool_init(&osdc->msgpool_op, CEPH_MSG_OSD_OP, PAGE_SIZE, CEPH_OSD_SLAB_OPS, 10, "osd_op"); if (err < 0) goto out_mempool; err = ceph_msgpool_init(&osdc->msgpool_op_reply, 
CEPH_MSG_OSD_OPREPLY, PAGE_SIZE, CEPH_OSD_SLAB_OPS, 10, "osd_op_reply"); if (err < 0) goto out_msgpool; err = -ENOMEM; osdc->notify_wq = create_singlethread_workqueue("ceph-watch-notify"); if (!osdc->notify_wq) goto out_msgpool_reply; osdc->completion_wq = create_singlethread_workqueue("ceph-completion"); if (!osdc->completion_wq) goto out_notify_wq; schedule_delayed_work(&osdc->timeout_work, osdc->client->options->osd_keepalive_timeout); schedule_delayed_work(&osdc->osds_timeout_work, round_jiffies_relative(osdc->client->options->osd_idle_ttl)); return 0; out_notify_wq: destroy_workqueue(osdc->notify_wq); out_msgpool_reply: ceph_msgpool_destroy(&osdc->msgpool_op_reply); out_msgpool: ceph_msgpool_destroy(&osdc->msgpool_op); out_mempool: mempool_destroy(osdc->req_mempool); out_map: ceph_osdmap_destroy(osdc->osdmap); out: return err; } void ceph_osdc_stop(struct ceph_osd_client *osdc) { destroy_workqueue(osdc->completion_wq); destroy_workqueue(osdc->notify_wq); cancel_delayed_work_sync(&osdc->timeout_work); cancel_delayed_work_sync(&osdc->osds_timeout_work); down_write(&osdc->lock); while (!RB_EMPTY_ROOT(&osdc->osds)) { struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds), struct ceph_osd, o_node); close_osd(osd); } up_write(&osdc->lock); WARN_ON(refcount_read(&osdc->homeless_osd.o_ref) != 1); osd_cleanup(&osdc->homeless_osd); WARN_ON(!list_empty(&osdc->osd_lru)); WARN_ON(!RB_EMPTY_ROOT(&osdc->linger_requests)); WARN_ON(!RB_EMPTY_ROOT(&osdc->map_checks)); WARN_ON(!RB_EMPTY_ROOT(&osdc->linger_map_checks)); WARN_ON(atomic_read(&osdc->num_requests)); WARN_ON(atomic_read(&osdc->num_homeless)); ceph_osdmap_destroy(osdc->osdmap); mempool_destroy(osdc->req_mempool); ceph_msgpool_destroy(&osdc->msgpool_op); ceph_msgpool_destroy(&osdc->msgpool_op_reply); } int osd_req_op_copy_from_init(struct ceph_osd_request *req, u64 src_snapid, u64 src_version, struct ceph_object_id *src_oid, struct ceph_object_locator *src_oloc, u32 src_fadvise_flags, u32 dst_fadvise_flags, u32 truncate_seq, u64 truncate_size, u8 copy_from_flags) { struct ceph_osd_req_op *op; struct page **pages; void *p, *end; pages = ceph_alloc_page_vector(1, GFP_KERNEL); if (IS_ERR(pages)) return PTR_ERR(pages); op = osd_req_op_init(req, 0, CEPH_OSD_OP_COPY_FROM2, dst_fadvise_flags); op->copy_from.snapid = src_snapid; op->copy_from.src_version = src_version; op->copy_from.flags = copy_from_flags; op->copy_from.src_fadvise_flags = src_fadvise_flags; p = page_address(pages[0]); end = p + PAGE_SIZE; ceph_encode_string(&p, end, src_oid->name, src_oid->name_len); encode_oloc(&p, end, src_oloc); ceph_encode_32(&p, truncate_seq); ceph_encode_64(&p, truncate_size); op->indata_len = PAGE_SIZE - (end - p); ceph_osd_data_pages_init(&op->copy_from.osd_data, pages, op->indata_len, 0, false, true); return 0; } EXPORT_SYMBOL(osd_req_op_copy_from_init); int __init ceph_osdc_setup(void) { size_t size = sizeof(struct ceph_osd_request) + CEPH_OSD_SLAB_OPS * sizeof(struct ceph_osd_req_op); BUG_ON(ceph_osd_request_cache); ceph_osd_request_cache = kmem_cache_create("ceph_osd_request", size, 0, 0, NULL); return ceph_osd_request_cache ? 
0 : -ENOMEM; } void ceph_osdc_cleanup(void) { BUG_ON(!ceph_osd_request_cache); kmem_cache_destroy(ceph_osd_request_cache); ceph_osd_request_cache = NULL; } /* * handle incoming message */ static void osd_dispatch(struct ceph_connection *con, struct ceph_msg *msg) { struct ceph_osd *osd = con->private; struct ceph_osd_client *osdc = osd->o_osdc; int type = le16_to_cpu(msg->hdr.type); switch (type) { case CEPH_MSG_OSD_MAP: ceph_osdc_handle_map(osdc, msg); break; case CEPH_MSG_OSD_OPREPLY: handle_reply(osd, msg); break; case CEPH_MSG_OSD_BACKOFF: handle_backoff(osd, msg); break; case CEPH_MSG_WATCH_NOTIFY: handle_watch_notify(osdc, msg); break; default: pr_err("received unknown message type %d %s\n", type, ceph_msg_type_name(type)); } ceph_msg_put(msg); } /* How much sparse data was requested? */ static u64 sparse_data_requested(struct ceph_osd_request *req) { u64 len = 0; if (req->r_flags & CEPH_OSD_FLAG_READ) { int i; for (i = 0; i < req->r_num_ops; ++i) { struct ceph_osd_req_op *op = &req->r_ops[i]; if (op->op == CEPH_OSD_OP_SPARSE_READ) len += op->extent.length; } } return len; } /* * Lookup and return message for incoming reply. Don't try to do * anything about a larger than preallocated data portion of the * message at the moment - for now, just skip the message. */ static struct ceph_msg *get_reply(struct ceph_connection *con, struct ceph_msg_header *hdr, int *skip) { struct ceph_osd *osd = con->private; struct ceph_osd_client *osdc = osd->o_osdc; struct ceph_msg *m = NULL; struct ceph_osd_request *req; int front_len = le32_to_cpu(hdr->front_len); int data_len = le32_to_cpu(hdr->data_len); u64 tid = le64_to_cpu(hdr->tid); u64 srlen; down_read(&osdc->lock); if (!osd_registered(osd)) { dout("%s osd%d unknown, skipping\n", __func__, osd->o_osd); *skip = 1; goto out_unlock_osdc; } WARN_ON(osd->o_osd != le64_to_cpu(hdr->src.num)); mutex_lock(&osd->lock); req = lookup_request(&osd->o_requests, tid); if (!req) { dout("%s osd%d tid %llu unknown, skipping\n", __func__, osd->o_osd, tid); *skip = 1; goto out_unlock_session; } ceph_msg_revoke_incoming(req->r_reply); if (front_len > req->r_reply->front_alloc_len) { pr_warn("%s osd%d tid %llu front %d > preallocated %d\n", __func__, osd->o_osd, req->r_tid, front_len, req->r_reply->front_alloc_len); m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front_len, GFP_NOFS, false); if (!m) goto out_unlock_session; ceph_msg_put(req->r_reply); req->r_reply = m; } srlen = sparse_data_requested(req); if (!srlen && data_len > req->r_reply->data_length) { pr_warn("%s osd%d tid %llu data %d > preallocated %zu, skipping\n", __func__, osd->o_osd, req->r_tid, data_len, req->r_reply->data_length); m = NULL; *skip = 1; goto out_unlock_session; } m = ceph_msg_get(req->r_reply); m->sparse_read_total = srlen; dout("get_reply tid %lld %p\n", tid, m); out_unlock_session: mutex_unlock(&osd->lock); out_unlock_osdc: up_read(&osdc->lock); return m; } static struct ceph_msg *alloc_msg_with_page_vector(struct ceph_msg_header *hdr) { struct ceph_msg *m; int type = le16_to_cpu(hdr->type); u32 front_len = le32_to_cpu(hdr->front_len); u32 data_len = le32_to_cpu(hdr->data_len); m = ceph_msg_new2(type, front_len, 1, GFP_NOIO, false); if (!m) return NULL; if (data_len) { struct page **pages; pages = ceph_alloc_page_vector(calc_pages_for(0, data_len), GFP_NOIO); if (IS_ERR(pages)) { ceph_msg_put(m); return NULL; } ceph_msg_data_add_pages(m, pages, data_len, 0, true); } return m; } static struct ceph_msg *osd_alloc_msg(struct ceph_connection *con, struct ceph_msg_header *hdr, int *skip) { struct 
ceph_osd *osd = con->private; int type = le16_to_cpu(hdr->type); *skip = 0; switch (type) { case CEPH_MSG_OSD_MAP: case CEPH_MSG_OSD_BACKOFF: case CEPH_MSG_WATCH_NOTIFY: return alloc_msg_with_page_vector(hdr); case CEPH_MSG_OSD_OPREPLY: return get_reply(con, hdr, skip); default: pr_warn("%s osd%d unknown msg type %d, skipping\n", __func__, osd->o_osd, type); *skip = 1; return NULL; } } /* * Wrappers to refcount containing ceph_osd struct */ static struct ceph_connection *osd_get_con(struct ceph_connection *con) { struct ceph_osd *osd = con->private; if (get_osd(osd)) return con; return NULL; } static void osd_put_con(struct ceph_connection *con) { struct ceph_osd *osd = con->private; put_osd(osd); } /* * authentication */ /* * Note: returned pointer is the address of a structure that's * managed separately. Caller must *not* attempt to free it. */ static struct ceph_auth_handshake * osd_get_authorizer(struct ceph_connection *con, int *proto, int force_new) { struct ceph_osd *o = con->private; struct ceph_osd_client *osdc = o->o_osdc; struct ceph_auth_client *ac = osdc->client->monc.auth; struct ceph_auth_handshake *auth = &o->o_auth; int ret; ret = __ceph_auth_get_authorizer(ac, auth, CEPH_ENTITY_TYPE_OSD, force_new, proto, NULL, NULL); if (ret) return ERR_PTR(ret); return auth; } static int osd_add_authorizer_challenge(struct ceph_connection *con, void *challenge_buf, int challenge_buf_len) { struct ceph_osd *o = con->private; struct ceph_osd_client *osdc = o->o_osdc; struct ceph_auth_client *ac = osdc->client->monc.auth; return ceph_auth_add_authorizer_challenge(ac, o->o_auth.authorizer, challenge_buf, challenge_buf_len); } static int osd_verify_authorizer_reply(struct ceph_connection *con) { struct ceph_osd *o = con->private; struct ceph_osd_client *osdc = o->o_osdc; struct ceph_auth_client *ac = osdc->client->monc.auth; struct ceph_auth_handshake *auth = &o->o_auth; return ceph_auth_verify_authorizer_reply(ac, auth->authorizer, auth->authorizer_reply_buf, auth->authorizer_reply_buf_len, NULL, NULL, NULL, NULL); } static int osd_invalidate_authorizer(struct ceph_connection *con) { struct ceph_osd *o = con->private; struct ceph_osd_client *osdc = o->o_osdc; struct ceph_auth_client *ac = osdc->client->monc.auth; ceph_auth_invalidate_authorizer(ac, CEPH_ENTITY_TYPE_OSD); return ceph_monc_validate_auth(&osdc->client->monc); } static int osd_get_auth_request(struct ceph_connection *con, void *buf, int *buf_len, void **authorizer, int *authorizer_len) { struct ceph_osd *o = con->private; struct ceph_auth_client *ac = o->o_osdc->client->monc.auth; struct ceph_auth_handshake *auth = &o->o_auth; int ret; ret = ceph_auth_get_authorizer(ac, auth, CEPH_ENTITY_TYPE_OSD, buf, buf_len); if (ret) return ret; *authorizer = auth->authorizer_buf; *authorizer_len = auth->authorizer_buf_len; return 0; } static int osd_handle_auth_reply_more(struct ceph_connection *con, void *reply, int reply_len, void *buf, int *buf_len, void **authorizer, int *authorizer_len) { struct ceph_osd *o = con->private; struct ceph_auth_client *ac = o->o_osdc->client->monc.auth; struct ceph_auth_handshake *auth = &o->o_auth; int ret; ret = ceph_auth_handle_svc_reply_more(ac, auth, reply, reply_len, buf, buf_len); if (ret) return ret; *authorizer = auth->authorizer_buf; *authorizer_len = auth->authorizer_buf_len; return 0; } static int osd_handle_auth_done(struct ceph_connection *con, u64 global_id, void *reply, int reply_len, u8 *session_key, int *session_key_len, u8 *con_secret, int *con_secret_len) { struct ceph_osd *o = 
con->private; struct ceph_auth_client *ac = o->o_osdc->client->monc.auth; struct ceph_auth_handshake *auth = &o->o_auth; return ceph_auth_handle_svc_reply_done(ac, auth, reply, reply_len, session_key, session_key_len, con_secret, con_secret_len); } static int osd_handle_auth_bad_method(struct ceph_connection *con, int used_proto, int result, const int *allowed_protos, int proto_cnt, const int *allowed_modes, int mode_cnt) { struct ceph_osd *o = con->private; struct ceph_mon_client *monc = &o->o_osdc->client->monc; int ret; if (ceph_auth_handle_bad_authorizer(monc->auth, CEPH_ENTITY_TYPE_OSD, used_proto, result, allowed_protos, proto_cnt, allowed_modes, mode_cnt)) { ret = ceph_monc_validate_auth(monc); if (ret) return ret; } return -EACCES; } static void osd_reencode_message(struct ceph_msg *msg) { int type = le16_to_cpu(msg->hdr.type); if (type == CEPH_MSG_OSD_OP) encode_request_finish(msg); } static int osd_sign_message(struct ceph_msg *msg) { struct ceph_osd *o = msg->con->private; struct ceph_auth_handshake *auth = &o->o_auth; return ceph_auth_sign_message(auth, msg); } static int osd_check_message_signature(struct ceph_msg *msg) { struct ceph_osd *o = msg->con->private; struct ceph_auth_handshake *auth = &o->o_auth; return ceph_auth_check_message_signature(auth, msg); } static void advance_cursor(struct ceph_msg_data_cursor *cursor, size_t len, bool zero) { while (len) { struct page *page; size_t poff, plen; page = ceph_msg_data_next(cursor, &poff, &plen); if (plen > len) plen = len; if (zero) zero_user_segment(page, poff, poff + plen); len -= plen; ceph_msg_data_advance(cursor, plen); } } static int prep_next_sparse_read(struct ceph_connection *con, struct ceph_msg_data_cursor *cursor) { struct ceph_osd *o = con->private; struct ceph_sparse_read *sr = &o->o_sparse_read; struct ceph_osd_request *req; struct ceph_osd_req_op *op; spin_lock(&o->o_requests_lock); req = lookup_request(&o->o_requests, le64_to_cpu(con->in_msg->hdr.tid)); if (!req) { spin_unlock(&o->o_requests_lock); return -EBADR; } if (o->o_sparse_op_idx < 0) { dout("%s: [%d] starting new sparse read req\n", __func__, o->o_osd); } else { u64 end; op = &req->r_ops[o->o_sparse_op_idx]; WARN_ON_ONCE(op->extent.sparse_ext); /* hand back buffer we took earlier */ op->extent.sparse_ext = sr->sr_extent; sr->sr_extent = NULL; op->extent.sparse_ext_cnt = sr->sr_count; sr->sr_ext_len = 0; dout("%s: [%d] completed extent array len %d cursor->resid %zd\n", __func__, o->o_osd, op->extent.sparse_ext_cnt, cursor->resid); /* Advance to end of data for this operation */ end = ceph_sparse_ext_map_end(op); if (end < sr->sr_req_len) advance_cursor(cursor, sr->sr_req_len - end, false); } ceph_init_sparse_read(sr); /* find next op in this request (if any) */ while (++o->o_sparse_op_idx < req->r_num_ops) { op = &req->r_ops[o->o_sparse_op_idx]; if (op->op == CEPH_OSD_OP_SPARSE_READ) goto found; } /* reset for next sparse read request */ spin_unlock(&o->o_requests_lock); o->o_sparse_op_idx = -1; return 0; found: sr->sr_req_off = op->extent.offset; sr->sr_req_len = op->extent.length; sr->sr_pos = sr->sr_req_off; dout("%s: [%d] new sparse read op at idx %d 0x%llx~0x%llx\n", __func__, o->o_osd, o->o_sparse_op_idx, sr->sr_req_off, sr->sr_req_len); /* hand off request's sparse extent map buffer */ sr->sr_ext_len = op->extent.sparse_ext_cnt; op->extent.sparse_ext_cnt = 0; sr->sr_extent = op->extent.sparse_ext; op->extent.sparse_ext = NULL; spin_unlock(&o->o_requests_lock); return 1; } #ifdef __BIG_ENDIAN static inline void convert_extent_map(struct 
ceph_sparse_read *sr)
{
	int i;

	for (i = 0; i < sr->sr_count; i++) {
		struct ceph_sparse_extent *ext = &sr->sr_extent[i];

		ext->off = le64_to_cpu((__force __le64)ext->off);
		ext->len = le64_to_cpu((__force __le64)ext->len);
	}
}
#else
static inline void convert_extent_map(struct ceph_sparse_read *sr)
{
}
#endif

static int osd_sparse_read(struct ceph_connection *con,
			   struct ceph_msg_data_cursor *cursor,
			   char **pbuf)
{
	struct ceph_osd *o = con->private;
	struct ceph_sparse_read *sr = &o->o_sparse_read;
	u32 count = sr->sr_count;
	u64 eoff, elen, len = 0;
	int i, ret;

	switch (sr->sr_state) {
	case CEPH_SPARSE_READ_HDR:
next_op:
		ret = prep_next_sparse_read(con, cursor);
		if (ret <= 0)
			return ret;

		/* number of extents */
		ret = sizeof(sr->sr_count);
		*pbuf = (char *)&sr->sr_count;
		sr->sr_state = CEPH_SPARSE_READ_EXTENTS;
		break;
	case CEPH_SPARSE_READ_EXTENTS:
		/* Convert sr_count to host-endian */
		count = le32_to_cpu((__force __le32)sr->sr_count);
		sr->sr_count = count;
		dout("[%d] got %u extents\n", o->o_osd, count);

		if (count > 0) {
			if (!sr->sr_extent || count > sr->sr_ext_len) {
				/* no extent array provided, or too short */
				kfree(sr->sr_extent);
				sr->sr_extent = kmalloc_array(count,
							      sizeof(*sr->sr_extent),
							      GFP_NOIO);
				if (!sr->sr_extent) {
					pr_err("%s: failed to allocate %u extents\n",
					       __func__, count);
					return -ENOMEM;
				}
				sr->sr_ext_len = count;
			}
			ret = count * sizeof(*sr->sr_extent);
			*pbuf = (char *)sr->sr_extent;
			sr->sr_state = CEPH_SPARSE_READ_DATA_LEN;
			break;
		}
		/* No extents? Read data len */
		fallthrough;
	case CEPH_SPARSE_READ_DATA_LEN:
		convert_extent_map(sr);
		ret = sizeof(sr->sr_datalen);
		*pbuf = (char *)&sr->sr_datalen;
		sr->sr_state = CEPH_SPARSE_READ_DATA_PRE;
		break;
	case CEPH_SPARSE_READ_DATA_PRE:
		/* Convert sr_datalen to host-endian */
		sr->sr_datalen = le32_to_cpu((__force __le32)sr->sr_datalen);
		for (i = 0; i < count; i++)
			len += sr->sr_extent[i].len;
		if (sr->sr_datalen != len) {
			pr_warn_ratelimited("data len %u != extent len %llu\n",
					    sr->sr_datalen, len);
			return -EREMOTEIO;
		}
		sr->sr_state = CEPH_SPARSE_READ_DATA;
		fallthrough;
	case CEPH_SPARSE_READ_DATA:
		if (sr->sr_index >= count) {
			sr->sr_state = CEPH_SPARSE_READ_HDR;
			goto next_op;
		}

		eoff = sr->sr_extent[sr->sr_index].off;
		elen = sr->sr_extent[sr->sr_index].len;

		dout("[%d] ext %d off 0x%llx len 0x%llx\n",
		     o->o_osd, sr->sr_index, eoff, elen);

		if (elen > INT_MAX) {
			dout("Sparse read extent length too long (0x%llx)\n",
			     elen);
			return -EREMOTEIO;
		}

		/* zero out anything from sr_pos to start of extent */
		if (sr->sr_pos < eoff)
			advance_cursor(cursor, eoff - sr->sr_pos, true);

		/* Set position to end of extent */
		sr->sr_pos = eoff + elen;

		/* send back the new length and nullify the ptr */
		cursor->sr_resid = elen;
		ret = elen;
		*pbuf = NULL;

		/* Bump the array index */
		++sr->sr_index;
		break;
	}
	return ret;
}

static const struct ceph_connection_operations osd_con_ops = {
	.get = osd_get_con,
	.put = osd_put_con,
	.sparse_read = osd_sparse_read,
	.alloc_msg = osd_alloc_msg,
	.dispatch = osd_dispatch,
	.fault = osd_fault,
	.reencode_message = osd_reencode_message,
	.get_authorizer = osd_get_authorizer,
	.add_authorizer_challenge = osd_add_authorizer_challenge,
	.verify_authorizer_reply = osd_verify_authorizer_reply,
	.invalidate_authorizer = osd_invalidate_authorizer,
	.sign_message = osd_sign_message,
	.check_message_signature = osd_check_message_signature,
	.get_auth_request = osd_get_auth_request,
	.handle_auth_reply_more = osd_handle_auth_reply_more,
	.handle_auth_done = osd_handle_auth_done,
	.handle_auth_bad_method = osd_handle_auth_bad_method,
};
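/*
 * Illustrative sketch only (not part of the original file): one way a
 * libceph consumer could drive the watch/notify API exported above.
 * The "example_*" names are hypothetical; the callback prototypes are
 * assumed to match rados_watchcb2_t and rados_watcherrcb_t from the
 * client's public header, and error handling is trimmed.
 */
static void example_watchcb(void *arg, u64 notify_id, u64 cookie,
			    u64 notifier_id, void *data, size_t data_len)
{
	/*
	 * A real watcher would ack here with ceph_osdc_notify_ack() so
	 * that notifiers don't have to wait out their timeout.
	 */
	pr_info("notify %llu on cookie %llu: %zu bytes\n",
		notify_id, cookie, data_len);
}

static void example_watcherrcb(void *arg, u64 cookie, int err)
{
	pr_info("watch on cookie %llu broke: %d\n", cookie, err);
}

static int example_watch_notify_roundtrip(struct ceph_osd_client *osdc,
					  struct ceph_object_id *oid,
					  struct ceph_object_locator *oloc)
{
	struct ceph_osd_linger_request *handle;
	struct page **reply_pages = NULL;
	size_t reply_len = 0;
	int ret;

	/* register the watch; on success we own a ref on the handle */
	handle = ceph_osdc_watch(osdc, oid, oloc, example_watchcb,
				 example_watcherrcb, osdc);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	/* empty payload, 5 second timeout; we own the reply pages */
	ret = ceph_osdc_notify(osdc, oid, oloc, NULL, 0, 5,
			       &reply_pages, &reply_len);
	if (reply_pages)
		ceph_release_page_vector(reply_pages,
					 calc_pages_for(0, reply_len));

	/* drops the ref taken by ceph_osdc_watch() */
	ceph_osdc_unwatch(osdc, handle);
	return ret;
}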
/*
 * Copyright (c) 2006 Oracle.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
#include <linux/highmem.h>
#include <linux/gfp.h>
#include <linux/cpu.h>
#include <linux/export.h>

#include "rds.h"

struct rds_page_remainder {
	struct page	*r_page;
	unsigned long	r_offset;
	local_lock_t	bh_lock;
};

static DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder,
				     rds_page_remainders) = {
	.bh_lock = INIT_LOCAL_LOCK(bh_lock),
};

/**
 * rds_page_remainder_alloc - build up regions of a message.
 *
 * @scat: Scatter list for message
 * @bytes: the number of bytes needed.
 * @gfp: the waiting behaviour of the allocation
 *
 * @gfp is always ORed with __GFP_HIGHMEM.  Callers must be prepared to
 * kmap the pages, etc.
 *
 * If @bytes is at least a full page then this just returns a page from
 * alloc_page().
 *
 * If @bytes is a partial page then this stores the unused region of the
 * page in a per-cpu structure.  Future partial-page allocations may be
 * satisfied from that cached region.  This lets us waste less memory on
 * small allocations with minimal complexity.  It works because the transmit
 * path passes read-only page regions down to devices.  They hold a page
 * reference until they are done with the region.
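 * Offsets handed out from a cached page are advanced by ALIGN(bytes, 8),
 * so every region starts 8-byte aligned within the shared page.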
*/ int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes, gfp_t gfp) { struct rds_page_remainder *rem; struct page *page; int ret; gfp |= __GFP_HIGHMEM; /* jump straight to allocation if we're trying for a huge page */ if (bytes >= PAGE_SIZE) { page = alloc_page(gfp); if (!page) { ret = -ENOMEM; } else { sg_set_page(scat, page, PAGE_SIZE, 0); ret = 0; } goto out; } local_bh_disable(); local_lock_nested_bh(&rds_page_remainders.bh_lock); rem = this_cpu_ptr(&rds_page_remainders); while (1) { /* avoid a tiny region getting stuck by tossing it */ if (rem->r_page && bytes > (PAGE_SIZE - rem->r_offset)) { rds_stats_inc(s_page_remainder_miss); __free_page(rem->r_page); rem->r_page = NULL; } /* hand out a fragment from the cached page */ if (rem->r_page && bytes <= (PAGE_SIZE - rem->r_offset)) { sg_set_page(scat, rem->r_page, bytes, rem->r_offset); get_page(sg_page(scat)); if (rem->r_offset != 0) rds_stats_inc(s_page_remainder_hit); rem->r_offset += ALIGN(bytes, 8); if (rem->r_offset >= PAGE_SIZE) { __free_page(rem->r_page); rem->r_page = NULL; } ret = 0; break; } /* alloc if there is nothing for us to use */ local_unlock_nested_bh(&rds_page_remainders.bh_lock); local_bh_enable(); page = alloc_page(gfp); local_bh_disable(); local_lock_nested_bh(&rds_page_remainders.bh_lock); rem = this_cpu_ptr(&rds_page_remainders); if (!page) { ret = -ENOMEM; break; } /* did someone race to fill the remainder before us? */ if (rem->r_page) { __free_page(page); continue; } /* otherwise install our page and loop around to alloc */ rem->r_page = page; rem->r_offset = 0; } local_unlock_nested_bh(&rds_page_remainders.bh_lock); local_bh_enable(); out: rdsdebug("bytes %lu ret %d %p %u %u\n", bytes, ret, ret ? NULL : sg_page(scat), ret ? 0 : scat->offset, ret ? 0 : scat->length); return ret; } EXPORT_SYMBOL_GPL(rds_page_remainder_alloc); void rds_page_exit(void) { unsigned int cpu; for_each_possible_cpu(cpu) { struct rds_page_remainder *rem; rem = &per_cpu(rds_page_remainders, cpu); rdsdebug("cpu %u\n", cpu); if (rem->r_page) __free_page(rem->r_page); rem->r_page = NULL; } }
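/*
 * Editor's sketch (not part of the file above): a single-threaded userspace
 * model of the remainder cache that rds_page_remainder_alloc() keeps per cpu.
 * A partial-page request is carved from the cached page when it fits; the
 * offset advances by ALIGN(bytes, 8); a too-small tail is tossed. The names
 * (remainder, carve) and the miss handling are simplified for illustration.
 */
#include <stdio.h>

#define PAGE_SZ 4096UL
#define ALIGN8(x) (((x) + 7UL) & ~7UL)

struct remainder { unsigned long off; int have_page; };

/* Return the page offset handed out; replace the page when the tail is
 * too small, as the kernel code does before retrying. */
static unsigned long carve(struct remainder *rem, unsigned long bytes)
{
	unsigned long off;

	if (!rem->have_page || bytes > PAGE_SZ - rem->off) {
		rem->have_page = 1;	/* stands in for alloc_page() */
		rem->off = 0;
	}
	off = rem->off;
	rem->off += ALIGN8(bytes);
	if (rem->off >= PAGE_SZ)
		rem->have_page = 0;	/* page exhausted, drop it */
	return off;
}

int main(void)
{
	struct remainder rem = { 0, 0 };

	printf("%lu\n", carve(&rem, 100));	/* 0 */
	printf("%lu\n", carve(&rem, 100));	/* 104: 100 aligned up to 8 */
	printf("%lu\n", carve(&rem, 4000));	/* 0: tail of 3888 was tossed */
	return 0;
}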
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* SCTP kernel implementation * (C) Copyright IBM Corp. 2001, 2004 * Copyright (c) 1999-2000 Cisco, Inc. * Copyright (c) 1999-2001 Motorola, Inc. * Copyright (c) 2001 Intel Corp. * * This file is part of the SCTP kernel implementation * * These are definitions needed by the state machine. * * Please send any bug reports or fixes you make to the * email addresses: * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * La Monte H.P. Yarroll <piggy@acm.org> * Karl Knutson <karl@athena.chicago.il.us> * Xingang Guo <xingang.guo@intel.com> * Jon Grimm <jgrimm@us.ibm.com> * Dajiang Zhang <dajiang.zhang@nokia.com> * Sridhar Samudrala <sri@us.ibm.com> * Daisy Chang <daisyc@us.ibm.com> * Ardelle Fan <ardelle.fan@intel.com> * Kevin Gao <kevin.gao@intel.com> */ #include <linux/types.h> #include <linux/compiler.h> #include <linux/slab.h> #include <linux/in.h> #include <net/sctp/command.h> #include <net/sctp/sctp.h> #ifndef __sctp_sm_h__ #define __sctp_sm_h__ /* * Possible values for the disposition are: */ enum sctp_disposition { SCTP_DISPOSITION_DISCARD, /* No further processing. */ SCTP_DISPOSITION_CONSUME, /* Process return values normally. */ SCTP_DISPOSITION_NOMEM, /* We ran out of memory--recover. */ SCTP_DISPOSITION_DELETE_TCB, /* Close the association. */ SCTP_DISPOSITION_ABORT, /* Close the association NOW. */ SCTP_DISPOSITION_VIOLATION, /* The peer is misbehaving. */ SCTP_DISPOSITION_NOT_IMPL, /* This entry is not implemented. */ SCTP_DISPOSITION_ERROR, /* This is plain old user error. */ SCTP_DISPOSITION_BUG, /* This is a bug.
*/ }; typedef enum sctp_disposition (sctp_state_fn_t) ( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands); typedef void (sctp_timer_event_t) (struct timer_list *); struct sctp_sm_table_entry { sctp_state_fn_t *fn; const char *name; }; /* A naming convention of "sctp_sf_xxx" applies to all the state functions * currently in use. */ /* Prototypes for generic state functions. */ sctp_state_fn_t sctp_sf_not_impl; sctp_state_fn_t sctp_sf_bug; /* Prototypes for generic timer state functions. */ sctp_state_fn_t sctp_sf_timer_ignore; /* Prototypes for chunk state functions. */ sctp_state_fn_t sctp_sf_do_9_1_abort; sctp_state_fn_t sctp_sf_cookie_wait_abort; sctp_state_fn_t sctp_sf_cookie_echoed_abort; sctp_state_fn_t sctp_sf_shutdown_pending_abort; sctp_state_fn_t sctp_sf_shutdown_sent_abort; sctp_state_fn_t sctp_sf_shutdown_ack_sent_abort; sctp_state_fn_t sctp_sf_do_5_1B_init; sctp_state_fn_t sctp_sf_do_5_1C_ack; sctp_state_fn_t sctp_sf_do_5_1D_ce; sctp_state_fn_t sctp_sf_do_5_1E_ca; sctp_state_fn_t sctp_sf_do_4_C; sctp_state_fn_t sctp_sf_eat_data_6_2; sctp_state_fn_t sctp_sf_eat_data_fast_4_4; sctp_state_fn_t sctp_sf_eat_sack_6_2; sctp_state_fn_t sctp_sf_operr_notify; sctp_state_fn_t sctp_sf_t1_init_timer_expire; sctp_state_fn_t sctp_sf_t1_cookie_timer_expire; sctp_state_fn_t sctp_sf_t2_timer_expire; sctp_state_fn_t sctp_sf_t4_timer_expire; sctp_state_fn_t sctp_sf_t5_timer_expire; sctp_state_fn_t sctp_sf_sendbeat_8_3; sctp_state_fn_t sctp_sf_beat_8_3; sctp_state_fn_t sctp_sf_backbeat_8_3; sctp_state_fn_t sctp_sf_do_9_2_final; sctp_state_fn_t sctp_sf_do_9_2_shutdown; sctp_state_fn_t sctp_sf_do_9_2_shut_ctsn; sctp_state_fn_t sctp_sf_do_ecn_cwr; sctp_state_fn_t sctp_sf_do_ecne; sctp_state_fn_t sctp_sf_ootb; sctp_state_fn_t sctp_sf_pdiscard; sctp_state_fn_t sctp_sf_violation; sctp_state_fn_t sctp_sf_discard_chunk; sctp_state_fn_t sctp_sf_do_5_2_1_siminit; sctp_state_fn_t sctp_sf_do_5_2_2_dupinit; sctp_state_fn_t sctp_sf_do_5_2_3_initack; sctp_state_fn_t sctp_sf_do_5_2_4_dupcook; sctp_state_fn_t sctp_sf_unk_chunk; sctp_state_fn_t sctp_sf_do_8_5_1_E_sa; sctp_state_fn_t sctp_sf_cookie_echoed_err; sctp_state_fn_t sctp_sf_do_asconf; sctp_state_fn_t sctp_sf_do_asconf_ack; sctp_state_fn_t sctp_sf_do_reconf; sctp_state_fn_t sctp_sf_do_9_2_reshutack; sctp_state_fn_t sctp_sf_eat_fwd_tsn; sctp_state_fn_t sctp_sf_eat_fwd_tsn_fast; sctp_state_fn_t sctp_sf_eat_auth; /* Prototypes for primitive event state functions. */ sctp_state_fn_t sctp_sf_do_prm_asoc; sctp_state_fn_t sctp_sf_do_prm_send; sctp_state_fn_t sctp_sf_do_9_2_prm_shutdown; sctp_state_fn_t sctp_sf_cookie_wait_prm_shutdown; sctp_state_fn_t sctp_sf_cookie_echoed_prm_shutdown; sctp_state_fn_t sctp_sf_do_9_1_prm_abort; sctp_state_fn_t sctp_sf_cookie_wait_prm_abort; sctp_state_fn_t sctp_sf_cookie_echoed_prm_abort; sctp_state_fn_t sctp_sf_shutdown_pending_prm_abort; sctp_state_fn_t sctp_sf_shutdown_sent_prm_abort; sctp_state_fn_t sctp_sf_shutdown_ack_sent_prm_abort; sctp_state_fn_t sctp_sf_error_closed; sctp_state_fn_t sctp_sf_error_shutdown; sctp_state_fn_t sctp_sf_ignore_primitive; sctp_state_fn_t sctp_sf_do_prm_requestheartbeat; sctp_state_fn_t sctp_sf_do_prm_asconf; sctp_state_fn_t sctp_sf_do_prm_reconf; /* Prototypes for other event state functions.
*/ sctp_state_fn_t sctp_sf_do_no_pending_tsn; sctp_state_fn_t sctp_sf_do_9_2_start_shutdown; sctp_state_fn_t sctp_sf_do_9_2_shutdown_ack; sctp_state_fn_t sctp_sf_ignore_other; sctp_state_fn_t sctp_sf_cookie_wait_icmp_abort; /* Prototypes for timeout event state functions. */ sctp_state_fn_t sctp_sf_do_6_3_3_rtx; sctp_state_fn_t sctp_sf_send_reconf; sctp_state_fn_t sctp_sf_send_probe; sctp_state_fn_t sctp_sf_do_6_2_sack; sctp_state_fn_t sctp_sf_autoclose_timer_expire; /* Prototypes for utility support functions. */ const struct sctp_sm_table_entry *sctp_sm_lookup_event( struct net *net, enum sctp_event_type event_type, enum sctp_state state, union sctp_subtype event_subtype); struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *, struct sctp_chunk *, gfp_t gfp); /* Prototypes for chunk-building functions. */ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, const struct sctp_bind_addr *bp, gfp_t gfp, int vparam_len); struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, const struct sctp_chunk *chunk, const gfp_t gfp, const int unkparam_len); struct sctp_chunk *sctp_make_cookie_echo(const struct sctp_association *asoc, const struct sctp_chunk *chunk); struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc, const struct sctp_chunk *chunk); struct sctp_chunk *sctp_make_cwr(const struct sctp_association *asoc, const __u32 lowest_tsn, const struct sctp_chunk *chunk); struct sctp_chunk *sctp_make_idata(const struct sctp_association *asoc, __u8 flags, int paylen, gfp_t gfp); struct sctp_chunk *sctp_make_ifwdtsn(const struct sctp_association *asoc, __u32 new_cum_tsn, size_t nstreams, struct sctp_ifwdtsn_skip *skiplist); struct sctp_chunk *sctp_make_datafrag_empty(const struct sctp_association *asoc, const struct sctp_sndrcvinfo *sinfo, int len, __u8 flags, gfp_t gfp); struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc, const __u32 lowest_tsn); struct sctp_chunk *sctp_make_sack(struct sctp_association *asoc); struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc, const struct sctp_chunk *chunk); struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc, const struct sctp_chunk *chunk); struct sctp_chunk *sctp_make_shutdown_complete( const struct sctp_association *asoc, const struct sctp_chunk *chunk); int sctp_init_cause(struct sctp_chunk *chunk, __be16 cause, size_t paylen); struct sctp_chunk *sctp_make_abort(const struct sctp_association *asoc, const struct sctp_chunk *chunk, const size_t hint); struct sctp_chunk *sctp_make_abort_no_data(const struct sctp_association *asoc, const struct sctp_chunk *chunk, __u32 tsn); struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *asoc, struct msghdr *msg, size_t msg_len); struct sctp_chunk *sctp_make_abort_violation( const struct sctp_association *asoc, const struct sctp_chunk *chunk, const __u8 *payload, const size_t paylen); struct sctp_chunk *sctp_make_violation_paramlen( const struct sctp_association *asoc, const struct sctp_chunk *chunk, struct sctp_paramhdr *param); struct sctp_chunk *sctp_make_violation_max_retrans( const struct sctp_association *asoc, const struct sctp_chunk *chunk); struct sctp_chunk *sctp_make_new_encap_port( const struct sctp_association *asoc, const struct sctp_chunk *chunk); struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc, const struct sctp_transport *transport, __u32 probe_size); struct sctp_chunk *sctp_make_heartbeat_ack(const struct 
sctp_association *asoc, const struct sctp_chunk *chunk, const void *payload, const size_t paylen); struct sctp_chunk *sctp_make_pad(const struct sctp_association *asoc, int len); struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc, const struct sctp_chunk *chunk, __be16 cause_code, const void *payload, size_t paylen, size_t reserve_tail); struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, union sctp_addr *laddr, struct sockaddr *addrs, int addrcnt, __be16 flags); struct sctp_chunk *sctp_make_asconf_set_prim(struct sctp_association *asoc, union sctp_addr *addr); bool sctp_verify_asconf(const struct sctp_association *asoc, struct sctp_chunk *chunk, bool addr_param_needed, struct sctp_paramhdr **errp); struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, struct sctp_chunk *asconf); int sctp_process_asconf_ack(struct sctp_association *asoc, struct sctp_chunk *asconf_ack); struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc, __u32 new_cum_tsn, size_t nstreams, struct sctp_fwdtsn_skip *skiplist); struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc, __u16 key_id); struct sctp_chunk *sctp_make_strreset_req(const struct sctp_association *asoc, __u16 stream_num, __be16 *stream_list, bool out, bool in); struct sctp_chunk *sctp_make_strreset_tsnreq( const struct sctp_association *asoc); struct sctp_chunk *sctp_make_strreset_addstrm( const struct sctp_association *asoc, __u16 out, __u16 in); struct sctp_chunk *sctp_make_strreset_resp(const struct sctp_association *asoc, __u32 result, __u32 sn); struct sctp_chunk *sctp_make_strreset_tsnresp(struct sctp_association *asoc, __u32 result, __u32 sn, __u32 sender_tsn, __u32 receiver_tsn); bool sctp_verify_reconf(const struct sctp_association *asoc, struct sctp_chunk *chunk, struct sctp_paramhdr **errp); void sctp_chunk_assign_tsn(struct sctp_chunk *chunk); void sctp_chunk_assign_ssn(struct sctp_chunk *chunk); /* Prototypes for stream-processing functions. */ struct sctp_chunk *sctp_process_strreset_outreq( struct sctp_association *asoc, union sctp_params param, struct sctp_ulpevent **evp); struct sctp_chunk *sctp_process_strreset_inreq( struct sctp_association *asoc, union sctp_params param, struct sctp_ulpevent **evp); struct sctp_chunk *sctp_process_strreset_tsnreq( struct sctp_association *asoc, union sctp_params param, struct sctp_ulpevent **evp); struct sctp_chunk *sctp_process_strreset_addstrm_out( struct sctp_association *asoc, union sctp_params param, struct sctp_ulpevent **evp); struct sctp_chunk *sctp_process_strreset_addstrm_in( struct sctp_association *asoc, union sctp_params param, struct sctp_ulpevent **evp); struct sctp_chunk *sctp_process_strreset_resp( struct sctp_association *asoc, union sctp_params param, struct sctp_ulpevent **evp); /* Prototypes for statetable processing. 
*/ int sctp_do_sm(struct net *net, enum sctp_event_type event_type, union sctp_subtype subtype, enum sctp_state state, struct sctp_endpoint *ep, struct sctp_association *asoc, void *event_arg, gfp_t gfp); /* 2nd level prototypes */ void sctp_generate_t3_rtx_event(struct timer_list *t); void sctp_generate_heartbeat_event(struct timer_list *t); void sctp_generate_reconf_event(struct timer_list *t); void sctp_generate_probe_event(struct timer_list *t); void sctp_generate_proto_unreach_event(struct timer_list *t); void sctp_ootb_pkt_free(struct sctp_packet *packet); struct sctp_association *sctp_unpack_cookie( const struct sctp_endpoint *ep, const struct sctp_association *asoc, struct sctp_chunk *chunk, gfp_t gfp, int *err, struct sctp_chunk **err_chk_p); /* 3rd level prototypes */ __u32 sctp_generate_tag(const struct sctp_endpoint *ep); __u32 sctp_generate_tsn(const struct sctp_endpoint *ep); /* Extern declarations for major data structures. */ extern sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES]; /* Get the size of a DATA chunk payload. */ static inline __u16 sctp_data_size(struct sctp_chunk *chunk) { __u16 size; size = ntohs(chunk->chunk_hdr->length); size -= sctp_datachk_len(&chunk->asoc->stream); return size; } /* Compare two TSNs */ #define TSN_lt(a,b) \ (typecheck(__u32, a) && \ typecheck(__u32, b) && \ ((__s32)((a) - (b)) < 0)) #define TSN_lte(a,b) \ (typecheck(__u32, a) && \ typecheck(__u32, b) && \ ((__s32)((a) - (b)) <= 0)) /* Compare two MIDs */ #define MID_lt(a, b) \ (typecheck(__u32, a) && \ typecheck(__u32, b) && \ ((__s32)((a) - (b)) < 0)) /* Compare two SSNs */ #define SSN_lt(a,b) \ (typecheck(__u16, a) && \ typecheck(__u16, b) && \ ((__s16)((a) - (b)) < 0)) /* ADDIP 3.1.1 */ #define ADDIP_SERIAL_gte(a,b) \ (typecheck(__u32, a) && \ typecheck(__u32, b) && \ ((__s32)((b) - (a)) <= 0)) /* Check VTAG of the packet matches the sender's own tag. */ static inline int sctp_vtag_verify(const struct sctp_chunk *chunk, const struct sctp_association *asoc) { /* RFC 2960 Sec 8.5 When receiving an SCTP packet, the endpoint * MUST ensure that the value in the Verification Tag field of * the received SCTP packet matches its own Tag. If the received * Verification Tag value does not match the receiver's own * tag value, the receiver shall silently discard the packet... */ if (ntohl(chunk->sctp_hdr->vtag) != asoc->c.my_vtag) return 0; chunk->transport->encap_port = SCTP_INPUT_CB(chunk->skb)->encap_port; return 1; } /* Check VTAG of the packet matches the sender's own tag and the T bit is * not set, OR its peer's tag and the T bit is set in the Chunk Flags. */ static inline int sctp_vtag_verify_either(const struct sctp_chunk *chunk, const struct sctp_association *asoc) { /* RFC 2960 Section 8.5.1, sctpimpguide Section 2.41 * * B) The receiver of a ABORT MUST accept the packet * if the Verification Tag field of the packet matches its own tag * and the T bit is not set * OR * it is set to its peer's tag and the T bit is set in the Chunk * Flags. * Otherwise, the receiver MUST silently discard the packet * and take no further action. * * C) The receiver of a SHUTDOWN COMPLETE shall accept the packet * if the Verification Tag field of the packet matches its own tag * and the T bit is not set * OR * it is set to its peer's tag and the T bit is set in the Chunk * Flags. * Otherwise, the receiver MUST silently discard the packet * and take no further action. An endpoint MUST ignore the * SHUTDOWN COMPLETE if it is not in the SHUTDOWN-ACK-SENT state. 
*/ if ((!sctp_test_T_bit(chunk) && (ntohl(chunk->sctp_hdr->vtag) == asoc->c.my_vtag)) || (sctp_test_T_bit(chunk) && asoc->c.peer_vtag && (ntohl(chunk->sctp_hdr->vtag) == asoc->c.peer_vtag))) { return 1; } return 0; } #endif /* __sctp_sm_h__ */
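/*
 * Editor's sketch (not part of the header above): a standalone model of the
 * serial number arithmetic behind TSN_lt()/TSN_lte(). Casting the unsigned
 * difference to a signed type keeps the ordering correct across 32-bit
 * wraparound, where a plain '<' comparison fails.
 */
#include <stdint.h>
#include <stdio.h>

static int tsn_lt(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) < 0;	/* mirrors TSN_lt() */
}

int main(void)
{
	uint32_t near_wrap = 0xffffffffu, after_wrap = 0u;

	/* Serial arithmetic: 0xffffffff is "just before" 0. */
	printf("%d\n", tsn_lt(near_wrap, after_wrap));	/* 1 */
	printf("%d\n", near_wrap < after_wrap);		/* 0: plain '<' is wrong here */
	printf("%d\n", tsn_lt(5, 10));			/* 1 */
	return 0;
}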
// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) /* Copyright(c) 2014 - 2020 Intel Corporation */ #include <linux/mutex.h> #include <linux/list.h> #include "adf_cfg.h" #include "adf_common_drv.h" static LIST_HEAD(accel_table); static LIST_HEAD(vfs_table); static DEFINE_MUTEX(table_lock); static u32 num_devices; static u8 id_map[ADF_MAX_DEVICES]; struct vf_id_map { u32 bdf; u32 id; u32 fake_id; bool attached; struct list_head list; }; static int adf_get_vf_id(struct adf_accel_dev *vf) { return ((7 * (PCI_SLOT(accel_to_pci_dev(vf)->devfn) - 1)) + PCI_FUNC(accel_to_pci_dev(vf)->devfn) + (PCI_SLOT(accel_to_pci_dev(vf)->devfn) - 1)); } static int adf_get_vf_num(struct adf_accel_dev *vf) { return (accel_to_pci_dev(vf)->bus->number << 8) | adf_get_vf_id(vf); } static struct vf_id_map *adf_find_vf(u32 bdf) { struct list_head *itr; list_for_each(itr, &vfs_table) { struct vf_id_map *ptr = list_entry(itr, struct vf_id_map, list); if (ptr->bdf == bdf) return ptr; } return NULL; } static int adf_get_vf_real_id(u32 fake) { struct list_head *itr; list_for_each(itr, &vfs_table) { struct vf_id_map *ptr = list_entry(itr, struct vf_id_map, list); if (ptr->fake_id == fake) return ptr->id; } return -1; } /** * adf_clean_vf_map() - Cleans VF id mappings * @vf: flag indicating whether mappings are cleaned * for vfs only or for vfs and pfs * * Function cleans internal ids for virtual functions.
*/ void adf_clean_vf_map(bool vf) { struct vf_id_map *map; struct list_head *ptr, *tmp; mutex_lock(&table_lock); list_for_each_safe(ptr, tmp, &vfs_table) { map = list_entry(ptr, struct vf_id_map, list); if (map->bdf != -1) { id_map[map->id] = 0; num_devices--; } if (vf && map->bdf == -1) continue; list_del(ptr); kfree(map); } mutex_unlock(&table_lock); } EXPORT_SYMBOL_GPL(adf_clean_vf_map); /** * adf_devmgr_update_class_index() - Update internal index * @hw_data: Pointer to internal device data. * * Function updates internal dev index for VFs */ void adf_devmgr_update_class_index(struct adf_hw_device_data *hw_data) { struct adf_hw_device_class *class = hw_data->dev_class; struct list_head *itr; int i = 0; list_for_each(itr, &accel_table) { struct adf_accel_dev *ptr = list_entry(itr, struct adf_accel_dev, list); if (ptr->hw_device->dev_class == class) ptr->hw_device->instance_id = i++; if (i == class->instances) break; } } EXPORT_SYMBOL_GPL(adf_devmgr_update_class_index); static unsigned int adf_find_free_id(void) { unsigned int i; for (i = 0; i < ADF_MAX_DEVICES; i++) { if (!id_map[i]) { id_map[i] = 1; return i; } } return ADF_MAX_DEVICES + 1; } /** * adf_devmgr_add_dev() - Add accel_dev to the acceleration framework * @accel_dev: Pointer to acceleration device. * @pf: Corresponding PF if the accel_dev is a VF * * Function adds acceleration device to the acceleration framework. * To be used by QAT device specific drivers. * * Return: 0 on success, error code otherwise. */ int adf_devmgr_add_dev(struct adf_accel_dev *accel_dev, struct adf_accel_dev *pf) { struct list_head *itr; int ret = 0; if (num_devices == ADF_MAX_DEVICES) { dev_err(&GET_DEV(accel_dev), "Only support up to %d devices\n", ADF_MAX_DEVICES); return -EFAULT; } mutex_lock(&table_lock); atomic_set(&accel_dev->ref_count, 0); /* PF on host or VF on guest - optimized to remove redundant is_vf */ if (!accel_dev->is_vf || !pf) { struct vf_id_map *map; list_for_each(itr, &accel_table) { struct adf_accel_dev *ptr = list_entry(itr, struct adf_accel_dev, list); if (ptr == accel_dev) { ret = -EEXIST; goto unlock; } } list_add_tail(&accel_dev->list, &accel_table); accel_dev->accel_id = adf_find_free_id(); if (accel_dev->accel_id > ADF_MAX_DEVICES) { ret = -EFAULT; goto unlock; } num_devices++; map = kzalloc(sizeof(*map), GFP_KERNEL); if (!map) { ret = -ENOMEM; goto unlock; } map->bdf = ~0; map->id = accel_dev->accel_id; map->fake_id = map->id; map->attached = true; list_add_tail(&map->list, &vfs_table); } else if (accel_dev->is_vf && pf) { /* VF on host */ struct vf_id_map *map; map = adf_find_vf(adf_get_vf_num(accel_dev)); if (map) { struct vf_id_map *next; accel_dev->accel_id = map->id; list_add_tail(&accel_dev->list, &accel_table); map->fake_id++; map->attached = true; next = list_next_entry(map, list); while (next && &next->list != &vfs_table) { next->fake_id++; next = list_next_entry(next, list); } ret = 0; goto unlock; } map = kzalloc(sizeof(*map), GFP_KERNEL); if (!map) { ret = -ENOMEM; goto unlock; } accel_dev->accel_id = adf_find_free_id(); if (accel_dev->accel_id > ADF_MAX_DEVICES) { kfree(map); ret = -EFAULT; goto unlock; } num_devices++; list_add_tail(&accel_dev->list, &accel_table); map->bdf = adf_get_vf_num(accel_dev); map->id = accel_dev->accel_id; map->fake_id = map->id; map->attached = true; list_add_tail(&map->list, &vfs_table); } mutex_init(&accel_dev->state_lock); unlock: mutex_unlock(&table_lock); return ret; } EXPORT_SYMBOL_GPL(adf_devmgr_add_dev); struct list_head *adf_devmgr_get_head(void) { return &accel_table; } 
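/*
 * Editor's sketch (not driver code): the expression in adf_get_vf_id() above,
 * 7*(slot-1) + func + (slot-1), simplifies to 8*(slot-1) + func, i.e. eight
 * VFs per slot, starting at slot 1. SLOT()/FUNC() below re-derive what
 * PCI_SLOT()/PCI_FUNC() extract from a devfn byte, for a standalone demo.
 */
#include <stdio.h>

#define SLOT(devfn) (((devfn) >> 3) & 0x1f)
#define FUNC(devfn) ((devfn) & 0x07)

static int vf_id(unsigned int devfn)
{
	return 7 * (SLOT(devfn) - 1) + FUNC(devfn) + (SLOT(devfn) - 1);
}

int main(void)
{
	/* slot 1 func 0..2 -> VF 0, 1, 2; slot 2 func 0 -> VF 8 */
	printf("%d %d %d %d\n", vf_id(1 << 3), vf_id((1 << 3) | 1),
	       vf_id((1 << 3) | 2), vf_id(2 << 3));
	return 0;
}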
/** * adf_devmgr_rm_dev() - Remove accel_dev from the acceleration framework. * @accel_dev: Pointer to acceleration device. * @pf: Corresponding PF if the accel_dev is a VF * * Function removes acceleration device from the acceleration framework. * To be used by QAT device specific drivers. * * Return: void */ void adf_devmgr_rm_dev(struct adf_accel_dev *accel_dev, struct adf_accel_dev *pf) { mutex_lock(&table_lock); /* PF on host or VF on guest - optimized to remove redundant is_vf */ if (!accel_dev->is_vf || !pf) { id_map[accel_dev->accel_id] = 0; num_devices--; } else if (accel_dev->is_vf && pf) { struct vf_id_map *map, *next; map = adf_find_vf(adf_get_vf_num(accel_dev)); if (!map) { dev_err(&GET_DEV(accel_dev), "Failed to find VF map\n"); goto unlock; } map->fake_id--; map->attached = false; next = list_next_entry(map, list); while (next && &next->list != &vfs_table) { next->fake_id--; next = list_next_entry(next, list); } } unlock: mutex_destroy(&accel_dev->state_lock); list_del(&accel_dev->list); mutex_unlock(&table_lock); } EXPORT_SYMBOL_GPL(adf_devmgr_rm_dev); /** * adf_devmgr_pci_to_accel_dev() - Get accel_dev associated with the pci_dev. * @pci_dev: Pointer to PCI device. * * Function returns acceleration device associated with the given PCI device. * To be used by QAT device specific drivers. * * Return: pointer to accel_dev or NULL if not found. */ struct adf_accel_dev *adf_devmgr_pci_to_accel_dev(struct pci_dev *pci_dev) { struct list_head *itr; mutex_lock(&table_lock); list_for_each(itr, &accel_table) { struct adf_accel_dev *ptr = list_entry(itr, struct adf_accel_dev, list); if (ptr->accel_pci_dev.pci_dev == pci_dev) { mutex_unlock(&table_lock); return ptr; } } mutex_unlock(&table_lock); return NULL; } EXPORT_SYMBOL_GPL(adf_devmgr_pci_to_accel_dev); struct adf_accel_dev *adf_devmgr_get_dev_by_id(u32 id) { struct list_head *itr; int real_id; mutex_lock(&table_lock); real_id = adf_get_vf_real_id(id); if (real_id < 0) goto unlock; id = real_id; list_for_each(itr, &accel_table) { struct adf_accel_dev *ptr = list_entry(itr, struct adf_accel_dev, list); if (ptr->accel_id == id) { mutex_unlock(&table_lock); return ptr; } } unlock: mutex_unlock(&table_lock); return NULL; } int adf_devmgr_verify_id(u32 id) { if (id == ADF_CFG_ALL_DEVICES) return 0; if (adf_devmgr_get_dev_by_id(id)) return 0; return -ENODEV; } static int adf_get_num_dettached_vfs(void) { struct list_head *itr; int vfs = 0; mutex_lock(&table_lock); list_for_each(itr, &vfs_table) { struct vf_id_map *ptr = list_entry(itr, struct vf_id_map, list); if (ptr->bdf != ~0 && !ptr->attached) vfs++; } mutex_unlock(&table_lock); return vfs; } void adf_devmgr_get_num_dev(u32 *num) { *num = num_devices - adf_get_num_dettached_vfs(); } /** * adf_dev_in_use() - Check whether accel_dev is currently in use * @accel_dev: Pointer to acceleration device. * * To be used by QAT device specific drivers. * * Return: 1 when device is in use, 0 otherwise. */ int adf_dev_in_use(struct adf_accel_dev *accel_dev) { return atomic_read(&accel_dev->ref_count) != 0; } EXPORT_SYMBOL_GPL(adf_dev_in_use); /** * adf_dev_get() - Increment accel_dev reference count * @accel_dev: Pointer to acceleration device. * * Increment the accel_dev refcount and if this is the first time * incrementing it during this period the accel_dev is in use, * increment the module refcount too. * To be used by QAT device specific drivers. 
* * Return: 0 when successful, -EFAULT when failing to bump the module refcount */ int adf_dev_get(struct adf_accel_dev *accel_dev) { if (atomic_add_return(1, &accel_dev->ref_count) == 1) if (!try_module_get(accel_dev->owner)) return -EFAULT; return 0; } EXPORT_SYMBOL_GPL(adf_dev_get); /** * adf_dev_put() - Decrement accel_dev reference count * @accel_dev: Pointer to acceleration device. * * Decrement the accel_dev refcount and if this is the last time * decrementing it during this period the accel_dev is in use, * decrement the module refcount too. * To be used by QAT device specific drivers. * * Return: void */ void adf_dev_put(struct adf_accel_dev *accel_dev) { if (atomic_sub_return(1, &accel_dev->ref_count) == 0) module_put(accel_dev->owner); } EXPORT_SYMBOL_GPL(adf_dev_put); /** * adf_devmgr_in_reset() - Check whether device is in reset * @accel_dev: Pointer to acceleration device. * * To be used by QAT device specific drivers. * * Return: 1 when the device is being reset, 0 otherwise. */ int adf_devmgr_in_reset(struct adf_accel_dev *accel_dev) { return test_bit(ADF_STATUS_RESTARTING, &accel_dev->status); } EXPORT_SYMBOL_GPL(adf_devmgr_in_reset); /** * adf_dev_started() - Check whether device has started * @accel_dev: Pointer to acceleration device. * * To be used by QAT device specific drivers. * * Return: 1 when the device has started, 0 otherwise */ int adf_dev_started(struct adf_accel_dev *accel_dev) { return test_bit(ADF_STATUS_STARTED, &accel_dev->status); } EXPORT_SYMBOL_GPL(adf_dev_started);
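/*
 * Editor's sketch (not part of the driver): the intended pairing of the
 * refcounting helpers above. example_use_accel_dev() is hypothetical; it
 * shows a caller pinning the device for the duration of an operation, with
 * the first adf_dev_get() also taking a module reference.
 */
static int example_use_accel_dev(struct adf_accel_dev *accel_dev)
{
	int ret;

	if (!adf_dev_started(accel_dev))
		return -EAGAIN;

	ret = adf_dev_get(accel_dev);	/* may fail with -EFAULT */
	if (ret)
		return ret;

	/* ... submit work to the device here ... */

	adf_dev_put(accel_dev);		/* last put drops the module ref */
	return 0;
}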
/* * Rusty Russell (C)2000 -- This code is GPL. * Patrick McHardy (c) 2006-2012 */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/module.h> #include <linux/proc_fs.h> #include <linux/skbuff.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter_bridge.h> #include <linux/seq_file.h> #include <linux/rcupdate.h> #include <net/protocol.h> #include <net/netfilter/nf_queue.h> #include <net/dst.h> #include "nf_internals.h" static const struct nf_queue_handler __rcu *nf_queue_handler; /* * Hook for nfnetlink_queue to register its queue handler. * We do this so that most of the NFQUEUE code can be modular. * * Once the queue is registered it must reinject all packets it * receives, no matter what. */ void nf_register_queue_handler(const struct nf_queue_handler *qh) { /* should never happen, we only have one queueing backend in kernel */ WARN_ON(rcu_access_pointer(nf_queue_handler)); rcu_assign_pointer(nf_queue_handler, qh); } EXPORT_SYMBOL(nf_register_queue_handler); /* The caller must flush their queue before this */ void nf_unregister_queue_handler(void) { RCU_INIT_POINTER(nf_queue_handler, NULL); } EXPORT_SYMBOL(nf_unregister_queue_handler); static void nf_queue_sock_put(struct sock *sk) { #ifdef CONFIG_INET sock_gen_put(sk); #else sock_put(sk); #endif } static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) { struct nf_hook_state *state = &entry->state; /* Release those devices we held, or Alexey will kill me.
*/ dev_put(state->in); dev_put(state->out); if (state->sk) nf_queue_sock_put(state->sk); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) dev_put(entry->physin); dev_put(entry->physout); #endif } void nf_queue_entry_free(struct nf_queue_entry *entry) { nf_queue_entry_release_refs(entry); kfree(entry); } EXPORT_SYMBOL_GPL(nf_queue_entry_free); static void __nf_queue_entry_init_physdevs(struct nf_queue_entry *entry) { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) const struct sk_buff *skb = entry->skb; if (nf_bridge_info_exists(skb)) { entry->physin = nf_bridge_get_physindev(skb, entry->state.net); entry->physout = nf_bridge_get_physoutdev(skb); } else { entry->physin = NULL; entry->physout = NULL; } #endif } /* Bump dev refs so they don't vanish while packet is out */ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) { struct nf_hook_state *state = &entry->state; if (state->sk && !refcount_inc_not_zero(&state->sk->sk_refcnt)) return false; dev_hold(state->in); dev_hold(state->out); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) dev_hold(entry->physin); dev_hold(entry->physout); #endif return true; } EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); void nf_queue_nf_hook_drop(struct net *net) { const struct nf_queue_handler *qh; rcu_read_lock(); qh = rcu_dereference(nf_queue_handler); if (qh) qh->nf_hook_drop(net); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(nf_queue_nf_hook_drop); static void nf_ip_saveroute(const struct sk_buff *skb, struct nf_queue_entry *entry) { struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry); if (entry->state.hook == NF_INET_LOCAL_OUT) { const struct iphdr *iph = ip_hdr(skb); rt_info->tos = iph->tos; rt_info->daddr = iph->daddr; rt_info->saddr = iph->saddr; rt_info->mark = skb->mark; } } static void nf_ip6_saveroute(const struct sk_buff *skb, struct nf_queue_entry *entry) { struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry); if (entry->state.hook == NF_INET_LOCAL_OUT) { const struct ipv6hdr *iph = ipv6_hdr(skb); rt_info->daddr = iph->daddr; rt_info->saddr = iph->saddr; rt_info->mark = skb->mark; } } static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, unsigned int index, unsigned int queuenum) { struct nf_queue_entry *entry = NULL; const struct nf_queue_handler *qh; unsigned int route_key_size; int status; /* QUEUE == DROP if no one is waiting, to be safe. */ qh = rcu_dereference(nf_queue_handler); if (!qh) return -ESRCH; switch (state->pf) { case AF_INET: route_key_size = sizeof(struct ip_rt_info); break; case AF_INET6: route_key_size = sizeof(struct ip6_rt_info); break; default: route_key_size = 0; break; } if (skb_sk_is_prefetched(skb)) { struct sock *sk = skb->sk; if (!sk_is_refcounted(sk)) { if (!refcount_inc_not_zero(&sk->sk_refcnt)) return -ENOTCONN; /* drop refcount on skb_orphan */ skb->destructor = sock_edemux; } } entry = kmalloc(sizeof(*entry) + route_key_size, GFP_ATOMIC); if (!entry) return -ENOMEM; if (skb_dst(skb) && !skb_dst_force(skb)) { kfree(entry); return -ENETDOWN; } *entry = (struct nf_queue_entry) { .skb = skb, .state = *state, .hook_index = index, .size = sizeof(*entry) + route_key_size, }; __nf_queue_entry_init_physdevs(entry); if (!nf_queue_entry_get_refs(entry)) { kfree(entry); return -ENOTCONN; } switch (entry->state.pf) { case AF_INET: nf_ip_saveroute(skb, entry); break; case AF_INET6: nf_ip6_saveroute(skb, entry); break; } status = qh->outfn(entry, queuenum); if (status < 0) { nf_queue_entry_free(entry); return status; } return 0; } /* Packets leaving via this function must come back through nf_reinject(). 
*/ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, unsigned int index, unsigned int verdict) { int ret; ret = __nf_queue(skb, state, index, verdict >> NF_VERDICT_QBITS); if (ret < 0) { if (ret == -ESRCH && (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) return 1; kfree_skb(skb); } return 0; } EXPORT_SYMBOL_GPL(nf_queue);
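/*
 * Editor's sketch (not kernel code): how a netfilter verdict word carries the
 * target queue number that nf_queue() above extracts with
 * 'verdict >> NF_VERDICT_QBITS'. The constants are copied from
 * include/uapi/linux/netfilter.h; NF_QUEUE_NR() is simplified here (the real
 * macro also masks the shifted value with NF_VERDICT_QMASK).
 */
#include <stdio.h>

#define NF_QUEUE			3
#define NF_VERDICT_QBITS		16
#define NF_VERDICT_FLAG_QUEUE_BYPASS	0x00008000
#define NF_QUEUE_NR(x)			(((x) << NF_VERDICT_QBITS) | NF_QUEUE)

int main(void)
{
	unsigned int verdict = NF_QUEUE_NR(42) | NF_VERDICT_FLAG_QUEUE_BYPASS;

	printf("queue %u, bypass %d\n",
	       verdict >> NF_VERDICT_QBITS,			/* 42 */
	       !!(verdict & NF_VERDICT_FLAG_QUEUE_BYPASS));	/* 1 */
	return 0;
}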
// SPDX-License-Identifier: GPL-2.0-only /* * GENEVE: Generic Network Virtualization Encapsulation * * Copyright (c) 2015 Red Hat, Inc.
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/ethtool.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/etherdevice.h> #include <linux/hash.h> #include <net/ipv6_stubs.h> #include <net/dst_metadata.h> #include <net/gro_cells.h> #include <net/rtnetlink.h> #include <net/geneve.h> #include <net/gro.h> #include <net/netdev_lock.h> #include <net/protocol.h> #define GENEVE_NETDEV_VER "0.6" #define GENEVE_N_VID (1u << 24) #define GENEVE_VID_MASK (GENEVE_N_VID - 1) #define VNI_HASH_BITS 10 #define VNI_HASH_SIZE (1<<VNI_HASH_BITS) static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); #define GENEVE_VER 0 #define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr)) #define GENEVE_IPV4_HLEN (ETH_HLEN + sizeof(struct iphdr) + GENEVE_BASE_HLEN) #define GENEVE_IPV6_HLEN (ETH_HLEN + sizeof(struct ipv6hdr) + GENEVE_BASE_HLEN) /* per-network namespace private data for this module */ struct geneve_net { struct list_head geneve_list; struct list_head sock_list; }; static unsigned int geneve_net_id; struct geneve_dev_node { struct hlist_node hlist; struct geneve_dev *geneve; }; struct geneve_config { struct ip_tunnel_info info; bool collect_md; bool use_udp6_rx_checksums; bool ttl_inherit; enum ifla_geneve_df df; bool inner_proto_inherit; u16 port_min; u16 port_max; }; /* Pseudo network device */ struct geneve_dev { struct geneve_dev_node hlist4; /* vni hash table for IPv4 socket */ #if IS_ENABLED(CONFIG_IPV6) struct geneve_dev_node hlist6; /* vni hash table for IPv6 socket */ #endif struct net *net; /* netns for packet i/o */ struct net_device *dev; /* netdev for geneve tunnel */ struct geneve_sock __rcu *sock4; /* IPv4 socket used for geneve tunnel */ #if IS_ENABLED(CONFIG_IPV6) struct geneve_sock __rcu *sock6; /* IPv6 socket used for geneve tunnel */ #endif struct list_head next; /* geneve's per namespace list */ struct gro_cells gro_cells; struct geneve_config cfg; }; struct geneve_sock { bool collect_md; struct list_head list; struct socket *sock; struct rcu_head rcu; int refcnt; struct hlist_head vni_list[VNI_HASH_SIZE]; }; static inline __u32 geneve_net_vni_hash(u8 vni[3]) { __u32 vnid; vnid = (vni[0] << 16) | (vni[1] << 8) | vni[2]; return hash_32(vnid, VNI_HASH_BITS); } static __be64 vni_to_tunnel_id(const __u8 *vni) { #ifdef __BIG_ENDIAN return (vni[0] << 16) | (vni[1] << 8) | vni[2]; #else return (__force __be64)(((__force u64)vni[0] << 40) | ((__force u64)vni[1] << 48) | ((__force u64)vni[2] << 56)); #endif } /* Convert 64 bit tunnel ID to 24 bit VNI. 
*/ static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni) { #ifdef __BIG_ENDIAN vni[0] = (__force __u8)(tun_id >> 16); vni[1] = (__force __u8)(tun_id >> 8); vni[2] = (__force __u8)tun_id; #else vni[0] = (__force __u8)((__force u64)tun_id >> 40); vni[1] = (__force __u8)((__force u64)tun_id >> 48); vni[2] = (__force __u8)((__force u64)tun_id >> 56); #endif } static bool eq_tun_id_and_vni(u8 *tun_id, u8 *vni) { return !memcmp(vni, &tun_id[5], 3); } static sa_family_t geneve_get_sk_family(struct geneve_sock *gs) { return gs->sock->sk->sk_family; } static struct geneve_dev *geneve_lookup(struct geneve_sock *gs, __be32 addr, u8 vni[]) { struct hlist_head *vni_list_head; struct geneve_dev_node *node; __u32 hash; /* Find the device for this VNI */ hash = geneve_net_vni_hash(vni); vni_list_head = &gs->vni_list[hash]; hlist_for_each_entry_rcu(node, vni_list_head, hlist) { if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) && addr == node->geneve->cfg.info.key.u.ipv4.dst) return node->geneve; } return NULL; } #if IS_ENABLED(CONFIG_IPV6) static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs, struct in6_addr addr6, u8 vni[]) { struct hlist_head *vni_list_head; struct geneve_dev_node *node; __u32 hash; /* Find the device for this VNI */ hash = geneve_net_vni_hash(vni); vni_list_head = &gs->vni_list[hash]; hlist_for_each_entry_rcu(node, vni_list_head, hlist) { if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) && ipv6_addr_equal(&addr6, &node->geneve->cfg.info.key.u.ipv6.dst)) return node->geneve; } return NULL; } #endif static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) { return (struct genevehdr *)(udp_hdr(skb) + 1); } static struct geneve_dev *geneve_lookup_skb(struct geneve_sock *gs, struct sk_buff *skb) { static u8 zero_vni[3]; u8 *vni; if (geneve_get_sk_family(gs) == AF_INET) { struct iphdr *iph; __be32 addr; iph = ip_hdr(skb); /* outer IP header... */ if (gs->collect_md) { vni = zero_vni; addr = 0; } else { vni = geneve_hdr(skb)->vni; addr = iph->saddr; } return geneve_lookup(gs, addr, vni); #if IS_ENABLED(CONFIG_IPV6) } else if (geneve_get_sk_family(gs) == AF_INET6) { static struct in6_addr zero_addr6; struct ipv6hdr *ip6h; struct in6_addr addr6; ip6h = ipv6_hdr(skb); /* outer IPv6 header... */ if (gs->collect_md) { vni = zero_vni; addr6 = zero_addr6; } else { vni = geneve_hdr(skb)->vni; addr6 = ip6h->saddr; } return geneve6_lookup(gs, addr6, vni); #endif } return NULL; } /* geneve receive/decap routine */ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, struct sk_buff *skb) { struct genevehdr *gnvh = geneve_hdr(skb); struct metadata_dst *tun_dst = NULL; unsigned int len; int nh, err = 0; void *oiph; if (ip_tunnel_collect_metadata() || gs->collect_md) { IP_TUNNEL_DECLARE_FLAGS(flags) = { }; __set_bit(IP_TUNNEL_KEY_BIT, flags); __assign_bit(IP_TUNNEL_OAM_BIT, flags, gnvh->oam); __assign_bit(IP_TUNNEL_CRIT_OPT_BIT, flags, gnvh->critical); tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags, vni_to_tunnel_id(gnvh->vni), gnvh->opt_len * 4); if (!tun_dst) { dev_dstats_rx_dropped(geneve->dev); goto drop; } /* Update tunnel dst according to Geneve options. */ ip_tunnel_flags_zero(flags); __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, flags); ip_tunnel_info_opts_set(&tun_dst->u.tun_info, gnvh->options, gnvh->opt_len * 4, flags); } else { /* Drop packets w/ critical options, * since we don't support any... 
*/ if (gnvh->critical) { DEV_STATS_INC(geneve->dev, rx_frame_errors); DEV_STATS_INC(geneve->dev, rx_errors); goto drop; } } if (tun_dst) skb_dst_set(skb, &tun_dst->dst); if (gnvh->proto_type == htons(ETH_P_TEB)) { skb_reset_mac_header(skb); skb->protocol = eth_type_trans(skb, geneve->dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); /* Ignore packet loops (and multicast echo) */ if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) { DEV_STATS_INC(geneve->dev, rx_errors); goto drop; } } else { skb_reset_mac_header(skb); skb->dev = geneve->dev; skb->pkt_type = PACKET_HOST; } /* Save offset of outer header relative to skb->head, * because we are going to reset the network header to the inner header * and might change skb->head. */ nh = skb_network_header(skb) - skb->head; skb_reset_network_header(skb); if (!pskb_inet_may_pull(skb)) { DEV_STATS_INC(geneve->dev, rx_length_errors); DEV_STATS_INC(geneve->dev, rx_errors); goto drop; } /* Get the outer header. */ oiph = skb->head + nh; if (geneve_get_sk_family(gs) == AF_INET) err = IP_ECN_decapsulate(oiph, skb); #if IS_ENABLED(CONFIG_IPV6) else err = IP6_ECN_decapsulate(oiph, skb); #endif if (unlikely(err)) { if (log_ecn_error) { if (geneve_get_sk_family(gs) == AF_INET) net_info_ratelimited("non-ECT from %pI4 " "with TOS=%#x\n", &((struct iphdr *)oiph)->saddr, ((struct iphdr *)oiph)->tos); #if IS_ENABLED(CONFIG_IPV6) else net_info_ratelimited("non-ECT from %pI6\n", &((struct ipv6hdr *)oiph)->saddr); #endif } if (err > 1) { DEV_STATS_INC(geneve->dev, rx_frame_errors); DEV_STATS_INC(geneve->dev, rx_errors); goto drop; } } len = skb->len; err = gro_cells_receive(&geneve->gro_cells, skb); if (likely(err == NET_RX_SUCCESS)) dev_dstats_rx_add(geneve->dev, len); return; drop: /* Consume bad packet */ kfree_skb(skb); } /* Setup stats when device is created */ static int geneve_init(struct net_device *dev) { struct geneve_dev *geneve = netdev_priv(dev); int err; err = gro_cells_init(&geneve->gro_cells, dev); if (err) return err; err = dst_cache_init(&geneve->cfg.info.dst_cache, GFP_KERNEL); if (err) { gro_cells_destroy(&geneve->gro_cells); return err; } netdev_lockdep_set_classes(dev); return 0; } static void geneve_uninit(struct net_device *dev) { struct geneve_dev *geneve = netdev_priv(dev); dst_cache_destroy(&geneve->cfg.info.dst_cache); gro_cells_destroy(&geneve->gro_cells); } /* Callback from net/ipv4/udp.c to receive packets */ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct genevehdr *geneveh; struct geneve_dev *geneve; struct geneve_sock *gs; __be16 inner_proto; int opts_len; /* Need UDP and Geneve header to be present */ if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN))) goto drop; /* Return packets with reserved bits set */ geneveh = geneve_hdr(skb); if (unlikely(geneveh->ver != GENEVE_VER)) goto drop; gs = rcu_dereference_sk_user_data(sk); if (!gs) goto drop; geneve = geneve_lookup_skb(gs, skb); if (!geneve) goto drop; inner_proto = geneveh->proto_type; if (unlikely((!geneve->cfg.inner_proto_inherit && inner_proto != htons(ETH_P_TEB)))) { dev_dstats_rx_dropped(geneve->dev); goto drop; } opts_len = geneveh->opt_len * 4; if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, inner_proto, !net_eq(geneve->net, dev_net(geneve->dev)))) { dev_dstats_rx_dropped(geneve->dev); goto drop; } geneve_rx(geneve, gs, skb); return 0; drop: /* Consume bad packet */ kfree_skb(skb); return 0; } /* Callback from net/ipv{4,6}/udp.c to check that we have a tunnel for errors */ static int 
geneve_udp_encap_err_lookup(struct sock *sk, struct sk_buff *skb) { struct genevehdr *geneveh; struct geneve_sock *gs; u8 zero_vni[3] = { 0 }; u8 *vni = zero_vni; if (!pskb_may_pull(skb, skb_transport_offset(skb) + GENEVE_BASE_HLEN)) return -EINVAL; geneveh = geneve_hdr(skb); if (geneveh->ver != GENEVE_VER) return -EINVAL; if (geneveh->proto_type != htons(ETH_P_TEB)) return -EINVAL; gs = rcu_dereference_sk_user_data(sk); if (!gs) return -ENOENT; if (geneve_get_sk_family(gs) == AF_INET) { struct iphdr *iph = ip_hdr(skb); __be32 addr4 = 0; if (!gs->collect_md) { vni = geneve_hdr(skb)->vni; addr4 = iph->daddr; } return geneve_lookup(gs, addr4, vni) ? 0 : -ENOENT; } #if IS_ENABLED(CONFIG_IPV6) if (geneve_get_sk_family(gs) == AF_INET6) { struct ipv6hdr *ip6h = ipv6_hdr(skb); struct in6_addr addr6; memset(&addr6, 0, sizeof(struct in6_addr)); if (!gs->collect_md) { vni = geneve_hdr(skb)->vni; addr6 = ip6h->daddr; } return geneve6_lookup(gs, addr6, vni) ? 0 : -ENOENT; } #endif return -EPFNOSUPPORT; } static struct socket *geneve_create_sock(struct net *net, bool ipv6, __be16 port, bool ipv6_rx_csum) { struct socket *sock; struct udp_port_cfg udp_conf; int err; memset(&udp_conf, 0, sizeof(udp_conf)); if (ipv6) { udp_conf.family = AF_INET6; udp_conf.ipv6_v6only = 1; udp_conf.use_udp6_rx_checksums = ipv6_rx_csum; } else { udp_conf.family = AF_INET; udp_conf.local_ip.s_addr = htonl(INADDR_ANY); } udp_conf.local_udp_port = port; /* Open UDP socket */ err = udp_sock_create(net, &udp_conf, &sock); if (err < 0) return ERR_PTR(err); udp_allow_gso(sock->sk); return sock; } static int geneve_hlen(struct genevehdr *gh) { return sizeof(*gh) + gh->opt_len * 4; } static struct sk_buff *geneve_gro_receive(struct sock *sk, struct list_head *head, struct sk_buff *skb) { struct sk_buff *pp = NULL; struct sk_buff *p; struct genevehdr *gh, *gh2; unsigned int hlen, gh_len, off_gnv; const struct packet_offload *ptype; __be16 type; int flush = 1; off_gnv = skb_gro_offset(skb); hlen = off_gnv + sizeof(*gh); gh = skb_gro_header(skb, hlen, off_gnv); if (unlikely(!gh)) goto out; if (gh->ver != GENEVE_VER || gh->oam) goto out; gh_len = geneve_hlen(gh); hlen = off_gnv + gh_len; if (!skb_gro_may_pull(skb, hlen)) { gh = skb_gro_header_slow(skb, hlen, off_gnv); if (unlikely(!gh)) goto out; } list_for_each_entry(p, head, list) { if (!NAPI_GRO_CB(p)->same_flow) continue; gh2 = (struct genevehdr *)(p->data + off_gnv); if (gh->opt_len != gh2->opt_len || memcmp(gh, gh2, gh_len)) { NAPI_GRO_CB(p)->same_flow = 0; continue; } } skb_gro_pull(skb, gh_len); skb_gro_postpull_rcsum(skb, gh, gh_len); type = gh->proto_type; if (likely(type == htons(ETH_P_TEB))) return call_gro_receive(eth_gro_receive, head, skb); ptype = gro_find_receive_by_type(type); if (!ptype) goto out; pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); flush = 0; out: skb_gro_flush_final(skb, pp, flush); return pp; } static int geneve_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff) { struct genevehdr *gh; struct packet_offload *ptype; __be16 type; int gh_len; int err = -ENOSYS; gh = (struct genevehdr *)(skb->data + nhoff); gh_len = geneve_hlen(gh); type = gh->proto_type; /* since skb->encapsulation is set, eth_gro_complete() sets the inner mac header */ if (likely(type == htons(ETH_P_TEB))) return eth_gro_complete(skb, nhoff + gh_len); ptype = gro_find_complete_by_type(type); if (ptype) err = ptype->callbacks.gro_complete(skb, nhoff + gh_len); skb_set_inner_mac_header(skb, nhoff + gh_len); return err; } /* Create new listen socket if needed */ 
static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, bool ipv6, bool ipv6_rx_csum) { struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; struct socket *sock; struct udp_tunnel_sock_cfg tunnel_cfg; int h; gs = kzalloc(sizeof(*gs), GFP_KERNEL); if (!gs) return ERR_PTR(-ENOMEM); sock = geneve_create_sock(net, ipv6, port, ipv6_rx_csum); if (IS_ERR(sock)) { kfree(gs); return ERR_CAST(sock); } gs->sock = sock; gs->refcnt = 1; for (h = 0; h < VNI_HASH_SIZE; ++h) INIT_HLIST_HEAD(&gs->vni_list[h]); /* Initialize the geneve udp offloads structure */ udp_tunnel_notify_add_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE); /* Mark socket as an encapsulation socket */ memset(&tunnel_cfg, 0, sizeof(tunnel_cfg)); tunnel_cfg.sk_user_data = gs; tunnel_cfg.encap_type = 1; tunnel_cfg.gro_receive = geneve_gro_receive; tunnel_cfg.gro_complete = geneve_gro_complete; tunnel_cfg.encap_rcv = geneve_udp_encap_recv; tunnel_cfg.encap_err_lookup = geneve_udp_encap_err_lookup; tunnel_cfg.encap_destroy = NULL; setup_udp_tunnel_sock(net, sock, &tunnel_cfg); list_add(&gs->list, &gn->sock_list); return gs; } static void __geneve_sock_release(struct geneve_sock *gs) { if (!gs || --gs->refcnt) return; list_del(&gs->list); udp_tunnel_notify_del_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE); udp_tunnel_sock_release(gs->sock); kfree_rcu(gs, rcu); } static void geneve_sock_release(struct geneve_dev *geneve) { struct geneve_sock *gs4 = rtnl_dereference(geneve->sock4); #if IS_ENABLED(CONFIG_IPV6) struct geneve_sock *gs6 = rtnl_dereference(geneve->sock6); rcu_assign_pointer(geneve->sock6, NULL); #endif rcu_assign_pointer(geneve->sock4, NULL); synchronize_net(); __geneve_sock_release(gs4); #if IS_ENABLED(CONFIG_IPV6) __geneve_sock_release(gs6); #endif } static struct geneve_sock *geneve_find_sock(struct geneve_net *gn, sa_family_t family, __be16 dst_port) { struct geneve_sock *gs; list_for_each_entry(gs, &gn->sock_list, list) { if (inet_sk(gs->sock->sk)->inet_sport == dst_port && geneve_get_sk_family(gs) == family) { return gs; } } return NULL; } static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6) { struct net *net = geneve->net; struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_dev_node *node; struct geneve_sock *gs; __u8 vni[3]; __u32 hash; gs = geneve_find_sock(gn, ipv6 ? 
AF_INET6 : AF_INET, geneve->cfg.info.key.tp_dst); if (gs) { gs->refcnt++; goto out; } gs = geneve_socket_create(net, geneve->cfg.info.key.tp_dst, ipv6, geneve->cfg.use_udp6_rx_checksums); if (IS_ERR(gs)) return PTR_ERR(gs); out: gs->collect_md = geneve->cfg.collect_md; #if IS_ENABLED(CONFIG_IPV6) if (ipv6) { rcu_assign_pointer(geneve->sock6, gs); node = &geneve->hlist6; } else #endif { rcu_assign_pointer(geneve->sock4, gs); node = &geneve->hlist4; } node->geneve = geneve; tunnel_id_to_vni(geneve->cfg.info.key.tun_id, vni); hash = geneve_net_vni_hash(vni); hlist_add_head_rcu(&node->hlist, &gs->vni_list[hash]); return 0; } static int geneve_open(struct net_device *dev) { struct geneve_dev *geneve = netdev_priv(dev); bool metadata = geneve->cfg.collect_md; bool ipv4, ipv6; int ret = 0; ipv6 = geneve->cfg.info.mode & IP_TUNNEL_INFO_IPV6 || metadata; ipv4 = !ipv6 || metadata; #if IS_ENABLED(CONFIG_IPV6) if (ipv6) { ret = geneve_sock_add(geneve, true); if (ret < 0 && ret != -EAFNOSUPPORT) ipv4 = false; } #endif if (ipv4) ret = geneve_sock_add(geneve, false); if (ret < 0) geneve_sock_release(geneve); return ret; } static int geneve_stop(struct net_device *dev) { struct geneve_dev *geneve = netdev_priv(dev); hlist_del_init_rcu(&geneve->hlist4.hlist); #if IS_ENABLED(CONFIG_IPV6) hlist_del_init_rcu(&geneve->hlist6.hlist); #endif geneve_sock_release(geneve); return 0; } static void geneve_build_header(struct genevehdr *geneveh, const struct ip_tunnel_info *info, __be16 inner_proto) { geneveh->ver = GENEVE_VER; geneveh->opt_len = info->options_len / 4; geneveh->oam = test_bit(IP_TUNNEL_OAM_BIT, info->key.tun_flags); geneveh->critical = test_bit(IP_TUNNEL_CRIT_OPT_BIT, info->key.tun_flags); geneveh->rsvd1 = 0; tunnel_id_to_vni(info->key.tun_id, geneveh->vni); geneveh->proto_type = inner_proto; geneveh->rsvd2 = 0; if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags)) ip_tunnel_info_opts_get(geneveh->options, info); } static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb, const struct ip_tunnel_info *info, bool xnet, int ip_hdr_len, bool inner_proto_inherit) { bool udp_sum = test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); struct genevehdr *gnvh; __be16 inner_proto; int min_headroom; int err; skb_reset_mac_header(skb); skb_scrub_packet(skb, xnet); min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len + GENEVE_BASE_HLEN + info->options_len + ip_hdr_len; err = skb_cow_head(skb, min_headroom); if (unlikely(err)) goto free_dst; err = udp_tunnel_handle_offloads(skb, udp_sum); if (err) goto free_dst; gnvh = __skb_push(skb, sizeof(*gnvh) + info->options_len); inner_proto = inner_proto_inherit ? 
skb->protocol : htons(ETH_P_TEB); geneve_build_header(gnvh, info, inner_proto); skb_set_inner_protocol(skb, inner_proto); return 0; free_dst: dst_release(dst); return err; } static u8 geneve_get_dsfield(struct sk_buff *skb, struct net_device *dev, const struct ip_tunnel_info *info, bool *use_cache) { struct geneve_dev *geneve = netdev_priv(dev); u8 dsfield; dsfield = info->key.tos; if (dsfield == 1 && !geneve->cfg.collect_md) { dsfield = ip_tunnel_get_dsfield(ip_hdr(skb), skb); *use_cache = false; } return dsfield; } static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct geneve_dev *geneve, const struct ip_tunnel_info *info) { bool inner_proto_inherit = geneve->cfg.inner_proto_inherit; bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); const struct ip_tunnel_key *key = &info->key; struct rtable *rt; bool use_cache; __u8 tos, ttl; __be16 df = 0; __be32 saddr; __be16 sport; int err; if (skb_vlan_inet_prepare(skb, inner_proto_inherit)) return -EINVAL; if (!gs4) return -EIO; use_cache = ip_tunnel_dst_cache_usable(skb, info); tos = geneve_get_dsfield(skb, dev, info, &use_cache); sport = udp_flow_src_port(geneve->net, skb, geneve->cfg.port_min, geneve->cfg.port_max, true); rt = udp_tunnel_dst_lookup(skb, dev, geneve->net, 0, &saddr, &info->key, sport, geneve->cfg.info.key.tp_dst, tos, use_cache ? (struct dst_cache *)&info->dst_cache : NULL); if (IS_ERR(rt)) return PTR_ERR(rt); err = skb_tunnel_check_pmtu(skb, &rt->dst, GENEVE_IPV4_HLEN + info->options_len, netif_is_any_bridge_port(dev)); if (err < 0) { dst_release(&rt->dst); return err; } else if (err) { struct ip_tunnel_info *info; info = skb_tunnel_info(skb); if (info) { struct ip_tunnel_info *unclone; unclone = skb_tunnel_info_unclone(skb); if (unlikely(!unclone)) { dst_release(&rt->dst); return -ENOMEM; } unclone->key.u.ipv4.dst = saddr; unclone->key.u.ipv4.src = info->key.u.ipv4.dst; } if (!pskb_may_pull(skb, ETH_HLEN)) { dst_release(&rt->dst); return -EINVAL; } skb->protocol = eth_type_trans(skb, geneve->dev); __netif_rx(skb); dst_release(&rt->dst); return -EMSGSIZE; } tos = ip_tunnel_ecn_encap(tos, ip_hdr(skb), skb); if (geneve->cfg.collect_md) { ttl = key->ttl; df = test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags) ? htons(IP_DF) : 0; } else { if (geneve->cfg.ttl_inherit) ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb); else ttl = key->ttl; ttl = ttl ? 
: ip4_dst_hoplimit(&rt->dst); if (geneve->cfg.df == GENEVE_DF_SET) { df = htons(IP_DF); } else if (geneve->cfg.df == GENEVE_DF_INHERIT) { struct ethhdr *eth = skb_eth_hdr(skb); if (ntohs(eth->h_proto) == ETH_P_IPV6) { df = htons(IP_DF); } else if (ntohs(eth->h_proto) == ETH_P_IP) { struct iphdr *iph = ip_hdr(skb); if (iph->frag_off & htons(IP_DF)) df = htons(IP_DF); } } } err = geneve_build_skb(&rt->dst, skb, info, xnet, sizeof(struct iphdr), inner_proto_inherit); if (unlikely(err)) return err; udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, saddr, info->key.u.ipv4.dst, tos, ttl, df, sport, geneve->cfg.info.key.tp_dst, !net_eq(geneve->net, dev_net(geneve->dev)), !test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags)); return 0; } #if IS_ENABLED(CONFIG_IPV6) static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct geneve_dev *geneve, const struct ip_tunnel_info *info) { bool inner_proto_inherit = geneve->cfg.inner_proto_inherit; bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); struct geneve_sock *gs6 = rcu_dereference(geneve->sock6); const struct ip_tunnel_key *key = &info->key; struct dst_entry *dst = NULL; struct in6_addr saddr; bool use_cache; __u8 prio, ttl; __be16 sport; int err; if (skb_vlan_inet_prepare(skb, inner_proto_inherit)) return -EINVAL; if (!gs6) return -EIO; use_cache = ip_tunnel_dst_cache_usable(skb, info); prio = geneve_get_dsfield(skb, dev, info, &use_cache); sport = udp_flow_src_port(geneve->net, skb, geneve->cfg.port_min, geneve->cfg.port_max, true); dst = udp_tunnel6_dst_lookup(skb, dev, geneve->net, gs6->sock, 0, &saddr, key, sport, geneve->cfg.info.key.tp_dst, prio, use_cache ? (struct dst_cache *)&info->dst_cache : NULL); if (IS_ERR(dst)) return PTR_ERR(dst); err = skb_tunnel_check_pmtu(skb, dst, GENEVE_IPV6_HLEN + info->options_len, netif_is_any_bridge_port(dev)); if (err < 0) { dst_release(dst); return err; } else if (err) { struct ip_tunnel_info *info = skb_tunnel_info(skb); if (info) { struct ip_tunnel_info *unclone; unclone = skb_tunnel_info_unclone(skb); if (unlikely(!unclone)) { dst_release(dst); return -ENOMEM; } unclone->key.u.ipv6.dst = saddr; unclone->key.u.ipv6.src = info->key.u.ipv6.dst; } if (!pskb_may_pull(skb, ETH_HLEN)) { dst_release(dst); return -EINVAL; } skb->protocol = eth_type_trans(skb, geneve->dev); __netif_rx(skb); dst_release(dst); return -EMSGSIZE; } prio = ip_tunnel_ecn_encap(prio, ip_hdr(skb), skb); if (geneve->cfg.collect_md) { ttl = key->ttl; } else { if (geneve->cfg.ttl_inherit) ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb); else ttl = key->ttl; ttl = ttl ? 
: ip6_dst_hoplimit(dst); } err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct ipv6hdr), inner_proto_inherit); if (unlikely(err)) return err; udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev, &saddr, &key->u.ipv6.dst, prio, ttl, info->key.label, sport, geneve->cfg.info.key.tp_dst, !test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags)); return 0; } #endif static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) { struct geneve_dev *geneve = netdev_priv(dev); struct ip_tunnel_info *info = NULL; int err; if (geneve->cfg.collect_md) { info = skb_tunnel_info(skb); if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) { netdev_dbg(dev, "no tunnel metadata\n"); dev_kfree_skb(skb); dev_dstats_tx_dropped(dev); return NETDEV_TX_OK; } } else { info = &geneve->cfg.info; } rcu_read_lock(); #if IS_ENABLED(CONFIG_IPV6) if (info->mode & IP_TUNNEL_INFO_IPV6) err = geneve6_xmit_skb(skb, dev, geneve, info); else #endif err = geneve_xmit_skb(skb, dev, geneve, info); rcu_read_unlock(); if (likely(!err)) return NETDEV_TX_OK; if (err != -EMSGSIZE) dev_kfree_skb(skb); if (err == -ELOOP) DEV_STATS_INC(dev, collisions); else if (err == -ENETUNREACH) DEV_STATS_INC(dev, tx_carrier_errors); DEV_STATS_INC(dev, tx_errors); return NETDEV_TX_OK; } static int geneve_change_mtu(struct net_device *dev, int new_mtu) { if (new_mtu > dev->max_mtu) new_mtu = dev->max_mtu; else if (new_mtu < dev->min_mtu) new_mtu = dev->min_mtu; WRITE_ONCE(dev->mtu, new_mtu); return 0; } static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) { struct ip_tunnel_info *info = skb_tunnel_info(skb); struct geneve_dev *geneve = netdev_priv(dev); __be16 sport; if (ip_tunnel_info_af(info) == AF_INET) { struct rtable *rt; struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); bool use_cache; __be32 saddr; u8 tos; if (!gs4) return -EIO; use_cache = ip_tunnel_dst_cache_usable(skb, info); tos = geneve_get_dsfield(skb, dev, info, &use_cache); sport = udp_flow_src_port(geneve->net, skb, geneve->cfg.port_min, geneve->cfg.port_max, true); rt = udp_tunnel_dst_lookup(skb, dev, geneve->net, 0, &saddr, &info->key, sport, geneve->cfg.info.key.tp_dst, tos, use_cache ? &info->dst_cache : NULL); if (IS_ERR(rt)) return PTR_ERR(rt); ip_rt_put(rt); info->key.u.ipv4.src = saddr; #if IS_ENABLED(CONFIG_IPV6) } else if (ip_tunnel_info_af(info) == AF_INET6) { struct dst_entry *dst; struct geneve_sock *gs6 = rcu_dereference(geneve->sock6); struct in6_addr saddr; bool use_cache; u8 prio; if (!gs6) return -EIO; use_cache = ip_tunnel_dst_cache_usable(skb, info); prio = geneve_get_dsfield(skb, dev, info, &use_cache); sport = udp_flow_src_port(geneve->net, skb, geneve->cfg.port_min, geneve->cfg.port_max, true); dst = udp_tunnel6_dst_lookup(skb, dev, geneve->net, gs6->sock, 0, &saddr, &info->key, sport, geneve->cfg.info.key.tp_dst, prio, use_cache ? 
&info->dst_cache : NULL); if (IS_ERR(dst)) return PTR_ERR(dst); dst_release(dst); info->key.u.ipv6.src = saddr; #endif } else { return -EINVAL; } info->key.tp_src = sport; info->key.tp_dst = geneve->cfg.info.key.tp_dst; return 0; } static const struct net_device_ops geneve_netdev_ops = { .ndo_init = geneve_init, .ndo_uninit = geneve_uninit, .ndo_open = geneve_open, .ndo_stop = geneve_stop, .ndo_start_xmit = geneve_xmit, .ndo_change_mtu = geneve_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, .ndo_fill_metadata_dst = geneve_fill_metadata_dst, }; static void geneve_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) { strscpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version)); strscpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver)); } static const struct ethtool_ops geneve_ethtool_ops = { .get_drvinfo = geneve_get_drvinfo, .get_link = ethtool_op_get_link, }; /* Info for udev, that this is a virtual tunnel endpoint */ static const struct device_type geneve_type = { .name = "geneve", }; /* Calls the ndo_udp_tunnel_add of the caller in order to * supply the listening GENEVE udp ports. Callers are expected * to implement the ndo_udp_tunnel_add. */ static void geneve_offload_rx_ports(struct net_device *dev, bool push) { struct net *net = dev_net(dev); struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; rcu_read_lock(); list_for_each_entry_rcu(gs, &gn->sock_list, list) { if (push) { udp_tunnel_push_rx_port(dev, gs->sock, UDP_TUNNEL_TYPE_GENEVE); } else { udp_tunnel_drop_rx_port(dev, gs->sock, UDP_TUNNEL_TYPE_GENEVE); } } rcu_read_unlock(); } /* Initialize the device structure. */ static void geneve_setup(struct net_device *dev) { ether_setup(dev); dev->netdev_ops = &geneve_netdev_ops; dev->ethtool_ops = &geneve_ethtool_ops; dev->needs_free_netdev = true; SET_NETDEV_DEVTYPE(dev, &geneve_type); dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST; dev->features |= NETIF_F_RXCSUM; dev->features |= NETIF_F_GSO_SOFTWARE; dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST; dev->hw_features |= NETIF_F_RXCSUM; dev->hw_features |= NETIF_F_GSO_SOFTWARE; dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS; /* MTU range: 68 - (something less than 65535) */ dev->min_mtu = ETH_MIN_MTU; /* The max_mtu calculation does not take account of GENEVE * options, to avoid excluding potentially valid * configurations. This will be further reduced by IPvX hdr size. 
*/ dev->max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len; netif_keep_dst(dev); dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; dev->lltx = true; eth_hw_addr_random(dev); } static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { [IFLA_GENEVE_UNSPEC] = { .strict_start_type = IFLA_GENEVE_INNER_PROTO_INHERIT }, [IFLA_GENEVE_ID] = { .type = NLA_U32 }, [IFLA_GENEVE_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) }, [IFLA_GENEVE_REMOTE6] = { .len = sizeof(struct in6_addr) }, [IFLA_GENEVE_TTL] = { .type = NLA_U8 }, [IFLA_GENEVE_TOS] = { .type = NLA_U8 }, [IFLA_GENEVE_LABEL] = { .type = NLA_U32 }, [IFLA_GENEVE_PORT] = { .type = NLA_U16 }, [IFLA_GENEVE_COLLECT_METADATA] = { .type = NLA_FLAG }, [IFLA_GENEVE_UDP_CSUM] = { .type = NLA_U8 }, [IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 }, [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 }, [IFLA_GENEVE_TTL_INHERIT] = { .type = NLA_U8 }, [IFLA_GENEVE_DF] = { .type = NLA_U8 }, [IFLA_GENEVE_INNER_PROTO_INHERIT] = { .type = NLA_FLAG }, [IFLA_GENEVE_PORT_RANGE] = NLA_POLICY_EXACT_LEN(sizeof(struct ifla_geneve_port_range)), }; static int geneve_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { if (tb[IFLA_ADDRESS]) { if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) { NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS], "Provided link layer address is not Ethernet"); return -EINVAL; } if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) { NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS], "Provided Ethernet address is not unicast"); return -EADDRNOTAVAIL; } } if (!data) { NL_SET_ERR_MSG(extack, "Not enough attributes provided to perform the operation"); return -EINVAL; } if (data[IFLA_GENEVE_ID]) { __u32 vni = nla_get_u32(data[IFLA_GENEVE_ID]); if (vni >= GENEVE_N_VID) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_ID], "Geneve ID must be lower than 16777216"); return -ERANGE; } } if (data[IFLA_GENEVE_DF]) { enum ifla_geneve_df df = nla_get_u8(data[IFLA_GENEVE_DF]); if (df < 0 || df > GENEVE_DF_MAX) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_DF], "Invalid DF attribute"); return -EINVAL; } } if (data[IFLA_GENEVE_PORT_RANGE]) { const struct ifla_geneve_port_range *p; p = nla_data(data[IFLA_GENEVE_PORT_RANGE]); if (ntohs(p->high) < ntohs(p->low)) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_PORT_RANGE], "Invalid source port range"); return -EINVAL; } } return 0; } static struct geneve_dev *geneve_find_dev(struct geneve_net *gn, const struct ip_tunnel_info *info, bool *tun_on_same_port, bool *tun_collect_md) { struct geneve_dev *geneve, *t = NULL; *tun_on_same_port = false; *tun_collect_md = false; list_for_each_entry(geneve, &gn->geneve_list, next) { if (info->key.tp_dst == geneve->cfg.info.key.tp_dst) { *tun_collect_md = geneve->cfg.collect_md; *tun_on_same_port = true; } if (info->key.tun_id == geneve->cfg.info.key.tun_id && info->key.tp_dst == geneve->cfg.info.key.tp_dst && !memcmp(&info->key.u, &geneve->cfg.info.key.u, sizeof(info->key.u))) t = geneve; } return t; } static bool is_tnl_info_zero(const struct ip_tunnel_info *info) { return !(info->key.tun_id || info->key.tos || !ip_tunnel_flags_empty(info->key.tun_flags) || info->key.ttl || info->key.label || info->key.tp_src || memchr_inv(&info->key.u, 0, sizeof(info->key.u))); } static bool geneve_dst_addr_equal(struct ip_tunnel_info *a, struct ip_tunnel_info *b) { if (ip_tunnel_info_af(a) == AF_INET) return a->key.u.ipv4.dst == b->key.u.ipv4.dst; else return ipv6_addr_equal(&a->key.u.ipv6.dst, 
&b->key.u.ipv6.dst); } static int geneve_configure(struct net *net, struct net_device *dev, struct netlink_ext_ack *extack, const struct geneve_config *cfg) { struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_dev *t, *geneve = netdev_priv(dev); const struct ip_tunnel_info *info = &cfg->info; bool tun_collect_md, tun_on_same_port; int err, encap_len; if (cfg->collect_md && !is_tnl_info_zero(info)) { NL_SET_ERR_MSG(extack, "Device is externally controlled, so attributes (VNI, Port, and so on) must not be specified"); return -EINVAL; } geneve->net = net; geneve->dev = dev; t = geneve_find_dev(gn, info, &tun_on_same_port, &tun_collect_md); if (t) return -EBUSY; /* make enough headroom for basic scenario */ encap_len = GENEVE_BASE_HLEN + ETH_HLEN; if (!cfg->collect_md && ip_tunnel_info_af(info) == AF_INET) { encap_len += sizeof(struct iphdr); dev->max_mtu -= sizeof(struct iphdr); } else { encap_len += sizeof(struct ipv6hdr); dev->max_mtu -= sizeof(struct ipv6hdr); } dev->needed_headroom = encap_len + ETH_HLEN; if (cfg->collect_md) { if (tun_on_same_port) { NL_SET_ERR_MSG(extack, "There can be only one externally controlled device on a destination port"); return -EPERM; } } else { if (tun_collect_md) { NL_SET_ERR_MSG(extack, "There already exists an externally controlled device on this destination port"); return -EPERM; } } dst_cache_reset(&geneve->cfg.info.dst_cache); memcpy(&geneve->cfg, cfg, sizeof(*cfg)); if (geneve->cfg.inner_proto_inherit) { dev->header_ops = NULL; dev->type = ARPHRD_NONE; dev->hard_header_len = 0; dev->addr_len = 0; dev->flags = IFF_POINTOPOINT | IFF_NOARP; } err = register_netdevice(dev); if (err) return err; list_add(&geneve->next, &gn->geneve_list); return 0; } static void init_tnl_info(struct ip_tunnel_info *info, __u16 dst_port) { memset(info, 0, sizeof(*info)); info->key.tp_dst = htons(dst_port); } static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack, struct geneve_config *cfg, bool changelink) { struct ip_tunnel_info *info = &cfg->info; int attrtype; if (data[IFLA_GENEVE_REMOTE] && data[IFLA_GENEVE_REMOTE6]) { NL_SET_ERR_MSG(extack, "Cannot specify both IPv4 and IPv6 Remote addresses"); return -EINVAL; } if (data[IFLA_GENEVE_REMOTE]) { if (changelink && (ip_tunnel_info_af(info) == AF_INET6)) { attrtype = IFLA_GENEVE_REMOTE; goto change_notsup; } info->key.u.ipv4.dst = nla_get_in_addr(data[IFLA_GENEVE_REMOTE]); if (ipv4_is_multicast(info->key.u.ipv4.dst)) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE], "Remote IPv4 address cannot be Multicast"); return -EINVAL; } } if (data[IFLA_GENEVE_REMOTE6]) { #if IS_ENABLED(CONFIG_IPV6) if (changelink && (ip_tunnel_info_af(info) == AF_INET)) { attrtype = IFLA_GENEVE_REMOTE6; goto change_notsup; } info->mode = IP_TUNNEL_INFO_IPV6; info->key.u.ipv6.dst = nla_get_in6_addr(data[IFLA_GENEVE_REMOTE6]); if (ipv6_addr_type(&info->key.u.ipv6.dst) & IPV6_ADDR_LINKLOCAL) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6], "Remote IPv6 address cannot be link-local"); return -EINVAL; } if (ipv6_addr_is_multicast(&info->key.u.ipv6.dst)) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6], "Remote IPv6 address cannot be Multicast"); return -EINVAL; } __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); cfg->use_udp6_rx_checksums = true; #else NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6], "IPv6 support not enabled in the kernel"); return -EPFNOSUPPORT; #endif } if (data[IFLA_GENEVE_ID]) { __u32 vni; __u8 tvni[3]; __be64 tunid; vni = 
nla_get_u32(data[IFLA_GENEVE_ID]); tvni[0] = (vni & 0x00ff0000) >> 16; tvni[1] = (vni & 0x0000ff00) >> 8; tvni[2] = vni & 0x000000ff; tunid = vni_to_tunnel_id(tvni); if (changelink && (tunid != info->key.tun_id)) { attrtype = IFLA_GENEVE_ID; goto change_notsup; } info->key.tun_id = tunid; } if (data[IFLA_GENEVE_TTL_INHERIT]) { if (nla_get_u8(data[IFLA_GENEVE_TTL_INHERIT])) cfg->ttl_inherit = true; else cfg->ttl_inherit = false; } else if (data[IFLA_GENEVE_TTL]) { info->key.ttl = nla_get_u8(data[IFLA_GENEVE_TTL]); cfg->ttl_inherit = false; } if (data[IFLA_GENEVE_TOS]) info->key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]); if (data[IFLA_GENEVE_DF]) cfg->df = nla_get_u8(data[IFLA_GENEVE_DF]); if (data[IFLA_GENEVE_LABEL]) { info->key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) & IPV6_FLOWLABEL_MASK; if (info->key.label && (!(info->mode & IP_TUNNEL_INFO_IPV6))) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LABEL], "Label attribute only applies for IPv6 Geneve devices"); return -EINVAL; } } if (data[IFLA_GENEVE_PORT]) { if (changelink) { attrtype = IFLA_GENEVE_PORT; goto change_notsup; } info->key.tp_dst = nla_get_be16(data[IFLA_GENEVE_PORT]); } if (data[IFLA_GENEVE_PORT_RANGE]) { const struct ifla_geneve_port_range *p; if (changelink) { attrtype = IFLA_GENEVE_PORT_RANGE; goto change_notsup; } p = nla_data(data[IFLA_GENEVE_PORT_RANGE]); cfg->port_min = ntohs(p->low); cfg->port_max = ntohs(p->high); } if (data[IFLA_GENEVE_COLLECT_METADATA]) { if (changelink) { attrtype = IFLA_GENEVE_COLLECT_METADATA; goto change_notsup; } cfg->collect_md = true; } if (data[IFLA_GENEVE_UDP_CSUM]) { if (changelink) { attrtype = IFLA_GENEVE_UDP_CSUM; goto change_notsup; } if (nla_get_u8(data[IFLA_GENEVE_UDP_CSUM])) __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); } if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]) { #if IS_ENABLED(CONFIG_IPV6) if (changelink) { attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_TX; goto change_notsup; } if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX])) __clear_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); #else NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX], "IPv6 support not enabled in the kernel"); return -EPFNOSUPPORT; #endif } if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]) { #if IS_ENABLED(CONFIG_IPV6) if (changelink) { attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_RX; goto change_notsup; } if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX])) cfg->use_udp6_rx_checksums = false; #else NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX], "IPv6 support not enabled in the kernel"); return -EPFNOSUPPORT; #endif } if (data[IFLA_GENEVE_INNER_PROTO_INHERIT]) { if (changelink) { attrtype = IFLA_GENEVE_INNER_PROTO_INHERIT; goto change_notsup; } cfg->inner_proto_inherit = true; } return 0; change_notsup: NL_SET_ERR_MSG_ATTR(extack, data[attrtype], "Changing VNI, Port, endpoint IP address family, external, inner_proto_inherit, and UDP checksum attributes are not supported"); return -EOPNOTSUPP; } static void geneve_link_config(struct net_device *dev, struct ip_tunnel_info *info, struct nlattr *tb[]) { struct geneve_dev *geneve = netdev_priv(dev); int ldev_mtu = 0; if (tb[IFLA_MTU]) { geneve_change_mtu(dev, nla_get_u32(tb[IFLA_MTU])); return; } switch (ip_tunnel_info_af(info)) { case AF_INET: { struct flowi4 fl4 = { .daddr = info->key.u.ipv4.dst }; struct rtable *rt = ip_route_output_key(geneve->net, &fl4); if (!IS_ERR(rt) && rt->dst.dev) { ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV4_HLEN; ip_rt_put(rt); } break; } #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: { struct rt6_info *rt; if 
(!__in6_dev_get(dev)) break; rt = rt6_lookup(geneve->net, &info->key.u.ipv6.dst, NULL, 0, NULL, 0); if (rt && rt->dst.dev) ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN; ip6_rt_put(rt); break; } #endif } if (ldev_mtu <= 0) return; geneve_change_mtu(dev, ldev_mtu - info->options_len); } static int geneve_newlink(struct net_device *dev, struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { struct net *link_net = rtnl_newlink_link_net(params); struct nlattr **data = params->data; struct nlattr **tb = params->tb; struct geneve_config cfg = { .df = GENEVE_DF_UNSET, .use_udp6_rx_checksums = false, .ttl_inherit = false, .collect_md = false, .port_min = 1, .port_max = USHRT_MAX, }; int err; init_tnl_info(&cfg.info, GENEVE_UDP_PORT); err = geneve_nl2info(tb, data, extack, &cfg, false); if (err) return err; err = geneve_configure(link_net, dev, extack, &cfg); if (err) return err; geneve_link_config(dev, &cfg.info, tb); return 0; } /* Quiesces the geneve device data path for both TX and RX. * * On transmit geneve checks for non-NULL geneve_sock before it proceeds. * So, if we set that socket to NULL under RCU and wait for synchronize_net() * to complete for the existing set of in-flight packets to be transmitted, * then we would have quiesced the transmit data path. All the future packets * will get dropped until we unquiesce the data path. * * On receive geneve dereference the geneve_sock stashed in the socket. So, * if we set that to NULL under RCU and wait for synchronize_net() to * complete, then we would have quiesced the receive data path. */ static void geneve_quiesce(struct geneve_dev *geneve, struct geneve_sock **gs4, struct geneve_sock **gs6) { *gs4 = rtnl_dereference(geneve->sock4); rcu_assign_pointer(geneve->sock4, NULL); if (*gs4) rcu_assign_sk_user_data((*gs4)->sock->sk, NULL); #if IS_ENABLED(CONFIG_IPV6) *gs6 = rtnl_dereference(geneve->sock6); rcu_assign_pointer(geneve->sock6, NULL); if (*gs6) rcu_assign_sk_user_data((*gs6)->sock->sk, NULL); #else *gs6 = NULL; #endif synchronize_net(); } /* Resumes the geneve device data path for both TX and RX. */ static void geneve_unquiesce(struct geneve_dev *geneve, struct geneve_sock *gs4, struct geneve_sock __maybe_unused *gs6) { rcu_assign_pointer(geneve->sock4, gs4); if (gs4) rcu_assign_sk_user_data(gs4->sock->sk, gs4); #if IS_ENABLED(CONFIG_IPV6) rcu_assign_pointer(geneve->sock6, gs6); if (gs6) rcu_assign_sk_user_data(gs6->sock->sk, gs6); #endif synchronize_net(); } static int geneve_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct geneve_dev *geneve = netdev_priv(dev); struct geneve_sock *gs4, *gs6; struct geneve_config cfg; int err; /* If the geneve device is configured for metadata (or externally * controlled, for example, OVS), then nothing can be changed. */ if (geneve->cfg.collect_md) return -EOPNOTSUPP; /* Start with the existing info. 
*/ memcpy(&cfg, &geneve->cfg, sizeof(cfg)); err = geneve_nl2info(tb, data, extack, &cfg, true); if (err) return err; if (!geneve_dst_addr_equal(&geneve->cfg.info, &cfg.info)) { dst_cache_reset(&cfg.info.dst_cache); geneve_link_config(dev, &cfg.info, tb); } geneve_quiesce(geneve, &gs4, &gs6); memcpy(&geneve->cfg, &cfg, sizeof(cfg)); geneve_unquiesce(geneve, gs4, gs6); return 0; } static void geneve_dellink(struct net_device *dev, struct list_head *head) { struct geneve_dev *geneve = netdev_priv(dev); list_del(&geneve->next); unregister_netdevice_queue(dev, head); } static size_t geneve_get_size(const struct net_device *dev) { return nla_total_size(sizeof(__u32)) + /* IFLA_GENEVE_ID */ nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_DF */ nla_total_size(sizeof(__be32)) + /* IFLA_GENEVE_LABEL */ nla_total_size(sizeof(__be16)) + /* IFLA_GENEVE_PORT */ nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_CSUM */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL_INHERIT */ nla_total_size(0) + /* IFLA_GENEVE_INNER_PROTO_INHERIT */ nla_total_size(sizeof(struct ifla_geneve_port_range)) + /* IFLA_GENEVE_PORT_RANGE */ 0; } static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct geneve_dev *geneve = netdev_priv(dev); struct ip_tunnel_info *info = &geneve->cfg.info; bool ttl_inherit = geneve->cfg.ttl_inherit; bool metadata = geneve->cfg.collect_md; struct ifla_geneve_port_range ports = { .low = htons(geneve->cfg.port_min), .high = htons(geneve->cfg.port_max), }; __u8 tmp_vni[3]; __u32 vni; tunnel_id_to_vni(info->key.tun_id, tmp_vni); vni = (tmp_vni[0] << 16) | (tmp_vni[1] << 8) | tmp_vni[2]; if (nla_put_u32(skb, IFLA_GENEVE_ID, vni)) goto nla_put_failure; if (!metadata && ip_tunnel_info_af(info) == AF_INET) { if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE, info->key.u.ipv4.dst)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM, test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags))) goto nla_put_failure; #if IS_ENABLED(CONFIG_IPV6) } else if (!metadata) { if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6, &info->key.u.ipv6.dst)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX, !test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags))) goto nla_put_failure; #endif } if (nla_put_u8(skb, IFLA_GENEVE_TTL, info->key.ttl) || nla_put_u8(skb, IFLA_GENEVE_TOS, info->key.tos) || nla_put_be32(skb, IFLA_GENEVE_LABEL, info->key.label)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_GENEVE_DF, geneve->cfg.df)) goto nla_put_failure; if (nla_put_be16(skb, IFLA_GENEVE_PORT, info->key.tp_dst)) goto nla_put_failure; if (metadata && nla_put_flag(skb, IFLA_GENEVE_COLLECT_METADATA)) goto nla_put_failure; #if IS_ENABLED(CONFIG_IPV6) if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, !geneve->cfg.use_udp6_rx_checksums)) goto nla_put_failure; #endif if (nla_put_u8(skb, IFLA_GENEVE_TTL_INHERIT, ttl_inherit)) goto nla_put_failure; if (geneve->cfg.inner_proto_inherit && nla_put_flag(skb, IFLA_GENEVE_INNER_PROTO_INHERIT)) goto nla_put_failure; if (nla_put(skb, IFLA_GENEVE_PORT_RANGE, sizeof(ports), &ports)) goto nla_put_failure; return 0; nla_put_failure: return -EMSGSIZE; } static struct rtnl_link_ops geneve_link_ops 
__read_mostly = {
	.kind		= "geneve",
	.maxtype	= IFLA_GENEVE_MAX,
	.policy		= geneve_policy,
	.priv_size	= sizeof(struct geneve_dev),
	.setup		= geneve_setup,
	.validate	= geneve_validate,
	.newlink	= geneve_newlink,
	.changelink	= geneve_changelink,
	.dellink	= geneve_dellink,
	.get_size	= geneve_get_size,
	.fill_info	= geneve_fill_info,
};

struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
					u8 name_assign_type, u16 dst_port)
{
	struct nlattr *tb[IFLA_MAX + 1];
	struct net_device *dev;
	LIST_HEAD(list_kill);
	int err;
	struct geneve_config cfg = {
		.df = GENEVE_DF_UNSET,
		.use_udp6_rx_checksums = true,
		.ttl_inherit = false,
		.collect_md = true,
		.port_min = 1,
		.port_max = USHRT_MAX,
	};

	memset(tb, 0, sizeof(tb));
	dev = rtnl_create_link(net, name, name_assign_type,
			       &geneve_link_ops, tb, NULL);
	if (IS_ERR(dev))
		return dev;

	init_tnl_info(&cfg.info, dst_port);
	err = geneve_configure(net, dev, NULL, &cfg);
	if (err) {
		free_netdev(dev);
		return ERR_PTR(err);
	}

	/* openvswitch users expect packet sizes to be unrestricted,
	 * so set the largest MTU we can.
	 */
	err = geneve_change_mtu(dev, IP_MAX_MTU);
	if (err)
		goto err;

	err = rtnl_configure_link(dev, NULL, 0, NULL);
	if (err < 0)
		goto err;

	return dev;
err:
	geneve_dellink(dev, &list_kill);
	unregister_netdevice_many(&list_kill);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(geneve_dev_create_fb);

static int geneve_netdevice_event(struct notifier_block *unused,
				  unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

	if (event == NETDEV_UDP_TUNNEL_PUSH_INFO)
		geneve_offload_rx_ports(dev, true);
	else if (event == NETDEV_UDP_TUNNEL_DROP_INFO)
		geneve_offload_rx_ports(dev, false);

	return NOTIFY_DONE;
}

static struct notifier_block geneve_notifier_block __read_mostly = {
	.notifier_call = geneve_netdevice_event,
};

static __net_init int geneve_init_net(struct net *net)
{
	struct geneve_net *gn = net_generic(net, geneve_net_id);

	INIT_LIST_HEAD(&gn->geneve_list);
	INIT_LIST_HEAD(&gn->sock_list);
	return 0;
}

static void __net_exit geneve_exit_rtnl_net(struct net *net,
					    struct list_head *dev_to_kill)
{
	struct geneve_net *gn = net_generic(net, geneve_net_id);
	struct geneve_dev *geneve, *next;

	list_for_each_entry_safe(geneve, next, &gn->geneve_list, next)
		geneve_dellink(geneve->dev, dev_to_kill);
}

static void __net_exit geneve_exit_net(struct net *net)
{
	const struct geneve_net *gn = net_generic(net, geneve_net_id);

	WARN_ON_ONCE(!list_empty(&gn->sock_list));
}

static struct pernet_operations geneve_net_ops = {
	.init = geneve_init_net,
	.exit_rtnl = geneve_exit_rtnl_net,
	.exit = geneve_exit_net,
	.id  = &geneve_net_id,
	.size = sizeof(struct geneve_net),
};

static int __init geneve_init_module(void)
{
	int rc;

	rc = register_pernet_subsys(&geneve_net_ops);
	if (rc)
		goto out1;

	rc = register_netdevice_notifier(&geneve_notifier_block);
	if (rc)
		goto out2;

	rc = rtnl_link_register(&geneve_link_ops);
	if (rc)
		goto out3;

	return 0;
out3:
	unregister_netdevice_notifier(&geneve_notifier_block);
out2:
	unregister_pernet_subsys(&geneve_net_ops);
out1:
	return rc;
}
late_initcall(geneve_init_module);

static void __exit geneve_cleanup_module(void)
{
	rtnl_link_unregister(&geneve_link_ops);
	unregister_netdevice_notifier(&geneve_notifier_block);
	unregister_pernet_subsys(&geneve_net_ops);
}
module_exit(geneve_cleanup_module);

MODULE_LICENSE("GPL");
MODULE_VERSION(GENEVE_NETDEV_VER);
MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>");
MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic");
MODULE_ALIAS_RTNL_LINK("geneve");
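/*
 * Illustrative sketch (not part of the driver above): the 24-bit Geneve
 * VNI travels on the wire as three bytes, which is why geneve_nl2info()
 * packs a host __u32 into a __u8[3] and geneve_fill_info() reverses it.
 * A worked example of the same byte layout, with made-up helper names:
 */
static inline void demo_vni_pack(__u32 vni, __u8 out[3])
{
	out[0] = (vni & 0x00ff0000) >> 16;	/* vni 0x123456 -> 0x12 */
	out[1] = (vni & 0x0000ff00) >> 8;	/*              -> 0x34 */
	out[2] =  vni & 0x000000ff;		/*              -> 0x56 */
}

static inline __u32 demo_vni_unpack(const __u8 in[3])
{
	return (in[0] << 16) | (in[1] << 8) | in[2];	/* -> 0x123456 again */
}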
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * slot_map.c
 *
 * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
 */

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>

#include <cluster/masklog.h>

#include "ocfs2.h"

#include "dlmglue.h"
#include "extent_map.h"
#include "heartbeat.h"
#include "inode.h"
#include "slot_map.h"
#include "super.h"
#include "sysfile.h"
#include "ocfs2_trace.h"

#include "buffer_head_io.h"


struct ocfs2_slot {
	int sl_valid;
	unsigned int sl_node_num;
};

struct ocfs2_slot_info {
	int si_extended;
	int si_slots_per_block;
	struct inode *si_inode;
	unsigned int si_blocks;
	struct buffer_head **si_bh;
	unsigned int si_num_slots;
	struct ocfs2_slot si_slots[] __counted_by(si_num_slots);
};


static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
				    unsigned int node_num);

static void ocfs2_invalidate_slot(struct ocfs2_slot_info *si,
				  int slot_num)
{
	BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots));
	si->si_slots[slot_num].sl_valid = 0;
}

static void ocfs2_set_slot(struct ocfs2_slot_info *si,
			   int slot_num, unsigned int node_num)
{
	BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots));

	si->si_slots[slot_num].sl_valid = 1;
	si->si_slots[slot_num].sl_node_num = node_num;
}

/* This version is for the extended slot map */
static void ocfs2_update_slot_info_extended(struct ocfs2_slot_info *si)
{
	int b, i, slotno;
	struct ocfs2_slot_map_extended *se;

	slotno = 0;
	for (b = 0; b < si->si_blocks; b++) {
		se = (struct ocfs2_slot_map_extended *)si->si_bh[b]->b_data;
		for (i = 0;
		     (i < si->si_slots_per_block) &&
		     (slotno < si->si_num_slots);
		     i++, slotno++) {
			if (se->se_slots[i].es_valid)
				ocfs2_set_slot(si, slotno,
					       le32_to_cpu(se->se_slots[i].es_node_num));
			else
				ocfs2_invalidate_slot(si, slotno);
		}
	}
}

/*
 * Post the slot information on disk into our slot_info struct.
 * Must be protected by osb_lock.
 */
static void ocfs2_update_slot_info_old(struct ocfs2_slot_info *si)
{
	int i;
	struct ocfs2_slot_map *sm;

	sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data;

	for (i = 0; i < si->si_num_slots; i++) {
		if (le16_to_cpu(sm->sm_slots[i]) == (u16)OCFS2_INVALID_SLOT)
			ocfs2_invalidate_slot(si, i);
		else
			ocfs2_set_slot(si, i, le16_to_cpu(sm->sm_slots[i]));
	}
}

static void ocfs2_update_slot_info(struct ocfs2_slot_info *si)
{
	/*
	 * The slot data will have been refreshed when ocfs2_super_lock
	 * was taken.
	 */
	if (si->si_extended)
		ocfs2_update_slot_info_extended(si);
	else
		ocfs2_update_slot_info_old(si);
}

int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
{
	int ret;
	struct ocfs2_slot_info *si = osb->slot_info;

	if (si == NULL)
		return 0;

	BUG_ON(si->si_blocks == 0);
	BUG_ON(si->si_bh == NULL);

	trace_ocfs2_refresh_slot_info(si->si_blocks);

	/*
	 * We pass -1 as blocknr because we expect all of si->si_bh to
	 * be !NULL.  Thus, ocfs2_read_blocks() will ignore blocknr.  If
	 * this is not true, the read of -1 (UINT64_MAX) will fail.
	 */
	ret = ocfs2_read_blocks(INODE_CACHE(si->si_inode), -1, si->si_blocks,
				si->si_bh, OCFS2_BH_IGNORE_CACHE, NULL);
	if (ret == 0) {
		spin_lock(&osb->osb_lock);
		ocfs2_update_slot_info(si);
		spin_unlock(&osb->osb_lock);
	}

	return ret;
}

/* Post our slot info into its destination bh and write it
 * out.
 */
static void ocfs2_update_disk_slot_extended(struct ocfs2_slot_info *si,
					    int slot_num,
					    struct buffer_head **bh)
{
	int blkind = slot_num / si->si_slots_per_block;
	int slotno = slot_num % si->si_slots_per_block;
	struct ocfs2_slot_map_extended *se;

	BUG_ON(blkind >= si->si_blocks);

	se = (struct ocfs2_slot_map_extended *)si->si_bh[blkind]->b_data;
	se->se_slots[slotno].es_valid = si->si_slots[slot_num].sl_valid;
	if (si->si_slots[slot_num].sl_valid)
		se->se_slots[slotno].es_node_num =
			cpu_to_le32(si->si_slots[slot_num].sl_node_num);
	*bh = si->si_bh[blkind];
}

static void ocfs2_update_disk_slot_old(struct ocfs2_slot_info *si,
				       int slot_num,
				       struct buffer_head **bh)
{
	int i;
	struct ocfs2_slot_map *sm;

	sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data;
	for (i = 0; i < si->si_num_slots; i++) {
		if (si->si_slots[i].sl_valid)
			sm->sm_slots[i] =
				cpu_to_le16(si->si_slots[i].sl_node_num);
		else
			sm->sm_slots[i] = cpu_to_le16(OCFS2_INVALID_SLOT);
	}
	*bh = si->si_bh[0];
}

static int ocfs2_update_disk_slot(struct ocfs2_super *osb,
				  struct ocfs2_slot_info *si,
				  int slot_num)
{
	int status;
	struct buffer_head *bh;

	spin_lock(&osb->osb_lock);
	if (si->si_extended)
		ocfs2_update_disk_slot_extended(si, slot_num, &bh);
	else
		ocfs2_update_disk_slot_old(si, slot_num, &bh);
	spin_unlock(&osb->osb_lock);

	status = ocfs2_write_block(osb, bh, INODE_CACHE(si->si_inode));
	if (status < 0)
		mlog_errno(status);

	return status;
}

/*
 * Calculate how many bytes are needed by the slot map.  Returns
 * an error if the slot map file is too small.
 */
static int ocfs2_slot_map_physical_size(struct ocfs2_super *osb,
					struct inode *inode,
					unsigned long long *bytes)
{
	unsigned long long bytes_needed;

	if (ocfs2_uses_extended_slot_map(osb)) {
		bytes_needed = osb->max_slots *
			sizeof(struct ocfs2_extended_slot);
	} else {
		bytes_needed = osb->max_slots * sizeof(__le16);
	}
	if (bytes_needed > i_size_read(inode)) {
		mlog(ML_ERROR,
		     "Slot map file is too small!  (size %llu, needed %llu)\n",
		     i_size_read(inode), bytes_needed);
		return -ENOSPC;
	}

	*bytes = bytes_needed;
	return 0;
}

/* Try to find the given global node number in the slot info.
 * Returns -ENOENT if nothing is found.
 */
static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
				    unsigned int node_num)
{
	int i, ret = -ENOENT;

	for (i = 0; i < si->si_num_slots; i++) {
		if (si->si_slots[i].sl_valid &&
		    (node_num == si->si_slots[i].sl_node_num)) {
			ret = i;
			break;
		}
	}

	return ret;
}

static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si,
				   int preferred)
{
	int i, ret = -ENOSPC;

	if ((preferred >= 0) && (preferred < si->si_num_slots)) {
		if (!si->si_slots[preferred].sl_valid) {
			ret = preferred;
			goto out;
		}
	}

	for (i = 0; i < si->si_num_slots; i++) {
		if (!si->si_slots[i].sl_valid) {
			ret = i;
			break;
		}
	}
out:
	return ret;
}

int ocfs2_node_num_to_slot(struct ocfs2_super *osb, unsigned int node_num)
{
	int slot;
	struct ocfs2_slot_info *si = osb->slot_info;

	spin_lock(&osb->osb_lock);
	slot = __ocfs2_node_num_to_slot(si, node_num);
	spin_unlock(&osb->osb_lock);

	return slot;
}

int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num,
				  unsigned int *node_num)
{
	struct ocfs2_slot_info *si = osb->slot_info;

	assert_spin_locked(&osb->osb_lock);

	BUG_ON(slot_num < 0);
	BUG_ON(slot_num >= osb->max_slots);

	if (!si->si_slots[slot_num].sl_valid)
		return -ENOENT;

	*node_num = si->si_slots[slot_num].sl_node_num;
	return 0;
}

static void __ocfs2_free_slot_info(struct ocfs2_slot_info *si)
{
	unsigned int i;

	if (si == NULL)
		return;

	iput(si->si_inode);
	if (si->si_bh) {
		for (i = 0; i < si->si_blocks; i++) {
			if (si->si_bh[i]) {
				brelse(si->si_bh[i]);
				si->si_bh[i] = NULL;
			}
		}
		kfree(si->si_bh);
	}

	kfree(si);
}

int ocfs2_clear_slot(struct ocfs2_super *osb, int slot_num)
{
	struct ocfs2_slot_info *si = osb->slot_info;

	if (si == NULL)
		return 0;

	spin_lock(&osb->osb_lock);
	ocfs2_invalidate_slot(si, slot_num);
	spin_unlock(&osb->osb_lock);

	return ocfs2_update_disk_slot(osb, osb->slot_info, slot_num);
}

static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
				  struct ocfs2_slot_info *si)
{
	int status = 0;
	u64 blkno;
	unsigned long long blocks, bytes = 0;
	unsigned int i;
	struct buffer_head *bh;

	status = ocfs2_slot_map_physical_size(osb, si->si_inode, &bytes);
	if (status)
		goto bail;

	blocks = ocfs2_blocks_for_bytes(si->si_inode->i_sb, bytes);
	BUG_ON(blocks > UINT_MAX);
	si->si_blocks = blocks;
	if (!si->si_blocks)
		goto bail;

	if (si->si_extended)
		si->si_slots_per_block =
			(osb->sb->s_blocksize /
			 sizeof(struct ocfs2_extended_slot));
	else
		si->si_slots_per_block = osb->sb->s_blocksize / sizeof(__le16);

	/* The size checks above should ensure this */
	BUG_ON((osb->max_slots / si->si_slots_per_block) > blocks);

	trace_ocfs2_map_slot_buffers(bytes, si->si_blocks);

	si->si_bh = kcalloc(si->si_blocks, sizeof(struct buffer_head *),
			    GFP_KERNEL);
	if (!si->si_bh) {
		status = -ENOMEM;
		mlog_errno(status);
		goto bail;
	}

	for (i = 0; i < si->si_blocks; i++) {
		status = ocfs2_extent_map_get_blocks(si->si_inode, i,
						     &blkno, NULL, NULL);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}

		trace_ocfs2_map_slot_buffers_block((unsigned long long)blkno, i);

		bh = NULL;  /* Acquire a fresh bh */
		status = ocfs2_read_blocks(INODE_CACHE(si->si_inode), blkno,
					   1, &bh, OCFS2_BH_IGNORE_CACHE, NULL);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}

		si->si_bh[i] = bh;
	}

bail:
	return status;
}

int ocfs2_init_slot_info(struct ocfs2_super *osb)
{
	int status;
	struct inode *inode = NULL;
	struct ocfs2_slot_info *si;

	si = kzalloc(struct_size(si, si_slots, osb->max_slots), GFP_KERNEL);
	if (!si) {
		status = -ENOMEM;
		mlog_errno(status);
		return status;
	}

	si->si_extended = ocfs2_uses_extended_slot_map(osb);
	si->si_num_slots = osb->max_slots;

	inode = ocfs2_get_system_file_inode(osb, SLOT_MAP_SYSTEM_INODE,
					    OCFS2_INVALID_SLOT);
	if (!inode) {
		status = -EINVAL;
		mlog_errno(status);
		goto bail;
	}

	si->si_inode = inode;
	status = ocfs2_map_slot_buffers(osb, si);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	osb->slot_info = (struct ocfs2_slot_info *)si;
bail:
	if (status < 0)
		__ocfs2_free_slot_info(si);

	return status;
}

void ocfs2_free_slot_info(struct ocfs2_super *osb)
{
	struct ocfs2_slot_info *si = osb->slot_info;

	osb->slot_info = NULL;
	__ocfs2_free_slot_info(si);
}

int ocfs2_find_slot(struct ocfs2_super *osb)
{
	int status;
	int slot;
	struct ocfs2_slot_info *si;

	si = osb->slot_info;

	spin_lock(&osb->osb_lock);
	ocfs2_update_slot_info(si);

	/* Search for ourselves first and take the slot if it already
	 * exists.  Perhaps we need to mark this in a variable for our
	 * own journal recovery?  Possibly not, though we certainly
	 * need to warn the user. */
	slot = __ocfs2_node_num_to_slot(si, osb->node_num);
	if (slot < 0) {
		/* If we have no slot yet, just take the first
		 * available one. */
		slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
		if (slot < 0) {
			spin_unlock(&osb->osb_lock);
			mlog(ML_ERROR, "no free slots available!\n");
			status = -EINVAL;
			goto bail;
		}
	} else
		printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already "
		       "allocated to this node!\n", slot, osb->dev_str);

	ocfs2_set_slot(si, slot, osb->node_num);
	osb->slot_num = slot;
	spin_unlock(&osb->osb_lock);

	trace_ocfs2_find_slot(osb->slot_num);

	status = ocfs2_update_disk_slot(osb, si, osb->slot_num);
	if (status < 0) {
		mlog_errno(status);
		/*
		 * If the block write failed, invalidate the slot so we don't
		 * overwrite it during dismount, in case another node has
		 * rightfully claimed it in the meantime.
		 */
		spin_lock(&osb->osb_lock);
		ocfs2_invalidate_slot(si, osb->slot_num);
		osb->slot_num = OCFS2_INVALID_SLOT;
		spin_unlock(&osb->osb_lock);
	}

bail:
	return status;
}

void ocfs2_put_slot(struct ocfs2_super *osb)
{
	int status, slot_num;
	struct ocfs2_slot_info *si = osb->slot_info;

	if (!si)
		return;

	spin_lock(&osb->osb_lock);
	ocfs2_update_slot_info(si);

	slot_num = osb->slot_num;
	ocfs2_invalidate_slot(si, osb->slot_num);
	osb->slot_num = OCFS2_INVALID_SLOT;
	spin_unlock(&osb->osb_lock);

	status = ocfs2_update_disk_slot(osb, si, slot_num);
	if (status < 0)
		mlog_errno(status);

	ocfs2_free_slot_info(osb);
}
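/*
 * Illustrative sketch (not part of slot_map.c): how a slot number maps
 * onto the extended slot map's on-disk blocks, mirroring the arithmetic
 * in ocfs2_update_disk_slot_extended().  The 4096-byte block size and
 * 8-byte extended-slot entry in the worked numbers are assumptions.
 */
static inline void demo_slot_to_disk(int slot_num, int slots_per_block,
				     int *blkind, int *slotno)
{
	*blkind = slot_num / slots_per_block;	/* which si_bh[] buffer    */
	*slotno = slot_num % slots_per_block;	/* entry within that block */
	/* e.g. 4096 / 8 = 512 slots per block: slot 700 -> block 1, entry 188 */
}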
/*
 * IPv4 specific functions of netfilter core
 *
 * Rusty Russell (C) 2000  -- This code is GPL.
 * Patrick McHardy (C) 2006-2012
 */
#include <linux/kernel.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/ip.h>
#include <linux/skbuff.h>
#include <linux/gfp.h>
#include <linux/export.h>
#include <net/route.h>
#include <net/xfrm.h>
#include <net/ip.h>
#include <net/inet_dscp.h>
#include <net/netfilter/nf_queue.h>

/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb,
		       unsigned int addr_type)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct rtable *rt;
	struct flowi4 fl4 = {};
	__be32 saddr = iph->saddr;
	__u8 flags;
	struct net_device *dev = skb_dst(skb)->dev;
	struct flow_keys flkeys;
	unsigned int hh_len;

	sk = sk_to_full_sk(sk);
	flags = sk ? inet_sk_flowi_flags(sk) : 0;

	if (addr_type == RTN_UNSPEC)
		addr_type = inet_addr_type_dev_table(net, dev, saddr);
	if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST)
		flags |= FLOWI_FLAG_ANYSRC;
	else
		saddr = 0;

	/* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
	 * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook.
	 */
	fl4.daddr = iph->daddr;
	fl4.saddr = saddr;
	fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph));
	fl4.flowi4_oif = sk ? sk->sk_bound_dev_if : 0;
	fl4.flowi4_l3mdev = l3mdev_master_ifindex(dev);
	fl4.flowi4_mark = skb->mark;
	fl4.flowi4_flags = flags;
	fib4_rules_early_flow_dissect(net, skb, &fl4, &flkeys);
	rt = ip_route_output_key(net, &fl4);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	/* Drop old route. */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	if (skb_dst(skb)->error)
		return skb_dst(skb)->error;

#ifdef CONFIG_XFRM
	if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
	    xfrm_decode_session(net, skb, flowi4_to_flowi(&fl4), AF_INET) == 0) {
		struct dst_entry *dst = skb_dst(skb);

		skb_dst_set(skb, NULL);
		dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), sk, 0);
		if (IS_ERR(dst))
			return PTR_ERR(dst);
		skb_dst_set(skb, dst);
	}
#endif

	/* Change in oif may mean change in hh_len. */
	hh_len = skb_dst(skb)->dev->hard_header_len;
	if (skb_headroom(skb) < hh_len &&
	    pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)),
			     0, GFP_ATOMIC))
		return -ENOMEM;

	return 0;
}
EXPORT_SYMBOL(ip_route_me_harder);

int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
		bool strict __always_unused)
{
	struct rtable *rt = ip_route_output_key(net, &fl->u.ip4);

	if (IS_ERR(rt))
		return PTR_ERR(rt);
	*dst = &rt->dst;
	return 0;
}
EXPORT_SYMBOL_GPL(nf_ip_route);
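/*
 * Illustrative sketch (hypothetical hook, not part of this file): the
 * kind of caller ip_route_me_harder() exists for.  A LOCAL_OUT hook that
 * rewrites the destination address invalidates the cached route, so it
 * must re-route the skb before letting it continue.  The hook name and
 * the 192.168.0.1 address are made up for the example.
 */
static unsigned int demo_mangle_hook(void *priv, struct sk_buff *skb,
				     const struct nf_hook_state *state)
{
	struct iphdr *iph = ip_hdr(skb);

	iph->daddr = htonl(0xc0a80001);		/* 192.168.0.1, made up */
	ip_send_check(iph);			/* refresh the IP checksum */

	/* The route was looked up against the old daddr; redo it. */
	if (ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC))
		return NF_DROP;

	return NF_ACCEPT;
}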
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2002-2005, Instant802 Networks, Inc.
 * Copyright 2005, Devicescape Software, Inc.
 * Copyright (c) 2006 Jiri Benc <jbenc@suse.cz>
 * Copyright (C) 2022, 2024 Intel Corporation
 */

#ifndef IEEE80211_RATE_H
#define IEEE80211_RATE_H

#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/types.h>
#include <net/mac80211.h>
#include "ieee80211_i.h"
#include "sta_info.h"
#include "driver-ops.h"

struct rate_control_ref {
	const struct rate_control_ops *ops;
	void *priv;
};

void rate_control_get_rate(struct ieee80211_sub_if_data *sdata,
			   struct sta_info *sta,
			   struct ieee80211_tx_rate_control *txrc);

void rate_control_tx_status(struct ieee80211_local *local,
			    struct ieee80211_tx_status *st);

void rate_control_rate_init(struct link_sta_info *link_sta);
void rate_control_rate_init_all_links(struct sta_info *sta);
void rate_control_rate_update(struct ieee80211_local *local,
			      struct ieee80211_supported_band *sband,
			      struct link_sta_info *link_sta, u32 changed);

static inline void *rate_control_alloc_sta(struct rate_control_ref *ref,
					   struct sta_info *sta, gfp_t gfp)
{
	spin_lock_init(&sta->rate_ctrl_lock);
	return ref->ops->alloc_sta(ref->priv, &sta->sta, gfp);
}

static inline void rate_control_free_sta(struct sta_info *sta)
{
	struct rate_control_ref *ref = sta->rate_ctrl;
	struct ieee80211_sta *ista = &sta->sta;
	void *priv_sta = sta->rate_ctrl_priv;

	ref->ops->free_sta(ref->priv, ista, priv_sta);
}

static inline void rate_control_add_sta_debugfs(struct sta_info *sta)
{
#ifdef CONFIG_MAC80211_DEBUGFS
	struct rate_control_ref *ref = sta->rate_ctrl;

	if (ref && sta->debugfs_dir && ref->ops->add_sta_debugfs)
		ref->ops->add_sta_debugfs(ref->priv, sta->rate_ctrl_priv,
					  sta->debugfs_dir);
#endif
}

extern const struct debugfs_short_fops rcname_ops;

static inline void rate_control_add_debugfs(struct ieee80211_local *local)
{
#ifdef CONFIG_MAC80211_DEBUGFS
	struct dentry *debugfsdir;

	if (!local->rate_ctrl)
		return;

	if (!local->rate_ctrl->ops->add_debugfs)
		return;

	debugfsdir = debugfs_create_dir("rc", local->hw.wiphy->debugfsdir);
	local->debugfs.rcdir = debugfsdir;
	debugfs_create_file("name", 0400, debugfsdir,
			    local->rate_ctrl, &rcname_ops);

	local->rate_ctrl->ops->add_debugfs(&local->hw, local->rate_ctrl->priv,
					   debugfsdir);
#endif
}

void ieee80211_check_rate_mask(struct ieee80211_link_data *link);

/* Get a reference to the rate control algorithm.  If `name' is NULL, get
 * the first available algorithm. */
int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local,
				 const char *name);
void rate_control_deinitialize(struct ieee80211_local *local);


/* Rate control algorithms */
#ifdef CONFIG_MAC80211_RC_MINSTREL
int rc80211_minstrel_init(void);
void rc80211_minstrel_exit(void);
#else
static inline int rc80211_minstrel_init(void)
{
	return 0;
}
static inline void rc80211_minstrel_exit(void)
{
}
#endif


#endif /* IEEE80211_RATE_H */
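/*
 * Illustrative sketch (not a real algorithm): the smallest shape of a
 * rate-control module that the helpers above can drive.  Only the
 * callbacks visible in this header are filled in; the "demo_rc" names
 * are made up, the ops signatures follow current mac80211 and should be
 * checked against the tree in use, and a production algorithm would
 * also implement tx_status/rate_init and register itself with
 * ieee80211_rate_control_register(&demo_rc_ops) from module init.
 */
static void *demo_rc_alloc(struct ieee80211_hw *hw)
{
	return kzalloc(sizeof(int), GFP_KERNEL);	/* per-hw private data */
}

static void demo_rc_free(void *priv)
{
	kfree(priv);
}

static void *demo_rc_alloc_sta(void *priv, struct ieee80211_sta *sta,
			       gfp_t gfp)
{
	return kzalloc(sizeof(int), gfp);	/* reached via rate_control_alloc_sta() */
}

static void demo_rc_free_sta(void *priv, struct ieee80211_sta *sta,
			     void *priv_sta)
{
	kfree(priv_sta);			/* reached via rate_control_free_sta() */
}

static void demo_rc_get_rate(void *priv, struct ieee80211_sta *sta,
			     void *priv_sta,
			     struct ieee80211_tx_rate_control *txrc)
{
	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb);

	info->control.rates[0].idx = 0;		/* always pick the lowest rate */
	info->control.rates[0].count = 1;
}

static const struct rate_control_ops demo_rc_ops = {
	.name		= "demo_rc",
	.alloc		= demo_rc_alloc,
	.free		= demo_rc_free,
	.alloc_sta	= demo_rc_alloc_sta,
	.free_sta	= demo_rc_free_sta,
	.get_rate	= demo_rc_get_rate,
};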
/* Kernel module to match connection tracking byte counter.
 * GPL (C) 2002 Martin Devera (devik@cdi.cz).
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/skbuff.h>
#include <linux/math64.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_connbytes.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_acct.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_DESCRIPTION("Xtables: Number of packets/bytes per connection matching");
MODULE_ALIAS("ipt_connbytes");
MODULE_ALIAS("ip6t_connbytes");

static bool
connbytes_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
	const struct xt_connbytes_info *sinfo = par->matchinfo;
	const struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	u_int64_t what = 0;	/* initialize to make gcc happy */
	u_int64_t bytes = 0;
	u_int64_t pkts = 0;
	const struct nf_conn_acct *acct;
	const struct nf_conn_counter *counters;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		return false;

	acct = nf_conn_acct_find(ct);
	if (!acct)
		return false;

	counters = acct->counter;
	switch (sinfo->what) {
	case XT_CONNBYTES_PKTS:
		switch (sinfo->direction) {
		case XT_CONNBYTES_DIR_ORIGINAL:
			what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets);
			break;
		case XT_CONNBYTES_DIR_REPLY:
			what = atomic64_read(&counters[IP_CT_DIR_REPLY].packets);
			break;
		case XT_CONNBYTES_DIR_BOTH:
			what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets);
			what += atomic64_read(&counters[IP_CT_DIR_REPLY].packets);
			break;
		}
		break;
	case XT_CONNBYTES_BYTES:
		switch (sinfo->direction) {
		case XT_CONNBYTES_DIR_ORIGINAL:
			what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes);
			break;
		case XT_CONNBYTES_DIR_REPLY:
			what = atomic64_read(&counters[IP_CT_DIR_REPLY].bytes);
			break;
		case XT_CONNBYTES_DIR_BOTH:
			what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes);
			what += atomic64_read(&counters[IP_CT_DIR_REPLY].bytes);
			break;
		}
		break;
	case XT_CONNBYTES_AVGPKT:
		switch (sinfo->direction) {
		case XT_CONNBYTES_DIR_ORIGINAL:
			bytes = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes);
			pkts = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets);
			break;
		case XT_CONNBYTES_DIR_REPLY:
			bytes = atomic64_read(&counters[IP_CT_DIR_REPLY].bytes);
			pkts = atomic64_read(&counters[IP_CT_DIR_REPLY].packets);
			break;
		case XT_CONNBYTES_DIR_BOTH:
			bytes = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes) +
				atomic64_read(&counters[IP_CT_DIR_REPLY].bytes);
			pkts = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets) +
			       atomic64_read(&counters[IP_CT_DIR_REPLY].packets);
			break;
		}
		if (pkts != 0)
			what = div64_u64(bytes, pkts);
		break;
	}

	if (sinfo->count.to >= sinfo->count.from)
		return what <= sinfo->count.to && what >= sinfo->count.from;
	else /* inverted */
		return what < sinfo->count.to || what > sinfo->count.from;
}

static int connbytes_mt_check(const struct xt_mtchk_param *par)
{
	const struct xt_connbytes_info *sinfo = par->matchinfo;
	int ret;

	if (sinfo->what != XT_CONNBYTES_PKTS &&
	    sinfo->what != XT_CONNBYTES_BYTES &&
	    sinfo->what != XT_CONNBYTES_AVGPKT)
		return -EINVAL;

	if (sinfo->direction != XT_CONNBYTES_DIR_ORIGINAL &&
	    sinfo->direction != XT_CONNBYTES_DIR_REPLY &&
	    sinfo->direction != XT_CONNBYTES_DIR_BOTH)
		return -EINVAL;

	ret = nf_ct_netns_get(par->net, par->family);
	if (ret < 0) {
		pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
				    par->family);
		return ret;
	}

	/*
	 * This filter cannot function correctly unless connection tracking
	 * accounting is enabled, so complain in the hope that someone notices.
	 */
	if (!nf_ct_acct_enabled(par->net)) {
		pr_warn("Forcing CT accounting to be enabled\n");
		nf_ct_set_acct(par->net, true);
	}

	return ret;
}

static void connbytes_mt_destroy(const struct xt_mtdtor_param *par)
{
	nf_ct_netns_put(par->net, par->family);
}

static struct xt_match connbytes_mt_reg __read_mostly = {
	.name       = "connbytes",
	.revision   = 0,
	.family     = NFPROTO_UNSPEC,
	.checkentry = connbytes_mt_check,
	.match      = connbytes_mt,
	.destroy    = connbytes_mt_destroy,
	.matchsize  = sizeof(struct xt_connbytes_info),
	.me         = THIS_MODULE,
};

static int __init connbytes_mt_init(void)
{
	return xt_register_match(&connbytes_mt_reg);
}

static void __exit connbytes_mt_exit(void)
{
	xt_unregister_match(&connbytes_mt_reg);
}

module_init(connbytes_mt_init);
module_exit(connbytes_mt_exit);
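The single {from, to} pair at the end of connbytes_mt() encodes both a normal and an inverted range, which is easy to miss. Below is a minimal standalone sketch of that predicate for illustration only; connbytes_in_range() is a hypothetical name and is not part of the module:

/*
 * Hypothetical standalone rendering of the connbytes range test above.
 */
static bool connbytes_in_range(u64 what, u64 from, u64 to)
{
	if (to >= from)		/* normal range: match inside [from, to] */
		return what >= from && what <= to;
	/* inverted (from > to): match strictly outside [to, from] */
	return what < to || what > from;
}

/*
 * E.g. from = 1000, to = 0 takes the inverted branch; for unsigned counters
 * "what < 0" can never be true, so the test reduces to "more than 1000".
 */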
// SPDX-License-Identifier: GPL-2.0-only
/*
 *	linux/lib/crc-ccitt.c
 */

#include <linux/types.h>
#include <linux/module.h>
#include <linux/crc-ccitt.h>

/*
 * This mysterious table is just the CRC of each possible byte. It can be
 * computed using the standard bit-at-a-time methods. The polynomial can
 * be seen in entry 128, 0x8408. This corresponds to x^0 + x^5 + x^12.
 * Add the implicit x^16, and you have the standard CRC-CCITT.
 */
u16 const crc_ccitt_table[256] = {
	0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf,
	0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7,
	0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e,
	0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876,
	0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd,
	0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5,
	0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c,
	0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974,
	0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb,
	0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3,
	0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a,
	0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72,
	0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9,
	0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1,
	0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738,
	0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70,
	0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7,
	0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff,
	0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036,
	0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e,
	0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5,
	0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd,
	0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134,
	0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c,
	0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3,
	0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb,
	0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232,
	0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a,
	0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1,
	0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9,
	0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330,
	0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78
};
EXPORT_SYMBOL(crc_ccitt_table);

/**
 * crc_ccitt - recompute the CRC (CRC-CCITT variant) for the data buffer
 * @crc: previous CRC value
 * @buffer: data pointer
 * @len: number of bytes in the buffer
 */
u16 crc_ccitt(u16 crc, u8 const *buffer, size_t len)
{
	while (len--)
		crc = crc_ccitt_byte(crc, *buffer++);
	return crc;
}
EXPORT_SYMBOL(crc_ccitt);

MODULE_DESCRIPTION("CRC-CCITT calculations");
MODULE_LICENSE("GPL");
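Because crc_ccitt() folds one byte at a time into the running value, a caller can chain calls over a split buffer and get the same result as a single call over the whole buffer. A minimal sketch, assuming the conventional 0xffff seed used by PPP/HDLC-style framing (frame_fcs() and its two-fragment layout are hypothetical, not part of this library):

static u16 frame_fcs(const u8 *hdr, size_t hdr_len,
		     const u8 *payload, size_t payload_len)
{
	u16 fcs = 0xffff;	/* conventional initial value */

	fcs = crc_ccitt(fcs, hdr, hdr_len);		/* first fragment */
	fcs = crc_ccitt(fcs, payload, payload_len);	/* continues seamlessly */
	return fcs;
}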
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Written by Mark Hemment, 1996 (markhe@nextd.demon.co.uk).
 *
 * (C) SGI 2006, Christoph Lameter
 *	Cleaned up and restructured to ease the addition of alternative
 *	implementations of SLAB allocators.
 * (C) Linux Foundation 2008-2013
 *	Unified interface for all slab allocators
 */

#ifndef _LINUX_SLAB_H
#define _LINUX_SLAB_H

#include <linux/cache.h>
#include <linux/gfp.h>
#include <linux/overflow.h>
#include <linux/types.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
#include <linux/percpu-refcount.h>
#include <linux/cleanup.h>
#include <linux/hash.h>

enum _slab_flag_bits {
	_SLAB_CONSISTENCY_CHECKS,
	_SLAB_RED_ZONE,
	_SLAB_POISON,
	_SLAB_KMALLOC,
	_SLAB_HWCACHE_ALIGN,
	_SLAB_CACHE_DMA,
	_SLAB_CACHE_DMA32,
	_SLAB_STORE_USER,
	_SLAB_PANIC,
	_SLAB_TYPESAFE_BY_RCU,
	_SLAB_TRACE,
#ifdef CONFIG_DEBUG_OBJECTS
	_SLAB_DEBUG_OBJECTS,
#endif
	_SLAB_NOLEAKTRACE,
	_SLAB_NO_MERGE,
#ifdef CONFIG_FAILSLAB
	_SLAB_FAILSLAB,
#endif
#ifdef CONFIG_MEMCG
	_SLAB_ACCOUNT,
#endif
#ifdef CONFIG_KASAN_GENERIC
	_SLAB_KASAN,
#endif
	_SLAB_NO_USER_FLAGS,
#ifdef CONFIG_KFENCE
	_SLAB_SKIP_KFENCE,
#endif
#ifndef CONFIG_SLUB_TINY
	_SLAB_RECLAIM_ACCOUNT,
#endif
	_SLAB_OBJECT_POISON,
	_SLAB_CMPXCHG_DOUBLE,
#ifdef CONFIG_SLAB_OBJ_EXT
	_SLAB_NO_OBJ_EXT,
#endif
	_SLAB_FLAGS_LAST_BIT
};

#define __SLAB_FLAG_BIT(nr)	((slab_flags_t __force)(1U << (nr)))
#define __SLAB_FLAG_UNUSED	((slab_flags_t __force)(0U))

/*
 * Flags to pass to kmem_cache_create().
 * The ones marked DEBUG need CONFIG_SLUB_DEBUG enabled, otherwise they are no-ops.
 */
/* DEBUG: Perform (expensive) checks on alloc/free */
#define SLAB_CONSISTENCY_CHECKS	__SLAB_FLAG_BIT(_SLAB_CONSISTENCY_CHECKS)
/* DEBUG: Red zone objs in a cache */
#define SLAB_RED_ZONE	__SLAB_FLAG_BIT(_SLAB_RED_ZONE)
/* DEBUG: Poison objects */
#define SLAB_POISON	__SLAB_FLAG_BIT(_SLAB_POISON)
/* Indicate a kmalloc slab */
#define SLAB_KMALLOC	__SLAB_FLAG_BIT(_SLAB_KMALLOC)
/**
 * define SLAB_HWCACHE_ALIGN - Align objects on cache line boundaries.
 *
 * Sufficiently large objects are aligned on cache line boundary. For object
 * size smaller than a half of cache line size, the alignment is on the half of
 * cache line size. In general, if object size is smaller than 1/2^n of cache
 * line size, the alignment is adjusted to 1/2^n.
 *
 * If explicit alignment is also requested by the respective
 * &struct kmem_cache_args field, the greater of the two alignments is applied.
 */
#define SLAB_HWCACHE_ALIGN	__SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN)
/* Use GFP_DMA memory */
#define SLAB_CACHE_DMA	__SLAB_FLAG_BIT(_SLAB_CACHE_DMA)
/* Use GFP_DMA32 memory */
#define SLAB_CACHE_DMA32	__SLAB_FLAG_BIT(_SLAB_CACHE_DMA32)
/* DEBUG: Store the last owner for bug hunting */
#define SLAB_STORE_USER	__SLAB_FLAG_BIT(_SLAB_STORE_USER)
/* Panic if kmem_cache_create() fails */
#define SLAB_PANIC	__SLAB_FLAG_BIT(_SLAB_PANIC)
/**
 * define SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS!
 *
 * This delays freeing the SLAB page by a grace period, it does _NOT_
 * delay object freeing. This means that if you do kmem_cache_free()
 * that memory location is free to be reused at any time. Thus it may
 * be possible to see another object there in the same RCU grace period.
 *
 * This feature only ensures the memory location backing the object
 * stays valid, the trick to using this is relying on an independent
 * object validation pass. Something like:
 *
 * ::
 *
 *  begin:
 *   rcu_read_lock();
 *   obj = lockless_lookup(key);
 *   if (obj) {
 *     if (!try_get_ref(obj)) { // might fail for free objects
 *       rcu_read_unlock();
 *       goto begin;
 *     }
 *
 *     if (obj->key != key) { // not the object we expected
 *       put_ref(obj);
 *       rcu_read_unlock();
 *       goto begin;
 *     }
 *   }
 *   rcu_read_unlock();
 *
 * This is useful if we need to approach a kernel structure obliquely,
 * from its address obtained without the usual locking. We can lock
 * the structure to stabilize it and check it's still at the given address,
 * only if we can be sure that the memory has not been meanwhile reused
 * for some other kind of object (which our subsystem's lock might corrupt).
 *
 * rcu_read_lock before reading the address, then rcu_read_unlock after
 * taking the spinlock within the structure expected at that address.
 *
 * Note that the object identity check has to be done *after* acquiring a
 * reference, therefore the user has to ensure proper ordering for loads.
 * Similarly, when initializing objects allocated with SLAB_TYPESAFE_BY_RCU,
 * the newly allocated object has to be fully initialized *before* its
 * refcount gets initialized and proper ordering for stores is required.
 * refcount_{add|inc}_not_zero_acquire() and refcount_set_release() are
 * designed with the proper fences required for reference counting objects
 * allocated with SLAB_TYPESAFE_BY_RCU.
 *
 * Note that it is not possible to acquire a lock within a structure
 * allocated with SLAB_TYPESAFE_BY_RCU without first acquiring a reference
 * as described above. The reason is that SLAB_TYPESAFE_BY_RCU pages
 * are not zeroed before being given to the slab, which means that any
 * locks must be initialized after each and every kmem_cache_alloc().
 * Alternatively, make the ctor passed to kmem_cache_create() initialize
 * the locks at page-allocation time, as is done in __i915_request_ctor(),
 * sighand_ctor(), and anon_vma_ctor(). Such a ctor permits readers
 * to safely acquire those ctor-initialized locks under rcu_read_lock()
 * protection.
 *
 * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU.
 */
#define SLAB_TYPESAFE_BY_RCU	__SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU)
/* Trace allocations and frees */
#define SLAB_TRACE	__SLAB_FLAG_BIT(_SLAB_TRACE)
/* Flag to prevent checks on free */
#ifdef CONFIG_DEBUG_OBJECTS
# define SLAB_DEBUG_OBJECTS	__SLAB_FLAG_BIT(_SLAB_DEBUG_OBJECTS)
#else
# define SLAB_DEBUG_OBJECTS	__SLAB_FLAG_UNUSED
#endif
/* Avoid kmemleak tracing */
#define SLAB_NOLEAKTRACE	__SLAB_FLAG_BIT(_SLAB_NOLEAKTRACE)
/*
 * Prevent merging with compatible kmem caches. This flag should be used
 * cautiously. Valid use cases:
 *
 * - caches created for self-tests (e.g. kunit)
 * - general caches created and used by a subsystem, only when a
 *   (subsystem-specific) debug option is enabled
 * - performance critical caches, should be very rare and consulted with slab
 *   maintainers, and not used together with CONFIG_SLUB_TINY
 */
#define SLAB_NO_MERGE	__SLAB_FLAG_BIT(_SLAB_NO_MERGE)
/* Fault injection mark */
#ifdef CONFIG_FAILSLAB
# define SLAB_FAILSLAB	__SLAB_FLAG_BIT(_SLAB_FAILSLAB)
#else
# define SLAB_FAILSLAB	__SLAB_FLAG_UNUSED
#endif
/**
 * define SLAB_ACCOUNT - Account allocations to memcg.
 *
 * All object allocations from this cache will be memcg accounted, regardless of
 * __GFP_ACCOUNT being or not being passed to individual allocations.
 */
#ifdef CONFIG_MEMCG
# define SLAB_ACCOUNT	__SLAB_FLAG_BIT(_SLAB_ACCOUNT)
#else
# define SLAB_ACCOUNT	__SLAB_FLAG_UNUSED
#endif

#ifdef CONFIG_KASAN_GENERIC
#define SLAB_KASAN	__SLAB_FLAG_BIT(_SLAB_KASAN)
#else
#define SLAB_KASAN	__SLAB_FLAG_UNUSED
#endif

/*
 * Ignore user specified debugging flags.
 * Intended for caches created for self-tests so they have only flags
 * specified in the code and other flags are ignored.
 */
#define SLAB_NO_USER_FLAGS	__SLAB_FLAG_BIT(_SLAB_NO_USER_FLAGS)

#ifdef CONFIG_KFENCE
#define SLAB_SKIP_KFENCE	__SLAB_FLAG_BIT(_SLAB_SKIP_KFENCE)
#else
#define SLAB_SKIP_KFENCE	__SLAB_FLAG_UNUSED
#endif

/* The following flags affect the page allocator grouping pages by mobility */
/**
 * define SLAB_RECLAIM_ACCOUNT - Objects are reclaimable.
 *
 * Use this flag for caches that have an associated shrinker. As a result, slab
 * pages are allocated with __GFP_RECLAIMABLE, which affects grouping pages by
 * mobility, and are accounted in the SReclaimable counter in /proc/meminfo.
 */
#ifndef CONFIG_SLUB_TINY
#define SLAB_RECLAIM_ACCOUNT	__SLAB_FLAG_BIT(_SLAB_RECLAIM_ACCOUNT)
#else
#define SLAB_RECLAIM_ACCOUNT	__SLAB_FLAG_UNUSED
#endif
#define SLAB_TEMPORARY	SLAB_RECLAIM_ACCOUNT	/* Objects are short-lived */

/* Slab created using create_boot_cache */
#ifdef CONFIG_SLAB_OBJ_EXT
#define SLAB_NO_OBJ_EXT	__SLAB_FLAG_BIT(_SLAB_NO_OBJ_EXT)
#else
#define SLAB_NO_OBJ_EXT	__SLAB_FLAG_UNUSED
#endif

/*
 * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests.
 *
 * Dereferencing ZERO_SIZE_PTR will lead to a distinct access fault.
 *
 * ZERO_SIZE_PTR can be passed to kfree though in the same way that NULL can.
 * Both make kfree a no-op.
 */
#define ZERO_SIZE_PTR ((void *)16)

#define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) <= \
				(unsigned long)ZERO_SIZE_PTR)

#include <linux/kasan.h>

struct list_lru;
struct mem_cgroup;
/*
 * struct kmem_cache related prototypes
 */
bool slab_is_available(void);

/**
 * struct kmem_cache_args - Less common arguments for kmem_cache_create()
 *
 * Any uninitialized fields of the structure are interpreted as unused. The
 * exception is @freeptr_offset where %0 is a valid value, so
 * @use_freeptr_offset must be also set to %true in order to interpret the field
 * as used. For @useroffset %0 is also valid, but only with non-%0
 * @usersize.
 *
 * When %NULL args is passed to kmem_cache_create(), it is equivalent to all
 * fields unused.
 */
struct kmem_cache_args {
	/**
	 * @align: The required alignment for the objects.
	 *
	 * %0 means no specific alignment is requested.
	 */
	unsigned int align;
	/**
	 * @useroffset: Usercopy region offset.
	 *
	 * %0 is a valid offset, when @usersize is non-%0
	 */
	unsigned int useroffset;
	/**
	 * @usersize: Usercopy region size.
	 *
	 * %0 means no usercopy region is specified.
	 */
	unsigned int usersize;
	/**
	 * @freeptr_offset: Custom offset for the free pointer
	 * in &SLAB_TYPESAFE_BY_RCU caches
	 *
	 * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer
	 * outside of the object. This might cause the object to grow in size.
	 * Cache creators that have a reason to avoid this can specify a custom
	 * free pointer offset in their struct where the free pointer will be
	 * placed.
	 *
	 * Note that placing the free pointer inside the object requires the
	 * caller to ensure that no fields are invalidated that are required to
	 * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for
	 * details).
	 *
	 * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset
	 * is specified, %use_freeptr_offset must be set %true.
	 *
	 * Note that @ctor currently isn't supported with custom free pointers
	 * as a @ctor requires an external free pointer.
	 */
	unsigned int freeptr_offset;
	/**
	 * @use_freeptr_offset: Whether a @freeptr_offset is used.
	 */
	bool use_freeptr_offset;
	/**
	 * @ctor: A constructor for the objects.
	 *
	 * The constructor is invoked for each object in a newly allocated slab
	 * page. It is the cache user's responsibility to free the object in
	 * the same state as after calling the constructor, or deal
	 * appropriately with any differences between a freshly constructed
	 * and a reallocated object.
	 *
	 * %NULL means no constructor.
	 */
	void (*ctor)(void *);
};

struct kmem_cache *__kmem_cache_create_args(const char *name,
					    unsigned int object_size,
					    struct kmem_cache_args *args,
					    slab_flags_t flags);

static inline struct kmem_cache *
__kmem_cache_create(const char *name, unsigned int size, unsigned int align,
		    slab_flags_t flags, void (*ctor)(void *))
{
	struct kmem_cache_args kmem_args = {
		.align	= align,
		.ctor	= ctor,
	};

	return __kmem_cache_create_args(name, size, &kmem_args, flags);
}

/**
 * kmem_cache_create_usercopy - Create a kmem cache with a region suitable
 * for copying to userspace.
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags
 * @useroffset: Usercopy region offset
 * @usersize: Usercopy region size
 * @ctor: A constructor for the objects, or %NULL.
 *
 * This is a legacy wrapper; new code should use either KMEM_CACHE_USERCOPY()
 * if whitelisting a single field is sufficient, or kmem_cache_create() with
 * the necessary parameters passed via the args parameter (see
 * &struct kmem_cache_args).
 *
 * Return: a pointer to the cache on success, NULL on failure.
 */
static inline struct kmem_cache *
kmem_cache_create_usercopy(const char *name, unsigned int size,
			   unsigned int align, slab_flags_t flags,
			   unsigned int useroffset, unsigned int usersize,
			   void (*ctor)(void *))
{
	struct kmem_cache_args kmem_args = {
		.align		= align,
		.ctor		= ctor,
		.useroffset	= useroffset,
		.usersize	= usersize,
	};

	return __kmem_cache_create_args(name, size, &kmem_args, flags);
}

/* If NULL is passed for @args, use this variant with default arguments. */
static inline struct kmem_cache *
__kmem_cache_default_args(const char *name, unsigned int size,
			  struct kmem_cache_args *args, slab_flags_t flags)
{
	struct kmem_cache_args kmem_default_args = {};

	/* Make sure we don't get passed garbage. */
	if (WARN_ON_ONCE(args))
		return ERR_PTR(-EINVAL);

	return __kmem_cache_create_args(name, size, &kmem_default_args, flags);
}

/**
 * kmem_cache_create - Create a kmem cache.
 * @__name: A string which is used in /proc/slabinfo to identify this cache.
 * @__object_size: The size of objects to be created in this cache.
 * @__args: Optional arguments, see &struct kmem_cache_args. Passing %NULL
 *	    means defaults will be used for all the arguments.
 *
 * This is currently implemented as a macro using ``_Generic()`` to call
 * either the new variant of the function, or a legacy one.
 *
 * The new variant has 4 parameters:
 * ``kmem_cache_create(name, object_size, args, flags)``
 *
 * See __kmem_cache_create_args() which implements this.
 *
 * The legacy variant has 5 parameters:
 * ``kmem_cache_create(name, object_size, align, flags, ctor)``
 *
 * The align and ctor parameters map to the respective fields of
 * &struct kmem_cache_args
 *
 * Context: Cannot be called within an interrupt, but can be interrupted.
 *
 * Return: a pointer to the cache on success, NULL on failure.
 */
#define kmem_cache_create(__name, __object_size, __args, ...)		\
	_Generic((__args),						\
		struct kmem_cache_args *: __kmem_cache_create_args,	\
		void *: __kmem_cache_default_args,			\
		default: __kmem_cache_create)(__name, __object_size, __args, __VA_ARGS__)

void kmem_cache_destroy(struct kmem_cache *s);
int kmem_cache_shrink(struct kmem_cache *s);

/*
 * Please use this macro to create slab caches. Simply specify the
 * name of the structure and maybe some flags that are listed above.
 *
 * The alignment of the struct determines object alignment. If you
 * e.g. add ____cacheline_aligned_in_smp to the struct declaration
 * then the objects will be properly aligned in SMP configurations.
 */
#define KMEM_CACHE(__struct, __flags)					\
	__kmem_cache_create_args(#__struct, sizeof(struct __struct),	\
			&(struct kmem_cache_args) {			\
				.align	= __alignof__(struct __struct), \
			}, (__flags))

/*
 * To whitelist a single field for copying to/from usercopy, use this
 * macro instead of KMEM_CACHE() above.
 */
#define KMEM_CACHE_USERCOPY(__struct, __flags, __field)			\
	__kmem_cache_create_args(#__struct, sizeof(struct __struct),	\
			&(struct kmem_cache_args) {			\
				.align		= __alignof__(struct __struct), \
				.useroffset	= offsetof(struct __struct, __field), \
				.usersize	= sizeof_field(struct __struct, __field), \
			}, (__flags))

/*
 * Common kmalloc functions provided by all allocators
 */
void * __must_check krealloc_noprof(const void *objp, size_t new_size,
				    gfp_t flags) __realloc_size(2);
#define krealloc(...)	alloc_hooks(krealloc_noprof(__VA_ARGS__))

void kfree(const void *objp);
void kfree_sensitive(const void *objp);
size_t __ksize(const void *objp);

DEFINE_FREE(kfree, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T))
DEFINE_FREE(kfree_sensitive, void *, if (_T) kfree_sensitive(_T))

/**
 * ksize - Report actual allocation size of associated object
 *
 * @objp: Pointer returned from a prior kmalloc()-family allocation.
 *
 * This should not be used for writing beyond the originally requested
 * allocation size. Either use krealloc() or round up the allocation size
 * with kmalloc_size_roundup() prior to allocation. If this is used to
 * access beyond the originally requested allocation size, UBSAN_BOUNDS
 * and/or FORTIFY_SOURCE may trip, since they only know about the
 * originally allocated size via the __alloc_size attribute.
 */
size_t ksize(const void *objp);

#ifdef CONFIG_PRINTK
bool kmem_dump_obj(void *object);
#else
static inline bool kmem_dump_obj(void *object) { return false; }
#endif

/*
 * Some archs want to perform DMA into kmalloc caches and need a guaranteed
 * alignment larger than the alignment of a 64-bit integer.
 * Setting ARCH_DMA_MINALIGN in arch headers allows that.
 */
#ifdef ARCH_HAS_DMA_MINALIGN
#if ARCH_DMA_MINALIGN > 8 && !defined(ARCH_KMALLOC_MINALIGN)
#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
#endif
#endif

#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#elif ARCH_KMALLOC_MINALIGN > 8
#define KMALLOC_MIN_SIZE ARCH_KMALLOC_MINALIGN
#define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE)
#endif

/*
 * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment.
 * Intended for arches that get misalignment faults even for 64 bit integer
 * aligned buffers.
 */
#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
#endif

/*
 * Arches can define this function if they want to decide the minimum slab
 * alignment at runtime. The value returned by the function must be a power
 * of two and >= ARCH_SLAB_MINALIGN.
 */
#ifndef arch_slab_minalign
static inline unsigned int arch_slab_minalign(void)
{
	return ARCH_SLAB_MINALIGN;
}
#endif

/*
 * kmem_cache_alloc and friends return pointers aligned to ARCH_SLAB_MINALIGN.
 * kmalloc and friends return pointers aligned to both ARCH_KMALLOC_MINALIGN
 * and ARCH_SLAB_MINALIGN, but here we only assume the former alignment.
 */
#define __assume_kmalloc_alignment __assume_aligned(ARCH_KMALLOC_MINALIGN)
#define __assume_slab_alignment __assume_aligned(ARCH_SLAB_MINALIGN)
#define __assume_page_alignment __assume_aligned(PAGE_SIZE)

/*
 * Kmalloc array related definitions
 */

/*
 * SLUB directly allocates requests fitting into an order-1 page
 * (PAGE_SIZE*2). Larger requests are passed to the page allocator.
 */
#define KMALLOC_SHIFT_HIGH	(PAGE_SHIFT + 1)
#define KMALLOC_SHIFT_MAX	(MAX_PAGE_ORDER + PAGE_SHIFT)
#ifndef KMALLOC_SHIFT_LOW
#define KMALLOC_SHIFT_LOW	3
#endif

/* Maximum allocatable size */
#define KMALLOC_MAX_SIZE	(1UL << KMALLOC_SHIFT_MAX)
/* Maximum size for which we actually use a slab cache */
#define KMALLOC_MAX_CACHE_SIZE	(1UL << KMALLOC_SHIFT_HIGH)
/* Maximum order allocatable via the slab allocator */
#define KMALLOC_MAX_ORDER	(KMALLOC_SHIFT_MAX - PAGE_SHIFT)

/*
 * Kmalloc subsystem.
 */
#ifndef KMALLOC_MIN_SIZE
#define KMALLOC_MIN_SIZE (1 << KMALLOC_SHIFT_LOW)
#endif

/*
 * This restriction comes from the byte sized index implementation.
 * Page size is normally 2^12 bytes and, in this case, if we want to use
 * a byte sized index which can represent 2^8 entries, the size of the object
 * should be equal to or greater than 2^12 / 2^8 = 2^4 = 16.
 * If the minimum size of kmalloc is less than 16, we use it as the minimum
 * object size and give up on using the byte sized index.
 */
#define SLAB_OBJ_MIN_SIZE	(KMALLOC_MIN_SIZE < 16 ? \
				(KMALLOC_MIN_SIZE) : 16)

#ifdef CONFIG_RANDOM_KMALLOC_CACHES
#define RANDOM_KMALLOC_CACHES_NR	15 // # of cache copies
#else
#define RANDOM_KMALLOC_CACHES_NR	0
#endif

/*
 * Whenever changing this, take care that kmalloc_type() and
 * create_kmalloc_caches() still work as intended.
 *
 * KMALLOC_NORMAL can contain only unaccounted objects whereas KMALLOC_CGROUP
 * is for accounted but unreclaimable and non-dma objects. All the other
 * kmem caches can have both accounted and unaccounted objects.
 */
enum kmalloc_cache_type {
	KMALLOC_NORMAL = 0,
#ifndef CONFIG_ZONE_DMA
	KMALLOC_DMA = KMALLOC_NORMAL,
#endif
#ifndef CONFIG_MEMCG
	KMALLOC_CGROUP = KMALLOC_NORMAL,
#endif
	KMALLOC_RANDOM_START = KMALLOC_NORMAL,
	KMALLOC_RANDOM_END = KMALLOC_RANDOM_START + RANDOM_KMALLOC_CACHES_NR,
#ifdef CONFIG_SLUB_TINY
	KMALLOC_RECLAIM = KMALLOC_NORMAL,
#else
	KMALLOC_RECLAIM,
#endif
#ifdef CONFIG_ZONE_DMA
	KMALLOC_DMA,
#endif
#ifdef CONFIG_MEMCG
	KMALLOC_CGROUP,
#endif
	NR_KMALLOC_TYPES
};

typedef struct kmem_cache * kmem_buckets[KMALLOC_SHIFT_HIGH + 1];

extern kmem_buckets kmalloc_caches[NR_KMALLOC_TYPES];

/*
 * Define gfp bits that should not be set for KMALLOC_NORMAL.
 */
#define KMALLOC_NOT_NORMAL_BITS					\
	(__GFP_RECLAIMABLE |					\
	(IS_ENABLED(CONFIG_ZONE_DMA) ? __GFP_DMA : 0) |		\
	(IS_ENABLED(CONFIG_MEMCG) ? __GFP_ACCOUNT : 0))

extern unsigned long random_kmalloc_seed;

static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags, unsigned long caller)
{
	/*
	 * The most common case is KMALLOC_NORMAL, so test for it
	 * with a single branch for all the relevant flags.
	 */
	if (likely((flags & KMALLOC_NOT_NORMAL_BITS) == 0))
#ifdef CONFIG_RANDOM_KMALLOC_CACHES
		/* RANDOM_KMALLOC_CACHES_NR (=15) copies + the KMALLOC_NORMAL */
		return KMALLOC_RANDOM_START + hash_64(caller ^ random_kmalloc_seed,
						      ilog2(RANDOM_KMALLOC_CACHES_NR + 1));
#else
		return KMALLOC_NORMAL;
#endif

	/*
	 * At least one of the flags has to be set. Their priorities in
	 * decreasing order are:
	 *  1) __GFP_DMA
	 *  2) __GFP_RECLAIMABLE
	 *  3) __GFP_ACCOUNT
	 */
	if (IS_ENABLED(CONFIG_ZONE_DMA) && (flags & __GFP_DMA))
		return KMALLOC_DMA;
	if (!IS_ENABLED(CONFIG_MEMCG) || (flags & __GFP_RECLAIMABLE))
		return KMALLOC_RECLAIM;
	else
		return KMALLOC_CGROUP;
}

/*
 * Figure out which kmalloc slab an allocation of a certain size
 * belongs to.
 * 0 = zero alloc
 * 1 =  65 .. 96 bytes
 * 2 = 129 .. 192 bytes
 * n = 2^(n-1)+1 .. 2^n
 *
 * Note: __kmalloc_index() is compile-time optimized, and not runtime optimized;
 * typical usage is via kmalloc_index() and therefore evaluated at compile-time.
 * Callers where !size_is_constant should only be test modules, where runtime
 * overheads of __kmalloc_index() can be tolerated. Also see kmalloc_slab().
 */
static __always_inline unsigned int __kmalloc_index(size_t size,
						    bool size_is_constant)
{
	if (!size)
		return 0;

	if (size <= KMALLOC_MIN_SIZE)
		return KMALLOC_SHIFT_LOW;

	if (KMALLOC_MIN_SIZE <= 32 && size > 64 && size <= 96)
		return 1;
	if (KMALLOC_MIN_SIZE <= 64 && size > 128 && size <= 192)
		return 2;
	if (size <= 8) return 3;
	if (size <= 16) return 4;
	if (size <= 32) return 5;
	if (size <= 64) return 6;
	if (size <= 128) return 7;
	if (size <= 256) return 8;
	if (size <= 512) return 9;
	if (size <= 1024) return 10;
	if (size <= 2 * 1024) return 11;
	if (size <= 4 * 1024) return 12;
	if (size <= 8 * 1024) return 13;
	if (size <= 16 * 1024) return 14;
	if (size <= 32 * 1024) return 15;
	if (size <= 64 * 1024) return 16;
	if (size <= 128 * 1024) return 17;
	if (size <= 256 * 1024) return 18;
	if (size <= 512 * 1024) return 19;
	if (size <= 1024 * 1024) return 20;
	if (size <= 2 * 1024 * 1024) return 21;

	if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && size_is_constant)
		BUILD_BUG_ON_MSG(1, "unexpected size in kmalloc_index()");
	else
		BUG();

	/* Will never be reached. Needed because the compiler may complain */
	return -1;
}
static_assert(PAGE_SHIFT <= 20);
#define kmalloc_index(s) __kmalloc_index(s, true)

#include <linux/alloc_tag.h>

/**
 * kmem_cache_alloc - Allocate an object
 * @cachep: The cache to allocate from.
 * @flags: See kmalloc().
 *
 * Allocate an object from this cache.
 * See kmem_cache_zalloc() for a shortcut of adding __GFP_ZERO to flags.
 *
 * Return: pointer to the new object or %NULL in case of error
 */
void *kmem_cache_alloc_noprof(struct kmem_cache *cachep,
			      gfp_t flags) __assume_slab_alignment __malloc;
#define kmem_cache_alloc(...)	alloc_hooks(kmem_cache_alloc_noprof(__VA_ARGS__))

void *kmem_cache_alloc_lru_noprof(struct kmem_cache *s, struct list_lru *lru,
				  gfp_t gfpflags) __assume_slab_alignment __malloc;
#define kmem_cache_alloc_lru(...)	alloc_hooks(kmem_cache_alloc_lru_noprof(__VA_ARGS__))

/**
 * kmem_cache_charge - memcg charge an already allocated slab memory
 * @objp: address of the slab object to memcg charge
 * @gfpflags: describe the allocation context
 *
 * kmem_cache_charge allows charging a slab object to the current memcg,
 * primarily in cases where charging at allocation time might not be possible
 * because the target memcg is not known (e.g. softirq context).
 *
 * The objp should be a pointer returned by the slab allocator functions like
 * kmalloc (with __GFP_ACCOUNT in flags) or kmem_cache_alloc. The memcg charge
 * behavior can be controlled through the gfpflags parameter, which affects how
 * the necessary internal metadata can be allocated. Including __GFP_NOFAIL
 * denotes that overcharging is requested instead of failure, but is not
 * applied for the internal metadata allocation.
 *
 * There are several cases where it will return true even if the charging was
 * not done. More specifically:
 *
 * 1. For !CONFIG_MEMCG or cgroup_disable=memory systems.
 * 2. Already charged slab objects.
 * 3. For slab objects from KMALLOC_NORMAL caches - allocated by kmalloc()
 *    without __GFP_ACCOUNT
 * 4. Allocating internal metadata has failed
 *
 * Return: true if charge was successful otherwise false.
 */
bool kmem_cache_charge(void *objp, gfp_t gfpflags);
void kmem_cache_free(struct kmem_cache *s, void *objp);

kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags,
				  unsigned int useroffset, unsigned int usersize,
				  void (*ctor)(void *));

/*
 * Bulk allocation and freeing operations. These are accelerated in an
 * allocator specific way to avoid taking locks repeatedly or building
 * metadata structures unnecessarily.
 *
 * Note that interrupts must be enabled when calling these functions.
 */
void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p);

int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size, void **p);
#define kmem_cache_alloc_bulk(...)	alloc_hooks(kmem_cache_alloc_bulk_noprof(__VA_ARGS__))

static __always_inline void kfree_bulk(size_t size, void **p)
{
	kmem_cache_free_bulk(NULL, size, p);
}

void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags,
				   int node) __assume_slab_alignment __malloc;
#define kmem_cache_alloc_node(...)	alloc_hooks(kmem_cache_alloc_node_noprof(__VA_ARGS__))

/*
 * These macros allow declaring a kmem_buckets * parameter alongside size, which
 * can be compiled out with CONFIG_SLAB_BUCKETS=n so that a large number of call
 * sites don't have to pass NULL.
 */
#ifdef CONFIG_SLAB_BUCKETS
#define DECL_BUCKET_PARAMS(_size, _b)	size_t (_size), kmem_buckets *(_b)
#define PASS_BUCKET_PARAMS(_size, _b)	(_size), (_b)
#define PASS_BUCKET_PARAM(_b)		(_b)
#else
#define DECL_BUCKET_PARAMS(_size, _b)	size_t (_size)
#define PASS_BUCKET_PARAMS(_size, _b)	(_size)
#define PASS_BUCKET_PARAM(_b)		NULL
#endif

/*
 * The following functions are not to be used directly and are intended only
 * for internal use from kmalloc() and kmalloc_node(),
 * with the exception of kunit tests.
 */
void *__kmalloc_noprof(size_t size, gfp_t flags)
				__assume_kmalloc_alignment __alloc_size(1);

void *__kmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
				__assume_kmalloc_alignment __alloc_size(1);

void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t flags, size_t size)
				__assume_kmalloc_alignment __alloc_size(3);

void *__kmalloc_cache_node_noprof(struct kmem_cache *s, gfp_t gfpflags,
				  int node, size_t size)
				__assume_kmalloc_alignment __alloc_size(4);

void *__kmalloc_large_noprof(size_t size, gfp_t flags)
				__assume_page_alignment __alloc_size(1);

void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node)
				__assume_page_alignment __alloc_size(1);

/**
 * kmalloc - allocate kernel memory
 * @size: how many bytes of memory are required.
 * @flags: describe the allocation context
 *
 * kmalloc is the normal method of allocating memory
 * for objects smaller than page size in the kernel.
 *
 * The allocated object address is aligned to at least ARCH_KMALLOC_MINALIGN
 * bytes. For a @size that is a power of two bytes, the alignment is also
 * guaranteed to be at least the size. For other sizes, the alignment is
 * guaranteed to be at least the largest power-of-two divisor of @size.
 *
 * The @flags argument may be one of the GFP flags defined at
 * include/linux/gfp_types.h and described at
 * :ref:`Documentation/core-api/mm-api.rst <mm-api-gfp-flags>`
 *
 * The recommended usage of the @flags is described at
 * :ref:`Documentation/core-api/memory-allocation.rst <memory_allocation>`
 *
 * Below is a brief outline of the most useful GFP flags
 *
 * %GFP_KERNEL
 *	Allocate normal kernel ram. May sleep.
 *
 * %GFP_NOWAIT
 *	Allocation will not sleep.
 *
 * %GFP_ATOMIC
 *	Allocation will not sleep. May use emergency pools.
 *
 * Also it is possible to set different flags by OR'ing
 * in one or more of the following additional @flags:
 *
 * %__GFP_ZERO
 *	Zero the allocated memory before returning. Also see kzalloc().
 *
 * %__GFP_HIGH
 *	This allocation has high priority and may use emergency pools.
 *
 * %__GFP_NOFAIL
 *	Indicate that this allocation is in no way allowed to fail
 *	(think twice before using).
 *
 * %__GFP_NORETRY
 *	If memory is not immediately available,
 *	then give up at once.
 *
 * %__GFP_NOWARN
 *	If allocation fails, don't issue any warnings.
 *
 * %__GFP_RETRY_MAYFAIL
 *	Try really hard to succeed the allocation but fail
 *	eventually.
 */
static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t flags)
{
	if (__builtin_constant_p(size) && size) {
		unsigned int index;

		if (size > KMALLOC_MAX_CACHE_SIZE)
			return __kmalloc_large_noprof(size, flags);

		index = kmalloc_index(size);
		return __kmalloc_cache_noprof(
				kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
				flags, size);
	}
	return __kmalloc_noprof(size, flags);
}
#define kmalloc(...)	alloc_hooks(kmalloc_noprof(__VA_ARGS__))

#define kmem_buckets_alloc(_b, _size, _flags)	\
	alloc_hooks(__kmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE))

#define kmem_buckets_alloc_track_caller(_b, _size, _flags)	\
	alloc_hooks(__kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE, _RET_IP_))

static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gfp_t flags, int node)
{
	if (__builtin_constant_p(size) && size) {
		unsigned int index;

		if (size > KMALLOC_MAX_CACHE_SIZE)
			return __kmalloc_large_node_noprof(size, flags, node);

		index = kmalloc_index(size);
		return __kmalloc_cache_node_noprof(
				kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
				flags, node, size);
	}
	return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node);
}
#define kmalloc_node(...)	alloc_hooks(kmalloc_node_noprof(__VA_ARGS__))

/**
 * kmalloc_array - allocate memory for an array.
 * @n: number of elements.
 * @size: element size.
 * @flags: the type of memory to allocate (see kmalloc).
 */
static inline __alloc_size(1, 2) void *kmalloc_array_noprof(size_t n, size_t size, gfp_t flags)
{
	size_t bytes;

	if (unlikely(check_mul_overflow(n, size, &bytes)))
		return NULL;
	return kmalloc_noprof(bytes, flags);
}
#define kmalloc_array(...)	alloc_hooks(kmalloc_array_noprof(__VA_ARGS__))

/**
 * krealloc_array - reallocate memory for an array.
 * @p: pointer to the memory chunk to reallocate
 * @new_n: new number of elements to alloc
 * @new_size: new size of a single member of the array
 * @flags: the type of memory to allocate (see kmalloc)
 *
 * If __GFP_ZERO logic is requested, callers must ensure that, starting with the
 * initial memory allocation, every subsequent call to this API for the same
 * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that
 * __GFP_ZERO is not fully honored by this API.
 *
 * See krealloc_noprof() for further details.
 *
 * In any case, the contents of the object pointed to are preserved up to the
 * lesser of the new and old sizes.
 */
static inline __realloc_size(2, 3) void * __must_check krealloc_array_noprof(void *p,
									     size_t new_n,
									     size_t new_size,
									     gfp_t flags)
{
	size_t bytes;

	if (unlikely(check_mul_overflow(new_n, new_size, &bytes)))
		return NULL;

	return krealloc_noprof(p, bytes, flags);
}
#define krealloc_array(...)	alloc_hooks(krealloc_array_noprof(__VA_ARGS__))

/**
 * kcalloc - allocate memory for an array. The memory is set to zero.
 * @n: number of elements.
 * @size: element size.
 * @flags: the type of memory to allocate (see kmalloc).
 */
#define kcalloc(n, size, flags)	kmalloc_array(n, size, (flags) | __GFP_ZERO)

void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags,
					 int node, unsigned long caller) __alloc_size(1);
#define kmalloc_node_track_caller_noprof(size, flags, node, caller) \
	__kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node, caller)
#define kmalloc_node_track_caller(...)	\
	alloc_hooks(kmalloc_node_track_caller_noprof(__VA_ARGS__, _RET_IP_))

/*
 * kmalloc_track_caller is a special version of kmalloc that records the
 * calling function of the routine calling it for slab leak tracking instead
 * of just the calling function (confusing, eh?).
 * It's useful when the call to kmalloc comes from a widely-used standard
 * allocator where we care about the real place the memory allocation
 * request comes from.
 */
#define kmalloc_track_caller(...)	kmalloc_node_track_caller(__VA_ARGS__, NUMA_NO_NODE)

#define kmalloc_track_caller_noprof(...)	\
		kmalloc_node_track_caller_noprof(__VA_ARGS__, NUMA_NO_NODE, _RET_IP_)

static inline __alloc_size(1, 2) void *kmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags,
								 int node)
{
	size_t bytes;

	if (unlikely(check_mul_overflow(n, size, &bytes)))
		return NULL;
	if (__builtin_constant_p(n) && __builtin_constant_p(size))
		return kmalloc_node_noprof(bytes, flags, node);
	return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(bytes, NULL), flags, node);
}
#define kmalloc_array_node(...)	alloc_hooks(kmalloc_array_node_noprof(__VA_ARGS__))

#define kcalloc_node(_n, _size, _flags, _node)	\
	kmalloc_array_node(_n, _size, (_flags) | __GFP_ZERO, _node)

/*
 * Shortcuts
 */
#define kmem_cache_zalloc(_k, _flags)	kmem_cache_alloc(_k, (_flags)|__GFP_ZERO)

/**
 * kzalloc - allocate memory. The memory is set to zero.
 * @size: how many bytes of memory are required.
 * @flags: the type of memory to allocate (see kmalloc).
 */
static inline __alloc_size(1) void *kzalloc_noprof(size_t size, gfp_t flags)
{
	return kmalloc_noprof(size, flags | __GFP_ZERO);
}
#define kzalloc(...)	alloc_hooks(kzalloc_noprof(__VA_ARGS__))
#define kzalloc_node(_size, _flags, _node)	kmalloc_node(_size, (_flags)|__GFP_ZERO, _node)

void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) __alloc_size(1);
#define kvmalloc_node_noprof(size, flags, node)	\
	__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node)
#define kvmalloc_node(...)	alloc_hooks(kvmalloc_node_noprof(__VA_ARGS__))

#define kvmalloc(_size, _flags)		kvmalloc_node(_size, _flags, NUMA_NO_NODE)
#define kvmalloc_noprof(_size, _flags)	kvmalloc_node_noprof(_size, _flags, NUMA_NO_NODE)
#define kvzalloc(_size, _flags)		kvmalloc(_size, (_flags)|__GFP_ZERO)

#define kvzalloc_node(_size, _flags, _node)	kvmalloc_node(_size, (_flags)|__GFP_ZERO, _node)
#define kmem_buckets_valloc(_b, _size, _flags)	\
	alloc_hooks(__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE))

static inline __alloc_size(1, 2) void *
kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node)
{
	size_t bytes;

	if (unlikely(check_mul_overflow(n, size, &bytes)))
		return NULL;

	return kvmalloc_node_noprof(bytes, flags, node);
}

#define kvmalloc_array_noprof(...)		kvmalloc_array_node_noprof(__VA_ARGS__, NUMA_NO_NODE)
#define kvcalloc_node_noprof(_n,_s,_f,_node)	kvmalloc_array_node_noprof(_n,_s,(_f)|__GFP_ZERO,_node)
#define kvcalloc_noprof(...)			kvcalloc_node_noprof(__VA_ARGS__, NUMA_NO_NODE)

#define kvmalloc_array(...)	alloc_hooks(kvmalloc_array_noprof(__VA_ARGS__))
#define kvcalloc_node(...)	alloc_hooks(kvcalloc_node_noprof(__VA_ARGS__))
#define kvcalloc(...)		alloc_hooks(kvcalloc_noprof(__VA_ARGS__))

void *kvrealloc_noprof(const void *p, size_t size, gfp_t flags) __realloc_size(2);
#define kvrealloc(...)	alloc_hooks(kvrealloc_noprof(__VA_ARGS__))

extern void kvfree(const void *addr);
DEFINE_FREE(kvfree, void *, if (!IS_ERR_OR_NULL(_T)) kvfree(_T))

extern void kvfree_sensitive(const void *addr, size_t len);

unsigned int kmem_cache_size(struct kmem_cache *s);

#ifndef CONFIG_KVFREE_RCU_BATCHED
static inline void kvfree_rcu_barrier(void)
{
	rcu_barrier();
}

static inline void kfree_rcu_scheduler_running(void) { }
#else
void kvfree_rcu_barrier(void);

void kfree_rcu_scheduler_running(void);
#endif

/**
 * kmalloc_size_roundup - Report allocation bucket size for the given size
 *
 * @size: Number of bytes to round up from.
 *
 * This returns the number of bytes that would be available in a kmalloc()
 * allocation of @size bytes. For example, a 126 byte request would be
 * rounded up to the next sized kmalloc bucket, 128 bytes. (This is strictly
 * for the general-purpose kmalloc()-based allocations, and is not for the
 * pre-sized kmem_cache_alloc()-based allocations.)
 *
 * Use this to kmalloc() the full bucket size ahead of time instead of using
 * ksize() to query the size after an allocation.
 */
size_t kmalloc_size_roundup(size_t size);

void __init kmem_cache_init_late(void);
void __init kvfree_rcu_init(void);

#endif	/* _LINUX_SLAB_H */
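To tie the header together: a typical user declares a struct, creates a cache for it with KMEM_CACHE(), and allocates and frees objects through that cache. A minimal sketch using only APIs defined above; struct foo_ctx, foo_cachep, and the function names are hypothetical, not part of this header:

struct foo_ctx {
	unsigned long id;
	void *data;
};

static struct kmem_cache *foo_cachep;

static int foo_cache_init(void)
{
	/* Object size and alignment are derived from the struct itself. */
	foo_cachep = KMEM_CACHE(foo_ctx, SLAB_HWCACHE_ALIGN);
	if (!foo_cachep)
		return -ENOMEM;
	return 0;
}

static struct foo_ctx *foo_alloc(void)
{
	/* kmem_cache_zalloc() = kmem_cache_alloc() with __GFP_ZERO. */
	return kmem_cache_zalloc(foo_cachep, GFP_KERNEL);
}

static void foo_free(struct foo_ctx *ctx)
{
	kmem_cache_free(foo_cachep, ctx);
}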
2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 | // SPDX-License-Identifier: GPL-2.0+ #include <linux/jiffies.h> #include <linux/errno.h> #include <linux/module.h> #include <linux/slab.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> #include <linux/firmware.h> #include "usb.h" #include "transport.h" #include "protocol.h" #include "debug.h" #include "scsiglue.h" #define SD_INIT1_FIRMWARE "ene-ub6250/sd_init1.bin" #define SD_INIT2_FIRMWARE "ene-ub6250/sd_init2.bin" #define SD_RW_FIRMWARE "ene-ub6250/sd_rdwr.bin" #define MS_INIT_FIRMWARE "ene-ub6250/ms_init.bin" #define MSP_RW_FIRMWARE "ene-ub6250/msp_rdwr.bin" #define MS_RW_FIRMWARE "ene-ub6250/ms_rdwr.bin" #define DRV_NAME "ums_eneub6250" MODULE_DESCRIPTION("Driver for ENE UB6250 reader"); MODULE_LICENSE("GPL"); MODULE_IMPORT_NS("USB_STORAGE"); MODULE_FIRMWARE(SD_INIT1_FIRMWARE); MODULE_FIRMWARE(SD_INIT2_FIRMWARE); MODULE_FIRMWARE(SD_RW_FIRMWARE); MODULE_FIRMWARE(MS_INIT_FIRMWARE); MODULE_FIRMWARE(MSP_RW_FIRMWARE); MODULE_FIRMWARE(MS_RW_FIRMWARE); /* * The table of devices */ #define UNUSUAL_DEV(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax, \ vendorName, productName, useProtocol, useTransport, \ initFunction, flags) \ { USB_DEVICE_VER(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax), \ .driver_info = (flags)} static const struct usb_device_id ene_ub6250_usb_ids[] = { # include "unusual_ene_ub6250.h" { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, ene_ub6250_usb_ids); #undef UNUSUAL_DEV /* * The flags table */ #define UNUSUAL_DEV(idVendor, idProduct, bcdDeviceMin, bcdDeviceMax, \ vendor_name, product_name, use_protocol, use_transport, \ init_function, Flags) \ { \ .vendorName = vendor_name, \ .productName = product_name, \ .useProtocol = use_protocol, \ .useTransport = use_transport, \ .initFunction = init_function, \ } static const struct us_unusual_dev ene_ub6250_unusual_dev_list[] = { # include "unusual_ene_ub6250.h" { } /* Terminating entry */ }; #undef UNUSUAL_DEV /* ENE bin code len */ #define ENE_BIN_CODE_LEN 0x800 /* EnE HW Register */ #define REG_CARD_STATUS 0xFF83 #define REG_HW_TRAP1 0xFF89 /* SRB Status */ #define SS_SUCCESS 0x000000 /* No Sense */ #define SS_NOT_READY 0x023A00 /* Medium not present */ #define SS_MEDIUM_ERR 0x031100 /* Unrecovered read error */ #define SS_HW_ERR 0x040800 /* Communication failure */ #define SS_ILLEGAL_REQUEST 0x052000 /* Invalid command */ #define SS_UNIT_ATTENTION 0x062900 /* Reset occurred */ /* ENE Load FW Pattern */ #define SD_INIT1_PATTERN 1 #define SD_INIT2_PATTERN 2 #define SD_RW_PATTERN 3 #define MS_INIT_PATTERN 4 #define MSP_RW_PATTERN 5 #define MS_RW_PATTERN 6 #define SM_INIT_PATTERN 7 #define SM_RW_PATTERN 8 #define FDIR_WRITE 0 #define FDIR_READ 1 /* For MS Card */ /* Status Register 1 */ #define MS_REG_ST1_MB 0x80 /* media busy */ #define MS_REG_ST1_FB1 0x40 /* flush busy 1 */ #define MS_REG_ST1_DTER 0x20 /* error on data(corrected) */ #define MS_REG_ST1_UCDT 0x10 /* unable to correct data */ #define MS_REG_ST1_EXER 0x08 /* error on extra(corrected) */ #define MS_REG_ST1_UCEX 0x04 /* unable to correct extra */ #define MS_REG_ST1_FGER 0x02 /* error on overwrite 
/* ENE Load FW Pattern */
#define SD_INIT1_PATTERN	1
#define SD_INIT2_PATTERN	2
#define SD_RW_PATTERN		3
#define MS_INIT_PATTERN		4
#define MSP_RW_PATTERN		5
#define MS_RW_PATTERN		6
#define SM_INIT_PATTERN		7
#define SM_RW_PATTERN		8

#define FDIR_WRITE		0
#define FDIR_READ		1

/* For MS Card */

/* Status Register 1 */
#define MS_REG_ST1_MB		0x80	/* media busy */
#define MS_REG_ST1_FB1		0x40	/* flush busy 1 */
#define MS_REG_ST1_DTER		0x20	/* error on data (corrected) */
#define MS_REG_ST1_UCDT		0x10	/* unable to correct data */
#define MS_REG_ST1_EXER		0x08	/* error on extra (corrected) */
#define MS_REG_ST1_UCEX		0x04	/* unable to correct extra */
#define MS_REG_ST1_FGER		0x02	/* error on overwrite flag (corrected) */
#define MS_REG_ST1_UCFG		0x01	/* unable to correct overwrite flag */
#define MS_REG_ST1_DEFAULT	(MS_REG_ST1_MB | MS_REG_ST1_FB1 | MS_REG_ST1_DTER | MS_REG_ST1_UCDT | MS_REG_ST1_EXER | MS_REG_ST1_UCEX | MS_REG_ST1_FGER | MS_REG_ST1_UCFG)

/* Overwrite Area */
#define MS_REG_OVR_BKST		0x80		/* block status */
#define MS_REG_OVR_BKST_OK	MS_REG_OVR_BKST	/* OK */
#define MS_REG_OVR_BKST_NG	0x00		/* NG */
#define MS_REG_OVR_PGST0	0x40		/* page status */
#define MS_REG_OVR_PGST1	0x20
#define MS_REG_OVR_PGST_MASK	(MS_REG_OVR_PGST0 | MS_REG_OVR_PGST1)
#define MS_REG_OVR_PGST_OK	(MS_REG_OVR_PGST0 | MS_REG_OVR_PGST1)	/* OK */
#define MS_REG_OVR_PGST_NG	MS_REG_OVR_PGST1			/* NG */
#define MS_REG_OVR_PGST_DATA_ERROR	0x00	/* data error */
#define MS_REG_OVR_UDST			0x10	/* update status */
#define MS_REG_OVR_UDST_UPDATING	0x00	/* updating */
#define MS_REG_OVR_UDST_NO_UPDATE	MS_REG_OVR_UDST
#define MS_REG_OVR_RESERVED	0x08
#define MS_REG_OVR_DEFAULT	(MS_REG_OVR_BKST_OK | MS_REG_OVR_PGST_OK | MS_REG_OVR_UDST_NO_UPDATE | MS_REG_OVR_RESERVED)

/* Management Flag */
#define MS_REG_MNG_SCMS0	0x20	/* serial copy management system */
#define MS_REG_MNG_SCMS1	0x10
#define MS_REG_MNG_SCMS_MASK		(MS_REG_MNG_SCMS0 | MS_REG_MNG_SCMS1)
#define MS_REG_MNG_SCMS_COPY_OK		(MS_REG_MNG_SCMS0 | MS_REG_MNG_SCMS1)
#define MS_REG_MNG_SCMS_ONE_COPY	MS_REG_MNG_SCMS1
#define MS_REG_MNG_SCMS_NO_COPY	0x00
#define MS_REG_MNG_ATFLG	0x08	/* address transfer table flag */
#define MS_REG_MNG_ATFLG_OTHER	MS_REG_MNG_ATFLG	/* other */
#define MS_REG_MNG_ATFLG_ATTBL	0x00	/* address transfer table */
#define MS_REG_MNG_SYSFLG	0x04	/* system flag */
#define MS_REG_MNG_SYSFLG_USER	MS_REG_MNG_SYSFLG	/* user block */
#define MS_REG_MNG_SYSFLG_BOOT	0x00	/* system block */
#define MS_REG_MNG_RESERVED	0xc3
#define MS_REG_MNG_DEFAULT	(MS_REG_MNG_SCMS_COPY_OK | MS_REG_MNG_ATFLG_OTHER | MS_REG_MNG_SYSFLG_USER | MS_REG_MNG_RESERVED)

#define MS_MAX_PAGES_PER_BLOCK		32
#define MS_MAX_INITIAL_ERROR_BLOCKS	10
#define MS_LIB_BITS_PER_BYTE		8

#define MS_SYSINF_FORMAT_FAT		1
#define MS_SYSINF_USAGE_GENERAL		0

#define MS_SYSINF_MSCLASS_TYPE_1	1
#define MS_SYSINF_PAGE_SIZE		MS_BYTES_PER_PAGE /* fixed */

#define MS_SYSINF_CARDTYPE_RDONLY	1
#define MS_SYSINF_CARDTYPE_RDWR		2
#define MS_SYSINF_CARDTYPE_HYBRID	3
#define MS_SYSINF_SECURITY		0x01
#define MS_SYSINF_SECURITY_NO_SUPPORT	MS_SYSINF_SECURITY
#define MS_SYSINF_SECURITY_SUPPORT	0

#define MS_SYSINF_RESERVED1		1
#define MS_SYSINF_RESERVED2		1

#define MS_SYSENT_TYPE_INVALID_BLOCK	0x01
#define MS_SYSENT_TYPE_CIS_IDI		0x0a	/* CIS/IDI */

#define SIZE_OF_KIRO		1024
#define BYTE_MASK		0xff

/* ms error code */
#define MS_STATUS_WRITE_PROTECT	0x0106
#define MS_STATUS_SUCCESS	0x0000
#define MS_ERROR_FLASH_READ	0x8003
#define MS_ERROR_FLASH_ERASE	0x8005
#define MS_LB_ERROR		0xfff0
#define MS_LB_BOOT_BLOCK	0xfff1
#define MS_LB_INITIAL_ERROR	0xfff2
#define MS_STATUS_SUCCESS_WITH_ECC 0xfff3
#define MS_LB_ACQUIRED_ERROR	0xfff4
#define MS_LB_NOT_USED_ERASED	0xfff5
#define MS_NOCARD_ERROR		0xfff8
#define MS_NO_MEMORY_ERROR	0xfff9
#define MS_STATUS_INT_ERROR	0xfffa
#define MS_STATUS_ERROR		0xfffe
#define MS_LB_NOT_USED		0xffff

#define MS_BOOT_BLOCK_ID		0x0001
#define MS_BOOT_BLOCK_FORMAT_VERSION	0x0100
#define MS_BOOT_BLOCK_DATA_ENTRIES	2

#define MS_NUMBER_OF_SYSTEM_ENTRY	4
#define MS_NUMBER_OF_BOOT_BLOCK		2
#define MS_BYTES_PER_PAGE		512
#define MS_LOGICAL_BLOCKS_PER_SEGMENT	496
#define MS_LOGICAL_BLOCKS_IN_1ST_SEGMENT 494
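/*
 * Geometry implied by the constants above: a physical segment holds
 * 0x200 (512) physical blocks but carries only 496 logical blocks, the
 * remainder presumably serving as spare blocks for error management.
 * The first segment maps just 494 logical blocks because two physical
 * blocks are reserved for boot blocks (MS_NUMBER_OF_BOOT_BLOCK); see
 * ms_lib_phy_to_log_range() below.
 */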
#define MS_PHYSICAL_BLOCKS_PER_SEGMENT	0x200 /* 512 */
#define MS_PHYSICAL_BLOCKS_PER_SEGMENT_MASK 0x1ff

/* MemoryStick Register */
/* Status Register 0 */
#define MS_REG_ST0_WP		0x01	/* write protected */
#define MS_REG_ST0_WP_ON	MS_REG_ST0_WP

#define MS_LIB_CTRL_RDONLY	0
#define MS_LIB_CTRL_WRPROTECT	1

/* phy->log table */
#define ms_libconv_to_logical(pdx, PhyBlock) (((PhyBlock) >= (pdx)->MS_Lib.NumberOfPhyBlock) ? MS_STATUS_ERROR : (pdx)->MS_Lib.Phy2LogMap[PhyBlock])
#define ms_libconv_to_physical(pdx, LogBlock) (((LogBlock) >= (pdx)->MS_Lib.NumberOfLogBlock) ? MS_STATUS_ERROR : (pdx)->MS_Lib.Log2PhyMap[LogBlock])

#define ms_lib_ctrl_set(pdx, Flag)	((pdx)->MS_Lib.flags |= (1 << (Flag)))
#define ms_lib_ctrl_reset(pdx, Flag)	((pdx)->MS_Lib.flags &= ~(1 << (Flag)))
#define ms_lib_ctrl_check(pdx, Flag)	((pdx)->MS_Lib.flags & (1 << (Flag)))

#define ms_lib_iswritable(pdx) ((ms_lib_ctrl_check((pdx), MS_LIB_CTRL_RDONLY) == 0) && (ms_lib_ctrl_check(pdx, MS_LIB_CTRL_WRPROTECT) == 0))
#define ms_lib_clear_pagemap(pdx) memset((pdx)->MS_Lib.pagemap, 0, sizeof((pdx)->MS_Lib.pagemap))
#define memstick_logaddr(logadr1, logadr0) ((((u16)(logadr1)) << 8) | (logadr0))
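/*
 * memstick_logaddr() rebuilds the big-endian logical address stored in
 * a page's extra data: extra bytes 0x01, 0xf3 combine to logical block
 * 0x01f3. The ms_libconv_* macros above return MS_STATUS_ERROR (0xfffe)
 * for out-of-range blocks, which callers treat as >= MS_LB_ERROR.
 */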
/* SD_STATUS bits */
#define SD_Insert	BIT(0)
#define SD_Ready	BIT(1)
#define SD_MediaChange	BIT(2)
#define SD_IsMMC	BIT(3)
#define SD_HiCapacity	BIT(4)
#define SD_HiSpeed	BIT(5)
#define SD_WtP		BIT(6)
			/* Bit 7 reserved */

/* MS_STATUS bits */
#define MS_Insert	BIT(0)
#define MS_Ready	BIT(1)
#define MS_MediaChange	BIT(2)
#define MS_IsMSPro	BIT(3)
#define MS_IsMSPHG	BIT(4)
			/* Bit 5 reserved */
#define MS_WtP		BIT(6)
			/* Bit 7 reserved */

/* SM_STATUS bits */
#define SM_Insert	BIT(0)
#define SM_Ready	BIT(1)
#define SM_MediaChange	BIT(2)
			/* Bits 3-5 reserved */
#define SM_WtP		BIT(6)
#define SM_IsMS		BIT(7)

struct ms_bootblock_cis {
	u8 bCistplDEVICE[6];	/* 0 */
	u8 bCistplDEVICE0C[6];	/* 6 */
	u8 bCistplJEDECC[4];	/* 12 */
	u8 bCistplMANFID[6];	/* 16 */
	u8 bCistplVER1[32];	/* 22 */
	u8 bCistplFUNCID[4];	/* 54 */
	u8 bCistplFUNCE0[4];	/* 58 */
	u8 bCistplFUNCE1[5];	/* 62 */
	u8 bCistplCONF[7];	/* 67 */
	u8 bCistplCFTBLENT0[10];/* 74 */
	u8 bCistplCFTBLENT1[8];	/* 84 */
	u8 bCistplCFTBLENT2[12];/* 92 */
	u8 bCistplCFTBLENT3[8];	/* 104 */
	u8 bCistplCFTBLENT4[17];/* 112 */
	u8 bCistplCFTBLENT5[8];	/* 129 */
	u8 bCistplCFTBLENT6[17];/* 137 */
	u8 bCistplCFTBLENT7[8];	/* 154 */
	u8 bCistplNOLINK[3];	/* 162 */
};

struct ms_bootblock_idi {
#define MS_IDI_GENERAL_CONF 0x848A
	u16 wIDIgeneralConfiguration;	/* 0 */
	u16 wIDInumberOfCylinder;	/* 1 */
	u16 wIDIreserved0;		/* 2 */
	u16 wIDInumberOfHead;		/* 3 */
	u16 wIDIbytesPerTrack;		/* 4 */
	u16 wIDIbytesPerSector;		/* 5 */
	u16 wIDIsectorsPerTrack;	/* 6 */
	u16 wIDItotalSectors[2];	/* 7-8 high,low */
	u16 wIDIreserved1[11];		/* 9-19 */
	u16 wIDIbufferType;		/* 20 */
	u16 wIDIbufferSize;		/* 21 */
	u16 wIDIlongCmdECC;		/* 22 */
	u16 wIDIfirmVersion[4];		/* 23-26 */
	u16 wIDImodelName[20];		/* 27-46 */
	u16 wIDIreserved2;		/* 47 */
	u16 wIDIlongWordSupported;	/* 48 */
	u16 wIDIdmaSupported;		/* 49 */
	u16 wIDIreserved3;		/* 50 */
	u16 wIDIpioTiming;		/* 51 */
	u16 wIDIdmaTiming;		/* 52 */
	u16 wIDItransferParameter;	/* 53 */
	u16 wIDIformattedCylinder;	/* 54 */
	u16 wIDIformattedHead;		/* 55 */
	u16 wIDIformattedSectorsPerTrack;/* 56 */
	u16 wIDIformattedTotalSectors[2];/* 57-58 */
	u16 wIDImultiSector;		/* 59 */
	u16 wIDIlbaSectors[2];		/* 60-61 */
	u16 wIDIsingleWordDMA;		/* 62 */
	u16 wIDImultiWordDMA;		/* 63 */
	u16 wIDIreserved4[192];		/* 64-255 */
};

struct ms_bootblock_sysent_rec {
	u32 dwStart;
	u32 dwSize;
	u8 bType;
	u8 bReserved[3];
};

struct ms_bootblock_sysent {
	struct ms_bootblock_sysent_rec entry[MS_NUMBER_OF_SYSTEM_ENTRY];
};

struct ms_bootblock_sysinf {
	u8 bMsClass;			/* must be 1 */
	u8 bCardType;			/* see below */
	u16 wBlockSize;			/* n KB */
	u16 wBlockNumber;		/* number of physical blocks */
	u16 wTotalBlockNumber;		/* number of logical blocks */
	u16 wPageSize;			/* must be 0x200 */
	u8 bExtraSize;			/* 0x10 */
	u8 bSecuritySupport;
	u8 bAssemblyDate[8];
	u8 bFactoryArea[4];
	u8 bAssemblyMakerCode;
	u8 bAssemblyMachineCode[3];
	u16 wMemoryMakerCode;
	u16 wMemoryDeviceCode;
	u16 wMemorySize;
	u8 bReserved1;
	u8 bReserved2;
	u8 bVCC;
	u8 bVPP;
	u16 wControllerChipNumber;
	u16 wControllerFunction;	/* New MS */
	u8 bReserved3[9];		/* New MS */
	u8 bParallelSupport;		/* New MS */
	u16 wFormatValue;		/* New MS */
	u8 bFormatType;
	u8 bUsage;
	u8 bDeviceType;
	u8 bReserved4[22];
	u8 bFUValue3;
	u8 bFUValue4;
	u8 bReserved5[15];
};

struct ms_bootblock_header {
	u16 wBlockID;
	u16 wFormatVersion;
	u8 bReserved1[184];
	u8 bNumberOfDataEntry;
	u8 bReserved2[179];
};

struct ms_bootblock_page0 {
	struct ms_bootblock_header header;
	struct ms_bootblock_sysent sysent;
	struct ms_bootblock_sysinf sysinf;
};

struct ms_bootblock_cis_idi {
	union {
		struct ms_bootblock_cis cis;
		u8 dmy[256];
	} cis;
	union {
		struct ms_bootblock_idi idi;
		u8 dmy[256];
	} idi;
};

/* ENE MS Lib struct */
struct ms_lib_type_extdat {
	u8 reserved;
	u8 intr;
	u8 status0;
	u8 status1;
	u8 ovrflg;
	u8 mngflg;
	u16 logadr;
};

struct ms_lib_ctrl {
	u32 flags;
	u32 BytesPerSector;
	u32 NumberOfCylinder;
	u32 SectorsPerCylinder;
	u16 cardType;			/* R/W, RO, Hybrid */
	u16 blockSize;
	u16 PagesPerBlock;
	u16 NumberOfPhyBlock;
	u16 NumberOfLogBlock;
	u16 NumberOfSegment;
	u16 *Phy2LogMap;		/* phy2log table */
	u16 *Log2PhyMap;		/* log2phy table */
	u16 wrtblk;
	unsigned char *pagemap[(MS_MAX_PAGES_PER_BLOCK + (MS_LIB_BITS_PER_BYTE - 1)) / MS_LIB_BITS_PER_BYTE];
	unsigned char *blkpag;
	struct ms_lib_type_extdat *blkext;
	unsigned char copybuf[512];
};

/* SD Block Length */
/* 2^9 = 512 bytes, the HW maximum read/write data length */
#define SD_BLOCK_LEN	9
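/*
 * Per-device state, kept in us->extra and released through
 * ene_ub6250_info_destructor(). bbuf is a 512-byte kmalloc'd bounce
 * buffer used by the init and status helpers below for small
 * register/status transfers.
 */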
struct ene_ub6250_info {

	/* I/O bounce buffer */
	u8		*bbuf;

	/* for 6250 code */
	u8		SD_Status;
	u8		MS_Status;
	u8		SM_Status;

	/* ----- SD Control Data ---------------- */
	/* SD_REGISTER SD_Regs; */
	u16		SD_Block_Mult;
	u8		SD_READ_BL_LEN;
	u16		SD_C_SIZE;
	u8		SD_C_SIZE_MULT;

	/* SD/MMC new spec. */
	u8		SD_SPEC_VER;
	u8		SD_CSD_VER;
	u8		SD20_HIGH_CAPACITY;
	u32		HC_C_SIZE;
	u8		MMC_SPEC_VER;
	u8		MMC_BusWidth;
	u8		MMC_HIGH_CAPACITY;

	/* ----- MS Control Data ---------------- */
	bool		MS_SWWP;
	u32		MSP_TotalBlock;
	struct ms_lib_ctrl MS_Lib;
	bool		MS_IsRWPage;
	u16		MS_Model;

	/* ----- SM Control Data ---------------- */
	u8		SM_DeviceID;
	u8		SM_CardID;

	unsigned char	*testbuf;
	u8		BIN_FLAG;
	u32		bl_num;
	int		SrbStatus;

	/* ------ Power Management --------------- */
	bool		Power_IsResum;
};

static int ene_sd_init(struct us_data *us);
static int ene_ms_init(struct us_data *us);
static int ene_load_bincode(struct us_data *us, unsigned char flag);

static void ene_ub6250_info_destructor(void *extra)
{
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) extra;

	if (!extra)
		return;
	kfree(info->bbuf);
}
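/*
 * One full bulk-only-transport round trip: send the CBW that the
 * caller prepared in us->iobuf, move the data stage in the direction
 * given by fDir (scatter-gather if use_sg, else a flat buffer), then
 * collect the CSW, retrying once on a zero-length or stalled status
 * stage before giving up.
 */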
static int ene_send_scsi_cmd(struct us_data *us, u8 fDir, void *buf, int use_sg)
{
	struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf;
	struct bulk_cs_wrap *bcs = (struct bulk_cs_wrap *) us->iobuf;
	int result;
	unsigned int residue;
	unsigned int cswlen = 0, partial = 0;
	unsigned int transfer_length = bcb->DataTransferLength;

	/* usb_stor_dbg(us, "transport --- ene_send_scsi_cmd\n"); */
	/* send cmd to out endpoint */
	result = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe,
					    bcb, US_BULK_CB_WRAP_LEN, NULL);
	if (result != USB_STOR_XFER_GOOD) {
		usb_stor_dbg(us, "send cmd to out endpoint fail ---\n");
		return USB_STOR_TRANSPORT_ERROR;
	}

	if (buf) {
		unsigned int pipe = fDir;

		if (fDir == FDIR_READ)
			pipe = us->recv_bulk_pipe;
		else
			pipe = us->send_bulk_pipe;

		/* Bulk */
		if (use_sg) {
			result = usb_stor_bulk_srb(us, pipe, us->srb);
		} else {
			result = usb_stor_bulk_transfer_sg(us, pipe, buf,
						transfer_length, 0, &partial);
		}
		if (result != USB_STOR_XFER_GOOD) {
			usb_stor_dbg(us, "data transfer fail ---\n");
			return USB_STOR_TRANSPORT_ERROR;
		}
	}

	/* Get CSW for device status */
	result = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe, bcs,
					    US_BULK_CS_WRAP_LEN, &cswlen);

	if (result == USB_STOR_XFER_SHORT && cswlen == 0) {
		usb_stor_dbg(us, "Received 0-length CSW; retrying...\n");
		result = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe,
					    bcs, US_BULK_CS_WRAP_LEN, &cswlen);
	}

	if (result == USB_STOR_XFER_STALLED) {
		/* get the status again */
		usb_stor_dbg(us, "Attempting to get CSW (2nd try)...\n");
		result = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe,
						bcs, US_BULK_CS_WRAP_LEN, NULL);
	}

	if (result != USB_STOR_XFER_GOOD)
		return USB_STOR_TRANSPORT_ERROR;

	/* check bulk status */
	residue = le32_to_cpu(bcs->Residue);

	/*
	 * try to compute the actual residue, based on how much data
	 * was really transferred and what the device tells us
	 */
	if (residue && !(us->fflags & US_FL_IGNORE_RESIDUE)) {
		residue = min(residue, transfer_length);
		if (us->srb != NULL)
			scsi_set_resid(us->srb, max(scsi_get_resid(us->srb),
								residue));
	}

	if (bcs->Status != US_BULK_STAT_OK)
		return USB_STOR_TRANSPORT_ERROR;

	return USB_STOR_TRANSPORT_GOOD;
}

static int do_scsi_request_sense(struct us_data *us, struct scsi_cmnd *srb)
{
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;
	unsigned char buf[18];

	memset(buf, 0, 18);
	buf[0] = 0x70;				/* Current error */
	buf[2] = info->SrbStatus >> 16;		/* Sense key */
	buf[7] = 10;				/* Additional length */
	buf[12] = info->SrbStatus >> 8;		/* ASC */
	buf[13] = info->SrbStatus;		/* ASCQ */

	usb_stor_set_xfer_buf(buf, sizeof(buf), srb);
	return USB_STOR_TRANSPORT_GOOD;
}

static int do_scsi_inquiry(struct us_data *us, struct scsi_cmnd *srb)
{
	unsigned char data_ptr[36] = {
		0x00, 0x00, 0x02, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x55,
		0x53, 0x42, 0x32, 0x2E, 0x30, 0x20, 0x20, 0x43, 0x61,
		0x72, 0x64, 0x52, 0x65, 0x61, 0x64, 0x65, 0x72, 0x20,
		0x20, 0x20, 0x20, 0x20, 0x20, 0x30, 0x31, 0x30, 0x30 };

	usb_stor_set_xfer_buf(data_ptr, 36, srb);
	return USB_STOR_TRANSPORT_GOOD;
}

static int sd_scsi_test_unit_ready(struct us_data *us, struct scsi_cmnd *srb)
{
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;

	if (!(info->SD_Status & SD_Insert) || !(info->SD_Status & SD_Ready))
		ene_sd_init(us);

	return USB_STOR_TRANSPORT_GOOD;
}

static int sd_scsi_mode_sense(struct us_data *us, struct scsi_cmnd *srb)
{
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;
	unsigned char mediaNoWP[12] = {
		0x0b, 0x00, 0x00, 0x08, 0x00, 0x00,
		0x71, 0xc0, 0x00, 0x00, 0x02, 0x00 };
	unsigned char mediaWP[12]   = {
		0x0b, 0x00, 0x80, 0x08, 0x00, 0x00,
		0x71, 0xc0, 0x00, 0x00, 0x02, 0x00 };

	if (info->SD_Status & SD_WtP)
		usb_stor_set_xfer_buf(mediaWP, 12, srb);
	else
		usb_stor_set_xfer_buf(mediaNoWP, 12, srb);

	return USB_STOR_TRANSPORT_GOOD;
}
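/*
 * Capacity math used below follows the SD CSD layout: for a v1 card
 * the block count is BLOCK_MULT * (C_SIZE + 1) * 2^(C_SIZE_MULT + 2);
 * e.g. C_SIZE = 4095, C_SIZE_MULT = 7, READ_BL_LEN = 9 gives
 * 4096 * 512 * 512 bytes = 1 GiB. For high-capacity SD cards the
 * firmware reports HC_C_SIZE in 512-KiB units, hence
 * (HC_C_SIZE + 1) * 1024 sectors of 0x200 bytes; the MMC branch
 * instead treats HC_C_SIZE as a raw sector count.
 */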
static int sd_scsi_read_capacity(struct us_data *us, struct scsi_cmnd *srb)
{
	u32	bl_num;
	u32	bl_len;
	unsigned int offset = 0;
	unsigned char buf[8];
	struct scatterlist *sg = NULL;
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;

	usb_stor_dbg(us, "sd_scsi_read_capacity\n");
	if (info->SD_Status & SD_HiCapacity) {
		bl_len = 0x200;
		if (info->SD_Status & SD_IsMMC)
			bl_num = info->HC_C_SIZE - 1;
		else
			bl_num = (info->HC_C_SIZE + 1) * 1024 - 1;
	} else {
		bl_len = 1 << (info->SD_READ_BL_LEN);
		bl_num = info->SD_Block_Mult * (info->SD_C_SIZE + 1)
				* (1 << (info->SD_C_SIZE_MULT + 2)) - 1;
	}
	info->bl_num = bl_num;
	usb_stor_dbg(us, "bl_len = %x\n", bl_len);
	usb_stor_dbg(us, "bl_num = %x\n", bl_num);

	/* srb->request_bufflen = 8; */
	buf[0] = (bl_num >> 24) & 0xff;
	buf[1] = (bl_num >> 16) & 0xff;
	buf[2] = (bl_num >> 8) & 0xff;
	buf[3] = (bl_num >> 0) & 0xff;
	buf[4] = (bl_len >> 24) & 0xff;
	buf[5] = (bl_len >> 16) & 0xff;
	buf[6] = (bl_len >> 8) & 0xff;
	buf[7] = (bl_len >> 0) & 0xff;

	usb_stor_access_xfer_buf(buf, 8, srb, &sg, &offset, TO_XFER_BUF);

	return USB_STOR_TRANSPORT_GOOD;
}

static int sd_scsi_read(struct us_data *us, struct scsi_cmnd *srb)
{
	int result;
	unsigned char *cdb = srb->cmnd;
	struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf;
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;
	u32 bn = ((cdb[2] << 24) & 0xff000000) | ((cdb[3] << 16) & 0x00ff0000) |
		 ((cdb[4] << 8) & 0x0000ff00) | ((cdb[5] << 0) & 0x000000ff);
	u16 blen = ((cdb[7] << 8) & 0xff00) | ((cdb[8] << 0) & 0x00ff);
	u32 bnByte = bn * 0x200;
	u32 blenByte = blen * 0x200;

	if (bn > info->bl_num)
		return USB_STOR_TRANSPORT_ERROR;

	result = ene_load_bincode(us, SD_RW_PATTERN);
	if (result != USB_STOR_XFER_GOOD) {
		usb_stor_dbg(us, "Load SD RW pattern Fail !!\n");
		return USB_STOR_TRANSPORT_ERROR;
	}

	if (info->SD_Status & SD_HiCapacity)
		bnByte = bn;

	/* set up the command wrapper */
	memset(bcb, 0, sizeof(struct bulk_cb_wrap));
	bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN);
	bcb->DataTransferLength = blenByte;
	bcb->Flags  = US_BULK_FLAG_IN;
	bcb->CDB[0] = 0xF1;
	bcb->CDB[5] = (unsigned char)(bnByte);
	bcb->CDB[4] = (unsigned char)(bnByte >> 8);
	bcb->CDB[3] = (unsigned char)(bnByte >> 16);
	bcb->CDB[2] = (unsigned char)(bnByte >> 24);

	result = ene_send_scsi_cmd(us, FDIR_READ, scsi_sglist(srb), 1);
	return result;
}

static int sd_scsi_write(struct us_data *us, struct scsi_cmnd *srb)
{
	int result;
	unsigned char *cdb = srb->cmnd;
	struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf;
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;
	u32 bn = ((cdb[2] << 24) & 0xff000000) | ((cdb[3] << 16) & 0x00ff0000) |
		 ((cdb[4] << 8) & 0x0000ff00) | ((cdb[5] << 0) & 0x000000ff);
	u16 blen = ((cdb[7] << 8) & 0xff00) | ((cdb[8] << 0) & 0x00ff);
	u32 bnByte = bn * 0x200;
	u32 blenByte = blen * 0x200;

	if (bn > info->bl_num)
		return USB_STOR_TRANSPORT_ERROR;

	result = ene_load_bincode(us, SD_RW_PATTERN);
	if (result != USB_STOR_XFER_GOOD) {
		usb_stor_dbg(us, "Load SD RW pattern Fail !!\n");
		return USB_STOR_TRANSPORT_ERROR;
	}

	if (info->SD_Status & SD_HiCapacity)
		bnByte = bn;

	/* set up the command wrapper */
	memset(bcb, 0, sizeof(struct bulk_cb_wrap));
	bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN);
	bcb->DataTransferLength = blenByte;
	bcb->Flags  = US_BULK_FLAG_OUT;
	bcb->CDB[0] = 0xF0;
	bcb->CDB[5] = (unsigned char)(bnByte);
	bcb->CDB[4] = (unsigned char)(bnByte >> 8);
	bcb->CDB[3] = (unsigned char)(bnByte >> 16);
	bcb->CDB[2] = (unsigned char)(bnByte >> 24);

	result = ene_send_scsi_cmd(us, FDIR_WRITE, scsi_sglist(srb), 1);
	return result;
}

/*
 * ENE MS Card
 */
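/*
 * The MS library keeps two mirrored tables: Phy2LogMap[phy] is either
 * the owning logical block or a MS_LB_* marker (boot block, not used,
 * error), and Log2PhyMap[log] points back the other way. The helpers
 * below keep the pair consistent whenever a block is mapped, marked
 * or retired.
 */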
static int ms_lib_set_logicalpair(struct us_data *us, u16 logblk, u16 phyblk)
{
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;

	if ((logblk >= info->MS_Lib.NumberOfLogBlock) ||
	    (phyblk >= info->MS_Lib.NumberOfPhyBlock))
		return (u32)-1;

	info->MS_Lib.Phy2LogMap[phyblk] = logblk;
	info->MS_Lib.Log2PhyMap[logblk] = phyblk;

	return 0;
}

static int ms_lib_set_logicalblockmark(struct us_data *us, u16 phyblk, u16 mark)
{
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;

	if (phyblk >= info->MS_Lib.NumberOfPhyBlock)
		return (u32)-1;

	info->MS_Lib.Phy2LogMap[phyblk] = mark;

	return 0;
}

static int ms_lib_set_initialerrorblock(struct us_data *us, u16 phyblk)
{
	return ms_lib_set_logicalblockmark(us, phyblk, MS_LB_INITIAL_ERROR);
}

static int ms_lib_set_bootblockmark(struct us_data *us, u16 phyblk)
{
	return ms_lib_set_logicalblockmark(us, phyblk, MS_LB_BOOT_BLOCK);
}

static int ms_lib_free_logicalmap(struct us_data *us)
{
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;

	kfree(info->MS_Lib.Phy2LogMap);
	info->MS_Lib.Phy2LogMap = NULL;

	kfree(info->MS_Lib.Log2PhyMap);
	info->MS_Lib.Log2PhyMap = NULL;

	return 0;
}

static int ms_lib_alloc_logicalmap(struct us_data *us)
{
	u32 i;
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;

	info->MS_Lib.Phy2LogMap = kmalloc_array(info->MS_Lib.NumberOfPhyBlock,
						sizeof(u16), GFP_KERNEL);
	info->MS_Lib.Log2PhyMap = kmalloc_array(info->MS_Lib.NumberOfLogBlock,
						sizeof(u16), GFP_KERNEL);

	if ((info->MS_Lib.Phy2LogMap == NULL) ||
	    (info->MS_Lib.Log2PhyMap == NULL)) {
		ms_lib_free_logicalmap(us);
		return (u32)-1;
	}

	for (i = 0; i < info->MS_Lib.NumberOfPhyBlock; i++)
		info->MS_Lib.Phy2LogMap[i] = MS_LB_NOT_USED;

	for (i = 0; i < info->MS_Lib.NumberOfLogBlock; i++)
		info->MS_Lib.Log2PhyMap[i] = MS_LB_NOT_USED;

	return 0;
}

static void ms_lib_clear_writebuf(struct us_data *us)
{
	int i;
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;

	info->MS_Lib.wrtblk = (u16)-1;
	ms_lib_clear_pagemap(info);

	if (info->MS_Lib.blkpag)
		memset(info->MS_Lib.blkpag, 0xff,
		       info->MS_Lib.PagesPerBlock * info->MS_Lib.BytesPerSector);

	if (info->MS_Lib.blkext) {
		for (i = 0; i < info->MS_Lib.PagesPerBlock; i++) {
			info->MS_Lib.blkext[i].status1 = MS_REG_ST1_DEFAULT;
			info->MS_Lib.blkext[i].ovrflg = MS_REG_OVR_DEFAULT;
			info->MS_Lib.blkext[i].mngflg = MS_REG_MNG_DEFAULT;
			info->MS_Lib.blkext[i].logadr = MS_LB_NOT_USED;
		}
	}
}

static int ms_count_freeblock(struct us_data *us, u16 PhyBlock)
{
	u32 Ende, Count;
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;

	Ende = PhyBlock + MS_PHYSICAL_BLOCKS_PER_SEGMENT;
	for (Count = 0; PhyBlock < Ende; PhyBlock++) {
		switch (info->MS_Lib.Phy2LogMap[PhyBlock]) {
		case MS_LB_NOT_USED:
		case MS_LB_NOT_USED_ERASED:
			Count++;
			break;
		default:
			break;
		}
	}

	return Count;
}
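/*
 * The reader uses vendor-specific CDBs: opcode 0xF1/0x02 reads one
 * 512-byte page addressed as PhyBlock * 32 + PageNum, packed big-endian
 * into CDB[2..5]; opcode 0xF1/0x03 fetches the 4 bytes of extra data
 * (overwrite flag, management flag and the big-endian logical address)
 * for a single page.
 */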
static int ms_read_readpage(struct us_data *us, u32 PhyBlockAddr,
		u8 PageNum, u32 *PageBuf, struct ms_lib_type_extdat *ExtraDat)
{
	struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf;
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;
	u8 *bbuf = info->bbuf;
	int result;
	u32 bn = PhyBlockAddr * 0x20 + PageNum;

	result = ene_load_bincode(us, MS_RW_PATTERN);
	if (result != USB_STOR_XFER_GOOD)
		return USB_STOR_TRANSPORT_ERROR;

	/* Read Page Data */
	memset(bcb, 0, sizeof(struct bulk_cb_wrap));
	bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN);
	bcb->DataTransferLength = 0x200;
	bcb->Flags  = US_BULK_FLAG_IN;
	bcb->CDB[0] = 0xF1;
	bcb->CDB[1] = 0x02; /* in init.c ENE_MSInit() is 0x01 */
	bcb->CDB[5] = (unsigned char)(bn);
	bcb->CDB[4] = (unsigned char)(bn >> 8);
	bcb->CDB[3] = (unsigned char)(bn >> 16);
	bcb->CDB[2] = (unsigned char)(bn >> 24);

	result = ene_send_scsi_cmd(us, FDIR_READ, PageBuf, 0);
	if (result != USB_STOR_XFER_GOOD)
		return USB_STOR_TRANSPORT_ERROR;

	/* Read Extra Data */
	memset(bcb, 0, sizeof(struct bulk_cb_wrap));
	bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN);
	bcb->DataTransferLength = 0x4;
	bcb->Flags  = US_BULK_FLAG_IN;
	bcb->CDB[0] = 0xF1;
	bcb->CDB[1] = 0x03;
	bcb->CDB[5] = (unsigned char)(PageNum);
	bcb->CDB[4] = (unsigned char)(PhyBlockAddr);
	bcb->CDB[3] = (unsigned char)(PhyBlockAddr >> 8);
	bcb->CDB[2] = (unsigned char)(PhyBlockAddr >> 16);
	bcb->CDB[6] = 0x01;

	result = ene_send_scsi_cmd(us, FDIR_READ, bbuf, 0);
	if (result != USB_STOR_XFER_GOOD)
		return USB_STOR_TRANSPORT_ERROR;

	ExtraDat->reserved = 0;
	ExtraDat->intr     = 0x80; /* not yet supported by the firmware */
	ExtraDat->status0  = 0x10; /* not yet supported by the firmware */
	ExtraDat->status1  = 0x00; /* not yet supported by the firmware */
	ExtraDat->ovrflg   = bbuf[0];
	ExtraDat->mngflg   = bbuf[1];
	ExtraDat->logadr   = memstick_logaddr(bbuf[2], bbuf[3]);

	return USB_STOR_TRANSPORT_GOOD;
}

static int ms_lib_process_bootblock(struct us_data *us, u16 PhyBlock, u8 *PageData)
{
	struct ms_bootblock_sysent *SysEntry;
	struct ms_bootblock_sysinf *SysInfo;
	u32 i, result;
	u8 PageNumber;
	u8 *PageBuffer;
	struct ms_lib_type_extdat ExtraData;
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;

	PageBuffer = kzalloc(MS_BYTES_PER_PAGE * 2, GFP_KERNEL);
	if (PageBuffer == NULL)
		return (u32)-1;

	result = (u32)-1;

	SysInfo = &(((struct ms_bootblock_page0 *)PageData)->sysinf);

	if ((SysInfo->bMsClass != MS_SYSINF_MSCLASS_TYPE_1) ||
		(be16_to_cpu(SysInfo->wPageSize) != MS_SYSINF_PAGE_SIZE) ||
		((SysInfo->bSecuritySupport & MS_SYSINF_SECURITY) ==
						MS_SYSINF_SECURITY_SUPPORT) ||
		(SysInfo->bReserved1 != MS_SYSINF_RESERVED1) ||
		(SysInfo->bReserved2 != MS_SYSINF_RESERVED2) ||
		(SysInfo->bFormatType != MS_SYSINF_FORMAT_FAT) ||
		(SysInfo->bUsage != MS_SYSINF_USAGE_GENERAL))
		goto exit;

	switch (info->MS_Lib.cardType = SysInfo->bCardType) {
	case MS_SYSINF_CARDTYPE_RDONLY:
		ms_lib_ctrl_set(info, MS_LIB_CTRL_RDONLY);
		break;
	case MS_SYSINF_CARDTYPE_RDWR:
		ms_lib_ctrl_reset(info, MS_LIB_CTRL_RDONLY);
		break;
	case MS_SYSINF_CARDTYPE_HYBRID:
	default:
		goto exit;
	}

	info->MS_Lib.blockSize = be16_to_cpu(SysInfo->wBlockSize);
	info->MS_Lib.NumberOfPhyBlock = be16_to_cpu(SysInfo->wBlockNumber);
	info->MS_Lib.NumberOfLogBlock = be16_to_cpu(SysInfo->wTotalBlockNumber) - 2;
	info->MS_Lib.PagesPerBlock = info->MS_Lib.blockSize * SIZE_OF_KIRO /
							MS_BYTES_PER_PAGE;
	info->MS_Lib.NumberOfSegment = info->MS_Lib.NumberOfPhyBlock /
						MS_PHYSICAL_BLOCKS_PER_SEGMENT;
	info->MS_Model = be16_to_cpu(SysInfo->wMemorySize);

	/* Allocate the logical and physical block maps */
	if (ms_lib_alloc_logicalmap(us))
		goto exit;

	/* Mark the boot block */
	ms_lib_set_bootblockmark(us, PhyBlock);

	SysEntry = &(((struct ms_bootblock_page0 *)PageData)->sysent);

	for (i = 0; i < MS_NUMBER_OF_SYSTEM_ENTRY; i++) {
		u32 EntryOffset, EntrySize;

		EntryOffset = be32_to_cpu(SysEntry->entry[i].dwStart);

		if (EntryOffset == 0xffffff)
			continue;
		EntrySize = be32_to_cpu(SysEntry->entry[i].dwSize);

		if (EntrySize == 0)
			continue;

		if (EntryOffset + MS_BYTES_PER_PAGE + EntrySize >
				info->MS_Lib.blockSize * (u32)SIZE_OF_KIRO)
			continue;

		if (i == 0) {
			u8 PrevPageNumber = 0;
			u16 phyblk;

			if (SysEntry->entry[i].bType != MS_SYSENT_TYPE_INVALID_BLOCK)
				goto exit;

			while (EntrySize > 0) {
				PageNumber = (u8)(EntryOffset / MS_BYTES_PER_PAGE + 1);
				if (PageNumber != PrevPageNumber) {
					switch (ms_read_readpage(us, PhyBlock,
							PageNumber,
							(u32 *)PageBuffer,
							&ExtraData)) {
					case MS_STATUS_SUCCESS:
						break;
					case MS_STATUS_WRITE_PROTECT:
					case MS_ERROR_FLASH_READ:
					case MS_STATUS_ERROR:
					default:
						goto exit;
					}

					PrevPageNumber = PageNumber;
				}

				phyblk = be16_to_cpu(*(u16 *)(PageBuffer +
						(EntryOffset % MS_BYTES_PER_PAGE)));
				if (phyblk < 0x0fff)
					ms_lib_set_initialerrorblock(us, phyblk);

				EntryOffset += 2;
				EntrySize -= 2;
			}
		} else if (i == 1) {	/* CIS/IDI */
			struct ms_bootblock_idi *idi;

			if (SysEntry->entry[i].bType != MS_SYSENT_TYPE_CIS_IDI)
				goto exit;

			switch (ms_read_readpage(us, PhyBlock,
					(u8)(EntryOffset / MS_BYTES_PER_PAGE + 1),
					(u32 *)PageBuffer, &ExtraData)) {
			case MS_STATUS_SUCCESS:
				break;
			case MS_STATUS_WRITE_PROTECT:
			case MS_ERROR_FLASH_READ:
			case MS_STATUS_ERROR:
			default:
				goto exit;
			}

			idi = &((struct ms_bootblock_cis_idi *)(PageBuffer +
					(EntryOffset % MS_BYTES_PER_PAGE)))->idi.idi;
			if (le16_to_cpu(idi->wIDIgeneralConfiguration) !=
							MS_IDI_GENERAL_CONF)
				goto exit;

			info->MS_Lib.BytesPerSector =
					le16_to_cpu(idi->wIDIbytesPerSector);
			if (info->MS_Lib.BytesPerSector != MS_BYTES_PER_PAGE)
				goto exit;
		}
	} /* End for ... */

	result = 0;

exit:
	if (result)
		ms_lib_free_logicalmap(us);

	kfree(PageBuffer);
	return result;
}
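/*
 * Page 0 of a boot block carries a header (block ID and format
 * version), four system entries and the system information record.
 * ms_lib_process_bootblock() above consumes entry 0 as the list of
 * factory-marked defective blocks and entry 1 as the CIS/IDI pair that
 * confirms the sector size.
 */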
static void ms_lib_free_writebuf(struct us_data *us)
{
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;

	info->MS_Lib.wrtblk = (u16)-1;

	ms_lib_clear_pagemap(info);

	if (info->MS_Lib.blkpag) {
		kfree(info->MS_Lib.blkpag);
		info->MS_Lib.blkpag = NULL;
	}

	if (info->MS_Lib.blkext) {
		kfree(info->MS_Lib.blkext);
		info->MS_Lib.blkext = NULL;
	}
}

static void ms_lib_free_allocatedarea(struct us_data *us)
{
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;

	ms_lib_free_writebuf(us);	/* Free MS_Lib.pagemap */
	ms_lib_free_logicalmap(us);	/* kfree MS_Lib.Phy2LogMap and Log2PhyMap */

	/* set struct us point flag to 0 */
	info->MS_Lib.flags = 0;
	info->MS_Lib.BytesPerSector = 0;
	info->MS_Lib.SectorsPerCylinder = 0;

	info->MS_Lib.cardType = 0;
	info->MS_Lib.blockSize = 0;
	info->MS_Lib.PagesPerBlock = 0;

	info->MS_Lib.NumberOfPhyBlock = 0;
	info->MS_Lib.NumberOfLogBlock = 0;
}

static int ms_lib_alloc_writebuf(struct us_data *us)
{
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;

	info->MS_Lib.wrtblk = (u16)-1;

	info->MS_Lib.blkpag = kmalloc_array(info->MS_Lib.PagesPerBlock,
					    info->MS_Lib.BytesPerSector,
					    GFP_KERNEL);
	info->MS_Lib.blkext = kmalloc_array(info->MS_Lib.PagesPerBlock,
					    sizeof(struct ms_lib_type_extdat),
					    GFP_KERNEL);

	if ((info->MS_Lib.blkpag == NULL) || (info->MS_Lib.blkext == NULL)) {
		ms_lib_free_writebuf(us);
		return (u32)-1;
	}

	ms_lib_clear_writebuf(us);

	return 0;
}

static int ms_lib_force_setlogical_pair(struct us_data *us, u16 logblk, u16 phyblk)
{
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;

	if (logblk == MS_LB_NOT_USED)
		return 0;

	if ((logblk >= info->MS_Lib.NumberOfLogBlock) ||
		(phyblk >= info->MS_Lib.NumberOfPhyBlock))
		return (u32)-1;

	info->MS_Lib.Phy2LogMap[phyblk] = logblk;
	info->MS_Lib.Log2PhyMap[logblk] = phyblk;

	return 0;
}

static int ms_read_copyblock(struct us_data *us, u16 oldphy, u16 newphy,
			u16 PhyBlockAddr, u8 PageNum, unsigned char *buf, u16 len)
{
	struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf;
	int result;

	result = ene_load_bincode(us, MS_RW_PATTERN);
	if (result != USB_STOR_XFER_GOOD)
		return USB_STOR_TRANSPORT_ERROR;

	memset(bcb, 0, sizeof(struct bulk_cb_wrap));
	bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN);
	bcb->DataTransferLength = 0x200 * len;
	bcb->Flags = US_BULK_FLAG_OUT;
	bcb->CDB[0] = 0xF0;
	bcb->CDB[1] = 0x08;
	bcb->CDB[4] = (unsigned char)(oldphy);
	bcb->CDB[3] = (unsigned char)(oldphy >> 8);
	bcb->CDB[2] = 0; /* (BYTE)(oldphy >> 16) */
	bcb->CDB[7] = (unsigned char)(newphy);
	bcb->CDB[6] = (unsigned char)(newphy >> 8);
	bcb->CDB[5] = 0; /* (BYTE)(newphy >> 16) */
	bcb->CDB[9] = (unsigned char)(PhyBlockAddr);
	bcb->CDB[8] = (unsigned char)(PhyBlockAddr >> 8);
	bcb->CDB[10] = PageNum;

	result = ene_send_scsi_cmd(us, FDIR_WRITE, buf, 0);
	if (result != USB_STOR_XFER_GOOD)
		return USB_STOR_TRANSPORT_ERROR;

	return USB_STOR_TRANSPORT_GOOD;
}

static int ms_read_eraseblock(struct us_data *us, u32 PhyBlockAddr)
{
	struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf;
	int result;
	u32 bn = PhyBlockAddr;

	result = ene_load_bincode(us, MS_RW_PATTERN);
	if (result != USB_STOR_XFER_GOOD)
		return USB_STOR_TRANSPORT_ERROR;

	memset(bcb, 0, sizeof(struct bulk_cb_wrap));
	bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN);
	bcb->DataTransferLength = 0x200;
	bcb->Flags = US_BULK_FLAG_IN;
	bcb->CDB[0] = 0xF2;
	bcb->CDB[1] = 0x06;
	bcb->CDB[4] = (unsigned char)(bn);
	bcb->CDB[3] = (unsigned char)(bn >> 8);
	bcb->CDB[2] = (unsigned char)(bn >> 16);

	result = ene_send_scsi_cmd(us, FDIR_READ, NULL, 0);
	if (result != USB_STOR_XFER_GOOD)
		return USB_STOR_TRANSPORT_ERROR;

	return USB_STOR_TRANSPORT_GOOD;
}
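/*
 * Lifecycle of a physical block in the maps: MS_LB_NOT_USED (never
 * mapped) becomes MS_LB_NOT_USED_ERASED after a successful erase and
 * is then eligible for allocation; a failed erase or write demotes it
 * to MS_LB_ACQUIRED_ERROR, while factory defects stay at
 * MS_LB_INITIAL_ERROR from boot-block parsing onwards.
 */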
static int ms_lib_check_disableblock(struct us_data *us, u16 PhyBlock)
{
	unsigned char *PageBuf = NULL;
	u16 result = MS_STATUS_SUCCESS;
	u16 blk, index = 0;
	struct ms_lib_type_extdat extdat;
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;

	PageBuf = kmalloc(MS_BYTES_PER_PAGE, GFP_KERNEL);
	if (PageBuf == NULL) {
		result = MS_NO_MEMORY_ERROR;
		goto exit;
	}

	ms_read_readpage(us, PhyBlock, 1, (u32 *)PageBuf, &extdat);
	do {
		blk = be16_to_cpu(PageBuf[index]);
		if (blk == MS_LB_NOT_USED)
			break;
		if (blk == info->MS_Lib.Log2PhyMap[0]) {
			result = MS_ERROR_FLASH_READ;
			break;
		}
		index++;
	} while (1);

exit:
	kfree(PageBuf);
	return result;
}

static int ms_lib_setacquired_errorblock(struct us_data *us, u16 phyblk)
{
	u16 log;
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;

	if (phyblk >= info->MS_Lib.NumberOfPhyBlock)
		return (u32)-1;

	log = info->MS_Lib.Phy2LogMap[phyblk];

	if (log < info->MS_Lib.NumberOfLogBlock)
		info->MS_Lib.Log2PhyMap[log] = MS_LB_NOT_USED;

	if (info->MS_Lib.Phy2LogMap[phyblk] != MS_LB_INITIAL_ERROR)
		info->MS_Lib.Phy2LogMap[phyblk] = MS_LB_ACQUIRED_ERROR;

	return 0;
}

static int ms_lib_overwrite_extra(struct us_data *us, u32 PhyBlockAddr,
				u8 PageNum, u8 OverwriteFlag)
{
	struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf;
	int result;

	result = ene_load_bincode(us, MS_RW_PATTERN);
	if (result != USB_STOR_XFER_GOOD)
		return USB_STOR_TRANSPORT_ERROR;

	memset(bcb, 0, sizeof(struct bulk_cb_wrap));
	bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN);
	bcb->DataTransferLength = 0x4;
	bcb->Flags = US_BULK_FLAG_IN;
	bcb->CDB[0] = 0xF2;
	bcb->CDB[1] = 0x05;
	bcb->CDB[5] = (unsigned char)(PageNum);
	bcb->CDB[4] = (unsigned char)(PhyBlockAddr);
	bcb->CDB[3] = (unsigned char)(PhyBlockAddr >> 8);
	bcb->CDB[2] = (unsigned char)(PhyBlockAddr >> 16);
	bcb->CDB[6] = OverwriteFlag;
	bcb->CDB[7] = 0xFF;
	bcb->CDB[8] = 0xFF;
	bcb->CDB[9] = 0xFF;

	result = ene_send_scsi_cmd(us, FDIR_READ, NULL, 0);
	if (result != USB_STOR_XFER_GOOD)
		return USB_STOR_TRANSPORT_ERROR;

	return USB_STOR_TRANSPORT_GOOD;
}

static int ms_lib_error_phyblock(struct us_data *us, u16 phyblk)
{
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;

	if (phyblk >= info->MS_Lib.NumberOfPhyBlock)
		return MS_STATUS_ERROR;

	ms_lib_setacquired_errorblock(us, phyblk);

	if (ms_lib_iswritable(info))
		return ms_lib_overwrite_extra(us, phyblk, 0,
				(u8)(~MS_REG_OVR_BKST & BYTE_MASK));

	return MS_STATUS_SUCCESS;
}

static int ms_lib_erase_phyblock(struct us_data *us, u16 phyblk)
{
	u16 log;
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;

	if (phyblk >= info->MS_Lib.NumberOfPhyBlock)
		return MS_STATUS_ERROR;

	log = info->MS_Lib.Phy2LogMap[phyblk];

	if (log < info->MS_Lib.NumberOfLogBlock)
		info->MS_Lib.Log2PhyMap[log] = MS_LB_NOT_USED;

	info->MS_Lib.Phy2LogMap[phyblk] = MS_LB_NOT_USED;

	if (ms_lib_iswritable(info)) {
		switch (ms_read_eraseblock(us, phyblk)) {
		case MS_STATUS_SUCCESS:
			info->MS_Lib.Phy2LogMap[phyblk] = MS_LB_NOT_USED_ERASED;
			return MS_STATUS_SUCCESS;
		case MS_ERROR_FLASH_ERASE:
		case MS_STATUS_INT_ERROR:
			ms_lib_error_phyblock(us, phyblk);
			return MS_ERROR_FLASH_ERASE;
		case MS_STATUS_ERROR:
		default:
			ms_lib_ctrl_set(info, MS_LIB_CTRL_RDONLY);
			ms_lib_setacquired_errorblock(us, phyblk);
			return MS_STATUS_ERROR;
		}
	}

	ms_lib_setacquired_errorblock(us, phyblk);

	return MS_STATUS_SUCCESS;
}

static int ms_lib_read_extra(struct us_data *us, u32 PhyBlock,
				u8 PageNum, struct ms_lib_type_extdat *ExtraDat)
{
	struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf;
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;
	u8 *bbuf = info->bbuf;
	int result;

	memset(bcb, 0, sizeof(struct bulk_cb_wrap));
	bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN);
	bcb->DataTransferLength = 0x4;
	bcb->Flags = US_BULK_FLAG_IN;
	bcb->CDB[0] = 0xF1;
	bcb->CDB[1] = 0x03;
	bcb->CDB[5] = (unsigned char)(PageNum);
	bcb->CDB[4] = (unsigned char)(PhyBlock);
	bcb->CDB[3] = (unsigned char)(PhyBlock >> 8);
	bcb->CDB[2] = (unsigned char)(PhyBlock >> 16);
	bcb->CDB[6] = 0x01;

	result = ene_send_scsi_cmd(us, FDIR_READ, bbuf, 0);
	if (result != USB_STOR_XFER_GOOD)
		return USB_STOR_TRANSPORT_ERROR;

	ExtraDat->reserved = 0;
	ExtraDat->intr     = 0x80; /* not yet supported by the firmware */
	ExtraDat->status0  = 0x10; /* not yet supported by the firmware */
	ExtraDat->status1  = 0x00; /* not yet supported by the firmware */
	ExtraDat->ovrflg   = bbuf[0];
	ExtraDat->mngflg   = bbuf[1];
	ExtraDat->logadr   = memstick_logaddr(bbuf[2], bbuf[3]);

	return USB_STOR_TRANSPORT_GOOD;
}
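/*
 * Free-block allocation scans forward from the block after the one
 * that just filled up, wrapping within the same physical segment, and
 * takes the first erased candidate; blocks that fail the extra-data
 * check are retired along the way.
 */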
static int ms_libsearch_block_from_physical(struct us_data *us, u16 phyblk)
{
	u16 blk;
	struct ms_lib_type_extdat extdat;
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;

	if (phyblk >= info->MS_Lib.NumberOfPhyBlock)
		return MS_LB_ERROR;

	for (blk = phyblk + 1; blk != phyblk; blk++) {
		if ((blk & MS_PHYSICAL_BLOCKS_PER_SEGMENT_MASK) == 0)
			blk -= MS_PHYSICAL_BLOCKS_PER_SEGMENT;

		if (info->MS_Lib.Phy2LogMap[blk] == MS_LB_NOT_USED_ERASED) {
			return blk;
		} else if (info->MS_Lib.Phy2LogMap[blk] == MS_LB_NOT_USED) {
			switch (ms_lib_read_extra(us, blk, 0, &extdat)) {
			case MS_STATUS_SUCCESS:
			case MS_STATUS_SUCCESS_WITH_ECC:
				break;
			case MS_NOCARD_ERROR:
				return MS_NOCARD_ERROR;
			case MS_STATUS_INT_ERROR:
				return MS_LB_ERROR;
			case MS_ERROR_FLASH_READ:
			default:
				ms_lib_setacquired_errorblock(us, blk);
				continue;
			} /* End switch */

			if ((extdat.ovrflg & MS_REG_OVR_BKST) != MS_REG_OVR_BKST_OK) {
				ms_lib_setacquired_errorblock(us, blk);
				continue;
			}

			switch (ms_lib_erase_phyblock(us, blk)) {
			case MS_STATUS_SUCCESS:
				return blk;
			case MS_STATUS_ERROR:
				return MS_LB_ERROR;
			case MS_ERROR_FLASH_ERASE:
			default:
				ms_lib_error_phyblock(us, blk);
				break;
			}
		}
	} /* End for */

	return MS_LB_ERROR;
}

static int ms_libsearch_block_from_logical(struct us_data *us, u16 logblk)
{
	u16 phyblk;
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;

	phyblk = ms_libconv_to_physical(info, logblk);
	if (phyblk >= MS_LB_ERROR) {
		if (logblk >= info->MS_Lib.NumberOfLogBlock)
			return MS_LB_ERROR;

		phyblk = (logblk + MS_NUMBER_OF_BOOT_BLOCK) /
						MS_LOGICAL_BLOCKS_PER_SEGMENT;
		phyblk *= MS_PHYSICAL_BLOCKS_PER_SEGMENT;
		phyblk += MS_PHYSICAL_BLOCKS_PER_SEGMENT - 1;
	}

	return ms_libsearch_block_from_physical(us, phyblk);
}

static int ms_scsi_test_unit_ready(struct us_data *us, struct scsi_cmnd *srb)
{
	struct ene_ub6250_info *info = (struct ene_ub6250_info *)(us->extra);

	/* pr_info("MS_SCSI_Test_Unit_Ready\n"); */
	if (!(info->MS_Status & MS_Insert) || !(info->MS_Status & MS_Ready))
		ene_ms_init(us);

	return USB_STOR_TRANSPORT_GOOD;
}

static int ms_scsi_mode_sense(struct us_data *us, struct scsi_cmnd *srb)
{
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;
	unsigned char mediaNoWP[12] = {
		0x0b, 0x00, 0x00, 0x08, 0x00, 0x00,
		0x71, 0xc0, 0x00, 0x00, 0x02, 0x00 };
	unsigned char mediaWP[12]   = {
		0x0b, 0x00, 0x80, 0x08, 0x00, 0x00,
		0x71, 0xc0, 0x00, 0x00, 0x02, 0x00 };

	if (info->MS_Status & MS_WtP)
		usb_stor_set_xfer_buf(mediaWP, 12, srb);
	else
		usb_stor_set_xfer_buf(mediaNoWP, 12, srb);

	return USB_STOR_TRANSPORT_GOOD;
}

static int ms_scsi_read_capacity(struct us_data *us, struct scsi_cmnd *srb)
{
	u32   bl_num;
	u32   bl_len;
	unsigned int offset = 0;
	unsigned char buf[8];
	struct scatterlist *sg = NULL;
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;

	usb_stor_dbg(us, "ms_scsi_read_capacity\n");
	bl_len = 0x200;
	if (info->MS_Status & MS_IsMSPro)
		bl_num = info->MSP_TotalBlock - 1;
	else
		bl_num = info->MS_Lib.NumberOfLogBlock *
					info->MS_Lib.blockSize * 2 - 1;

	info->bl_num = bl_num;
	usb_stor_dbg(us, "bl_len = %x\n", bl_len);
	usb_stor_dbg(us, "bl_num = %x\n", bl_num);

	/* srb->request_bufflen = 8; */
	buf[0] = (bl_num >> 24) & 0xff;
	buf[1] = (bl_num >> 16) & 0xff;
	buf[2] = (bl_num >> 8) & 0xff;
	buf[3] = (bl_num >> 0) & 0xff;
	buf[4] = (bl_len >> 24) & 0xff;
	buf[5] = (bl_len >> 16) & 0xff;
	buf[6] = (bl_len >> 8) & 0xff;
	buf[7] = (bl_len >> 0) & 0xff;

	usb_stor_access_xfer_buf(buf, 8, srb, &sg, &offset, TO_XFER_BUF);

	return USB_STOR_TRANSPORT_GOOD;
}
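/*
 * Worked example of the physical-to-logical window computed below:
 * physical segment 0 covers logical blocks 0..493, segment 1 covers
 * 494..989, segment 2 covers 990..1485, and so on in steps of 496.
 */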
static void ms_lib_phy_to_log_range(u16 PhyBlock, u16 *LogStart, u16 *LogEnde)
{
	PhyBlock /= MS_PHYSICAL_BLOCKS_PER_SEGMENT;

	if (PhyBlock) {
		*LogStart = MS_LOGICAL_BLOCKS_IN_1ST_SEGMENT +
			(PhyBlock - 1) * MS_LOGICAL_BLOCKS_PER_SEGMENT; /* 496 */
		*LogEnde = *LogStart + MS_LOGICAL_BLOCKS_PER_SEGMENT; /* 496 */
	} else {
		*LogStart = 0;
		*LogEnde = MS_LOGICAL_BLOCKS_IN_1ST_SEGMENT; /* 494 */
	}
}

static int ms_lib_read_extrablock(struct us_data *us, u32 PhyBlock,
	u8 PageNum, u8 blen, void *buf)
{
	struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf;
	int result;

	/* Read Extra Data */
	memset(bcb, 0, sizeof(struct bulk_cb_wrap));
	bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN);
	bcb->DataTransferLength = 0x4 * blen;
	bcb->Flags = US_BULK_FLAG_IN;
	bcb->CDB[0] = 0xF1;
	bcb->CDB[1] = 0x03;
	bcb->CDB[5] = (unsigned char)(PageNum);
	bcb->CDB[4] = (unsigned char)(PhyBlock);
	bcb->CDB[3] = (unsigned char)(PhyBlock >> 8);
	bcb->CDB[2] = (unsigned char)(PhyBlock >> 16);
	bcb->CDB[6] = blen;

	result = ene_send_scsi_cmd(us, FDIR_READ, buf, 0);
	if (result != USB_STOR_XFER_GOOD)
		return USB_STOR_TRANSPORT_ERROR;

	return USB_STOR_TRANSPORT_GOOD;
}

static int ms_lib_scan_logicalblocknumber(struct us_data *us, u16 btBlk1st)
{
	u16 PhyBlock, newblk, i;
	u16 LogStart, LogEnde;
	struct ms_lib_type_extdat extdat;
	u32 count = 0, index = 0;
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;
	u8 *bbuf = info->bbuf;

	for (PhyBlock = 0; PhyBlock < info->MS_Lib.NumberOfPhyBlock;) {
		ms_lib_phy_to_log_range(PhyBlock, &LogStart, &LogEnde);

		for (i = 0; i < MS_PHYSICAL_BLOCKS_PER_SEGMENT; i++, PhyBlock++) {
			switch (ms_libconv_to_logical(info, PhyBlock)) {
			case MS_STATUS_ERROR:
				continue;
			default:
				break;
			}

			if (count == PhyBlock) {
				ms_lib_read_extrablock(us, PhyBlock, 0, 0x80,
						bbuf);
				count += 0x80;
			}
			index = (PhyBlock % 0x80) * 4;

			extdat.ovrflg = bbuf[index];
			extdat.mngflg = bbuf[index + 1];
			extdat.logadr = memstick_logaddr(bbuf[index + 2],
					bbuf[index + 3]);

			if ((extdat.ovrflg & MS_REG_OVR_BKST) != MS_REG_OVR_BKST_OK) {
				ms_lib_setacquired_errorblock(us, PhyBlock);
				continue;
			}

			if ((extdat.mngflg & MS_REG_MNG_ATFLG) == MS_REG_MNG_ATFLG_ATTBL) {
				ms_lib_erase_phyblock(us, PhyBlock);
				continue;
			}

			if (extdat.logadr != MS_LB_NOT_USED) {
				if ((extdat.logadr < LogStart) ||
				    (LogEnde <= extdat.logadr)) {
					ms_lib_erase_phyblock(us, PhyBlock);
					continue;
				}

				newblk = ms_libconv_to_physical(info, extdat.logadr);

				if (newblk != MS_LB_NOT_USED) {
					if (extdat.logadr == 0) {
						ms_lib_set_logicalpair(us,
							extdat.logadr, PhyBlock);
						if (ms_lib_check_disableblock(us,
								btBlk1st)) {
							ms_lib_set_logicalpair(us,
								extdat.logadr,
								newblk);
							continue;
						}
					}

					ms_lib_read_extra(us, newblk, 0, &extdat);
					if ((extdat.ovrflg & MS_REG_OVR_UDST) ==
							MS_REG_OVR_UDST_UPDATING) {
						ms_lib_erase_phyblock(us, PhyBlock);
						continue;
					} else {
						ms_lib_erase_phyblock(us, newblk);
					}
				}

				ms_lib_set_logicalpair(us, extdat.logadr, PhyBlock);
			}
		}
	} /* End for ... */

	return MS_STATUS_SUCCESS;
}
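/*
 * READ(10) takes one of two paths below: MS Pro media is addressed
 * directly by sector and handed to the firmware as a single transfer,
 * while legacy MemoryStick media is read block by block through the
 * logical-to-physical map, at most one block's worth of pages per
 * vendor command.
 */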
static int ms_scsi_read(struct us_data *us, struct scsi_cmnd *srb)
{
	int result;
	unsigned char *cdb = srb->cmnd;
	struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf;
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;

	u32 bn = ((cdb[2] << 24) & 0xff000000) | ((cdb[3] << 16) & 0x00ff0000) |
		 ((cdb[4] << 8) & 0x0000ff00) | ((cdb[5] << 0) & 0x000000ff);
	u16 blen = ((cdb[7] << 8) & 0xff00) | ((cdb[8] << 0) & 0x00ff);
	u32 blenByte = blen * 0x200;

	if (bn > info->bl_num)
		return USB_STOR_TRANSPORT_ERROR;

	if (info->MS_Status & MS_IsMSPro) {
		result = ene_load_bincode(us, MSP_RW_PATTERN);
		if (result != USB_STOR_XFER_GOOD) {
			usb_stor_dbg(us, "Load MSP RW pattern Fail !!\n");
			return USB_STOR_TRANSPORT_ERROR;
		}

		/* set up the command wrapper */
		memset(bcb, 0, sizeof(struct bulk_cb_wrap));
		bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN);
		bcb->DataTransferLength = blenByte;
		bcb->Flags  = US_BULK_FLAG_IN;
		bcb->CDB[0] = 0xF1;
		bcb->CDB[1] = 0x02;
		bcb->CDB[5] = (unsigned char)(bn);
		bcb->CDB[4] = (unsigned char)(bn >> 8);
		bcb->CDB[3] = (unsigned char)(bn >> 16);
		bcb->CDB[2] = (unsigned char)(bn >> 24);

		result = ene_send_scsi_cmd(us, FDIR_READ, scsi_sglist(srb), 1);
	} else {
		void *buf;
		int offset = 0;
		u16 phyblk, logblk;
		u8 PageNum;
		u16 len;
		u32 blkno;

		buf = kmalloc(blenByte, GFP_KERNEL);
		if (buf == NULL)
			return USB_STOR_TRANSPORT_ERROR;

		result = ene_load_bincode(us, MS_RW_PATTERN);
		if (result != USB_STOR_XFER_GOOD) {
			pr_info("Load MS RW pattern Fail !!\n");
			result = USB_STOR_TRANSPORT_ERROR;
			goto exit;
		}

		logblk = (u16)(bn / info->MS_Lib.PagesPerBlock);
		PageNum = (u8)(bn % info->MS_Lib.PagesPerBlock);

		while (1) {
			if (blen > (info->MS_Lib.PagesPerBlock - PageNum))
				len = info->MS_Lib.PagesPerBlock - PageNum;
			else
				len = blen;

			phyblk = ms_libconv_to_physical(info, logblk);
			blkno  = phyblk * 0x20 + PageNum;

			/* set up the command wrapper */
			memset(bcb, 0, sizeof(struct bulk_cb_wrap));
			bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN);
			bcb->DataTransferLength = 0x200 * len;
			bcb->Flags  = US_BULK_FLAG_IN;
			bcb->CDB[0] = 0xF1;
			bcb->CDB[1] = 0x02;
			bcb->CDB[5] = (unsigned char)(blkno);
			bcb->CDB[4] = (unsigned char)(blkno >> 8);
			bcb->CDB[3] = (unsigned char)(blkno >> 16);
			bcb->CDB[2] = (unsigned char)(blkno >> 24);

			result = ene_send_scsi_cmd(us, FDIR_READ, buf + offset, 0);
			if (result != USB_STOR_XFER_GOOD) {
				pr_info("MS_SCSI_Read --- result = %x\n", result);
				result = USB_STOR_TRANSPORT_ERROR;
				goto exit;
			}

			blen -= len;
			if (blen <= 0)
				break;
			logblk++;
			PageNum = 0;
			offset += MS_BYTES_PER_PAGE * len;
		}

		usb_stor_set_xfer_buf(buf, blenByte, srb);
exit:
		kfree(buf);
	}
	return result;
}
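/*
 * Legacy MemoryStick writes never overwrite a mapped block in place:
 * for each block touched, a fresh physical block is taken from the
 * segment's free pool, the firmware merges old and new pages into it
 * with the copy command, and the logical mapping is switched over
 * while the old block is marked erased and returned to the pool.
 */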
static int ms_scsi_write(struct us_data *us, struct scsi_cmnd *srb)
{
	int result;
	struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf;
	unsigned char *cdb = srb->cmnd;
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;

	u32 bn = ((cdb[2] << 24) & 0xff000000) |
		 ((cdb[3] << 16) & 0x00ff0000) |
		 ((cdb[4] << 8) & 0x0000ff00) |
		 ((cdb[5] << 0) & 0x000000ff);
	u16 blen = ((cdb[7] << 8) & 0xff00) | ((cdb[8] << 0) & 0x00ff);
	u32 blenByte = blen * 0x200;

	if (bn > info->bl_num)
		return USB_STOR_TRANSPORT_ERROR;

	if (info->MS_Status & MS_IsMSPro) {
		result = ene_load_bincode(us, MSP_RW_PATTERN);
		if (result != USB_STOR_XFER_GOOD) {
			pr_info("Load MSP RW pattern Fail !!\n");
			return USB_STOR_TRANSPORT_ERROR;
		}

		/* set up the command wrapper */
		memset(bcb, 0, sizeof(struct bulk_cb_wrap));
		bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN);
		bcb->DataTransferLength = blenByte;
		bcb->Flags  = US_BULK_FLAG_OUT;
		bcb->CDB[0] = 0xF0;
		bcb->CDB[1] = 0x04;
		bcb->CDB[5] = (unsigned char)(bn);
		bcb->CDB[4] = (unsigned char)(bn >> 8);
		bcb->CDB[3] = (unsigned char)(bn >> 16);
		bcb->CDB[2] = (unsigned char)(bn >> 24);

		result = ene_send_scsi_cmd(us, FDIR_WRITE, scsi_sglist(srb), 1);
	} else {
		void *buf;
		int offset = 0;
		u16 PhyBlockAddr;
		u8 PageNum;
		u16 len, oldphy, newphy;

		buf = kmalloc(blenByte, GFP_KERNEL);
		if (buf == NULL)
			return USB_STOR_TRANSPORT_ERROR;

		usb_stor_set_xfer_buf(buf, blenByte, srb);

		result = ene_load_bincode(us, MS_RW_PATTERN);
		if (result != USB_STOR_XFER_GOOD) {
			pr_info("Load MS RW pattern Fail !!\n");
			result = USB_STOR_TRANSPORT_ERROR;
			goto exit;
		}

		PhyBlockAddr = (u16)(bn / info->MS_Lib.PagesPerBlock);
		PageNum      = (u8)(bn % info->MS_Lib.PagesPerBlock);

		while (1) {
			if (blen > (info->MS_Lib.PagesPerBlock - PageNum))
				len = info->MS_Lib.PagesPerBlock - PageNum;
			else
				len = blen;

			oldphy = ms_libconv_to_physical(info, PhyBlockAddr);
			newphy = ms_libsearch_block_from_logical(us, PhyBlockAddr);

			result = ms_read_copyblock(us, oldphy, newphy,
						PhyBlockAddr, PageNum,
						buf + offset, len);
			if (result != USB_STOR_XFER_GOOD) {
				pr_info("MS_SCSI_Write --- result = %x\n",
					result);
				result = USB_STOR_TRANSPORT_ERROR;
				goto exit;
			}

			info->MS_Lib.Phy2LogMap[oldphy] = MS_LB_NOT_USED_ERASED;
			ms_lib_force_setlogical_pair(us, PhyBlockAddr, newphy);

			blen -= len;
			if (blen <= 0)
				break;
			PhyBlockAddr++;
			PageNum = 0;
			offset += MS_BYTES_PER_PAGE * len;
		}
exit:
		kfree(buf);
	}
	return result;
}

/*
 * ENE card init/status commands
 */
static int ene_get_card_type(struct us_data *us, u16 index, void *buf)
{
	struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf;
	int result;

	memset(bcb, 0, sizeof(struct bulk_cb_wrap));
	bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN);
	bcb->DataTransferLength	= 0x01;
	bcb->Flags = US_BULK_FLAG_IN;
	bcb->CDB[0] = 0xED;
	bcb->CDB[2] = (unsigned char)(index >> 8);
	bcb->CDB[3] = (unsigned char)index;

	result = ene_send_scsi_cmd(us, FDIR_READ, buf, 0);
	return result;
}

static int ene_get_card_status(struct us_data *us, u8 *buf)
{
	u16 tmpreg;
	u32 reg4b;
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;

	/* usb_stor_dbg(us, "transport --- ENE_ReadSDReg\n"); */
	reg4b = *(u32 *)&buf[0x18];
	info->SD_READ_BL_LEN = (u8)((reg4b >> 8) & 0x0f);

	tmpreg = (u16) reg4b;
	reg4b = *(u32 *)(&buf[0x14]);
	if ((info->SD_Status & SD_HiCapacity) && !(info->SD_Status & SD_IsMMC))
		info->HC_C_SIZE = (reg4b >> 8) & 0x3fffff;

	info->SD_C_SIZE = ((tmpreg & 0x03) << 10) | (u16)(reg4b >> 22);
	info->SD_C_SIZE_MULT = (u8)(reg4b >> 7) & 0x07;
	if ((info->SD_Status & SD_HiCapacity) && (info->SD_Status & SD_IsMMC))
		info->HC_C_SIZE = *(u32 *)(&buf[0x100]);

	if (info->SD_READ_BL_LEN > SD_BLOCK_LEN) {
		info->SD_Block_Mult =
			1 << (info->SD_READ_BL_LEN - SD_BLOCK_LEN);
		info->SD_READ_BL_LEN = SD_BLOCK_LEN;
	} else {
		info->SD_Block_Mult = 1;
	}

	return USB_STOR_TRANSPORT_GOOD;
}
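/*
 * The bridge runs whichever firmware blob matches the pending
 * operation, so every SD/MS entry point funnels through here first.
 * BIN_FLAG caches the pattern that is already loaded and makes the
 * reload a no-op, which avoids re-sending the blob on every command.
 */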
static int ene_load_bincode(struct us_data *us, unsigned char flag)
{
	int err;
	char *fw_name = NULL;
	unsigned char *buf = NULL;
	const struct firmware *sd_fw = NULL;
	int result = USB_STOR_TRANSPORT_ERROR;
	struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf;
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;

	if (info->BIN_FLAG == flag)
		return USB_STOR_TRANSPORT_GOOD;

	switch (flag) {
	/* For SD */
	case SD_INIT1_PATTERN:
		usb_stor_dbg(us, "SD_INIT1_PATTERN\n");
		fw_name = SD_INIT1_FIRMWARE;
		break;
	case SD_INIT2_PATTERN:
		usb_stor_dbg(us, "SD_INIT2_PATTERN\n");
		fw_name = SD_INIT2_FIRMWARE;
		break;
	case SD_RW_PATTERN:
		usb_stor_dbg(us, "SD_RW_PATTERN\n");
		fw_name = SD_RW_FIRMWARE;
		break;
	/* For MS */
	case MS_INIT_PATTERN:
		usb_stor_dbg(us, "MS_INIT_PATTERN\n");
		fw_name = MS_INIT_FIRMWARE;
		break;
	case MSP_RW_PATTERN:
		usb_stor_dbg(us, "MSP_RW_PATTERN\n");
		fw_name = MSP_RW_FIRMWARE;
		break;
	case MS_RW_PATTERN:
		usb_stor_dbg(us, "MS_RW_PATTERN\n");
		fw_name = MS_RW_FIRMWARE;
		break;
	default:
		usb_stor_dbg(us, "----------- Unknown PATTERN ----------\n");
		goto nofw;
	}

	err = request_firmware(&sd_fw, fw_name, &us->pusb_dev->dev);
	if (err) {
		usb_stor_dbg(us, "load firmware %s failed\n", fw_name);
		goto nofw;
	}
	buf = kmemdup(sd_fw->data, sd_fw->size, GFP_KERNEL);
	if (buf == NULL)
		goto nofw;

	memset(bcb, 0, sizeof(struct bulk_cb_wrap));
	bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN);
	bcb->DataTransferLength = sd_fw->size;
	bcb->Flags = US_BULK_FLAG_OUT;
	bcb->CDB[0] = 0xEF;

	result = ene_send_scsi_cmd(us, FDIR_WRITE, buf, 0);
	if (us->srb != NULL)
		scsi_set_resid(us->srb, 0);
	info->BIN_FLAG = flag;
	kfree(buf);

nofw:
	release_firmware(sd_fw);
	return result;
}
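/*
 * Card bring-up scans the first MS_MAX_INITIAL_ERROR_BLOCKS + 2
 * physical blocks for up to two valid boot blocks, prefers the first
 * one unless its page 0 showed corrected-error flags, then rebuilds
 * the logical mapping and the write-protect state from whichever boot
 * block parsed cleanly.
 */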
static int ms_card_init(struct us_data *us)
{
	u32 result;
	u16 TmpBlock;
	unsigned char *PageBuffer0 = NULL, *PageBuffer1 = NULL;
	struct ms_lib_type_extdat extdat;
	u16 btBlk1st, btBlk2nd;
	u32 btBlk1stErred;
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;

	printk(KERN_INFO "MS_CardInit start\n");

	ms_lib_free_allocatedarea(us);	/* Clean buffer and set struct us_data flag to 0 */

	/* get two PageBuffers */
	PageBuffer0 = kmalloc(MS_BYTES_PER_PAGE, GFP_KERNEL);
	PageBuffer1 = kmalloc(MS_BYTES_PER_PAGE, GFP_KERNEL);
	if ((PageBuffer0 == NULL) || (PageBuffer1 == NULL)) {
		result = MS_NO_MEMORY_ERROR;
		goto exit;
	}

	btBlk1st = btBlk2nd = MS_LB_NOT_USED;
	btBlk1stErred = 0;

	for (TmpBlock = 0; TmpBlock < MS_MAX_INITIAL_ERROR_BLOCKS + 2; TmpBlock++) {

		switch (ms_read_readpage(us, TmpBlock, 0,
					(u32 *)PageBuffer0, &extdat)) {
		case MS_STATUS_SUCCESS:
			break;
		case MS_STATUS_INT_ERROR:
			break;
		case MS_STATUS_ERROR:
		default:
			continue;
		}

		if ((extdat.ovrflg & MS_REG_OVR_BKST) == MS_REG_OVR_BKST_NG)
			continue;

		if (((extdat.mngflg & MS_REG_MNG_SYSFLG) == MS_REG_MNG_SYSFLG_USER) ||
			(be16_to_cpu(((struct ms_bootblock_page0 *)PageBuffer0)->header.wBlockID) != MS_BOOT_BLOCK_ID) ||
			(be16_to_cpu(((struct ms_bootblock_page0 *)PageBuffer0)->header.wFormatVersion) != MS_BOOT_BLOCK_FORMAT_VERSION) ||
			(((struct ms_bootblock_page0 *)PageBuffer0)->header.bNumberOfDataEntry != MS_BOOT_BLOCK_DATA_ENTRIES))
			continue;

		if (btBlk1st != MS_LB_NOT_USED) {
			btBlk2nd = TmpBlock;
			break;
		}

		btBlk1st = TmpBlock;
		memcpy(PageBuffer1, PageBuffer0, MS_BYTES_PER_PAGE);
		if (extdat.status1 & (MS_REG_ST1_DTER | MS_REG_ST1_EXER |
							MS_REG_ST1_FGER))
			btBlk1stErred = 1;
	}

	if (btBlk1st == MS_LB_NOT_USED) {
		result = MS_STATUS_ERROR;
		goto exit;
	}

	/* write protect */
	if ((extdat.status0 & MS_REG_ST0_WP) == MS_REG_ST0_WP_ON)
		ms_lib_ctrl_set(info, MS_LIB_CTRL_WRPROTECT);

	result = MS_STATUS_ERROR;
	/* 1st Boot Block */
	if (btBlk1stErred == 0)
		result = ms_lib_process_bootblock(us, btBlk1st, PageBuffer1);
	/* 2nd Boot Block */
	if (result && (btBlk2nd != MS_LB_NOT_USED))
		result = ms_lib_process_bootblock(us, btBlk2nd, PageBuffer0);

	if (result) {
		result = MS_STATUS_ERROR;
		goto exit;
	}

	for (TmpBlock = 0; TmpBlock < btBlk1st; TmpBlock++)
		info->MS_Lib.Phy2LogMap[TmpBlock] = MS_LB_INITIAL_ERROR;

	info->MS_Lib.Phy2LogMap[btBlk1st] = MS_LB_BOOT_BLOCK;

	if (btBlk2nd != MS_LB_NOT_USED) {
		for (TmpBlock = btBlk1st + 1; TmpBlock < btBlk2nd; TmpBlock++)
			info->MS_Lib.Phy2LogMap[TmpBlock] = MS_LB_INITIAL_ERROR;

		info->MS_Lib.Phy2LogMap[btBlk2nd] = MS_LB_BOOT_BLOCK;
	}

	result = ms_lib_scan_logicalblocknumber(us, btBlk1st);
	if (result)
		goto exit;

	for (TmpBlock = MS_PHYSICAL_BLOCKS_PER_SEGMENT;
		TmpBlock < info->MS_Lib.NumberOfPhyBlock;
		TmpBlock += MS_PHYSICAL_BLOCKS_PER_SEGMENT) {
		if (ms_count_freeblock(us, TmpBlock) == 0) {
			ms_lib_ctrl_set(info, MS_LIB_CTRL_WRPROTECT);
			break;
		}
	}

	/* write buffer */
	if (ms_lib_alloc_writebuf(us)) {
		result = MS_NO_MEMORY_ERROR;
		goto exit;
	}

	result = MS_STATUS_SUCCESS;

exit:
	kfree(PageBuffer1);
	kfree(PageBuffer0);

	printk(KERN_INFO "MS_CardInit end\n");
	return result;
}

static int ene_ms_init(struct us_data *us)
{
	struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf;
	int result;
	u16 MSP_BlockSize, MSP_UserAreaBlocks;
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;
	u8 *bbuf = info->bbuf;
	unsigned int s;

	printk(KERN_INFO "transport --- ENE_MSInit\n");

	/* the same part to test ENE */

	result = ene_load_bincode(us, MS_INIT_PATTERN);
	if (result != USB_STOR_XFER_GOOD) {
		printk(KERN_ERR "Load MS Init Code Fail !!\n");
		return USB_STOR_TRANSPORT_ERROR;
	}

	memset(bcb, 0, sizeof(struct bulk_cb_wrap));
	bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN);
	bcb->DataTransferLength = 0x200;
	bcb->Flags      = US_BULK_FLAG_IN;
	bcb->CDB[0]     = 0xF1;
	bcb->CDB[1]     = 0x01;

	result = ene_send_scsi_cmd(us, FDIR_READ, bbuf, 0);
	if (result != USB_STOR_XFER_GOOD) {
		printk(KERN_ERR "Execution MS Init Code Fail !!\n");
		return USB_STOR_TRANSPORT_ERROR;
	}

	/* the same part to test ENE */
	info->MS_Status = bbuf[0];

	s = info->MS_Status;
	if ((s & MS_Insert) && (s & MS_Ready)) {
		printk(KERN_INFO "Insert     = %x\n", !!(s & MS_Insert));
		printk(KERN_INFO "Ready      = %x\n", !!(s & MS_Ready));
		printk(KERN_INFO "IsMSPro    = %x\n", !!(s & MS_IsMSPro));
		printk(KERN_INFO "IsMSPHG    = %x\n", !!(s & MS_IsMSPHG));
		printk(KERN_INFO "WtP        = %x\n", !!(s & MS_WtP));
		if (s & MS_IsMSPro) {
			MSP_BlockSize      = (bbuf[6] << 8) | bbuf[7];
			MSP_UserAreaBlocks = (bbuf[10] << 8) | bbuf[11];
			info->MSP_TotalBlock = MSP_BlockSize * MSP_UserAreaBlocks;
		} else {
			ms_card_init(us); /* Card is MS (to ms.c) */
		}
		usb_stor_dbg(us, "MS Init Code OK !!\n");
	} else {
		usb_stor_dbg(us, "MS Card Not Ready --- %x\n", bbuf[0]);
		return USB_STOR_TRANSPORT_ERROR;
	}

	return USB_STOR_TRANSPORT_GOOD;
}
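/*
 * SD bring-up is a two-stage handshake: the init1 firmware probes the
 * card (opcode 0xF2 with no data stage), then the init2 firmware
 * returns a 512-byte status block whose first byte seeds SD_Status and
 * whose register dump is parsed by ene_get_card_status() for the
 * capacity fields.
 */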
static int ene_sd_init(struct us_data *us)
{
	int result;
	struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf;
	struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra;
	u8 *bbuf = info->bbuf;

	usb_stor_dbg(us, "transport --- ENE_SDInit\n");
	/* SD Init Part-1 */
	result = ene_load_bincode(us, SD_INIT1_PATTERN);
	if (result != USB_STOR_XFER_GOOD) {
		usb_stor_dbg(us, "Load SD Init Code Part-1 Fail !!\n");
		return USB_STOR_TRANSPORT_ERROR;
	}

	memset(bcb, 0, sizeof(struct bulk_cb_wrap));
	bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN);
	bcb->Flags = US_BULK_FLAG_IN;
	bcb->CDB[0] = 0xF2;

	result = ene_send_scsi_cmd(us, FDIR_READ, NULL, 0);
	if (result != USB_STOR_XFER_GOOD) {
		usb_stor_dbg(us, "Execution SD Init Code Fail !!\n");
		return USB_STOR_TRANSPORT_ERROR;
	}

	/* SD Init Part-2 */
	result = ene_load_bincode(us, SD_INIT2_PATTERN);
	if (result != USB_STOR_XFER_GOOD) {
		usb_stor_dbg(us, "Load SD Init Code Part-2 Fail !!\n");
		return USB_STOR_TRANSPORT_ERROR;
	}

	memset(bcb, 0, sizeof(struct bulk_cb_wrap));
	bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN);
	bcb->DataTransferLength = 0x200;
	bcb->Flags              = US_BULK_FLAG_IN;
	bcb->CDB[0]             = 0xF1;

	result = ene_send_scsi_cmd(us, FDIR_READ, bbuf, 0);
	if (result != USB_STOR_XFER_GOOD) {
		usb_stor_dbg(us, "Execution SD Init Code Fail !!\n");
		return USB_STOR_TRANSPORT_ERROR;
	}

	info->SD_Status = bbuf[0];
	if ((info->SD_Status & SD_Insert) && (info->SD_Status & SD_Ready)) {
		unsigned int s = info->SD_Status;

		ene_get_card_status(us, bbuf);
		usb_stor_dbg(us, "Insert     = %x\n", !!(s & SD_Insert));
		usb_stor_dbg(us, "Ready      = %x\n", !!(s & SD_Ready));
		usb_stor_dbg(us, "IsMMC      = %x\n", !!(s & SD_IsMMC));
		usb_stor_dbg(us, "HiCapacity = %x\n", !!(s & SD_HiCapacity));
		usb_stor_dbg(us, "HiSpeed    = %x\n", !!(s & SD_HiSpeed));
		usb_stor_dbg(us, "WtP        = %x\n", !!(s & SD_WtP));
	} else {
		usb_stor_dbg(us, "SD Card Not Ready --- %x\n", bbuf[0]);
		return USB_STOR_TRANSPORT_ERROR;
	}
	return USB_STOR_TRANSPORT_GOOD;
}

static int ene_init(struct us_data *us)
{
	int result;
	u8  misc_reg03;
	struct ene_ub6250_info *info = (struct ene_ub6250_info *)(us->extra);
	u8 *bbuf = info->bbuf;

	result = ene_get_card_type(us, REG_CARD_STATUS, bbuf);
	if (result != USB_STOR_XFER_GOOD)
		return USB_STOR_TRANSPORT_ERROR;

	misc_reg03 = bbuf[0];
	if (misc_reg03 & 0x01) {
		if (!(info->SD_Status & SD_Ready)) {
			result = ene_sd_init(us);
			if (result != USB_STOR_XFER_GOOD)
				return USB_STOR_TRANSPORT_ERROR;
		}
	}
	if (misc_reg03 & 0x02) {
		if (!(info->MS_Status & MS_Ready)) {
			result = ene_ms_init(us);
			if (result != USB_STOR_XFER_GOOD)
				return USB_STOR_TRANSPORT_ERROR;
		}
	}
	return result;
}

/* ----- sd_scsi_irp() --------- */
static int sd_scsi_irp(struct us_data *us, struct scsi_cmnd *srb)
{
	int    result;
	struct ene_ub6250_info *info = (struct ene_ub6250_info *)us->extra;

	switch (srb->cmnd[0]) {
	case TEST_UNIT_READY:
		result = sd_scsi_test_unit_ready(us, srb);
		break; /* 0x00 */
	case REQUEST_SENSE:
		result = do_scsi_request_sense(us, srb);
		break; /* 0x03 */
	case INQUIRY:
		result = do_scsi_inquiry(us, srb);
		break; /* 0x12 */
	case MODE_SENSE:
		result = sd_scsi_mode_sense(us, srb);
		break; /* 0x1A */
	/*
	case START_STOP:
		result = SD_SCSI_Start_Stop(us, srb);
		break; //0x1B
	*/
	case READ_CAPACITY:
		result = sd_scsi_read_capacity(us, srb);
		break; /* 0x25 */
	case READ_10:
		result = sd_scsi_read(us, srb);
		break; /* 0x28 */
	case WRITE_10:
		result = sd_scsi_write(us, srb);
		break; /* 0x2A */
	default:
		info->SrbStatus = SS_ILLEGAL_REQUEST;
		result = USB_STOR_TRANSPORT_FAILED;
		break;
	}
	if (result == USB_STOR_TRANSPORT_GOOD)
		info->SrbStatus = SS_SUCCESS;
	return result;
}

/*
 * ms_scsi_irp()
 */
static int ms_scsi_irp(struct us_data *us, struct scsi_cmnd *srb)
{
	int result;
	struct ene_ub6250_info *info = (struct ene_ub6250_info *)us->extra;

	switch (srb->cmnd[0]) {
	case TEST_UNIT_READY:
		result = ms_scsi_test_unit_ready(us, srb);
		break; /* 0x00 */
	case REQUEST_SENSE:
		result = do_scsi_request_sense(us, srb);
		break; /* 0x03 */
	case INQUIRY:
		result = do_scsi_inquiry(us, srb);
		break; /* 0x12 */
	case MODE_SENSE:
		result = ms_scsi_mode_sense(us, srb);
		break; /* 0x1A */
	case READ_CAPACITY:
		result = ms_scsi_read_capacity(us, srb);
		break; /* 0x25 */
	case READ_10:
		result = ms_scsi_read(us, srb);
		break; /* 0x28 */
	case WRITE_10:
		result = ms_scsi_write(us, srb);
		break; /* 0x2A */
	default:
		info->SrbStatus = SS_ILLEGAL_REQUEST;
		result = USB_STOR_TRANSPORT_FAILED;
		break;
	}
	if (result == USB_STOR_TRANSPORT_GOOD)
		info->SrbStatus = SS_SUCCESS;
	return result;
}
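/*
 * Top-level transport hook: re-checks card initialization state, then
 * routes the SCSI command to the SD or MS handler. If both media
 * report ready, both handlers run and the MS handler's result wins,
 * since it is evaluated last.
 */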
= USB_STOR_TRANSPORT_ERROR; if (info->SD_Status & SD_Ready) result = sd_scsi_irp(us, srb); if (info->MS_Status & MS_Ready) result = ms_scsi_irp(us, srb); } return result; } static struct scsi_host_template ene_ub6250_host_template; static int ene_ub6250_probe(struct usb_interface *intf, const struct usb_device_id *id) { int result; u8 misc_reg03; struct us_data *us; struct ene_ub6250_info *info; result = usb_stor_probe1(&us, intf, id, (id - ene_ub6250_usb_ids) + ene_ub6250_unusual_dev_list, &ene_ub6250_host_template); if (result) return result; /* FIXME: where should the code alloc extra buf ? */ us->extra = kzalloc(sizeof(struct ene_ub6250_info), GFP_KERNEL); if (!us->extra) return -ENOMEM; us->extra_destructor = ene_ub6250_info_destructor; info = (struct ene_ub6250_info *)(us->extra); info->bbuf = kmalloc(512, GFP_KERNEL); if (!info->bbuf) { kfree(us->extra); return -ENOMEM; } us->transport_name = "ene_ub6250"; us->transport = ene_transport; us->max_lun = 0; result = usb_stor_probe2(us); if (result) return result; /* probe card type */ result = ene_get_card_type(us, REG_CARD_STATUS, info->bbuf); if (result != USB_STOR_XFER_GOOD) { usb_stor_disconnect(intf); return USB_STOR_TRANSPORT_ERROR; } misc_reg03 = info->bbuf[0]; if (!(misc_reg03 & 0x01)) { pr_info("ums_eneub6250: This driver only supports SD/MS cards. " "It does not support SM cards.\n"); } return result; } #ifdef CONFIG_PM static int ene_ub6250_resume(struct usb_interface *iface) { struct us_data *us = usb_get_intfdata(iface); struct ene_ub6250_info *info = (struct ene_ub6250_info *)(us->extra); mutex_lock(&us->dev_mutex); if (us->suspend_resume_hook) (us->suspend_resume_hook)(us, US_RESUME); mutex_unlock(&us->dev_mutex); info->Power_IsResum = true; /* info->SD_Status &= ~SD_Ready; */ info->SD_Status = 0; info->MS_Status = 0; info->SM_Status = 0; return 0; } static int ene_ub6250_reset_resume(struct usb_interface *iface) { struct us_data *us = usb_get_intfdata(iface); struct ene_ub6250_info *info = (struct ene_ub6250_info *)(us->extra); /* Report the reset to the SCSI core */ usb_stor_reset_resume(iface); /* * FIXME: Notify the subdrivers that they need to reinitialize * the device */ info->Power_IsResum = true; /* info->SD_Status &= ~SD_Ready; */ info->SD_Status = 0; info->MS_Status = 0; info->SM_Status = 0; return 0; } #else #define ene_ub6250_resume NULL #define ene_ub6250_reset_resume NULL #endif static struct usb_driver ene_ub6250_driver = { .name = DRV_NAME, .probe = ene_ub6250_probe, .disconnect = usb_stor_disconnect, .suspend = usb_stor_suspend, .resume = ene_ub6250_resume, .reset_resume = ene_ub6250_reset_resume, .pre_reset = usb_stor_pre_reset, .post_reset = usb_stor_post_reset, .id_table = ene_ub6250_usb_ids, .soft_unbind = 1, .no_dynamic_id = 1, }; module_usb_stor_driver(ene_ub6250_driver, ene_ub6250_host_template, DRV_NAME); |
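The SD_Status byte above is decoded purely through bit tests. As a minimal illustration of that decoding outside the driver, here is a hedged userspace sketch; the mask values below are hypothetical stand-ins (the real SD_Insert/SD_Ready/SD_IsMMC/SD_HiCapacity/SD_HiSpeed/SD_WtP masks are defined in the driver's header and are not reproduced here):

#include <stdio.h>

/* Hypothetical mask values; the driver's header defines the real ones. */
#define SD_INSERT      0x01
#define SD_READY       0x02
#define SD_IS_MMC      0x08
#define SD_HI_CAPACITY 0x10
#define SD_HI_SPEED    0x20
#define SD_WTP         0x40

/* Mirrors the usb_stor_dbg() dump performed after ene_get_card_status(). */
static void decode_sd_status(unsigned char s)
{
	printf("Insert     = %d\n", !!(s & SD_INSERT));
	printf("Ready      = %d\n", !!(s & SD_READY));
	printf("IsMMC      = %d\n", !!(s & SD_IS_MMC));
	printf("HiCapacity = %d\n", !!(s & SD_HI_CAPACITY));
	printf("HiSpeed    = %d\n", !!(s & SD_HI_SPEED));
	printf("WtP        = %d\n", !!(s & SD_WTP));
}

int main(void)
{
	decode_sd_status(0x13);	/* example: inserted, ready, high capacity */
	return 0;
}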
// SPDX-License-Identifier: GPL-2.0-or-later /* * * Bluetooth HCI UART driver for Intel/AG6xx devices * * Copyright (C) 2016 Intel Corporation */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/skbuff.h> #include <linux/firmware.h> #include <linux/module.h> #include <linux/tty.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include "hci_uart.h" #include "btintel.h" struct ag6xx_data { struct sk_buff *rx_skb; struct sk_buff_head txq; }; struct pbn_entry { __le32 addr; __le32 plen; __u8 data[]; } __packed; static int ag6xx_open(struct hci_uart *hu) { struct ag6xx_data *ag6xx; BT_DBG("hu %p", hu); ag6xx = kzalloc(sizeof(*ag6xx), GFP_KERNEL); if (!ag6xx) return -ENOMEM; skb_queue_head_init(&ag6xx->txq); hu->priv = ag6xx; return 0; } static int ag6xx_close(struct hci_uart *hu) { struct ag6xx_data *ag6xx = hu->priv; BT_DBG("hu %p", hu); skb_queue_purge(&ag6xx->txq); kfree_skb(ag6xx->rx_skb); kfree(ag6xx); hu->priv = NULL; return 0; } static int ag6xx_flush(struct hci_uart *hu) { struct ag6xx_data *ag6xx = hu->priv; BT_DBG("hu %p", hu); skb_queue_purge(&ag6xx->txq); return 0; } static struct sk_buff *ag6xx_dequeue(struct hci_uart *hu) { struct ag6xx_data *ag6xx = hu->priv; struct sk_buff *skb; skb = skb_dequeue(&ag6xx->txq); if (!skb) return skb; /* Prepend skb with frame type */ memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1); return skb; } static int ag6xx_enqueue(struct hci_uart *hu, struct sk_buff *skb) { struct ag6xx_data *ag6xx = hu->priv; skb_queue_tail(&ag6xx->txq, skb); return 0; } static const struct h4_recv_pkt ag6xx_recv_pkts[] = { { H4_RECV_ACL, .recv = hci_recv_frame }, { H4_RECV_SCO, .recv = hci_recv_frame }, { H4_RECV_EVENT, .recv = hci_recv_frame }, }; static int ag6xx_recv(struct hci_uart *hu, const void *data, int count) { struct ag6xx_data *ag6xx = hu->priv; if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; ag6xx->rx_skb = h4_recv_buf(hu->hdev, ag6xx->rx_skb, data, count, ag6xx_recv_pkts, ARRAY_SIZE(ag6xx_recv_pkts)); if (IS_ERR(ag6xx->rx_skb)) { int err = PTR_ERR(ag6xx->rx_skb); bt_dev_err(hu->hdev, "Frame reassembly failed (%d)", err); ag6xx->rx_skb = NULL; return err; } return count; } static int intel_mem_write(struct hci_dev *hdev, u32 addr, u32 plen, const void *data) { /* Can write a maximum of 247 bytes per HCI command.
* HCI cmd Header (3), Intel mem write header (6), data (247). */ while (plen > 0) { struct sk_buff *skb; u8 cmd_param[253], fragment_len = (plen > 247) ? 247 : plen; __le32 leaddr = cpu_to_le32(addr); memcpy(cmd_param, &leaddr, 4); cmd_param[4] = 0; cmd_param[5] = fragment_len; memcpy(cmd_param + 6, data, fragment_len); skb = __hci_cmd_sync(hdev, 0xfc8e, fragment_len + 6, cmd_param, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) return PTR_ERR(skb); kfree_skb(skb); plen -= fragment_len; data += fragment_len; addr += fragment_len; } return 0; } static int ag6xx_setup(struct hci_uart *hu) { struct hci_dev *hdev = hu->hdev; struct sk_buff *skb; struct intel_version ver; const struct firmware *fw; const u8 *fw_ptr; char fwname[64]; bool patched = false; int err; hu->hdev->set_diag = btintel_set_diag; hu->hdev->set_bdaddr = btintel_set_bdaddr; err = btintel_enter_mfg(hdev); if (err) return err; err = btintel_read_version(hdev, &ver); if (err) return err; btintel_version_info(hdev, &ver); /* The hardware platform number has a fixed value of 0x37 and * for now only accept this single value. */ if (ver.hw_platform != 0x37) { bt_dev_err(hdev, "Unsupported Intel hardware platform: 0x%X", ver.hw_platform); return -EINVAL; } /* Only the hardware variant iBT 2.1 (AG6XX) is supported by this * firmware setup method. */ if (ver.hw_variant != 0x0a) { bt_dev_err(hdev, "Unsupported Intel hardware variant: 0x%x", ver.hw_variant); return -EINVAL; } snprintf(fwname, sizeof(fwname), "intel/ibt-hw-%x.%x.bddata", ver.hw_platform, ver.hw_variant); err = request_firmware(&fw, fwname, &hdev->dev); if (err < 0) { bt_dev_err(hdev, "Failed to open Intel bddata file: %s (%d)", fwname, err); goto patch; } bt_dev_info(hdev, "Applying bddata (%s)", fwname); skb = __hci_cmd_sync_ev(hdev, 0xfc2f, fw->size, fw->data, HCI_EV_CMD_STATUS, HCI_CMD_TIMEOUT); if (IS_ERR(skb)) { bt_dev_err(hdev, "Applying bddata failed (%ld)", PTR_ERR(skb)); release_firmware(fw); return PTR_ERR(skb); } kfree_skb(skb); release_firmware(fw); patch: /* If there is no applied patch, fw_patch_num is always 0x00. In other * cases, current firmware is already patched. No need to patch it. */ if (ver.fw_patch_num) { bt_dev_info(hdev, "Device is already patched. patch num: %02x", ver.fw_patch_num); patched = true; goto complete; } snprintf(fwname, sizeof(fwname), "intel/ibt-hw-%x.%x.%x-fw-%x.%x.%x.%x.%x.pbn", ver.hw_platform, ver.hw_variant, ver.hw_revision, ver.fw_variant, ver.fw_revision, ver.fw_build_num, ver.fw_build_ww, ver.fw_build_yy); err = request_firmware(&fw, fwname, &hdev->dev); if (err < 0) { bt_dev_err(hdev, "Failed to open Intel patch file: %s(%d)", fwname, err); goto complete; } fw_ptr = fw->data; bt_dev_info(hdev, "Patching firmware file (%s)", fwname); /* PBN patch file contains a list of binary patches to be applied on top * of the embedded firmware. Each patch entry header contains the target * address and patch size. * * Patch entry: * | addr(le) | patch_len(le) | patch_data | * | 4 Bytes | 4 Bytes | n Bytes | * * PBN file is terminated by a patch entry whose address is 0xffffffff. 
*/ while (fw->size > fw_ptr - fw->data) { struct pbn_entry *pbn = (void *)fw_ptr; u32 addr, plen; if (pbn->addr == 0xffffffff) { bt_dev_info(hdev, "Patching complete"); patched = true; break; } addr = le32_to_cpu(pbn->addr); plen = le32_to_cpu(pbn->plen); if (fw->data + fw->size <= pbn->data + plen) { bt_dev_info(hdev, "Invalid patch len (%d)", plen); break; } bt_dev_info(hdev, "Patching %td/%zu", (fw_ptr - fw->data), fw->size); err = intel_mem_write(hdev, addr, plen, pbn->data); if (err) { bt_dev_err(hdev, "Patching failed"); break; } fw_ptr = pbn->data + plen; } release_firmware(fw); complete: /* Exit manufacturing mode and reset */ err = btintel_exit_mfg(hdev, true, patched); if (err) return err; /* Set the event mask for Intel specific vendor events. This enables * a few extra events that are useful during general operation. */ btintel_set_event_mask_mfg(hdev, false); btintel_check_bdaddr(hdev); return 0; } static const struct hci_uart_proto ag6xx_proto = { .id = HCI_UART_AG6XX, .name = "AG6XX", .manufacturer = 2, .open = ag6xx_open, .close = ag6xx_close, .flush = ag6xx_flush, .setup = ag6xx_setup, .recv = ag6xx_recv, .enqueue = ag6xx_enqueue, .dequeue = ag6xx_dequeue, }; int __init ag6xx_init(void) { return hci_uart_register_proto(&ag6xx_proto); } int __exit ag6xx_deinit(void) { return hci_uart_unregister_proto(&ag6xx_proto); } |
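The PBN walk in ag6xx_setup() is easy to exercise in isolation. Below is a sketch of the same parse written as a standalone userspace routine over an in-memory firmware image; it assumes only the entry layout documented in the driver (little-endian addr and plen followed by plen data bytes, terminated by an entry whose addr is 0xffffffff):

#include <stdint.h>
#include <stdio.h>

/* Read a little-endian 32-bit value, as le32_to_cpu() does in the driver. */
static uint32_t get_le32(const uint8_t *p)
{
	return p[0] | p[1] << 8 | p[2] << 16 | (uint32_t)p[3] << 24;
}

/* Walk a PBN image; returns 0 once the 0xffffffff terminator is reached. */
static int pbn_walk(const uint8_t *fw, size_t size)
{
	size_t off = 0;

	while (off + 8 <= size) {
		uint32_t addr = get_le32(fw + off);
		uint32_t plen = get_le32(fw + off + 4);

		if (addr == 0xffffffff)
			return 0;		/* patching complete */
		if (plen > size - off - 8)
			return -1;		/* invalid patch length */
		printf("patch @0x%08x, %u bytes\n", (unsigned)addr,
		       (unsigned)plen);
		off += 8 + plen;		/* next entry follows the data */
	}
	return -1;				/* no terminator found */
}

int main(void)
{
	/* Smallest valid image: just the terminator entry. */
	const uint8_t img[8] = { 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0 };

	return pbn_walk(img, sizeof(img));
}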
// SPDX-License-Identifier: GPL-2.0 /* * cfg80211 wext compat for managed mode. * * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> * Copyright (C) 2009, 2020-2023 Intel Corporation */ #include <linux/export.h> #include <linux/etherdevice.h> #include <linux/if_arp.h> #include <linux/slab.h> #include <net/cfg80211.h> #include <net/cfg80211-wext.h> #include "wext-compat.h" #include "nl80211.h" int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { struct cfg80211_cached_keys *ck = NULL; const u8 *prev_bssid = NULL; int err, i; ASSERT_RTNL(); lockdep_assert_wiphy(wdev->wiphy); if (!netif_running(wdev->netdev)) return 0; wdev->wext.connect.ie = wdev->wext.ie; wdev->wext.connect.ie_len = wdev->wext.ie_len; /* Use default background scan period */ wdev->wext.connect.bg_scan_period = -1; if (wdev->wext.keys) { wdev->wext.keys->def = wdev->wext.default_key; if (wdev->wext.default_key != -1) wdev->wext.connect.privacy = true; } if (!wdev->wext.connect.ssid_len) return 0; if (wdev->wext.keys && wdev->wext.keys->def != -1) { ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL); if (!ck) return -ENOMEM; for (i = 0; i < 4; i++) ck->params[i].key = ck->data[i]; } if (wdev->wext.prev_bssid_valid) prev_bssid = wdev->wext.prev_bssid; err = cfg80211_connect(rdev, wdev->netdev, &wdev->wext.connect, ck, prev_bssid); if (err) kfree_sensitive(ck); return err; } int cfg80211_mgd_wext_siwfreq(struct net_device *dev, struct iw_request_info *info, struct iw_freq *wextfreq, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct ieee80211_channel *chan = NULL; int err, freq; /* call only for station!
*/ if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) return -EINVAL; freq = cfg80211_wext_freq(wextfreq); if (freq < 0) return freq; if (freq) { chan = ieee80211_get_channel(wdev->wiphy, freq); if (!chan) return -EINVAL; if (chan->flags & IEEE80211_CHAN_DISABLED) return -EINVAL; } if (wdev->conn) { bool event = true; if (wdev->wext.connect.channel == chan) return 0; /* if SSID set, we'll try right again, avoid event */ if (wdev->wext.connect.ssid_len) event = false; err = cfg80211_disconnect(rdev, dev, WLAN_REASON_DEAUTH_LEAVING, event); if (err) return err; } wdev->wext.connect.channel = chan; return cfg80211_mgd_wext_connect(rdev, wdev); } int cfg80211_mgd_wext_giwfreq(struct net_device *dev, struct iw_request_info *info, struct iw_freq *freq, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct ieee80211_channel *chan = NULL; /* call only for station! */ if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) return -EINVAL; if (wdev->valid_links) return -EOPNOTSUPP; if (wdev->links[0].client.current_bss) chan = wdev->links[0].client.current_bss->pub.channel; else if (wdev->wext.connect.channel) chan = wdev->wext.connect.channel; if (chan) { freq->m = chan->center_freq; freq->e = 6; return 0; } /* no channel if not joining */ return -EINVAL; } int cfg80211_mgd_wext_siwessid(struct net_device *dev, struct iw_request_info *info, struct iw_point *data, char *ssid) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); size_t len = data->length; int err; /* call only for station! */ if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) return -EINVAL; if (!data->flags) len = 0; /* iwconfig uses nul termination in SSID.. */ if (len > 0 && ssid[len - 1] == '\0') len--; if (wdev->conn) { bool event = true; if (wdev->wext.connect.ssid && len && len == wdev->wext.connect.ssid_len && memcmp(wdev->wext.connect.ssid, ssid, len) == 0) return 0; /* if SSID set now, we'll try to connect, avoid event */ if (len) event = false; err = cfg80211_disconnect(rdev, dev, WLAN_REASON_DEAUTH_LEAVING, event); if (err) return err; } wdev->wext.prev_bssid_valid = false; wdev->wext.connect.ssid = wdev->wext.ssid; memcpy(wdev->wext.ssid, ssid, len); wdev->wext.connect.ssid_len = len; wdev->wext.connect.crypto.control_port = false; wdev->wext.connect.crypto.control_port_ethertype = cpu_to_be16(ETH_P_PAE); return cfg80211_mgd_wext_connect(rdev, wdev); } int cfg80211_mgd_wext_giwessid(struct net_device *dev, struct iw_request_info *info, struct iw_point *data, char *ssid) { struct wireless_dev *wdev = dev->ieee80211_ptr; int ret = 0; /* call only for station! 
*/ if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) return -EINVAL; if (wdev->valid_links) return -EINVAL; data->flags = 0; if (wdev->links[0].client.current_bss) { const struct element *ssid_elem; rcu_read_lock(); ssid_elem = ieee80211_bss_get_elem( &wdev->links[0].client.current_bss->pub, WLAN_EID_SSID); if (ssid_elem) { data->flags = 1; data->length = ssid_elem->datalen; if (data->length > IW_ESSID_MAX_SIZE) ret = -EINVAL; else memcpy(ssid, ssid_elem->data, data->length); } rcu_read_unlock(); } else if (wdev->wext.connect.ssid && wdev->wext.connect.ssid_len) { data->flags = 1; data->length = wdev->wext.connect.ssid_len; memcpy(ssid, wdev->wext.connect.ssid, data->length); } return ret; } int cfg80211_mgd_wext_siwap(struct net_device *dev, struct iw_request_info *info, struct sockaddr *ap_addr, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); u8 *bssid = ap_addr->sa_data; int err; /* call only for station! */ if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) return -EINVAL; if (ap_addr->sa_family != ARPHRD_ETHER) return -EINVAL; /* automatic mode */ if (is_zero_ether_addr(bssid) || is_broadcast_ether_addr(bssid)) bssid = NULL; if (wdev->conn) { /* both automatic */ if (!bssid && !wdev->wext.connect.bssid) return 0; /* fixed already - and no change */ if (wdev->wext.connect.bssid && bssid && ether_addr_equal(bssid, wdev->wext.connect.bssid)) return 0; err = cfg80211_disconnect(rdev, dev, WLAN_REASON_DEAUTH_LEAVING, false); if (err) return err; } if (bssid) { memcpy(wdev->wext.bssid, bssid, ETH_ALEN); wdev->wext.connect.bssid = wdev->wext.bssid; } else wdev->wext.connect.bssid = NULL; return cfg80211_mgd_wext_connect(rdev, wdev); } int cfg80211_mgd_wext_giwap(struct net_device *dev, struct iw_request_info *info, struct sockaddr *ap_addr, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; /* call only for station! 
*/ if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) return -EINVAL; ap_addr->sa_family = ARPHRD_ETHER; if (wdev->valid_links) return -EOPNOTSUPP; if (wdev->links[0].client.current_bss) memcpy(ap_addr->sa_data, wdev->links[0].client.current_bss->pub.bssid, ETH_ALEN); else eth_zero_addr(ap_addr->sa_data); return 0; } int cfg80211_wext_siwgenie(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_point *data = &wrqu->data; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); int ie_len = data->length; u8 *ie = extra; if (wdev->iftype != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; if (!ie_len) ie = NULL; guard(wiphy)(wdev->wiphy); /* no change */ if (wdev->wext.ie_len == ie_len && memcmp(wdev->wext.ie, ie, ie_len) == 0) return 0; if (ie_len) { ie = kmemdup(extra, ie_len, GFP_KERNEL); if (!ie) return -ENOMEM; } else { ie = NULL; } kfree(wdev->wext.ie); wdev->wext.ie = ie; wdev->wext.ie_len = ie_len; if (wdev->conn) return cfg80211_disconnect(rdev, dev, WLAN_REASON_DEAUTH_LEAVING, false); /* userspace better not think we'll reconnect */ return 0; } int cfg80211_wext_siwmlme(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct iw_mlme *mlme = (struct iw_mlme *)extra; struct cfg80211_registered_device *rdev; if (!wdev) return -EOPNOTSUPP; rdev = wiphy_to_rdev(wdev->wiphy); if (wdev->iftype != NL80211_IFTYPE_STATION) return -EINVAL; if (mlme->addr.sa_family != ARPHRD_ETHER) return -EINVAL; guard(wiphy)(&rdev->wiphy); switch (mlme->cmd) { case IW_MLME_DEAUTH: case IW_MLME_DISASSOC: return cfg80211_disconnect(rdev, dev, mlme->reason_code, true); default: return -EOPNOTSUPP; } } |
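A note on the giwfreq path above: wext reports frequencies as a mantissa/exponent pair, value = m * 10^e, so filling m with the channel's center_freq (which cfg80211 keeps in MHz) and e with 6 encodes the frequency in Hz. A small decoding sketch (the struct is a trimmed stand-in for struct iw_freq from <linux/wireless.h>, not the real definition):

#include <stdio.h>

/* Trimmed stand-in for struct iw_freq; the real one has more fields. */
struct iw_freq_lite {
	int m;		/* mantissa */
	short e;	/* exponent: value = m * 10^e */
};

static double iw_freq_to_mhz(const struct iw_freq_lite *f)
{
	double hz = f->m;
	int i;

	for (i = 0; i < f->e; i++)
		hz *= 10.0;
	return hz / 1e6;
}

int main(void)
{
	/* As filled in above: m = center_freq in MHz, e = 6. */
	struct iw_freq_lite f = { .m = 2437, .e = 6 };

	printf("%.0f MHz\n", iw_freq_to_mhz(&f));	/* 2437 MHz */
	return 0;
}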
// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2005 Silicon Graphics, Inc. * Copyright (c) 2016-2025 Christoph Hellwig. * All Rights Reserved.
*/ #include "xfs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_iomap.h" #include "xfs_trace.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" #include "xfs_reflink.h" #include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_icache.h" #include "xfs_zone_alloc.h" #include "xfs_rtgroup.h" struct xfs_writepage_ctx { struct iomap_writepage_ctx ctx; unsigned int data_seq; unsigned int cow_seq; }; static inline struct xfs_writepage_ctx * XFS_WPC(struct iomap_writepage_ctx *ctx) { return container_of(ctx, struct xfs_writepage_ctx, ctx); } /* * Fast and loose check if this write could update the on-disk inode size. */ static inline bool xfs_ioend_is_append(struct iomap_ioend *ioend) { return ioend->io_offset + ioend->io_size > XFS_I(ioend->io_inode)->i_disk_size; } /* * Update on-disk file size now that data has been written to disk. */ int xfs_setfilesize( struct xfs_inode *ip, xfs_off_t offset, size_t size) { struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; xfs_fsize_t isize; int error; error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp); if (error) return error; xfs_ilock(ip, XFS_ILOCK_EXCL); isize = xfs_new_eof(ip, offset + size); if (!isize) { xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_trans_cancel(tp); return 0; } trace_xfs_setfilesize(ip, offset, size); ip->i_disk_size = isize; xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); return xfs_trans_commit(tp); } static void xfs_ioend_put_open_zones( struct iomap_ioend *ioend) { struct iomap_ioend *tmp; /* * Put the open zone for all ioends merged into this one (if any). */ list_for_each_entry(tmp, &ioend->io_list, io_list) xfs_open_zone_put(tmp->io_private); /* * The main ioend might not have an open zone if the submission failed * before xfs_zone_alloc_and_submit got called. */ if (ioend->io_private) xfs_open_zone_put(ioend->io_private); } /* * IO write completion. */ STATIC void xfs_end_ioend( struct iomap_ioend *ioend) { struct xfs_inode *ip = XFS_I(ioend->io_inode); struct xfs_mount *mp = ip->i_mount; bool is_zoned = xfs_is_zoned_inode(ip); xfs_off_t offset = ioend->io_offset; size_t size = ioend->io_size; unsigned int nofs_flag; int error; /* * We can allocate memory here while doing writeback on behalf of * memory reclaim. To avoid memory allocation deadlocks set the * task-wide nofs context for the following operations. */ nofs_flag = memalloc_nofs_save(); /* * Just clean up the in-memory structures if the fs has been shut down. */ if (xfs_is_shutdown(mp)) { error = -EIO; goto done; } /* * Clean up all COW blocks and underlying data fork delalloc blocks on * I/O error. The delalloc punch is required because this ioend was * mapped to blocks in the COW fork and the associated pages are no * longer dirty. If we don't remove delalloc blocks here, they become * stale and can corrupt free space accounting on unmount. */ error = blk_status_to_errno(ioend->io_bio.bi_status); if (unlikely(error)) { if (ioend->io_flags & IOMAP_IOEND_SHARED) { ASSERT(!is_zoned); xfs_reflink_cancel_cow_range(ip, offset, size, true); xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, offset, offset + size, NULL); } goto done; } /* * Success: commit the COW or unwritten blocks if needed. 
*/ if (is_zoned) error = xfs_zoned_end_io(ip, offset, size, ioend->io_sector, ioend->io_private, NULLFSBLOCK); else if (ioend->io_flags & IOMAP_IOEND_SHARED) error = xfs_reflink_end_cow(ip, offset, size); else if (ioend->io_flags & IOMAP_IOEND_UNWRITTEN) error = xfs_iomap_write_unwritten(ip, offset, size, false); if (!error && !(ioend->io_flags & IOMAP_IOEND_DIRECT) && xfs_ioend_is_append(ioend)) error = xfs_setfilesize(ip, offset, size); done: if (is_zoned) xfs_ioend_put_open_zones(ioend); iomap_finish_ioends(ioend, error); memalloc_nofs_restore(nofs_flag); } /* * Finish all pending IO completions that require transactional modifications. * * We try to merge physical and logically contiguous ioends before completion to * minimise the number of transactions we need to perform during IO completion. * Both unwritten extent conversion and COW remapping need to iterate and modify * one physical extent at a time, so we gain nothing by merging physically * discontiguous extents here. * * The ioend chain length that we can be processing here is largely unbound in * length and we may have to perform significant amounts of work on each ioend * to complete it. Hence we have to be careful about holding the CPU for too * long in this loop. */ void xfs_end_io( struct work_struct *work) { struct xfs_inode *ip = container_of(work, struct xfs_inode, i_ioend_work); struct iomap_ioend *ioend; struct list_head tmp; unsigned long flags; spin_lock_irqsave(&ip->i_ioend_lock, flags); list_replace_init(&ip->i_ioend_list, &tmp); spin_unlock_irqrestore(&ip->i_ioend_lock, flags); iomap_sort_ioends(&tmp); while ((ioend = list_first_entry_or_null(&tmp, struct iomap_ioend, io_list))) { list_del_init(&ioend->io_list); iomap_ioend_try_merge(ioend, &tmp); xfs_end_ioend(ioend); cond_resched(); } } void xfs_end_bio( struct bio *bio) { struct iomap_ioend *ioend = iomap_ioend_from_bio(bio); struct xfs_inode *ip = XFS_I(ioend->io_inode); struct xfs_mount *mp = ip->i_mount; unsigned long flags; /* * For Appends record the actually written block number and set the * boundary flag if needed. */ if (IS_ENABLED(CONFIG_XFS_RT) && bio_is_zone_append(bio)) { ioend->io_sector = bio->bi_iter.bi_sector; xfs_mark_rtg_boundary(ioend); } spin_lock_irqsave(&ip->i_ioend_lock, flags); if (list_empty(&ip->i_ioend_list)) WARN_ON_ONCE(!queue_work(mp->m_unwritten_workqueue, &ip->i_ioend_work)); list_add_tail(&ioend->io_list, &ip->i_ioend_list); spin_unlock_irqrestore(&ip->i_ioend_lock, flags); } /* * Fast revalidation of the cached writeback mapping. Return true if the current * mapping is valid, false otherwise. */ static bool xfs_imap_valid( struct iomap_writepage_ctx *wpc, struct xfs_inode *ip, loff_t offset) { if (offset < wpc->iomap.offset || offset >= wpc->iomap.offset + wpc->iomap.length) return false; /* * If this is a COW mapping, it is sufficient to check that the mapping * covers the offset. Be careful to check this first because the caller * can revalidate a COW mapping without updating the data seqno. */ if (wpc->iomap.flags & IOMAP_F_SHARED) return true; /* * This is not a COW mapping. Check the sequence number of the data fork * because concurrent changes could have invalidated the extent. Check * the COW fork because concurrent changes since the last time we * checked (and found nothing at this offset) could have added * overlapping blocks. 
*/ if (XFS_WPC(wpc)->data_seq != READ_ONCE(ip->i_df.if_seq)) { trace_xfs_wb_data_iomap_invalid(ip, &wpc->iomap, XFS_WPC(wpc)->data_seq, XFS_DATA_FORK); return false; } if (xfs_inode_has_cow_data(ip) && XFS_WPC(wpc)->cow_seq != READ_ONCE(ip->i_cowfp->if_seq)) { trace_xfs_wb_cow_iomap_invalid(ip, &wpc->iomap, XFS_WPC(wpc)->cow_seq, XFS_COW_FORK); return false; } return true; } static int xfs_map_blocks( struct iomap_writepage_ctx *wpc, struct inode *inode, loff_t offset, unsigned int len) { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; ssize_t count = i_blocksize(inode); xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count); xfs_fileoff_t cow_fsb; int whichfork; struct xfs_bmbt_irec imap; struct xfs_iext_cursor icur; int retries = 0; int error = 0; unsigned int *seq; if (xfs_is_shutdown(mp)) return -EIO; XFS_ERRORTAG_DELAY(mp, XFS_ERRTAG_WB_DELAY_MS); /* * COW fork blocks can overlap data fork blocks even if the blocks * aren't shared. COW I/O always takes precedence, so we must always * check for overlap on reflink inodes unless the mapping is already a * COW one, or the COW fork hasn't changed from the last time we looked * at it. * * It's safe to check the COW fork if_seq here without the ILOCK because * we've indirectly protected against concurrent updates: writeback has * the page locked, which prevents concurrent invalidations by reflink * and directio and prevents concurrent buffered writes to the same * page. Changes to if_seq always happen under i_lock, which protects * against concurrent updates and provides a memory barrier on the way * out that ensures that we always see the current value. */ if (xfs_imap_valid(wpc, ip, offset)) return 0; /* * If we don't have a valid map, now it's time to get a new one for this * offset. This will convert delayed allocations (including COW ones) * into real extents. If we return without a valid map, it means we * landed in a hole and we skip the block. */ retry: cow_fsb = NULLFILEOFF; whichfork = XFS_DATA_FORK; xfs_ilock(ip, XFS_ILOCK_SHARED); ASSERT(!xfs_need_iread_extents(&ip->i_df)); /* * Check if this offset is covered by a COW extent, and if so use it * directly instead of looking up anything in the data fork. */ if (xfs_inode_has_cow_data(ip) && xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap)) cow_fsb = imap.br_startoff; if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) { XFS_WPC(wpc)->cow_seq = READ_ONCE(ip->i_cowfp->if_seq); xfs_iunlock(ip, XFS_ILOCK_SHARED); whichfork = XFS_COW_FORK; goto allocate_blocks; } /* * No COW extent overlap. Revalidate now that we may have updated * ->cow_seq. If the data mapping is still valid, we're done. */ if (xfs_imap_valid(wpc, ip, offset)) { xfs_iunlock(ip, XFS_ILOCK_SHARED); return 0; } /* * If we don't have a valid map, now it's time to get a new one for this * offset. This will convert delayed allocations (including COW ones) * into real extents. */ if (!xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap)) imap.br_startoff = end_fsb; /* fake a hole past EOF */ XFS_WPC(wpc)->data_seq = READ_ONCE(ip->i_df.if_seq); xfs_iunlock(ip, XFS_ILOCK_SHARED); /* landed in a hole or beyond EOF? */ if (imap.br_startoff > offset_fsb) { imap.br_blockcount = imap.br_startoff - offset_fsb; imap.br_startoff = offset_fsb; imap.br_startblock = HOLESTARTBLOCK; imap.br_state = XFS_EXT_NORM; } /* * Truncate to the next COW extent if there is one.
This is the only * opportunity to do this because we can skip COW fork lookups for the * subsequent blocks in the mapping; however, the requirement to treat * the COW range separately remains. */ if (cow_fsb != NULLFILEOFF && cow_fsb < imap.br_startoff + imap.br_blockcount) imap.br_blockcount = cow_fsb - imap.br_startoff; /* got a delalloc extent? */ if (imap.br_startblock != HOLESTARTBLOCK && isnullstartblock(imap.br_startblock)) goto allocate_blocks; xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0, XFS_WPC(wpc)->data_seq); trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap); return 0; allocate_blocks: /* * Convert a dellalloc extent to a real one. The current page is held * locked so nothing could have removed the block backing offset_fsb, * although it could have moved from the COW to the data fork by another * thread. */ if (whichfork == XFS_COW_FORK) seq = &XFS_WPC(wpc)->cow_seq; else seq = &XFS_WPC(wpc)->data_seq; error = xfs_bmapi_convert_delalloc(ip, whichfork, offset, &wpc->iomap, seq); if (error) { /* * If we failed to find the extent in the COW fork we might have * raced with a COW to data fork conversion or truncate. * Restart the lookup to catch the extent in the data fork for * the former case, but prevent additional retries to avoid * looping forever for the latter case. */ if (error == -EAGAIN && whichfork == XFS_COW_FORK && !retries++) goto retry; ASSERT(error != -EAGAIN); return error; } /* * Due to merging the return real extent might be larger than the * original delalloc one. Trim the return extent to the next COW * boundary again to force a re-lookup. */ if (whichfork != XFS_COW_FORK && cow_fsb != NULLFILEOFF) { loff_t cow_offset = XFS_FSB_TO_B(mp, cow_fsb); if (cow_offset < wpc->iomap.offset + wpc->iomap.length) wpc->iomap.length = cow_offset - wpc->iomap.offset; } ASSERT(wpc->iomap.offset <= offset); ASSERT(wpc->iomap.offset + wpc->iomap.length > offset); trace_xfs_map_blocks_alloc(ip, offset, count, whichfork, &imap); return 0; } static bool xfs_ioend_needs_wq_completion( struct iomap_ioend *ioend) { /* Changing inode size requires a transaction. */ if (xfs_ioend_is_append(ioend)) return true; /* Extent manipulation requires a transaction. */ if (ioend->io_flags & (IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_SHARED)) return true; /* Page cache invalidation cannot be done in irq context. */ if (ioend->io_flags & IOMAP_IOEND_DONTCACHE) return true; return false; } static int xfs_submit_ioend( struct iomap_writepage_ctx *wpc, int status) { struct iomap_ioend *ioend = wpc->ioend; unsigned int nofs_flag; /* * We can allocate memory here while doing writeback on behalf of * memory reclaim. To avoid memory allocation deadlocks set the * task-wide nofs context for the following operations. */ nofs_flag = memalloc_nofs_save(); /* Convert CoW extents to regular */ if (!status && (ioend->io_flags & IOMAP_IOEND_SHARED)) { status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode), ioend->io_offset, ioend->io_size); } memalloc_nofs_restore(nofs_flag); /* send ioends that might require a transaction to the completion wq */ if (xfs_ioend_needs_wq_completion(ioend)) ioend->io_bio.bi_end_io = xfs_end_bio; if (status) return status; submit_bio(&ioend->io_bio); return 0; } /* * If the folio has delalloc blocks on it, the caller is asking us to punch them * out. If we don't, we can leave a stale delalloc mapping covered by a clean * page that needs to be dirtied again before the delalloc mapping can be * converted. 
This stale delalloc mapping can trip up a later direct I/O read * operation on the same region. * * We prevent this by truncating away the delalloc regions on the folio. Because * they are delalloc, we can do this without needing a transaction. Indeed - if * we get ENOSPC errors, we have to be able to do this truncation without a * transaction as there is no space left for block reservation (typically why * we see a ENOSPC in writeback). */ static void xfs_discard_folio( struct folio *folio, loff_t pos) { struct xfs_inode *ip = XFS_I(folio->mapping->host); struct xfs_mount *mp = ip->i_mount; if (xfs_is_shutdown(mp)) return; xfs_alert_ratelimited(mp, "page discard on page "PTR_FMT", inode 0x%llx, pos %llu.", folio, ip->i_ino, pos); /* * The end of the punch range is always the offset of the first * byte of the next folio. Hence the end offset is only dependent on the * folio itself and not the start offset that is passed in. */ xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, pos, folio_pos(folio) + folio_size(folio), NULL); } static const struct iomap_writeback_ops xfs_writeback_ops = { .map_blocks = xfs_map_blocks, .submit_ioend = xfs_submit_ioend, .discard_folio = xfs_discard_folio, }; struct xfs_zoned_writepage_ctx { struct iomap_writepage_ctx ctx; struct xfs_open_zone *open_zone; }; static inline struct xfs_zoned_writepage_ctx * XFS_ZWPC(struct iomap_writepage_ctx *ctx) { return container_of(ctx, struct xfs_zoned_writepage_ctx, ctx); } static int xfs_zoned_map_blocks( struct iomap_writepage_ctx *wpc, struct inode *inode, loff_t offset, unsigned int len) { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + len); xfs_filblks_t count_fsb; struct xfs_bmbt_irec imap, del; struct xfs_iext_cursor icur; if (xfs_is_shutdown(mp)) return -EIO; XFS_ERRORTAG_DELAY(mp, XFS_ERRTAG_WB_DELAY_MS); /* * All dirty data must be covered by delalloc extents. But truncate can * remove delalloc extents underneath us or reduce their size. * Returning a hole tells iomap to not write back any data from this * range, which is the right thing to do in that case. * * Otherwise just tell iomap to treat ranges previously covered by a * delalloc extent as mapped. The actual block allocation will be done * just before submitting the bio. * * This implies we never map outside folios that are locked or marked * as under writeback, and thus there is no need check the fork sequence * count here. 
*/ xfs_ilock(ip, XFS_ILOCK_EXCL); if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap)) imap.br_startoff = end_fsb; /* fake a hole past EOF */ if (imap.br_startoff > offset_fsb) { imap.br_blockcount = imap.br_startoff - offset_fsb; imap.br_startoff = offset_fsb; imap.br_startblock = HOLESTARTBLOCK; imap.br_state = XFS_EXT_NORM; xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0, 0); return 0; } end_fsb = min(end_fsb, imap.br_startoff + imap.br_blockcount); count_fsb = end_fsb - offset_fsb; del = imap; xfs_trim_extent(&del, offset_fsb, count_fsb); xfs_bmap_del_extent_delay(ip, XFS_COW_FORK, &icur, &imap, &del, XFS_BMAPI_REMAP); xfs_iunlock(ip, XFS_ILOCK_EXCL); wpc->iomap.type = IOMAP_MAPPED; wpc->iomap.flags = IOMAP_F_DIRTY; wpc->iomap.bdev = mp->m_rtdev_targp->bt_bdev; wpc->iomap.offset = offset; wpc->iomap.length = XFS_FSB_TO_B(mp, count_fsb); wpc->iomap.flags = IOMAP_F_ANON_WRITE; trace_xfs_zoned_map_blocks(ip, offset, wpc->iomap.length); return 0; } static int xfs_zoned_submit_ioend( struct iomap_writepage_ctx *wpc, int status) { wpc->ioend->io_bio.bi_end_io = xfs_end_bio; if (status) return status; xfs_zone_alloc_and_submit(wpc->ioend, &XFS_ZWPC(wpc)->open_zone); return 0; } static const struct iomap_writeback_ops xfs_zoned_writeback_ops = { .map_blocks = xfs_zoned_map_blocks, .submit_ioend = xfs_zoned_submit_ioend, .discard_folio = xfs_discard_folio, }; STATIC int xfs_vm_writepages( struct address_space *mapping, struct writeback_control *wbc) { struct xfs_inode *ip = XFS_I(mapping->host); xfs_iflags_clear(ip, XFS_ITRUNCATED); if (xfs_is_zoned_inode(ip)) { struct xfs_zoned_writepage_ctx xc = { }; int error; error = iomap_writepages(mapping, wbc, &xc.ctx, &xfs_zoned_writeback_ops); if (xc.open_zone) xfs_open_zone_put(xc.open_zone); return error; } else { struct xfs_writepage_ctx wpc = { }; return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops); } } STATIC int xfs_dax_writepages( struct address_space *mapping, struct writeback_control *wbc) { struct xfs_inode *ip = XFS_I(mapping->host); xfs_iflags_clear(ip, XFS_ITRUNCATED); return dax_writeback_mapping_range(mapping, xfs_inode_buftarg(ip)->bt_daxdev, wbc); } STATIC sector_t xfs_vm_bmap( struct address_space *mapping, sector_t block) { struct xfs_inode *ip = XFS_I(mapping->host); trace_xfs_vm_bmap(ip); /* * The swap code (ab-)uses ->bmap to get a block mapping and then * bypasses the file system for actual I/O. We really can't allow * that on reflinks inodes, so we have to skip out here. And yes, * 0 is the magic code for a bmap error. * * Since we don't pass back blockdev info, we can't return bmap * information for rt files either. */ if (xfs_is_cow_inode(ip) || XFS_IS_REALTIME_INODE(ip)) return 0; return iomap_bmap(mapping, block, &xfs_read_iomap_ops); } STATIC int xfs_vm_read_folio( struct file *unused, struct folio *folio) { return iomap_read_folio(folio, &xfs_read_iomap_ops); } STATIC void xfs_vm_readahead( struct readahead_control *rac) { iomap_readahead(rac, &xfs_read_iomap_ops); } static int xfs_vm_swap_activate( struct swap_info_struct *sis, struct file *swap_file, sector_t *span) { struct xfs_inode *ip = XFS_I(file_inode(swap_file)); /* * Swap file activation can race against concurrent shared extent * removal in files that have been cloned. If this happens, * iomap_swapfile_iter() can fail because it encountered a shared * extent even though an operation is in progress to remove those * shared extents. 
* * This race becomes problematic when we defer extent removal * operations beyond the end of a syscall (i.e. use async background * processing algorithms). Users think the extents are no longer * shared, but iomap_swapfile_iter() still sees them as shared * because the refcountbt entries for the extents being removed have * not yet been updated. Hence the swapon call fails unexpectedly. * * The race condition is currently most obvious from the unlink() * operation as extent removal is deferred until after the last * reference to the inode goes away. We then process the extent * removal asynchronously, hence triggers the "syscall completed but * work not done" condition mentioned above. To close this race * window, we need to flush any pending inodegc operations to ensure * they have updated the refcountbt records before we try to map the * swapfile. */ xfs_inodegc_flush(ip->i_mount); /* * Direct the swap code to the correct block device when this file * sits on the RT device. */ sis->bdev = xfs_inode_buftarg(ip)->bt_bdev; return iomap_swapfile_activate(sis, swap_file, span, &xfs_read_iomap_ops); } const struct address_space_operations xfs_address_space_operations = { .read_folio = xfs_vm_read_folio, .readahead = xfs_vm_readahead, .writepages = xfs_vm_writepages, .dirty_folio = iomap_dirty_folio, .release_folio = iomap_release_folio, .invalidate_folio = iomap_invalidate_folio, .bmap = xfs_vm_bmap, .migrate_folio = filemap_migrate_folio, .is_partially_uptodate = iomap_is_partially_uptodate, .error_remove_folio = generic_error_remove_folio, .swap_activate = xfs_vm_swap_activate, }; const struct address_space_operations xfs_dax_aops = { .writepages = xfs_dax_writepages, .dirty_folio = noop_dirty_folio, .swap_activate = xfs_vm_swap_activate, }; |
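The data_seq/cow_seq checks in xfs_imap_valid() are an instance of a general caching pattern: remember the fork's sequence number alongside the cached mapping, and declare the cache stale whenever the fork has changed since. A filesystem-agnostic sketch of the pattern (all names here are illustrative stand-ins, not XFS APIs):

#include <stdbool.h>
#include <stdint.h>

/* Illustrative stand-ins; not the XFS structures. */
struct fork {
	uint32_t seq;		/* bumped on every extent-tree change */
};

struct cached_map {
	uint64_t offset;	/* start of the cached mapping */
	uint64_t length;
	uint32_t seq;		/* fork->seq observed when the map was cached */
};

/* Valid only if it covers offset AND the fork is unchanged since caching. */
static bool map_valid(const struct cached_map *map, const struct fork *fork,
		      uint64_t offset)
{
	if (offset < map->offset || offset >= map->offset + map->length)
		return false;
	return map->seq == fork->seq;
}

int main(void)
{
	struct fork df = { .seq = 42 };
	struct cached_map m = { .offset = 0, .length = 4096, .seq = 42 };

	bool ok = map_valid(&m, &df, 100);	/* true: covered, unchanged */
	df.seq++;				/* a concurrent change lands */
	ok = map_valid(&m, &df, 100);		/* false: must re-look-up */
	return ok;
}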
// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/hfsplus/wrapper.c * * Copyright (C) 2001 * Brad Boyer (flar@allandria.com) * (C) 2003 Ardis Technologies <roman@ardistech.com> * * Handling of HFS wrappers around HFS+ volumes */ #include <linux/fs.h> #include <linux/blkdev.h> #include <linux/cdrom.h> #include <linux/unaligned.h> #include "hfsplus_fs.h" #include "hfsplus_raw.h" struct hfsplus_wd { u32 ablk_size; u16 ablk_start; u16 embed_start; u16 embed_count; }; /** * hfsplus_submit_bio - Perform block I/O * @sb: super block of volume for I/O * @sector: block to read or write, for blocks of HFSPLUS_SECTOR_SIZE bytes * @buf: buffer for I/O * @data: output pointer for location of requested data * @opf: I/O operation type and flags * * The unit of I/O is hfsplus_min_io_size(sb), which may be bigger than * HFSPLUS_SECTOR_SIZE, and @buf must be sized accordingly. On reads * @data will return a pointer to the start of the requested sector, * which may not be the same location as @buf. * * If @sector is not aligned to the bdev logical block size it will * be rounded down. For writes this means that @buf should contain data * that starts at the rounded-down address. As long as the data was * read using hfsplus_submit_bio() and the same buffer is used things * will work correctly. * * Returns: %0 on success else -errno code */ int hfsplus_submit_bio(struct super_block *sb, sector_t sector, void *buf, void **data, blk_opf_t opf) { u64 io_size = hfsplus_min_io_size(sb); loff_t start = (loff_t)sector << HFSPLUS_SECTOR_SHIFT; int offset = start & (io_size - 1); if ((opf & REQ_OP_MASK) != REQ_OP_WRITE && data) *data = (u8 *)buf + offset; /* * Align sector to hardware sector size and find offset. We assume that * io_size is a power of two, which _should_ be true.
*/ sector &= ~((io_size >> HFSPLUS_SECTOR_SHIFT) - 1); return bdev_rw_virt(sb->s_bdev, sector, buf, io_size, opf); } static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd) { u32 extent; u16 attrib; __be16 sig; sig = *(__be16 *)(bufptr + HFSP_WRAPOFF_EMBEDSIG); if (sig != cpu_to_be16(HFSPLUS_VOLHEAD_SIG) && sig != cpu_to_be16(HFSPLUS_VOLHEAD_SIGX)) return 0; attrib = be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_ATTRIB)); if (!(attrib & HFSP_WRAP_ATTRIB_SLOCK) || !(attrib & HFSP_WRAP_ATTRIB_SPARED)) return 0; wd->ablk_size = be32_to_cpu(*(__be32 *)(bufptr + HFSP_WRAPOFF_ABLKSIZE)); if (wd->ablk_size < HFSPLUS_SECTOR_SIZE) return 0; if (wd->ablk_size % HFSPLUS_SECTOR_SIZE) return 0; wd->ablk_start = be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_ABLKSTART)); extent = get_unaligned_be32(bufptr + HFSP_WRAPOFF_EMBEDEXT); wd->embed_start = (extent >> 16) & 0xFFFF; wd->embed_count = extent & 0xFFFF; return 1; } static int hfsplus_get_last_session(struct super_block *sb, sector_t *start, sector_t *size) { struct cdrom_device_info *cdi = disk_to_cdi(sb->s_bdev->bd_disk); /* default values */ *start = 0; *size = bdev_nr_sectors(sb->s_bdev); if (HFSPLUS_SB(sb)->session >= 0) { struct cdrom_tocentry te; if (!cdi) return -EINVAL; te.cdte_track = HFSPLUS_SB(sb)->session; te.cdte_format = CDROM_LBA; if (cdrom_read_tocentry(cdi, &te) || (te.cdte_ctrl & CDROM_DATA_TRACK) != 4) { pr_err("invalid session number or type of track\n"); return -EINVAL; } *start = (sector_t)te.cdte_addr.lba << 2; } else if (cdi) { struct cdrom_multisession ms_info; ms_info.addr_format = CDROM_LBA; if (cdrom_multisession(cdi, &ms_info) == 0 && ms_info.xa_flag) *start = (sector_t)ms_info.addr.lba << 2; } return 0; } /* Find the volume header and fill in some minimum bits in superblock */ /* Takes in super block, returns true if good data read */ int hfsplus_read_wrapper(struct super_block *sb) { struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); struct hfsplus_wd wd; sector_t part_start, part_size; u32 blocksize; int error = 0; error = -EINVAL; blocksize = sb_min_blocksize(sb, HFSPLUS_SECTOR_SIZE); if (!blocksize) goto out; sbi->min_io_size = blocksize; if (hfsplus_get_last_session(sb, &part_start, &part_size)) goto out; error = -ENOMEM; sbi->s_vhdr_buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL); if (!sbi->s_vhdr_buf) goto out; sbi->s_backup_vhdr_buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL); if (!sbi->s_backup_vhdr_buf) goto out_free_vhdr; reread: error = hfsplus_submit_bio(sb, part_start + HFSPLUS_VOLHEAD_SECTOR, sbi->s_vhdr_buf, (void **)&sbi->s_vhdr, REQ_OP_READ); if (error) goto out_free_backup_vhdr; error = -EINVAL; switch (sbi->s_vhdr->signature) { case cpu_to_be16(HFSPLUS_VOLHEAD_SIGX): set_bit(HFSPLUS_SB_HFSX, &sbi->flags); fallthrough; case cpu_to_be16(HFSPLUS_VOLHEAD_SIG): break; case cpu_to_be16(HFSP_WRAP_MAGIC): if (!hfsplus_read_mdb(sbi->s_vhdr, &wd)) goto out_free_backup_vhdr; wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT; part_start += (sector_t)wd.ablk_start + (sector_t)wd.embed_start * wd.ablk_size; part_size = (sector_t)wd.embed_count * wd.ablk_size; goto reread; default: /* * Check for a partition block. 
* * (should do this only for cdrom/loop though) */ if (hfs_part_find(sb, &part_start, &part_size)) goto out_free_backup_vhdr; goto reread; } error = hfsplus_submit_bio(sb, part_start + part_size - 2, sbi->s_backup_vhdr_buf, (void **)&sbi->s_backup_vhdr, REQ_OP_READ); if (error) goto out_free_backup_vhdr; error = -EINVAL; if (sbi->s_backup_vhdr->signature != sbi->s_vhdr->signature) { pr_warn("invalid secondary volume header\n"); goto out_free_backup_vhdr; } blocksize = be32_to_cpu(sbi->s_vhdr->blocksize); /* * Block size must be at least as large as a sector and a power of 2. */ if (blocksize < HFSPLUS_SECTOR_SIZE || ((blocksize - 1) & blocksize)) goto out_free_backup_vhdr; sbi->alloc_blksz = blocksize; sbi->alloc_blksz_shift = ilog2(blocksize); blocksize = min_t(u32, sbi->alloc_blksz, PAGE_SIZE); /* * Align block size to block offset. */ while (part_start & ((blocksize >> HFSPLUS_SECTOR_SHIFT) - 1)) blocksize >>= 1; if (sb_set_blocksize(sb, blocksize) != blocksize) { pr_err("unable to set blocksize to %u!\n", blocksize); goto out_free_backup_vhdr; } sbi->blockoffset = part_start >> (sb->s_blocksize_bits - HFSPLUS_SECTOR_SHIFT); sbi->part_start = part_start; sbi->sect_count = part_size; sbi->fs_shift = sbi->alloc_blksz_shift - sb->s_blocksize_bits; return 0; out_free_backup_vhdr: kfree(sbi->s_backup_vhdr_buf); out_free_vhdr: kfree(sbi->s_vhdr_buf); out: return error; }
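Two bit tricks in hfsplus_read_wrapper() are worth spelling out: ((blocksize - 1) & blocksize) is zero exactly when blocksize is a power of two, and the while loop halves the block size until the partition start is aligned to it. A standalone sketch of both (SECTOR_SHIFT of 9 mirrors HFSPLUS_SECTOR_SHIFT for 512-byte sectors):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SECTOR_SHIFT 9	/* 512-byte sectors, as in HFSPLUS_SECTOR_SHIFT */

static bool is_power_of_two(uint32_t x)
{
	/* A power of two has one set bit; x - 1 flips it and all bits below. */
	return x && ((x - 1) & x) == 0;
}

/* Shrink blocksize until part_start (in sectors) is a multiple of it. */
static uint32_t align_blocksize(uint32_t blocksize, uint64_t part_start)
{
	while (part_start & ((blocksize >> SECTOR_SHIFT) - 1))
		blocksize >>= 1;
	return blocksize;
}

int main(void)
{
	printf("%d\n", is_power_of_two(4096));	/* 1 */
	printf("%d\n", is_power_of_two(6144));	/* 0: multiple of 2, not a power */
	/* part_start = 12 sectors: 4096 (8 sectors) shrinks to 2048 (4 sectors) */
	printf("%u\n", align_blocksize(4096, 12));
	return 0;
}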
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  Routines for driver control interface
 *  Copyright (c) by Jaroslav Kysela <perex@perex.cz>
 */

#include <linux/threads.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/math64.h>
#include <linux/sched/signal.h>
#include <sound/core.h>
#include <sound/minors.h>
#include <sound/info.h>
#include <sound/control.h>

// Max allocation size for user controls.
static int max_user_ctl_alloc_size = 8 * 1024 * 1024;
module_param_named(max_user_ctl_alloc_size, max_user_ctl_alloc_size, int, 0444);
MODULE_PARM_DESC(max_user_ctl_alloc_size, "Max allocation size for user controls");

#define MAX_CONTROL_COUNT	1028

struct snd_kctl_ioctl {
	struct list_head list;		/* list of all ioctls */
	snd_kctl_ioctl_func_t fioctl;
};

static DECLARE_RWSEM(snd_ioctl_rwsem);
static DECLARE_RWSEM(snd_ctl_layer_rwsem);
static LIST_HEAD(snd_control_ioctls);
#ifdef CONFIG_COMPAT
static LIST_HEAD(snd_control_compat_ioctls);
#endif
static struct snd_ctl_layer_ops *snd_ctl_layer;

static int snd_ctl_remove_locked(struct snd_card *card,
				 struct snd_kcontrol *kcontrol);

static int snd_ctl_open(struct inode *inode, struct file *file)
{
	struct snd_card *card;
	struct snd_ctl_file *ctl;
	int i, err;

	err = stream_open(inode, file);
	if (err < 0)
		return err;

	card = snd_lookup_minor_data(iminor(inode), SNDRV_DEVICE_TYPE_CONTROL);
	if (!card) {
		err = -ENODEV;
		goto __error1;
	}
	err = snd_card_file_add(card, file);
	if (err < 0) {
		err = -ENODEV;
		goto __error1;
	}
	if (!try_module_get(card->module)) {
		err = -EFAULT;
		goto __error2;
	}
	ctl = kzalloc(sizeof(*ctl), GFP_KERNEL);
	if (ctl == NULL) {
		err = -ENOMEM;
		goto __error;
	}
	INIT_LIST_HEAD(&ctl->events);
	init_waitqueue_head(&ctl->change_sleep);
	spin_lock_init(&ctl->read_lock);
	ctl->card = card;
	for (i = 0; i < SND_CTL_SUBDEV_ITEMS; i++)
		ctl->preferred_subdevice[i] = -1;
	ctl->pid = get_pid(task_pid(current));
	file->private_data = ctl;
	scoped_guard(write_lock_irqsave, &card->controls_rwlock)
		list_add_tail(&ctl->list, &card->ctl_files);
	snd_card_unref(card);
	return 0;

__error:
	module_put(card->module);
__error2:
	snd_card_file_remove(card, file);
__error1:
	if (card)
		snd_card_unref(card);
	return err;
}

static void snd_ctl_empty_read_queue(struct snd_ctl_file *ctl)
{
	struct snd_kctl_event *cread;

	guard(spinlock_irqsave)(&ctl->read_lock);
	while (!list_empty(&ctl->events)) {
		cread = snd_kctl_event(ctl->events.next);
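		/* detach the event from the queue before freeing it; the
		 * read_lock guard above keeps concurrent readers away */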
list_del(&cread->list); kfree(cread); } } static int snd_ctl_release(struct inode *inode, struct file *file) { struct snd_card *card; struct snd_ctl_file *ctl; struct snd_kcontrol *control; unsigned int idx; ctl = file->private_data; file->private_data = NULL; card = ctl->card; scoped_guard(write_lock_irqsave, &card->controls_rwlock) list_del(&ctl->list); scoped_guard(rwsem_write, &card->controls_rwsem) { list_for_each_entry(control, &card->controls, list) for (idx = 0; idx < control->count; idx++) if (control->vd[idx].owner == ctl) control->vd[idx].owner = NULL; } snd_fasync_free(ctl->fasync); snd_ctl_empty_read_queue(ctl); put_pid(ctl->pid); kfree(ctl); module_put(card->module); snd_card_file_remove(card, file); return 0; } /** * snd_ctl_notify - Send notification to user-space for a control change * @card: the card to send notification * @mask: the event mask, SNDRV_CTL_EVENT_* * @id: the ctl element id to send notification * * This function adds an event record with the given id and mask, appends * to the list and wakes up the user-space for notification. This can be * called in the atomic context. */ void snd_ctl_notify(struct snd_card *card, unsigned int mask, struct snd_ctl_elem_id *id) { struct snd_ctl_file *ctl; struct snd_kctl_event *ev; if (snd_BUG_ON(!card || !id)) return; if (card->shutdown) return; guard(read_lock_irqsave)(&card->controls_rwlock); #if IS_ENABLED(CONFIG_SND_MIXER_OSS) card->mixer_oss_change_count++; #endif list_for_each_entry(ctl, &card->ctl_files, list) { if (!ctl->subscribed) continue; scoped_guard(spinlock, &ctl->read_lock) { list_for_each_entry(ev, &ctl->events, list) { if (ev->id.numid == id->numid) { ev->mask |= mask; goto _found; } } ev = kzalloc(sizeof(*ev), GFP_ATOMIC); if (ev) { ev->id = *id; ev->mask = mask; list_add_tail(&ev->list, &ctl->events); } else { dev_err(card->dev, "No memory available to allocate event\n"); } _found: wake_up(&ctl->change_sleep); } snd_kill_fasync(ctl->fasync, SIGIO, POLL_IN); } } EXPORT_SYMBOL(snd_ctl_notify); /** * snd_ctl_notify_one - Send notification to user-space for a control change * @card: the card to send notification * @mask: the event mask, SNDRV_CTL_EVENT_* * @kctl: the pointer with the control instance * @ioff: the additional offset to the control index * * This function calls snd_ctl_notify() and does additional jobs * like LED state changes. */ void snd_ctl_notify_one(struct snd_card *card, unsigned int mask, struct snd_kcontrol *kctl, unsigned int ioff) { struct snd_ctl_elem_id id = kctl->id; struct snd_ctl_layer_ops *lops; id.index += ioff; id.numid += ioff; snd_ctl_notify(card, mask, &id); guard(rwsem_read)(&snd_ctl_layer_rwsem); for (lops = snd_ctl_layer; lops; lops = lops->next) lops->lnotify(card, mask, kctl, ioff); } EXPORT_SYMBOL(snd_ctl_notify_one); /** * snd_ctl_new - create a new control instance with some elements * @kctl: the pointer to store new control instance * @count: the number of elements in this control * @access: the default access flags for elements in this control * @file: given when locking these elements * * Allocates a memory object for a new control instance. The instance has * elements as many as the given number (@count). Each element has given * access permissions (@access). Each element is locked when @file is given. 
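 *
 * As an illustration only (not a public API; this helper is static), the
 * callers below use it roughly like this for a two-element read/write set:
 *
 *	struct snd_kcontrol *kctl;
 *	int err = snd_ctl_new(&kctl, 2, SNDRV_CTL_ELEM_ACCESS_READWRITE, NULL);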
 *
 * Return: 0 on success, error code on failure
 */
static int snd_ctl_new(struct snd_kcontrol **kctl, unsigned int count,
		       unsigned int access, struct snd_ctl_file *file)
{
	unsigned int idx;

	if (count == 0 || count > MAX_CONTROL_COUNT)
		return -EINVAL;

	*kctl = kzalloc(struct_size(*kctl, vd, count), GFP_KERNEL);
	if (!*kctl)
		return -ENOMEM;

	(*kctl)->count = count;
	for (idx = 0; idx < count; idx++) {
		(*kctl)->vd[idx].access = access;
		(*kctl)->vd[idx].owner = file;
	}

	return 0;
}

/**
 * snd_ctl_new1 - create a control instance from the template
 * @ncontrol: the initialization record
 * @private_data: the private data to set
 *
 * Allocates a new struct snd_kcontrol instance and initializes it from the
 * given template. When the access field of @ncontrol is 0, READWRITE access
 * is assumed. When the count field is 0, a count of one is assumed.
 *
 * Return: The pointer to the newly generated instance, or %NULL on failure.
 */
struct snd_kcontrol *snd_ctl_new1(const struct snd_kcontrol_new *ncontrol,
				  void *private_data)
{
	struct snd_kcontrol *kctl;
	unsigned int count;
	unsigned int access;
	int err;

	if (snd_BUG_ON(!ncontrol || !ncontrol->info))
		return NULL;

	count = ncontrol->count;
	if (count == 0)
		count = 1;

	access = ncontrol->access;
	if (access == 0)
		access = SNDRV_CTL_ELEM_ACCESS_READWRITE;
	access &= (SNDRV_CTL_ELEM_ACCESS_READWRITE |
		   SNDRV_CTL_ELEM_ACCESS_VOLATILE |
		   SNDRV_CTL_ELEM_ACCESS_INACTIVE |
		   SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE |
		   SNDRV_CTL_ELEM_ACCESS_TLV_COMMAND |
		   SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK |
		   SNDRV_CTL_ELEM_ACCESS_LED_MASK |
		   SNDRV_CTL_ELEM_ACCESS_SKIP_CHECK);

	err = snd_ctl_new(&kctl, count, access, NULL);
	if (err < 0)
		return NULL;

	/* The 'numid' member is decided when calling snd_ctl_add(). */
	kctl->id.iface = ncontrol->iface;
	kctl->id.device = ncontrol->device;
	kctl->id.subdevice = ncontrol->subdevice;
	if (ncontrol->name) {
		strscpy(kctl->id.name, ncontrol->name, sizeof(kctl->id.name));
		if (strcmp(ncontrol->name, kctl->id.name) != 0)
			pr_warn("ALSA: Control name '%s' truncated to '%s'\n",
				ncontrol->name, kctl->id.name);
	}
	kctl->id.index = ncontrol->index;
	kctl->info = ncontrol->info;
	kctl->get = ncontrol->get;
	kctl->put = ncontrol->put;
	kctl->tlv.p = ncontrol->tlv.p;
	kctl->private_value = ncontrol->private_value;
	kctl->private_data = private_data;
	return kctl;
}
EXPORT_SYMBOL(snd_ctl_new1);

/**
 * snd_ctl_free_one - release the control instance
 * @kcontrol: the control instance
 *
 * Releases the control instance created via snd_ctl_new()
 * or snd_ctl_new1().
 * Don't call this after the control was added to the card.
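 *
 * A typical driver-side error path, as a sketch ('chip', 'tmpl' and
 * 'setup_failed' are placeholder names, not part of this API):
 *
 *	kctl = snd_ctl_new1(&tmpl, chip);
 *	if (!kctl)
 *		return -ENOMEM;
 *	if (setup_failed) {
 *		snd_ctl_free_one(kctl);		// not yet added to the card
 *		return -EIO;
 *	}
 *	return snd_ctl_add(card, kctl);		// frees kctl itself on failure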
*/ void snd_ctl_free_one(struct snd_kcontrol *kcontrol) { if (kcontrol) { if (kcontrol->private_free) kcontrol->private_free(kcontrol); kfree(kcontrol); } } EXPORT_SYMBOL(snd_ctl_free_one); static bool snd_ctl_remove_numid_conflict(struct snd_card *card, unsigned int count) { struct snd_kcontrol *kctl; /* Make sure that the ids assigned to the control do not wrap around */ if (card->last_numid >= UINT_MAX - count) card->last_numid = 0; list_for_each_entry(kctl, &card->controls, list) { if (kctl->id.numid < card->last_numid + 1 + count && kctl->id.numid + kctl->count > card->last_numid + 1) { card->last_numid = kctl->id.numid + kctl->count - 1; return true; } } return false; } static int snd_ctl_find_hole(struct snd_card *card, unsigned int count) { unsigned int iter = 100000; while (snd_ctl_remove_numid_conflict(card, count)) { if (--iter == 0) { /* this situation is very unlikely */ dev_err(card->dev, "unable to allocate new control numid\n"); return -ENOMEM; } } return 0; } /* check whether the given id is contained in the given kctl */ static bool elem_id_matches(const struct snd_kcontrol *kctl, const struct snd_ctl_elem_id *id) { return kctl->id.iface == id->iface && kctl->id.device == id->device && kctl->id.subdevice == id->subdevice && !strncmp(kctl->id.name, id->name, sizeof(kctl->id.name)) && kctl->id.index <= id->index && kctl->id.index + kctl->count > id->index; } #ifdef CONFIG_SND_CTL_FAST_LOOKUP /* Compute a hash key for the corresponding ctl id * It's for the name lookup, hence the numid is excluded. * The hash key is bound in LONG_MAX to be used for Xarray key. */ #define MULTIPLIER 37 static unsigned long get_ctl_id_hash(const struct snd_ctl_elem_id *id) { int i; unsigned long h; h = id->iface; h = MULTIPLIER * h + id->device; h = MULTIPLIER * h + id->subdevice; for (i = 0; i < SNDRV_CTL_ELEM_ID_NAME_MAXLEN && id->name[i]; i++) h = MULTIPLIER * h + id->name[i]; h = MULTIPLIER * h + id->index; h &= LONG_MAX; return h; } /* add hash entries to numid and ctl xarray tables */ static void add_hash_entries(struct snd_card *card, struct snd_kcontrol *kcontrol) { struct snd_ctl_elem_id id = kcontrol->id; int i; xa_store_range(&card->ctl_numids, kcontrol->id.numid, kcontrol->id.numid + kcontrol->count - 1, kcontrol, GFP_KERNEL); for (i = 0; i < kcontrol->count; i++) { id.index = kcontrol->id.index + i; if (xa_insert(&card->ctl_hash, get_ctl_id_hash(&id), kcontrol, GFP_KERNEL)) { /* skip hash for this entry, noting we had collision */ card->ctl_hash_collision = true; dev_dbg(card->dev, "ctl_hash collision %d:%s:%d\n", id.iface, id.name, id.index); } } } /* remove hash entries that have been added */ static void remove_hash_entries(struct snd_card *card, struct snd_kcontrol *kcontrol) { struct snd_ctl_elem_id id = kcontrol->id; struct snd_kcontrol *matched; unsigned long h; int i; for (i = 0; i < kcontrol->count; i++) { xa_erase(&card->ctl_numids, id.numid); h = get_ctl_id_hash(&id); matched = xa_load(&card->ctl_hash, h); if (matched && (matched == kcontrol || elem_id_matches(matched, &id))) xa_erase(&card->ctl_hash, h); id.index++; id.numid++; } } #else /* CONFIG_SND_CTL_FAST_LOOKUP */ static inline void add_hash_entries(struct snd_card *card, struct snd_kcontrol *kcontrol) { } static inline void remove_hash_entries(struct snd_card *card, struct snd_kcontrol *kcontrol) { } #endif /* CONFIG_SND_CTL_FAST_LOOKUP */ enum snd_ctl_add_mode { CTL_ADD_EXCLUSIVE, CTL_REPLACE, CTL_ADD_ON_REPLACE, }; /* add/replace a new kcontrol object; call with card->controls_rwsem locked */ static int 
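/*
 * Worked example of the numid hole search above, with illustrative values:
 * if card->last_numid is 10 and a live control occupies numids 8..12 (base
 * numid 8, count 5), snd_ctl_remove_numid_conflict() detects the overlap and
 * advances last_numid to 12, so the control added below starts at numid 13.
 */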
__snd_ctl_add_replace(struct snd_card *card, struct snd_kcontrol *kcontrol,
		      enum snd_ctl_add_mode mode)
{
	struct snd_ctl_elem_id id;
	unsigned int idx;
	struct snd_kcontrol *old;
	int err;

	lockdep_assert_held_write(&card->controls_rwsem);

	id = kcontrol->id;
	if (id.index > UINT_MAX - kcontrol->count)
		return -EINVAL;

	old = snd_ctl_find_id(card, &id);
	if (!old) {
		if (mode == CTL_REPLACE)
			return -EINVAL;
	} else {
		if (mode == CTL_ADD_EXCLUSIVE) {
			dev_err(card->dev,
				"control %i:%i:%i:%s:%i is already present\n",
				id.iface, id.device, id.subdevice, id.name,
				id.index);
			return -EBUSY;
		}

		err = snd_ctl_remove_locked(card, old);
		if (err < 0)
			return err;
	}

	if (snd_ctl_find_hole(card, kcontrol->count) < 0)
		return -ENOMEM;

	scoped_guard(write_lock_irq, &card->controls_rwlock) {
		list_add_tail(&kcontrol->list, &card->controls);
		card->controls_count += kcontrol->count;
		kcontrol->id.numid = card->last_numid + 1;
		card->last_numid += kcontrol->count;
	}

	add_hash_entries(card, kcontrol);

	for (idx = 0; idx < kcontrol->count; idx++)
		snd_ctl_notify_one(card, SNDRV_CTL_EVENT_MASK_ADD, kcontrol, idx);

	return 0;
}

static int snd_ctl_add_replace(struct snd_card *card,
			       struct snd_kcontrol *kcontrol,
			       enum snd_ctl_add_mode mode)
{
	int err = -EINVAL;

	if (!kcontrol)
		return err;
	if (snd_BUG_ON(!card || !kcontrol->info))
		goto error;

	scoped_guard(rwsem_write, &card->controls_rwsem)
		err = __snd_ctl_add_replace(card, kcontrol, mode);
	if (err < 0)
		goto error;
	return 0;

 error:
	snd_ctl_free_one(kcontrol);
	return err;
}

/**
 * snd_ctl_add - add the control instance to the card
 * @card: the card instance
 * @kcontrol: the control instance to add
 *
 * Adds the control instance created via snd_ctl_new() or
 * snd_ctl_new1() to the given card, and also assigns a unique
 * numid used for fast search.
 *
 * A control that cannot be added is freed automatically.
 *
 * Return: Zero if successful, or a negative error code on failure.
 */
int snd_ctl_add(struct snd_card *card, struct snd_kcontrol *kcontrol)
{
	return snd_ctl_add_replace(card, kcontrol, CTL_ADD_EXCLUSIVE);
}
EXPORT_SYMBOL(snd_ctl_add);

/**
 * snd_ctl_replace - replace the control instance of the card
 * @card: the card instance
 * @kcontrol: the control instance to replace
 * @add_on_replace: add the control if not already added
 *
 * Replaces the given control. If the given control does not exist
 * and the add_on_replace flag is set, the control is added. If the
 * control exists, it is destroyed first.
 *
 * A control that cannot be added or replaced is freed automatically.
 *
 * Return: Zero if successful, or a negative error code on failure.
 */
int snd_ctl_replace(struct snd_card *card, struct snd_kcontrol *kcontrol,
		    bool add_on_replace)
{
	return snd_ctl_add_replace(card, kcontrol, add_on_replace ?
CTL_ADD_ON_REPLACE : CTL_REPLACE); } EXPORT_SYMBOL(snd_ctl_replace); static int __snd_ctl_remove(struct snd_card *card, struct snd_kcontrol *kcontrol, bool remove_hash) { unsigned int idx; lockdep_assert_held_write(&card->controls_rwsem); if (snd_BUG_ON(!card || !kcontrol)) return -EINVAL; if (remove_hash) remove_hash_entries(card, kcontrol); scoped_guard(write_lock_irq, &card->controls_rwlock) { list_del(&kcontrol->list); card->controls_count -= kcontrol->count; } for (idx = 0; idx < kcontrol->count; idx++) snd_ctl_notify_one(card, SNDRV_CTL_EVENT_MASK_REMOVE, kcontrol, idx); snd_ctl_free_one(kcontrol); return 0; } static inline int snd_ctl_remove_locked(struct snd_card *card, struct snd_kcontrol *kcontrol) { return __snd_ctl_remove(card, kcontrol, true); } /** * snd_ctl_remove - remove the control from the card and release it * @card: the card instance * @kcontrol: the control instance to remove * * Removes the control from the card and then releases the instance. * You don't need to call snd_ctl_free_one(). * Passing NULL to @kcontrol argument is allowed as noop. * * Return: 0 if successful, or a negative error code on failure. * * Note that this function takes card->controls_rwsem lock internally. */ int snd_ctl_remove(struct snd_card *card, struct snd_kcontrol *kcontrol) { if (!kcontrol) return 0; guard(rwsem_write)(&card->controls_rwsem); return snd_ctl_remove_locked(card, kcontrol); } EXPORT_SYMBOL(snd_ctl_remove); /** * snd_ctl_remove_id - remove the control of the given id and release it * @card: the card instance * @id: the control id to remove * * Finds the control instance with the given id, removes it from the * card list and releases it. * * Return: 0 if successful, or a negative error code on failure. */ int snd_ctl_remove_id(struct snd_card *card, struct snd_ctl_elem_id *id) { struct snd_kcontrol *kctl; guard(rwsem_write)(&card->controls_rwsem); kctl = snd_ctl_find_id(card, id); if (kctl == NULL) return -ENOENT; return snd_ctl_remove_locked(card, kctl); } EXPORT_SYMBOL(snd_ctl_remove_id); /** * snd_ctl_remove_user_ctl - remove and release the unlocked user control * @file: active control handle * @id: the control id to remove * * Finds the control instance with the given id, removes it from the * card list and releases it. * * Return: 0 if successful, or a negative error code on failure. */ static int snd_ctl_remove_user_ctl(struct snd_ctl_file * file, struct snd_ctl_elem_id *id) { struct snd_card *card = file->card; struct snd_kcontrol *kctl; int idx; guard(rwsem_write)(&card->controls_rwsem); kctl = snd_ctl_find_id(card, id); if (kctl == NULL) return -ENOENT; if (!(kctl->vd[0].access & SNDRV_CTL_ELEM_ACCESS_USER)) return -EINVAL; for (idx = 0; idx < kctl->count; idx++) if (kctl->vd[idx].owner != NULL && kctl->vd[idx].owner != file) return -EBUSY; return snd_ctl_remove_locked(card, kctl); } /** * snd_ctl_activate_id - activate/inactivate the control of the given id * @card: the card instance * @id: the control id to activate/inactivate * @active: non-zero to activate * * Finds the control instance with the given id, and activate or * inactivate the control together with notification, if changed. * The given ID data is filled with full information. * * Return: 0 if unchanged, 1 if changed, or a negative error code on failure. 
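 *
 * For instance, a driver that grays out a control while its path is down
 * might do (sketch only; 'id' prepared by the caller):
 *
 *	err = snd_ctl_activate_id(card, &id, 0);	// deactivate
 *	if (err > 0)
 *		;	// state changed, an INFO event has been sent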
*/ int snd_ctl_activate_id(struct snd_card *card, struct snd_ctl_elem_id *id, int active) { struct snd_kcontrol *kctl; struct snd_kcontrol_volatile *vd; unsigned int index_offset; int ret; down_write(&card->controls_rwsem); kctl = snd_ctl_find_id(card, id); if (kctl == NULL) { ret = -ENOENT; goto unlock; } index_offset = snd_ctl_get_ioff(kctl, id); vd = &kctl->vd[index_offset]; ret = 0; if (active) { if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_INACTIVE)) goto unlock; vd->access &= ~SNDRV_CTL_ELEM_ACCESS_INACTIVE; } else { if (vd->access & SNDRV_CTL_ELEM_ACCESS_INACTIVE) goto unlock; vd->access |= SNDRV_CTL_ELEM_ACCESS_INACTIVE; } snd_ctl_build_ioff(id, kctl, index_offset); downgrade_write(&card->controls_rwsem); snd_ctl_notify_one(card, SNDRV_CTL_EVENT_MASK_INFO, kctl, index_offset); up_read(&card->controls_rwsem); return 1; unlock: up_write(&card->controls_rwsem); return ret; } EXPORT_SYMBOL_GPL(snd_ctl_activate_id); /** * snd_ctl_rename_id - replace the id of a control on the card * @card: the card instance * @src_id: the old id * @dst_id: the new id * * Finds the control with the old id from the card, and replaces the * id with the new one. * * The function tries to keep the already assigned numid while replacing * the rest. * * Note that this function should be used only in the card initialization * phase. Calling after the card instantiation may cause issues with * user-space expecting persistent numids. * * Return: Zero if successful, or a negative error code on failure. */ int snd_ctl_rename_id(struct snd_card *card, struct snd_ctl_elem_id *src_id, struct snd_ctl_elem_id *dst_id) { struct snd_kcontrol *kctl; int saved_numid; guard(rwsem_write)(&card->controls_rwsem); kctl = snd_ctl_find_id(card, src_id); if (kctl == NULL) return -ENOENT; saved_numid = kctl->id.numid; remove_hash_entries(card, kctl); kctl->id = *dst_id; kctl->id.numid = saved_numid; add_hash_entries(card, kctl); return 0; } EXPORT_SYMBOL(snd_ctl_rename_id); /** * snd_ctl_rename - rename the control on the card * @card: the card instance * @kctl: the control to rename * @name: the new name * * Renames the specified control on the card to the new name. * * Note that this function takes card->controls_rwsem lock internally. */ void snd_ctl_rename(struct snd_card *card, struct snd_kcontrol *kctl, const char *name) { guard(rwsem_write)(&card->controls_rwsem); remove_hash_entries(card, kctl); if (strscpy(kctl->id.name, name, sizeof(kctl->id.name)) < 0) pr_warn("ALSA: Renamed control new name '%s' truncated to '%s'\n", name, kctl->id.name); add_hash_entries(card, kctl); } EXPORT_SYMBOL(snd_ctl_rename); #ifndef CONFIG_SND_CTL_FAST_LOOKUP static struct snd_kcontrol * snd_ctl_find_numid_slow(struct snd_card *card, unsigned int numid) { struct snd_kcontrol *kctl; guard(read_lock_irqsave)(&card->controls_rwlock); list_for_each_entry(kctl, &card->controls, list) { if (kctl->id.numid <= numid && kctl->id.numid + kctl->count > numid) return kctl; } return NULL; } #endif /* !CONFIG_SND_CTL_FAST_LOOKUP */ /** * snd_ctl_find_numid - find the control instance with the given number-id * @card: the card instance * @numid: the number-id to search * * Finds the control instance with the given number-id from the card. * * Return: The pointer of the instance if found, or %NULL if not. * * Note that this function takes card->controls_rwlock lock internally. 
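 *
 * A common pattern is resolving the element behind a queued event, as a
 * sketch:
 *
 *	kctl = snd_ctl_find_numid(card, ev->id.numid);
 *	if (!kctl)
 *		;	// the element has been removed meanwhile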
*/ struct snd_kcontrol *snd_ctl_find_numid(struct snd_card *card, unsigned int numid) { if (snd_BUG_ON(!card || !numid)) return NULL; #ifdef CONFIG_SND_CTL_FAST_LOOKUP return xa_load(&card->ctl_numids, numid); #else return snd_ctl_find_numid_slow(card, numid); #endif } EXPORT_SYMBOL(snd_ctl_find_numid); /** * snd_ctl_find_id - find the control instance with the given id * @card: the card instance * @id: the id to search * * Finds the control instance with the given id from the card. * * Return: The pointer of the instance if found, or %NULL if not. * * Note that this function takes card->controls_rwlock lock internally. */ struct snd_kcontrol *snd_ctl_find_id(struct snd_card *card, const struct snd_ctl_elem_id *id) { struct snd_kcontrol *kctl; if (snd_BUG_ON(!card || !id)) return NULL; if (id->numid != 0) return snd_ctl_find_numid(card, id->numid); #ifdef CONFIG_SND_CTL_FAST_LOOKUP kctl = xa_load(&card->ctl_hash, get_ctl_id_hash(id)); if (kctl && elem_id_matches(kctl, id)) return kctl; if (!card->ctl_hash_collision) return NULL; /* we can rely on only hash table */ #endif /* no matching in hash table - try all as the last resort */ guard(read_lock_irqsave)(&card->controls_rwlock); list_for_each_entry(kctl, &card->controls, list) if (elem_id_matches(kctl, id)) return kctl; return NULL; } EXPORT_SYMBOL(snd_ctl_find_id); static int snd_ctl_card_info(struct snd_card *card, struct snd_ctl_file * ctl, unsigned int cmd, void __user *arg) { struct snd_ctl_card_info *info __free(kfree) = NULL; info = kzalloc(sizeof(*info), GFP_KERNEL); if (! info) return -ENOMEM; scoped_guard(rwsem_read, &snd_ioctl_rwsem) { info->card = card->number; strscpy(info->id, card->id, sizeof(info->id)); strscpy(info->driver, card->driver, sizeof(info->driver)); strscpy(info->name, card->shortname, sizeof(info->name)); strscpy(info->longname, card->longname, sizeof(info->longname)); strscpy(info->mixername, card->mixername, sizeof(info->mixername)); strscpy(info->components, card->components, sizeof(info->components)); } if (copy_to_user(arg, info, sizeof(struct snd_ctl_card_info))) return -EFAULT; return 0; } static int snd_ctl_elem_list(struct snd_card *card, struct snd_ctl_elem_list *list) { struct snd_kcontrol *kctl; struct snd_ctl_elem_id id; unsigned int offset, space, jidx; offset = list->offset; space = list->space; guard(rwsem_read)(&card->controls_rwsem); list->count = card->controls_count; list->used = 0; if (!space) return 0; list_for_each_entry(kctl, &card->controls, list) { if (offset >= kctl->count) { offset -= kctl->count; continue; } for (jidx = offset; jidx < kctl->count; jidx++) { snd_ctl_build_ioff(&id, kctl, jidx); if (copy_to_user(list->pids + list->used, &id, sizeof(id))) return -EFAULT; list->used++; if (!--space) return 0; } offset = 0; } return 0; } static int snd_ctl_elem_list_user(struct snd_card *card, struct snd_ctl_elem_list __user *_list) { struct snd_ctl_elem_list list; int err; if (copy_from_user(&list, _list, sizeof(list))) return -EFAULT; err = snd_ctl_elem_list(card, &list); if (err) return err; if (copy_to_user(_list, &list, sizeof(list))) return -EFAULT; return 0; } /* Check whether the given kctl info is valid */ static int snd_ctl_check_elem_info(struct snd_card *card, const struct snd_ctl_elem_info *info) { static const unsigned int max_value_counts[] = { [SNDRV_CTL_ELEM_TYPE_BOOLEAN] = 128, [SNDRV_CTL_ELEM_TYPE_INTEGER] = 128, [SNDRV_CTL_ELEM_TYPE_ENUMERATED] = 128, [SNDRV_CTL_ELEM_TYPE_BYTES] = 512, [SNDRV_CTL_ELEM_TYPE_IEC958] = 1, [SNDRV_CTL_ELEM_TYPE_INTEGER64] = 64, }; if 
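	/* e.g. an INTEGER element may carry at most 128 values and a BYTES
	 * element up to 512; larger counts are rejected just below */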
	    (info->type < SNDRV_CTL_ELEM_TYPE_BOOLEAN ||
	     info->type > SNDRV_CTL_ELEM_TYPE_INTEGER64) {
		if (card)
			dev_err(card->dev,
				"control %i:%i:%i:%s:%i: invalid type %d\n",
				info->id.iface, info->id.device,
				info->id.subdevice, info->id.name,
				info->id.index, info->type);
		return -EINVAL;
	}
	if (info->type == SNDRV_CTL_ELEM_TYPE_ENUMERATED &&
	    info->value.enumerated.items == 0) {
		if (card)
			dev_err(card->dev,
				"control %i:%i:%i:%s:%i: zero enum items\n",
				info->id.iface, info->id.device,
				info->id.subdevice, info->id.name,
				info->id.index);
		return -EINVAL;
	}
	if (info->count > max_value_counts[info->type]) {
		if (card)
			dev_err(card->dev,
				"control %i:%i:%i:%s:%i: invalid count %d\n",
				info->id.iface, info->id.device,
				info->id.subdevice, info->id.name,
				info->id.index, info->count);
		return -EINVAL;
	}

	return 0;
}

/* The capacity of struct snd_ctl_elem_value.value. */
static const unsigned int value_sizes[] = {
	[SNDRV_CTL_ELEM_TYPE_BOOLEAN]	= sizeof(long),
	[SNDRV_CTL_ELEM_TYPE_INTEGER]	= sizeof(long),
	[SNDRV_CTL_ELEM_TYPE_ENUMERATED] = sizeof(unsigned int),
	[SNDRV_CTL_ELEM_TYPE_BYTES]	= sizeof(unsigned char),
	[SNDRV_CTL_ELEM_TYPE_IEC958]	= sizeof(struct snd_aes_iec958),
	[SNDRV_CTL_ELEM_TYPE_INTEGER64] = sizeof(long long),
};

/* fill the remaining snd_ctl_elem_value data with the given pattern */
static void fill_remaining_elem_value(struct snd_ctl_elem_value *control,
				      struct snd_ctl_elem_info *info,
				      u32 pattern)
{
	size_t offset = value_sizes[info->type] * info->count;

	offset = DIV_ROUND_UP(offset, sizeof(u32));
	memset32((u32 *)control->value.bytes.data + offset, pattern,
		 sizeof(control->value) / sizeof(u32) - offset);
}

/* check whether the given integer ctl value is valid */
static int sanity_check_int_value(struct snd_card *card,
				  const struct snd_ctl_elem_value *control,
				  const struct snd_ctl_elem_info *info,
				  int i, bool print_error)
{
	long long lval, lmin, lmax, lstep;
	u64 rem;

	switch (info->type) {
	default:
	case SNDRV_CTL_ELEM_TYPE_BOOLEAN:
		lval = control->value.integer.value[i];
		lmin = 0;
		lmax = 1;
		lstep = 0;
		break;
	case SNDRV_CTL_ELEM_TYPE_INTEGER:
		lval = control->value.integer.value[i];
		lmin = info->value.integer.min;
		lmax = info->value.integer.max;
		lstep = info->value.integer.step;
		break;
	case SNDRV_CTL_ELEM_TYPE_INTEGER64:
		lval = control->value.integer64.value[i];
		lmin = info->value.integer64.min;
		lmax = info->value.integer64.max;
		lstep = info->value.integer64.step;
		break;
	case SNDRV_CTL_ELEM_TYPE_ENUMERATED:
		lval = control->value.enumerated.item[i];
		lmin = 0;
		lmax = info->value.enumerated.items - 1;
		lstep = 0;
		break;
	}

	if (lval < lmin || lval > lmax) {
		if (print_error)
			dev_err(card->dev,
				"control %i:%i:%i:%s:%i: value out of range %lld (%lld/%lld) at count %i\n",
				control->id.iface, control->id.device,
				control->id.subdevice, control->id.name,
				control->id.index, lval, lmin, lmax, i);
		return -EINVAL;
	}
	if (lstep) {
		div64_u64_rem(lval, lstep, &rem);
		if (rem) {
			if (print_error)
				dev_err(card->dev,
					"control %i:%i:%i:%s:%i: unaligned value %lld (step %lld) at count %i\n",
					control->id.iface, control->id.device,
					control->id.subdevice, control->id.name,
					control->id.index, lval, lstep, i);
			return -EINVAL;
		}
	}

	return 0;
}
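/*
 * Illustration of the checks above, with made-up limits: for an INTEGER
 * element with min = 0, max = 100, step = 25, exactly {0, 25, 50, 75, 100}
 * pass; 120 fails the range test, and 60 fails the div64_u64_rem()
 * alignment test (60 % 25 != 0).
 */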
/* check whether all input values are valid for the given elem value */
static int sanity_check_input_values(struct snd_card *card,
				     const struct snd_ctl_elem_value *control,
				     const struct snd_ctl_elem_info *info,
				     bool print_error)
{
	int i, ret;

	switch (info->type) {
	case SNDRV_CTL_ELEM_TYPE_BOOLEAN:
	case SNDRV_CTL_ELEM_TYPE_INTEGER:
	case SNDRV_CTL_ELEM_TYPE_INTEGER64:
	case SNDRV_CTL_ELEM_TYPE_ENUMERATED:
		for (i = 0; i < info->count; i++) {
			ret = sanity_check_int_value(card, control, info, i,
						     print_error);
			if (ret < 0)
				return ret;
		}
		break;
	default:
		break;
	}

	return 0;
}

/* perform sanity checks on the given snd_ctl_elem_value object */
static int sanity_check_elem_value(struct snd_card *card,
				   const struct snd_ctl_elem_value *control,
				   const struct snd_ctl_elem_info *info,
				   u32 pattern)
{
	size_t offset;
	int ret;
	u32 *p;

	ret = sanity_check_input_values(card, control, info, true);
	if (ret < 0)
		return ret;

	/* check whether the remaining area is kept untouched */
	offset = value_sizes[info->type] * info->count;
	offset = DIV_ROUND_UP(offset, sizeof(u32));
	p = (u32 *)control->value.bytes.data + offset;
	for (; offset < sizeof(control->value) / sizeof(u32); offset++, p++) {
		if (*p != pattern) {
			ret = -EINVAL;
			break;
		}
		*p = 0; /* clear the checked area */
	}

	return ret;
}

static int __snd_ctl_elem_info(struct snd_card *card,
			       struct snd_kcontrol *kctl,
			       struct snd_ctl_elem_info *info,
			       struct snd_ctl_file *ctl)
{
	struct snd_kcontrol_volatile *vd;
	unsigned int index_offset;
	int result;

#ifdef CONFIG_SND_DEBUG
	info->access = 0;
#endif
	result = kctl->info(kctl, info);
	if (result >= 0) {
		snd_BUG_ON(info->access);
		index_offset = snd_ctl_get_ioff(kctl, &info->id);
		vd = &kctl->vd[index_offset];
		snd_ctl_build_ioff(&info->id, kctl, index_offset);
		info->access = vd->access;
		if (vd->owner) {
			info->access |= SNDRV_CTL_ELEM_ACCESS_LOCK;
			if (vd->owner == ctl)
				info->access |= SNDRV_CTL_ELEM_ACCESS_OWNER;
			info->owner = pid_vnr(vd->owner->pid);
		} else {
			info->owner = -1;
		}
		if (!snd_ctl_skip_validation(info) &&
		    snd_ctl_check_elem_info(card, info) < 0)
			result = -EINVAL;
	}
	return result;
}

static int snd_ctl_elem_info(struct snd_ctl_file *ctl,
			     struct snd_ctl_elem_info *info)
{
	struct snd_card *card = ctl->card;
	struct snd_kcontrol *kctl;

	guard(rwsem_read)(&card->controls_rwsem);
	kctl = snd_ctl_find_id(card, &info->id);
	if (!kctl)
		return -ENOENT;
	return __snd_ctl_elem_info(card, kctl, info, ctl);
}

static int snd_ctl_elem_info_user(struct snd_ctl_file *ctl,
				  struct snd_ctl_elem_info __user *_info)
{
	struct snd_card *card = ctl->card;
	struct snd_ctl_elem_info info;
	int result;

	if (copy_from_user(&info, _info, sizeof(info)))
		return -EFAULT;
	result = snd_power_ref_and_wait(card);
	if (result)
		return result;
	result = snd_ctl_elem_info(ctl, &info);
	snd_power_unref(card);
	if (result < 0)
		return result;
	/* drop internal access flags */
	info.access &= ~(SNDRV_CTL_ELEM_ACCESS_SKIP_CHECK |
			 SNDRV_CTL_ELEM_ACCESS_LED_MASK);
	if (copy_to_user(_info, &info, sizeof(info)))
		return -EFAULT;
	return result;
}

static int snd_ctl_elem_read(struct snd_card *card,
			     struct snd_ctl_elem_value *control)
{
	struct snd_kcontrol *kctl;
	struct snd_kcontrol_volatile *vd;
	unsigned int index_offset;
	struct snd_ctl_elem_info info;
	const u32 pattern = 0xdeadbeef;
	int ret;

	guard(rwsem_read)(&card->controls_rwsem);
	kctl = snd_ctl_find_id(card, &control->id);
	if (!kctl)
		return -ENOENT;

	index_offset = snd_ctl_get_ioff(kctl, &control->id);
	vd = &kctl->vd[index_offset];
	if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_READ) || !kctl->get)
		return -EPERM;

	snd_ctl_build_ioff(&control->id, kctl, index_offset);

#ifdef CONFIG_SND_CTL_DEBUG
	/* info is needed only for validation */
	memset(&info, 0, sizeof(info));
	info.id = control->id;
	ret = __snd_ctl_elem_info(card, kctl, &info, NULL);
	if (ret < 0)
		return ret;
#endif

	if (!snd_ctl_skip_validation(&info))
		fill_remaining_elem_value(control, &info, pattern);
	ret = kctl->get(kctl, control);
	if (ret < 0)
		return ret;
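	/* the 0xdeadbeef pattern written beyond the declared payload above
	 * lets the check below catch a get() callback that wrote past its
	 * advertised element count */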
if (!snd_ctl_skip_validation(&info) && sanity_check_elem_value(card, control, &info, pattern) < 0) { dev_err(card->dev, "control %i:%i:%i:%s:%i: access overflow\n", control->id.iface, control->id.device, control->id.subdevice, control->id.name, control->id.index); return -EINVAL; } return 0; } static int snd_ctl_elem_read_user(struct snd_card *card, struct snd_ctl_elem_value __user *_control) { struct snd_ctl_elem_value *control __free(kfree) = NULL; int result; control = memdup_user(_control, sizeof(*control)); if (IS_ERR(control)) return PTR_ERR(control); result = snd_power_ref_and_wait(card); if (result) return result; result = snd_ctl_elem_read(card, control); snd_power_unref(card); if (result < 0) return result; if (copy_to_user(_control, control, sizeof(*control))) return -EFAULT; return result; } static int snd_ctl_elem_write(struct snd_card *card, struct snd_ctl_file *file, struct snd_ctl_elem_value *control) { struct snd_kcontrol *kctl; struct snd_kcontrol_volatile *vd; unsigned int index_offset; int result = 0; down_write(&card->controls_rwsem); kctl = snd_ctl_find_id(card, &control->id); if (kctl == NULL) { up_write(&card->controls_rwsem); return -ENOENT; } index_offset = snd_ctl_get_ioff(kctl, &control->id); vd = &kctl->vd[index_offset]; if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_WRITE) || kctl->put == NULL || (file && vd->owner && vd->owner != file)) { up_write(&card->controls_rwsem); return -EPERM; } snd_ctl_build_ioff(&control->id, kctl, index_offset); /* validate input values */ if (IS_ENABLED(CONFIG_SND_CTL_INPUT_VALIDATION)) { struct snd_ctl_elem_info info; memset(&info, 0, sizeof(info)); info.id = control->id; result = __snd_ctl_elem_info(card, kctl, &info, NULL); if (!result) result = sanity_check_input_values(card, control, &info, false); } if (!result) result = kctl->put(kctl, control); if (result < 0) { up_write(&card->controls_rwsem); return result; } if (result > 0) { downgrade_write(&card->controls_rwsem); snd_ctl_notify_one(card, SNDRV_CTL_EVENT_MASK_VALUE, kctl, index_offset); up_read(&card->controls_rwsem); } else { up_write(&card->controls_rwsem); } return 0; } static int snd_ctl_elem_write_user(struct snd_ctl_file *file, struct snd_ctl_elem_value __user *_control) { struct snd_ctl_elem_value *control __free(kfree) = NULL; struct snd_card *card; int result; control = memdup_user(_control, sizeof(*control)); if (IS_ERR(control)) return PTR_ERR(control); card = file->card; result = snd_power_ref_and_wait(card); if (result < 0) return result; result = snd_ctl_elem_write(card, file, control); snd_power_unref(card); if (result < 0) return result; if (copy_to_user(_control, control, sizeof(*control))) return -EFAULT; return result; } static int snd_ctl_elem_lock(struct snd_ctl_file *file, struct snd_ctl_elem_id __user *_id) { struct snd_card *card = file->card; struct snd_ctl_elem_id id; struct snd_kcontrol *kctl; struct snd_kcontrol_volatile *vd; if (copy_from_user(&id, _id, sizeof(id))) return -EFAULT; guard(rwsem_write)(&card->controls_rwsem); kctl = snd_ctl_find_id(card, &id); if (!kctl) return -ENOENT; vd = &kctl->vd[snd_ctl_get_ioff(kctl, &id)]; if (vd->owner) return -EBUSY; vd->owner = file; return 0; } static int snd_ctl_elem_unlock(struct snd_ctl_file *file, struct snd_ctl_elem_id __user *_id) { struct snd_card *card = file->card; struct snd_ctl_elem_id id; struct snd_kcontrol *kctl; struct snd_kcontrol_volatile *vd; if (copy_from_user(&id, _id, sizeof(id))) return -EFAULT; guard(rwsem_write)(&card->controls_rwsem); kctl = snd_ctl_find_id(card, &id); if 
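	/* unlock mirrors snd_ctl_elem_lock() above: only the ctl file that
	 * owns the element (checked below) may clear vd->owner */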
(!kctl) return -ENOENT; vd = &kctl->vd[snd_ctl_get_ioff(kctl, &id)]; if (!vd->owner) return -EINVAL; if (vd->owner != file) return -EPERM; vd->owner = NULL; return 0; } struct user_element { struct snd_ctl_elem_info info; struct snd_card *card; char *elem_data; /* element data */ unsigned long elem_data_size; /* size of element data in bytes */ void *tlv_data; /* TLV data */ unsigned long tlv_data_size; /* TLV data size */ void *priv_data; /* private data (like strings for enumerated type) */ }; // check whether the addition (in bytes) of user ctl element may overflow the limit. static bool check_user_elem_overflow(struct snd_card *card, ssize_t add) { return (ssize_t)card->user_ctl_alloc_size + add > max_user_ctl_alloc_size; } static int snd_ctl_elem_user_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { struct user_element *ue = snd_kcontrol_chip(kcontrol); unsigned int offset; offset = snd_ctl_get_ioff(kcontrol, &uinfo->id); *uinfo = ue->info; snd_ctl_build_ioff(&uinfo->id, kcontrol, offset); return 0; } static int snd_ctl_elem_user_enum_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { struct user_element *ue = snd_kcontrol_chip(kcontrol); const char *names; unsigned int item; unsigned int offset; item = uinfo->value.enumerated.item; offset = snd_ctl_get_ioff(kcontrol, &uinfo->id); *uinfo = ue->info; snd_ctl_build_ioff(&uinfo->id, kcontrol, offset); item = min(item, uinfo->value.enumerated.items - 1); uinfo->value.enumerated.item = item; names = ue->priv_data; for (; item > 0; --item) names += strlen(names) + 1; strcpy(uinfo->value.enumerated.name, names); return 0; } static int snd_ctl_elem_user_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct user_element *ue = snd_kcontrol_chip(kcontrol); unsigned int size = ue->elem_data_size; char *src = ue->elem_data + snd_ctl_get_ioff(kcontrol, &ucontrol->id) * size; memcpy(&ucontrol->value, src, size); return 0; } static int snd_ctl_elem_user_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { int err, change; struct user_element *ue = snd_kcontrol_chip(kcontrol); unsigned int size = ue->elem_data_size; char *dst = ue->elem_data + snd_ctl_get_ioff(kcontrol, &ucontrol->id) * size; err = sanity_check_input_values(ue->card, ucontrol, &ue->info, false); if (err < 0) return err; change = memcmp(&ucontrol->value, dst, size) != 0; if (change) memcpy(dst, &ucontrol->value, size); return change; } /* called in controls_rwsem write lock */ static int replace_user_tlv(struct snd_kcontrol *kctl, unsigned int __user *buf, unsigned int size) { struct user_element *ue = snd_kcontrol_chip(kctl); unsigned int *container; unsigned int mask = 0; int i; int change; lockdep_assert_held_write(&ue->card->controls_rwsem); if (size > 1024 * 128) /* sane value */ return -EINVAL; // does the TLV size change cause overflow? if (check_user_elem_overflow(ue->card, (ssize_t)(size - ue->tlv_data_size))) return -ENOMEM; container = vmemdup_user(buf, size); if (IS_ERR(container)) return PTR_ERR(container); change = ue->tlv_data_size != size; if (!change) change = memcmp(ue->tlv_data, container, size) != 0; if (!change) { kvfree(container); return 0; } if (ue->tlv_data == NULL) { /* Now TLV data is available. 
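	 * On this first write, grant TLV_READ on every element of the set
	 * and advertise the change with an INFO event (mask set below).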
*/ for (i = 0; i < kctl->count; ++i) kctl->vd[i].access |= SNDRV_CTL_ELEM_ACCESS_TLV_READ; mask = SNDRV_CTL_EVENT_MASK_INFO; } else { ue->card->user_ctl_alloc_size -= ue->tlv_data_size; ue->tlv_data_size = 0; kvfree(ue->tlv_data); } ue->tlv_data = container; ue->tlv_data_size = size; // decremented at private_free. ue->card->user_ctl_alloc_size += size; mask |= SNDRV_CTL_EVENT_MASK_TLV; for (i = 0; i < kctl->count; ++i) snd_ctl_notify_one(ue->card, mask, kctl, i); return change; } static int read_user_tlv(struct snd_kcontrol *kctl, unsigned int __user *buf, unsigned int size) { struct user_element *ue = snd_kcontrol_chip(kctl); if (ue->tlv_data_size == 0 || ue->tlv_data == NULL) return -ENXIO; if (size < ue->tlv_data_size) return -ENOSPC; if (copy_to_user(buf, ue->tlv_data, ue->tlv_data_size)) return -EFAULT; return 0; } static int snd_ctl_elem_user_tlv(struct snd_kcontrol *kctl, int op_flag, unsigned int size, unsigned int __user *buf) { if (op_flag == SNDRV_CTL_TLV_OP_WRITE) return replace_user_tlv(kctl, buf, size); else return read_user_tlv(kctl, buf, size); } /* called in controls_rwsem write lock */ static int snd_ctl_elem_init_enum_names(struct user_element *ue) { char *names, *p; size_t buf_len, name_len; unsigned int i; const uintptr_t user_ptrval = ue->info.value.enumerated.names_ptr; lockdep_assert_held_write(&ue->card->controls_rwsem); buf_len = ue->info.value.enumerated.names_length; if (buf_len > 64 * 1024) return -EINVAL; if (check_user_elem_overflow(ue->card, buf_len)) return -ENOMEM; names = vmemdup_user((const void __user *)user_ptrval, buf_len); if (IS_ERR(names)) return PTR_ERR(names); /* check that there are enough valid names */ p = names; for (i = 0; i < ue->info.value.enumerated.items; ++i) { name_len = strnlen(p, buf_len); if (name_len == 0 || name_len >= 64 || name_len == buf_len) { kvfree(names); return -EINVAL; } p += name_len + 1; buf_len -= name_len + 1; } ue->priv_data = names; ue->info.value.enumerated.names_ptr = 0; // increment the allocation size; decremented again at private_free. ue->card->user_ctl_alloc_size += ue->info.value.enumerated.names_length; return 0; } static size_t compute_user_elem_size(size_t size, unsigned int count) { return sizeof(struct user_element) + size * count; } static void snd_ctl_elem_user_free(struct snd_kcontrol *kcontrol) { struct user_element *ue = snd_kcontrol_chip(kcontrol); // decrement the allocation size. ue->card->user_ctl_alloc_size -= compute_user_elem_size(ue->elem_data_size, kcontrol->count); ue->card->user_ctl_alloc_size -= ue->tlv_data_size; if (ue->priv_data) ue->card->user_ctl_alloc_size -= ue->info.value.enumerated.names_length; kvfree(ue->tlv_data); kvfree(ue->priv_data); kfree(ue); } static int snd_ctl_elem_add(struct snd_ctl_file *file, struct snd_ctl_elem_info *info, int replace) { struct snd_card *card = file->card; struct snd_kcontrol *kctl; unsigned int count; unsigned int access; long private_size; size_t alloc_size; struct user_element *ue; unsigned int offset; int err; if (!*info->id.name) return -EINVAL; if (strnlen(info->id.name, sizeof(info->id.name)) >= sizeof(info->id.name)) return -EINVAL; /* Delete a control to replace them if needed. */ if (replace) { info->id.numid = 0; err = snd_ctl_remove_user_ctl(file, &info->id); if (err) return err; } /* Check the number of elements for this userspace control. */ count = info->owner; if (count == 0) count = 1; if (count > MAX_CONTROL_COUNT) return -EINVAL; /* Arrange access permissions if needed. 
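	 * The normalization mirrors snd_ctl_new1() above: zero means
	 * READWRITE, and only a user-safe subset of the flags is honored.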
	 */
	access = info->access;
	if (access == 0)
		access = SNDRV_CTL_ELEM_ACCESS_READWRITE;
	access &= (SNDRV_CTL_ELEM_ACCESS_READWRITE |
		   SNDRV_CTL_ELEM_ACCESS_INACTIVE |
		   SNDRV_CTL_ELEM_ACCESS_TLV_WRITE);

	/* In the initial state, no TLV container is available yet. */
	if (access & SNDRV_CTL_ELEM_ACCESS_TLV_WRITE)
		access |= SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK;

	access |= SNDRV_CTL_ELEM_ACCESS_USER;

	/*
	 * Check information and calculate the size of data specific to
	 * this userspace control.
	 */
	/* pass NULL to card to suppress error messages */
	err = snd_ctl_check_elem_info(NULL, info);
	if (err < 0)
		return err;
	/* user-space control doesn't allow zero-size data */
	if (info->count < 1)
		return -EINVAL;
	private_size = value_sizes[info->type] * info->count;
	alloc_size = compute_user_elem_size(private_size, count);

	guard(rwsem_write)(&card->controls_rwsem);
	if (check_user_elem_overflow(card, alloc_size))
		return -ENOMEM;

	/*
	 * Keep memory object for this userspace control. After passing this
	 * code block, the instance should be freed by snd_ctl_free_one().
	 *
	 * Note that these elements in this control are locked.
	 */
	err = snd_ctl_new(&kctl, count, access, file);
	if (err < 0)
		return err;
	memcpy(&kctl->id, &info->id, sizeof(kctl->id));
	ue = kzalloc(alloc_size, GFP_KERNEL);
	if (!ue) {
		kfree(kctl);
		return -ENOMEM;
	}
	kctl->private_data = ue;
	kctl->private_free = snd_ctl_elem_user_free;

	// increment the allocated size; decremented again at private_free.
	card->user_ctl_alloc_size += alloc_size;

	/* Set private data for this userspace control. */
	ue->card = card;
	ue->info = *info;
	ue->info.access = 0;
	ue->elem_data = (char *)ue + sizeof(*ue);
	ue->elem_data_size = private_size;
	if (ue->info.type == SNDRV_CTL_ELEM_TYPE_ENUMERATED) {
		err = snd_ctl_elem_init_enum_names(ue);
		if (err < 0) {
			snd_ctl_free_one(kctl);
			return err;
		}
	}

	/* Set callback functions. */
	if (info->type == SNDRV_CTL_ELEM_TYPE_ENUMERATED)
		kctl->info = snd_ctl_elem_user_enum_info;
	else
		kctl->info = snd_ctl_elem_user_info;
	if (access & SNDRV_CTL_ELEM_ACCESS_READ)
		kctl->get = snd_ctl_elem_user_get;
	if (access & SNDRV_CTL_ELEM_ACCESS_WRITE)
		kctl->put = snd_ctl_elem_user_put;
	if (access & SNDRV_CTL_ELEM_ACCESS_TLV_WRITE)
		kctl->tlv.c = snd_ctl_elem_user_tlv;

	/* On failure, this call path takes care of freeing the instance. */
	err = __snd_ctl_add_replace(card, kctl, CTL_ADD_EXCLUSIVE);
	if (err < 0) {
		snd_ctl_free_one(kctl);
		return err;
	}
	offset = snd_ctl_get_ioff(kctl, &info->id);
	snd_ctl_build_ioff(&info->id, kctl, offset);
	/*
	 * Here we cannot report the number of elements added by this
	 * operation, because there is no specific field for it. Reusing the
	 * 'owner' field for that purpose could break user-space
	 * applications, because the field normally carries the PID of the
	 * process which locks the element.
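	 *
	 * (For orientation: user space reaches this path through the
	 * SNDRV_CTL_IOCTL_ELEM_ADD/_REPLACE ioctls below; alsa-lib helpers
	 * such as snd_ctl_elem_add_integer() are thin wrappers around them.)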
*/ return 0; } static int snd_ctl_elem_add_user(struct snd_ctl_file *file, struct snd_ctl_elem_info __user *_info, int replace) { struct snd_ctl_elem_info info; int err; if (copy_from_user(&info, _info, sizeof(info))) return -EFAULT; err = snd_ctl_elem_add(file, &info, replace); if (err < 0) return err; if (copy_to_user(_info, &info, sizeof(info))) { snd_ctl_remove_user_ctl(file, &info.id); return -EFAULT; } return 0; } static int snd_ctl_elem_remove(struct snd_ctl_file *file, struct snd_ctl_elem_id __user *_id) { struct snd_ctl_elem_id id; if (copy_from_user(&id, _id, sizeof(id))) return -EFAULT; return snd_ctl_remove_user_ctl(file, &id); } static int snd_ctl_subscribe_events(struct snd_ctl_file *file, int __user *ptr) { int subscribe; if (get_user(subscribe, ptr)) return -EFAULT; if (subscribe < 0) { subscribe = file->subscribed; if (put_user(subscribe, ptr)) return -EFAULT; return 0; } if (subscribe) { file->subscribed = 1; return 0; } else if (file->subscribed) { snd_ctl_empty_read_queue(file); file->subscribed = 0; } return 0; } static int call_tlv_handler(struct snd_ctl_file *file, int op_flag, struct snd_kcontrol *kctl, struct snd_ctl_elem_id *id, unsigned int __user *buf, unsigned int size) { static const struct { int op; int perm; } pairs[] = { {SNDRV_CTL_TLV_OP_READ, SNDRV_CTL_ELEM_ACCESS_TLV_READ}, {SNDRV_CTL_TLV_OP_WRITE, SNDRV_CTL_ELEM_ACCESS_TLV_WRITE}, {SNDRV_CTL_TLV_OP_CMD, SNDRV_CTL_ELEM_ACCESS_TLV_COMMAND}, }; struct snd_kcontrol_volatile *vd = &kctl->vd[snd_ctl_get_ioff(kctl, id)]; int i; /* Check support of the request for this element. */ for (i = 0; i < ARRAY_SIZE(pairs); ++i) { if (op_flag == pairs[i].op && (vd->access & pairs[i].perm)) break; } if (i == ARRAY_SIZE(pairs)) return -ENXIO; if (kctl->tlv.c == NULL) return -ENXIO; /* Write and command operations are not allowed for locked element. */ if (op_flag != SNDRV_CTL_TLV_OP_READ && vd->owner != NULL && vd->owner != file) return -EPERM; return kctl->tlv.c(kctl, op_flag, size, buf); } static int read_tlv_buf(struct snd_kcontrol *kctl, struct snd_ctl_elem_id *id, unsigned int __user *buf, unsigned int size) { struct snd_kcontrol_volatile *vd = &kctl->vd[snd_ctl_get_ioff(kctl, id)]; unsigned int len; if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_TLV_READ)) return -ENXIO; if (kctl->tlv.p == NULL) return -ENXIO; len = sizeof(unsigned int) * 2 + kctl->tlv.p[1]; if (size < len) return -ENOMEM; if (copy_to_user(buf, kctl->tlv.p, len)) return -EFAULT; return 0; } static int snd_ctl_tlv_ioctl(struct snd_ctl_file *file, struct snd_ctl_tlv __user *buf, int op_flag) { struct snd_ctl_tlv header; unsigned int __user *container; unsigned int container_size; struct snd_kcontrol *kctl; struct snd_ctl_elem_id id; struct snd_kcontrol_volatile *vd; lockdep_assert_held(&file->card->controls_rwsem); if (copy_from_user(&header, buf, sizeof(header))) return -EFAULT; /* In design of control core, numerical ID starts at 1. */ if (header.numid == 0) return -EINVAL; /* At least, container should include type and length fields. */ if (header.length < sizeof(unsigned int) * 2) return -EINVAL; container_size = header.length; container = buf->tlv; kctl = snd_ctl_find_numid(file->card, header.numid); if (kctl == NULL) return -ENOENT; /* Calculate index of the element in this set. 
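	 * For example, a set with base numid 42 and count 4 maps a request
	 * for numid 44 to offset 2 within the set.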
*/ id = kctl->id; snd_ctl_build_ioff(&id, kctl, header.numid - id.numid); vd = &kctl->vd[snd_ctl_get_ioff(kctl, &id)]; if (vd->access & SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK) { return call_tlv_handler(file, op_flag, kctl, &id, container, container_size); } else { if (op_flag == SNDRV_CTL_TLV_OP_READ) { return read_tlv_buf(kctl, &id, container, container_size); } } /* Not supported. */ return -ENXIO; } static long snd_ctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct snd_ctl_file *ctl; struct snd_card *card; struct snd_kctl_ioctl *p; void __user *argp = (void __user *)arg; int __user *ip = argp; int err; ctl = file->private_data; card = ctl->card; if (snd_BUG_ON(!card)) return -ENXIO; switch (cmd) { case SNDRV_CTL_IOCTL_PVERSION: return put_user(SNDRV_CTL_VERSION, ip) ? -EFAULT : 0; case SNDRV_CTL_IOCTL_CARD_INFO: return snd_ctl_card_info(card, ctl, cmd, argp); case SNDRV_CTL_IOCTL_ELEM_LIST: return snd_ctl_elem_list_user(card, argp); case SNDRV_CTL_IOCTL_ELEM_INFO: return snd_ctl_elem_info_user(ctl, argp); case SNDRV_CTL_IOCTL_ELEM_READ: return snd_ctl_elem_read_user(card, argp); case SNDRV_CTL_IOCTL_ELEM_WRITE: return snd_ctl_elem_write_user(ctl, argp); case SNDRV_CTL_IOCTL_ELEM_LOCK: return snd_ctl_elem_lock(ctl, argp); case SNDRV_CTL_IOCTL_ELEM_UNLOCK: return snd_ctl_elem_unlock(ctl, argp); case SNDRV_CTL_IOCTL_ELEM_ADD: return snd_ctl_elem_add_user(ctl, argp, 0); case SNDRV_CTL_IOCTL_ELEM_REPLACE: return snd_ctl_elem_add_user(ctl, argp, 1); case SNDRV_CTL_IOCTL_ELEM_REMOVE: return snd_ctl_elem_remove(ctl, argp); case SNDRV_CTL_IOCTL_SUBSCRIBE_EVENTS: return snd_ctl_subscribe_events(ctl, ip); case SNDRV_CTL_IOCTL_TLV_READ: err = snd_power_ref_and_wait(card); if (err < 0) return err; scoped_guard(rwsem_read, &card->controls_rwsem) err = snd_ctl_tlv_ioctl(ctl, argp, SNDRV_CTL_TLV_OP_READ); snd_power_unref(card); return err; case SNDRV_CTL_IOCTL_TLV_WRITE: err = snd_power_ref_and_wait(card); if (err < 0) return err; scoped_guard(rwsem_write, &card->controls_rwsem) err = snd_ctl_tlv_ioctl(ctl, argp, SNDRV_CTL_TLV_OP_WRITE); snd_power_unref(card); return err; case SNDRV_CTL_IOCTL_TLV_COMMAND: err = snd_power_ref_and_wait(card); if (err < 0) return err; scoped_guard(rwsem_write, &card->controls_rwsem) err = snd_ctl_tlv_ioctl(ctl, argp, SNDRV_CTL_TLV_OP_CMD); snd_power_unref(card); return err; case SNDRV_CTL_IOCTL_POWER: return -ENOPROTOOPT; case SNDRV_CTL_IOCTL_POWER_STATE: return put_user(SNDRV_CTL_POWER_D0, ip) ? 
-EFAULT : 0; } guard(rwsem_read)(&snd_ioctl_rwsem); list_for_each_entry(p, &snd_control_ioctls, list) { err = p->fioctl(card, ctl, cmd, arg); if (err != -ENOIOCTLCMD) return err; } dev_dbg(card->dev, "unknown ioctl = 0x%x\n", cmd); return -ENOTTY; } static ssize_t snd_ctl_read(struct file *file, char __user *buffer, size_t count, loff_t * offset) { struct snd_ctl_file *ctl; int err = 0; ssize_t result = 0; ctl = file->private_data; if (snd_BUG_ON(!ctl || !ctl->card)) return -ENXIO; if (!ctl->subscribed) return -EBADFD; if (count < sizeof(struct snd_ctl_event)) return -EINVAL; spin_lock_irq(&ctl->read_lock); while (count >= sizeof(struct snd_ctl_event)) { struct snd_ctl_event ev; struct snd_kctl_event *kev; while (list_empty(&ctl->events)) { wait_queue_entry_t wait; if ((file->f_flags & O_NONBLOCK) != 0 || result > 0) { err = -EAGAIN; goto __end_lock; } init_waitqueue_entry(&wait, current); add_wait_queue(&ctl->change_sleep, &wait); set_current_state(TASK_INTERRUPTIBLE); spin_unlock_irq(&ctl->read_lock); schedule(); remove_wait_queue(&ctl->change_sleep, &wait); if (ctl->card->shutdown) return -ENODEV; if (signal_pending(current)) return -ERESTARTSYS; spin_lock_irq(&ctl->read_lock); } kev = snd_kctl_event(ctl->events.next); ev.type = SNDRV_CTL_EVENT_ELEM; ev.data.elem.mask = kev->mask; ev.data.elem.id = kev->id; list_del(&kev->list); spin_unlock_irq(&ctl->read_lock); kfree(kev); if (copy_to_user(buffer, &ev, sizeof(struct snd_ctl_event))) { err = -EFAULT; goto __end; } spin_lock_irq(&ctl->read_lock); buffer += sizeof(struct snd_ctl_event); count -= sizeof(struct snd_ctl_event); result += sizeof(struct snd_ctl_event); } __end_lock: spin_unlock_irq(&ctl->read_lock); __end: return result > 0 ? result : err; } static __poll_t snd_ctl_poll(struct file *file, poll_table * wait) { __poll_t mask; struct snd_ctl_file *ctl; ctl = file->private_data; if (!ctl->subscribed) return 0; poll_wait(file, &ctl->change_sleep, wait); mask = 0; if (!list_empty(&ctl->events)) mask |= EPOLLIN | EPOLLRDNORM; return mask; } /* * register the device-specific control-ioctls. * called from each device manager like pcm.c, hwdep.c, etc. */ static int _snd_ctl_register_ioctl(snd_kctl_ioctl_func_t fcn, struct list_head *lists) { struct snd_kctl_ioctl *pn; pn = kzalloc(sizeof(struct snd_kctl_ioctl), GFP_KERNEL); if (pn == NULL) return -ENOMEM; pn->fioctl = fcn; guard(rwsem_write)(&snd_ioctl_rwsem); list_add_tail(&pn->list, lists); return 0; } /** * snd_ctl_register_ioctl - register the device-specific control-ioctls * @fcn: ioctl callback function * * called from each device manager like pcm.c, hwdep.c, etc. * * Return: zero if successful, or a negative error code */ int snd_ctl_register_ioctl(snd_kctl_ioctl_func_t fcn) { return _snd_ctl_register_ioctl(fcn, &snd_control_ioctls); } EXPORT_SYMBOL(snd_ctl_register_ioctl); #ifdef CONFIG_COMPAT /** * snd_ctl_register_ioctl_compat - register the device-specific 32bit compat * control-ioctls * @fcn: ioctl callback function * * Return: zero if successful, or a negative error code */ int snd_ctl_register_ioctl_compat(snd_kctl_ioctl_func_t fcn) { return _snd_ctl_register_ioctl(fcn, &snd_control_compat_ioctls); } EXPORT_SYMBOL(snd_ctl_register_ioctl_compat); #endif /* * de-register the device-specific control-ioctls. 
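 *
 * A device manager pairs this with the registration above; pcm.c, for
 * instance, registers its handler once and removes it on unload (sketch):
 *
 *	snd_ctl_register_ioctl(snd_pcm_control_ioctl);
 *	...
 *	snd_ctl_unregister_ioctl(snd_pcm_control_ioctl);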
*/ static int _snd_ctl_unregister_ioctl(snd_kctl_ioctl_func_t fcn, struct list_head *lists) { struct snd_kctl_ioctl *p; if (snd_BUG_ON(!fcn)) return -EINVAL; guard(rwsem_write)(&snd_ioctl_rwsem); list_for_each_entry(p, lists, list) { if (p->fioctl == fcn) { list_del(&p->list); kfree(p); return 0; } } snd_BUG(); return -EINVAL; } /** * snd_ctl_unregister_ioctl - de-register the device-specific control-ioctls * @fcn: ioctl callback function to unregister * * Return: zero if successful, or a negative error code */ int snd_ctl_unregister_ioctl(snd_kctl_ioctl_func_t fcn) { return _snd_ctl_unregister_ioctl(fcn, &snd_control_ioctls); } EXPORT_SYMBOL(snd_ctl_unregister_ioctl); #ifdef CONFIG_COMPAT /** * snd_ctl_unregister_ioctl_compat - de-register the device-specific compat * 32bit control-ioctls * @fcn: ioctl callback function to unregister * * Return: zero if successful, or a negative error code */ int snd_ctl_unregister_ioctl_compat(snd_kctl_ioctl_func_t fcn) { return _snd_ctl_unregister_ioctl(fcn, &snd_control_compat_ioctls); } EXPORT_SYMBOL(snd_ctl_unregister_ioctl_compat); #endif static int snd_ctl_fasync(int fd, struct file *file, int on) { struct snd_ctl_file *ctl; ctl = file->private_data; return snd_fasync_helper(fd, file, on, &ctl->fasync); } /* return the preferred subdevice number if already assigned; * otherwise return -1 */ int snd_ctl_get_preferred_subdevice(struct snd_card *card, int type) { struct snd_ctl_file *kctl; int subdevice = -1; guard(read_lock_irqsave)(&card->controls_rwlock); list_for_each_entry(kctl, &card->ctl_files, list) { if (kctl->pid == task_pid(current)) { subdevice = kctl->preferred_subdevice[type]; if (subdevice != -1) break; } } return subdevice; } EXPORT_SYMBOL_GPL(snd_ctl_get_preferred_subdevice); /* * ioctl32 compat */ #ifdef CONFIG_COMPAT #include "control_compat.c" #else #define snd_ctl_ioctl_compat NULL #endif /* * control layers (audio LED etc.) */ /** * snd_ctl_request_layer - request to use the layer * @module_name: Name of the kernel module (NULL == built-in) * * Return: zero if successful, or an error code when the module cannot be loaded */ int snd_ctl_request_layer(const char *module_name) { struct snd_ctl_layer_ops *lops; if (module_name == NULL) return 0; scoped_guard(rwsem_read, &snd_ctl_layer_rwsem) { for (lops = snd_ctl_layer; lops; lops = lops->next) if (strcmp(lops->module_name, module_name) == 0) return 0; } return request_module(module_name); } EXPORT_SYMBOL_GPL(snd_ctl_request_layer); /** * snd_ctl_register_layer - register new control layer * @lops: operation structure * * The new layer can track all control elements and do additional * operations on top (like audio LED handling). */ void snd_ctl_register_layer(struct snd_ctl_layer_ops *lops) { struct snd_card *card; int card_number; scoped_guard(rwsem_write, &snd_ctl_layer_rwsem) { lops->next = snd_ctl_layer; snd_ctl_layer = lops; } for (card_number = 0; card_number < SNDRV_CARDS; card_number++) { card = snd_card_ref(card_number); if (card) { scoped_guard(rwsem_read, &card->controls_rwsem) lops->lregister(card); snd_card_unref(card); } } } EXPORT_SYMBOL_GPL(snd_ctl_register_layer); /** * snd_ctl_disconnect_layer - disconnect control layer * @lops: operation structure * * It is expected that the information about tracked cards * is freed before this call (the disconnect callback is * not called here).
*/ void snd_ctl_disconnect_layer(struct snd_ctl_layer_ops *lops) { struct snd_ctl_layer_ops *lops2, *prev_lops2; guard(rwsem_write)(&snd_ctl_layer_rwsem); for (lops2 = snd_ctl_layer, prev_lops2 = NULL; lops2; lops2 = lops2->next) { if (lops2 == lops) { if (!prev_lops2) snd_ctl_layer = lops->next; else prev_lops2->next = lops->next; break; } prev_lops2 = lops2; } } EXPORT_SYMBOL_GPL(snd_ctl_disconnect_layer); /* * INIT PART */ static const struct file_operations snd_ctl_f_ops = { .owner = THIS_MODULE, .read = snd_ctl_read, .open = snd_ctl_open, .release = snd_ctl_release, .poll = snd_ctl_poll, .unlocked_ioctl = snd_ctl_ioctl, .compat_ioctl = snd_ctl_ioctl_compat, .fasync = snd_ctl_fasync, }; /* call lops under rwsems; called from snd_ctl_dev_*() below() */ #define call_snd_ctl_lops(_card, _op) \ do { \ struct snd_ctl_layer_ops *lops; \ guard(rwsem_read)(&(_card)->controls_rwsem); \ guard(rwsem_read)(&snd_ctl_layer_rwsem); \ for (lops = snd_ctl_layer; lops; lops = lops->next) \ lops->_op(_card); \ } while (0) /* * registration of the control device */ static int snd_ctl_dev_register(struct snd_device *device) { struct snd_card *card = device->device_data; int err; err = snd_register_device(SNDRV_DEVICE_TYPE_CONTROL, card, -1, &snd_ctl_f_ops, card, card->ctl_dev); if (err < 0) return err; call_snd_ctl_lops(card, lregister); return 0; } /* * disconnection of the control device */ static int snd_ctl_dev_disconnect(struct snd_device *device) { struct snd_card *card = device->device_data; struct snd_ctl_file *ctl; scoped_guard(read_lock_irqsave, &card->controls_rwlock) { list_for_each_entry(ctl, &card->ctl_files, list) { wake_up(&ctl->change_sleep); snd_kill_fasync(ctl->fasync, SIGIO, POLL_ERR); } } call_snd_ctl_lops(card, ldisconnect); return snd_unregister_device(card->ctl_dev); } /* * free all controls */ static int snd_ctl_dev_free(struct snd_device *device) { struct snd_card *card = device->device_data; struct snd_kcontrol *control; scoped_guard(rwsem_write, &card->controls_rwsem) { while (!list_empty(&card->controls)) { control = snd_kcontrol(card->controls.next); __snd_ctl_remove(card, control, false); } #ifdef CONFIG_SND_CTL_FAST_LOOKUP xa_destroy(&card->ctl_numids); xa_destroy(&card->ctl_hash); #endif } put_device(card->ctl_dev); return 0; } /* * create control core: * called from init.c */ int snd_ctl_create(struct snd_card *card) { static const struct snd_device_ops ops = { .dev_free = snd_ctl_dev_free, .dev_register = snd_ctl_dev_register, .dev_disconnect = snd_ctl_dev_disconnect, }; int err; if (snd_BUG_ON(!card)) return -ENXIO; if (snd_BUG_ON(card->number < 0 || card->number >= SNDRV_CARDS)) return -ENXIO; err = snd_device_alloc(&card->ctl_dev, card); if (err < 0) return err; dev_set_name(card->ctl_dev, "controlC%d", card->number); err = snd_device_new(card, SNDRV_DEV_CONTROL, card, &ops); if (err < 0) put_device(card->ctl_dev); return err; } /* * Frequently used control callbacks/helpers */ /** * snd_ctl_boolean_mono_info - Helper function for a standard boolean info * callback with a mono channel * @kcontrol: the kcontrol instance * @uinfo: info to store * * This is a function that can be used as info callback for a standard * boolean control with a single mono channel. 
* * Return: Zero (always successful) */ int snd_ctl_boolean_mono_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN; uinfo->count = 1; uinfo->value.integer.min = 0; uinfo->value.integer.max = 1; return 0; } EXPORT_SYMBOL(snd_ctl_boolean_mono_info); /** * snd_ctl_boolean_stereo_info - Helper function for a standard boolean info * callback with two stereo channels * @kcontrol: the kcontrol instance * @uinfo: info to store * * This is a function that can be used as info callback for a standard * boolean control with two stereo channels. * * Return: Zero (always successful) */ int snd_ctl_boolean_stereo_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN; uinfo->count = 2; uinfo->value.integer.min = 0; uinfo->value.integer.max = 1; return 0; } EXPORT_SYMBOL(snd_ctl_boolean_stereo_info); /** * snd_ctl_enum_info - fills the info structure for an enumerated control * @info: the structure to be filled * @channels: the number of the control's channels; often one * @items: the number of control values; also the size of @names * @names: an array containing the names of all control values * * Sets all required fields in @info to their appropriate values. * If the control's accessibility is not the default (readable and writable), * the caller has to fill @info->access. * * Return: Zero (always successful) */ int snd_ctl_enum_info(struct snd_ctl_elem_info *info, unsigned int channels, unsigned int items, const char *const names[]) { info->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED; info->count = channels; info->value.enumerated.items = items; if (!items) return 0; if (info->value.enumerated.item >= items) info->value.enumerated.item = items - 1; WARN(strlen(names[info->value.enumerated.item]) >= sizeof(info->value.enumerated.name), "ALSA: too long item name '%s'\n", names[info->value.enumerated.item]); strscpy(info->value.enumerated.name, names[info->value.enumerated.item], sizeof(info->value.enumerated.name)); return 0; } EXPORT_SYMBOL(snd_ctl_enum_info);
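/*
 * A minimal usage sketch, assuming a hypothetical driver control: how a
 * three-item enumerated control could wire snd_ctl_enum_info() into its
 * .info callback. The names demo_mode_items and demo_mode_info are
 * invented for the example and are not part of the file above; the
 * boolean helpers snd_ctl_boolean_mono_info()/_stereo_info() can be
 * plugged into .info directly with no wrapper at all.
 */
static const char * const demo_mode_items[] = { "Off", "Low", "High" };

static int demo_mode_info(struct snd_kcontrol *kcontrol,
			  struct snd_ctl_elem_info *uinfo)
{
	/* one channel, ARRAY_SIZE(demo_mode_items) enumerated items */
	return snd_ctl_enum_info(uinfo, 1, ARRAY_SIZE(demo_mode_items),
				 demo_mode_items);
}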
// SPDX-License-Identifier: GPL-2.0-only /* * Netlink interface for IEEE 802.15.4 stack * * Copyright 2007, 2008 Siemens AG * * Written by: * Sergey Lapin <slapin@ossfans.org> * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> * Maxim Osipov <maxim.osipov@siemens.com> */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/if_arp.h> #include <net/netlink.h> #include <net/genetlink.h> #include <net/cfg802154.h> #include <net/af_ieee802154.h> #include <net/ieee802154_netdev.h> #include <net/rtnetlink.h> /* for rtnl_{un,}lock */ #include <linux/nl802154.h> #include "ieee802154.h" #include "rdev-ops.h" #include "core.h" static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct wpan_phy *phy) { void *hdr; int i, pages = 0; u32 *buf = kcalloc(IEEE802154_MAX_PAGE + 1, sizeof(u32), GFP_KERNEL); pr_debug("%s\n", __func__); if (!buf) return -EMSGSIZE; hdr = genlmsg_put(msg, 0, seq, &nl802154_family, flags, IEEE802154_LIST_PHY); if (!hdr) goto out; rtnl_lock(); if (nla_put_string(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)) || nla_put_u8(msg, IEEE802154_ATTR_PAGE, phy->current_page) || nla_put_u8(msg, IEEE802154_ATTR_CHANNEL, phy->current_channel)) goto nla_put_failure; for (i = 0; i <= IEEE802154_MAX_PAGE; i++) { if (phy->supported.channels[i]) buf[pages++] = phy->supported.channels[i] | (i << 27); } if (pages && nla_put(msg, IEEE802154_ATTR_CHANNEL_PAGE_LIST, pages * sizeof(uint32_t), buf)) goto nla_put_failure; rtnl_unlock(); kfree(buf); genlmsg_end(msg, hdr); return 0; nla_put_failure: rtnl_unlock(); genlmsg_cancel(msg, hdr); out: kfree(buf); return -EMSGSIZE; } int ieee802154_list_phy(struct sk_buff *skb, struct genl_info *info) { /* Request for interface name, index, type, IEEE address, * PAN Id, short address */ struct sk_buff *msg; struct wpan_phy *phy; const char *name; int rc = -ENOBUFS; pr_debug("%s\n", __func__); if (!info->attrs[IEEE802154_ATTR_PHY_NAME]) return -EINVAL; name = nla_data(info->attrs[IEEE802154_ATTR_PHY_NAME]); if (name[nla_len(info->attrs[IEEE802154_ATTR_PHY_NAME]) - 1] != '\0') return -EINVAL; /* phy name should be null-terminated */ phy = wpan_phy_find(name); if (!phy) return -ENODEV;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) goto out_dev; rc = ieee802154_nl_fill_phy(msg, info->snd_portid, info->snd_seq, 0, phy); if (rc < 0) goto out_free; wpan_phy_put(phy); return genlmsg_reply(msg, info); out_free: nlmsg_free(msg); out_dev: wpan_phy_put(phy); return rc; } struct dump_phy_data { struct sk_buff *skb; struct netlink_callback *cb; int idx, s_idx; }; static int ieee802154_dump_phy_iter(struct wpan_phy *phy, void *_data) { int rc; struct dump_phy_data *data = _data; pr_debug("%s\n", __func__); if (data->idx++ < data->s_idx) return 0; rc = ieee802154_nl_fill_phy(data->skb, NETLINK_CB(data->cb->skb).portid, data->cb->nlh->nlmsg_seq, NLM_F_MULTI, phy); if (rc < 0) { data->idx--; return rc; } return 0; } int ieee802154_dump_phy(struct sk_buff *skb, struct netlink_callback *cb) { struct dump_phy_data data = { .cb = cb, .skb = skb, .s_idx = cb->args[0], .idx = 0, }; pr_debug("%s\n", __func__); wpan_phy_for_each(ieee802154_dump_phy_iter, &data); cb->args[0] = data.idx; return skb->len; } int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *msg; struct wpan_phy *phy; const char *name; const char *devname; int rc = -ENOBUFS; struct net_device *dev; int type = __IEEE802154_DEV_INVALID; unsigned char name_assign_type; pr_debug("%s\n", __func__); if (!info->attrs[IEEE802154_ATTR_PHY_NAME]) return -EINVAL; name = nla_data(info->attrs[IEEE802154_ATTR_PHY_NAME]); if (name[nla_len(info->attrs[IEEE802154_ATTR_PHY_NAME]) - 1] != '\0') return -EINVAL; /* phy name should be null-terminated */ if (info->attrs[IEEE802154_ATTR_DEV_NAME]) { devname = nla_data(info->attrs[IEEE802154_ATTR_DEV_NAME]); if (devname[nla_len(info->attrs[IEEE802154_ATTR_DEV_NAME]) - 1] != '\0') return -EINVAL; /* device name should be null-terminated */ name_assign_type = NET_NAME_USER; } else { devname = "wpan%d"; name_assign_type = NET_NAME_ENUM; } if (strlen(devname) >= IFNAMSIZ) return -ENAMETOOLONG; phy = wpan_phy_find(name); if (!phy) return -ENODEV; msg = ieee802154_nl_new_reply(info, 0, IEEE802154_ADD_IFACE); if (!msg) goto out_dev; if (info->attrs[IEEE802154_ATTR_HW_ADDR] && nla_len(info->attrs[IEEE802154_ATTR_HW_ADDR]) != IEEE802154_ADDR_LEN) { rc = -EINVAL; goto nla_put_failure; } if (info->attrs[IEEE802154_ATTR_DEV_TYPE]) { type = nla_get_u8(info->attrs[IEEE802154_ATTR_DEV_TYPE]); if (type >= __IEEE802154_DEV_MAX) { rc = -EINVAL; goto nla_put_failure; } } dev = rdev_add_virtual_intf_deprecated(wpan_phy_to_rdev(phy), devname, name_assign_type, type); if (IS_ERR(dev)) { rc = PTR_ERR(dev); goto nla_put_failure; } dev_hold(dev); if (info->attrs[IEEE802154_ATTR_HW_ADDR]) { struct sockaddr_storage addr; addr.ss_family = ARPHRD_IEEE802154; nla_memcpy(&addr.__data, info->attrs[IEEE802154_ATTR_HW_ADDR], IEEE802154_ADDR_LEN); /* strangely enough, some callbacks (inetdev_event) from * dev_set_mac_address require RTNL_LOCK */ rtnl_lock(); rc = dev_set_mac_address(dev, &addr, NULL); rtnl_unlock(); if (rc) goto dev_unregister; } if (nla_put_string(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)) || nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name)) { rc = -EMSGSIZE; goto nla_put_failure; } dev_put(dev); wpan_phy_put(phy); return ieee802154_nl_reply(msg, info); dev_unregister: rtnl_lock(); /* del_iface must be called with RTNL lock */ rdev_del_virtual_intf_deprecated(wpan_phy_to_rdev(phy), dev); dev_put(dev); rtnl_unlock(); nla_put_failure: nlmsg_free(msg); out_dev: wpan_phy_put(phy); return rc; } int ieee802154_del_iface(struct sk_buff *skb, struct genl_info *info) {
struct sk_buff *msg; struct wpan_phy *phy; const char *name; int rc; struct net_device *dev; pr_debug("%s\n", __func__); if (!info->attrs[IEEE802154_ATTR_DEV_NAME]) return -EINVAL; name = nla_data(info->attrs[IEEE802154_ATTR_DEV_NAME]); if (name[nla_len(info->attrs[IEEE802154_ATTR_DEV_NAME]) - 1] != '\0') return -EINVAL; /* name should be null-terminated */ rc = -ENODEV; dev = dev_get_by_name(genl_info_net(info), name); if (!dev) return rc; if (dev->type != ARPHRD_IEEE802154) goto out; phy = dev->ieee802154_ptr->wpan_phy; BUG_ON(!phy); get_device(&phy->dev); rc = -EINVAL; /* phy name is optional, but should be checked if it's given */ if (info->attrs[IEEE802154_ATTR_PHY_NAME]) { struct wpan_phy *phy2; const char *pname = nla_data(info->attrs[IEEE802154_ATTR_PHY_NAME]); if (pname[nla_len(info->attrs[IEEE802154_ATTR_PHY_NAME]) - 1] != '\0') /* name should be null-terminated */ goto out_dev; phy2 = wpan_phy_find(pname); if (!phy2) goto out_dev; if (phy != phy2) { wpan_phy_put(phy2); goto out_dev; } } rc = -ENOBUFS; msg = ieee802154_nl_new_reply(info, 0, IEEE802154_DEL_IFACE); if (!msg) goto out_dev; rtnl_lock(); rdev_del_virtual_intf_deprecated(wpan_phy_to_rdev(phy), dev); /* We don't have the device anymore */ dev_put(dev); dev = NULL; rtnl_unlock(); if (nla_put_string(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)) || nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, name)) goto nla_put_failure; wpan_phy_put(phy); return ieee802154_nl_reply(msg, info); nla_put_failure: nlmsg_free(msg); out_dev: wpan_phy_put(phy); out: dev_put(dev); return rc; }
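/*
 * The handlers above each repeat the same check that a string attribute
 * is null-terminated before treating it as a C string. A minimal sketch
 * of how that pattern could be factored into a helper; the name
 * nl802154_get_name_attr() is hypothetical and not part of this file.
 */
static const char *nl802154_get_name_attr(struct genl_info *info, int attrtype)
{
	struct nlattr *nla = info->attrs[attrtype];

	if (!nla)
		return NULL;
	/* reject empty or non-null-terminated payloads */
	if (nla_len(nla) == 0 ||
	    ((const char *)nla_data(nla))[nla_len(nla) - 1] != '\0')
		return NULL;
	return nla_data(nla);
}
/*
 * With such a helper, ieee802154_list_phy() would reduce to
 * "name = nl802154_get_name_attr(info, IEEE802154_ATTR_PHY_NAME);"
 * followed by a single NULL check.
 */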
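/*
 * A sketch of the dump-resume contract that ieee802154_dump_phy() above
 * relies on, under the same assumptions: netlink preserves cb->args[]
 * between invocations of a dump callback, so the callback skips entries
 * already delivered and records how far it got. demo_nitems and
 * demo_fill_one() are hypothetical stand-ins for a driver's item table
 * and fill routine.
 */
static int demo_nitems;			/* hypothetical item count */
static int demo_fill_one(struct sk_buff *skb, struct netlink_callback *cb,
			 int idx);	/* hypothetical fill routine */

static int demo_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	int idx, s_idx = cb->args[0];	/* position saved by the last call */

	for (idx = 0; idx < demo_nitems; idx++) {
		if (idx < s_idx)
			continue;	/* already sent in an earlier pass */
		if (demo_fill_one(skb, cb, idx) < 0)
			break;		/* skb full; resume from idx next time */
	}
	cb->args[0] = idx;		/* remember where to resume */
	return skb->len;		/* non-zero means "call me again" */
}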
// SPDX-License-Identifier: GPL-2.0+ /* * NILFS module and super block management. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * * Written by Ryusuke Konishi. */ /* * linux/fs/ext2/super.c * * Copyright (C) 1992, 1993, 1994, 1995 * Remy Card (card@masi.ibp.fr) * Laboratoire MASI - Institut Blaise Pascal * Universite Pierre et Marie Curie (Paris VI) * * from * * linux/fs/minix/inode.c * * Copyright (C) 1991, 1992 Linus Torvalds * * Big-endian to little-endian byte-swapping/bitmaps by * David S.
Miller (davem@caip.rutgers.edu), 1995 */ #include <linux/module.h> #include <linux/string.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/blkdev.h> #include <linux/crc32.h> #include <linux/vfs.h> #include <linux/writeback.h> #include <linux/seq_file.h> #include <linux/mount.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> #include "nilfs.h" #include "export.h" #include "mdt.h" #include "alloc.h" #include "btree.h" #include "btnode.h" #include "page.h" #include "cpfile.h" #include "sufile.h" /* nilfs_sufile_resize(), nilfs_sufile_set_alloc_range() */ #include "ifile.h" #include "dat.h" #include "segment.h" #include "segbuf.h" MODULE_AUTHOR("NTT Corp."); MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem " "(NILFS)"); MODULE_LICENSE("GPL"); static struct kmem_cache *nilfs_inode_cachep; struct kmem_cache *nilfs_transaction_cachep; struct kmem_cache *nilfs_segbuf_cachep; struct kmem_cache *nilfs_btree_path_cache; static int nilfs_setup_super(struct super_block *sb, int is_mount); void __nilfs_msg(struct super_block *sb, const char *fmt, ...) { struct va_format vaf; va_list args; int level; va_start(args, fmt); level = printk_get_level(fmt); vaf.fmt = printk_skip_level(fmt); vaf.va = &args; if (sb) printk("%c%cNILFS (%s): %pV\n", KERN_SOH_ASCII, level, sb->s_id, &vaf); else printk("%c%cNILFS: %pV\n", KERN_SOH_ASCII, level, &vaf); va_end(args); } static void nilfs_set_error(struct super_block *sb) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp; down_write(&nilfs->ns_sem); if (!(nilfs->ns_mount_state & NILFS_ERROR_FS)) { nilfs->ns_mount_state |= NILFS_ERROR_FS; sbp = nilfs_prepare_super(sb, 0); if (likely(sbp)) { sbp[0]->s_state |= cpu_to_le16(NILFS_ERROR_FS); if (sbp[1]) sbp[1]->s_state |= cpu_to_le16(NILFS_ERROR_FS); nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL); } } up_write(&nilfs->ns_sem); } /** * __nilfs_error() - report failure condition on a filesystem * @sb: super block instance * @function: name of calling function * @fmt: format string for message to be output * @...: optional arguments to @fmt * * __nilfs_error() sets an ERROR_FS flag on the superblock as well as * reporting an error message. This function should be called when * NILFS detects inconsistencies or defects in metadata on disk. * * This implements the body of the nilfs_error() macro. Normally, * nilfs_error() should be used. For recoverable errors such as a * single-shot I/O error, nilfs_err() should be used instead. * * Callers should not add a trailing newline since this function adds one. */ void __nilfs_error(struct super_block *sb, const char *function, const char *fmt, ...)
{ struct the_nilfs *nilfs = sb->s_fs_info; struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; printk(KERN_CRIT "NILFS error (device %s): %s: %pV\n", sb->s_id, function, &vaf); va_end(args); if (!sb_rdonly(sb)) { nilfs_set_error(sb); if (nilfs_test_opt(nilfs, ERRORS_RO)) { printk(KERN_CRIT "Remounting filesystem read-only\n"); sb->s_flags |= SB_RDONLY; } } if (nilfs_test_opt(nilfs, ERRORS_PANIC)) panic("NILFS (device %s): panic forced after error\n", sb->s_id); } struct inode *nilfs_alloc_inode(struct super_block *sb) { struct nilfs_inode_info *ii; ii = alloc_inode_sb(sb, nilfs_inode_cachep, GFP_NOFS); if (!ii) return NULL; ii->i_bh = NULL; ii->i_state = 0; ii->i_type = 0; ii->i_cno = 0; ii->i_assoc_inode = NULL; ii->i_bmap = &ii->i_bmap_data; return &ii->vfs_inode; } static void nilfs_free_inode(struct inode *inode) { if (nilfs_is_metadata_file_inode(inode)) nilfs_mdt_destroy(inode); kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); } static int nilfs_sync_super(struct super_block *sb, int flag) { struct the_nilfs *nilfs = sb->s_fs_info; int err; retry: set_buffer_dirty(nilfs->ns_sbh[0]); if (nilfs_test_opt(nilfs, BARRIER)) { err = __sync_dirty_buffer(nilfs->ns_sbh[0], REQ_SYNC | REQ_PREFLUSH | REQ_FUA); } else { err = sync_dirty_buffer(nilfs->ns_sbh[0]); } if (unlikely(err)) { nilfs_err(sb, "unable to write superblock: err=%d", err); if (err == -EIO && nilfs->ns_sbh[1]) { /* * sbp[0] points to newer log than sbp[1], * so copy sbp[0] to sbp[1] to take over sbp[0]. */ memcpy(nilfs->ns_sbp[1], nilfs->ns_sbp[0], nilfs->ns_sbsize); nilfs_fall_back_super_block(nilfs); goto retry; } } else { struct nilfs_super_block *sbp = nilfs->ns_sbp[0]; nilfs->ns_sbwcount++; /* * The latest segment becomes trailable from the position * written in superblock. */ clear_nilfs_discontinued(nilfs); /* update GC protection for recent segments */ if (nilfs->ns_sbh[1]) { if (flag == NILFS_SB_COMMIT_ALL) { set_buffer_dirty(nilfs->ns_sbh[1]); if (sync_dirty_buffer(nilfs->ns_sbh[1]) < 0) goto out; } if (le64_to_cpu(nilfs->ns_sbp[1]->s_last_cno) < le64_to_cpu(nilfs->ns_sbp[0]->s_last_cno)) sbp = nilfs->ns_sbp[1]; } spin_lock(&nilfs->ns_last_segment_lock); nilfs->ns_prot_seq = le64_to_cpu(sbp->s_last_seq); spin_unlock(&nilfs->ns_last_segment_lock); } out: return err; } void nilfs_set_log_cursor(struct nilfs_super_block *sbp, struct the_nilfs *nilfs) { sector_t nfreeblocks; /* nilfs->ns_sem must be locked by the caller. */ nilfs_count_free_blocks(nilfs, &nfreeblocks); sbp->s_free_blocks_count = cpu_to_le64(nfreeblocks); spin_lock(&nilfs->ns_last_segment_lock); sbp->s_last_seq = cpu_to_le64(nilfs->ns_last_seq); sbp->s_last_pseg = cpu_to_le64(nilfs->ns_last_pseg); sbp->s_last_cno = cpu_to_le64(nilfs->ns_last_cno); spin_unlock(&nilfs->ns_last_segment_lock); } struct nilfs_super_block **nilfs_prepare_super(struct super_block *sb, int flip) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp = nilfs->ns_sbp; /* nilfs->ns_sem must be locked by the caller. 
*/ if (sbp[0]->s_magic != cpu_to_le16(NILFS_SUPER_MAGIC)) { if (sbp[1] && sbp[1]->s_magic == cpu_to_le16(NILFS_SUPER_MAGIC)) { memcpy(sbp[0], sbp[1], nilfs->ns_sbsize); } else { nilfs_crit(sb, "superblock broke"); return NULL; } } else if (sbp[1] && sbp[1]->s_magic != cpu_to_le16(NILFS_SUPER_MAGIC)) { memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); } if (flip && sbp[1]) nilfs_swap_super_block(nilfs); return sbp; } int nilfs_commit_super(struct super_block *sb, int flag) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp = nilfs->ns_sbp; time64_t t; /* nilfs->ns_sem must be locked by the caller. */ t = ktime_get_real_seconds(); nilfs->ns_sbwtime = t; sbp[0]->s_wtime = cpu_to_le64(t); sbp[0]->s_sum = 0; sbp[0]->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed, (unsigned char *)sbp[0], nilfs->ns_sbsize)); if (flag == NILFS_SB_COMMIT_ALL && sbp[1]) { sbp[1]->s_wtime = sbp[0]->s_wtime; sbp[1]->s_sum = 0; sbp[1]->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed, (unsigned char *)sbp[1], nilfs->ns_sbsize)); } clear_nilfs_sb_dirty(nilfs); nilfs->ns_flushed_device = 1; /* make sure store to ns_flushed_device cannot be reordered */ smp_wmb(); return nilfs_sync_super(sb, flag); } /** * nilfs_cleanup_super() - write filesystem state for cleanup * @sb: super block instance to be unmounted or degraded to read-only * * This function restores state flags in the on-disk super block. * This will set "clean" flag (i.e. NILFS_VALID_FS) unless the * filesystem was not clean previously. * * Return: 0 on success, %-EIO if I/O error or superblock is corrupted. */ int nilfs_cleanup_super(struct super_block *sb) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp; int flag = NILFS_SB_COMMIT; int ret = -EIO; sbp = nilfs_prepare_super(sb, 0); if (sbp) { sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state); nilfs_set_log_cursor(sbp[0], nilfs); if (sbp[1] && sbp[0]->s_last_cno == sbp[1]->s_last_cno) { /* * make the "clean" flag also to the opposite * super block if both super blocks point to * the same checkpoint. */ sbp[1]->s_state = sbp[0]->s_state; flag = NILFS_SB_COMMIT_ALL; } ret = nilfs_commit_super(sb, flag); } return ret; } /** * nilfs_move_2nd_super - relocate secondary super block * @sb: super block instance * @sb2off: new offset of the secondary super block (in bytes) * * Return: 0 on success, or a negative error code on failure. */ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off) { struct the_nilfs *nilfs = sb->s_fs_info; struct buffer_head *nsbh; struct nilfs_super_block *nsbp; sector_t blocknr, newblocknr; unsigned long offset; int sb2i; /* array index of the secondary superblock */ int ret = 0; /* nilfs->ns_sem must be locked by the caller. 
*/ if (nilfs->ns_sbh[1] && nilfs->ns_sbh[1]->b_blocknr > nilfs->ns_first_data_block) { sb2i = 1; blocknr = nilfs->ns_sbh[1]->b_blocknr; } else if (nilfs->ns_sbh[0]->b_blocknr > nilfs->ns_first_data_block) { sb2i = 0; blocknr = nilfs->ns_sbh[0]->b_blocknr; } else { sb2i = -1; blocknr = 0; } if (sb2i >= 0 && (u64)blocknr << nilfs->ns_blocksize_bits == sb2off) goto out; /* super block location is unchanged */ /* Get new super block buffer */ newblocknr = sb2off >> nilfs->ns_blocksize_bits; offset = sb2off & (nilfs->ns_blocksize - 1); nsbh = sb_getblk(sb, newblocknr); if (!nsbh) { nilfs_warn(sb, "unable to move secondary superblock to block %llu", (unsigned long long)newblocknr); ret = -EIO; goto out; } nsbp = (void *)nsbh->b_data + offset; lock_buffer(nsbh); if (sb2i >= 0) { /* * The position of the second superblock only changes by 4KiB, * which is larger than the maximum superblock data size * (= 1KiB), so there is no need to use memmove() to allow * overlap between source and destination. */ memcpy(nsbp, nilfs->ns_sbp[sb2i], nilfs->ns_sbsize); /* * Zero fill after copy to avoid overwriting in case of move * within the same block. */ memset(nsbh->b_data, 0, offset); memset((void *)nsbp + nilfs->ns_sbsize, 0, nsbh->b_size - offset - nilfs->ns_sbsize); } else { memset(nsbh->b_data, 0, nsbh->b_size); } set_buffer_uptodate(nsbh); unlock_buffer(nsbh); if (sb2i >= 0) { brelse(nilfs->ns_sbh[sb2i]); nilfs->ns_sbh[sb2i] = nsbh; nilfs->ns_sbp[sb2i] = nsbp; } else if (nilfs->ns_sbh[0]->b_blocknr < nilfs->ns_first_data_block) { /* secondary super block will be restored to index 1 */ nilfs->ns_sbh[1] = nsbh; nilfs->ns_sbp[1] = nsbp; } else { brelse(nsbh); } out: return ret; } /** * nilfs_resize_fs - resize the filesystem * @sb: super block instance * @newsize: new size of the filesystem (in bytes) * * Return: 0 on success, or a negative error code on failure. */ int nilfs_resize_fs(struct super_block *sb, __u64 newsize) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp; __u64 devsize, newnsegs; loff_t sb2off; int ret; ret = -ERANGE; devsize = bdev_nr_bytes(sb->s_bdev); if (newsize > devsize) goto out; /* * Prevent underflow in second superblock position calculation. * The exact minimum size check is done in nilfs_sufile_resize(). */ if (newsize < 4096) { ret = -ENOSPC; goto out; } /* * Write lock is required to protect some functions depending * on the number of segments, the number of reserved segments, * and so forth. */ down_write(&nilfs->ns_segctor_sem); sb2off = NILFS_SB2_OFFSET_BYTES(newsize); newnsegs = sb2off >> nilfs->ns_blocksize_bits; newnsegs = div64_ul(newnsegs, nilfs->ns_blocks_per_segment); ret = nilfs_sufile_resize(nilfs->ns_sufile, newnsegs); up_write(&nilfs->ns_segctor_sem); if (ret < 0) goto out; ret = nilfs_construct_segment(sb); if (ret < 0) goto out; down_write(&nilfs->ns_sem); nilfs_move_2nd_super(sb, sb2off); ret = -EIO; sbp = nilfs_prepare_super(sb, 0); if (likely(sbp)) { nilfs_set_log_cursor(sbp[0], nilfs); /* * Drop NILFS_RESIZE_FS flag for compatibility with * mount-time resize which may be implemented in a * future release. */ sbp[0]->s_state = cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & ~NILFS_RESIZE_FS); sbp[0]->s_dev_size = cpu_to_le64(newsize); sbp[0]->s_nsegments = cpu_to_le64(nilfs->ns_nsegments); if (sbp[1]) memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); ret = nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL); } up_write(&nilfs->ns_sem); /* * Reset the range of allocatable segments last. 
This order * is important in the case of expansion because the secondary * superblock must be protected from log writes until migration * completes. */ if (!ret) nilfs_sufile_set_alloc_range(nilfs->ns_sufile, 0, newnsegs - 1); out: return ret; } static void nilfs_put_super(struct super_block *sb) { struct the_nilfs *nilfs = sb->s_fs_info; nilfs_detach_log_writer(sb); if (!sb_rdonly(sb)) { down_write(&nilfs->ns_sem); nilfs_cleanup_super(sb); up_write(&nilfs->ns_sem); } nilfs_sysfs_delete_device_group(nilfs); iput(nilfs->ns_sufile); iput(nilfs->ns_cpfile); iput(nilfs->ns_dat); destroy_nilfs(nilfs); sb->s_fs_info = NULL; } static int nilfs_sync_fs(struct super_block *sb, int wait) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp; int err = 0; /* This function is called when the super block should be written back */ if (wait) err = nilfs_construct_segment(sb); down_write(&nilfs->ns_sem); if (nilfs_sb_dirty(nilfs)) { sbp = nilfs_prepare_super(sb, nilfs_sb_will_flip(nilfs)); if (likely(sbp)) { nilfs_set_log_cursor(sbp[0], nilfs); nilfs_commit_super(sb, NILFS_SB_COMMIT); } } up_write(&nilfs->ns_sem); if (!err) err = nilfs_flush_device(nilfs); return err; } int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt, struct nilfs_root **rootp) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_root *root; int err = -ENOMEM; root = nilfs_find_or_create_root( nilfs, curr_mnt ? NILFS_CPTREE_CURRENT_CNO : cno); if (!root) return err; if (root->ifile) goto reuse; /* already attached checkpoint */ down_read(&nilfs->ns_segctor_sem); err = nilfs_ifile_read(sb, root, cno, nilfs->ns_inode_size); up_read(&nilfs->ns_segctor_sem); if (unlikely(err)) goto failed; reuse: *rootp = root; return 0; failed: if (err == -EINVAL) nilfs_err(sb, "Invalid checkpoint (checkpoint number=%llu)", (unsigned long long)cno); nilfs_put_root(root); return err; } static int nilfs_freeze(struct super_block *sb) { struct the_nilfs *nilfs = sb->s_fs_info; int err; if (sb_rdonly(sb)) return 0; /* Mark super block clean */ down_write(&nilfs->ns_sem); err = nilfs_cleanup_super(sb); up_write(&nilfs->ns_sem); return err; } static int nilfs_unfreeze(struct super_block *sb) { struct the_nilfs *nilfs = sb->s_fs_info; if (sb_rdonly(sb)) return 0; down_write(&nilfs->ns_sem); nilfs_setup_super(sb, false); up_write(&nilfs->ns_sem); return 0; } static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct nilfs_root *root = NILFS_I(d_inode(dentry))->i_root; struct the_nilfs *nilfs = root->nilfs; u64 id = huge_encode_dev(sb->s_bdev->bd_dev); unsigned long long blocks; unsigned long overhead; unsigned long nrsvblocks; sector_t nfreeblocks; u64 nmaxinodes, nfreeinodes; int err; /* * Compute all of the segment blocks * * The blocks before the first segment and after the last segment * are excluded. */ blocks = nilfs->ns_blocks_per_segment * nilfs->ns_nsegments - nilfs->ns_first_data_block; nrsvblocks = nilfs->ns_nrsvsegs * nilfs->ns_blocks_per_segment; /* * Compute the overhead * * When metadata blocks are distributed outside the segment structure, * we must count them as overhead.
*/ overhead = 0; err = nilfs_count_free_blocks(nilfs, &nfreeblocks); if (unlikely(err)) return err; err = nilfs_ifile_count_free_inodes(root->ifile, &nmaxinodes, &nfreeinodes); if (unlikely(err)) { nilfs_warn(sb, "failed to count free inodes: err=%d", err); if (err == -ERANGE) { /* * If nilfs_palloc_count_max_entries() returns an * -ERANGE error code, we simply treat the current * inode count as the maximum possible and zero as * the number of free inodes. */ nmaxinodes = atomic64_read(&root->inodes_count); nfreeinodes = 0; err = 0; } else return err; } buf->f_type = NILFS_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_blocks = blocks - overhead; buf->f_bfree = nfreeblocks; buf->f_bavail = (buf->f_bfree >= nrsvblocks) ? (buf->f_bfree - nrsvblocks) : 0; buf->f_files = nmaxinodes; buf->f_ffree = nfreeinodes; buf->f_namelen = NILFS_NAME_LEN; buf->f_fsid = u64_to_fsid(id); return 0; } static int nilfs_show_options(struct seq_file *seq, struct dentry *dentry) { struct super_block *sb = dentry->d_sb; struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_root *root = NILFS_I(d_inode(dentry))->i_root; if (!nilfs_test_opt(nilfs, BARRIER)) seq_puts(seq, ",nobarrier"); if (root->cno != NILFS_CPTREE_CURRENT_CNO) seq_printf(seq, ",cp=%llu", (unsigned long long)root->cno); if (nilfs_test_opt(nilfs, ERRORS_PANIC)) seq_puts(seq, ",errors=panic"); if (nilfs_test_opt(nilfs, ERRORS_CONT)) seq_puts(seq, ",errors=continue"); if (nilfs_test_opt(nilfs, STRICT_ORDER)) seq_puts(seq, ",order=strict"); if (nilfs_test_opt(nilfs, NORECOVERY)) seq_puts(seq, ",norecovery"); if (nilfs_test_opt(nilfs, DISCARD)) seq_puts(seq, ",discard"); return 0; } static const struct super_operations nilfs_sops = { .alloc_inode = nilfs_alloc_inode, .free_inode = nilfs_free_inode, .dirty_inode = nilfs_dirty_inode, .evict_inode = nilfs_evict_inode, .put_super = nilfs_put_super, .sync_fs = nilfs_sync_fs, .freeze_fs = nilfs_freeze, .unfreeze_fs = nilfs_unfreeze, .statfs = nilfs_statfs, .show_options = nilfs_show_options }; enum { Opt_err, Opt_barrier, Opt_snapshot, Opt_order, Opt_norecovery, Opt_discard, }; static const struct constant_table nilfs_param_err[] = { {"continue", NILFS_MOUNT_ERRORS_CONT}, {"panic", NILFS_MOUNT_ERRORS_PANIC}, {"remount-ro", NILFS_MOUNT_ERRORS_RO}, {} }; static const struct fs_parameter_spec nilfs_param_spec[] = { fsparam_enum ("errors", Opt_err, nilfs_param_err), fsparam_flag_no ("barrier", Opt_barrier), fsparam_u64 ("cp", Opt_snapshot), fsparam_string ("order", Opt_order), fsparam_flag ("norecovery", Opt_norecovery), fsparam_flag_no ("discard", Opt_discard), {} }; struct nilfs_fs_context { unsigned long ns_mount_opt; __u64 cno; }; static int nilfs_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct nilfs_fs_context *nilfs = fc->fs_private; int is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE; struct fs_parse_result result; int opt; opt = fs_parse(fc, nilfs_param_spec, param, &result); if (opt < 0) return opt; switch (opt) { case Opt_barrier: if (result.negated) nilfs_clear_opt(nilfs, BARRIER); else nilfs_set_opt(nilfs, BARRIER); break; case Opt_order: if (strcmp(param->string, "relaxed") == 0) /* Ordered data semantics */ nilfs_clear_opt(nilfs, STRICT_ORDER); else if (strcmp(param->string, "strict") == 0) /* Strict in-order semantics */ nilfs_set_opt(nilfs, STRICT_ORDER); else return -EINVAL; break; case Opt_err: nilfs->ns_mount_opt &= ~NILFS_MOUNT_ERROR_MODE; nilfs->ns_mount_opt |= result.uint_32; break; case Opt_snapshot: if (is_remount) { struct super_block *sb = fc->root->d_sb; nilfs_err(sb,
"\"%s\" option is invalid for remount", param->key); return -EINVAL; } if (result.uint_64 == 0) { nilfs_err(NULL, "invalid option \"cp=0\": invalid checkpoint number 0"); return -EINVAL; } nilfs->cno = result.uint_64; break; case Opt_norecovery: nilfs_set_opt(nilfs, NORECOVERY); break; case Opt_discard: if (result.negated) nilfs_clear_opt(nilfs, DISCARD); else nilfs_set_opt(nilfs, DISCARD); break; default: return -EINVAL; } return 0; } static int nilfs_setup_super(struct super_block *sb, int is_mount) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp; int max_mnt_count; int mnt_count; /* nilfs->ns_sem must be locked by the caller. */ sbp = nilfs_prepare_super(sb, 0); if (!sbp) return -EIO; if (!is_mount) goto skip_mount_setup; max_mnt_count = le16_to_cpu(sbp[0]->s_max_mnt_count); mnt_count = le16_to_cpu(sbp[0]->s_mnt_count); if (nilfs->ns_mount_state & NILFS_ERROR_FS) { nilfs_warn(sb, "mounting fs with errors"); #if 0 } else if (max_mnt_count >= 0 && mnt_count >= max_mnt_count) { nilfs_warn(sb, "maximal mount count reached"); #endif } if (!max_mnt_count) sbp[0]->s_max_mnt_count = cpu_to_le16(NILFS_DFL_MAX_MNT_COUNT); sbp[0]->s_mnt_count = cpu_to_le16(mnt_count + 1); sbp[0]->s_mtime = cpu_to_le64(ktime_get_real_seconds()); skip_mount_setup: sbp[0]->s_state = cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & ~NILFS_VALID_FS); /* synchronize sbp[1] with sbp[0] */ if (sbp[1]) memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); return nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL); } struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb, u64 pos, int blocksize, struct buffer_head **pbh) { unsigned long long sb_index = pos; unsigned long offset; offset = do_div(sb_index, blocksize); *pbh = sb_bread(sb, sb_index); if (!*pbh) return NULL; return (struct nilfs_super_block *)((char *)(*pbh)->b_data + offset); } int nilfs_store_magic(struct super_block *sb, struct nilfs_super_block *sbp) { struct the_nilfs *nilfs = sb->s_fs_info; sb->s_magic = le16_to_cpu(sbp->s_magic); /* FS independent flags */ #ifdef NILFS_ATIME_DISABLE sb->s_flags |= SB_NOATIME; #endif nilfs->ns_resuid = le16_to_cpu(sbp->s_def_resuid); nilfs->ns_resgid = le16_to_cpu(sbp->s_def_resgid); nilfs->ns_interval = le32_to_cpu(sbp->s_c_interval); nilfs->ns_watermark = le32_to_cpu(sbp->s_c_block_max); return 0; } int nilfs_check_feature_compatibility(struct super_block *sb, struct nilfs_super_block *sbp) { __u64 features; features = le64_to_cpu(sbp->s_feature_incompat) & ~NILFS_FEATURE_INCOMPAT_SUPP; if (features) { nilfs_err(sb, "couldn't mount because of unsupported optional features (%llx)", (unsigned long long)features); return -EINVAL; } features = le64_to_cpu(sbp->s_feature_compat_ro) & ~NILFS_FEATURE_COMPAT_RO_SUPP; if (!sb_rdonly(sb) && features) { nilfs_err(sb, "couldn't mount RDWR because of unsupported optional features (%llx)", (unsigned long long)features); return -EINVAL; } return 0; } static int nilfs_get_root_dentry(struct super_block *sb, struct nilfs_root *root, struct dentry **root_dentry) { struct inode *inode; struct dentry *dentry; int ret = 0; inode = nilfs_iget(sb, root, NILFS_ROOT_INO); if (IS_ERR(inode)) { ret = PTR_ERR(inode); nilfs_err(sb, "error %d getting root inode", ret); goto out; } if (!S_ISDIR(inode->i_mode) || !inode->i_blocks || !inode->i_size) { iput(inode); nilfs_err(sb, "corrupt root inode"); ret = -EINVAL; goto out; } if (root->cno == NILFS_CPTREE_CURRENT_CNO) { dentry = d_find_alias(inode); if (!dentry) { dentry = d_make_root(inode); if (!dentry) { ret = -ENOMEM; goto 
failed_dentry; } } else { iput(inode); } } else { dentry = d_obtain_root(inode); if (IS_ERR(dentry)) { ret = PTR_ERR(dentry); goto failed_dentry; } } *root_dentry = dentry; out: return ret; failed_dentry: nilfs_err(sb, "error %d getting root dentry", ret); goto out; } static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, struct dentry **root_dentry) { struct the_nilfs *nilfs = s->s_fs_info; struct nilfs_root *root; int ret; mutex_lock(&nilfs->ns_snapshot_mount_mutex); down_read(&nilfs->ns_segctor_sem); ret = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, cno); up_read(&nilfs->ns_segctor_sem); if (ret < 0) { ret = (ret == -ENOENT) ? -EINVAL : ret; goto out; } else if (!ret) { nilfs_err(s, "The specified checkpoint is not a snapshot (checkpoint number=%llu)", (unsigned long long)cno); ret = -EINVAL; goto out; } ret = nilfs_attach_checkpoint(s, cno, false, &root); if (ret) { nilfs_err(s, "error %d while loading snapshot (checkpoint number=%llu)", ret, (unsigned long long)cno); goto out; } ret = nilfs_get_root_dentry(s, root, root_dentry); nilfs_put_root(root); out: mutex_unlock(&nilfs->ns_snapshot_mount_mutex); return ret; } /** * nilfs_tree_is_busy() - try to shrink dentries of a checkpoint * @root_dentry: root dentry of the tree to be shrunk * * Return: true if the tree was in-use, false otherwise. */ static bool nilfs_tree_is_busy(struct dentry *root_dentry) { shrink_dcache_parent(root_dentry); return d_count(root_dentry) > 1; } int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_root *root; struct inode *inode; struct dentry *dentry; int ret; if (cno > nilfs->ns_cno) return false; if (cno >= nilfs_last_cno(nilfs)) return true; /* protect recent checkpoints */ ret = false; root = nilfs_lookup_root(nilfs, cno); if (root) { inode = nilfs_ilookup(sb, root, NILFS_ROOT_INO); if (inode) { dentry = d_find_alias(inode); if (dentry) { ret = nilfs_tree_is_busy(dentry); dput(dentry); } iput(inode); } nilfs_put_root(root); } return ret; } /** * nilfs_fill_super() - initialize a super block instance * @sb: super_block * @fc: filesystem context * * This function is called exclusively by nilfs->ns_mount_mutex. * So, the recovery process is protected from other simultaneous mounts. * * Return: 0 on success, or a negative error code on failure. 
*/ static int nilfs_fill_super(struct super_block *sb, struct fs_context *fc) { struct the_nilfs *nilfs; struct nilfs_root *fsroot; struct nilfs_fs_context *ctx = fc->fs_private; __u64 cno; int err; nilfs = alloc_nilfs(sb); if (!nilfs) return -ENOMEM; sb->s_fs_info = nilfs; err = init_nilfs(nilfs, sb); if (err) goto failed_nilfs; /* Copy in parsed mount options */ nilfs->ns_mount_opt = ctx->ns_mount_opt; sb->s_op = &nilfs_sops; sb->s_export_op = &nilfs_export_ops; sb->s_root = NULL; sb->s_time_gran = 1; sb->s_max_links = NILFS_LINK_MAX; sb->s_bdi = bdi_get(sb->s_bdev->bd_disk->bdi); err = load_nilfs(nilfs, sb); if (err) goto failed_nilfs; super_set_uuid(sb, nilfs->ns_sbp[0]->s_uuid, sizeof(nilfs->ns_sbp[0]->s_uuid)); super_set_sysfs_name_bdev(sb); cno = nilfs_last_cno(nilfs); err = nilfs_attach_checkpoint(sb, cno, true, &fsroot); if (err) { nilfs_err(sb, "error %d while loading last checkpoint (checkpoint number=%llu)", err, (unsigned long long)cno); goto failed_unload; } if (!sb_rdonly(sb)) { err = nilfs_attach_log_writer(sb, fsroot); if (err) goto failed_checkpoint; } err = nilfs_get_root_dentry(sb, fsroot, &sb->s_root); if (err) goto failed_segctor; nilfs_put_root(fsroot); if (!sb_rdonly(sb)) { down_write(&nilfs->ns_sem); nilfs_setup_super(sb, true); up_write(&nilfs->ns_sem); } return 0; failed_segctor: nilfs_detach_log_writer(sb); failed_checkpoint: nilfs_put_root(fsroot); failed_unload: nilfs_sysfs_delete_device_group(nilfs); iput(nilfs->ns_sufile); iput(nilfs->ns_cpfile); iput(nilfs->ns_dat); failed_nilfs: destroy_nilfs(nilfs); return err; } static int nilfs_reconfigure(struct fs_context *fc) { struct nilfs_fs_context *ctx = fc->fs_private; struct super_block *sb = fc->root->d_sb; struct the_nilfs *nilfs = sb->s_fs_info; int err; sync_filesystem(sb); err = -EINVAL; if (!nilfs_valid_fs(nilfs)) { nilfs_warn(sb, "couldn't remount because the filesystem is in an incomplete recovery state"); goto ignore_opts; } if ((bool)(fc->sb_flags & SB_RDONLY) == sb_rdonly(sb)) goto out; if (fc->sb_flags & SB_RDONLY) { sb->s_flags |= SB_RDONLY; /* * Remounting a valid RW partition RDONLY, so set * the RDONLY flag and then mark the partition as valid again. */ down_write(&nilfs->ns_sem); nilfs_cleanup_super(sb); up_write(&nilfs->ns_sem); } else { __u64 features; struct nilfs_root *root; /* * Mounting a RDONLY partition read-write, so reread and * store the current valid flag. (It may have been changed * by fsck since we originally mounted the partition.)